commit 16c00db4bb607a7b42359858386fd54392f90377
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   Tue May 15 10:48:36 2018 -0700

    Merge tag 'afs-fixes-20180514' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
    
    Pull AFS fixes from David Howells:
     "Here's a set of patches that fix a number of bugs in the in-kernel AFS
      client, including:
    
       - Fix directory locking to not use individual page locks for
         directory reading/scanning but rather to use a semaphore on the
         afs_vnode struct as the directory contents must be read in a single
         blob and data from different reads must not be mixed as the entire
         contents may be shuffled about between reads.
    
       - Fix address list parsing to handle port specifiers correctly.
    
       - Only give up callback records on a server if we actually talked to
         that server (we might not be able to access a server).
    
       - Fix some callback handling bugs, including refcounting,
         whole-volume callbacks and when callbacks actually get broken in
         response to a CB.CallBack op.
    
       - Fix some server/address rotation bugs, including giving up if we
         can't probe a server; giving up if a server says it doesn't have a
         volume, but there are more servers to try.
    
       - Fix the decoding of fetched statuses to be OpenAFS compatible.
    
       - Fix the handling of server lookups in Cache Manager ops (such as
         CB.InitCallBackState3) to use a UUID if possible and to handle no
         server being found.
    
       - Fix a bug in server lookup where not all addresses are compared.
    
       - Fix the non-encryption of calls that prevents some servers from
         being accessed (this also requires an AF_RXRPC patch that has
         already gone in through the net tree).
    
      There's also a patch that adds tracepoints to log Cache Manager ops
      that don't find a matching server, either by UUID or by address"
    
    * tag 'afs-fixes-20180514' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
      afs: Fix the non-encryption of calls
      afs: Fix CB.CallBack handling
      afs: Fix whole-volume callback handling
      afs: Fix afs_find_server search loop
      afs: Fix the handling of an unfound server in CM operations
      afs: Add a tracepoint to record callbacks from unlisted servers
      afs: Fix the handling of CB.InitCallBackState3 to find the server by UUID
      afs: Fix VNOVOL handling in address rotation
      afs: Fix AFSFetchStatus decoder to provide OpenAFS compatibility
      afs: Fix server rotation's handling of fileserver probe failure
      afs: Fix refcounting in callback registration
      afs: Fix giving up callbacks on server destruction
      afs: Fix address list parsing
      afs: Fix directory page locking

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3e5135d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,36 @@
+PERF-CFLAGS
+PERF-GUI-VARS
+PERF-VERSION-FILE
+FEATURE-DUMP
+perf
+perf-read-vdso32
+perf-read-vdsox32
+perf-help
+perf-record
+perf-report
+perf-stat
+perf-top
+perf*.1
+perf*.xml
+perf*.html
+common-cmds.h
+perf.data
+perf.data.old
+output.svg
+perf-archive
+perf-with-kcore
+tags
+TAGS
+cscope*
+config.mak
+config.mak.autogen
+*-bison.*
+*-flex.*
+*.pyc
+*.pyo
+.config-detected
+util/intel-pt-decoder/inat-tables.c
+arch/*/include/generated/
+trace/beauty/generated/
+pmu-events/pmu-events.c
+pmu-events/jevents
diff --git a/Build b/Build
new file mode 100644
index 0000000..e5232d5
--- /dev/null
+++ b/Build
@@ -0,0 +1,55 @@
+perf-y += builtin-bench.o
+perf-y += builtin-annotate.o
+perf-y += builtin-config.o
+perf-y += builtin-diff.o
+perf-y += builtin-evlist.o
+perf-y += builtin-ftrace.o
+perf-y += builtin-help.o
+perf-y += builtin-sched.o
+perf-y += builtin-buildid-list.o
+perf-y += builtin-buildid-cache.o
+perf-y += builtin-kallsyms.o
+perf-y += builtin-list.o
+perf-y += builtin-record.o
+perf-y += builtin-report.o
+perf-y += builtin-stat.o
+perf-y += builtin-timechart.o
+perf-y += builtin-top.o
+perf-y += builtin-script.o
+perf-y += builtin-kmem.o
+perf-y += builtin-lock.o
+perf-y += builtin-kvm.o
+perf-y += builtin-inject.o
+perf-y += builtin-mem.o
+perf-y += builtin-data.o
+perf-y += builtin-version.o
+perf-y += builtin-c2c.o
+
+perf-$(CONFIG_TRACE) += builtin-trace.o
+perf-$(CONFIG_LIBELF) += builtin-probe.o
+
+perf-y += bench/
+perf-y += tests/
+
+perf-y += perf.o
+
+paths += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))"
+paths += -DPERF_INFO_PATH="BUILD_STR($(infodir_SQ))"
+paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))"
+
+CFLAGS_builtin-help.o      += $(paths)
+CFLAGS_builtin-timechart.o += $(paths)
+CFLAGS_perf.o              += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))"	\
+			      -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))"	\
+			      -DPREFIX="BUILD_STR($(prefix_SQ))"
+CFLAGS_builtin-trace.o	   += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))"
+CFLAGS_builtin-report.o	   += -DTIPDIR="BUILD_STR($(tipdir_SQ))"
+CFLAGS_builtin-report.o	   += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)"
+
+libperf-y += util/
+libperf-y += arch/
+libperf-y += ui/
+libperf-y += scripts/
+libperf-$(CONFIG_TRACE) += trace/beauty/
+
+gtk-y += ui/gtk/
diff --git a/CREDITS b/CREDITS
new file mode 100644
index 0000000..c2ddcb3
--- /dev/null
+++ b/CREDITS
@@ -0,0 +1,30 @@
+Most of the infrastructure that 'perf' uses here has been reused
+from the Git project, as of version:
+
+    66996ec: Sync with 1.6.2.4
+
+Here is an (incomplete!) list of main contributors to those files
+in util/* and elsewhere:
+
+ Alex Riesen
+ Christian Couder
+ Dmitry Potapov
+ Jeff King
+ Johannes Schindelin
+ Johannes Sixt
+ Junio C Hamano
+ Linus Torvalds
+ Matthias Kestenholz
+ Michal Ostrowski
+ Miklos Vajna
+ Petr Baudis
+ Pierre Habouzit
+ René Scharfe
+ Samuel Tardieu
+ Shawn O. Pearce
+ Steffen Prohaska
+ Steve Haslam
+
+Thanks guys!
+
+The full history of the files can be found in the upstream Git commits.
diff --git a/Documentation/Build.txt b/Documentation/Build.txt
new file mode 100644
index 0000000..f6fc650
--- /dev/null
+++ b/Documentation/Build.txt
@@ -0,0 +1,49 @@
+
+1) perf build
+=============
+The perf build process consists of several separate building blocks,
+which are linked together to form the perf binary:
+  - libperf library (static)
+  - perf builtin commands
+  - traceevent library (static)
+  - GTK ui library
+
+Several makefiles govern the perf build:
+
+  - Makefile
+    top level Makefile working as a wrapper that calls the main
+    Makefile.perf with a -j option to do parallel builds.
+
+  - Makefile.perf
+    main makefile that triggers build of all perf objects including
+    installation and documentation processing.
+
+  - tools/build/Makefile.build
+    main makefile of the build framework
+
+  - tools/build/Build.include
+    build framework generic definitions
+
+  - Build makefiles
+    makefiles that define build objects
+
+Please refer to tools/build/Documentation/Build.txt for more
+information about build framework.
+
+
+2) perf build
+=============
+The Makefile.perf triggers the build framework for build objects:
+   perf, libperf, gtk
+
+resulting in the following objects:
+  $ ls  *-in.o
+  gtk-in.o  libperf-in.o  perf-in.o
+
+Those objects are then used in final linking:
+  libperf-gtk.so <- gtk-in.o  libperf-in.o
+  perf           <- perf-in.o libperf-in.o
+
+
+NOTE: this description omits the other libraries involved and
+      focuses only on the build framework outcomes.
diff --git a/Documentation/Makefile b/Documentation/Makefile
new file mode 100644
index 0000000..db11478
--- /dev/null
+++ b/Documentation/Makefile
@@ -0,0 +1,343 @@
+include ../../scripts/Makefile.include
+include ../../scripts/utilities.mak
+
+MAN1_TXT= \
+	$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
+		$(wildcard perf-*.txt)) \
+	perf.txt
+MAN5_TXT=
+MAN7_TXT=
+
+MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
+_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
+_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
+
+MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
+MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
+
+ARTICLES =
+# with their own formatting rules.
+SP_ARTICLES =
+API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt)))
+SP_ARTICLES += $(API_DOCS)
+SP_ARTICLES += technical/api-index
+
+_DOC_HTML = $(_MAN_HTML)
+_DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
+DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML))
+
+_DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
+_DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
+_DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
+
+DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1))
+DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5))
+DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7))
+
+# Make the path relative to DESTDIR, not prefix
+ifndef DESTDIR
+prefix?=$(HOME)
+endif
+bindir?=$(prefix)/bin
+htmldir?=$(prefix)/share/doc/perf-doc
+pdfdir?=$(prefix)/share/doc/perf-doc
+mandir?=$(prefix)/share/man
+man1dir=$(mandir)/man1
+man5dir=$(mandir)/man5
+man7dir=$(mandir)/man7
+
+ASCIIDOC=asciidoc
+ASCIIDOC_EXTRA = --unsafe
+MANPAGE_XSL = manpage-normal.xsl
+XMLTO_EXTRA =
+INSTALL?=install
+RM ?= rm -f
+DOC_REF = origin/man
+HTML_REF = origin/html
+
+infodir?=$(prefix)/share/info
+MAKEINFO=makeinfo
+INSTALL_INFO=install-info
+DOCBOOK2X_TEXI=docbook2x-texi
+DBLATEX=dblatex
+XMLTO=xmlto
+ifndef PERL_PATH
+	PERL_PATH = /usr/bin/perl
+endif
+
+-include ../config.mak.autogen
+-include ../config.mak
+
+_tmp_tool_path := $(call get-executable,$(ASCIIDOC))
+ifeq ($(_tmp_tool_path),)
+	missing_tools = $(ASCIIDOC)
+endif
+
+_tmp_tool_path := $(call get-executable,$(XMLTO))
+ifeq ($(_tmp_tool_path),)
+	missing_tools += $(XMLTO)
+endif
+
+#
+# For asciidoc ...
+#	-7.1.2,	no extra settings are needed.
+#	8.0-,	set ASCIIDOC8.
+#
+
+#
+# For docbook-xsl ...
+#	-1.68.1,	set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
+#	1.69.0,		no extra settings are needed?
+#	1.69.1-1.71.0,	set DOCBOOK_SUPPRESS_SP?
+#	1.71.1,		no extra settings are needed?
+#	1.72.0,		set DOCBOOK_XSL_172.
+#	1.73.0-,	set ASCIIDOC_NO_ROFF
+#
+
+#
+# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
+# of 'the ".ft C" problem' in your generated manpages, and you
+# instead ended up with weird characters around callouts, try
+# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
+#
+
+ifdef ASCIIDOC8
+ASCIIDOC_EXTRA += -a asciidoc7compatible
+endif
+ifdef DOCBOOK_XSL_172
+ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
+MANPAGE_XSL = manpage-1.72.xsl
+else
+	ifdef ASCIIDOC_NO_ROFF
+	# docbook-xsl after 1.72 needs the regular XSL, but will not
+	# pass-thru raw roff codes from asciidoc.conf, so turn them off.
+	ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
+	endif
+endif
+ifdef MAN_BOLD_LITERAL
+XMLTO_EXTRA += -m manpage-bold-literal.xsl
+endif
+ifdef DOCBOOK_SUPPRESS_SP
+XMLTO_EXTRA += -m manpage-suppress-sp.xsl
+endif
+
+SHELL_PATH ?= $(SHELL)
+# Shell quote;
+SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
+
+#
+# Please note that there is a minor bug in asciidoc.
+# The version after 6.0.3 _will_ include the patch found here:
+#   http://marc.theaimsgroup.com/?l=perf&m=111558757202243&w=2
+#
+# Until that version is released you may have to apply the patch
+# yourself - yes, all 6 characters of it!
+#
+
+QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
+QUIET_SUBDIR1  =
+
+ifneq ($(findstring $(MAKEFLAGS),w),w)
+PRINT_DIR = --no-print-directory
+else # "make -w"
+NO_SUBDIR = :
+endif
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifneq ($(V),1)
+	QUIET_ASCIIDOC	= @echo '  ASCIIDOC '$@;
+	QUIET_XMLTO	= @echo '  XMLTO    '$@;
+	QUIET_DB2TEXI	= @echo '  DB2TEXI  '$@;
+	QUIET_MAKEINFO	= @echo '  MAKEINFO '$@;
+	QUIET_DBLATEX	= @echo '  DBLATEX  '$@;
+	QUIET_XSLTPROC	= @echo '  XSLTPROC '$@;
+	QUIET_GEN	= @echo '  GEN      '$@;
+	QUIET_STDERR	= 2> /dev/null
+	QUIET_SUBDIR0	= +@subdir=
+	QUIET_SUBDIR1	= ;$(NO_SUBDIR) \
+			   echo '  SUBDIR   ' $$subdir; \
+			  $(MAKE) $(PRINT_DIR) -C $$subdir
+	export V
+endif
+endif
+
+all: html man
+
+html: $(DOC_HTML)
+
+$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf
+
+man: man1 man5 man7
+man1: $(DOC_MAN1)
+man5: $(DOC_MAN5)
+man7: $(DOC_MAN7)
+
+info: $(OUTPUT)perf.info $(OUTPUT)perfman.info
+
+pdf: $(OUTPUT)user-manual.pdf
+
+install: install-man
+
+check-man-tools:
+ifdef missing_tools
+	$(error "You need to install $(missing_tools) for man pages")
+endif
+
+do-install-man: man
+	$(call QUIET_INSTALL, Documentation-man) \
+		$(INSTALL) -d -m 755 $(DESTDIR)$(man1dir); \
+#		$(INSTALL) -d -m 755 $(DESTDIR)$(man5dir); \
+#		$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \
+		$(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir); \
+#		$(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \
+#		$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+
+install-man: check-man-tools man do-install-man
+
+ifdef missing_tools
+  DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
+else
+  DO_INSTALL_MAN = do-install-man
+endif
+
+try-install-man: $(DO_INSTALL_MAN)
+
+install-info: info
+	$(call QUIET_INSTALL, Documentation-info) \
+		$(INSTALL) -d -m 755 $(DESTDIR)$(infodir); \
+		$(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir); \
+	if test -r $(DESTDIR)$(infodir)/dir; then \
+		$(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
+		$(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
+	else \
+	  echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
+	fi
+
+install-pdf: pdf
+	$(call QUIET_INSTALL, Documentation-pdf) \
+		$(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir); \
+		$(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
+
+#install-html: html
+#	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
+
+
+# Determine "include::" file references in asciidoc files and record them
+# in $(OUTPUT)doc.dep (written to a "+" temp file first, then renamed), which
+# is then included below; "-include" tolerates the file not existing yet.
+$(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl
+	$(QUIET_GEN)$(RM) $@+ $@ && \
+	$(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
+	mv $@+ $@
+
+-include $(OUTPUT)doc.dep
+
+_cmds_txt = cmds-ancillaryinterrogators.txt \
+	cmds-ancillarymanipulators.txt \
+	cmds-mainporcelain.txt \
+	cmds-plumbinginterrogators.txt \
+	cmds-plumbingmanipulators.txt \
+	cmds-synchingrepositories.txt \
+	cmds-synchelpers.txt \
+	cmds-purehelpers.txt \
+	cmds-foreignscminterface.txt
+cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt))
+
+$(cmds_txt): $(OUTPUT)cmd-list.made
+
+$(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
+	$(QUIET_GEN)$(RM) $@ && \
+	$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
+	date >$@
+
+CLEAN_FILES =									\
+	$(MAN_XML) $(addsuffix +,$(MAN_XML))					\
+	$(MAN_HTML) $(addsuffix +,$(MAN_HTML))					\
+	$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7)				\
+	$(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++			\
+	$(OUTPUT)perf.info $(OUTPUT)perfman.info				\
+	$(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep		\
+	$(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt		\
+	$(cmds_txt) $(OUTPUT)*.made
+clean:
+	$(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
+
+$(MAN_HTML): $(OUTPUT)%.html : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+	mv $@+ $@
+
+$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
+	$(QUIET_XMLTO)$(RM) $@ && \
+	$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+$(OUTPUT)%.xml : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+	mv $@+ $@
+
+XSLT = docbook.xsl
+XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
+
+$(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml
+	$(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
+
+$(OUTPUT)perf.info: $(OUTPUT)user-manual.texi
+	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi
+
+$(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml
+	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	$(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
+	$(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
+	rm $@++ && \
+	mv $@+ $@
+
+$(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml
+	$(QUIET_DBLATEX)$(RM) $@+ $@ && \
+	$(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
+	mv $@+ $@
+
+$(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl
+	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
+		--to-stdout $(xml) &&) true) > $@++ && \
+	$(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \
+	rm $@++ && \
+	mv $@+ $@
+
+$(OUTPUT)perfman.info: $(OUTPUT)perfman.texi
+	$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
+
+$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
+	$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+	$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
+	mv $@+ $@
+
+howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
+	$(QUIET_GEN)$(RM) $@+ $@ && \
+	'$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \
+	mv $@+ $@
+
+$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
+	$(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt
+
+WEBDOC_DEST = /pub/software/tools/perf/docs
+
+$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
+	mv $@+ $@
+
+# UNIMPLEMENTED
+#install-webdoc : html
+#	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
+
+# quick-install: quick-install-man
+
+# quick-install-man:
+#	'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
+
+#quick-install-html:
+#	'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
diff --git a/Documentation/android.txt b/Documentation/android.txt
new file mode 100644
index 0000000..24a5999
--- /dev/null
+++ b/Documentation/android.txt
@@ -0,0 +1,78 @@
+How to compile perf for Android
+=========================================
+
+I. Set the Android NDK environment
+------------------------------------------------
+
+(a). Use the Android NDK
+------------------------------------------------
+1. You need to download and install the Android Native Development Kit (NDK).
+Set the NDK variable to point to the path where you installed the NDK:
+  export NDK=/path/to/android-ndk
+
+2. Set cross-compiling environment variables for NDK toolchain and sysroot.
+For arm:
+  export NDK_TOOLCHAIN=${NDK}/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-
+  export NDK_SYSROOT=${NDK}/platforms/android-24/arch-arm
+For x86:
+  export NDK_TOOLCHAIN=${NDK}/toolchains/x86-4.9/prebuilt/linux-x86_64/bin/i686-linux-android-
+  export NDK_SYSROOT=${NDK}/platforms/android-24/arch-x86
+
+This method is only tested for Android NDK versions Revision 11b and later.
+perf uses some bionic enhancements that are not included in prior NDK versions.
+You can use method (b) described below instead.
+
+(b). Use the Android source tree
+-----------------------------------------------
+1. Download the master branch of the Android source tree.
+Set the environment for the target you want using:
+  source build/envsetup.sh
+  lunch
+
+2. Build your own NDK sysroot to contain latest bionic changes and set the
+NDK sysroot environment variable.
+  cd ${ANDROID_BUILD_TOP}/ndk
+For arm:
+  ./build/tools/build-ndk-sysroot.sh --abi=arm
+  export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-arm
+For x86:
+  ./build/tools/build-ndk-sysroot.sh --abi=x86
+  export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-x86
+
+3. Set the NDK toolchain environment variable.
+For arm:
+  export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/arm-linux-androideabi-
+For x86:
+  export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/i686-linux-android-
+
+II. Compile perf for Android
+------------------------------------------------
+You need to run make with the NDK toolchain and sysroot defined above:
+For arm:
+  make WERROR=0 ARCH=arm CROSS_COMPILE=${NDK_TOOLCHAIN} EXTRA_CFLAGS="-pie --sysroot=${NDK_SYSROOT}"
+For x86:
+  make WERROR=0 ARCH=x86 CROSS_COMPILE=${NDK_TOOLCHAIN} EXTRA_CFLAGS="-pie --sysroot=${NDK_SYSROOT}"
+
+III. Install perf
+-----------------------------------------------
+You need to connect to your Android device/emulator using adb.
+Install perf using:
+  adb push perf /data/perf
+
+If you also want to use perf-archive you need busybox tools for Android.
+For installing perf-archive, you first need to replace #!/bin/bash with #!/system/bin/sh:
+  sed 's/#!\/bin\/bash/#!\/system\/bin\/sh/g' perf-archive > /tmp/perf-archive
+  chmod +x /tmp/perf-archive
+  adb push /tmp/perf-archive /data/perf-archive
+
+IV. Environment settings for running perf
+------------------------------------------------
+Some perf features need environment variables to run properly.
+You need to set these before running perf on the target:
+  adb shell
+  # PERF_PAGER=cat
+
+V. Run perf
+------------------------------------------------
+Run perf on your device/emulator to which you previously connected using adb:
+  # ./data/perf
diff --git a/Documentation/asciidoc.conf b/Documentation/asciidoc.conf
new file mode 100644
index 0000000..356b23a
--- /dev/null
+++ b/Documentation/asciidoc.conf
@@ -0,0 +1,91 @@
+## linkperf: macro
+#
+# Usage: linkperf:command[manpage-section]
+#
+# Note, {0} is the manpage section, while {target} is the command.
+#
+# Show PERF link as: <command>(<section>); if section is defined, else just show
+# the command.
+
+[macros]
+(?su)[\\]?(?P<name>linkperf):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
+
+[attributes]
+asterisk=&#42;
+plus=&#43;
+caret=&#94;
+startsb=&#91;
+endsb=&#93;
+tilde=&#126;
+
+ifdef::backend-docbook[]
+[linkperf-inlinemacro]
+{0%{target}}
+{0#<citerefentry>}
+{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
+{0#</citerefentry>}
+endif::backend-docbook[]
+
+ifdef::backend-docbook[]
+ifndef::perf-asciidoc-no-roff[]
+# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
+# v1.72 breaks with this because it replaces dots not in roff requests.
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+ifdef::doctype-manpage[]
+&#10;.ft C&#10;
+endif::doctype-manpage[]
+|
+ifdef::doctype-manpage[]
+&#10;.ft&#10;
+endif::doctype-manpage[]
+</literallayout>
+{title#}</example>
+endif::perf-asciidoc-no-roff[]
+
+ifdef::perf-asciidoc-no-roff[]
+ifdef::doctype-manpage[]
+# The following two small workarounds insert a simple paragraph after screen
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+|
+</literallayout><simpara></simpara>
+{title#}</example>
+
+[verseblock]
+<formalpara{id? id="{id}"}><title>{title}</title><para>
+{title%}<literallayout{id? id="{id}"}>
+{title#}<literallayout>
+|
+</literallayout>
+{title#}</para></formalpara>
+{title%}<simpara></simpara>
+endif::doctype-manpage[]
+endif::perf-asciidoc-no-roff[]
+endif::backend-docbook[]
+
+ifdef::doctype-manpage[]
+ifdef::backend-docbook[]
+[header]
+template::[header-declarations]
+<refentry>
+<refmeta>
+<refentrytitle>{mantitle}</refentrytitle>
+<manvolnum>{manvolnum}</manvolnum>
+<refmiscinfo class="source">perf</refmiscinfo>
+<refmiscinfo class="version">{perf_version}</refmiscinfo>
+<refmiscinfo class="manual">perf Manual</refmiscinfo>
+</refmeta>
+<refnamediv>
+  <refname>{manname}</refname>
+  <refpurpose>{manpurpose}</refpurpose>
+</refnamediv>
+endif::backend-docbook[]
+endif::doctype-manpage[]
+
+ifdef::backend-xhtml11[]
+[linkperf-inlinemacro]
+<a href="{target}.html">{target}{0?({0})}</a>
+endif::backend-xhtml11[]
diff --git a/Documentation/callchain-overhead-calculation.txt b/Documentation/callchain-overhead-calculation.txt
new file mode 100644
index 0000000..1a75792
--- /dev/null
+++ b/Documentation/callchain-overhead-calculation.txt
@@ -0,0 +1,108 @@
+Overhead calculation
+--------------------
+The overhead can be shown in two columns as 'Children' and 'Self' when
+perf collects callchains.  The 'self' overhead is simply calculated by
+adding all period values of the entry - usually a function (symbol).
+This is the value that perf shows traditionally and the sum of all the
+'self' overhead values should be 100%.
+
+The 'children' overhead is calculated by adding all period values of
+the child functions so that it can show the total overhead of the
+higher level functions even if they don't directly execute much.
+'Children' here means functions that are called from another (parent)
+function.
+
+It might be confusing that the sum of all the 'children' overhead
+values exceeds 100% since each of them is already an accumulation of
+'self' overhead of its child functions.  But with this enabled, users
+can find which function has the most overhead even if samples are
+spread over the children.
+
+Consider the following example; there are three functions like below.
+
+-----------------------
+void foo(void) {
+    /* do something */
+}
+
+void bar(void) {
+    /* do something */
+    foo();
+}
+
+int main(void) {
+    bar();
+    return 0;
+}
+-----------------------
+
+In this case 'foo' is a child of 'bar', and 'bar' is an immediate
+child of 'main' so 'foo' also is a child of 'main'.  In other words,
+'main' is a parent of 'foo' and 'bar', and 'bar' is a parent of 'foo'.
+
+Suppose all samples are recorded in 'foo' and 'bar' only.  When it's
+recorded with callchains the output will show something like below
+in the usual (self-overhead-only) output of perf report:
+
+----------------------------------
+Overhead  Symbol
+........  .....................
+  60.00%  foo
+          |
+          --- foo
+              bar
+              main
+              __libc_start_main
+
+  40.00%  bar
+          |
+          --- bar
+              main
+              __libc_start_main
+----------------------------------
+
+When the --children option is enabled, the 'self' overhead values of
+child functions (i.e. 'foo' and 'bar') are added to the parents to
+calculate the 'children' overhead.  In this case the report could be
+displayed as:
+
+-------------------------------------------
+Children      Self  Symbol
+........  ........  ....................
+ 100.00%     0.00%  __libc_start_main
+          |
+          --- __libc_start_main
+
+ 100.00%     0.00%  main
+          |
+          --- main
+              __libc_start_main
+
+ 100.00%    40.00%  bar
+          |
+          --- bar
+              main
+              __libc_start_main
+
+  60.00%    60.00%  foo
+          |
+          --- foo
+              bar
+              main
+              __libc_start_main
+-------------------------------------------
+
+In the above output, the 'self' overhead of 'foo' (60%) was added to the
+'children' overhead of 'bar', 'main' and '\_\_libc_start_main'.
+Likewise, the 'self' overhead of 'bar' (40%) was added to the
+'children' overhead of 'main' and '\_\_libc_start_main'.
+
+So '\_\_libc_start_main' and 'main' are shown first since they have
+the same (100%) 'children' overhead (even though they have zero 'self'
+overhead) and they are the parents of 'foo' and 'bar'.
+
+Since v3.16 the 'children' overhead is shown by default and the output
+is sorted by its values. The 'children' overhead is disabled by
+specifying --no-children option on the command line or by adding
+'report.children = false' or 'top.children = false' in the perf config
+file.
diff --git a/Documentation/examples.txt b/Documentation/examples.txt
new file mode 100644
index 0000000..a4e3921
--- /dev/null
+++ b/Documentation/examples.txt
@@ -0,0 +1,225 @@
+
+		------------------------------
+		****** perf by examples ******
+		------------------------------
+
+[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ]
+
+
+First, discovery/enumeration of available counters can be done via
+'perf list':
+
+titan:~> perf list
+  [...]
+  kmem:kmalloc                             [Tracepoint event]
+  kmem:kmem_cache_alloc                    [Tracepoint event]
+  kmem:kmalloc_node                        [Tracepoint event]
+  kmem:kmem_cache_alloc_node               [Tracepoint event]
+  kmem:kfree                               [Tracepoint event]
+  kmem:kmem_cache_free                     [Tracepoint event]
+  kmem:mm_page_free                        [Tracepoint event]
+  kmem:mm_page_free_batched                [Tracepoint event]
+  kmem:mm_page_alloc                       [Tracepoint event]
+  kmem:mm_page_alloc_zone_locked           [Tracepoint event]
+  kmem:mm_page_pcpu_drain                  [Tracepoint event]
+  kmem:mm_page_alloc_extfrag               [Tracepoint event]
+
+Then any (or all) of the above event sources can be activated and
+measured. For example the page alloc/free properties of a 'hackbench
+run' are:
+
+ titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc
+ -e kmem:mm_page_free_batched -e kmem:mm_page_free ./hackbench 10
+ Time: 0.575
+
+ Performance counter stats for './hackbench 10':
+
+          13857  kmem:mm_page_pcpu_drain
+          27576  kmem:mm_page_alloc
+           6025  kmem:mm_page_free_batched
+          20934  kmem:mm_page_free
+
+    0.613972165  seconds time elapsed
+
+You can observe the statistical properties as well, by using the
+'repeat the workload N times' feature of perf stat:
+
+ titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e
+   kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
+   kmem:mm_page_free ./hackbench 10
+ Time: 0.627
+ Time: 0.644
+ Time: 0.564
+ Time: 0.559
+ Time: 0.626
+
+ Performance counter stats for './hackbench 10' (5 runs):
+
+          12920  kmem:mm_page_pcpu_drain    ( +-   3.359% )
+          25035  kmem:mm_page_alloc         ( +-   3.783% )
+           6104  kmem:mm_page_free_batched  ( +-   0.934% )
+          18376  kmem:mm_page_free	    ( +-   4.941% )
+
+    0.643954516  seconds time elapsed   ( +-   2.363% )
+
+Furthermore, these tracepoints can be used to sample the workload as
+well. For example the page allocations done by a 'git gc' can be
+captured the following way:
+
+ titan:~/git> perf record -e kmem:mm_page_alloc -c 1 ./git gc
+ Counting objects: 1148, done.
+ Delta compression using up to 2 threads.
+ Compressing objects: 100% (450/450), done.
+ Writing objects: 100% (1148/1148), done.
+ Total 1148 (delta 690), reused 1148 (delta 690)
+ [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ]
+
+To check which functions generated page allocations:
+
+ titan:~/git> perf report
+ # Samples: 10646
+ #
+ # Overhead          Command               Shared Object
+ # ........  ...............  ..........................
+ #
+    23.57%       git-repack  /lib64/libc-2.5.so
+    21.81%              git  /lib64/libc-2.5.so
+    14.59%              git  ./git
+    11.79%       git-repack  ./git
+     7.12%              git  /lib64/ld-2.5.so
+     3.16%       git-repack  /lib64/libpthread-2.5.so
+     2.09%       git-repack  /bin/bash
+     1.97%               rm  /lib64/libc-2.5.so
+     1.39%               mv  /lib64/ld-2.5.so
+     1.37%               mv  /lib64/libc-2.5.so
+     1.12%       git-repack  /lib64/ld-2.5.so
+     0.95%               rm  /lib64/ld-2.5.so
+     0.90%  git-update-serv  /lib64/libc-2.5.so
+     0.73%  git-update-serv  /lib64/ld-2.5.so
+     0.68%             perf  /lib64/libpthread-2.5.so
+     0.64%       git-repack  /usr/lib64/libz.so.1.2.3
+
+Or to see it on a more fine-grained level:
+
+titan:~/git> perf report --sort comm,dso,symbol
+# Samples: 10646
+#
+# Overhead          Command               Shared Object  Symbol
+# ........  ...............  ..........................  ......
+#
+     9.35%       git-repack  ./git                       [.] insert_obj_hash
+     9.12%              git  ./git                       [.] insert_obj_hash
+     7.31%              git  /lib64/libc-2.5.so          [.] memcpy
+     6.34%       git-repack  /lib64/libc-2.5.so          [.] _int_malloc
+     6.24%       git-repack  /lib64/libc-2.5.so          [.] memcpy
+     5.82%       git-repack  /lib64/libc-2.5.so          [.] __GI___fork
+     5.47%              git  /lib64/libc-2.5.so          [.] _int_malloc
+     2.99%              git  /lib64/libc-2.5.so          [.] memset
+
+Furthermore, call-graph sampling can be done too, of page
+allocations - to see precisely what kind of page allocations there
+are:
+
+ titan:~/git> perf record -g -e kmem:mm_page_alloc -c 1 ./git gc
+ Counting objects: 1148, done.
+ Delta compression using up to 2 threads.
+ Compressing objects: 100% (450/450), done.
+ Writing objects: 100% (1148/1148), done.
+ Total 1148 (delta 690), reused 1148 (delta 690)
+ [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ]
+
+ titan:~/git> perf report -g
+ # Samples: 10686
+ #
+ # Overhead          Command               Shared Object
+ # ........  ...............  ..........................
+ #
+    23.25%       git-repack  /lib64/libc-2.5.so
+                |
+                |--50.00%-- _int_free
+                |
+                |--37.50%-- __GI___fork
+                |          make_child
+                |
+                |--12.50%-- ptmalloc_unlock_all2
+                |          make_child
+                |
+                 --6.25%-- __GI_strcpy
+    21.61%              git  /lib64/libc-2.5.so
+                |
+                |--30.00%-- __GI_read
+                |          |
+                |           --83.33%-- git_config_from_file
+                |                     git_config
+                |                     |
+   [...]
+
+Or you can observe the whole system's page allocations for 10
+seconds:
+
+titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e
+kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
+kmem:mm_page_free sleep 10
+
+ Performance counter stats for 'sleep 10':
+
+         171585  kmem:mm_page_pcpu_drain
+         322114  kmem:mm_page_alloc
+          73623  kmem:mm_page_free_batched
+         254115  kmem:mm_page_free
+
+   10.000591410  seconds time elapsed
+
+Or observe how fluctuating the page allocations are, via statistical
+analysis done over ten 1-second intervals:
+
+ titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e
+   kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
+   kmem:mm_page_free sleep 1
+
+ Performance counter stats for 'sleep 1' (10 runs):
+
+          17254  kmem:mm_page_pcpu_drain    ( +-   3.709% )
+          34394  kmem:mm_page_alloc         ( +-   4.617% )
+           7509  kmem:mm_page_free_batched  ( +-   4.820% )
+          25653  kmem:mm_page_free	    ( +-   3.672% )
+
+    1.058135029  seconds time elapsed   ( +-   3.089% )
+
+Or you can annotate the recorded 'git gc' run on a per symbol basis
+and check which instructions/source-code generated page allocations:
+
+ titan:~/git> perf annotate __GI___fork
+ ------------------------------------------------
+  Percent |      Source code & Disassembly of libc-2.5.so
+ ------------------------------------------------
+          :
+          :
+          :      Disassembly of section .plt:
+          :      Disassembly of section .text:
+          :
+          :      00000031a2e95560 <__fork>:
+ [...]
+     0.00 :        31a2e95602:   b8 38 00 00 00          mov    $0x38,%eax
+     0.00 :        31a2e95607:   0f 05                   syscall
+    83.42 :        31a2e95609:   48 3d 00 f0 ff ff       cmp    $0xfffffffffffff000,%rax
+     0.00 :        31a2e9560f:   0f 87 4d 01 00 00       ja     31a2e95762 <__fork+0x202>
+     0.00 :        31a2e95615:   85 c0                   test   %eax,%eax
+
+( this shows that 83.42% of __GI___fork's page allocations come from
+  the 0x38 system call it performs. )
+
+etc. etc. - a lot more is possible. I could list a dozen
+other different use cases straight away - none of which is
+possible via /proc/vmstat.
+
+/proc/vmstat is not in the same league really, in terms of
+expressive power of system analysis and performance
+analysis.
+
+All that the above results needed were those new tracepoints
+in include/trace/events/kmem.h.
+
+	Ingo
+
+
diff --git a/Documentation/intel-bts.txt b/Documentation/intel-bts.txt
new file mode 100644
index 0000000..8bdc93b
--- /dev/null
+++ b/Documentation/intel-bts.txt
@@ -0,0 +1,86 @@
+Intel Branch Trace Store
+========================
+
+Overview
+========
+
+Intel BTS could be regarded as a predecessor to Intel PT and has some
+similarities because it can also identify every branch a program takes.  A
+notable difference is that Intel BTS has no timing information and as a
+consequence the present implementation is limited to per-thread recording.
+
+While decoding Intel BTS does not require walking the object code, the object
+code is still needed to pair up calls and returns correctly, consequently much
+of the Intel PT documentation applies also to Intel BTS.  Refer to the Intel PT
+documentation and consider that the PMU 'intel_bts' can usually be used in
+place of 'intel_pt' in the examples provided, with the proviso that per-thread
+recording must also be stipulated i.e. the --per-thread option for
+'perf record'.
+
+
+perf record
+===========
+
+new event
+---------
+
+The Intel BTS kernel driver creates a new PMU for Intel BTS.  The perf record
+option is:
+
+	-e intel_bts//
+
+Currently Intel BTS is limited to per-thread tracing so the --per-thread option
+is also needed.
+
+
+snapshot option
+---------------
+
+The snapshot option is the same as for Intel PT (refer to the Intel PT
+documentation).
+
+
+auxtrace mmap size option
+-------------------------
+
+The mmap size option is the same as for Intel PT (refer to the Intel PT
+documentation).
+
+
+perf script
+===========
+
+By default, perf script will decode trace data found in the perf.data file.
+This can be further controlled by option --itrace.  The --itrace option is
+the same as for Intel PT (refer to the Intel PT documentation) except that
+neither "instructions" events nor "transactions" events (and consequently call
+chains) are supported.
+
+To disable trace decoding entirely, use the option --no-itrace.
+
+
+dump option
+-----------
+
+perf script has an option (-D) to "dump" the events i.e. display the binary
+data.
+
+When -D is used, Intel BTS packets are displayed.
+
+To disable the display of Intel BTS packets, combine the -D option with
+--no-itrace.
+
+
+perf report
+===========
+
+By default, perf report will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace exactly the same as
+perf script.
+
+
+perf inject
+===========
+
+perf inject also accepts the --itrace option in which case tracing data is
+removed and replaced with the synthesized events. e.g.
+
+	perf inject --itrace -i perf.data -o perf.data.new
diff --git a/Documentation/intel-pt.txt b/Documentation/intel-pt.txt
new file mode 100644
index 0000000..76971d2
--- /dev/null
+++ b/Documentation/intel-pt.txt
@@ -0,0 +1,891 @@
+Intel Processor Trace
+=====================
+
+Overview
+========
+
+Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
+collects information about software execution such as control flow, execution
+modes and timings and formats it into highly compressed binary packets.
+Technical details are documented in the Intel 64 and IA-32 Architectures
+Software Developer Manuals, Chapter 36 Intel Processor Trace.
+
+Intel PT is first supported in Intel Core M and 5th generation Intel Core
+processors that are based on the Intel micro-architecture code name Broadwell.
+
+Trace data is collected by 'perf record' and stored within the perf.data file.
+See below for options to 'perf record'.
+
+Trace data must be 'decoded' which involves walking the object code and matching
+the trace data packets. For example a TNT packet only tells whether a
+conditional branch was taken or not taken, so to make use of that packet the
+decoder must know precisely which instruction was being executed.
+
+Decoding is done on-the-fly.  The decoder outputs samples in the same format as
+samples output by perf hardware events, for example as though the "instructions"
+or "branches" events had been recorded.  Presently 3 tools support this:
+'perf script', 'perf report' and 'perf inject'.  See below for more information
+on using those tools.
+
+The main distinguishing feature of Intel PT is that the decoder can determine
+the exact flow of software execution.  Intel PT can be used to understand why
+and how software got to a certain point, or behaved a certain way.  The
+software does not have to be recompiled, so Intel PT works with debug or release
+builds, however the executed images are needed - which makes use in JIT-compiled
+environments, or with self-modified code, a challenge.  Also symbols need to be
+provided to make sense of addresses.
+
+A limitation of Intel PT is that it produces huge amounts of trace data
+(hundreds of megabytes per second per core) which takes a long time to decode,
+for example two or three orders of magnitude longer than it took to collect.
+Another limitation is the performance impact of tracing, something that will
+vary depending on the use-case and architecture.
+
+
+Quickstart
+==========
+
+It is important to start small.  That is because it is easy to capture vastly
+more data than can possibly be processed.
+
+The simplest thing to do with Intel PT is userspace profiling of small programs.
+Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
+
+	perf record -e intel_pt//u ls
+
+And profiled with 'perf report' e.g.
+
+	perf report
+
+To also trace kernel space presents a problem, namely kernel self-modifying
+code.  A fairly good kernel image is available in /proc/kcore but to get an
+accurate image a copy of /proc/kcore needs to be made under the same conditions
+as the data capture.  A script perf-with-kcore can do that, but beware that the
+script makes use of 'sudo' to copy /proc/kcore.  If you have perf installed
+locally from the source tree you can do:
+
+	~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
+
+which will create a directory named 'pt_ls' and put the perf.data file and
+copies of /proc/kcore, /proc/kallsyms and /proc/modules into it.  Then to use
+'perf report' becomes:
+
+	~/libexec/perf-core/perf-with-kcore report pt_ls
+
+Because samples are synthesized after-the-fact, the sampling period can be
+selected for reporting. e.g. sample every microsecond
+
+	~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
+
+See the sections below for more information about the --itrace option.
+
+Beware that the smaller the period, the more samples are produced, and the
+longer it takes to process them.
+
+Also note that the coarseness of Intel PT timing information will start to
+distort the statistical value of the sampling as the sampling period becomes
+smaller.
+
+To represent software control flow, "branches" samples are produced.  By default
+a branch sample is synthesized for every single branch.  To get an idea what
+data is available you can use the 'perf script' tool with no parameters, which
+will list all the samples.
+
+	perf record -e intel_pt//u ls
+	perf script
+
+An interesting field that is not printed by default is 'flags' which can be
+displayed as follows:
+
+	perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags
+
+The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
+system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
+in transaction, respectively.
+
+While it is possible to create scripts to analyze the data, an alternative
+approach is available to export the data to a sqlite or postgresql database.
+Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
+and to script call-graph-from-sql.py for an example of using the database.
+
+There is also script intel-pt-events.py which provides an example of how to
+unpack the raw data for power events and PTWRITE.
+
+As mentioned above, it is easy to capture too much data.  One way to limit the
+data captured is to use 'snapshot' mode which is explained further below.
+Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
+
+Another problem that will be experienced is decoder errors.  They can be caused
+by inability to access the executed image, self-modified or JIT-ed code, or the
+inability to match side-band information (such as context switches and mmaps)
+which results in the decoder not knowing what code was executed.
+
+There is also the problem of perf not being able to copy the data fast enough,
+resulting in data being lost because the buffer was full.  See 'Buffer handling'
+below for more details.
+
+
+perf record
+===========
+
+new event
+---------
+
+The Intel PT kernel driver creates a new PMU for Intel PT.  PMU events are
+selected by providing the PMU name followed by the "config" separated by slashes.
+An enhancement has been made to allow default "config" e.g. the option
+
+	-e intel_pt//
+
+will use a default config value.  Currently that is the same as
+
+	-e intel_pt/tsc,noretcomp=0/
+
+which is the same as
+
+	-e intel_pt/tsc=1,noretcomp=0/
+
+Note there are now new config terms - see section 'config terms' further below.
+
+The config terms are listed in /sys/devices/intel_pt/format.  They are bit
+fields within the config member of the struct perf_event_attr which is
+passed to the kernel by the perf_event_open system call.  They correspond to bit
+fields in the IA32_RTIT_CTL MSR.  Here is a list of them and their definitions:
+
+	$ grep -H . /sys/bus/event_source/devices/intel_pt/format/*
+	/sys/bus/event_source/devices/intel_pt/format/cyc:config:1
+	/sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22
+	/sys/bus/event_source/devices/intel_pt/format/mtc:config:9
+	/sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17
+	/sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11
+	/sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27
+	/sys/bus/event_source/devices/intel_pt/format/tsc:config:10
+
+Note that the default config must be overridden for each term i.e.
+
+	-e intel_pt/noretcomp=0/
+
+is the same as:
+
+	-e intel_pt/tsc=1,noretcomp=0/
+
+So, to disable TSC packets use:
+
+	-e intel_pt/tsc=0/
+
+It is also possible to specify the config value explicitly:
+
+	-e intel_pt/config=0x400/
+
+Note that, as with all events, the event is suffixed with event modifiers:
+
+	u	userspace
+	k	kernel
+	h	hypervisor
+	G	guest
+	H	host
+	p	precise ip
+
+'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
+'p' is also not relevant to Intel PT.  So only options 'u' and 'k' are
+meaningful for Intel PT.
+
+perf_event_attr is displayed if the -vv option is used e.g.
+
+	------------------------------------------------------------
+	perf_event_attr:
+	type                             6
+	size                             112
+	config                           0x400
+	{ sample_period, sample_freq }   1
+	sample_type                      IP|TID|TIME|CPU|IDENTIFIER
+	read_format                      ID
+	disabled                         1
+	inherit                          1
+	exclude_kernel                   1
+	exclude_hv                       1
+	enable_on_exec                   1
+	sample_id_all                    1
+	------------------------------------------------------------
+	sys_perf_event_open: pid 31104  cpu 0  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 1  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 2  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 3  group_fd -1  flags 0x8
+	------------------------------------------------------------
+
+
+config terms
+------------
+
+The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
+Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
+Some of the features are reflected in new config terms.  All the config terms
+are described below.
+
+tsc		Always supported.  Produces TSC timestamp packets to provide
+		timing information.  In some cases it is possible to decode
+		without timing information, for example a per-thread context
+		that does not overlap executable memory maps.
+
+		The default config selects tsc (i.e. tsc=1).
+
+noretcomp	Always supported.  Disables "return compression" so a TIP packet
+		is produced when a function returns.  Causes more packets to be
+		produced but might make decoding more reliable.
+
+		The default config does not select noretcomp (i.e. noretcomp=0).
+
+psb_period	Allows the frequency of PSB packets to be specified.
+
+		The PSB packet is a synchronization packet that provides a
+		starting point for decoding or recovery from errors.
+
+		Support for psb_period is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+		which contains "1" if the feature is supported and "0"
+		otherwise.
+
+		Valid values are given by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/psb_periods
+
+		which contains a hexadecimal value, the bits of which represent
+		valid values e.g. bit 2 set means value 2 is valid.
+
+		The psb_period value is converted to the approximate number of
+		trace bytes between PSB packets as:
+
+			2 ^ (value + 11)
+
+		e.g. value 3 means 16KiB between PSBs
+
+		If an invalid value is entered, the error message
+		will give a list of valid values e.g.
+
+			$ perf record -e intel_pt/psb_period=15/u uname
+			Invalid psb_period for intel_pt. Valid values are: 0-5
+
+		If MTC packets are selected, the default config selects a value
+		of 3 (i.e. psb_period=3) or the nearest lower value that is
+		supported (0 is always supported).  Otherwise the default is 0.
+
+		If decoding is expected to be reliable and the buffer is large
+		then a large PSB period can be used.
+
+		Because a TSC packet is produced with PSB, the PSB period can
+		also affect the granularity of timing information in the absence
+		of MTC or CYC.
+
+mtc		Produces MTC timing packets.
+
+		MTC packets provide finer grain timestamp information than TSC
+		packets.  MTC packets record time using the hardware crystal
+		clock (CTC) which is related to TSC packets using a TMA packet.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/mtc
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
+		The frequency of MTC packets can also be specified - see
+		mtc_period below.
+
+mtc_period	Specifies how frequently MTC packets are produced - see mtc
+		above for how to determine if MTC packets are supported.
+
+		Valid values are given by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/mtc_periods
+
+		which contains a hexadecimal value, the bits of which represent
+		valid values e.g. bit 2 set means value 2 is valid.
+
+		The mtc_period value is converted to the MTC frequency as:
+
+			CTC-frequency / (2 ^ value)
+
+		e.g. value 3 means one eighth of CTC-frequency
+
+		Where CTC is the hardware crystal clock, the frequency of which
+		can be related to TSC via values provided in cpuid leaf 0x15.
+
+		If an invalid value is entered, the error message
+		will give a list of valid values e.g.
+
+			$ perf record -e intel_pt/mtc_period=15/u uname
+			Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
+
+		The default value is 3 or the nearest lower value
+		that is supported (0 is always supported).
+
+cyc		Produces CYC timing packets.
+
+		CYC packets provide even finer grain timestamp information than
+		MTC and TSC packets.  A CYC packet contains the number of CPU
+		cycles since the last CYC packet. Unlike MTC and TSC packets,
+		CYC packets are only sent when another packet is also sent.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
+		The number of CYC packets produced can be reduced by specifying
+		a threshold - see cyc_thresh below.
+
+cyc_thresh	Specifies how frequently CYC packets are produced - see cyc
+		above for how to determine if CYC packets are supported.
+
+		Valid cyc_thresh values are given by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
+
+		which contains a hexadecimal value, the bits of which represent
+		valid values e.g. bit 2 set means value 2 is valid.
+
+		The cyc_thresh value represents the minimum number of CPU cycles
+		that must have passed before a CYC packet can be sent.  The
+		number of CPU cycles is:
+
+			2 ^ (value - 1)
+
+		e.g. value 4 means 8 CPU cycles must pass before a CYC packet
+		can be sent.  Note a CYC packet is still only sent when another
+		packet is sent, not at, e.g. every 8 CPU cycles.
+
+		If an invalid value is entered, the error message
+		will give a list of valid values e.g.
+
+			$ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
+			Invalid cyc_thresh for intel_pt. Valid values are: 0-12
+
+		CYC packets are not requested by default.
+
+pt		Specifies pass-through which enables the 'branch' config term.
+
+		The default config selects 'pt' if it is available, so a user will
+		never need to specify this term.
+
+branch		Enable branch tracing.  Branch tracing is enabled by default so to
+		disable branch tracing use 'branch=0'.
+
+		The default config selects 'branch' if it is available.
+
+ptw		Enable PTWRITE packets which are produced when a ptwrite instruction
+		is executed.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/ptwrite
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
+fup_on_ptw	Enable a FUP packet to follow the PTWRITE packet.  The FUP packet
+		provides the address of the ptwrite instruction.  In the absence of
+		fup_on_ptw, the decoder will use the address of the previous branch
+		if branch tracing is enabled, otherwise the address will be zero.
+		Note that fup_on_ptw will work even when branch tracing is disabled.
+
+pwr_evt		Enable power events.  The power events provide information about
+		changes to the CPU C-state.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
+
+new snapshot option
+-------------------
+
+The difference between full trace and snapshot from the kernel's perspective is
+that in full trace we don't overwrite trace data that the user hasn't collected
+yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let
+the trace run and overwrite older data in the buffer so that whenever something
+interesting happens, we can stop it and grab a snapshot of what was going on
+around that interesting moment.
+
+To select snapshot mode a new option has been added:
+
+	-S
+
+Optionally it can be followed by the snapshot size e.g.
+
+	-S0x100000
+
+The default snapshot size is the auxtrace mmap size.  If neither auxtrace mmap size
+nor snapshot size is specified, then the default is 4MiB for privileged users
+(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced as described in the 'new auxtrace mmap size option' section below.
+
+The snapshot size is displayed if the option -vv is used e.g.
+
+	Intel PT snapshot size: %zu
+
+
+new auxtrace mmap size option
+-----------------------------
+
+Intel PT buffer size is specified by an addition to the -m option e.g.
+
+	-m,16
+
+selects a buffer size of 16 pages i.e. 64KiB.
+
+Note that the existing functionality of -m is unchanged.  The auxtrace mmap size
+is specified by the optional addition of a comma and the value.
+
+The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
+(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
+user is likely to get an error as they exceed their mlock limit (Max locked
+memory as shown in /proc/self/limits).  Note that perf does not count the first
+512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
+against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
+their mlock limit (which defaults to 64KiB but is not multiplied by the number
+of cpus).
+
+In full-trace mode, powers of two are allowed for buffer size, with a minimum
+size of 2 pages.  In snapshot mode, it is the same but the minimum size is
+1 page.
+
+The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
+
+	mmap length 528384
+	auxtrace mmap length 4198400
+
+
+Intel PT modes of operation
+---------------------------
+
+Intel PT can be used in 2 modes:
+	full-trace mode
+	snapshot mode
+
+Full-trace mode traces continuously e.g.
+
+	perf record -e intel_pt//u uname
+
+Snapshot mode captures the available data when a signal is sent e.g.
+
+	perf record -v -e intel_pt//u -S ./loopy 1000000000 &
+	[1] 11435
+	kill -USR2 11435
+	Recording AUX area tracing snapshot
+
+Note that the signal sent is SIGUSR2.
+Note that "Recording AUX area tracing snapshot" is displayed because the -v
+option is used.
+
+The 2 modes cannot be used together.
+
+
+Buffer handling
+---------------
+
+There may be buffer limitations (i.e. single ToPa entry) which means that actual
+buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER).  In order to
+provide other sizes, and in particular an arbitrarily large size, multiple
+buffers are logically concatenated.  However an interrupt must be used to switch
+between buffers.  That has two potential problems:
+	a) the interrupt may not be handled in time so that the current buffer
+	becomes full and some trace data is lost.
+	b) the interrupts may slow the system and affect the performance
+	results.
+
+If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
+which the tools report as an error.
+
+In full-trace mode, the driver waits for data to be copied out before allowing
+the (logical) buffer to wrap-around.  If data is not copied out quickly enough,
+again 'truncated' is set in the PERF_RECORD_AUX event.  If the driver has to
+wait, the intel_pt event gets disabled.  Because it is difficult to know when
+that happens, perf tools always re-enable the intel_pt event after copying out
+data.
+
+
+Intel PT and build ids
+----------------------
+
+By default "perf record" post-processes the event stream to find all build ids
+for executables for all addresses sampled.  Deliberately, Intel PT is not
+decoded for that purpose (it would take too long).  Instead the build ids for
+all executables encountered (due to mmap, comm or task events) are included
+in the perf.data file.
+
+To see buildids included in the perf.data file use the command:
+
+	perf buildid-list
+
+If the perf.data file contains Intel PT data, that is the same as:
+
+	perf buildid-list --with-hits
+
+
+Snapshot mode and event disabling
+---------------------------------
+
+In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
+namely PERF_EVENT_IOC_DISABLE.  However doing that can also disable the
+collection of side-band information.  In order to prevent that, a dummy
+software event has been introduced that permits tracking events (like mmaps) to
+continue to be recorded while intel_pt is disabled.  That is important to ensure
+there is complete side-band information to allow the decoding of subsequent
+snapshots.
+
+A test has been created for that.  To find the test:
+
+	perf test list
+	...
+	23: Test using a dummy software event to keep tracking
+
+To run the test:
+
+	perf test 23
+	23: Test using a dummy software event to keep tracking     : Ok
+
+
+perf record modes (nothing new here)
+------------------------------------
+
+perf record essentially operates in one of three modes:
+	per thread
+	per cpu
+	workload only
+
+"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
+workload).
+"per cpu" is selected by -C or -a.
+"workload only" mode is selected by not using the other options but providing a
+command to run (i.e. the workload).
+
+In per-thread mode an exact list of threads is traced.  There is no inheritance.
+Each thread has its own event buffer.
+
+In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
+option, or processes selected with -p or -u) are traced.  Each cpu has its own
+buffer. Inheritance is allowed.
+
+In workload-only mode, the workload is traced but with per-cpu buffers.
+Inheritance is allowed.  Note that you can now trace a workload in per-thread
+mode by using the --per-thread option.
+
+
+Privileged vs non-privileged users
+----------------------------------
+
+Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
+have memory limits imposed upon them.  That affects what buffer sizes they can
+have as outlined above.
+
+The v4.2 kernel introduced support for a context switch metadata event,
+PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
+are scheduled out and in, but not by whom.  That information is provided by
+PERF_RECORD_SWITCH_CPU_WIDE, which is only accessible in system-wide context,
+which in turn requires CAP_SYS_ADMIN.
+
+Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
+switches") commit, that introduces these metadata events for further info.
+
+When working with kernels < v4.2, the following must be taken into account, as
+the sched:sched_switch tracepoints will be used to receive such information:
+
+Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
+not permitted to use tracepoints which means there is insufficient side-band
+information to decode Intel PT in per-cpu mode, and potentially workload-only
+mode too if the workload creates new processes.
+
+Note also, that to use tracepoints, read-access to debugfs is required.  So if
+debugfs is not mounted or the user does not have read-access, it will again not
+be possible to decode Intel PT in per-cpu mode.
+
+
+sched_switch tracepoint
+-----------------------
+
+The sched_switch tracepoint is used to provide side-band data for Intel PT
+decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't
+available.
+
+The sched_switch events are automatically added. e.g. the second event shown
+below:
+
+	$ perf record -vv -e intel_pt//u uname
+	------------------------------------------------------------
+	perf_event_attr:
+	type                             6
+	size                             112
+	config                           0x400
+	{ sample_period, sample_freq }   1
+	sample_type                      IP|TID|TIME|CPU|IDENTIFIER
+	read_format                      ID
+	disabled                         1
+	inherit                          1
+	exclude_kernel                   1
+	exclude_hv                       1
+	enable_on_exec                   1
+	sample_id_all                    1
+	------------------------------------------------------------
+	sys_perf_event_open: pid 31104  cpu 0  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 1  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 2  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 3  group_fd -1  flags 0x8
+	------------------------------------------------------------
+	perf_event_attr:
+	type                             2
+	size                             112
+	config                           0x108
+	{ sample_period, sample_freq }   1
+	sample_type                      IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
+	read_format                      ID
+	inherit                          1
+	sample_id_all                    1
+	exclude_guest                    1
+	------------------------------------------------------------
+	sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8
+	sys_perf_event_open: pid -1  cpu 1  group_fd -1  flags 0x8
+	sys_perf_event_open: pid -1  cpu 2  group_fd -1  flags 0x8
+	sys_perf_event_open: pid -1  cpu 3  group_fd -1  flags 0x8
+	------------------------------------------------------------
+	perf_event_attr:
+	type                             1
+	size                             112
+	config                           0x9
+	{ sample_period, sample_freq }   1
+	sample_type                      IP|TID|TIME|IDENTIFIER
+	read_format                      ID
+	disabled                         1
+	inherit                          1
+	exclude_kernel                   1
+	exclude_hv                       1
+	mmap                             1
+	comm                             1
+	enable_on_exec                   1
+	task                             1
+	sample_id_all                    1
+	mmap2                            1
+	comm_exec                        1
+	------------------------------------------------------------
+	sys_perf_event_open: pid 31104  cpu 0  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 1  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 2  group_fd -1  flags 0x8
+	sys_perf_event_open: pid 31104  cpu 3  group_fd -1  flags 0x8
+	mmap size 528384B
+	AUX area mmap length 4194304
+	perf event ring buffer mmapped per cpu
+	Synthesizing auxtrace information
+	Linux
+	[ perf record: Woken up 1 times to write data ]
+	[ perf record: Captured and wrote 0.042 MB perf.data ]
+
+Note, the sched_switch event is only added if the user is permitted to use it
+and only in per-cpu mode.
+
+Note also, the sched_switch event is only added if TSC packets are requested.
+That is because, in the absence of timing information, the sched_switch events
+cannot be matched against the Intel PT trace.
+
+
+perf script
+===========
+
+By default, perf script will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace.
+
+
+New --itrace option
+-------------------
+
+Having no option is the same as
+
+	--itrace
+
+which, in turn, is the same as
+
+	--itrace=ibxwpe
+
+The letters are:
+
+	i	synthesize "instructions" events
+	b	synthesize "branches" events
+	x	synthesize "transactions" events
+	w	synthesize "ptwrite" events
+	p	synthesize "power" events
+	c	synthesize branches events (calls only)
+	r	synthesize branches events (returns only)
+	e	synthesize tracing error events
+	d	create a debug log
+	g	synthesize a call chain (use with i or x)
+	l	synthesize last branch entries (use with i or x)
+	s	skip initial number of events
+
+"Instructions" events look like they were recorded by "perf record -e
+instructions".
+
+"Branches" events look like they were recorded by "perf record -e branches". "c"
+and "r" can be combined to get calls and returns.
+
+"Transactions" events correspond to the start or end of transactions. The
+'flags' field can be used in perf script to determine whether the event is a
+transaction start, commit or abort.
+
+Note that "instructions", "branches" and "transactions" events depend on code
+flow packets which can be disabled by using the config term "branch=0".  Refer
+to the config terms section above.
+
+"ptwrite" events record the payload of the ptwrite instruction and whether
+"fup_on_ptw" was used.  "ptwrite" events depend on PTWRITE packets which are
+recorded only if the "ptw" config term was used.  Refer to the config terms
+section above.  perf script "synth" field displays "ptwrite" information like
+this: "ip: 0 payload: 0x123456789abcdef0"  where "ip" is 1 if "fup_on_ptw" was
+used.
+
+"Power" events correspond to power event packets and CBR (core-to-bus ratio)
+packets.  While CBR packets are always recorded when tracing is enabled, power
+event packets are recorded only if the "pwr_evt" config term was used.  Refer to
+the config terms section above.  The power events record information about
+C-state changes, whereas CBR is indicative of CPU frequency.  perf script
+"event,synth" fields display information like this:
+	cbr:  cbr: 22 freq: 2189 MHz (200%)
+	mwait:  hints: 0x60 extensions: 0x1
+	pwre:  hw: 0 cstate: 2 sub-cstate: 0
+	exstop:  ip: 1
+	pwrx:  deepest cstate: 2 last cstate: 2 wake reason: 0x4
+Where:
+	"cbr" includes the frequency and the percentage of maximum non-turbo
+	"mwait" shows mwait hints and extensions
+	"pwre" shows C-state transitions (to a C-state deeper than C0) and
+	whether initiated by hardware
+	"exstop" indicates execution stopped and whether the IP was recorded
+	exactly
+	"pwrx" indicates return to C0
+For more details refer to the Intel 64 and IA-32 Architectures Software
+Developer Manuals.
+
+Error events show where the decoder lost the trace.  Error events
+are quite important.  Users must know if what they are seeing is a complete
+picture or not.
+
+The "d" option will cause the creation of a file "intel_pt.log" containing all
+decoded packets and instructions.  Note that this option slows down the decoder
+and that the resulting file may be very large.
+
+In addition, the period of the "instructions" event can be specified. e.g.
+
+	--itrace=i10us
+
+sets the period to 10us i.e. one instruction sample is synthesized for each 10
+microseconds of trace.  Alternatives to "us" are "ms" (milliseconds),
+"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
+
+"ms", "us" and "ns" are converted to TSC ticks.
+
+The timing information included with Intel PT does not give the time of every
+instruction.  Consequently, for the purpose of sampling, the decoder estimates
+the time since the last timing packet based on 1 tick per instruction.  The time
+on the sample is *not* adjusted and reflects the last known value of TSC.
+
+For Intel PT, the default period is 100us.
+
+Setting it to a zero period means "as often as possible".
+
+In the case of Intel PT that is the same as a period of 1 and a unit of
+'instructions' (i.e. --itrace=i1i).
+
+Also the call chain size (default 16, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+	--itrace=ig32
+	--itrace=xg32
+
+Also the number of last branch entries (default 64, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+       --itrace=il10
+       --itrace=xl10
+
+Note that last branch entries are cleared for each sample, so there is no overlap
+from one sample to the next.
+
+To disable trace decoding entirely, use the option --no-itrace.
+
+It is also possible to skip events generated (instructions, branches, transactions)
+at the beginning. This is useful to ignore initialization code.
+
+	--itrace=i0nss1000000
+
+skips the first million instructions.
+
+dump option
+-----------
+
+perf script has an option (-D) to "dump" the events i.e. display the binary
+data.
+
+When -D is used, Intel PT packets are displayed.  The packet decoder does not
+pay attention to PSB packets, but just decodes the bytes - so the packets seen
+by the actual decoder may not be identical in places where the data is corrupt.
+One example of that would be when the buffer-switching interrupt has been too
+slow, and the buffer has been filled completely.  In that case, the last packet
+in the buffer might be truncated and immediately followed by a PSB as the trace
+continues in the next buffer.
+
+To disable the display of Intel PT packets, combine the -D option with
+--no-itrace.
+
+
+perf report
+===========
+
+By default, perf report will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace exactly the same as
+perf script, with the exception that the default is --itrace=igxe.
+
+
+perf inject
+===========
+
+perf inject also accepts the --itrace option in which case tracing data is
+removed and replaced with the synthesized events. e.g.
+
+	perf inject --itrace -i perf.data -o perf.data.new
+
+Below is an example of using Intel PT with autofdo.  It requires autofdo
+(https://github.com/google/autofdo) and gcc version 5.  The bubble
+sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
+amended to take the number of elements as a parameter.
+
+	$ gcc-5 -O3 sort.c -o sort_optimized
+	$ ./sort_optimized 30000
+	Bubble sorting array of 30000 elements
+	2254 ms
+
+	$ cat ~/.perfconfig
+	[intel-pt]
+		mispred-all = on
+
+	$ perf record -e intel_pt//u ./sort 3000
+	Bubble sorting array of 3000 elements
+	58 ms
+	[ perf record: Woken up 2 times to write data ]
+	[ perf record: Captured and wrote 3.939 MB perf.data ]
+	$ perf inject -i perf.data -o inj --itrace=i100usle --strip
+	$ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
+	$ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
+	$ ./sort_autofdo 30000
+	Bubble sorting array of 30000 elements
+	2155 ms
+
+Note there is currently no advantage to using Intel PT instead of LBR, but
+that may change in the future if greater use is made of the data.
diff --git a/Documentation/itrace.txt b/Documentation/itrace.txt
new file mode 100644
index 0000000..a3abe04
--- /dev/null
+++ b/Documentation/itrace.txt
@@ -0,0 +1,36 @@
+		i	synthesize instructions events
+		b	synthesize branches events
+		c	synthesize branches events (calls only)
+		r	synthesize branches events (returns only)
+		x	synthesize transactions events
+		w	synthesize ptwrite events
+		p	synthesize power events
+		e	synthesize error events
+		d	create a debug log
+		g	synthesize a call chain (use with i or x)
+		l	synthesize last branch entries (use with i or x)
+		s	skip initial number of events
+
+	The default is all events i.e. the same as --itrace=ibxwpe
+
+	In addition, the period (default 100000) for instructions events
+	can be specified in units of:
+
+		i	instructions
+		t	ticks
+		ms	milliseconds
+		us	microseconds
+		ns	nanoseconds (default)
+
+	Also the call chain size (default 16, max. 1024) for instructions or
+	transactions events can be specified.
+
+	Also the number of last branch entries (default 64, max. 1024) for
+	instructions or transactions events can be specified.
+
+	It is also possible to skip events generated (instructions, branches, transactions,
+	ptwrite, power) at the beginning. This is useful to ignore initialization code.
+
+	--itrace=i0nss1000000
+
+	skips the first million instructions.
diff --git a/Documentation/jit-interface.txt b/Documentation/jit-interface.txt
new file mode 100644
index 0000000..a8656f5
--- /dev/null
+++ b/Documentation/jit-interface.txt
@@ -0,0 +1,15 @@
+perf supports a simple JIT interface to resolve symbols for dynamic code generated
+by a JIT.
+
+The JIT has to write a /tmp/perf-%d.map  (%d = pid of process) file
+
+This is a text file.
+
+Each line has the following format, fields separated with spaces:
+
+START SIZE symbolname
+
+START and SIZE are hex numbers without 0x.
+symbolname is the rest of the line, so it could contain special characters.
+
+The ownership of the file has to match the process.
diff --git a/Documentation/jitdump-specification.txt b/Documentation/jitdump-specification.txt
new file mode 100644
index 0000000..4c62b07
--- /dev/null
+++ b/Documentation/jitdump-specification.txt
@@ -0,0 +1,170 @@
+JITDUMP specification version 2
+Last Revised: 09/15/2016
+Author: Stephane Eranian <eranian@gmail.com>
+
+--------------------------------------------------------
+| Revision  |    Date    | Description                 |
+--------------------------------------------------------
+|   1       | 09/07/2016 | Initial revision            |
+--------------------------------------------------------
+|   2       | 09/15/2016 | Add JIT_CODE_UNWINDING_INFO |
+--------------------------------------------------------
+
+
+I/ Introduction
+
+
+This document describes the jitdump file format. The file is generated by Just-In-Time (JIT) compiler runtimes to save meta-data information about the generated code, such as address, size, and name of generated functions, the native code generated, and the source line information. The data may then be used by performance tools, such as Linux perf, to generate function and assembly level profiles.
+
+The format is not specific to any particular programming language. It can be extended as need be.
+
+The format of the file is binary. It is self-describing in terms of endianness and is portable across multiple processor architectures.
+
+
+II/ Overview of the format
+
+
+The format requires only sequential accesses, i.e., append only mode. The file starts with a fixed size file header describing the version of the specification, the endianness.
+
+The header is followed by a series of records, each starting with a fixed size header describing the type of record and its size. It is, itself, followed by the payload for the record. Records can have a variable size even for a given type.
+
+Each entry in the file is timestamped. All timestamps must use the same clock source. The CLOCK_MONOTONIC clock source is recommended.
+
+
+III/ Jitdump file header format
+
+Each jitdump file starts with a fixed size header containing the following fields in order:
+
+
+* uint32_t magic     : a magic number tagging the file type. The value is 4 bytes long and represents the string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can be used to detect the endianness of the file
+* uint32_t version   : a 4-byte value representing the format version. It is currently set to 2
+* uint32_t total_size: size in bytes of file header
+* uint32_t elf_mach  : ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h)
+* uint32_t pad1      : padding. Reserved for future use
+* uint32_t pid       : JIT runtime process identification (OS specific)
+* uint64_t timestamp : timestamp of when the file was created
+* uint64_t flags     : a bitmask of flags
+
+The flags currently defined are as follows:
+ * bit 0: JITDUMP_FLAGS_ARCH_TIMESTAMP : set if the jitdump file is using an architecture-specific timestamp clock source. For instance, on x86, one could use TSC directly
+
+IV/ Record header
+
+The file header is immediately followed by records. Each record starts with a fixed size header describing the record that follows.
+
+The record header is specified in order as follows:
+* uint32_t id        : a value identifying the record type (see below)
+* uint32_t total_size: the size in bytes of the record including the header.
+* uint64_t timestamp : a timestamp of when the record was created.
+
+The following record types are defined:
+ * Value 0 : JIT_CODE_LOAD      : record describing a jitted function
+ * Value 1 : JIT_CODE_MOVE      : record describing an already jitted function which is moved
+ * Value 2 : JIT_CODE_DEBUG_INFO: record describing the debug information for a jitted function
+ * Value 3 : JIT_CODE_CLOSE     : record marking the end of the jit runtime (optional)
+ * Value 4 : JIT_CODE_UNWINDING_INFO: record describing the unwinding information for a function
+
+ The payload of the record must immediately follow the record header without padding.
+
+V/ JIT_CODE_LOAD record
+
+
+  The record has the following fields following the fixed-size record header in order:
+  * uint32_t pid: OS process id of the runtime generating the jitted code
+  * uint32_t tid: OS thread identification of the runtime thread generating the jitted code
+  * uint64_t vma: virtual address of jitted code start
+  * uint64_t code_addr: code start address for the jitted code. By default vma = code_addr
+  * uint64_t code_size: size in bytes of the generated jitted code
+  * uint64_t code_index: unique identifier for the jitted code (see below)
+  * char[n]: function name in ASCII including the null termination
+  * native code: raw byte encoding of the jitted code
+
+  The record header total_size field is inclusive of all components:
+  * record header
+  * fixed-sized fields
+  * function name string, including termination
+  * native code length
+  * record specific variable data (e.g., array of data entries)
+
+The code_index is used to uniquely identify each jitted function. The index can be a monotonically increasing 64-bit value. Each time a function is jitted it gets a new number. This value is used in case the code for a function is moved and avoids having to issue another JIT_CODE_LOAD record.
+
+The format supports empty functions with no native code.
+
+
+VI/ JIT_CODE_MOVE record
+
+  The record type is optional.
+
+  The record has the following fields following the fixed-size record header in order:
+  * uint32_t pid          : OS process id of the runtime generating the jitted code
+  * uint32_t tid          : OS thread identification of the runtime thread generating the jitted code
+  * uint64_t vma          : new virtual address of jitted code start
+  * uint64_t old_code_addr: previous code address for the same function
+  * uint64_t new_code_addr: alternate new code start address for the jitted code. By default it should be equal to the vma address.
+  * uint64_t code_size    : size in bytes of the jitted code
+  * uint64_t code_index   : index referring to the JIT_CODE_LOAD code_index record of when the function was initially jitted
+
+
+The MOVE record can be used in case an already jitted function is simply moved by the runtime inside the code cache.
+
+The JIT_CODE_MOVE record cannot come before the JIT_CODE_LOAD record for the same function name. The function cannot have changed name, otherwise a new JIT_CODE_LOAD record must be emitted.
+
+The code size of the function cannot change.
+
+
+VII/ JIT_CODE_DEBUG_INFO record
+
+The record type is optional.
+
+The record contains source lines debug information, i.e., a way to map a code address back to a source line. This information may be used by the performance tool.
+
+The record has the following fields following the fixed-size record header in order:
+  * uint64_t code_addr: address of function for which the debug information is generated
+  * uint64_t nr_entry : number of debug entries for the function
+  * debug_entry[n]: array of nr_entry debug entries for the function
+
+The debug_entry describes the source line information. It is defined as follows in order:
+* uint64_t code_addr: address of function for which the debug information is generated
+* uint32_t line     : source file line number (starting at 1)
+* uint32_t discrim  : column discriminator, 0 is default
+* char name[n]      : source file name in ASCII, including null termination
+
+The debug_entry entries are saved in sequence but given that they have variable sizes due to the file name string, they cannot be indexed directly.
+They need to be walked sequentially. The next debug_entry is found at sizeof(debug_entry) + strlen(name) + 1.
+
+IMPORTANT:
+  The JIT_CODE_DEBUG_INFO record for a given function must always be generated BEFORE the JIT_CODE_LOAD record for the function. This greatly simplifies the parser for the jitdump file.
+
+
+VIII/ JIT_CODE_CLOSE record
+
+
+The record type is optional.
+
+The record is used as a marker for the end of the jitted runtime. It can be replaced by the end of the file.
+
+The JIT_CODE_CLOSE record does not have any specific fields, the record header contains all the information needed.
+
+
+IX/ JIT_CODE_UNWINDING_INFO
+
+
+The record type is optional.
+
+The record is used to describe the unwinding information for a jitted function.
+
+The record has the following fields following the fixed-size record header in order:
+
+uint64_t unwind_data_size   : the size in bytes of the unwinding data table at the end of the record
+uint64_t eh_frame_hdr_size  : the size in bytes of the DWARF EH Frame Header at the start of the unwinding data table at the end of the record
+uint64_t mapped_size        : the size of the unwinding data mapped in memory
+const char unwinding_data[n]: an array of unwinding data, consisting of the EH Frame Header, followed by the actual EH Frame
+
+
+The EH Frame header follows the Linux Standard Base (LSB) specification as described in the document at https://refspecs.linuxfoundation.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html
+
+
+The EH Frame follows the LSB specification as described in the document at https://refspecs.linuxbase.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html
+
+
+NOTE: The mapped_size is generally either the same as unwind_data_size (if the unwinding data was mapped in memory by the running process) or zero (if the unwinding data is not mapped by the process). If the unwinding data was not mapped, then only the EH Frame Header will be read, which can be used to specify FP based unwinding for a function which does not have unwinding information.
diff --git a/Documentation/manpage-1.72.xsl b/Documentation/manpage-1.72.xsl
new file mode 100644
index 0000000..b4d315c
--- /dev/null
+++ b/Documentation/manpage-1.72.xsl
@@ -0,0 +1,14 @@
+<!-- manpage-1.72.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles peculiarities in docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the special values for the roff control characters
+     needed for docbook-xsl 1.72.0 -->
+<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
+<xsl:param name="git.docbook.dot"      >&#x2302;</xsl:param>
+
+</xsl:stylesheet>
diff --git a/Documentation/manpage-base.xsl b/Documentation/manpage-base.xsl
new file mode 100644
index 0000000..a264fa6
--- /dev/null
+++ b/Documentation/manpage-base.xsl
@@ -0,0 +1,35 @@
+<!-- manpage-base.xsl:
+     special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- these params silence some output from xmlto -->
+<xsl:param name="man.output.quietly" select="1"/>
+<xsl:param name="refentry.meta.get.quietly" select="1"/>
+
+<!-- convert asciidoc callouts to man page format;
+     git.docbook.backslash and git.docbook.dot params
+     must be supplied by another XSL file or other means -->
+<xsl:template match="co">
+	<xsl:value-of select="concat(
+			      $git.docbook.backslash,'fB(',
+			      substring-after(@id,'-'),')',
+			      $git.docbook.backslash,'fR')"/>
+</xsl:template>
+<xsl:template match="calloutlist">
+	<xsl:value-of select="$git.docbook.dot"/>
+	<xsl:text>sp&#10;</xsl:text>
+	<xsl:apply-templates/>
+	<xsl:text>&#10;</xsl:text>
+</xsl:template>
+<xsl:template match="callout">
+	<xsl:value-of select="concat(
+			      $git.docbook.backslash,'fB',
+			      substring-after(@arearefs,'-'),
+			      '. ',$git.docbook.backslash,'fR')"/>
+	<xsl:apply-templates/>
+	<xsl:value-of select="$git.docbook.dot"/>
+	<xsl:text>br&#10;</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/manpage-bold-literal.xsl b/Documentation/manpage-bold-literal.xsl
new file mode 100644
index 0000000..608eb5d
--- /dev/null
+++ b/Documentation/manpage-bold-literal.xsl
@@ -0,0 +1,17 @@
+<!-- manpage-bold-literal.xsl:
+     special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- render literal text as bold (instead of plain or monospace);
+     this makes literal text easier to distinguish in manpages
+     viewed on a tty -->
+<xsl:template match="literal">
+	<xsl:value-of select="$git.docbook.backslash"/>
+	<xsl:text>fB</xsl:text>
+	<xsl:apply-templates/>
+	<xsl:value-of select="$git.docbook.backslash"/>
+	<xsl:text>fR</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/manpage-normal.xsl b/Documentation/manpage-normal.xsl
new file mode 100644
index 0000000..a48f5b1
--- /dev/null
+++ b/Documentation/manpage-normal.xsl
@@ -0,0 +1,13 @@
+<!-- manpage-normal.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles anything we want to keep away from docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the normal values for the roff control characters -->
+<xsl:param name="git.docbook.backslash">\</xsl:param>
+<xsl:param name="git.docbook.dot"	>.</xsl:param>
+
+</xsl:stylesheet>
diff --git a/Documentation/manpage-suppress-sp.xsl b/Documentation/manpage-suppress-sp.xsl
new file mode 100644
index 0000000..a63c763
--- /dev/null
+++ b/Documentation/manpage-suppress-sp.xsl
@@ -0,0 +1,21 @@
+<!-- manpage-suppress-sp.xsl:
+     special settings for manpages rendered from asciidoc+docbook
+     handles erroneous, inline .sp in manpage output of some
+     versions of docbook-xsl -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- attempt to work around spurious .sp at the tail of the line
+     that some versions of docbook stylesheets seem to add -->
+<xsl:template match="simpara">
+  <xsl:variable name="content">
+    <xsl:apply-templates/>
+  </xsl:variable>
+  <xsl:value-of select="normalize-space($content)"/>
+  <xsl:if test="not(ancestor::authorblurb) and
+                not(ancestor::personblurb)">
+    <xsl:text>&#10;&#10;</xsl:text>
+  </xsl:if>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/Documentation/perf-annotate.txt b/Documentation/perf-annotate.txt
new file mode 100644
index 0000000..749cc60
--- /dev/null
+++ b/Documentation/perf-annotate.txt
@@ -0,0 +1,123 @@
+perf-annotate(1)
+================
+
+NAME
+----
+perf-annotate - Read perf.data (created by perf record) and display annotated code
+
+SYNOPSIS
+--------
+[verse]
+'perf annotate' [-i <file> | --input=file] [symbol_name]
+
+DESCRIPTION
+-----------
+This command reads the input file and displays an annotated version of the
+code. If the object file has debug symbols then the source code will be
+displayed alongside assembly code.
+
+If there is no debug info in the object, then annotated assembly is displayed.
+
+OPTIONS
+-------
+-i::
+--input=<file>::
+        Input file name. (default: perf.data unless stdin is a fifo)
+
+-d::
+--dsos=<dso[,dso...]>::
+        Only consider symbols in these dsos.
+-s::
+--symbol=<symbol>::
+        Symbol to annotate.
+
+-f::
+--force::
+        Don't do ownership validation.
+
+-v::
+--verbose::
+        Be more verbose. (Show symbol address, etc)
+
+-q::
+--quiet::
+	Do not show any message.  (Suppress -v)
+
+-n::
+--show-nr-samples::
+	Show the number of samples for each symbol
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-k::
+--vmlinux=<file>::
+        vmlinux pathname.
+
+--ignore-vmlinux::
+	Ignore vmlinux files.
+
+-m::
+--modules::
+        Load module symbols. WARNING: use only with -k and LIVE kernel.
+
+-l::
+--print-line::
+        Print matching source lines (may be slow).
+
+-P::
+--full-paths::
+        Don't shorten the displayed pathnames.
+
+--stdio:: Use the stdio interface.
+
+--stdio2:: Use the stdio2 interface, non-interactive, uses the TUI formatting.
+
+--stdio-color=<mode>::
+	'always', 'never' or 'auto', allowing configuring color output
+	via the command line, in addition to via "color.ui" .perfconfig.
+	Use '--stdio-color always' to generate color even when redirecting
+	to a pipe or file. Using just '--stdio-color' is equivalent to
+	using 'always'.
+
+--tui:: Use the TUI interface. Use of --tui requires a tty, if one is not
+	present, as when piping to other commands, the stdio interface is
+	used. This interface starts by centering on the line with more
+	samples, TAB/UNTAB cycles through the lines with more samples.
+
+--gtk:: Use the GTK interface.
+
+-C::
+--cpu=<cpu>:: Only report samples for the list of CPUs provided. Multiple CPUs can
+	be provided as a comma-separated list with no space: 0,1. Ranges of
+	CPUs are specified with -: 0-2. Default is to report samples on all
+	CPUs.
+
+--asm-raw::
+	Show raw instruction encoding of assembly instructions.
+
+--show-total-period:: Show a column with the sum of periods.
+
+--source::
+	Interleave source code with assembly code. Enabled by default,
+	disable with --no-source.
+
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+
+-M::
+--disassembler-style=:: Set disassembler style for objdump.
+
+--objdump=<path>::
+        Path to objdump binary.
+
+--skip-missing::
+	Skip symbols that cannot be annotated.
+
+--group::
+	Show event group information together
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/Documentation/perf-archive.txt b/Documentation/perf-archive.txt
new file mode 100644
index 0000000..ac6ecbb
--- /dev/null
+++ b/Documentation/perf-archive.txt
@@ -0,0 +1,22 @@
+perf-archive(1)
+===============
+
+NAME
+----
+perf-archive - Create archive with object files with build-ids found in perf.data file
+
+SYNOPSIS
+--------
+[verse]
+'perf archive' [file]
+
+DESCRIPTION
+-----------
+This command runs perf-buildid-list --with-hits, and collects the files with the
+buildids found so that analysis of perf.data contents can be possible on another
+machine.
+
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-buildid-list[1], linkperf:perf-report[1]
diff --git a/Documentation/perf-bench.txt b/Documentation/perf-bench.txt
new file mode 100644
index 0000000..34750fc
--- /dev/null
+++ b/Documentation/perf-bench.txt
@@ -0,0 +1,209 @@
+perf-bench(1)
+=============
+
+NAME
+----
+perf-bench - General framework for benchmark suites
+
+SYNOPSIS
+--------
+[verse]
+'perf bench' [<common options>] <subsystem> <suite> [<options>]
+
+DESCRIPTION
+-----------
+This 'perf bench' command is a general framework for benchmark suites.
+
+COMMON OPTIONS
+--------------
+-r::
+--repeat=::
+Specify amount of times to repeat the run (default 10).
+
+-f::
+--format=::
+Specify format style.
+Current available format styles are:
+
+'default'::
+Default style. This is mainly for human reading.
+---------------------
+% perf bench sched pipe                      # with no style specified
+(executing 1000000 pipe operations between two tasks)
+        Total time:5.855 sec
+                5.855061 usecs/op
+		170792 ops/sec
+---------------------
+
+'simple'::
+This simple style is friendly for automated
+processing by scripts.
+---------------------
+% perf bench --format=simple sched pipe      # specified simple
+5.988
+---------------------
+
+SUBSYSTEM
+---------
+
+'sched'::
+	Scheduler and IPC mechanisms.
+
+'mem'::
+	Memory access performance.
+
+'numa'::
+	NUMA scheduling and MM benchmarks.
+
+'futex'::
+	Futex stressing benchmarks.
+
+'all'::
+	All benchmark subsystems.
+
+SUITES FOR 'sched'
+~~~~~~~~~~~~~~~~~~
+*messaging*::
+Suite for evaluating performance of scheduler and IPC mechanisms.
+Based on hackbench by Rusty Russell.
+
+Options of *messaging*
+^^^^^^^^^^^^^^^^^^^^^^
+-p::
+--pipe::
+Use pipe() instead of socketpair()
+
+-t::
+--thread::
+Be multi thread instead of multi process
+
+-g::
+--group=::
+Specify number of groups
+
+-l::
+--nr_loops=::
+Specify number of loops
+
+Example of *messaging*
+^^^^^^^^^^^^^^^^^^^^^^
+
+---------------------
+% perf bench sched messaging                 # run with default
+options (20 sender and receiver processes per group)
+(10 groups == 400 processes run)
+
+      Total time:0.308 sec
+
+% perf bench sched messaging -t -g 20        # be multi-thread, with 20 groups
+(20 sender and receiver threads per group)
+(20 groups == 800 threads run)
+
+      Total time:0.582 sec
+---------------------
+
+*pipe*::
+Suite for pipe() system call.
+Based on pipe-test-1m.c by Ingo Molnar.
+
+Options of *pipe*
+^^^^^^^^^^^^^^^^^
+-l::
+--loop=::
+Specify number of loops.
+
+Example of *pipe*
+^^^^^^^^^^^^^^^^^
+
+---------------------
+% perf bench sched pipe
+(executing 1000000 pipe operations between two tasks)
+
+        Total time:8.091 sec
+                8.091833 usecs/op
+                123581 ops/sec
+
+% perf bench sched pipe -l 1000              # loop 1000
+(executing 1000 pipe operations between two tasks)
+
+        Total time:0.016 sec
+                16.948000 usecs/op
+                59004 ops/sec
+---------------------
+
+SUITES FOR 'mem'
+~~~~~~~~~~~~~~~~
+*memcpy*::
+Suite for evaluating performance of simple memory copy in various ways.
+
+Options of *memcpy*
+^^^^^^^^^^^^^^^^^^^
+-s::
+--size::
+Specify size of memory to copy (default: 1MB).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
+-f::
+--function::
+Specify function to copy (default: default).
+Available functions depend on the architecture.
+On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported.
+
+-l::
+--nr_loops::
+Repeat memcpy invocation this number of times.
+
+-c::
+--cycles::
+Use perf's cpu-cycles event instead of gettimeofday syscall.
+
+*memset*::
+Suite for evaluating performance of simple memory set in various ways.
+
+Options of *memset*
+^^^^^^^^^^^^^^^^^^^
+-s::
+--size::
+Specify size of memory to set (default: 1MB).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
+-f::
+--function::
+Specify function to set (default: default).
+Available functions depend on the architecture.
+On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported.
+
+-l::
+--nr_loops::
+Repeat memset invocation this number of times.
+
+-c::
+--cycles::
+Use perf's cpu-cycles event instead of gettimeofday syscall.
+
+SUITES FOR 'numa'
+~~~~~~~~~~~~~~~~~
+*mem*::
+Suite for evaluating NUMA workloads.
+
+SUITES FOR 'futex'
+~~~~~~~~~~~~~~~~~~
+*hash*::
+Suite for evaluating hash tables.
+
+*wake*::
+Suite for evaluating wake calls.
+
+*wake-parallel*::
+Suite for evaluating parallel wake calls.
+
+*requeue*::
+Suite for evaluating requeue calls.
+
+*lock-pi*::
+Suite for evaluating futex lock_pi calls.
+
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/Documentation/perf-buildid-cache.txt b/Documentation/perf-buildid-cache.txt
new file mode 100644
index 0000000..73c2650
--- /dev/null
+++ b/Documentation/perf-buildid-cache.txt
@@ -0,0 +1,74 @@
+perf-buildid-cache(1)
+=====================
+
+NAME
+----
+perf-buildid-cache - Manage build-id cache.
+
+SYNOPSIS
+--------
+[verse]
+'perf buildid-cache <options>'
+
+DESCRIPTION
+-----------
+This command manages the build-id cache. It can add, remove, update and purge
+files to/from the cache. In the future it should as well set upper limits for
+the space used by the cache, etc.
+This also scans the target binary for SDT (Statically Defined Tracing) and
+records it along with the buildid-cache, which will be used by perf-probe.
+For more details, see linkperf:perf-probe[1].
+
+OPTIONS
+-------
+-a::
+--add=::
+        Add specified file to the cache.
+-f::
+--force::
+	Don't complain, do it.
+-k::
+--kcore::
+        Add specified kcore file to the cache. For the current host that is
+        /proc/kcore which requires root permissions to read. Be aware that
+        running 'perf buildid-cache' as root may update root's build-id cache
+        not the user's. Use the -v option to see where the file is created.
+        Note that the copied file contains only code sections not the whole core
+        image. Note also that files "kallsyms" and "modules" must also be in the
+        same directory and are also copied.  All 3 files are created with read
+        permissions for root only. kcore will not be added if there is already a
+        kcore in the cache (with the same build-id) that has the same modules at
+        the same addresses. Use the -v option to see if a copy of kcore is
+        actually made.
+-r::
+--remove=::
+        Remove a cached binary which has same build-id of specified file
+        from the cache.
+-p::
+--purge=::
+        Purge all cached binaries including older caches which have specified
+	path from the cache.
+-M::
+--missing=::
+	List missing build ids in the cache for the specified file.
+-u::
+--update=::
+	Update specified file of the cache. Note that this doesn't remove
+	older entries since those may be still needed for annotating old
+	(or remote) perf.data. Only if there is already a cache which has
+	exactly same build-id, that is replaced by new one. It can be used
+	to update kallsyms and kernel dso to vmlinux in order to support
+	annotation.
+
+-v::
+--verbose::
+	Be more verbose.
+
+--target-ns=PID::
+	Obtain mount namespace information from the target pid.  This is
+	used when creating a uprobe for a process that resides in a
+	different mount namespace from the perf(1) utility.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1]
diff --git a/Documentation/perf-buildid-list.txt b/Documentation/perf-buildid-list.txt
new file mode 100644
index 0000000..25c52ef
--- /dev/null
+++ b/Documentation/perf-buildid-list.txt
@@ -0,0 +1,43 @@
+perf-buildid-list(1)
+====================
+
+NAME
+----
+perf-buildid-list - List the buildids in a perf.data file
+
+SYNOPSIS
+--------
+[verse]
+'perf buildid-list <options>'
+
+DESCRIPTION
+-----------
+This command displays the buildids found in a perf.data file, so that other
+tools can be used to fetch packages with matching symbol tables for use by
+perf report.
+
+It can also be used to show the build id of the running kernel or in an ELF
+file using -i/--input.
+
+OPTIONS
+-------
+-H::
+--with-hits::
+        Show only DSOs with hits.
+-i::
+--input=::
+        Input file name. (default: perf.data unless stdin is a fifo)
+-f::
+--force::
+	Don't do ownership validation.
+-k::
+--kernel::
+	Show running kernel build id.
+-v::
+--verbose::
+	Be more verbose.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-top[1],
+linkperf:perf-report[1]
diff --git a/Documentation/perf-c2c.txt b/Documentation/perf-c2c.txt
new file mode 100644
index 0000000..095aebd
--- /dev/null
+++ b/Documentation/perf-c2c.txt
@@ -0,0 +1,290 @@
+perf-c2c(1)
+===========
+
+NAME
+----
+perf-c2c - Shared Data C2C/HITM Analyzer.
+
+SYNOPSIS
+--------
+[verse]
+'perf c2c record' [<options>] <command>
+'perf c2c record' [<options>] -- [<record command options>] <command>
+'perf c2c report' [<options>]
+
+DESCRIPTION
+-----------
+C2C stands for Cache To Cache.
+
+The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows
+you to track down the cacheline contentions.
+
+The tool is based on x86's load latency and precise store facility events
+provided by Intel CPUs. These events provide:
+  - memory address of the access
+  - type of the access (load and store details)
+  - latency (in cycles) of the load access
+
+The c2c tool provides means to record this data and report back access details
+for cachelines with highest contention - highest number of HITM accesses.
+
+The basic workflow with this tool follows the standard record/report phase.
+User uses the record command to record events data and report command to
+display it.
+
+
+RECORD OPTIONS
+--------------
+-e::
+--event=::
+	Select the PMU event. Use 'perf mem record -e list'
+	to list available events.
+
+-v::
+--verbose::
+	Be more verbose (show counter open errors, etc).
+
+-l::
+--ldlat::
+	Configure mem-loads latency.
+
+-k::
+--all-kernel::
+	Configure all used events to run in kernel space.
+
+-u::
+--all-user::
+	Configure all used events to run in user space.
+
+REPORT OPTIONS
+--------------
+-k::
+--vmlinux=<file>::
+	vmlinux pathname
+
+-v::
+--verbose::
+	Be more verbose (show counter open errors, etc).
+
+-i::
+--input::
+	Specify the input file to process.
+
+-N::
+--node-info::
+	Show extra node info in report (see NODE INFO section)
+
+-c::
+--coalesce::
+	Specify sorting fields for single cacheline display.
+	Following fields are available: tid,pid,iaddr,dso
+	(see COALESCE)
+
+-g::
+--call-graph::
+	Setup callchains parameters.
+	Please refer to perf-report man page for details.
+
+--stdio::
+	Force the stdio output (see STDIO OUTPUT)
+
+--stats::
+	Display only statistic tables and force stdio mode.
+
+--full-symbols::
+	Display full length of symbols.
+
+--no-source::
+	Do not display Source:Line column.
+
+--show-all::
+	Show all captured HITM lines, with no regard to HITM % 0.0005 limit.
+
+-f::
+--force::
+	Don't do ownership validation.
+
+-d::
+--display::
+	Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
+
+C2C RECORD
+----------
+The perf c2c record command sets up options related to HITM cacheline analysis
+and calls standard perf record command.
+
+Following perf record options are configured by default:
+(check perf record man page for details)
+
+  -W,-d,--phys-data,--sample-cpu
+
+Unless specified otherwise with '-e' option, following events are monitored by
+default:
+
+  cpu/mem-loads,ldlat=30/P
+  cpu/mem-stores/P
+
+User can pass any 'perf record' option behind '--' mark, like (to enable
+callchains and system wide monitoring):
+
+  $ perf c2c record -- -g -a
+
+Please check RECORD OPTIONS section for specific c2c record options.
+
+C2C REPORT
+----------
+The perf c2c report command displays shared data analysis.  It comes in two
+display modes: stdio and tui (default).
+
+The report command workflow is following:
+  - sort all the data based on the cacheline address
+  - store access details for each cacheline
+  - sort all cachelines based on user settings
+  - display data
+
+In general perf report output consists of 2 basic views:
+  1) most expensive cachelines list
+  2) offsets details for each cacheline
+
+For each cacheline in the 1) list we display following data:
+(Both stdio and TUI modes follow the same fields output)
+
+  Index
+  - zero based index to identify the cacheline
+
+  Cacheline
+  - cacheline address (hex number)
+
+  Total records
+  - sum of all cachelines accesses
+
+  Rmt/Lcl Hitm
+  - cacheline percentage of all Remote/Local HITM accesses
+
+  LLC Load Hitm - Total, Lcl, Rmt
+  - count of Total/Local/Remote load HITMs
+
+  Store Reference - Total, L1Hit, L1Miss
+    Total - all store accesses
+    L1Hit - store accesses that hit L1
+    L1Miss - store accesses that missed L1
+
+  Load Dram
+  - count of local and remote DRAM accesses
+
+  LLC Ld Miss
+  - count of all accesses that missed LLC
+
+  Total Loads
+  - sum of all load accesses
+
+  Core Load Hit - FB, L1, L2
+  - count of load hits in FB (Fill Buffer), L1 and L2 cache
+
+  LLC Load Hit - Llc, Rmt
+  - count of LLC and Remote load hits
+
+For each offset in the 2) list we display following data:
+
+  HITM - Rmt, Lcl
+  - % of Remote/Local HITM accesses for given offset within cacheline
+
+  Store Refs - L1 Hit, L1 Miss
+  - % of store accesses that hit/missed L1 for given offset within cacheline
+
+  Data address - Offset
+  - offset address
+
+  Pid
+  - pid of the process responsible for the accesses
+
+  Tid
+  - tid of the process responsible for the accesses
+
+  Code address
+  - code address responsible for the accesses
+
+  cycles - rmt hitm, lcl hitm, load
+    - sum of cycles for given accesses - Remote/Local HITM and generic load
+
+  cpu cnt
+    - number of cpus that participated on the access
+
+  Symbol
+    - code symbol related to the 'Code address' value
+
+  Shared Object
+    - shared object name related to the 'Code address' value
+
+  Source:Line
+    - source information related to the 'Code address' value
+
+  Node
+    - nodes participating on the access (see NODE INFO section)
+
+NODE INFO
+---------
+The 'Node' field displays nodes that access given cacheline
+offset. Its output comes in 3 flavors:
+  - node IDs separated by ','
+  - node IDs with stats for each ID, in following format:
+      Node{cpus %hitms %stores}
+  - node IDs with list of affected CPUs in following format:
+      Node{cpu list}
+
+User can switch between above flavors with -N option or
+use 'n' key to interactively switch in TUI mode.
+
+COALESCE
+--------
+User can specify how to sort offsets for cacheline.
+
+Following fields are available and govern the final
+output fields set for cacheline offsets output:
+
+  tid   - coalesced by process TIDs
+  pid   - coalesced by process PIDs
+  iaddr - coalesced by code address, following fields are displayed:
+             Code address, Code symbol, Shared Object, Source line
+  dso   - coalesced by shared object
+
+By default the coalescing is setup with 'pid,iaddr'.
+
+STDIO OUTPUT
+------------
+The stdio output displays data on standard output.
+
+Following tables are displayed:
+  Trace Event Information
+  - overall statistics of memory accesses
+
+  Global Shared Cache Line Event Information
+  - overall statistics on shared cachelines
+
+  Shared Data Cache Line Table
+  - list of most expensive cachelines
+
+  Shared Cache Line Distribution Pareto
+  - list of all accessed offsets for each cacheline
+
+TUI OUTPUT
+----------
+The TUI output provides interactive interface to navigate
+through cachelines list and to display offset details.
+
+For details please refer to the help window by pressing '?' key.
+
+CREDITS
+-------
+Although Don Zickus, Dick Fowles and Joe Mario worked together
+to get this implemented, we got lots of early help from Arnaldo
+Carvalho de Melo, Stephane Eranian, Jiri Olsa and Andi Kleen.
+
+C2C BLOG
+--------
+Check Joe's blog on c2c tool for detailed use case explanation:
+  https://joemario.github.io/blog/2016/09/01/c2c-blog/
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-mem[1]
diff --git a/Documentation/perf-config.txt b/Documentation/perf-config.txt
new file mode 100644
index 0000000..32f4a89
--- /dev/null
+++ b/Documentation/perf-config.txt
@@ -0,0 +1,520 @@
+perf-config(1)
+==============
+
+NAME
+----
+perf-config - Get and set variables in a configuration file.
+
+SYNOPSIS
+--------
+[verse]
+'perf config' [<file-option>] [section.name[=value] ...]
+or
+'perf config' [<file-option>] -l | --list
+
+DESCRIPTION
+-----------
+You can manage variables in a configuration file with this command.
+
+OPTIONS
+-------
+
+-l::
+--list::
+	Show current config variables, name and value, for all sections.
+
+--user::
+	For writing and reading options: write to user
+	'$HOME/.perfconfig' file or read it.
+
+--system::
+	For writing and reading options: write to system-wide
+	'$(sysconfdir)/perfconfig' or read it.
+
+CONFIGURATION FILE
+------------------
+
+The perf configuration file contains many variables to change various
+aspects of each of its tools, including output, disk usage, etc.
+The '$HOME/.perfconfig' file is used to store a per-user configuration.
+The file '$(sysconfdir)/perfconfig' can be used to
+store a system-wide default configuration.
+
+When reading or writing, the values are read from the system and user
+configuration files by default, and options '--system' and '--user'
+can be used to tell the command to read from or write to only that location.
+
+Syntax
+~~~~~~
+
+The file consists of sections. A section starts with its name
+surrounded by square brackets and continues till the next section
+begins. Each variable must be in a section, and have the form
+'name = value', for example:
+
+	[section]
+		name1 = value1
+		name2 = value2
+
+Section names are case sensitive and can contain any characters except
+newline (double quote `"` and backslash have to be escaped as `\"` and `\\`,
+respectively). Section headers can't span multiple lines.
+
+Example
+~~~~~~~
+
+Given a $HOME/.perfconfig like this:
+
+#
+# This is the config file, and
+# a '#' and ';' character indicates a comment
+#
+
+	[colors]
+		# Color variables
+		top = red, default
+		medium = green, default
+		normal = lightgray, default
+		selected = white, lightgray
+		jump_arrows = blue, default
+		addr = magenta, default
+		root = white, blue
+
+	[tui]
+		# Defaults if linked with libslang
+		report = on
+		annotate = on
+		top = on
+
+	[buildid]
+		# Default, disable using /dev/null
+		dir = ~/.debug
+
+	[annotate]
+		# Defaults
+		hide_src_code = false
+		use_offset = true
+		jump_arrows = true
+		show_nr_jumps = false
+
+	[help]
+		# Format can be man, info, web or html
+		format = man
+		autocorrect = 0
+
+	[ui]
+		show-headers = true
+
+	[call-graph]
+		# fp (framepointer), dwarf
+		record-mode = fp
+		print-type = graph
+		order = caller
+		sort-key = function
+
+	[report]
+		# Defaults
+		sort-order = comm,dso,symbol
+		percent-limit = 0
+		queue-size = 0
+		children = true
+		group = true
+
+You can hide source code of annotate feature setting the config to true with
+
+	% perf config annotate.hide_src_code=true
+
+If you want to add or modify several config items, you can do like
+
+	% perf config ui.show-headers=false kmem.default=slab
+
+To modify the sort order of report functionality in user config file(i.e. `~/.perfconfig`), do
+
+	% perf config --user report.sort-order=srcline
+
+To change colors of selected line to other foreground and background colors
+in system config file (i.e. `$(sysconfdir)/perfconfig`), do
+
+	% perf config --system colors.selected=yellow,green
+
+To query the record mode of call graph, do
+
+	% perf config call-graph.record-mode
+
+If you want to know multiple config key/value pairs, you can do like
+
+	% perf config report.queue-size call-graph.order report.children
+
+To query the config value of sort order of call graph in user config file (i.e. `~/.perfconfig`), do
+
+	% perf config --user call-graph.sort-order
+
+To query the config value of buildid directory in system config file (i.e. `$(sysconfdir)/perfconfig`), do
+
+	% perf config --system buildid.dir
+
+Variables
+~~~~~~~~~
+
+colors.*::
+	The variables for customizing the colors used in the output for the
+	'report', 'top' and 'annotate' in the TUI. They should specify the
+	foreground and background colors, separated by a comma, for example:
+
+		medium = green, lightgray
+
+	If you want to use the color configured for your terminal, just leave it
+	as 'default', for example:
+
+		medium = default, lightgray
+
+	Available colors:
+	red, yellow, green, cyan, gray, black, blue,
+	white, default, magenta, lightgray
+
+	colors.top::
+		'top' means an overhead percentage which is more than 5%.
+		And values of this variable specify percentage colors.
+		Basic key values are foreground-color 'red' and
+		background-color 'default'.
+	colors.medium::
+		'medium' means an overhead percentage which is more than 0.5%.
+		Default values are 'green' and 'default'.
+	colors.normal::
+		'normal' means the rest of overhead percentages
+		except 'top', 'medium', 'selected'.
+		Default values are 'lightgray' and 'default'.
+	colors.selected::
+		This selects the colors for the current entry in a list of entries
+		from sub-commands (top, report, annotate).
+		Default values are 'black' and 'lightgray'.
+	colors.jump_arrows::
+		Colors for jump arrows on assembly code listings
+		such as 'jns', 'jmp', 'jne', etc.
+		Default values are 'blue', 'default'.
+	colors.addr::
+		This selects colors for addresses from 'annotate'.
+		Default values are 'magenta', 'default'.
+	colors.root::
+		Colors for headers in the output of sub-commands (top, report).
+		Default values are 'white', 'blue'.
+
+tui.*, gtk.*::
+	Subcommands that can be configured here are 'top', 'report' and 'annotate'.
+	These values are booleans, for example:
+
+	[tui]
+		top = true
+
+	will make the TUI be the default for the 'top' subcommand. Those will be
+	available if the required libs were detected at tool build time.
+
+buildid.*::
+	buildid.dir::
+		Each executable and shared library in modern distributions comes with a
+		content based identifier that, if available, will be inserted in a
+		'perf.data' file header to, at analysis time find what is needed to do
+		symbol resolution, code annotation, etc.
+
+		The recording tools also store a hard link or copy in a per-user
+		directory, $HOME/.debug/, of binaries, shared libraries, /proc/kallsyms
+		and /proc/kcore files to be used at analysis time.
+
+		The buildid.dir variable can be used to either change this directory
+		cache location, or to disable it altogether. If you want to disable it,
+		set buildid.dir to /dev/null. The default is $HOME/.debug
+
+annotate.*::
+	These options work only for TUI.
+	These are in control of addresses, jump function, source code
+	in lines of assembly code from a specific program.
+
+	annotate.hide_src_code::
+		If a program which is analyzed has source code,
+		this option lets 'annotate' print a list of assembly code with the source code.
+		For example, let's see a part of a program. There're four lines.
+		If this option is 'true', they can be printed
+		without source code from a program as below.
+
+		│        push   %rbp
+		│        mov    %rsp,%rbp
+		│        sub    $0x10,%rsp
+		│        mov    (%rdi),%rdx
+
+		But if this option is 'false', source code of the part
+		can be also printed as below. Default is 'false'.
+
+		│      struct rb_node *rb_next(const struct rb_node *node)
+		│      {
+		│        push   %rbp
+		│        mov    %rsp,%rbp
+		│        sub    $0x10,%rsp
+		│              struct rb_node *parent;
+		│
+		│              if (RB_EMPTY_NODE(node))
+		│        mov    (%rdi),%rdx
+		│              return n;
+
+        annotate.use_offset::
+		Basing on a first address of a loaded function, offset can be used.
+		Instead of using original addresses of assembly code,
+		addresses subtracted from a base address can be printed.
+		Let's illustrate an example.
+		If a base address is 0XFFFFFFFF81624d50 as below,
+
+		ffffffff81624d50 <load0>
+
+		an address on assembly code has a specific absolute address as below
+
+		ffffffff816250b8:│  mov    0x8(%r14),%rdi
+
+		but if use_offset is 'true', an address subtracted from a base address is printed.
+		Default is true. This option is only applied to TUI.
+
+		             368:│  mov    0x8(%r14),%rdi
+
+	annotate.jump_arrows::
+		There can be jump instruction among assembly code.
+		Depending on a boolean value of jump_arrows,
+		arrows can be printed or not which represent
+		where the instruction jumps to as below.
+
+		│     ┌──jmp    1333
+		│     │  xchg   %ax,%ax
+		│1330:│  mov    %r15,%r10
+		│1333:└─→cmp    %r15,%r14
+
+		If jump_arrows is 'false', the arrows aren't printed as below.
+		Default is 'true'.
+
+		│      ↓ jmp    1333
+		│        xchg   %ax,%ax
+		│1330:   mov    %r15,%r10
+		│1333:   cmp    %r15,%r14
+
+        annotate.show_linenr::
+		When showing source code if this option is 'true',
+		line numbers are printed as below.
+
+		│1628         if (type & PERF_SAMPLE_IDENTIFIER) {
+		│     ↓ jne    508
+		│1628                 data->id = *array;
+		│1629                 array++;
+		│1630         }
+
+		However if this option is 'false', they aren't printed as below.
+		Default is 'false'.
+
+		│             if (type & PERF_SAMPLE_IDENTIFIER) {
+		│     ↓ jne    508
+		│                     data->id = *array;
+		│                     array++;
+		│             }
+
+        annotate.show_nr_jumps::
+		Let's see a part of assembly code.
+
+		│1382:   movb   $0x1,-0x270(%rbp)
+
+		If this option is 'true', the number of branches jumping to that address can be printed as below.
+		Default is 'false'.
+
+		│1 1382:   movb   $0x1,-0x270(%rbp)
+
+        annotate.show_total_period::
+		To compare two records on an instruction base, with this option
+		provided, display total number of samples that belong to a line
+		in assembly code. If this option is 'true', total periods are printed
+		instead of percent values as below.
+
+		  302 │      mov    %eax,%eax
+
+		But if this option is 'false', percent values for overhead are printed as below.
+		Default is 'false'.
+
+		99.93 │      mov    %eax,%eax
+
+	annotate.offset_level::
+		Default is '1', meaning just jump targets will have offsets shown right beside
+		the instruction. When set to '2' 'call' instructions will also have their offsets
+		shown, 3 or higher will show offsets for all instructions.
+
+hist.*::
+	hist.percentage::
+		This option controls the way to calculate overhead of filtered entries -
+		that means the value of this option is effective only if there's a
+		filter (by comm, dso or symbol name). Suppose a following example:
+
+		       Overhead  Symbols
+		       ........  .......
+		        33.33%     foo
+		        33.33%     bar
+		        33.33%     baz
+
+	       This is an original overhead and we'll filter out the first 'foo'
+	       entry. The value of 'relative' would increase the overhead of 'bar'
+	       and 'baz' to 50.00% for each, while 'absolute' would show their
+	       current overhead (33.33%).
+
+ui.*::
+	ui.show-headers::
+		This option controls display of column headers (like 'Overhead' and 'Symbol')
+		in 'report' and 'top'. If this option is false, they are hidden.
+		This option is only applied to TUI.
+
+call-graph.*::
+	When sub-commands 'top' and 'report' work with -g/--children
+	there're options in control of call-graph.
+
+	call-graph.record-mode::
+		The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'.
+		The value of 'dwarf' is effective only if perf detects the needed library
+		(libunwind or a recent version of libdw).
+		'lbr' only works for cpus that support it.
+
+	call-graph.dump-size::
+		The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
+		When using dwarf into record-mode, the default size will be used if omitted.
+
+	call-graph.print-type::
+		The print-types can be graph (graph absolute), fractal (graph relative),
+		flat and folded. This option controls a way to show overhead for each callchain
+		entry. Suppose a following example.
+
+                Overhead  Symbols
+                ........  .......
+                  40.00%  foo
+                          |
+                          ---foo
+                             |
+                             |--50.00%--bar
+                             |          main
+                             |
+                              --50.00%--baz
+                                        main
+
+		This output is a 'fractal' format. The 'foo' came from 'bar' and 'baz' exactly
+		half and half so 'fractal' shows 50.00% for each
+		(meaning that it assumes 100% total overhead of 'foo').
+
+		The 'graph' uses absolute overhead value of 'foo' as total so each of
+		'bar' and 'baz' callchain will have 20.00% of overhead.
+		If 'flat' is used, single column and linear exposure of call chains.
+		'folded' means call chains are displayed in a line, separated by semicolons.
+
+	call-graph.order::
+		This option controls print order of callchains. The default is
+		'callee' which means callee is printed at top and then followed by its
+		caller and so on. The 'caller' prints it in reverse order.
+
+		If this option is not set and report.children or top.children is
+		set to true (or the equivalent command line option is given),
+		the default value of this option is changed to 'caller' for the
+		execution of 'perf report' or 'perf top'. Other commands will
+		still default to 'callee'.
+
+	call-graph.sort-key::
+		The callchains are merged if they contain same information.
+		The sort-key option determines a way to compare the callchains.
+		A value of 'sort-key' can be 'function' or 'address'.
+		The default is 'function'.
+
+	call-graph.threshold::
+		When there're many callchains it'd print tons of lines. So perf omits
+		small callchains under a certain overhead (threshold) and this option
+		controls the threshold. Default is 0.5 (%). The overhead is calculated
+		by a value that depends on call-graph.print-type.
+
+	call-graph.print-limit::
+		This is a maximum number of lines of callchain printed for a single
+		histogram entry. Default is 0 which means no limitation.
+
+report.*::
+	report.sort_order::
+		Allows changing the default sort order from "comm,dso,symbol" to
+		some other default, for instance "sym,dso" may be more fitting for
+		kernel developers.
+	report.percent-limit::
+		This one is mostly the same as call-graph.threshold but works for
+		histogram entries. Entries having an overhead lower than this
+		percentage will not be printed. Default is '0'. If percent-limit
+		is '10', only entries which have more than 10% of overhead will be
+		printed.
+
+	report.queue-size::
+		This option sets up the maximum allocation size of the internal
+		event queue for ordering events. Default is 0, meaning no limit.
+
+	report.children::
+		'Children' means functions called from another function.
+		If this option is true, 'perf report' cumulates callchains of children
+		and shows (accumulated) total overhead as well as 'Self' overhead.
+		Please refer to the 'perf report' manual. The default is 'true'.
+
+	report.group::
+		This option is to show event group information together.
+		Example output with this turned on, notice that there is one column
+		per event in the group, ref-cycles and cycles:
+
+		# group: {ref-cycles,cycles}
+		# ========
+		#
+		# Samples: 7K of event 'anon group { ref-cycles, cycles }'
+		# Event count (approx.): 6876107743
+		#
+		#         Overhead  Command      Shared Object               Symbol
+		# ................  .......  .................  ...................
+		#
+		    99.84%  99.76%  noploop  noploop            [.] main
+		     0.07%   0.00%  noploop  ld-2.15.so         [.] strcmp
+		     0.03%   0.00%  noploop  [kernel.kallsyms]  [k] timerqueue_del
+
+top.*::
+	top.children::
+		Same as 'report.children'. So if it is enabled, the output of 'top'
+		command will have 'Children' overhead column as well as 'Self' overhead
+		column by default.
+		The default is 'true'.
+
+man.*::
+	man.viewer::
+		This option can assign a tool to view manual pages when 'help'
+		subcommand was invoked. Supported tools are 'man', 'woman'
+		(with emacs client) and 'konqueror'. Default is 'man'.
+
+		New man viewer tool can be also added using 'man.<tool>.cmd'
+		or use different path using 'man.<tool>.path' config option.
+
+pager.*::
+	pager.<subcommand>::
+		When the subcommand is run on stdio, determine whether it uses
+		pager or not based on this value. Default is 'unspecified'.
+
+kmem.*::
+	kmem.default::
+		This option decides which allocator is to be analyzed if neither
+		'--slab' nor '--page' option is used. Default is 'slab'.
+
+record.*::
+	record.build-id::
+		This option can be 'cache', 'no-cache' or 'skip'.
+		'cache' is to post-process data and save/update the binaries into
+		the build-id cache (in ~/.debug). This is the default.
+		But if this option is 'no-cache', it will not update the build-id cache.
+		'skip' skips post-processing and does not update the cache.
+
+diff.*::
+	diff.order::
+		This option sets the number of columns to sort the result.
+		The default is 0, which means sorting by baseline.
+		Setting it to 1 will sort the result by delta (or other
+		compute method selected).
+
+	diff.compute::
+		This option sets the method for computing the diff result.
+		Possible values are 'delta', 'delta-abs', 'ratio' and
+		'wdiff'.  Default is 'delta'.
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/Documentation/perf-data.txt b/Documentation/perf-data.txt
new file mode 100644
index 0000000..c871807
--- /dev/null
+++ b/Documentation/perf-data.txt
@@ -0,0 +1,48 @@
+perf-data(1)
+============
+
+NAME
+----
+perf-data - Data file related processing
+
+SYNOPSIS
+--------
+[verse]
+'perf data' [<common options>] <command> [<options>]
+
+DESCRIPTION
+-----------
+Data file related processing.
+
+COMMANDS
+--------
+convert::
+	Converts perf data file into another format (only CTF [1] format is supported for now).
+	It's possible to set data-convert debug variable to get debug messages from conversion,
+	like:
+	  perf --debug data-convert data convert ...
+
+OPTIONS for 'convert'
+---------------------
+--to-ctf::
+	Triggers the CTF conversion, specify the path of CTF data directory.
+
+-i::
+	Specify input perf data file path.
+
+-f::
+--force::
+	Don't complain, do it.
+
+-v::
+--verbose::
+        Be more verbose (show counter open errors, etc).
+
+--all::
+	Convert all events, including non-sample events (comm, fork, ...), to output.
+	Default is off, only convert samples.
+
+SEE ALSO
+--------
+linkperf:perf[1]
+[1] Common Trace Format - http://www.efficios.com/ctf
diff --git a/Documentation/perf-diff.txt b/Documentation/perf-diff.txt
new file mode 100644
index 0000000..a79c84a
--- /dev/null
+++ b/Documentation/perf-diff.txt
@@ -0,0 +1,227 @@
+perf-diff(1)
+============
+
+NAME
+----
+perf-diff - Read perf.data files and display the differential profile
+
+SYNOPSIS
+--------
+[verse]
+'perf diff' [baseline file] [data file1] [[data file2] ... ]
+
+DESCRIPTION
+-----------
+This command displays the performance difference amongst two or more perf.data
+files captured via perf record.
+
+If no parameters are passed it will assume perf.data.old and perf.data.
+
+The differential profile is displayed only for events matching both
+specified perf.data files.
+
+If no parameters are passed the samples will be sorted by dso and symbol.
+As the perf.data files could come from different binaries, the symbol addresses
+could vary. So perf diff is based on the comparison of the file and
+symbol names.
+
+OPTIONS
+-------
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+--kallsyms=<file>::
+        kallsyms pathname
+
+-m::
+--modules::
+        Load module symbols. WARNING: use only with -k and LIVE kernel
+
+-d::
+--dsos=::
+	Only consider symbols in these dsos. CSV that understands
+	file://filename entries.  This option will affect the percentage
+	of the Baseline/Delta column.  See --percentage for more info.
+
+-C::
+--comms=::
+	Only consider symbols in these comms. CSV that understands
+	file://filename entries.  This option will affect the percentage
+	of the Baseline/Delta column.  See --percentage for more info.
+
+-S::
+--symbols=::
+	Only consider these symbols. CSV that understands
+	file://filename entries.  This option will affect the percentage
+	of the Baseline/Delta column.  See --percentage for more info.
+
+-s::
+--sort=::
+	Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
+	Please see description of --sort in the perf-report man page.
+
+-t::
+--field-separator=::
+
+	Use a special separator character and don't pad with spaces, replacing
+	all occurrences of this separator in symbol names (and other output)
+	with a '.' character, which is thus the only invalid separator.
+
+-v::
+--verbose::
+	Be verbose, for instance, show the raw counts in addition to the
+	diff.
+
+-q::
+--quiet::
+	Do not show any message.  (Suppress -v)
+
+-f::
+--force::
+        Don't do ownership validation.
+
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+
+-b::
+--baseline-only::
+        Show only items with match in baseline.
+
+-c::
+--compute::
+        Differential computation selection - delta, ratio, wdiff, delta-abs
+        (default is delta-abs).  Default can be changed using diff.compute
+        config option.  See COMPARISON METHODS section for more info.
+
+-p::
+--period::
+        Show period values for both compared hist entries.
+
+-F::
+--formula::
+        Show formula for given computation.
+
+-o::
+--order::
+       Specify compute sorting column number.  0 means sorting by baseline
+       overhead and 1 (default) means sorting by computed value of column 1
+       (data from the first file other than the baseline).  Values more than 1
+       can be used only if enough data files are provided.
+       The default value can be set using the diff.order config option.
+
+--percentage::
+	Determine how to display the overhead percentage of filtered entries.
+	Filters can be applied by --comms, --dsos and/or --symbols options.
+
+	"relative" means it's relative to filtered entries only so that the
+	sum of shown entries will be always 100%.  "absolute" means it retains
+	the original value before and after the filter is applied.
+
+COMPARISON
+----------
+The comparison is governed by the baseline file. The baseline perf.data
+file is iterated for samples. All other perf.data files specified on
+the command line are searched for the baseline sample pair. If the pair
+is found, specified computation is made and result is displayed.
+
+All samples from non-baseline perf.data files, that do not match any
+baseline entry, are displayed with empty space within baseline column
+and possible computation results (delta) in their related column.
+
+Example files samples:
+- file A with samples f1, f2, f3, f4,    f6
+- file B with samples     f2,     f4, f5
+- file C with samples f1, f2,         f5
+
+Example output:
+  x - computation takes place for pair
+  b - baseline sample percentage
+
+- perf diff A B C
+
+  baseline/A compute/B compute/C  samples
+  ---------------------------------------
+  b                    x          f1
+  b          x         x          f2
+  b                               f3
+  b          x                    f4
+  b                               f6
+             x         x          f5
+
+- perf diff B A C
+
+  baseline/B compute/A compute/C  samples
+  ---------------------------------------
+  b          x         x          f2
+  b          x                    f4
+  b                    x          f5
+             x         x          f1
+             x                    f3
+             x                    f6
+
+- perf diff C B A
+
+  baseline/C compute/B compute/A  samples
+  ---------------------------------------
+  b                    x          f1
+  b          x         x          f2
+  b          x                    f5
+                       x          f3
+             x         x          f4
+                       x          f6
+
+COMPARISON METHODS
+------------------
+delta
+~~~~~
+If specified the 'Delta' column is displayed with value 'd' computed as:
+
+  d = A->period_percent - B->period_percent
+
+with:
+  - A/B being matching hist entry from data/baseline file specified
+    (or perf.data/perf.data.old) respectively.
+
+  - period_percent being the % of the hist entry period value within
+    single data file
+
+  - with filtering by -C, -d and/or -S, period_percent might be changed
+    relative to how entries are filtered.  Use --percentage=absolute to
+    prevent such fluctuation.
+
+delta-abs
+~~~~~~~~~
+Same as the 'delta' method, but sorts the result by the absolute values.
+
+ratio
+~~~~~
+If specified the 'Ratio' column is displayed with value 'r' computed as:
+
+  r = A->period / B->period
+
+with:
+  - A/B being matching hist entry from data/baseline file specified
+    (or perf.data/perf.data.old) respectively.
+
+  - period being the hist entry period value
+
+wdiff:WEIGHT-B,WEIGHT-A
+~~~~~~~~~~~~~~~~~~~~~~~
+If specified the 'Weighted diff' column is displayed with value 'd' computed as:
+
+   d = B->period * WEIGHT-A - A->period * WEIGHT-B
+
+  - A/B being matching hist entry from data/baseline file specified
+    (or perf.data/perf.data.old) respectively.
+
+  - period being the hist entry period value
+
+  - WEIGHT-A/WEIGHT-B being user supplied weights in the '-c' option
+    behind ':' separator like '-c wdiff:1,2'.
+    - WEIGHT-A being the weight of the data file
+    - WEIGHT-B being the weight of the baseline data file
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/Documentation/perf-evlist.txt b/Documentation/perf-evlist.txt
new file mode 100644
index 0000000..c0a6640
--- /dev/null
+++ b/Documentation/perf-evlist.txt
@@ -0,0 +1,45 @@
+perf-evlist(1)
+==============
+
+NAME
+----
+perf-evlist - List the event names in a perf.data file
+
+SYNOPSIS
+--------
+[verse]
+'perf evlist <options>'
+
+DESCRIPTION
+-----------
+This command displays the names of events sampled in a perf.data file.
+
+OPTIONS
+-------
+-i::
+--input=::
+        Input file name. (default: perf.data unless stdin is a fifo)
+
+-f::
+--force::
+	Don't complain, do it.
+
+-F::
+--freq=::
+	Show just the sample frequency used for each event.
+
+-v::
+--verbose=::
+	Show all fields.
+
+-g::
+--group::
+	Show event group information.
+
+--trace-fields::
+	Show tracepoint field names.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-list[1],
+linkperf:perf-report[1]
diff --git a/Documentation/perf-ftrace.txt b/Documentation/perf-ftrace.txt
new file mode 100644
index 0000000..b80c843
--- /dev/null
+++ b/Documentation/perf-ftrace.txt
@@ -0,0 +1,87 @@
+perf-ftrace(1)
+==============
+
+NAME
+----
+perf-ftrace - simple wrapper for kernel's ftrace functionality
+
+
+SYNOPSIS
+--------
+[verse]
+'perf ftrace' <command>
+
+DESCRIPTION
+-----------
+The 'perf ftrace' command is a simple wrapper of kernel's ftrace
+functionality.  It only supports single thread tracing currently and
+just reads trace_pipe in text and then writes it to stdout.
+
+The following options apply to perf ftrace.
+
+OPTIONS
+-------
+
+-t::
+--tracer=::
+	Tracer to use: function_graph or function.
+
+-v::
+--verbose=::
+        Verbosity level.
+
+-p::
+--pid=::
+	Trace on existing process id (comma separated list).
+
+-a::
+--all-cpus::
+	Force system-wide collection.  Scripts run without a <command>
+	normally use -a by default, while scripts run with a <command>
+	normally don't - this option allows the latter to be run in
+	system-wide mode.
+
+-C::
+--cpu=::
+	Only trace for the list of CPUs provided.  Multiple CPUs can
+	be provided as a comma separated list with no space like: 0,1.
+	Ranges of CPUs are specified with -: 0-2.
+	Default is to trace on all online CPUs.
+
+-T::
+--trace-funcs=::
+	Only trace functions given by the argument.  Multiple functions
+	can be given by using this option more than once.  The function
+	argument also can be a glob pattern.  It will be passed to
+	'set_ftrace_filter' in tracefs.
+
+-N::
+--notrace-funcs=::
+	Do not trace functions given by the argument.  Like -T option,
+	this can be used more than once to specify multiple functions
+	(or glob patterns).  It will be passed to 'set_ftrace_notrace'
+	in tracefs.
+
+-G::
+--graph-funcs=::
+	Set graph filter on the given function (or a glob pattern).
+	This is useful for the function_graph tracer only and enables
+	tracing for functions executed from the given function.
+	This can be used more than once to specify multiple functions.
+	It will be passed to 'set_graph_function' in tracefs.
+
+-g::
+--nograph-funcs=::
+	Set graph notrace filter on the given function (or a glob pattern).
+	Like -G option, this is useful for the function_graph tracer only
+	and disables tracing for function executed from the given function.
+	This can be used more than once to specify multiple functions.
+	It will be passed to 'set_graph_notrace' in tracefs.
+
+-D::
+--graph-depth=::
+	Set max depth for function graph tracer to follow
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-trace[1]
diff --git a/Documentation/perf-help.txt b/Documentation/perf-help.txt
new file mode 100644
index 0000000..5143918
--- /dev/null
+++ b/Documentation/perf-help.txt
@@ -0,0 +1,38 @@
+perf-help(1)
+============
+
+NAME
+----
+perf-help - display help information about perf
+
+SYNOPSIS
+--------
+'perf help' [-a|--all] [COMMAND]
+
+DESCRIPTION
+-----------
+
+With no options and no COMMAND given, the synopsis of the 'perf'
+command and a list of the most commonly used perf commands are printed
+on the standard output.
+
+If the option '--all' or '-a' is given, then all available commands are
+printed on the standard output.
+
+If a perf command is named, a manual page for that command is brought
+up. The 'man' program is used by default for this purpose, but this
+can be overridden by other options or configuration variables.
+
+Note that `perf --help ...` is identical to `perf help ...` because the
+former is internally converted into the latter.
+
+OPTIONS
+-------
+-a::
+--all::
+	Prints all the available commands on the standard output. This
+	option supersedes any other option.
+
+PERF
+----
+Part of the linkperf:perf[1] suite
diff --git a/Documentation/perf-inject.txt b/Documentation/perf-inject.txt
new file mode 100644
index 0000000..a64d658
--- /dev/null
+++ b/Documentation/perf-inject.txt
@@ -0,0 +1,69 @@
+perf-inject(1)
+==============
+
+NAME
+----
+perf-inject - Filter to augment the events stream with additional information
+
+SYNOPSIS
+--------
+[verse]
+'perf inject <options>'
+
+DESCRIPTION
+-----------
+perf-inject reads a perf-record event stream and repipes it to stdout.  At any
+point the processing code can inject other events into the event stream - in
+this case build-ids (-b option) are read and injected as needed into the event
+stream.
+
+Build-ids are just the first user of perf-inject - potentially anything that
+needs userspace processing to augment the events stream with additional
+information could make use of this facility.
+
+OPTIONS
+-------
+-b::
+--build-ids=::
+        Inject build-ids into the output stream
+-v::
+--verbose::
+	Be more verbose.
+-i::
+--input=::
+	Input file name. (default: stdin)
+-o::
+--output=::
+	Output file name. (default: stdout)
+-s::
+--sched-stat::
+	Merge sched_stat and sched_switch for getting events where and how long
+	tasks slept. sched_switch contains a callchain where a task slept and
+	sched_stat contains a timeslice how long a task slept.
+
+--kallsyms=<file>::
+	kallsyms pathname
+
+--itrace::
+	Decode Instruction Tracing data, replacing it with synthesized events.
+	Options are:
+
+include::itrace.txt[]
+
+--strip::
+	Use with --itrace to strip out non-synthesized events.
+
+-j::
+--jit::
+	Process jitdump files by injecting the mmap records corresponding to jitted
+	functions. This option also generates the ELF images for each jitted function
+	found in the jitdumps files captured in the input perf.data file. Use this option
+	if you are monitoring environment using JIT runtimes, such as Java, DART or V8.
+
+-f::
+--force::
+	Don't complain, do it.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
diff --git a/Documentation/perf-kallsyms.txt b/Documentation/perf-kallsyms.txt
new file mode 100644
index 0000000..f3c6209
--- /dev/null
+++ b/Documentation/perf-kallsyms.txt
@@ -0,0 +1,24 @@
+perf-kallsyms(1)
+================
+
+NAME
+----
+perf-kallsyms - Searches running kernel for symbols
+
+SYNOPSIS
+--------
+[verse]
+'perf kallsyms' [<options>] symbol_name[,symbol_name...]
+
+DESCRIPTION
+-----------
+This command searches the running kernel kallsyms file for the given symbol(s)
+and prints information about it, including the DSO, the kallsyms begin/end
+addresses and the addresses in the ELF kallsyms symbol table (for symbols in
+modules).
+
+OPTIONS
+-------
+-v::
+--verbose=::
+	Increase verbosity level, showing details about symbol table loading, etc.
diff --git a/Documentation/perf-kmem.txt b/Documentation/perf-kmem.txt
new file mode 100644
index 0000000..85b8ac6
--- /dev/null
+++ b/Documentation/perf-kmem.txt
@@ -0,0 +1,77 @@
+perf-kmem(1)
+============
+
+NAME
+----
+perf-kmem - Tool to trace/measure kernel memory properties
+
+SYNOPSIS
+--------
+[verse]
+'perf kmem' {record|stat} [<options>]
+
+DESCRIPTION
+-----------
+There are two variants of perf kmem:
+
+  'perf kmem record <command>' to record the kmem events
+  of an arbitrary workload.
+
+  'perf kmem stat' to report kernel memory statistics.
+
+OPTIONS
+-------
+-i <file>::
+--input=<file>::
+	Select the input file (default: perf.data unless stdin is a fifo)
+
+-f::
+--force::
+	Don't do ownership validation
+
+-v::
+--verbose::
+        Be more verbose. (show symbol address, etc)
+
+--caller::
+	Show per-callsite statistics
+
+--alloc::
+	Show per-allocation statistics
+
+-s <key[,key2...]>::
+--sort=<key[,key2...]>::
+	Sort the output (default: 'frag,hit,bytes' for slab and 'bytes,hit'
+	for page).  Available sort keys are 'ptr, callsite, bytes, hit,
+	pingpong, frag' for slab and 'page, callsite, bytes, hit, order,
+	migtype, gfp' for page.  This option should be preceded by one of the
+	mode selection options - i.e. --slab, --page, --alloc and/or --caller.
+
+-l <num>::
+--line=<num>::
+	Print n lines only
+
+--raw-ip::
+	Print raw ip instead of symbol
+
+--slab::
+	Analyze SLAB allocator events.
+
+--page::
+	Analyze page allocator events
+
+--live::
+	Show live page stat.  The perf kmem shows total allocation stat by
+	default, but this option shows live (currently allocated) pages
+	instead.  (This option works with --page option only)
+
+--time=<start>,<stop>::
+	Only analyze samples within given time window: <start>,<stop>. Times
+	have the format seconds.microseconds. If start is not given (i.e., time
+	string is ',x.y') then analysis starts at the beginning of the file. If
+	stop time is not given (i.e., time string is 'x.y,') then analysis goes
+	to end of file.
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/Documentation/perf-kvm.txt b/Documentation/perf-kvm.txt
new file mode 100644
index 0000000..6a5bb2b
--- /dev/null
+++ b/Documentation/perf-kvm.txt
@@ -0,0 +1,164 @@
+perf-kvm(1)
+===========
+
+NAME
+----
+perf-kvm - Tool to trace/measure kvm guest os
+
+SYNOPSIS
+--------
+[verse]
+'perf kvm' [--host] [--guest] [--guestmount=<path>
+	[--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]]
+	{top|record|report|diff|buildid-list} [<options>]
+'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path>
+	| --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat} [<options>]
+'perf kvm stat' [record|report|live] [<options>]
+
+DESCRIPTION
+-----------
+There are a couple of variants of perf kvm:
+
+  'perf kvm [options] top <command>' to generate and display
+  a performance counter profile of guest os in realtime
+  of an arbitrary workload.
+
+  'perf kvm record <command>' to record the performance counter profile
+  of an arbitrary workload and save it into a perf data file. We set the
+  default behavior of perf kvm as --guest, so if neither --host nor --guest
+  is input, the perf data file name is perf.data.guest. If --host is input,
+  the perf data file name is perf.data.kvm. If you want to record data into
+  perf.data.host, please input --host --no-guest. The behaviors are shown as
+  following:
+    Default('')         ->  perf.data.guest
+    --host              ->  perf.data.kvm
+    --guest             ->  perf.data.guest
+    --host --guest      ->  perf.data.kvm
+    --host --no-guest   ->  perf.data.host
+
+  'perf kvm report' to display the performance counter profile information
+  recorded via perf kvm record.
+
+  'perf kvm diff' to display the performance difference amongst two perf.data
+  files captured via perf record.
+
+  'perf kvm buildid-list' to  display the buildids found in a perf data file,
+  so that other tools can be used to fetch packages with matching symbol tables
+  for use by perf report. As buildid is read from /sys/kernel/notes in os, then
+  if you want to list the buildid for guest, please make sure your perf data file
+  was captured with --guestmount in perf kvm record.
+
+  'perf kvm stat <command>' to run a command and gather performance counter
+  statistics.
+  Especially, perf 'kvm stat record/report' generates a statistical analysis
+  of KVM events. Currently, vmexit, mmio (x86 only) and ioport (x86 only)
+  events are supported. 'perf kvm stat record <command>' records kvm events
+  and the events between start and end <command>.
+  And this command produces a file which contains tracing results of kvm
+  events.
+
+  'perf kvm stat report' reports statistical data which includes events
+  handled time, samples, and so on.
+
+  'perf kvm stat live' reports statistical data in a live mode (similar to
+  record + report but with statistical data updated live at a given display
+  rate).
+
+OPTIONS
+-------
+-i::
+--input=<path>::
+        Input file name.
+-o::
+--output=<path>::
+        Output file name.
+--host::
+        Collect host side performance profile.
+--guest::
+        Collect guest side performance profile.
+--guestmount=<path>::
+	Guest os root file system mount directory. Users mount guest os
+        root directories under <path> by a specific filesystem access method,
+	typically, sshfs. For example, start 2 guest os. The one's pid is 8888
+	and the other's is 9999.
+        #mkdir ~/guestmount; cd ~/guestmount
+        #sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
+        #sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
+        #perf kvm --host --guest --guestmount=~/guestmount top
+--guestkallsyms=<path>::
+        Guest os /proc/kallsyms file copy. 'perf kvm' reads it to get guest
+	kernel symbols. Users copy it out from guest os.
+--guestmodules=<path>::
+	Guest os /proc/modules file copy. 'perf kvm' reads it to get guest
+	kernel module information. Users copy it out from guest os.
+--guestvmlinux=<path>::
+	Guest os kernel vmlinux.
+-v::
+--verbose::
+	Be more verbose (show counter open errors, etc).
+
+STAT REPORT OPTIONS
+-------------------
+--vcpu=<value>::
+       analyze events which occur on this vcpu. (default: all vcpus)
+
+--event=<value>::
+       event to be analyzed. Possible values: vmexit, mmio (x86 only),
+       ioport (x86 only). (default: vmexit)
+-k::
+--key=<value>::
+       Sorting key. Possible values: sample (default, sort by samples
+       number), time (sort by average time).
+-p::
+--pid=::
+    Analyze events only for given process ID(s) (comma separated list).
+
+STAT LIVE OPTIONS
+-----------------
+-d::
+--display::
+        Time in seconds between display updates
+
+-m::
+--mmap-pages=::
+    Number of mmap data pages (must be a power of two) or size
+    specification with appended unit character - B/K/M/G. The
+    size is rounded up to have nearest pages power of two value.
+
+-a::
+--all-cpus::
+        System-wide collection from all CPUs.
+
+-p::
+--pid=::
+    Analyze events only for given process ID(s) (comma separated list).
+
+--vcpu=<value>::
+       analyze events which occur on this vcpu. (default: all vcpus)
+
+
+--event=<value>::
+       event to be analyzed. Possible values: vmexit,
+       mmio (x86 only), ioport (x86 only).
+       (default: vmexit)
+
+-k::
+--key=<value>::
+       Sorting key. Possible values: sample (default, sort by samples
+       number), time (sort by average time).
+
+--duration=<value>::
+       Show events other than HLT (x86 only) or Wait state (s390 only)
+       that take longer than duration usecs.
+
+--proc-map-timeout::
+	When processing pre-existing threads /proc/XXX/mmap, it may take
+	a long time, because the file may be huge. A time out is needed
+	in such cases.
+	This option sets the time out limit. The default value is 500 ms.
+
+SEE ALSO
+--------
+linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1],
+linkperf:perf-diff[1], linkperf:perf-buildid-list[1],
+linkperf:perf-stat[1]
diff --git a/Documentation/perf-list.txt b/Documentation/perf-list.txt
new file mode 100644
index 0000000..2549c34
--- /dev/null
+++ b/Documentation/perf-list.txt
@@ -0,0 +1,281 @@
+perf-list(1)
+============
+
+NAME
+----
+perf-list - List all symbolic event types
+
+SYNOPSIS
+--------
+[verse]
+'perf list' [--no-desc] [--long-desc]
+            [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]
+
+DESCRIPTION
+-----------
+This command displays the symbolic event types which can be selected in the
+various perf commands with the -e option.
+
+OPTIONS
+-------
+--no-desc::
+Don't print descriptions.
+
+-v::
+--long-desc::
+Print longer event descriptions.
+
+--details::
+Print how named events are resolved internally into perf events, and also
+any extra expressions computed by perf stat.
+
+
+[[EVENT_MODIFIERS]]
+EVENT MODIFIERS
+---------------
+
+Events can optionally have a modifier by appending a colon and one or
+more modifiers. Modifiers allow the user to restrict the events to be
+counted. The following modifiers exist:
+
+ u - user-space counting
+ k - kernel counting
+ h - hypervisor counting
+ I - non idle counting
+ G - guest counting (in KVM guests)
+ H - host counting (not in KVM guests)
+ p - precise level
+ P - use maximum detected precise level
+ S - read sample value (PERF_SAMPLE_READ)
+ D - pin the event to the PMU
+ W - group is weak and will fallback to non-group if not schedulable,
+     only supported in 'perf stat' for now.
+
+The 'p' modifier can be used for specifying how precise the instruction
+address should be. The 'p' modifier can be specified multiple times:
+
+ 0 - SAMPLE_IP can have arbitrary skid
+ 1 - SAMPLE_IP must have constant skid
+ 2 - SAMPLE_IP requested to have 0 skid
+ 3 - SAMPLE_IP must have 0 skid, or uses randomization to avoid
+     sample shadowing effects.
+
+For Intel systems precise event sampling is implemented with PEBS
+which supports up to precise-level 2, and precise level 3 for
+some special cases.
+
+On AMD systems it is implemented using IBS (up to precise-level 2).
+The precise modifier works with event types 0x76 (cpu-cycles, CPU
+clocks not halted) and 0xC1 (micro-ops retired). Both events map to
+IBS execution sampling (IBS op) with the IBS Op Counter Control bit
+(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s
+Manual Volume 2: System Programming, 13.3 Instruction-Based
+Sampling). Examples to use IBS:
+
+ perf record -a -e cpu-cycles:p ...    # use ibs op counting cycles
+ perf record -a -e r076:p ...          # same as -e cpu-cycles:p
+ perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
+
+RAW HARDWARE EVENT DESCRIPTOR
+-----------------------------
+Even when an event is not available in a symbolic form within perf right now,
+it can be encoded in a per processor specific way.
+
+For instance, for x86 CPUs NNN represents the raw register encoding with the
+layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout
+of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344,
+Figure 13-7 Performance Event-Select Register (PerfEvtSeln)).
+
+Note: Only the following bit fields can be set in x86 counter
+registers: event, umask, edge, inv, cmask. Esp. guest/host only and
+OS/user mode flags must be setup using <<EVENT_MODIFIERS, EVENT
+MODIFIERS>>.
+
+Example:
+
+If the Intel docs for a QM720 Core i7 describe an event as:
+
+  Event  Umask  Event Mask
+  Num.   Value  Mnemonic    Description                        Comment
+
+  A8H      01H  LSD.UOPS    Counts the number of micro-ops     Use cmask=1 and
+                            delivered by loop stream detector  invert to count
+                                                               cycles
+
+raw encoding of 0x1A8 can be used:
+
+ perf stat -e r1a8 -a sleep 1
+ perf record -e r1a8 ...
+
+You should refer to the processor specific documentation for getting these
+details. Some of them are referenced in the SEE ALSO section below.
+
+ARBITRARY PMUS
+--------------
+
+perf also supports an extended syntax for specifying raw parameters
+to PMUs. Using this typically requires looking up the specific event
+in the CPU vendor specific documentation.
+
+The available PMUs and their raw parameters can be listed with
+
+  ls /sys/devices/*/format
+
+For example the raw event "LSD.UOPS" core pmu event above could
+be specified as
+
+  perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ...
+
+PER SOCKET PMUS
+---------------
+
+Some PMUs are not associated with a core, but with a whole CPU socket.
+Events on these PMUs generally cannot be sampled, but only counted globally
+with perf stat -a. They can be bound to one logical CPU, but will measure
+all the CPUs in the same socket.
+
+This example measures memory bandwidth every second
+on the first memory controller on socket 0 of an Intel Xeon system
+
+  perf stat -C 0 -a uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/ -I 1000 ...
+
+Each memory controller has its own PMU.  Measuring the complete system
+bandwidth would require specifying all imc PMUs (see perf list output),
+and adding the values together. To simplify creation of multiple events,
+prefix and glob matching is supported in the PMU name, and the prefix
+'uncore_' is also ignored when performing the match. So the command above
+can be expanded to all memory controllers by using the syntaxes:
+
+  perf stat -C 0 -a imc/cas_count_read/,imc/cas_count_write/ -I 1000 ...
+  perf stat -C 0 -a *imc*/cas_count_read/,*imc*/cas_count_write/ -I 1000 ...
+
+This example measures the combined core power every second
+
+  perf stat -I 1000 -e power/energy-cores/  -a
+
+ACCESS RESTRICTIONS
+-------------------
+
+For non root users generally only context switched PMU events are available.
+This is normally only the events in the cpu PMU, the predefined events
+like cycles and instructions and some software events.
+
+Other PMUs and global measurements are normally root only.
+Some event qualifiers, such as "any", are also root only.
+
+This can be overridden by setting the kernel.perf_event_paranoid
+sysctl to -1, which allows non root to use these events.
+
+For accessing trace point events perf needs to have read access to
+/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed
+setting.
+
+TRACING
+-------
+
+Some PMUs control advanced hardware tracing capabilities, such as Intel PT,
+that allows low overhead execution tracing.  These are described in a separate
+intel-pt.txt document.
+
+PARAMETERIZED EVENTS
+--------------------
+
+Some pmu events listed by 'perf-list' will be displayed with '?' in them. For
+example:
+
+  hv_gpci/dtbp_ptitc,phys_processor_idx=?/
+
+This means that when provided as an event, a value for '?' must
+also be supplied. For example:
+
+  perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ...
+
+EVENT GROUPS
+------------
+
+Perf supports time based multiplexing of events, when the number of events
+active exceeds the number of hardware performance counters. Multiplexing
+can cause measurement errors when the workload changes its execution
+profile.
+
+When metrics are computed using formulas from event counts, it is useful to
+ensure some events are always measured together as a group to minimize multiplexing
+errors. Event groups can be specified using { }.
+
+  perf stat -e '{instructions,cycles}' ...
+
+The number of available performance counters depends on the CPU. A group
+cannot contain more events than available counters.
+For example Intel Core CPUs typically have four generic performance counters
+for the core, plus three fixed counters for instructions, cycles and
+ref-cycles. Some special events have restrictions on which counter they
+can schedule, and may not support multiple instances in a single group.
+When too many events are specified in the group some of them will not
+be measured.
+
+Globally pinned events can limit the number of counters available for
+other groups. On x86 systems, the NMI watchdog pins a counter by default.
+The nmi watchdog can be disabled as root with
+
+	echo 0 > /proc/sys/kernel/nmi_watchdog
+
+Events from multiple different PMUs cannot be mixed in a group, with
+some exceptions for software events.
+
+LEADER SAMPLING
+---------------
+
+perf also supports group leader sampling using the :S specifier.
+
+  perf record -e '{cycles,instructions}:S' ...
+  perf report --group
+
+Normally all events in an event group sample, but with :S only
+the first event (the leader) samples, and it only reads the values of the
+other events in the group.
+
+OPTIONS
+-------
+
+Without options all known events will be listed.
+
+To limit the list use:
+
+. 'hw' or 'hardware' to list hardware events such as cache-misses, etc.
+
+. 'sw' or 'software' to list software events such as context switches, etc.
+
+. 'cache' or 'hwcache' to list hardware cache events such as L1-dcache-loads, etc.
+
+. 'tracepoint' to list all tracepoint events, alternatively use
+  'subsys_glob:event_glob' to filter by tracepoint subsystems such as sched,
+  block, etc.
+
+. 'pmu' to print the kernel supplied PMU events.
+
+. 'sdt' to list all Statically Defined Tracepoint events.
+
+. 'metric' to list metrics
+
+. 'metricgroup' to list metricgroups with metrics.
+
+. If none of the above is matched, it will apply the supplied glob to all
+  events, printing the ones that match.
+
+. As a last resort, it will do a substring search in all event names.
+
+One or more types can be used at the same time, listing the events for the
+types specified.
+
+Support raw format:
+
+. '--raw-dump', shows the raw-dump of all the events.
+. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of
+  a certain kind of events.
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-top[1],
+linkperf:perf-record[1],
+http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
+http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming]
diff --git a/Documentation/perf-lock.txt b/Documentation/perf-lock.txt
new file mode 100644
index 0000000..74d7745
--- /dev/null
+++ b/Documentation/perf-lock.txt
@@ -0,0 +1,70 @@
+perf-lock(1)
+============
+
+NAME
+----
+perf-lock - Analyze lock events
+
+SYNOPSIS
+--------
+[verse]
+'perf lock' {record|report|script|info}
+
+DESCRIPTION
+-----------
+You can analyze various lock behaviours
+and statistics with this 'perf lock' command.
+
+  'perf lock record <command>' records lock events
+  between start and end <command>. And this command
+  produces the file "perf.data" which contains tracing
+  results of lock events.
+
+  'perf lock report' reports statistical data.
+
+  'perf lock script' shows raw lock events.
+
+  'perf lock info' shows metadata like threads or addresses
+  of lock instances.
+
+COMMON OPTIONS
+--------------
+
+-i::
+--input=<file>::
+        Input file name. (default: perf.data unless stdin is a fifo)
+
+-v::
+--verbose::
+        Be more verbose (show symbol address, etc).
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-f::
+--force::
+	Don't complain, do it.
+
+REPORT OPTIONS
+--------------
+
+-k::
+--key=<value>::
+        Sorting key. Possible values: acquired (default), contended,
+	avg_wait, wait_total, wait_max, wait_min.
+
+INFO OPTIONS
+------------
+
+-t::
+--threads::
+	dump thread list in perf.data
+
+-m::
+--map::
+	dump map of lock instances (address:name table)
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/Documentation/perf-mem.txt b/Documentation/perf-mem.txt
new file mode 100644
index 0000000..f8d2167
--- /dev/null
+++ b/Documentation/perf-mem.txt
@@ -0,0 +1,92 @@
+perf-mem(1)
+===========
+
+NAME
+----
+perf-mem - Profile memory accesses
+
+SYNOPSIS
+--------
+[verse]
+'perf mem' [<options>] (record [<command>] | report)
+
+DESCRIPTION
+-----------
+"perf mem record" runs a command and gathers memory operation data
+from it, into perf.data. Perf record options are accepted and are passed through.
+
+"perf mem report" displays the result. It invokes perf report with the
+right set of options to display a memory access profile. By default, loads
+and stores are sampled. Use the -t option to limit to loads or stores.
+
+Note that on Intel systems the memory latency reported is the use-latency,
+not the pure load (or store) latency. Use-latency includes any pipeline
+queueing delays in addition to the memory subsystem latency.
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+-i::
+--input=<file>::
+	Input file name.
+
+-f::
+--force::
+	Don't do ownership validation
+
+-t::
+--type=<type>::
+	Select the memory operation type: load or store (default: load,store)
+
+-D::
+--dump-raw-samples::
+	Dump the raw decoded samples on the screen in a format that is easy to parse with
+	one sample per line.
+
+-x::
+--field-separator=<separator>::
+	Specify the field separator used when dumping raw samples (-D option). By default,
+	the separator is the space character.
+
+-C::
+--cpu=<cpu>::
+	Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+        comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default
+        is to monitor all CPUs.
+-U::
+--hide-unresolved::
+	Only display entries resolved to a symbol.
+
+-p::
+--phys-data::
+	Record/Report sample physical addresses
+
+RECORD OPTIONS
+--------------
+-e::
+--event <event>::
+	Event selector. Use 'perf mem record -e list' to list available events.
+
+-K::
+--all-kernel::
+	Configure all used events to run in kernel space.
+
+-U::
+--all-user::
+	Configure all used events to run in user space.
+
+-v::
+--verbose::
+	Be more verbose (show counter open errors, etc)
+
+--ldlat <n>::
+	Specify desired latency for loads event.
+
+In addition, for report all perf report options are valid, and for record
+all perf record options.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/Documentation/perf-probe.txt b/Documentation/perf-probe.txt
new file mode 100644
index 0000000..b6866a0
--- /dev/null
+++ b/Documentation/perf-probe.txt
@@ -0,0 +1,299 @@
+perf-probe(1)
+=============
+
+NAME
+----
+perf-probe - Define new dynamic tracepoints
+
+SYNOPSIS
+--------
+[verse]
+'perf probe' [options] --add='PROBE' [...]
+or
+'perf probe' [options] PROBE
+or
+'perf probe' [options] --del='[GROUP:]EVENT' [...]
+or
+'perf probe' --list[=[GROUP:]EVENT]
+or
+'perf probe' [options] --line='LINE'
+or
+'perf probe' [options] --vars='PROBEPOINT'
+or
+'perf probe' [options] --funcs
+or
+'perf probe' [options] --definition='PROBE' [...]
+
+DESCRIPTION
+-----------
+This command defines dynamic tracepoint events, by symbol and registers
+without debuginfo, or by C expressions (C line numbers, C function names,
+and C local variables) with debuginfo.
+
+
+OPTIONS
+-------
+-k::
+--vmlinux=PATH::
+	Specify vmlinux path which has debuginfo (Dwarf binary).
+	Only when using this with --definition, you can give an offline
+	vmlinux file.
+
+-m::
+--module=MODNAME|PATH::
+	Specify module name in which perf-probe searches probe points
+	or lines. If a path of module file is passed, perf-probe
+	treats it as an offline module (this means you can add a probe on
+        a module which has not been loaded yet).
+
+-s::
+--source=PATH::
+	Specify path to kernel source.
+
+-v::
+--verbose::
+        Be more verbose (show parsed arguments, etc).
+	Can not use with -q.
+
+-q::
+--quiet::
+	Be quiet (do not show any messages including errors).
+	Can not use with -v.
+
+-a::
+--add=::
+	Define a probe event (see PROBE SYNTAX for detail).
+
+-d::
+--del=::
+	Delete probe events. This accepts glob wildcards('*', '?') and character
+	classes(e.g. [a-z], [!A-Z]).
+
+-l::
+--list[=[GROUP:]EVENT]::
+	List current probe events. This can also accept filtering patterns of
+	event names.
+	When this is used with --cache, perf shows all cached probes instead of
+	the live probes.
+
+-L::
+--line=::
+	Show source code lines which can be probed. This needs an argument
+	which specifies a range of the source code. (see LINE SYNTAX for detail)
+
+-V::
+--vars=::
+	Show available local variables at given probe point. The argument
+	syntax is same as PROBE SYNTAX, but NO ARGs.
+
+--externs::
+	(Only for --vars) Show external defined variables in addition to local
+	variables.
+
+--no-inlines::
+	(Only for --add) Search only for non-inlined functions. The functions
+	which do not have instances are ignored.
+
+-F::
+--funcs[=FILTER]::
+	Show available functions in given module or kernel. With -x/--exec,
+	can also list functions in a user space executable / shared library.
+	This also can accept a FILTER rule argument.
+
+-D::
+--definition=::
+	Show trace-event definition converted from given probe-event instead
+	of writing it into tracing/[k,u]probe_events.
+
+--filter=FILTER::
+	(Only for --vars and --funcs) Set filter. FILTER is a combination of glob
+	pattern, see FILTER PATTERN for detail.
+	Default FILTER is "!__k???tab_* & !__crc_*" for --vars, and "!_*"
+	for --funcs.
+	If several filters are specified, only the last filter is used.
+
+-f::
+--force::
+	Forcibly add events with existing name.
+
+-n::
+--dry-run::
+	Dry run. With this option, --add and --del don't execute actual
+	adding and removal operations.
+
+--cache::
+	(With --add) Cache the probes. Any events which are successfully added
+	are also stored in the cache file.
+	(With --list) Show cached probes.
+	(With --del) Remove cached probes.
+
+--max-probes=NUM::
+	Set the maximum number of probe points for an event. Default is 128.
+
+--target-ns=PID::
+	Obtain mount namespace information from the target pid.  This is
+	used when creating a uprobe for a process that resides in a
+	different mount namespace from the perf(1) utility.
+
+-x::
+--exec=PATH::
+	Specify path to the executable or shared library file for user
+	space tracing. Can also be used with --funcs option.
+
+--demangle::
+	Demangle application symbols. --no-demangle is also available
+	for disabling demangling.
+
+--demangle-kernel::
+	Demangle kernel symbols. --no-demangle-kernel is also available
+	for disabling kernel demangling.
+
+In absence of -m/-x options, perf probe checks if the first argument after
+the options is an absolute path name. If it's an absolute path, perf probe
+uses it as a target module/target user space binary to probe.
+
+PROBE SYNTAX
+------------
+Probe points are defined by following syntax.
+
+    1) Define event based on function name
+     [[GROUP:]EVENT=]FUNC[@SRC][:RLN|+OFFS|%return|;PTN] [ARG ...]
+
+    2) Define event based on source file with line number
+     [[GROUP:]EVENT=]SRC:ALN [ARG ...]
+
+    3) Define event based on source file with lazy pattern
+     [[GROUP:]EVENT=]SRC;PTN [ARG ...]
+
+    4) Pre-defined SDT events or cached event with name
+     %[sdt_PROVIDER:]SDTEVENT
+     or,
+     sdt_PROVIDER:SDTEVENT
+
+'EVENT' specifies the name of new event, if omitted, it will be set to the name of the probed function, and for return probes, a "\_\_return" suffix is automatically added to the function name. You can also specify a group name by 'GROUP', if omitted, 'probe' is used for kprobe and 'probe_<bin>' is used for uprobe.
+Note that using existing group name can conflict with other events. Especially, using the group name reserved for kernel modules can hide embedded events in the
+modules.
+'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition.  In addition, '@SRC' specifies a source file which has that function.
+It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern.
+'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT).
+'SDTEVENT' and 'PROVIDER' are the pre-defined event name and provider name which are defined by user SDT (Statically Defined Tracing) or the pre-cached probes with event name.
+Note that before using the SDT event, the target binary (on which SDT events are defined) must be scanned by linkperf:perf-buildid-cache[1] to make SDT events as cached events.
+
+For details of the SDT, see below.
+https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html
+
+ESCAPED CHARACTER
+-----------------
+
+In the probe syntax, '=', '@', '+', ':' and ';' are treated as special characters. You can use a backslash ('\') to escape the special characters.
+This is useful if you need to probe on specific versioned symbols, like @GLIBC_... suffixes, or if you need to specify a source file whose name includes the special characters.
+Note that usually a single backslash is consumed by the shell, so you might need to pass a double backslash (\\) or wrap the argument with single quotes (\'AAA\@BBB').
+See EXAMPLES how it is used.
+
+PROBE ARGUMENT
+--------------
+Each probe argument follows below syntax.
+
+ [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
+
+'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
+'$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can be accessed at the given probe point. '$params' is expanded to only the function parameters.
+'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo (*). Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x/x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail)
+On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
+
+TYPES
+-----
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) and hexadecimal integers (x8/x16/x32/x64) are integer types. Prefixes 's' and 'u' mean those types are signed and unsigned respectively, and 'x' means the value is shown in hexadecimal format. Traced arguments are shown in decimal (sNN/uNN) or hex (xNN). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe. Moreover, you can use 'x' to explicitly request hexadecimal output (the size is also auto-detected).
+String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+Bitfield is another special type, which takes 3 parameters, bit-width, bit-offset, and container-size (usually 32). The syntax is;
+
+ b<bit-width>@<bit-offset>/<container-size>
+
+LINE SYNTAX
+-----------
+Line range is described by following syntax.
+
+ "FUNC[@SRC][:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
+
+FUNC specifies the function name of showing lines. 'RLN' is the start line
+number from function entry line, and 'RLN2' is the end line number. As in the
+probe syntax, 'SRC' means the source file path, 'ALN' is start line number,
+and 'ALN2' is end line number in the file. It is also possible to specify how
+many lines to show by using 'NUM'. Moreover, 'FUNC@SRC' combination is good
+for searching a specific function when several functions share same name.
+So, "source.c:100-120" shows lines from the 100th to the 120th in source.c file. And "func:10+20" shows 20 lines from the 10th line of func function.
+
+LAZY MATCHING
+-------------
+ The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]).
+
+e.g.
+ 'a=*' can match 'a=b', 'a = b', 'a == b' and so on.
+
+This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.
+
+FILTER PATTERN
+--------------
+ The filter pattern is a glob matching pattern(s) to filter variables.
+ In addition, you can use "!" for specifying filter-out rule. You also can give several rules combined with "&" or "|", and fold those rules as one rule by using "(" ")".
+
+e.g.
+ With --filter "foo* | bar*", perf probe -V shows variables which start with "foo" or "bar".
+ With --filter "!foo* & *bar", perf probe -V shows variables which don't start with "foo" and end with "bar", like "fizzbar". But "foobar" is filtered out.
+
+EXAMPLES
+--------
+Display which lines in schedule() can be probed:
+
+ ./perf probe --line schedule
+
+Add a probe on schedule() function 12th line with recording cpu local variable:
+
+ ./perf probe schedule:12 cpu
+ or
+ ./perf probe --add='schedule:12 cpu'
+
+Add one or more probes on functions whose names start with "schedule".
+
+ ./perf probe schedule*
+ or
+ ./perf probe --add='schedule*'
+
+Add probes on lines in schedule() function which calls update_rq_clock().
+
+ ./perf probe 'schedule;update_rq_clock*'
+ or
+ ./perf probe --add='schedule;update_rq_clock*'
+
+Delete all probes on schedule().
+
+ ./perf probe --del='schedule*'
+
+Add probes at zfree() function on /bin/zsh
+
+ ./perf probe -x /bin/zsh zfree or ./perf probe /bin/zsh zfree
+
+Add probes at malloc() function on libc
+
+ ./perf probe -x /lib/libc.so.6 malloc or ./perf probe /lib/libc.so.6 malloc
+
+Add a uprobe to a target process running in a different mount namespace
+
+ ./perf probe --target-ns <target pid> -x /lib64/libc.so.6 malloc
+
+Add a USDT probe to a target process running in a different mount namespace
+
+ ./perf probe --target-ns <target pid> -x /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.121-0.b13.el7_3.x86_64/jre/lib/amd64/server/libjvm.so %sdt_hotspot:thread__sleep__end
+
+Add a probe on specific versioned symbol by backslash escape
+
+ ./perf probe -x /lib64/libc-2.25.so 'malloc_get_state\@GLIBC_2.2.5'
+
+Add a probe in a source file using special characters by backslash escape
+
+ ./perf probe -x /opt/test/a.out 'foo\+bar.c:4'
+
+
+SEE ALSO
+--------
+linkperf:perf-trace[1], linkperf:perf-record[1], linkperf:perf-buildid-cache[1]
diff --git a/Documentation/perf-record.txt b/Documentation/perf-record.txt
new file mode 100644
index 0000000..cc37b3a
--- /dev/null
+++ b/Documentation/perf-record.txt
@@ -0,0 +1,502 @@
+perf-record(1)
+==============
+
+NAME
+----
+perf-record - Run a command and record its profile into perf.data
+
+SYNOPSIS
+--------
+[verse]
+'perf record' [-e <EVENT> | --event=EVENT] [-a] <command>
+'perf record' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
+
+DESCRIPTION
+-----------
+This command runs a command and gathers a performance counter profile
+from it, into perf.data - without displaying anything.
+
+This file can then be inspected later on, using 'perf report'.
+
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+-e::
+--event=::
+	Select the PMU event. Selection can be:
+
+        - a symbolic event name	(use 'perf list' to list all events)
+
+        - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
+	  hexadecimal event descriptor.
+
+	- a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where
+	  'param1', 'param2', etc are defined as formats for the PMU in
+	  /sys/bus/event_source/devices/<pmu>/format/*.
+
+	- a symbolically formed event like 'pmu/config=M,config1=N,config2=K/'
+
+          where M, N, K are numbers (in decimal, hex, octal format). Acceptable
+          values for each of 'config', 'config1' and 'config2' are defined by
+          corresponding entries in /sys/bus/event_source/devices/<pmu>/format/*
+          param1 and param2 are defined as formats for the PMU in:
+          /sys/bus/event_source/devices/<pmu>/format/*
+
+	  There are also some parameters which are not defined in .../<pmu>/format/*.
+	  These params can be used to overload default config values per event.
+	  Here are some common parameters:
+	  - 'period': Set event sampling period
+	  - 'freq': Set event sampling frequency
+	  - 'time': Disable/enable time stamping. Acceptable values are 1 for
+		    enabling time stamping. 0 for disabling time stamping.
+		    The default is 1.
+	  - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
+			 FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
+			 "no" for disable callgraph.
+	  - 'stack-size': user stack size for dwarf mode
+
+          See the linkperf:perf-list[1] man page for more parameters.
+
+	  Note: If user explicitly sets options which conflict with the params,
+	  the value set by the parameters will be overridden.
+
+	  Also not defined in .../<pmu>/format/* are PMU driver specific
+	  configuration parameters.  Any configuration parameter preceded by
+	  the letter '@' is not interpreted in user space and sent down directly
+	  to the PMU driver.  For example:
+
+	  perf record -e some_event/@cfg1,@cfg2=config/ ...
+
+	  will see 'cfg1' and 'cfg2=config' pushed to the PMU driver associated
+	  with the event for further processing.  There is no restriction on
+	  what the configuration parameters are, as long as their semantic is
+	  understood and supported by the PMU driver.
+
+        - a hardware breakpoint event in the form of '\mem:addr[/len][:access]'
+          where addr is the address in memory you want to break in.
+          Access is the memory access type (read, write, execute) it can
+          be passed as follows: '\mem:addr[:[r][w][x]]'. len is the range,
+          number of bytes from specified addr, which the breakpoint will cover.
+          If you want to profile read-write accesses in 0x1000, just set
+          'mem:0x1000:rw'.
+          If you want to profile write accesses in [0x1000~0x1008), just set
+          'mem:0x1000/8:w'.
+
+	- a group of events surrounded by a pair of brace ("{event1,event2,...}").
+	  Each event is separated by commas and the group should be quoted to
+	  prevent the shell interpretation.  You also need to use --group on
+	  "perf report" to view group events together.
+
+--filter=<filter>::
+        Event filter. This option should follow an event selector (-e) which
+	selects either tracepoint event(s) or a hardware trace PMU
+	(e.g. Intel PT or CoreSight).
+
+	- tracepoint filters
+
+	In the case of tracepoints, multiple '--filter' options are combined
+	using '&&'.
+
+	- address filters
+
+	A hardware trace PMU advertises its ability to accept a number of
+	address filters	by specifying a non-zero value in
+	/sys/bus/event_source/devices/<pmu>/nr_addr_filters.
+
+	Address filters have the format:
+
+	filter|start|stop|tracestop <start> [/ <size>] [@<file name>]
+
+	Where:
+	- 'filter': defines a region that will be traced.
+	- 'start': defines an address at which tracing will begin.
+	- 'stop': defines an address at which tracing will stop.
+	- 'tracestop': defines a region in which tracing will stop.
+
+	<file name> is the name of the object file, <start> is the offset to the
+	code to trace in that file, and <size> is the size of the region to
+	trace. 'start' and 'stop' filters need not specify a <size>.
+
+	If no object file is specified then the kernel is assumed, in which case
+	the start address must be a current kernel memory address.
+
+	<start> can also be specified by providing the name of a symbol. If the
+	symbol name is not unique, it can be disambiguated by inserting #n where
+	'n' selects the n'th symbol in address order. Alternately #0, #g or #G
+	select only a global symbol. <size> can also be specified by providing
+	the name of a symbol, in which case the size is calculated to the end
+	of that symbol. For 'filter' and 'tracestop' filters, if <size> is
+	omitted and <start> is a symbol, then the size is calculated to the end
+	of that symbol.
+
+	If <size> is omitted and <start> is '*', then the start and size will
+	be calculated from the first and last symbols, i.e. to trace the whole
+	file.
+
+	If symbol names (or '*') are provided, they must be surrounded by white
+	space.
+
+	The filter passed to the kernel is not necessarily the same as entered.
+	To see the filter that is passed, use the -v option.
+
+	The kernel may not be able to configure a trace region if it is not
+	within a single mapping.  MMAP events (or /proc/<pid>/maps) can be
+	examined to determine if that is a possibility.
+
+	Multiple filters can be separated with space or comma.
+
+--exclude-perf::
+	Don't record events issued by perf itself. This option should follow
+	an event selector (-e) which selects tracepoint event(s). It adds a
+	filter expression 'common_pid != $PERFPID' to filters. If other
+	'--filter' exists, the new filter expression will be combined with
+	them by '&&'.
+
+-a::
+--all-cpus::
+        System-wide collection from all CPUs (default if no target is specified).
+
+-p::
+--pid=::
+	Record events on existing process ID (comma separated list).
+
+-t::
+--tid=::
+        Record events on existing thread ID (comma separated list).
+        This option also disables inheritance by default.  Enable it by adding
+        --inherit.
+
+-u::
+--uid=::
+        Record events in threads owned by uid. Name or number.
+
+-r::
+--realtime=::
+	Collect data with this RT SCHED_FIFO priority.
+
+--no-buffering::
+	Collect data without buffering.
+
+-c::
+--count=::
+	Event period to sample.
+
+-o::
+--output=::
+	Output file name.
+
+-i::
+--no-inherit::
+	Child tasks do not inherit counters.
+
+-F::
+--freq=::
+	Profile at this frequency. Use 'max' to use the currently maximum
+	allowed frequency, i.e. the value in the kernel.perf_event_max_sample_rate
+	sysctl. Will throttle down to the currently maximum allowed frequency.
+	See --strict-freq.
+
+--strict-freq::
+	Fail if the specified frequency can't be used.
+
+-m::
+--mmap-pages=::
+	Number of mmap data pages (must be a power of two) or size
+	specification with appended unit character - B/K/M/G. The
+	size is rounded up to have nearest pages power of two value.
+	Also, by adding a comma, the number of mmap pages for AUX
+	area tracing can be specified.
+
+--group::
+	Put all events in a single event group.  This precedes the --event
+	option and remains only for backward compatibility.  See --event.
+
+-g::
+	Enables call-graph (stack chain/backtrace) recording.
+
+--call-graph::
+	Setup and enable call-graph (stack chain/backtrace) recording,
+	implies -g.  Default is "fp".
+
+	Allows specifying "fp" (frame pointer) or "dwarf"
+	(DWARF's CFI - Call Frame Information) or "lbr"
+	(Hardware Last Branch Record facility) as the method to collect
+	the information used to show the call graphs.
+
+	In some systems, where binaries are built with gcc
+	-fomit-frame-pointer, using the "fp" method will produce bogus
+	call graphs, using "dwarf", if available (perf tools linked to
+	the libunwind or libdw library) should be used instead.
+	Using the "lbr" method doesn't require any compiler options. It
+	will produce call graphs from the hardware LBR registers. The
+	main limitation is that it is only available on new Intel
+	platforms, such as Haswell. It can only get user call chain. It
+	doesn't work with branch stack sampling at the same time.
+
+	When "dwarf" recording is used, perf also records (user) stack dump
+	when sampled.  Default size of the stack dump is 8192 (bytes).
+	User can change the size by passing the size after comma like
+	"--call-graph dwarf,4096".
+
+-q::
+--quiet::
+	Don't print any message, useful for scripting.
+
+-v::
+--verbose::
+	Be more verbose (show counter open errors, etc).
+
+-s::
+--stat::
+	Record per-thread event counts.  Use it with 'perf report -T' to see
+	the values.
+
+-d::
+--data::
+	Record the sample virtual addresses.
+
+--phys-data::
+	Record the sample physical addresses.
+
+-T::
+--timestamp::
+	Record the sample timestamps. Use it with 'perf report -D' to see the
+	timestamps, for instance.
+
+-P::
+--period::
+	Record the sample period.
+
+--sample-cpu::
+	Record the sample cpu.
+
+-n::
+--no-samples::
+	Don't sample.
+
+-R::
+--raw-samples::
+Collect raw sample records from all opened counters (default for tracepoint counters).
+
+-C::
+--cpu::
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+In per-thread mode with inheritance mode on (default), samples are captured only when
+the thread executes on the designated CPUs. Default is to monitor all CPUs.
+
+-B::
+--no-buildid::
+Do not save the build ids of binaries in the perf.data files. This skips
+post processing after recording, which sometimes makes the final step in
+the recording process to take a long time, as it needs to process all
+events looking for mmap records. The downside is that it can misresolve
+symbols if the workload binaries used when recording get locally rebuilt
+or upgraded, because the only key available in this case is the
+pathname. You can also set the "record.build-id" config variable to
+'skip' to have this behaviour permanently.
+
+-N::
+--no-buildid-cache::
+Do not update the buildid cache. This saves some overhead in situations
+where the information in the perf.data file (which includes buildids)
+is sufficient.  You can also set the "record.build-id" config variable to
+'no-cache' to have the same effect.
+
+-G name,...::
+--cgroup name,...::
+monitor only in the container (cgroup) called "name". This option is available only
+in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
+container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
+can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
+to first event, second cgroup to second event and so on. It is possible to provide
+an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
+corresponding events, i.e., they always refer to events defined earlier on the command
+line. If the user wants to track multiple events for a specific cgroup, the user can
+use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'.
+
+If wanting to monitor, say, 'cycles' for a cgroup and also for system wide, this
+command line can be used: 'perf record -e cycles -G cgroup_name -a -e cycles'.
+
+-b::
+--branch-any::
+Enable taken branch stack sampling. Any type of taken branch may be sampled.
+This is a shortcut for --branch-filter any. See --branch-filter for more information.
+
+-j::
+--branch-filter::
+Enable taken branch stack sampling. Each sample captures a series of consecutive
+taken branches. The number of branches captured with each sample depends on the
+underlying hardware, the type of branches of interest, and the executed code.
+It is possible to select the types of branches captured by enabling filters. The
+following filters are defined:
+
+        - any:  any type of branches
+        - any_call: any function call or system call
+        - any_ret: any function return or system call return
+        - ind_call: any indirect branch
+        - call: direct calls, including far (to/from kernel) calls
+        - u:  only when the branch target is at the user level
+        - k: only when the branch target is in the kernel
+        - hv: only when the target is at the hypervisor level
+	- in_tx: only when the target is in a hardware transaction
+	- no_tx: only when the target is not in a hardware transaction
+	- abort_tx: only when the target is a hardware transaction abort
+	- cond: conditional branches
+	- save_type: save branch type during sampling in case binary is not available later
+
++
+The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
+The privilege levels may be omitted, in which case, the privilege levels of the associated
+event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
+levels are subject to permissions.  When sampling on multiple events, branch stack sampling
+is enabled for all the sampling events. The sampled branch type is the same for all events.
+The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
+Note that this feature may not be available on all processors.
+
+--weight::
+Enable weighted sampling. An additional weight is recorded per sample and can be
+displayed with the weight and local_weight sort keys.  This currently works for TSX
+abort events and some memory events in precise mode on modern Intel CPUs.
+
+--namespaces::
+Record events of type PERF_RECORD_NAMESPACES.
+
+--transaction::
+Record transaction flags for transaction related events.
+
+--per-thread::
+Use per-thread mmaps.  By default per-cpu mmaps are created.  This option
+overrides that and uses per-thread mmaps.  A side-effect of that is that
+inheritance is automatically disabled.  --per-thread is ignored with a warning
+if combined with -a or -C options.
+
+-D::
+--delay=::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
+-I::
+--intr-regs::
+Capture machine state (registers) at interrupt, i.e., on counter overflows for
+each sample. List of captured registers depends on the architecture. This option
+is off by default. It is possible to select the registers to sample using their
+symbolic names, e.g. on x86, ax, si. To list the available registers use
+--intr-regs=\?. To name registers, pass a comma separated list such as
+--intr-regs=ax,bx. The list of registers is architecture dependent.
+
+--user-regs::
+Capture user registers at sample time. Same arguments as -I.
+
+--running-time::
+Record running and enabled time for read events (:S)
+
+-k::
+--clockid::
+Sets the clock id to use for the various time fields in the perf_event_type
+records. See clock_gettime(). In particular CLOCK_MONOTONIC and
+CLOCK_MONOTONIC_RAW are supported, some events might also allow
+CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI.
+
+-S::
+--snapshot::
+Select AUX area tracing Snapshot Mode. This option is valid only with an
+AUX area tracing event. Optionally the number of bytes to capture per
+snapshot can be specified. In Snapshot Mode, trace data is captured only when
+signal SIGUSR2 is received.
+
+--proc-map-timeout::
+When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
+because the file may be huge. A time out is needed in such cases.
+This option sets the time out limit. The default value is 500 ms.
+
+--switch-events::
+Record context switch events i.e. events of type PERF_RECORD_SWITCH or
+PERF_RECORD_SWITCH_CPU_WIDE.
+
+--clang-path=PATH::
+Path to clang binary to use for compiling BPF scriptlets.
+(enabled when BPF support is on)
+
+--clang-opt=OPTIONS::
+Options passed to clang when compiling BPF scriptlets.
+(enabled when BPF support is on)
+
+--vmlinux=PATH::
+Specify vmlinux path which has debuginfo.
+(enabled when BPF prologue is on)
+
+--buildid-all::
+Record build-id of all DSOs regardless whether it's actually hit or not.
+
+--all-kernel::
+Configure all used events to run in kernel space.
+
+--all-user::
+Configure all used events to run in user space.
+
+--timestamp-filename::
+Append timestamp to output file name.
+
+--timestamp-boundary::
+Record timestamp boundary (time of first/last samples).
+
+--switch-output[=mode]::
+Generate multiple perf.data files, timestamp prefixed, switching to a new one
+based on 'mode' value:
+  "signal" - when receiving a SIGUSR2 (default value) or
+  <size>   - when reaching the size threshold, size is expected to
+             be a number with appended unit character - B/K/M/G
+  <time>   - when reaching the time threshold, time is expected to
+             be a number with appended unit character - s/m/h/d
+
+             Note: the precision of  the size  threshold  hugely depends
+             on your configuration  - the number and size of  your  ring
+             buffers (-m). It is generally more precise for higher sizes
+             (like >5M), for lower values expect different sizes.
+
+A possible use case is to, given an external event, slice the perf.data file
+that gets then processed, possibly via a perf script, to decide if that
+particular perf.data snapshot should be kept or not.
+
+Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
+The reason for the latter two is to reduce the data file switching
+overhead. You can still switch them on with:
+
+  --switch-output --no-no-buildid  --no-no-buildid-cache
+
+--dry-run::
+Parse options then exit. --dry-run can be used to detect errors in cmdline
+options.
+
+'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj
+in config file is set to true.
+
+--tail-synthesize::
+Instead of collecting non-sample events (for example, fork, comm, mmap) at
+the beginning of record, collect them during finalizing an output file.
+The collected non-sample events reflect the status of the system when
+record is finished.
+
+--overwrite::
+Makes all events use an overwritable ring buffer. An overwritable ring
+buffer works like a flight recorder: when it gets full, the kernel will
+overwrite the oldest records, that thus will never make it to the
+perf.data file.
+
+When '--overwrite' and '--switch-output' are used perf records and drops
+events until it receives a signal, meaning that something unusual was
+detected that warrants taking a snapshot of the most current events,
+those fitting in the ring buffer at that moment.
+
+'overwrite' attribute can also be set or canceled for an event using
+config terms. For example: 'cycles/overwrite/' and 'instructions/no-overwrite/'.
+
+Implies --tail-synthesize.
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/Documentation/perf-report.txt b/Documentation/perf-report.txt
new file mode 100644
index 0000000..917e36f
--- /dev/null
+++ b/Documentation/perf-report.txt
@@ -0,0 +1,484 @@
+perf-report(1)
+==============
+
+NAME
+----
+perf-report - Read perf.data (created by perf record) and display the profile
+
+SYNOPSIS
+--------
+[verse]
+'perf report' [-i <file> | --input=file]
+
+DESCRIPTION
+-----------
+This command displays the performance counter profile information recorded
+via perf record.
+
+OPTIONS
+-------
+-i::
+--input=::
+        Input file name. (default: perf.data unless stdin is a fifo)
+
+-v::
+--verbose::
+        Be more verbose. (show symbol address, etc)
+
+-q::
+--quiet::
+	Do not show any message.  (Suppress -v)
+
+-n::
+--show-nr-samples::
+	Show the number of samples for each symbol
+
+--show-cpu-utilization::
+        Show sample percentage for different cpu modes.
+
+-T::
+--threads::
+	Show per-thread event counters.  The input data file should be recorded
+	with -s option.
+-c::
+--comms=::
+	Only consider symbols in these comms. CSV that understands
+	file://filename entries.  This option will affect the percentage of
+	the overhead column.  See --percentage for more info.
+--pid=::
+        Only show events for given process ID (comma separated list).
+
+--tid=::
+        Only show events for given thread ID (comma separated list).
+-d::
+--dsos=::
+	Only consider symbols in these dsos. CSV that understands
+	file://filename entries.  This option will affect the percentage of
+	the overhead column.  See --percentage for more info.
+-S::
+--symbols=::
+	Only consider these symbols. CSV that understands
+	file://filename entries.  This option will affect the percentage of
+	the overhead column.  See --percentage for more info.
+
+--symbol-filter=::
+	Only show symbols that match (partially) with this filter.
+
+-U::
+--hide-unresolved::
+        Only display entries resolved to a symbol.
+
+-s::
+--sort=::
+	Sort histogram entries by given key(s) - multiple keys can be specified
+	in CSV format.  Following sort keys are available:
+	pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
+	local_weight, cgroup_id.
+
+	Each key has following meaning:
+
+	- comm: command (name) of the task which can be read via /proc/<pid>/comm
+	- pid: command and tid of the task
+	- dso: name of library or module executed at the time of sample
+	- dso_size: size of library or module executed at the time of sample
+	- symbol: name of function executed at the time of sample
+	- symbol_size: size of function executed at the time of sample
+	- parent: name of function matched to the parent regex filter. Unmatched
+	entries are displayed as "[other]".
+	- cpu: cpu number the task ran at the time of sample
+	- socket: processor socket number the task ran at the time of sample
+	- srcline: filename and line number executed at the time of sample.  The
+	DWARF debugging info must be provided.
+	- srcfile: file name of the source file of the samples. Requires dwarf
+	information.
+	- weight: Event specific weight, e.g. memory latency or transaction
+	abort cost. This is the global weight.
+	- local_weight: Local weight version of the weight above.
+	- cgroup_id: ID derived from cgroup namespace device and inode numbers.
+	- transaction: Transaction abort flags.
+	- overhead: Overhead percentage of sample
+	- overhead_sys: Overhead percentage of sample running in system mode
+	- overhead_us: Overhead percentage of sample running in user mode
+	- overhead_guest_sys: Overhead percentage of sample running in system mode
+	on guest machine
+	- overhead_guest_us: Overhead percentage of sample running in user mode on
+	guest machine
+	- sample: Number of sample
+	- period: Raw number of event count of sample
+
+	By default, comm, dso and symbol keys are used.
+	(i.e. --sort comm,dso,symbol)
+
+	If --branch-stack option is used, following sort keys are also
+	available:
+
+	- dso_from: name of library or module branched from
+	- dso_to: name of library or module branched to
+	- symbol_from: name of function branched from
+	- symbol_to: name of function branched to
+	- srcline_from: source file and line branched from
+	- srcline_to: source file and line branched to
+	- mispredict: "N" for predicted branch, "Y" for mispredicted branch
+	- in_tx: branch in TSX transaction
+	- abort: TSX transaction abort.
+	- cycles: Cycles in basic block
+
+	And default sort keys are changed to comm, dso_from, symbol_from, dso_to
+	and symbol_to, see '--branch-stack'.
+
+	If the --mem-mode option is used, the following sort keys are also available
+	(incompatible with --branch-stack):
+	symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+
+	- symbol_daddr: name of data symbol being executed on at the time of sample
+	- dso_daddr: name of library or module containing the data being executed
+	on at the time of the sample
+	- locked: whether the bus was locked at the time of the sample
+	- tlb: type of tlb access for the data at the time of the sample
+	- mem: type of memory access for the data at the time of the sample
+	- snoop: type of snoop (if any) for the data at the time of the sample
+	- dcacheline: the cacheline the data address is on at the time of the sample
+	- phys_daddr: physical address of data being executed on at the time of sample
+
+	And the default sort keys are changed to local_weight, mem, sym, dso,
+	symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
+
+	If the data file has tracepoint event(s), following (dynamic) sort keys
+	are also available:
+	trace, trace_fields, [<event>.]<field>[/raw]
+
+	- trace: pretty printed trace output in a single column
+	- trace_fields: fields in tracepoints in separate columns
+	- <field name>: optional event and field name for a specific field
+
+	The last form consists of event and field names.  If event name is
+	omitted, it searches all events for matching field name.  The matched
+	field will be shown only for the event that has the field.  The event name
+	supports substring match so user doesn't need to specify full subsystem
+	and event name every time.  For example, 'sched:sched_switch' event can
+	be shortened to 'switch' as long as it's not ambiguous.  Also event can
+	be specified by its index (starting from 1) preceded by the '%'.
+	So '%1' is the first event, '%2' is the second, and so on.
+
+	The field name can have '/raw' suffix which disables pretty printing
+	and shows raw field value like hex numbers.  The --raw-trace option
+	has the same effect for all dynamic sort keys.
+
+	The default sort keys are changed to 'trace' if all events in the data
+	file are tracepoint.
+
+-F::
+--fields=::
+	Specify output field - multiple keys can be specified in CSV format.
+	Following fields are available:
+	overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+	Also it can contain any sort key(s).
+
+	By default, every sort key not specified in -F will be appended
+	automatically.
+
+	If the keys start with a prefix '+', then it will append the specified
+        field(s) to the default field order. For example: perf report -F +period,sample.
+
+-p::
+--parent=<regex>::
+        A regex filter to identify parent. The parent is a caller of this
+	function and searched through the callchain, thus it requires callchain
+	information recorded. The pattern is in the extended regex format and
+	defaults to "\^sys_|^do_page_fault", see '--sort parent'.
+
+-x::
+--exclude-other::
+        Only display entries with parent-match.
+
+-w::
+--column-widths=<width[,width...]>::
+	Force each column width to the provided list, for large terminal
+	readability.  0 means no limit (default behavior).
+
+-t::
+--field-separator=::
+	Use a special separator character and don't pad with spaces, replacing
+	all occurrences of this separator in symbol names (and other output)
+	with a '.' character, which is thus the only invalid separator.
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-g::
+--call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
+        Display call chains using type, min percent threshold, print limit,
+	call order, sort key, optional branch and value.  Note that ordering
+	is not fixed so any parameter can be given in an arbitrary order.
+	One exception is the print_limit which should be preceded by threshold.
+
+	print_type can be either:
+	- flat: single column, linear exposure of call chains.
+	- graph: use a graph tree, displaying absolute overhead rates. (default)
+	- fractal: like graph, but displays relative rates. Each branch of
+		 the tree is considered as a new profiled object.
+	- folded: call chains are displayed in a line, separated by semicolons
+	- none: disable call chain display.
+
+	threshold is a percentage value which specifies a minimum percent to be
+	included in the output call graph.  Default is 0.5 (%).
+
+	print_limit is only applied when stdio interface is used.  It's to limit
+	number of call graph entries in a single hist entry.  Note that it needs
+	to be given after threshold (but not necessarily consecutive).
+	Default is 0 (unlimited).
+
+	order can be either:
+	- callee: callee based call graph.
+	- caller: inverted caller based call graph.
+	Default is 'caller' when --children is used, otherwise 'callee'.
+
+	sort_key can be:
+	- function: compare on functions (default)
+	- address: compare on individual code addresses
+	- srcline: compare on source filename and line number
+
+	branch can be:
+	- branch: include last branch information in callgraph when available.
+	          Usually more convenient to use --branch-history for this.
+
+	value can be:
+	- percent: display overhead percent (default)
+	- period: display event period
+	- count: display event count
+
+--children::
+	Accumulate callchain of children to parent entry so that they can
+	show up in the output.  The output will have a new "Children" column
+	and will be sorted on the data.  It requires callchains are recorded.
+	See the `overhead calculation' section for more details. Enabled by
+	default, disable with --no-children.
+
+--max-stack::
+	Set the stack depth limit when parsing the callchain, anything
+	beyond the specified depth will be ignored. This is a trade-off
+	between information loss and faster processing especially for
+	workloads that can have a very long callchain stack.
+	Note that when using the --itrace option the synthesized callchain size
+	will override this value if the synthesized callchain size is bigger.
+
+	Default: 127
+
+-G::
+--inverted::
+        alias for inverted caller based call graph.
+
+--ignore-callees=<regex>::
+        Ignore callees of the function(s) matching the given regex.
+        This has the effect of collecting the callers of each such
+        function into one place in the call-graph tree.
+
+--pretty=<key>::
+        Pretty printing style.  key: normal, raw
+
+--stdio:: Use the stdio interface.
+
+--stdio-color::
+	'always', 'never' or 'auto', allowing configuring color output
+	via the command line, in addition to via "color.ui" .perfconfig.
+	Use '--stdio-color always' to generate color even when redirecting
+	to a pipe or file. Using just '--stdio-color' is equivalent to
+	using 'always'.
+
+--tui:: Use the TUI interface, that is integrated with annotate and allows
+        zooming into DSOs or threads, among other features. Use of --tui
+	requires a tty, if one is not present, as when piping to other
+	commands, the stdio interface is used.
+
+--gtk:: Use the GTK2 interface.
+
+-k::
+--vmlinux=<file>::
+        vmlinux pathname
+
+--ignore-vmlinux::
+	Ignore vmlinux files.
+
+--kallsyms=<file>::
+        kallsyms pathname
+
+-m::
+--modules::
+        Load module symbols. WARNING: This should only be used with -k and
+        a LIVE kernel.
+
+-f::
+--force::
+        Don't do ownership validation.
+
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+
+-C::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+	be provided as a comma-separated list with no space: 0,1. Ranges of
+	CPUs are specified with -: 0-2. Default is to report samples on all
+	CPUs.
+
+-M::
+--disassembler-style=:: Set disassembler style for objdump.
+
+--source::
+	Interleave source code with assembly code. Enabled by default,
+	disable with --no-source.
+
+--asm-raw::
+	Show raw instruction encoding of assembly instructions.
+
+--show-total-period:: Show a column with the sum of periods.
+
+-I::
+--show-info::
+	Display extended information about the perf.data file. This adds
+	information which may be very large and thus may clutter the display.
+	It currently includes: cpu and numa topology of the host system.
+
+-b::
+--branch-stack::
+	Use the addresses of sampled taken branches instead of the instruction
+	address to build the histograms. To generate meaningful output, the
+	perf.data file must have been obtained using perf record -b or
+	perf record --branch-filter xxx where xxx is a branch filter option.
+	perf report is able to auto-detect whether a perf.data file contains
+	branch stacks and it will automatically switch to the branch view mode,
+	unless --no-branch-stack is used.
+
+--branch-history::
+	Add the addresses of sampled taken branches to the callstack.
+	This allows to examine the path the program took to each sample.
+	The data collection must have used -b (or -j) and -g.
+
+--objdump=<path>::
+        Path to objdump binary.
+
+--group::
+	Show event group information together. It forces group output also
+	if there are no groups defined in data file.
+
+--demangle::
+	Demangle symbol names to human readable form. It's enabled by default,
+	disable with --no-demangle.
+
+--demangle-kernel::
+	Demangle kernel symbol names to human readable form (for C++ kernels).
+
+--mem-mode::
+	Use the data addresses of samples in addition to instruction addresses
+	to build the histograms.  To generate meaningful output, the perf.data
+	file must have been obtained using perf record -d -W and using a
+	special event -e cpu/mem-loads/p or -e cpu/mem-stores/p. See
+	'perf mem' for simpler access.
+
+--percent-limit::
+	Do not show entries which have an overhead under that percent.
+	(Default: 0).  Note that this option also sets the percent limit (threshold)
+	of callchains.  However the default value of callchain threshold is
+	different than the default value of hist entries.  Please see the
+	--call-graph option for details.
+
+--percentage::
+	Determine how to display the overhead percentage of filtered entries.
+	Filters can be applied by --comms, --dsos and/or --symbols options and
+	Zoom operations on the TUI (thread, dso, etc).
+
+	"relative" means it's relative to filtered entries only so that the
+	sum of shown entries will be always 100%.  "absolute" means it retains
+	the original value before and after the filter is applied.
+
+--header::
+	Show header information in the perf.data file.  This includes
+	various information like hostname, OS and perf version, cpu/mem
+	info, perf command line, event list and so on.  Currently only
+	--stdio output supports this feature.
+
+--header-only::
+	Show only perf.data header (forces --stdio).
+
+--time::
+	Only analyze samples within given time window: <start>,<stop>. Times
+	have the format seconds.microseconds. If start is not given (i.e., time
+	string is ',x.y') then analysis starts at the beginning of the file. If
+	stop time is not given (i.e., time string is 'x.y,') then analysis goes
+	to end of file.
+
+	Also support time percent with multiple time range. Time string is
+	'a%/n,b%/m,...' or 'a%-b%,c%-d%,...'.
+
+	For example:
+	Select the second 10% time slice:
+
+	  perf report --time 10%/2
+
+	Select from 0% to 10% time slice:
+
+	  perf report --time 0%-10%
+
+	Select the first and second 10% time slices:
+
+	  perf report --time 10%/1,10%/2
+
+	Select from 0% to 10% and 30% to 40% slices:
+
+	  perf report --time 0%-10%,30%-40%
+
+--itrace::
+	Options for decoding instruction tracing data. The options are:
+
+include::itrace.txt[]
+
+	To disable decoding entirely, use --no-itrace.
+
+--full-source-path::
+	Show the full path for source files for srcline output.
+
+--show-ref-call-graph::
+	When multiple events are sampled, it may not be needed to collect
+	callgraphs for all of them. The sample sites are usually nearby,
+	and it's enough to collect the callgraphs on a reference event.
+	So user can use "call-graph=no" event modifier to disable callgraph
+	for other events to reduce the overhead.
+	However, perf report cannot show callgraphs for the event which
+	disables the callgraph.
+	This option extends the perf report to show reference callgraphs,
+	which are collected by the reference event, in events without callgraphs.
+
+--socket-filter::
+	Only report the samples on the processor socket that match with this filter
+
+--raw-trace::
+	When displaying traceevent output, do not use print fmt or plugins.
+
+--hierarchy::
+	Enable hierarchical output.
+
+--inline::
+	If a callgraph address belongs to an inlined function, the inline stack
+	will be printed. Each entry is function name or file/line. Enabled by
+	default, disable with --no-inline.
+
+--mmaps::
+	Show --tasks output plus mmap information in a format similar to
+	/proc/<PID>/maps.
+
+	Please note that not all mmaps are stored, options affecting which ones
+	are stored include 'perf record --data', for instance.
+
+--stats::
+	Display overall events statistics without any further processing.
+	(like the one at the end of the perf report -D command)
+
+--tasks::
+	Display monitored tasks stored in perf data. Displaying pid/tid/ppid
+	plus the command string aligned to distinguish parent and child tasks.
+
+include::callchain-overhead-calculation.txt[]
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1]
diff --git a/Documentation/perf-sched.txt b/Documentation/perf-sched.txt
new file mode 100644
index 0000000..63f938b
--- /dev/null
+++ b/Documentation/perf-sched.txt
@@ -0,0 +1,167 @@
+perf-sched(1)
+=============
+
+NAME
+----
+perf-sched - Tool to trace/measure scheduler properties (latencies)
+
+SYNOPSIS
+--------
+[verse]
+'perf sched' {record|latency|map|replay|script|timehist}
+
+DESCRIPTION
+-----------
+There are several variants of 'perf sched':
+
+  'perf sched record <command>' to record the scheduling events
+  of an arbitrary workload.
+
+  'perf sched latency' to report the per task scheduling latencies
+  and other scheduling properties of the workload.
+
+  'perf sched script' to see a detailed trace of the workload that
+   was recorded (aliased to 'perf script' for now).
+
+  'perf sched replay' to simulate the workload that was recorded
+  via perf sched record. (this is done by starting up mockup threads
+  that mimic the workload based on the events in the trace. These
+  threads can then replay the timings (CPU runtime and sleep patterns)
+  of the workload as it occurred when it was recorded - and can repeat
+  it a number of times, measuring its performance.)
+
+  'perf sched map' to print a textual context-switching outline of
+  workload captured via perf sched record.  Columns stand for
+  individual CPUs, and the two-letter shortcuts stand for tasks that
+  are running on a CPU. A '*' denotes the CPU that had the event, and
+  a dot signals an idle CPU.
+
+  'perf sched timehist' provides an analysis of scheduling events.
+    
+    Example usage:
+        perf sched record -- sleep 1
+        perf sched timehist
+    
+   By default it shows the individual schedule events, including the wait
+   time (time between sched-out and next sched-in events for the task), the
+   task scheduling delay (time between wakeup and actually running) and run
+   time for the task:
+    
+                time    cpu  task name             wait time  sch delay   run time
+                             [tid/pid]                (msec)     (msec)     (msec)
+      -------------- ------  --------------------  ---------  ---------  ---------
+        79371.874569 [0011]  gcc[31949]                0.014      0.000      1.148
+        79371.874591 [0010]  gcc[31951]                0.000      0.000      0.024
+        79371.874603 [0010]  migration/10[59]          3.350      0.004      0.011
+        79371.874604 [0011]  <idle>                    1.148      0.000      0.035
+        79371.874723 [0005]  <idle>                    0.016      0.000      1.383
+        79371.874746 [0005]  gcc[31949]                0.153      0.078      0.022
+    ...
+    
+   Times are in msec.usec.
+
+OPTIONS
+-------
+-i::
+--input=<file>::
+        Input file name. (default: perf.data unless stdin is a fifo)
+
+-v::
+--verbose::
+        Be more verbose. (show symbol address, etc)
+
+-D::
+--dump-raw-trace=::
+        Display verbose dump of the sched data.
+
+-f::
+--force::
+	Don't complain, do it.
+
+OPTIONS for 'perf sched map'
+----------------------------
+
+--compact::
+	Show only CPUs with activity. Helps visualizing on high core
+	count systems.
+
+--cpus::
+	Show just entries with activities for the given CPUs.
+
+--color-cpus::
+	Highlight the given cpus.
+
+--color-pids::
+	Highlight the given pids.
+
+OPTIONS for 'perf sched timehist'
+---------------------------------
+-k::
+--vmlinux=<file>::
+    vmlinux pathname
+
+--kallsyms=<file>::
+    kallsyms pathname
+
+-g::
+--call-graph::
+	Display call chains if present (default on).
+
+--max-stack::
+	Maximum number of functions to display in backtrace, default 5.
+
+-p=::
+--pid=::
+	Only show events for given process ID (comma separated list).
+
+-t=::
+--tid=::
+	Only show events for given thread ID (comma separated list).
+
+-s::
+--summary::
+    Show only a summary of scheduling by thread with min, max, and average
+    run times (in sec) and relative stddev.
+
+-S::
+--with-summary::
+    Show all scheduling events followed by a summary by thread with min,
+    max, and average run times (in sec) and relative stddev.
+
+--symfs=<directory>::
+    Look for files with symbols relative to this directory.
+
+-V::
+--cpu-visual::
+	Show visual aid for sched switches by CPU: 'i' marks idle time,
+	's' are scheduler events.
+
+-w::
+--wakeups::
+	Show wakeup events.
+
+-M::
+--migrations::
+	Show migration events.
+
+-n::
+--next::
+	Show next task.
+
+-I::
+--idle-hist::
+	Show idle-related events only.
+
+--time::
+	Only analyze samples within given time window: <start>,<stop>. Times
+	have the format seconds.microseconds. If start is not given (i.e., time
+	string is ',x.y') then analysis starts at the beginning of the file. If
+	stop time is not given (i.e., time string is 'x.y,') then analysis goes
+	to end of file.
+
+--state::
+	Show task state when it switched out.
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/Documentation/perf-script-perl.txt b/Documentation/perf-script-perl.txt
new file mode 100644
index 0000000..5a1f681
--- /dev/null
+++ b/Documentation/perf-script-perl.txt
@@ -0,0 +1,216 @@
+perf-script-perl(1)
+===================
+
+NAME
+----
+perf-script-perl - Process trace data with a Perl script
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [-s [Perl]:script[.pl] ]
+
+DESCRIPTION
+-----------
+
+This perf script option is used to process perf script data using perf's
+built-in Perl interpreter.  It reads and processes the input file and
+displays the results of the trace analysis implemented in the given
+Perl script, if any.
+
+STARTER SCRIPTS
+---------------
+
+You can avoid reading the rest of this document by running 'perf script
+-g perl' in the same directory as an existing perf.data trace file.
+That will generate a starter script containing a handler for each of
+the event types in the trace file; it simply prints every available
+field for each event in the trace file.
+
+You can also look at the existing scripts in
+~/libexec/perf-core/scripts/perl for typical examples showing how to
+do basic things like aggregate event data, print results, etc.  Also,
+the check-perf-script.pl script, while not interesting for its results,
+attempts to exercise all of the main scripting features.
+
+EVENT HANDLERS
+--------------
+
+When perf script is invoked using a trace script, a user-defined
+'handler function' is called for each event in the trace.  If there's
+no handler function defined for a given event type, the event is
+ignored (or passed to a 'trace_unhandled' function, see below) and the
+next event is processed.
+
+Most of the event's field values are passed as arguments to the
+handler function; some of the less common ones aren't - those are
+available as calls back into the perf executable (see below).
+
+As an example, the following perf record command can be used to record
+all sched_wakeup events in the system:
+
+ # perf record -a -e sched:sched_wakeup
+
+Traces meant to be processed using a script should be recorded with
+the above option: -a to enable system-wide collection.
+
+The format file for the sched_wakeup event defines the following fields
+(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
+
+----
+ format:
+        field:unsigned short common_type;
+        field:unsigned char common_flags;
+        field:unsigned char common_preempt_count;
+        field:int common_pid;
+
+        field:char comm[TASK_COMM_LEN];
+        field:pid_t pid;
+        field:int prio;
+        field:int success;
+        field:int target_cpu;
+----
+
+The handler function for this event would be defined as:
+
+----
+sub sched::sched_wakeup
+{
+   my ($event_name, $context, $common_cpu, $common_secs,
+       $common_nsecs, $common_pid, $common_comm,
+       $comm, $pid, $prio, $success, $target_cpu) = @_;
+}
+----
+
+The handler function takes the form subsystem::event_name.
+
+The $common_* arguments in the handler's argument list are the set of
+arguments passed to all event handlers; some of the fields correspond
+to the common_* fields in the format file, but some are synthesized,
+and some of the common_* fields aren't common enough to be passed
+to every event as arguments but are available as library functions.
+
+Here's a brief description of each of the invariant event args:
+
+ $event_name 	  	    the name of the event as text
+ $context		    an opaque 'cookie' used in calls back into perf
+ $common_cpu		    the cpu the event occurred on
+ $common_secs		    the secs portion of the event timestamp
+ $common_nsecs		    the nsecs portion of the event timestamp
+ $common_pid		    the pid of the current task
+ $common_comm		    the name of the current process
+
+All of the remaining fields in the event's format file have
+counterparts as handler function arguments of the same name, as can be
+seen in the example above.
+
+The above provides the basics needed to directly access every field of
+every event in a trace, which covers 90% of what you need to know to
+write a useful trace script.  The sections below cover the rest.
+
+SCRIPT LAYOUT
+-------------
+
+Every perf script Perl script should start by setting up a Perl module
+search path and 'use'ing a few support modules (see module
+descriptions below):
+
+----
+ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+ use lib "./Perf-Trace-Util/lib";
+ use Perf::Trace::Core;
+ use Perf::Trace::Context;
+ use Perf::Trace::Util;
+----
+
+The rest of the script can contain handler functions and support
+functions in any order.
+
+Aside from the event handler functions discussed above, every script
+can implement a set of optional functions:
+
+*trace_begin*, if defined, is called before any event is processed and
+gives scripts a chance to do setup tasks:
+
+----
+ sub trace_begin
+ {
+ }
+----
+
+*trace_end*, if defined, is called after all events have been
+ processed and gives scripts a chance to do end-of-script tasks, such
+ as display results:
+
+----
+sub trace_end
+{
+}
+----
+
+*trace_unhandled*, if defined, is called for any event that
+ doesn't have a handler explicitly defined for it.  The standard set
+ of common arguments are passed into it:
+
+----
+sub trace_unhandled
+{
+    my ($event_name, $context, $common_cpu, $common_secs,
+        $common_nsecs, $common_pid, $common_comm) = @_;
+}
+----
+
+The remaining sections provide descriptions of each of the available
+built-in perf script Perl modules and their associated functions.
+
+AVAILABLE MODULES AND FUNCTIONS
+-------------------------------
+
+The following sections describe the functions and variables available
+via the various Perf::Trace::* Perl modules.  To use the functions and
+variables from the given module, add the corresponding 'use
+Perf::Trace::XXX' line to your perf script script.
+
+Perf::Trace::Core Module
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+These functions provide some essential functions to user scripts.
+
+The *flag_str* and *symbol_str* functions provide human-readable
+strings for flag and symbolic fields.  These correspond to the strings
+and values parsed from the 'print fmt' fields of the event format
+files:
+
+  flag_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the flag field $field_name of event $event_name
+  symbol_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the symbolic field $field_name of event $event_name
+
+Perf::Trace::Context Module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some of the 'common' fields in the event format file aren't all that
+common, but need to be made accessible to user scripts nonetheless.
+
+Perf::Trace::Context defines a set of functions that can be used to
+access this data in the context of the current event.  Each of these
+functions expects a $context variable, which is the same as the
+$context variable passed into every event handler as the second
+argument.
+
+ common_pc($context) - returns common_preempt_count for the current event
+ common_flags($context) - returns common_flags for the current event
+ common_lock_depth($context) - returns common_lock_depth for the current event
+
+Perf::Trace::Util Module
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Various utility functions for use with perf script:
+
+  nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
+  nsecs_secs($nsecs) - returns whole secs portion given nsecs
+  nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs
+  nsecs_str($nsecs) - returns printable string in the form secs.nsecs
+  avg($total, $n) - returns average given a sum and a total number of values
+
+SEE ALSO
+--------
+linkperf:perf-script[1]
diff --git a/Documentation/perf-script-python.txt b/Documentation/perf-script-python.txt
new file mode 100644
index 0000000..51ec2d2
--- /dev/null
+++ b/Documentation/perf-script-python.txt
@@ -0,0 +1,615 @@
+perf-script-python(1)
+====================
+
+NAME
+----
+perf-script-python - Process trace data with a Python script
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [-s [Python]:script[.py] ]
+
+DESCRIPTION
+-----------
+
+This perf script option is used to process perf script data using perf's
+built-in Python interpreter.  It reads and processes the input file and
+displays the results of the trace analysis implemented in the given
+Python script, if any.
+
+A QUICK EXAMPLE
+---------------
+
+This section shows the process, start to finish, of creating a working
+Python script that aggregates and extracts useful information from a
+raw perf script stream.  You can avoid reading the rest of this
+document if an example is enough for you; the rest of the document
+provides more details on each step and lists the library functions
+available to script writers.
+
+This example actually details the steps that were used to create the
+'syscall-counts' script you see when you list the available perf script
+scripts via 'perf script -l'.  As such, this script also shows how to
+integrate your script into the list of general-purpose 'perf script'
+scripts listed by that command.
+
+The syscall-counts script is a simple script, but demonstrates all the
+basic ideas necessary to create a useful script.  Here's an example
+of its output (syscall names are not yet supported, they will appear
+as numbers):
+
+----
+syscall events:
+
+event                                          count
+----------------------------------------  -----------
+sys_write                                     455067
+sys_getdents                                    4072
+sys_close                                       3037
+sys_swapoff                                     1769
+sys_read                                         923
+sys_sched_setparam                               826
+sys_open                                         331
+sys_newfstat                                     326
+sys_mmap                                         217
+sys_munmap                                       216
+sys_futex                                        141
+sys_select                                       102
+sys_poll                                          84
+sys_setitimer                                     12
+sys_writev                                         8
+15                                                 8
+sys_lseek                                          7
+sys_rt_sigprocmask                                 6
+sys_wait4                                          3
+sys_ioctl                                          3
+sys_set_robust_list                                1
+sys_exit                                           1
+56                                                 1
+sys_access                                         1
+----
+
+Basically our task is to keep a per-syscall tally that gets updated
+every time a system call occurs in the system.  Our script will do
+that, but first we need to record the data that will be processed by
+that script.  Theoretically, there are a couple of ways we could do
+that:
+
+- we could enable every event under the tracing/events/syscalls
+  directory, but this is over 600 syscalls, well beyond the number
+  allowable by perf.  These individual syscall events will however be
+  useful if we want to later use the guidance we get from the
+  general-purpose scripts to drill down and get more detail about
+  individual syscalls of interest.
+
+- we can enable the sys_enter and/or sys_exit syscalls found under
+  tracing/events/raw_syscalls.  These are called for all syscalls; the
+  'id' field can be used to distinguish between individual syscall
+  numbers.
+
+For this script, we only need to know that a syscall was entered; we
+don't care how it exited, so we'll use 'perf record' to record only
+the sys_enter events:
+
+----
+# perf record -a -e raw_syscalls:sys_enter
+
+^C[ perf record: Woken up 1 times to write data ]
+[ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
+----
+
+The options basically say to collect data for every syscall event
+system-wide and multiplex the per-cpu output into a single stream.
+That single stream will be recorded in a file in the current directory
+called perf.data.
+
+Once we have a perf.data file containing our data, we can use the -g
+'perf script' option to generate a Python script that will contain a
+callback handler for each event type found in the perf.data trace
+stream (for more details, see the STARTER SCRIPTS section).
+
+----
+# perf script -g python
+generated Python script: perf-script.py
+----
+
+The output file, also created in the current directory, is named
+perf-script.py.  Here's the file in its entirety:
+
+----
+# perf script event handlers, generated by perf script -g python
+# Licensed under the terms of the GNU GPL License version 2
+
+# The common_* event handler fields are the most useful fields common to
+# all events.  They don't necessarily correspond to the 'common_*' fields
+# in the format files.  Those fields not available as handler params can
+# be retrieved using Python functions of the form common_*(context).
+# See the perf-script-python Documentation for the list of available functions.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+def trace_begin():
+	print "in trace_begin"
+
+def trace_end():
+	print "in trace_end"
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	id, args):
+		print_header(event_name, common_cpu, common_secs, common_nsecs,
+			common_pid, common_comm)
+
+		print "id=%d, args=%s\n" % \
+		(id, args),
+
+def trace_unhandled(event_name, context, event_fields_dict):
+		print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])
+
+def print_header(event_name, cpu, secs, nsecs, pid, comm):
+	print "%-20s %5u %05u.%09u %8u %-20s " % \
+	(event_name, cpu, secs, nsecs, pid, comm),
+----
+
+At the top is a comment block followed by some import statements and a
+path append which every perf script script should include.
+
+Following that are a couple of generated functions, trace_begin() and
+trace_end(), which are called at the beginning and the end of the
+script respectively (for more details, see the SCRIPT LAYOUT section
+below).
+
+Following those are the 'event handler' functions generated one for
+every event in the 'perf record' output.  The handler functions take
+the form subsystem__event_name, and contain named parameters, one for
+each field in the event; in this case, there's only one event,
+raw_syscalls__sys_enter().  (see the EVENT HANDLERS section below for
+more info on event handlers).
+
+The final couple of functions are, like the begin and end functions,
+generated for every script.  The first, trace_unhandled(), is called
+every time the script finds an event in the perf.data file that
+doesn't correspond to any event handler in the script.  This could
+mean either that the record step recorded event types that it wasn't
+really interested in, or the script was run against a trace file that
+doesn't correspond to the script.
+
+The script generated by the -g option simply prints a line for each
+event found in the trace stream i.e. it basically just dumps the event
+and its parameter values to stdout.  The print_header() function is
+simply a utility function used for that purpose.  Let's rename the
+script and run it to see the default output:
+
+----
+# mv perf-script.py syscall-counts.py
+# perf script -s syscall-counts.py
+
+raw_syscalls__sys_enter     1 00840.847582083     7506 perf                  id=1, args=
+raw_syscalls__sys_enter     1 00840.847595764     7506 perf                  id=1, args=
+raw_syscalls__sys_enter     1 00840.847620860     7506 perf                  id=1, args=
+raw_syscalls__sys_enter     1 00840.847710478     6533 npviewer.bin          id=78, args=
+raw_syscalls__sys_enter     1 00840.847719204     6533 npviewer.bin          id=142, args=
+raw_syscalls__sys_enter     1 00840.847755445     6533 npviewer.bin          id=3, args=
+raw_syscalls__sys_enter     1 00840.847775601     6533 npviewer.bin          id=3, args=
+raw_syscalls__sys_enter     1 00840.847781820     6533 npviewer.bin          id=3, args=
+.
+.
+.
+----
+
+Of course, for this script, we're not interested in printing every
+trace event, but rather aggregating it in a useful way.  So we'll get
+rid of everything to do with printing as well as the trace_begin() and
+trace_unhandled() functions, which we won't be using.  That leaves us
+with this minimalistic skeleton:
+
+----
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+def trace_end():
+	print "in trace_end"
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	id, args):
+----
+
+In trace_end(), we'll simply print the results, but first we need to
+generate some results to print.  To do that we need to have our
+sys_enter() handler do the necessary tallying until all events have
+been counted.  A hash table indexed by syscall id is a good way to
+store that information; every time the sys_enter() handler is called,
+we simply increment a count associated with that hash entry indexed by
+that syscall id:
+
+----
+  syscalls = autodict()
+
+  try:
+    syscalls[id] += 1
+  except TypeError:
+    syscalls[id] = 1
+----
+
+The syscalls 'autodict' object is a special kind of Python dictionary
+(implemented in Core.py) that implements Perl's 'autovivifying' hashes
+in Python i.e. with autovivifying hashes, you can assign nested hash
+values without having to go to the trouble of creating intermediate
+levels if they don't exist e.g. syscalls[comm][pid][id] = 1 will create
+the intermediate hash levels and finally assign the value 1 to the
+hash entry for 'id' (because the value being assigned isn't a hash
+object itself, the initial value is assigned in the TypeError
+exception.  Well, there may be a better way to do this in Python but
+that's what works for now).
+
+Putting that code into the raw_syscalls__sys_enter() handler, we
+effectively end up with a single-level dictionary keyed on syscall id
+and having the counts we've tallied as values.
+
+The print_syscall_totals() function iterates over the entries in the
+dictionary and displays a line for each entry containing the syscall
+name (the dictionary keys contain the syscall ids, which are passed to
+the Util function syscall_name(), which translates the raw syscall
+numbers to the corresponding syscall name strings).  The output is
+displayed after all the events in the trace have been processed, by
+calling the print_syscall_totals() function from the trace_end()
+handler called at the end of script processing.
+
+The final script producing the output shown above is shown in its
+entirety below (syscall_name() helper is not yet available, you can
+only deal with id's for now):
+
+----
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+syscalls = autodict()
+
+def trace_end():
+	print_syscall_totals()
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	id, args):
+	try:
+		syscalls[id] += 1
+	except TypeError:
+		syscalls[id] = 1
+
+def print_syscall_totals():
+    if for_comm is not None:
+	    print "\nsyscall events for %s:\n\n" % (for_comm),
+    else:
+	    print "\nsyscall events:\n\n",
+
+    print "%-40s  %10s\n" % ("event", "count"),
+    print "%-40s  %10s\n" % ("----------------------------------------", \
+                                 "-----------"),
+
+    for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
+				  reverse = True):
+	    print "%-40s  %10d\n" % (syscall_name(id), val),
+----
+
+The script can be run just as before:
+
+  # perf script -s syscall-counts.py
+
+So those are the essential steps in writing and running a script.  The
+process can be generalized to any tracepoint or set of tracepoints
+you're interested in - basically find the tracepoint(s) you're
+interested in by looking at the list of available events shown by
+'perf list' and/or look in /sys/kernel/debug/tracing/events/ for
+detailed event and field info, record the corresponding trace data
+using 'perf record', passing it the list of interesting events,
+generate a skeleton script using 'perf script -g python' and modify the
+code to aggregate and display it for your particular needs.
+
+After you've done that you may end up with a general-purpose script
+that you want to keep around and have available for future use.  By
+writing a couple of very simple shell scripts and putting them in the
+right place, you can have your script listed alongside the other
+scripts listed by the 'perf script -l' command e.g.:
+
+----
+# perf script -l
+List of available trace scripts:
+  wakeup-latency                       system-wide min/max/avg wakeup latency
+  rw-by-file <comm>                    r/w activity for a program, by file
+  rw-by-pid                            system-wide r/w activity
+----
+
+A nice side effect of doing this is that you also then capture the
+probably lengthy 'perf record' command needed to record the events for
+the script.
+
+To have the script appear as a 'built-in' script, you write two simple
+scripts, one for recording and one for 'reporting'.
+
+The 'record' script is a shell script with the same base name as your
+script, but with -record appended.  The shell script should be put
+into the perf/scripts/python/bin directory in the kernel source tree.
+In that script, you write the 'perf record' command-line needed for
+your script:
+
+----
+# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
+
+#!/bin/bash
+perf record -a -e raw_syscalls:sys_enter
+----
+
+The 'report' script is also a shell script with the same base name as
+your script, but with -report appended.  It should also be located in
+the perf/scripts/python/bin directory.  In that script, you write the
+'perf script -s' command-line needed for running your script:
+
+----
+# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
+
+#!/bin/bash
+# description: system-wide syscall counts
+perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py
+----
+
+Note that the location of the Python script given in the shell script
+is in the libexec/perf-core/scripts/python directory - this is where
+the script will be copied by 'make install' when you install perf.
+For the installation to install your script there, your script needs
+to be located in the perf/scripts/python directory in the kernel
+source tree:
+
+----
+# ls -al kernel-source/tools/perf/scripts/python
+total 32
+drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
+drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
+drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
+-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py
+drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util
+-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
+----
+
+Once you've done that (don't forget to do a new 'make install',
+otherwise your script won't show up at run-time), 'perf script -l'
+should show a new entry for your script:
+
+----
+# perf script -l
+List of available trace scripts:
+  wakeup-latency                       system-wide min/max/avg wakeup latency
+  rw-by-file <comm>                    r/w activity for a program, by file
+  rw-by-pid                            system-wide r/w activity
+  syscall-counts                       system-wide syscall counts
+----
+
+You can now perform the record step via 'perf script record':
+
+  # perf script record syscall-counts
+
+and display the output using 'perf script report':
+
+  # perf script report syscall-counts
+
+STARTER SCRIPTS
+---------------
+
+You can quickly get started writing a script for a particular set of
+trace data by generating a skeleton script using 'perf script -g
+python' in the same directory as an existing perf.data trace file.
+That will generate a starter script containing a handler for each of
+the event types in the trace file; it simply prints every available
+field for each event in the trace file.
+
+You can also look at the existing scripts in
+~/libexec/perf-core/scripts/python for typical examples showing how to
+do basic things like aggregate event data, print results, etc.  Also,
+the check-perf-script.py script, while not interesting for its results,
+attempts to exercise all of the main scripting features.
+
+EVENT HANDLERS
+--------------
+
+When perf script is invoked using a trace script, a user-defined
+'handler function' is called for each event in the trace.  If there's
+no handler function defined for a given event type, the event is
+ignored (or passed to a 'trace_unhandled' function, see below) and the
+next event is processed.
+
+Most of the event's field values are passed as arguments to the
+handler function; some of the less common ones aren't - those are
+available as calls back into the perf executable (see below).
+
+As an example, the following perf record command can be used to record
+all sched_wakeup events in the system:
+
+ # perf record -a -e sched:sched_wakeup
+
+Traces meant to be processed using a script should be recorded with
+the above option: -a to enable system-wide collection.
+
+The format file for the sched_wakeup event defines the following fields
+(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
+
+----
+ format:
+        field:unsigned short common_type;
+        field:unsigned char common_flags;
+        field:unsigned char common_preempt_count;
+        field:int common_pid;
+
+        field:char comm[TASK_COMM_LEN];
+        field:pid_t pid;
+        field:int prio;
+        field:int success;
+        field:int target_cpu;
+----
+
+The handler function for this event would be defined as:
+
+----
+def sched__sched_wakeup(event_name, context, common_cpu, common_secs,
+       common_nsecs, common_pid, common_comm,
+       comm, pid, prio, success, target_cpu):
+       pass
+----
+
+The handler function takes the form subsystem__event_name.
+
+The common_* arguments in the handler's argument list are the set of
+arguments passed to all event handlers; some of the fields correspond
+to the common_* fields in the format file, but some are synthesized,
+and some of the common_* fields aren't common enough to be passed
+to every event as arguments but are available as library functions.
+
+Here's a brief description of each of the invariant event args:
+
+ event_name 	  	    the name of the event as text
+ context		    an opaque 'cookie' used in calls back into perf
+ common_cpu		    the cpu the event occurred on
+ common_secs		    the secs portion of the event timestamp
+ common_nsecs		    the nsecs portion of the event timestamp
+ common_pid		    the pid of the current task
+ common_comm		    the name of the current process
+
+All of the remaining fields in the event's format file have
+counterparts as handler function arguments of the same name, as can be
+seen in the example above.
+
+The above provides the basics needed to directly access every field of
+every event in a trace, which covers 90% of what you need to know to
+write a useful trace script.  The sections below cover the rest.
+
+SCRIPT LAYOUT
+-------------
+
+Every perf script Python script should start by setting up a Python
+module search path and 'import'ing a few support modules (see module
+descriptions below):
+
+----
+ import os
+ import sys
+
+ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	      '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+ from perf_trace_context import *
+ from Core import *
+----
+
+The rest of the script can contain handler functions and support
+functions in any order.
+
+Aside from the event handler functions discussed above, every script
+can implement a set of optional functions:
+
+*trace_begin*, if defined, is called before any event is processed and
+gives scripts a chance to do setup tasks:
+
+----
+def trace_begin():
+    pass
+----
+
+*trace_end*, if defined, is called after all events have been
+ processed and gives scripts a chance to do end-of-script tasks, such
+ as display results:
+
+----
+def trace_end():
+    pass
+----
+
+*trace_unhandled*, if defined, is called for any event that
+ doesn't have a handler explicitly defined for it.  The event name,
+ the context and a dictionary of the event's fields are passed into it:
+
+----
+def trace_unhandled(event_name, context, event_fields_dict):
+    pass
+----
+
+The remaining sections provide descriptions of each of the available
+built-in perf script Python modules and their associated functions.
+
+AVAILABLE MODULES AND FUNCTIONS
+-------------------------------
+
+The following sections describe the functions and variables available
+via the various perf script Python modules.  To use the functions and
+variables from the given module, add the corresponding 'from XXXX
+import' line to your perf script script.
+
+Core.py Module
+~~~~~~~~~~~~~~
+
+This module provides some essential functions to user scripts.
+
+The *flag_str* and *symbol_str* functions provide human-readable
+strings for flag and symbolic fields.  These correspond to the strings
+and values parsed from the 'print fmt' fields of the event format
+files:
+
+  flag_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the flag field field_name of event event_name
+  symbol_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the symbolic field field_name of event event_name
+
+The *autodict* function returns a special kind of Python
+dictionary that implements Perl's 'autovivifying' hashes in Python
+i.e. with autovivifying hashes, you can assign nested hash values
+without having to go to the trouble of creating intermediate levels if
+they don't exist.
+
+  autodict() - returns an autovivifying dictionary instance
+
+
+perf_trace_context Module
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some of the 'common' fields in the event format file aren't all that
+common, but need to be made accessible to user scripts nonetheless.
+
+perf_trace_context defines a set of functions that can be used to
+access this data in the context of the current event.  Each of these
+functions expects a context variable, which is the same as the
+context variable passed into every event handler as the second
+argument.
+
+ common_pc(context) - returns common_preempt_count for the current event
+ common_flags(context) - returns common_flags for the current event
+ common_lock_depth(context) - returns common_lock_depth for the current event
+
+Util.py Module
+~~~~~~~~~~~~~~
+
+Various utility functions for use with perf script:
+
+  nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
+  nsecs_secs(nsecs) - returns whole secs portion given nsecs
+  nsecs_nsecs(nsecs) - returns nsecs remainder given nsecs
+  nsecs_str(nsecs) - returns printable string in the form secs.nsecs
+  avg(total, n) - returns average given a sum and a total number of values
+
+SEE ALSO
+--------
+linkperf:perf-script[1]
diff --git a/Documentation/perf-script.txt b/Documentation/perf-script.txt
new file mode 100644
index 0000000..afdafe2
--- /dev/null
+++ b/Documentation/perf-script.txt
@@ -0,0 +1,389 @@
+perf-script(1)
+=============
+
+NAME
+----
+perf-script - Read perf.data (created by perf record) and display trace output
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [<options>]
+'perf script' [<options>] record <script> [<record-options>] <command>
+'perf script' [<options>] report <script> [script-args]
+'perf script' [<options>] <script> <required-script-args> [<record-options>] <command>
+'perf script' [<options>] <top-script> [script-args]
+
+DESCRIPTION
+-----------
+This command reads the input file and displays the trace recorded.
+
+There are several variants of perf script:
+
+  'perf script' to see a detailed trace of the workload that was
+  recorded.
+
+  You can also run a set of pre-canned scripts that aggregate and
+  summarize the raw trace data in various ways (the list of scripts is
+  available via 'perf script -l').  The following variants allow you to
+  record and run those scripts:
+
+  'perf script record <script> <command>' to record the events required
+  for 'perf script report'.  <script> is the name displayed in the
+  output of 'perf script --list' i.e. the actual script name minus any
+  language extension.  If <command> is not specified, the events are
+  recorded using the -a (system-wide) 'perf record' option.
+
+  'perf script report <script> [args]' to run and display the results
+  of <script>.  <script> is the name displayed in the output of 'perf
+  script --list' i.e. the actual script name minus any language
+  extension.  The perf.data output from a previous run of 'perf script
+  record <script>' is used and should be present for this command to
+  succeed.  [args] refers to the (mainly optional) args expected by
+  the script.
+
+  'perf script <script> <required-script-args> <command>' to both
+  record the events required for <script> and to run the <script>
+  using 'live-mode' i.e. without writing anything to disk.  <script>
+  is the name displayed in the output of 'perf script --list' i.e. the
+  actual script name minus any language extension.  If <command> is
+  not specified, the events are recorded using the -a (system-wide)
+  'perf record' option.  If <script> has any required args, they
+  should be specified before <command>.  This mode doesn't allow for
+  optional script args to be specified; if optional script args are
+  desired, they can be specified using separate 'perf script record'
+  and 'perf script report' commands, with the stdout of the record step
+  piped to the stdin of the report script, using the '-o -' and '-i -'
+  options of the corresponding commands.
+
+  'perf script <top-script>' to both record the events required for
+  <top-script> and to run the <top-script> using 'live-mode'
+  i.e. without writing anything to disk.  <top-script> is the name
+  displayed in the output of 'perf script --list' i.e. the actual
+  script name minus any language extension; a <top-script> is defined
+  as any script name ending with the string 'top'.
+
+  [<record-options>] can be passed to the record steps of 'perf script
+  record' and 'live-mode' variants; this isn't possible however for
+  <top-script> 'live-mode' or 'perf script report' variants.
+
+  See the 'SEE ALSO' section for links to language-specific
+  information on how to write and run your own trace scripts.
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+-D::
+--dump-raw-trace=::
+        Display verbose dump of the trace data.
+
+-L::
+--Latency=::
+        Show latency attributes (irqs/preemption disabled, etc).
+
+-l::
+--list=::
+        Display a list of available trace scripts.
+
+-s ['lang']::
+--script=::
+        Process trace data with the given script ([lang]:script[.ext]).
+	If the string 'lang' is specified in place of a script name, a
+        list of supported languages will be displayed instead.
+
+-g::
+--gen-script=::
+        Generate perf-script.[ext] starter script for given language,
+        using current perf.data.
+
+-a::
+        Force system-wide collection.  Scripts run without a <command>
+        normally use -a by default, while scripts run with a <command>
+        normally don't - this option allows the latter to be run in
+        system-wide mode.
+
+-i::
+--input=::
+        Input file name. (default: perf.data unless stdin is a fifo)
+
+-d::
+--debug-mode::
+        Do various checks like samples ordering and lost events.
+
+-F::
+--fields::
+        Comma separated list of fields to print. Options are:
+        comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
+        srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
+        brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc.
+        Field list can be prepended with the type, trace, sw or hw,
+        to indicate to which event type the field list applies.
+        e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
+
+		perf script -F <fields>
+
+	is equivalent to:
+
+		perf script -F trace:<fields> -F sw:<fields> -F hw:<fields>
+
+	i.e., the specified fields apply to all event types if the type string
+	is not given.
+
+	In addition to overriding fields, it is also possible to add or remove
+	fields from the defaults. For example
+
+		-F -cpu,+insn
+
+	removes the cpu field and adds the insn field. Adding/removing fields
+	cannot be mixed with normal overriding.
+
+	The arguments are processed in the order received. A later usage can
+	reset a prior request. e.g.:
+
+		-F trace: -F comm,tid,time,ip,sym
+
+	The first -F suppresses trace events (field list is ""), but then the
+	second invocation sets the fields to comm,tid,time,ip,sym. In this case a
+	warning is given to the user:
+
+		"Overriding previous field request for all events."
+
+	Alternatively, consider the order:
+
+		-F comm,tid,time,ip,sym -F trace:
+
+	The first -F sets the fields for all events and the second -F
+	suppresses trace events. The user is given a warning message about
+	the override, and the result of the above is that only S/W and H/W
+	events are displayed with the given fields.
+
+	For the 'wildcard' option if a user selected field is invalid for an
+	event type, a message is displayed to the user that the option is
+	ignored for that type. For example:
+
+		$ perf script -F comm,tid,trace
+		'trace' not valid for hardware events. Ignoring.
+		'trace' not valid for software events. Ignoring.
+
+	Alternatively, if the type is given and an invalid field is specified, it
+	is an error. For example:
+
+        perf script -v -F sw:comm,tid,trace
+        'trace' not valid for software events.
+
+	At this point usage is displayed, and perf-script exits.
+
+	The flags field is synthesized and may have a value when Instruction
+	Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
+	call, return, conditional, system, asynchronous, interrupt,
+	transaction abort, trace begin, trace end, and in transaction,
+	respectively. Known combinations of flags are printed more nicely e.g.
+	"call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b",
+	"int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs",
+	"async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB",
+	"tr end" for "bE". However the "x" flag will be displayed separately in those
+	cases e.g. "jcc     (x)" for a conditional branch within a transaction.
+
+	The callindent field is synthesized and may have a value when
+	Instruction Trace decoding. For calls and returns, it will display the
+	name of the symbol indented with spaces to reflect the stack depth.
+
+	When doing instruction trace decoding insn and insnlen give the
+	instruction bytes and the instruction length of the current
+	instruction.
+
+	The synth field is used by synthesized events which may be created when
+	Instruction Trace decoding.
+
+	Finally, a user may not set fields to none for all event types.
+	i.e., -F "" is not allowed.
+
+	The brstack output includes branch related information with raw addresses using the
+	/v/v/v/v/cycles syntax in the following order:
+	FROM: branch source instruction
+	TO  : branch target instruction
+        M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
+	X/- : X=branch inside a transactional region, -=not in transaction region or not supported
+	A/- : A=TSX abort entry, -=not aborted region or not supported
+	cycles
+
+	The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
+
+	When brstackinsn is specified the full assembler sequences of branch sequences for each sample
+	are printed. This is the full execution path leading to the sample. This is only supported when the
+	sample was recorded with perf record -b or -j any.
+
+	The brstackoff field will print an offset into a specific dso/binary.
+
+	With the metric option perf script can compute metrics for
+	sampling periods, similar to perf stat. This requires
+	specifying a group with multiple metrics with the :S option
+	for perf record. perf will sample on the first event, and
+	compute metrics for all the events in the group. Please note
+	that the metric computed is averaged over the whole sampling
+	period, not just for the sample point.
+
+	For sample events it's possible to display misc field with -F +misc option,
+	following letters are displayed for each bit:
+
+	  PERF_RECORD_MISC_KERNEL               K
+	  PERF_RECORD_MISC_USER                 U
+	  PERF_RECORD_MISC_HYPERVISOR           H
+	  PERF_RECORD_MISC_GUEST_KERNEL         G
+	  PERF_RECORD_MISC_GUEST_USER           g
+	  PERF_RECORD_MISC_MMAP_DATA*           M
+	  PERF_RECORD_MISC_COMM_EXEC            E
+	  PERF_RECORD_MISC_SWITCH_OUT           S
+	  PERF_RECORD_MISC_SWITCH_OUT_PREEMPT   Sp
+
+	  $ perf script -F +misc ...
+	   sched-messaging  1414 K     28690.636582:       4590 cycles ...
+	   sched-messaging  1407 U     28690.636600:     325620 cycles ...
+	   sched-messaging  1414 K     28690.636608:      19473 cycles ...
+	  misc field ___________/
+
+-k::
+--vmlinux=<file>::
+        vmlinux pathname
+
+--kallsyms=<file>::
+        kallsyms pathname
+
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+
+-G::
+--hide-call-graph::
+        When printing symbols do not display call chain.
+
+--stop-bt::
+        Stop display of callgraph at these symbols
+
+-C::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+	be provided as a comma-separated list with no space: 0,1. Ranges of
+	CPUs are specified with -: 0-2. Default is to report samples on all
+	CPUs.
+
+-c::
+--comms=::
+	Only display events for these comms. CSV that understands
+	file://filename entries.
+
+--pid=::
+	Only show events for given process ID (comma separated list).
+
+--tid=::
+	Only show events for given thread ID (comma separated list).
+
+-I::
+--show-info::
+	Display extended information about the perf.data file. This adds
+	information which may be very large and thus may clutter the display.
+	It currently includes: cpu and numa topology of the host system.
+	It can only be used with the perf script report mode.
+
+--show-kernel-path::
+	Try to resolve the path of [kernel.kallsyms]
+
+--show-task-events::
+	Display task related events (e.g. FORK, COMM, EXIT).
+
+--show-mmap-events::
+	Display mmap related events (e.g. MMAP, MMAP2).
+
+--show-namespace-events::
+	Display namespace events i.e. events of type PERF_RECORD_NAMESPACES.
+
+--show-switch-events::
+	Display context switch events i.e. events of type PERF_RECORD_SWITCH or
+	PERF_RECORD_SWITCH_CPU_WIDE.
+
+--show-lost-events::
+	Display lost events i.e. events of type PERF_RECORD_LOST.
+
+--show-round-events::
+	Display finished round events i.e. events of type PERF_RECORD_FINISHED_ROUND.
+
+--demangle::
+	Demangle symbol names to human readable form. It's enabled by default,
+	disable with --no-demangle.
+
+--demangle-kernel::
+	Demangle kernel symbol names to human readable form (for C++ kernels).
+
+--header::
+	Show perf.data header.
+
+--header-only::
+	Show only perf.data header.
+
+--itrace::
+	Options for decoding instruction tracing data. The options are:
+
+include::itrace.txt[]
+
+	To disable decoding entirely, use --no-itrace.
+
+--full-source-path::
+	Show the full path for source files for srcline output.
+
+--max-stack::
+        Set the stack depth limit when parsing the callchain, anything
+        beyond the specified depth will be ignored. This is a trade-off
+        between information loss and faster processing especially for
+        workloads that can have a very long callchain stack.
+        Note that when using the --itrace option the synthesized callchain size
+        will override this value if the synthesized callchain size is bigger.
+
+        Default: 127
+
+--ns::
+	Use 9 decimal places when displaying time (i.e. show the nanoseconds)
+
+-f::
+--force::
+	Don't do ownership validation.
+
+--time::
+	Only analyze samples within given time window: <start>,<stop>. Times
+	have the format seconds.microseconds. If start is not given (i.e., time
+	string is ',x.y') then analysis starts at the beginning of the file. If
+	stop time is not given (i.e., time string is 'x.y,') then analysis goes
+	to end of file.
+
+	Also support time percent with multiple time ranges. Time string is
+	'a%/n,b%/m,...' or 'a%-b%,c%-d%,...'.
+
+	For example:
+	Select the second 10% time slice:
+	perf script --time 10%/2
+
+	Select from 0% to 10% time slice:
+	perf script --time 0%-10%
+
+	Select the first and second 10% time slices:
+	perf script --time 10%/1,10%/2
+
+	Select from 0% to 10% and 30% to 40% slices:
+	perf script --time 0%-10%,30%-40%
+
+--max-blocks::
+	Set the maximum number of program blocks to print with brstackinsn for
+	each sample.
+
+--per-event-dump::
+	Create per event files with a "perf.data.EVENT.dump" name instead of
+        printing to stdout, useful, for instance, for generating flamegraphs.
+
+--inline::
+	If a callgraph address belongs to an inlined function, the inline stack
+	will be printed. Each entry has function name and file/line. Enabled by
+	default, disable with --no-inline.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-script-perl[1],
+linkperf:perf-script-python[1]
diff --git a/Documentation/perf-stat.txt b/Documentation/perf-stat.txt
new file mode 100644
index 0000000..e6c3b4e
--- /dev/null
+++ b/Documentation/perf-stat.txt
@@ -0,0 +1,337 @@
+perf-stat(1)
+============
+
+NAME
+----
+perf-stat - Run a command and gather performance counter statistics
+
+SYNOPSIS
+--------
+[verse]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] record [-o file] -- <command> [<options>]
+'perf stat' report [-i file]
+
+DESCRIPTION
+-----------
+This command runs a command and gathers performance counter statistics
+from it.
+
+
+OPTIONS
+-------
+<command>...::
+	Any command you can specify in a shell.
+
+record::
+	See STAT RECORD.
+
+report::
+	See STAT REPORT.
+
+-e::
+--event=::
+	Select the PMU event. Selection can be:
+
+	- a symbolic event name (use 'perf list' to list all events)
+
+	- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
+	  hexadecimal event descriptor.
+
+	- a symbolically formed event like 'pmu/param1=0x3,param2/' where
+	  param1 and param2 are defined as formats for the PMU in
+	  /sys/bus/event_source/devices/<pmu>/format/*
+
+	- a symbolically formed event like 'pmu/config=M,config1=N,config2=K/'
+	  where M, N, K are numbers (in decimal, hex, octal format).
+	  Acceptable values for each of 'config', 'config1' and 'config2'
+	  parameters are defined by corresponding entries in
+	  /sys/bus/event_source/devices/<pmu>/format/*
+
+	Note that the last two syntaxes support prefix and glob matching in
+	the PMU name to simplify creation of events across multiple instances
+	of the same type of PMU in large systems (e.g. memory controller PMUs).
+	Multiple PMU instances are typical for uncore PMUs, so the prefix
+	'uncore_' is also ignored when performing this match.
+
+
+-i::
+--no-inherit::
+        child tasks do not inherit counters
+-p::
+--pid=<pid>::
+        stat events on existing process id (comma separated list)
+
+-t::
+--tid=<tid>::
+        stat events on existing thread id (comma separated list)
+
+
+-a::
+--all-cpus::
+        system-wide collection from all CPUs (default if no target is specified)
+
+-c::
+--scale::
+	scale/normalize counter values
+
+-d::
+--detailed::
+	print more detailed statistics, can be specified up to 3 times
+
+	   -d:          detailed events, L1 and LLC data cache
+        -d -d:     more detailed events, dTLB and iTLB events
+     -d -d -d:     very detailed events, adding prefetch events
+
+-r::
+--repeat=<n>::
+	repeat command and print average + stddev (max: 100). 0 means forever.
+
+-B::
+--big-num::
+        print large numbers with thousands' separators according to locale
+
+-C::
+--cpu=::
+Count only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+In per-thread mode, this option is ignored. The -a option is still necessary
+to activate system-wide monitoring. Default is to count on all CPUs.
+
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs.
+
+-n::
+--null::
+        null run - don't start any counters
+
+-v::
+--verbose::
+        be more verbose (show counter open errors, etc)
+
+-x SEP::
+--field-separator SEP::
+print counts using a CSV-style output to make it easy to import directly into
+spreadsheets. Columns are separated by the string specified in SEP.
+
+-G name::
+--cgroup name::
+monitor only in the container (cgroup) called "name". This option is available only
+in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
+container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
+can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
+to first event, second cgroup to second event and so on. It is possible to provide
+an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
+corresponding events, i.e., they always refer to events defined earlier on the command
+line. If the user wants to track multiple events for a specific cgroup, the user can
+use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'.
+
+If wanting to monitor, say, 'cycles' for a cgroup and also for system wide, this
+command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'.
+
+-o file::
+--output file::
+Print the output into the designated file.
+
+--append::
+Append to the output file designated with the -o option. Ignored if -o is not specified.
+
+--log-fd::
+
+Log output to fd, instead of stderr.  Complementary to --output, and mutually exclusive
+with it.  --append may be used here.  Examples:
+     3>results  perf stat --log-fd 3          -- $cmd
+     3>>results perf stat --log-fd 3 --append -- $cmd
+
+--pre::
+--post::
+	Pre and post measurement hooks, e.g.:
+
+perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
+
+-I msecs::
+--interval-print msecs::
+Print count deltas every N milliseconds (minimum: 1ms)
+The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals.  Use with caution.
+	example: 'perf stat -I 1000 -e cycles -a sleep 5'
+
+--interval-count times::
+Print count deltas for fixed number of times.
+This option should be used together with "-I" option.
+	example: 'perf stat -I 1000 --interval-count 2 -e cycles -a'
+
+--timeout msecs::
+Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms).
+This option is not supported with the "-I" option.
+	example: 'perf stat --timeout 2000 -e cycles -a'
+
+--metric-only::
+Only print computed metrics. Print them in a single line.
+Don't show any raw values. Not supported with --per-thread.
+
+--per-socket::
+Aggregate counts per processor socket for system-wide mode measurements.  This
+is a useful mode to detect imbalance between sockets.  To enable this mode,
+use --per-socket in addition to -a. (system-wide).  The output includes the
+socket number and the number of online processors on that socket. This is
+useful to gauge the amount of aggregation.
+
+--per-core::
+Aggregate counts per physical processor for system-wide mode measurements.  This
+is a useful mode to detect imbalance between physical cores.  To enable this mode,
+use --per-core in addition to -a. (system-wide).  The output includes the
+core number and the number of online logical processors on that physical processor.
+
+--per-thread::
+Aggregate counts per monitored threads, when monitoring threads (-t option)
+or processes (-p option).
+
+-D msecs::
+--delay msecs::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
+STAT RECORD
+-----------
+Stores stat data into perf data file.
+
+-o file::
+--output file::
+Output file name.
+
+STAT REPORT
+-----------
+Reads and reports stat data from perf data file.
+
+-i file::
+--input file::
+Input file name.
+
+--per-socket::
+Aggregate counts per processor socket for system-wide mode measurements.
+
+--per-core::
+Aggregate counts per physical processor for system-wide mode measurements.
+
+-M::
+--metrics::
+Print metrics or metricgroups specified in a comma separated list.
+For a group all metrics from the group are added.
+The events from the metrics are automatically measured.
+See perf list output for the possible metrics and metricgroups.
+
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs.
+
+--topdown::
+Print top down level 1 metrics if supported by the CPU. This makes it possible
+to determine bottlenecks in the CPU pipeline for CPU bound workloads,
+by breaking the cycles consumed down into frontend bound, backend bound,
+bad speculation and retiring.
+
+Frontend bound means that the CPU cannot fetch and decode instructions fast
+enough. Backend bound means that computation or memory access is the
+bottleneck. Bad Speculation means that the CPU wasted cycles due to branch
+mispredictions and similar issues. Retiring means that the CPU computed without
+an apparent bottleneck. The bottleneck is only the real bottleneck
+if the workload is actually bound by the CPU and not by something else.
+
+For best results it is usually a good idea to use it with interval
+mode like -I 1000, as the bottleneck of workloads can change often.
+
+The top down metrics are collected per core instead of per
+CPU thread. Per core mode is automatically enabled
+and -a (global monitoring) is needed, requiring root rights or
+kernel.perf_event_paranoid=-1.
+
+Topdown uses the full Performance Monitoring Unit, and needs
+disabling of the NMI watchdog (as root):
+echo 0 > /proc/sys/kernel/nmi_watchdog
+for best results. Otherwise the bottlenecks may be inconsistent
+on workload with changing phases.
+
+This enables --metric-only, unless overridden with --no-metric-only.
+
+To interpret the results it is usually necessary to know which
+CPUs the workload runs on. If needed the CPUs can be forced using
+taskset.
+
+--no-merge::
+Do not merge results from same PMUs.
+
+When multiple events are created from a single event specification,
+stat will, by default, aggregate the event counts and show the result
+in a single row. This option disables that behavior and shows
+the individual events and counts.
+
+Multiple events are created from a single event specification when:
+1. Prefix or glob matching is used for the PMU name.
+2. Aliases, which are listed immediately after the Kernel PMU events
+   by perf list, are used.
+
+--smi-cost::
+Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
+
+During the measurement, the /sys/devices/cpu/freeze_on_smi will be set to
+freeze core counters on SMI.
+The aperf counter will not be affected by the setting.
+The cost of SMI can be measured by (aperf - unhalted core cycles).
+
+In practice, the percentage of SMI cycles is very useful for performance
+oriented analysis. --metric-only will be applied by default.
+The output is SMI cycles%, equals to (aperf - unhalted core cycles) / aperf
+
+Users who want to get the actual value can apply --no-metric-only.
+
+EXAMPLES
+--------
+
+$ perf stat -- make -j
+
+ Performance counter stats for 'make -j':
+
+    8117.370256  task clock ticks     #      11.281 CPU utilization factor
+            678  context switches     #       0.000 M/sec
+            133  CPU migrations       #       0.000 M/sec
+         235724  pagefaults           #       0.029 M/sec
+    24821162526  CPU cycles           #    3057.784 M/sec
+    18687303457  instructions         #    2302.138 M/sec
+      172158895  cache references     #      21.209 M/sec
+       27075259  cache misses         #       3.335 M/sec
+
+ Wall-clock time elapsed:   719.554352 msecs
+
+CSV FORMAT
+----------
+
+With -x, perf stat is able to output a not-quite-CSV format output.
+Commas in the output are not put into "". To make it easy to parse
+it is recommended to use a different character like -x \;
+
+The fields are in this order:
+
+	- optional usec time stamp in fractions of second (with -I xxx)
+	- optional CPU, core, or socket identifier
+	- optional number of logical CPUs aggregated
+	- counter value
+	- unit of the counter value or empty
+	- event name
+	- run time of counter
+	- percentage of measurement time the counter was running
+	- optional variance if multiple values are collected with -r
+	- optional metric value
+	- optional unit of metric
+
+Additional metrics may be printed with all earlier fields being empty.
+
+SEE ALSO
+--------
+linkperf:perf-top[1], linkperf:perf-list[1]
diff --git a/Documentation/perf-test.txt b/Documentation/perf-test.txt
new file mode 100644
index 0000000..b329c65
--- /dev/null
+++ b/Documentation/perf-test.txt
@@ -0,0 +1,36 @@
+perf-test(1)
+============
+
+NAME
+----
+perf-test - Runs sanity tests.
+
+SYNOPSIS
+--------
+[verse]
+'perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]'
+
+DESCRIPTION
+-----------
+This command does assorted sanity tests, initially through linked routines but
+also will look for a directory with more tests in the form of scripts.
+
+To get a list of available tests use 'perf test list', specifying a test name
+fragment will show all tests that have it.
+
+To run just specific tests, specify test name fragments or the numbers obtained
+from 'perf test list'.
+
+OPTIONS
+-------
+-s::
+--skip::
+	Tests to skip (comma separated numeric list).
+
+-v::
+--verbose::
+	Be more verbose.
+
+-F::
+--dont-fork::
+	Do not fork child for each test, run all tests within single process.
diff --git a/Documentation/perf-timechart.txt b/Documentation/perf-timechart.txt
new file mode 100644
index 0000000..ef0c756
--- /dev/null
+++ b/Documentation/perf-timechart.txt
@@ -0,0 +1,128 @@
+perf-timechart(1)
+=================
+
+NAME
+----
+perf-timechart - Tool to visualize total system behavior during a workload
+
+SYNOPSIS
+--------
+[verse]
+'perf timechart' [<timechart options>] {record} [<record options>]
+
+DESCRIPTION
+-----------
+There are two variants of perf timechart:
+
+  'perf timechart record <command>' to record the system level events
+  of an arbitrary workload. By default timechart records only scheduler
+  and CPU events (task switches, running times, CPU power states, etc),
+  but it's possible to record IO (disk, network) activity using -I argument.
+
+  'perf timechart' to turn a trace into a Scalable Vector Graphics file,
+  that can be viewed with popular SVG viewers such as 'Inkscape'. Depending
+  on the events in the perf.data file, timechart will contain scheduler/cpu
+  events or IO events.
+
+  In IO mode, every bar has two charts: upper and lower.
+  Upper bar shows incoming events (disk reads, ingress network packets).
+  Lower bar shows outgoing events (disk writes, egress network packets).
+  There are also poll bars which show how much time application spent
+  in poll/epoll/select syscalls.
+
+TIMECHART OPTIONS
+-----------------
+-o::
+--output=::
+        Select the output file (default: output.svg)
+-i::
+--input=::
+        Select the input file (default: perf.data unless stdin is a fifo)
+-w::
+--width=::
+        Select the width of the SVG file (default: 1000)
+-P::
+--power-only::
+        Only output the CPU power section of the diagram
+-T::
+--tasks-only::
+        Don't output processor state transitions
+-p::
+--process::
+        Select the processes to display, by name or PID
+-f::
+--force::
+	Don't complain, do it.
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+-n::
+--proc-num::
+        Print task info for at least given number of tasks.
+-t::
+--topology::
+        Sort CPUs according to topology.
+--highlight=<duration_nsecs|task_name>::
+	Highlight tasks (using different color) that run more than given
+	duration or tasks with given name. If number is given it's interpreted
+	as number of nanoseconds. If non-numeric string is given it's
+	interpreted as task name.
+--io-skip-eagain::
+	Don't draw EAGAIN IO events.
+--io-min-time=<nsecs>::
+	Draw small events as if they lasted min-time. Useful when you need
+	to see very small and fast IO. It's possible to specify ms or us
+	suffix to specify time in milliseconds or microseconds.
+	Default value is 1ms.
+--io-merge-dist=<nsecs>::
+	Merge events that are merge-dist nanoseconds apart.
+	Reduces number of figures on the SVG and makes it more render-friendly.
+	It's possible to specify ms or us suffix to specify time in
+	milliseconds or microseconds.
+	Default value is 1us.
+
+RECORD OPTIONS
+--------------
+-P::
+--power-only::
+        Record only power-related events
+-T::
+--tasks-only::
+        Record only tasks-related events
+-I::
+--io-only::
+        Record only io-related events
+-g::
+--callchain::
+        Do call-graph (stack chain/backtrace) recording
+
+EXAMPLES
+--------
+
+$ perf timechart record git pull
+
+  [ perf record: Woken up 13 times to write data ]
+  [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ]
+
+$ perf timechart
+
+  Written 10.2 seconds of trace to output.svg.
+
+Record system-wide timechart:
+
+  $ perf timechart record
+
+  then generate timechart and highlight 'gcc' tasks:
+
+  $ perf timechart --highlight gcc
+
+Record system-wide IO events:
+
+  $ perf timechart record -I
+
+  then generate timechart:
+
+  $ perf timechart
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/Documentation/perf-top.txt b/Documentation/perf-top.txt
new file mode 100644
index 0000000..114fda1
--- /dev/null
+++ b/Documentation/perf-top.txt
@@ -0,0 +1,294 @@
+perf-top(1)
+===========
+
+NAME
+----
+perf-top - System profiling tool.
+
+SYNOPSIS
+--------
+[verse]
+'perf top' [-e <EVENT> | --event=EVENT] [<options>]
+
+DESCRIPTION
+-----------
+This command generates and displays a performance counter profile in real time.
+
+
+OPTIONS
+-------
+-a::
+--all-cpus::
+        System-wide collection.  (default)
+
+-c <count>::
+--count=<count>::
+	Event period to sample.
+
+-C <cpu-list>::
+--cpu=<cpu>::
+Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Default is to monitor all CPUs.
+
+-d <seconds>::
+--delay=<seconds>::
+	Number of seconds to delay between refreshes.
+
+-e <event>::
+--event=<event>::
+	Select the PMU event. Selection can be a symbolic event name
+	(use 'perf list' to list all events) or a raw PMU
+	event (eventsel+umask) in the form of rNNN where NNN is a
+	hexadecimal event descriptor.
+
+-E <entries>::
+--entries=<entries>::
+	Display this many functions.
+
+-f <count>::
+--count-filter=<count>::
+	Only display functions with more events than this.
+
+--group::
+        Put the counters into a counter group.
+
+-F <freq>::
+--freq=<freq>::
+	Profile at this frequency. Use 'max' to use the currently maximum
+	allowed frequency, i.e. the value in the kernel.perf_event_max_sample_rate
+	sysctl.
+
+-i::
+--no-inherit::
+	Child tasks do not inherit counters.
+
+-k <path>::
+--vmlinux=<path>::
+	Path to vmlinux.  Required for annotation functionality.
+
+--ignore-vmlinux::
+	Ignore vmlinux files.
+
+-m <pages>::
+--mmap-pages=<pages>::
+	Number of mmap data pages (must be a power of two) or size
+	specification with appended unit character - B/K/M/G. The
+	size is rounded up to have nearest pages power of two value.
+
+-p <pid>::
+--pid=<pid>::
+	Profile events on existing Process ID (comma separated list).
+
+-t <tid>::
+--tid=<tid>::
+        Profile events on existing thread ID (comma separated list).
+
+-u::
+--uid=::
+        Record events in threads owned by uid. Name or number.
+
+-r <priority>::
+--realtime=<priority>::
+	Collect data with this RT SCHED_FIFO priority.
+
+--sym-annotate=<symbol>::
+        Annotate this symbol.
+
+-K::
+--hide_kernel_symbols::
+        Hide kernel symbols.
+
+-U::
+--hide_user_symbols::
+        Hide user symbols.
+
+--demangle-kernel::
+        Demangle kernel symbols.
+
+-D::
+--dump-symtab::
+        Dump the symbol table used for profiling.
+
+-v::
+--verbose::
+	Be more verbose (show counter open errors, etc).
+
+-z::
+--zero::
+	Zero history across display updates.
+
+-s::
+--sort::
+	Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight,
+	local_weight, abort, in_tx, transaction, overhead, sample, period.
+	Please see description of --sort in the perf-report man page.
+
+--fields=::
+	Specify output field - multiple keys can be specified in CSV format.
+	Following fields are available:
+	overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+	Also it can contain any sort key(s).
+
+	By default, every sort key not specified in --fields will be appended
+	automatically.
+
+-n::
+--show-nr-samples::
+	Show a column with the number of samples.
+
+--show-total-period::
+	Show a column with the sum of periods.
+
+--dsos::
+	Only consider symbols in these dsos.  This option will affect the
+	percentage of the overhead column.  See --percentage for more info.
+
+--comms::
+	Only consider symbols in these comms.  This option will affect the
+	percentage of the overhead column.  See --percentage for more info.
+
+--symbols::
+	Only consider these symbols.  This option will affect the
+	percentage of the overhead column.  See --percentage for more info.
+
+-M::
+--disassembler-style=:: Set disassembler style for objdump.
+
+--source::
+	Interleave source code with assembly code. Enabled by default,
+	disable with --no-source.
+
+--asm-raw::
+	Show raw instruction encoding of assembly instructions.
+
+-g::
+	Enables call-graph (stack chain/backtrace) recording.
+
+--call-graph [mode,type,min[,limit],order[,key][,branch]]::
+	Setup and enable call-graph (stack chain/backtrace) recording,
+	implies -g.  See `--call-graph` section in perf-record and
+	perf-report man pages for details.
+
+--children::
+	Accumulate callchain of children to parent entry so that they can
+	show up in the output.  The output will have a new "Children" column
+	and will be sorted on the data.  It requires -g/--call-graph option
+	enabled.  See the `overhead calculation' section for more details.
+	Enabled by default, disable with --no-children.
+
+--max-stack::
+	Set the stack depth limit when parsing the callchain, anything
+	beyond the specified depth will be ignored. This is a trade-off
+	between information loss and faster processing especially for
+	workloads that can have a very long callchain stack.
+
+	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
+
+--ignore-callees=<regex>::
+        Ignore callees of the function(s) matching the given regex.
+        This has the effect of collecting the callers of each such
+        function into one place in the call-graph tree.
+
+--percent-limit::
+	Do not show entries which have an overhead under that percent.
+	(Default: 0).
+
+--percentage::
+	Determine how to display the overhead percentage of filtered entries.
+	Filters can be applied by --comms, --dsos and/or --symbols options and
+	Zoom operations on the TUI (thread, dso, etc).
+
+	"relative" means it's relative to filtered entries only so that the
+	sum of shown entries will be always 100%. "absolute" means it retains
+	the original value before and after the filter is applied.
+
+-w::
+--column-widths=<width[,width...]>::
+	Force each column width to the provided list, for large terminal
+	readability.  0 means no limit (default behavior).
+
+--proc-map-timeout::
+	When processing pre-existing threads /proc/XXX/maps, it may take
+	a long time, because the file may be huge. A time out is needed
+	in such cases.
+	This option sets the time out limit. The default value is 500 ms.
+
+
+-b::
+--branch-any::
+	Enable taken branch stack sampling. Any type of taken branch may be sampled.
+	This is a shortcut for --branch-filter any. See --branch-filter for more information.
+
+-j::
+--branch-filter::
+	Enable taken branch stack sampling. Each sample captures a series of consecutive
+	taken branches. The number of branches captured with each sample depends on the
+	underlying hardware, the type of branches of interest, and the executed code.
+	It is possible to select the types of branches captured by enabling filters.
+	For a full list of modifiers please see the perf record manpage.
+
+	The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
+	The privilege levels may be omitted, in which case, the privilege levels of the associated
+	event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
+	levels are subject to permissions.  When sampling on multiple events, branch stack sampling
+	is enabled for all the sampling events. The sampled branch type is the same for all events.
+	The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
+	Note that this feature may not be available on all processors.
+
+--raw-trace::
+	When displaying traceevent output, do not use print fmt or plugins.
+
+--hierarchy::
+	Enable hierarchy output.
+
+--force::
+	Don't do ownership validation.
+
+--num-thread-synthesize::
+	The number of threads to run when synthesizing events for existing processes.
+	By default, the number of threads equals the number of online CPUs.
+
+INTERACTIVE PROMPTING KEYS
+--------------------------
+
+[d]::
+	Display refresh delay.
+
+[e]::
+	Number of entries to display.
+
+[E]::
+	Event to display when multiple counters are active.
+
+[f]::
+	Profile display filter (>= hit count).
+
+[F]::
+	Annotation display filter (>= % of total).
+
+[s]::
+	Annotate symbol.
+
+[S]::
+	Stop annotation, return to full profile display.
+
+[K]::
+	Hide kernel symbols.
+
+[U]::
+	Hide user symbols.
+
+[z]::
+	Toggle event count zeroing across display updates.
+
+[qQ]::
+	Quit.
+
+Pressing any unmapped key displays a menu, and prompts for input.
+
+include::callchain-overhead-calculation.txt[]
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-report[1]
diff --git a/Documentation/perf-trace.txt b/Documentation/perf-trace.txt
new file mode 100644
index 0000000..115db9e
--- /dev/null
+++ b/Documentation/perf-trace.txt
@@ -0,0 +1,243 @@
+perf-trace(1)
+=============
+
+NAME
+----
+perf-trace - strace inspired tool
+
+SYNOPSIS
+--------
+[verse]
+'perf trace'
+'perf trace record'
+
+DESCRIPTION
+-----------
+This command will show the events associated with the target, initially
+syscalls, but also other system events like pagefaults, task lifetime events,
+scheduling events, etc.
+
+This is a live mode tool in addition to working with perf.data files like
+the other perf tools. Files can be generated using the 'perf record' command
+but the session needs to include the raw_syscalls events (-e 'raw_syscalls:*').
+Alternatively, 'perf trace record' can be used as a shortcut to
+automatically include the raw_syscalls events when writing events to a file.
+
+The following options apply to perf trace; options to perf trace record are
+found in the perf record man page.
+
+OPTIONS
+-------
+
+-a::
+--all-cpus::
+        System-wide collection from all CPUs.
+
+-e::
+--expr::
+--event::
+	List of syscalls and other perf events (tracepoints, HW cache events,
+	etc) to show. Globbing is supported, e.g.: "epoll_*", "*msg*", etc.
+	See 'perf list' for a complete list of events.
+	Prefixing with ! shows all syscalls but the ones specified.  You may
+	need to escape it.
+
+-D msecs::
+--delay msecs::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
+-o::
+--output=::
+	Output file name.
+
+-p::
+--pid=::
+	Record events on existing process ID (comma separated list).
+
+-t::
+--tid=::
+        Record events on existing thread ID (comma separated list).
+
+-u::
+--uid=::
+        Record events in threads owned by uid. Name or number.
+
+-G::
+--cgroup::
+	Record events in threads in a cgroup.
+
+	Look for cgroups to set at the /sys/fs/cgroup/perf_event directory, then
+	remove the /sys/fs/cgroup/perf_event/ part and try:
+
+		perf trace -G A -e sched:*switch
+
+	Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
+	_and_ sched:sched_switch to the 'A' cgroup, while:
+
+		perf trace -e sched:*switch -G A
+
+	will only set the sched:sched_switch event to the 'A' cgroup, all the
+	other events (raw_syscalls:sys_{enter,exit}, etc) are left "without"
+	a cgroup (on the root cgroup, sys wide, etc).
+
+	Multiple cgroups:
+
+		perf trace -G A -e sched:*switch -G B
+
+	the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
+	to the 'B' cgroup.
+
+--filter-pids=::
+	Filter out events for these pids and for 'trace' itself (comma separated list).
+
+-v::
+--verbose=::
+        Verbosity level.
+
+--no-inherit::
+	Child tasks do not inherit counters.
+
+-m::
+--mmap-pages=::
+	Number of mmap data pages (must be a power of two) or size
+	specification with appended unit character - B/K/M/G. The
+	size is rounded up to the nearest power-of-two number of pages.
+
+-C::
+--cpu::
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+In per-thread mode with inheritance mode on (default), Events are captured only when
+the thread executes on the designated CPUs. Default is to monitor all CPUs.
+
+--duration::
+	Show only events that had a duration greater than N.M ms.
+
+--sched::
+	Accrue thread runtime and provide a summary at the end of the session.
+
+--failure::
+	Show only syscalls that failed, i.e. that returned < 0.
+
+-i::
+--input::
+	Process events from a given perf data file.
+
+-T::
+--time::
+	Print full timestamp rather than time relative to first sample.
+
+--comm::
+        Show process COMM right beside its ID, on by default, disable with --no-comm.
+
+-s::
+--summary::
+	Show only a summary of syscalls by thread with min, max, and average times
+    (in msec) and relative stddev.
+
+-S::
+--with-summary::
+	Show all syscalls followed by a summary by thread with min, max, and
+    average times (in msec) and relative stddev.
+
+--tool_stats::
+	Show tool stats such as number of times fd->pathname was discovered thru
+	hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
+
+-f::
+--force::
+	Don't complain, do it.
+
+-F=[all|min|maj]::
+--pf=[all|min|maj]::
+	Trace pagefaults. Optionally, you can specify whether you want minor,
+	major or all pagefaults. Default value is maj.
+
+--syscalls::
+	Trace system calls. This option is enabled by default, disable with
+	--no-syscalls.
+
+--call-graph [mode,type,min[,limit],order[,key][,branch]]::
+        Setup and enable call-graph (stack chain/backtrace) recording.
+        See `--call-graph` section in perf-record and perf-report
+        man pages for details. The ones that are most useful in 'perf trace'
+        are 'dwarf' and 'lbr', where available, try: 'perf trace --call-graph dwarf'.
+
+        Using this will, for the root user, bump the value of --mmap-pages to 4
+        times the maximum for non-root users, based on the kernel.perf_event_mlock_kb
+        sysctl. This is done only if the user doesn't specify a --mmap-pages value.
+
+--kernel-syscall-graph::
+	 Show the kernel callchains on the syscall exit path.
+
+--max-stack::
+        Set the stack depth limit when parsing the callchain, anything
+        beyond the specified depth will be ignored. Note that at this point
+        this is just about the presentation part, i.e. the kernel is still
+        not limiting, the overhead of callchains needs to be set via the
+        knobs in --call-graph dwarf.
+
+        Implies '--call-graph dwarf' when --call-graph not present on the
+        command line, on systems where DWARF unwinding was built in.
+
+        Default: /proc/sys/kernel/perf_event_max_stack when present for
+                 live sessions (without --input/-i), 127 otherwise.
+
+--min-stack::
+        Set the stack depth limit when parsing the callchain, anything
+        below the specified depth will be ignored. Disabled by default.
+
+        Implies '--call-graph dwarf' when --call-graph not present on the
+        command line, on systems where DWARF unwinding was built in.
+
+--print-sample::
+	Print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info for the
+	raw_syscalls:sys_{enter,exit} tracepoints, for debugging.
+
+--proc-map-timeout::
+	When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
+	because the file may be huge. A time out is needed in such cases.
+	This option sets the time out limit. The default value is 500 ms.
+
+PAGEFAULTS
+----------
+
+When tracing pagefaults, the format of the trace is as follows:
+
+<min|maj>fault [<ip.symbol>+<ip.offset>] => <addr.dso@addr.offset> (<map type><addr level>).
+
+- min/maj indicates whether fault event is minor or major;
+- ip.symbol shows symbol for instruction pointer (the code that generated the
+  fault); if no debug symbols available, perf trace will print raw IP;
+- addr.dso shows DSO for the faulted address;
+- map type is either 'd' for non-executable maps or 'x' for executable maps;
+- addr level is either 'k' for kernel dso or '.' for user dso.
+
+For symbols resolution you may need to install debugging symbols.
+
+Please be aware that duration is currently always 0 and doesn't reflect actual
+time it took for fault to be handled!
+
+When --verbose is specified, perf trace tries to print all available information
+for both IP and fault address in the form of dso@symbol+offset.
+
+EXAMPLES
+--------
+
+Trace only major pagefaults:
+
+ $ perf trace --no-syscalls -F
+
+Trace syscalls, major and minor pagefaults:
+
+ $ perf trace -F all
+
+  1416.547 ( 0.000 ms): python/20235 majfault [CRYPTO_push_info_+0x0] => /lib/x86_64-linux-gnu/libcrypto.so.1.0.0@0x61be0 (x.)
+
+  As you can see, there was a major pagefault in the python process, from the
+  CRYPTO_push_info_ routine, which faulted somewhere in libcrypto.so.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/Documentation/perf-version.txt b/Documentation/perf-version.txt
new file mode 100644
index 0000000..e207b7c
--- /dev/null
+++ b/Documentation/perf-version.txt
@@ -0,0 +1,24 @@
+perf-version(1)
+===============
+
+NAME
+----
+perf-version - display the version of perf binary
+
+SYNOPSIS
+--------
+'perf version' [--build-options]
+
+DESCRIPTION
+-----------
+With no options given, 'perf version' prints the perf version
+on the standard output.
+
+If the option '--build-options' is given, then the status of
+compiled-in libraries is printed on the standard output.
+
+OPTIONS
+-------
+--build-options::
+        Prints the status of compiled-in libraries on the
+        standard output.
diff --git a/Documentation/perf.data-file-format.txt b/Documentation/perf.data-file-format.txt
new file mode 100644
index 0000000..d00f0d5
--- /dev/null
+++ b/Documentation/perf.data-file-format.txt
@@ -0,0 +1,489 @@
+perf.data format
+
+Up to date as of v4.7
+
+This document describes the on-disk perf.data format, generated by perf record
+or perf inject and consumed by the other perf tools.
+
+On a high level perf.data contains the events generated by the PMUs, plus metadata.
+
+All fields are in native-endian of the machine that generated the perf.data.
+
+When perf is writing to a pipe it uses a special version of the file
+format that does not rely on seeking to adjust data offsets.  This
+format is described in "Pipe-mode data" section. The pipe data version can be
+augmented with additional events using perf inject.
+
+The file starts with a perf_header:
+
+struct perf_header {
+	char magic[8];		/* PERFILE2 */
+	uint64_t size;		/* size of the header */
+	uint64_t attr_size;	/* size of an attribute in attrs */
+	struct perf_file_section attrs;
+	struct perf_file_section data;
+	struct perf_file_section event_types;
+	uint64_t flags;
+	uint64_t flags1[3];
+};
+
+The magic number identifies the perf file and the version. Current perf versions
+use PERFILE2. Old perf versions generated a version 1 format (PERFFILE). Version 1
+is not described here. The magic number also identifies the endian. When the
+magic value is 64-bit byte-swapped compared to the expected value, the file is
+in non-native endian.
+
+A perf_file_section contains a pointer to another section of the perf file.
+The header contains three such pointers: for attributes, data and event types.
+
+struct perf_file_section {
+	uint64_t offset;	/* offset from start of file */
+	uint64_t size;		/* size of the section */
+};
+
+Flags section:
+
+The header is followed by different optional headers, described by the bits set
+in flags. Only headers for which the bit is set are included. Each header
+consists of a perf_file_section located after the initial header.
+The respective perf_file_section points to the data of the additional
+header and defines its size.
+
+Some headers consist of strings, which are defined like this:
+
+struct perf_header_string {
+       uint32_t len;
+       char string[len]; /* zero terminated */
+};
+
+Some headers consist of a sequence of strings, which start with a
+
+struct perf_header_string_list {
+     uint32_t nr;
+     struct perf_header_string strings[nr]; /* variable length records */
+};
+
+The bits are the flags bits in a 256 bit bitmap starting with
+flags. These define the valid bits:
+
+	HEADER_RESERVED		= 0,	/* always cleared */
+	HEADER_FIRST_FEATURE	= 1,
+	HEADER_TRACING_DATA	= 1,
+
+Describe me.
+
+	HEADER_BUILD_ID = 2,
+
+The header consists of a sequence of build_id_event. The size of each record
+is defined by header.size (see perf_event.h). Each event defines an ELF build id
+for an executable file name for a pid. An ELF build id is a unique identifier
+assigned by the linker to an executable.
+
+struct build_id_event {
+	struct perf_event_header header;
+	pid_t			 pid;
+	uint8_t			 build_id[24];
+	char			 filename[header.size - offsetof(struct build_id_event, filename)];
+};
+
+	HEADER_HOSTNAME = 3,
+
+A perf_header_string with the hostname where the data was collected
+(uname -n)
+
+	HEADER_OSRELEASE = 4,
+
+A perf_header_string with the os release where the data was collected
+(uname -r)
+
+	HEADER_VERSION = 5,
+
+A perf_header_string with the perf user tool version where the
+data was collected. This is the same as the version of the source tree
+the perf tool was built from.
+
+	HEADER_ARCH = 6,
+
+A perf_header_string with the CPU architecture (uname -m)
+
+	HEADER_NRCPUS = 7,
+
+A structure defining the number of CPUs.
+
+struct nr_cpus {
+       uint32_t nr_cpus_online;
+       uint32_t nr_cpus_available; /* CPUs not yet onlined */
+};
+
+	HEADER_CPUDESC = 8,
+
+A perf_header_string with description of the CPU. On x86 this is the model name
+in /proc/cpuinfo
+
+	HEADER_CPUID = 9,
+
+A perf_header_string with the exact CPU type. On x86 this is
+vendor,family,model,stepping. For example: GenuineIntel,6,69,1
+
+	HEADER_TOTAL_MEM = 10,
+
+An uint64_t with the total memory in bytes.
+
+	HEADER_CMDLINE = 11,
+
+A perf_header_string with the perf command line used to collect the data.
+
+	HEADER_EVENT_DESC = 12,
+
+Another description of the perf_event_attrs, more detailed than header.attrs
+including IDs and names. See perf_event.h or the man page for a description
+of a struct perf_event_attr.
+
+struct {
+       uint32_t nr; /* number of events */
+       uint32_t attr_size; /* size of each perf_event_attr */
+       struct {
+	      struct perf_event_attr attr;  /* size of attr_size */
+	      uint32_t nr_ids;
+	      struct perf_header_string event_string;
+	      uint64_t ids[nr_ids];
+       } events[nr]; /* Variable length records */
+};
+
+	HEADER_CPU_TOPOLOGY = 13,
+
+String lists defining the core and CPU threads topology.
+
+struct {
+       struct perf_header_string_list cores; /* Variable length */
+       struct perf_header_string_list threads; /* Variable length */
+};
+
+Example:
+	sibling cores   : 0-3
+	sibling threads : 0-1
+	sibling threads : 2-3
+
+	HEADER_NUMA_TOPOLOGY = 14,
+
+	A list of NUMA node descriptions
+
+struct {
+       uint32_t nr;
+       struct {
+	      uint32_t nodenr;
+	      uint64_t mem_total;
+	      uint64_t mem_free;
+	      struct perf_header_string cpus;
+       } nodes[nr]; /* Variable length records */
+};
+
+	HEADER_BRANCH_STACK = 15,
+
+Not implemented in perf.
+
+	HEADER_PMU_MAPPINGS = 16,
+
+	A list of PMU structures, defining the different PMUs supported by perf.
+
+struct {
+       uint32_t nr;
+       struct pmu {
+	      uint32_t pmu_type;
+	      struct perf_header_string pmu_name;
+       } [nr]; /* Variable length records */
+};
+
+	HEADER_GROUP_DESC = 17,
+
+	Description of counter groups ({...} in perf syntax)
+
+struct {
+         uint32_t nr;
+         struct {
+		struct perf_header_string string;
+		uint32_t leader_idx;
+		uint32_t nr_members;
+	 } [nr]; /* Variable length records */
+};
+
+	HEADER_AUXTRACE = 18,
+
+Define additional auxtrace areas in the perf.data. auxtrace is used to store
+undecoded hardware tracing information, such as Intel Processor Trace data.
+
+/**
+ * struct auxtrace_index_entry - indexes an AUX area tracing event within a
+ *                               perf.data file.
+ * @file_offset: offset within the perf.data file
+ * @sz: size of the event
+ */
+struct auxtrace_index_entry {
+	u64			file_offset;
+	u64			sz;
+};
+
+#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
+
+/**
+ * struct auxtrace_index - index of AUX area tracing events within a perf.data
+ *                         file.
+ * @list: linking a number of arrays of entries
+ * @nr: number of entries
+ * @entries: array of entries
+ */
+struct auxtrace_index {
+	struct list_head	list;
+	size_t			nr;
+	struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
+};
+
+	HEADER_STAT = 19,
+
+This is merely a flag signifying that the data section contains data
+recorded from perf stat record.
+
+	HEADER_CACHE = 20,
+
+Description of the cache hierarchy. Based on the Linux sysfs format
+in /sys/devices/system/cpu/cpu*/cache/
+
+	u32 version	Currently always 1
+	u32 number_of_cache_levels
+
+struct {
+	u32	level;
+	u32	line_size;
+	u32	sets;
+	u32	ways;
+	struct perf_header_string type;
+	struct perf_header_string size;
+	struct perf_header_string map;
+}[number_of_cache_levels];
+
+	HEADER_SAMPLE_TIME = 21,
+
+Two uint64_t for the time of first sample and the time of last sample.
+
+	other bits are reserved and should be ignored for now
+	HEADER_FEAT_BITS	= 256,
+
+Attributes
+
+This is an array of perf_event_attrs, each attr_size bytes long, which defines
+each event collected. See perf_event.h or the man page for a detailed
+description.
+
+Data
+
+This section is the bulk of the file. It consists of a stream of perf_events
+describing events. This matches the format generated by the kernel.
+See perf_event.h or the manpage for a detailed description.
+
+Some notes on parsing:
+
+Ordering
+
+The events are not necessarily in time stamp order, as they can be
+collected in parallel on different CPUs. If the events should be
+processed in time order they need to be sorted first. It is possible
+to only do a partial sort using the FINISHED_ROUND event header (see
+below). perf record guarantees that there is no reordering over a
+FINISHED_ROUND.
+
+ID vs IDENTIFIER
+
+When the event stream contains multiple events each event is identified
+by an ID. This can be either through the PERF_SAMPLE_ID or the
+PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is
+at a fixed offset from the event header, which allows reliable
+parsing of the header. Relying on ID may be ambiguous.
+IDENTIFIER is only supported by newer Linux kernels.
+
+Perf record specific events:
+
+In addition to the kernel generated event types perf record adds its
+own event types (in addition it also synthesizes some kernel events,
+for example MMAP events)
+
+	PERF_RECORD_USER_TYPE_START		= 64,
+	PERF_RECORD_HEADER_ATTR			= 64,
+
+struct attr_event {
+	struct perf_event_header header;
+	struct perf_event_attr attr;
+	uint64_t id[];
+};
+
+	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* deprecated */
+
+#define MAX_EVENT_NAME 64
+
+struct perf_trace_event_type {
+	uint64_t	event_id;
+	char	name[MAX_EVENT_NAME];
+};
+
+struct event_type_event {
+	struct perf_event_header header;
+	struct perf_trace_event_type event_type;
+};
+
+
+	PERF_RECORD_HEADER_TRACING_DATA		= 66,
+
+Describe me
+
+struct tracing_data_event {
+	struct perf_event_header header;
+	uint32_t size;
+};
+
+	PERF_RECORD_HEADER_BUILD_ID		= 67,
+
+Defines an ELF build ID for a referenced executable.
+
+       struct build_id_event;   /* See above */
+
+	PERF_RECORD_FINISHED_ROUND		= 68,
+
+No event reordering over this header. No payload.
+
+	PERF_RECORD_ID_INDEX			= 69,
+
+Map event ids to CPUs and TIDs.
+
+struct id_index_entry {
+	uint64_t id;
+	uint64_t idx;
+	uint64_t cpu;
+	uint64_t tid;
+};
+
+struct id_index_event {
+	struct perf_event_header header;
+	uint64_t nr;
+	struct id_index_entry entries[nr];
+};
+
+	PERF_RECORD_AUXTRACE_INFO		= 70,
+
+Auxtrace type specific information. Describe me
+
+struct auxtrace_info_event {
+	struct perf_event_header header;
+	uint32_t type;
+	uint32_t reserved__; /* For alignment */
+	uint64_t priv[];
+};
+
+	PERF_RECORD_AUXTRACE			= 71,
+
+Defines auxtrace data. Followed by the actual data. The contents of
+the auxtrace data are dependent on the event and the CPU. For example
+for Intel Processor Trace it contains Processor Trace data generated
+by the CPU.
+
+struct auxtrace_event {
+	struct perf_event_header header;
+	uint64_t size;
+	uint64_t offset;
+	uint64_t reference;
+	uint32_t idx;
+	uint32_t tid;
+	uint32_t cpu;
+	uint32_t reserved__; /* For alignment */
+};
+
+struct aux_event {
+	struct perf_event_header header;
+	uint64_t	aux_offset;
+	uint64_t	aux_size;
+	uint64_t	flags;
+};
+
+	PERF_RECORD_AUXTRACE_ERROR		= 72,
+
+Describes an error in hardware tracing
+
+enum auxtrace_error_type {
+	PERF_AUXTRACE_ERROR_ITRACE  = 1,
+	PERF_AUXTRACE_ERROR_MAX
+};
+
+#define MAX_AUXTRACE_ERROR_MSG 64
+
+struct auxtrace_error_event {
+	struct perf_event_header header;
+	uint32_t type;
+	uint32_t code;
+	uint32_t cpu;
+	uint32_t pid;
+	uint32_t tid;
+	uint32_t reserved__; /* For alignment */
+	uint64_t ip;
+	char msg[MAX_AUXTRACE_ERROR_MSG];
+};
+
+	PERF_RECORD_HEADER_FEATURE		= 80,
+
+Describes a header feature. These are records used in pipe-mode that
+contain information that otherwise would be in perf.data file's header.
+
+Event types
+
+Define the event attributes with their IDs.
+
+An array bound by the perf_file_section size.
+
+	struct {
+		struct perf_event_attr attr;   /* Size defined by header.attr_size */
+		struct perf_file_section ids;
+	}
+
+ids points to an array of uint64_t defining the ids for event attr attr.
+
+Pipe-mode data
+
+Pipe-mode avoids seeks in the file by removing the perf_file_section and flags
+from the struct perf_header. The trimmed header is:
+
+struct perf_pipe_file_header {
+	u64				magic;
+	u64				size;
+};
+
+The information about attrs, data, and event_types is instead in the
+synthesized events PERF_RECORD_HEADER_ATTR, PERF_RECORD_HEADER_TRACING_DATA,
+PERF_RECORD_HEADER_EVENT_TYPE, and PERF_RECORD_HEADER_FEATURE
+that are generated by perf record in pipe-mode.
+
+
+References:
+
+include/uapi/linux/perf_event.h
+
+This is the canonical description of the kernel generated perf_events
+and the perf_event_attrs.
+
+perf_events manpage
+
+A manpage describing perf_event and perf_event_attr is here:
+http://web.eece.maine.edu/~vweaver/projects/perf_events/programming.html
+This tends to be slightly behind the kernel include, but has better
+descriptions.  A (typically older) version of the man page may be
+included with the standard Linux man pages, available with "man
+perf_events"
+
+pmu-tools
+
+https://github.com/andikleen/pmu-tools/tree/master/parser
+
+A definition of the perf.data format in python "construct" format is available
+in pmu-tools parser. This allows reading perf.data from python and dumping it.
+
+quipper
+
+The quipper C++ parser is available at
+http://github.com/google/perf_data_converter/tree/master/src/quipper
+
diff --git a/Documentation/perf.txt b/Documentation/perf.txt
new file mode 100644
index 0000000..864e375
--- /dev/null
+++ b/Documentation/perf.txt
@@ -0,0 +1,49 @@
+perf(1)
+=======
+
+NAME
+----
+perf - Performance analysis tools for Linux
+
+SYNOPSIS
+--------
+[verse]
+'perf' [--version] [--help] [OPTIONS] COMMAND [ARGS]
+
+OPTIONS
+-------
+--debug::
+	Set up a debug variable (see list below) with a value in
+	the range (0, 10). Use like:
+	  --debug verbose   # sets verbose = 1
+	  --debug verbose=2 # sets verbose = 2
+
+	List of debug variables allowed to set:
+	  verbose          - general debug messages
+	  ordered-events   - ordered events object debug messages
+	  data-convert     - data convert command debug messages
+
+--buildid-dir::
+	Setup buildid cache directory. It has higher priority than
+	buildid.dir config file option.
+
+-v::
+--version::
+  Display perf version.
+
+-h::
+--help::
+  Run perf help command.
+
+DESCRIPTION
+-----------
+Performance counters for Linux are a new kernel-based subsystem
+that provides a framework for all things performance analysis. It
+covers hardware level (CPU/PMU, Performance Monitoring Unit) features
+and software features (software counters, tracepoints) as well.
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-top[1],
+linkperf:perf-record[1], linkperf:perf-report[1],
+linkperf:perf-list[1]
diff --git a/Documentation/perfconfig.example b/Documentation/perfconfig.example
new file mode 100644
index 0000000..2b477c1
--- /dev/null
+++ b/Documentation/perfconfig.example
@@ -0,0 +1,38 @@
+[colors]
+
+	# These were the old defaults
+	top = red, lightgray
+	medium = green, lightgray
+	normal = black, lightgray
+	selected = lightgray, magenta
+	jump_arrows = blue, lightgray
+	addr = magenta, lightgray
+
+[tui]
+
+	# Defaults if linked with libslang
+	report = on
+	annotate = on
+	top = on
+
+[buildid]
+
+	# Default, disable using /dev/null
+	dir = /root/.debug
+
+[annotate]
+
+	# Defaults
+	hide_src_code = false
+	use_offset = true
+	jump_arrows = true
+	show_nr_jumps = false
+
+[report]
+
+	# Defaults
+	sort-order = comm,dso,symbol
+	percent-limit = 0
+	queue-size = 0
+	children = true
+	group = true
diff --git a/Documentation/tips.txt b/Documentation/tips.txt
new file mode 100644
index 0000000..849599f
--- /dev/null
+++ b/Documentation/tips.txt
@@ -0,0 +1,36 @@
+For a higher level overview, try: perf report --sort comm,dso
+Sample related events with: perf record -e '{cycles,instructions}:S'
+Compare performance results with: perf diff [<old file> <new file>]
+Boolean options have negative forms, e.g.: perf report --no-children
+Customize output of perf script with: perf script -F event,ip,sym
+Generate a script for your data: perf script -g <lang>
+Save output of perf stat using: perf stat record <target workload>
+Create an archive with symtabs to analyse on another machine: perf archive
+Search options using a keyword: perf report -h <keyword>
+Use parent filter to see specific call path: perf report -p <regex>
+List events using substring match: perf list <keyword>
+To see list of saved events and attributes: perf evlist -v
+Use --symfs <dir> if your symbol files are in non-standard locations
+To see callchains in a more compact form: perf report -g folded
+Show individual samples with: perf script
+Limit to show entries above 5% only: perf report --percent-limit 5
+Profiling branch (mis)predictions with: perf record -b / perf report
+Treat branches as callchains: perf report --branch-history
+To count events in every 1000 msec: perf stat -I 1000
+Print event counts in CSV format with: perf stat -x,
+If you have debuginfo enabled, try: perf report -s sym,srcline
+For memory address profiling, try: perf mem record / perf mem report
+For tracepoint events, try: perf report -s trace_fields
+To record callchains for each sample: perf record -g
+To record every process run by a user: perf record -u <user>
+Skip collecting build-id when recording: perf record -B
+To change sampling frequency to 100 Hz: perf record -F 100
+See assembly instructions with percentage: perf annotate <symbol>
+If you prefer Intel style assembly, try: perf annotate -M intel
+For hierarchical output, try: perf report --hierarchy
+Order by the overhead of source file name and line number: perf report -s srcline
+System-wide collection from all CPUs: perf record -a
+Show current config key-value pairs: perf config --list
+Show user configuration overrides: perf config --user --list
+To add Node.js USDT(User-Level Statically Defined Tracing): perf buildid-cache --add `which node`
+To report cacheline events from previous recording: perf c2c report
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..627b7ca
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,18 @@
+tools/perf
+tools/arch
+tools/scripts
+tools/build
+tools/include
+tools/lib/traceevent
+tools/lib/api
+tools/lib/bpf
+tools/lib/subcmd
+tools/lib/hweight.c
+tools/lib/rbtree.c
+tools/lib/string.c
+tools/lib/symbol/kallsyms.c
+tools/lib/symbol/kallsyms.h
+tools/lib/find_bit.c
+tools/lib/bitmap.c
+tools/lib/str_error_r.c
+tools/lib/vsprintf.c
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2254544
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,112 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# This is a simple wrapper Makefile that calls the main Makefile.perf
+# with a -j option to do parallel builds
+#
+# If you want to invoke the perf build in some non-standard way then
+# you can use the 'make -f Makefile.perf' method to invoke it.
+#
+
+#
+# Clear out the built-in rules GNU make defines by default (such as .o targets),
+# so that we pass through all targets to Makefile.perf:
+#
+.SUFFIXES:
+
+#
+# We don't want to pass along options like -j:
+#
+unexport MAKEFLAGS
+
+#
+# Do a parallel build with multiple jobs, based on the number of CPUs online
+# in this system: 'make -j8' on an 8-CPU system, etc. If the CPU count cannot
+# be determined (empty or 0), fall back to a single job.
+# (To override it, run 'make JOBS=1' and similar.)
+#
+ifeq ($(JOBS),)
+  JOBS := $(shell (getconf _NPROCESSORS_ONLN || grep -E -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+  ifeq ($(filter-out 0,$(JOBS)),)
+    JOBS := 1
+  endif
+endif
+
+#
+# Only pass canonical directory names as the output directory; if
+# 'readlink -f' fails, the value of O is used as given:
+ifneq ($(O),)
+  FULL_O := $(shell readlink -f $(O) || echo $(O))
+endif
+
+#
+# Only accept the 'DEBUG' variable from the command line; a DEBUG of any other
+# origin (or given empty) is overridden to 0 here and SET_DEBUG is left unset:
+ifeq ("$(origin DEBUG)", "command line")
+  ifeq ($(DEBUG),)
+    override DEBUG = 0
+  else
+    SET_DEBUG = "DEBUG=$(DEBUG)"
+  endif
+else
+  override DEBUG = 0
+endif
+
+define print_msg
+  @printf '  BUILD:   Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n'
+endef
+
+define make
+  @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) O=$(FULL_O) $(SET_DEBUG) $@
+endef
+
+#
+# Needed if no target specified:
+# (Except for tags and TAGS targets. The reason is that the
+# Makefile does not treat tags/TAGS as targets but as files
+# and thus won't rebuild them once they are in place.)
+#
+all tags TAGS:
+	$(print_msg)
+	$(make)
+
+ifdef MAKECMDGOALS
+has_clean := 0
+ifneq ($(filter clean,$(MAKECMDGOALS)),)
+  has_clean := 1
+endif # clean
+
+ifeq ($(has_clean),1)
+  rest := $(filter-out clean,$(MAKECMDGOALS))
+  ifneq ($(rest),)
+$(rest): clean
+  endif # rest
+endif # has_clean
+endif # MAKECMDGOALS
+
+#
+# The clean target is not really parallel, don't print the jobs info:
+#
+clean:
+	$(make)
+
+#
+# The build-test target is not really parallel, don't print the jobs info,
+# it also uses only the tests/make targets that don't pollute the source
+# repository, i.e. that uses O= or builds the tarpkg outside the source
+# repo directories.
+#
+# For a full test, use:
+#
+# make -C tools/perf -f tests/make
+#
+build-test:
+	@$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out
+
+#
+# All other targets get passed through:
+#
+%: FORCE
+	$(print_msg)
+	$(make)
+
+.PHONY: tags TAGS FORCE Makefile
diff --git a/Makefile.config b/Makefile.config
new file mode 100644
index 0000000..ae7dc46
--- /dev/null
+++ b/Makefile.config
@@ -0,0 +1,1007 @@
+
+ifeq ($(src-perf),)
+src-perf := $(srctree)/tools/perf
+endif
+
+ifeq ($(obj-perf),)
+obj-perf := $(OUTPUT)
+endif
+
+ifneq ($(obj-perf),)
+obj-perf := $(abspath $(obj-perf))/
+endif
+
+$(shell printf "" > $(OUTPUT).config-detected)
+detected     = $(shell echo "$(1)=y"       >> $(OUTPUT).config-detected)
+detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
+
+CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
+
+include $(srctree)/tools/scripts/Makefile.arch
+
+$(call detected_var,SRCARCH)
+
+NO_PERF_REGS := 1
+NO_SYSCALL_TABLE := 1
+
+# Additional ARCH settings for ppc
+ifeq ($(SRCARCH),powerpc)
+  NO_PERF_REGS := 0
+  NO_SYSCALL_TABLE := 0
+  CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated
+  LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
+endif
+
+# Additional ARCH settings for x86
+ifeq ($(SRCARCH),x86)
+  $(call detected,CONFIG_X86)
+  ifeq (${IS_64_BIT}, 1)
+    NO_SYSCALL_TABLE := 0
+    CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -I$(OUTPUT)arch/x86/include/generated
+    ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
+    LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma
+    $(call detected,CONFIG_X86_64)
+  else
+    LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
+  endif
+  NO_PERF_REGS := 0
+endif
+
+ifeq ($(SRCARCH),arm)
+  NO_PERF_REGS := 0
+  LIBUNWIND_LIBS = -lunwind -lunwind-arm
+endif
+
+ifeq ($(SRCARCH),arm64)
+  NO_PERF_REGS := 0
+  LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
+endif
+
+ifeq ($(ARCH),s390)
+  NO_PERF_REGS := 0
+  NO_SYSCALL_TABLE := 0
+  CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated
+endif
+
+ifeq ($(NO_PERF_REGS),0)
+  $(call detected,CONFIG_PERF_REGS)
+endif
+
+ifneq ($(NO_SYSCALL_TABLE),1)
+  CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT
+endif
+
+# libdw unwind support is only merged in perf for the architectures
+# listed in the check below. Disable it on all other architectures
+# in case libdw unwind support is detected in the system. Add newly
+# supported architectures to the check.
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390))
+  NO_LIBDW_DWARF_UNWIND := 1
+endif
+
+ifeq ($(LIBUNWIND_LIBS),)
+  NO_LIBUNWIND := 1
+endif
+#
+# For linking with debug library, run like:
+#
+#   make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
+#
+
+libunwind_arch_set_flags = $(eval $(libunwind_arch_set_flags_code))
+define libunwind_arch_set_flags_code
+  FEATURE_CHECK_CFLAGS-libunwind-$(1)  = -I$(LIBUNWIND_DIR)/include
+  FEATURE_CHECK_LDFLAGS-libunwind-$(1) = -L$(LIBUNWIND_DIR)/lib
+endef
+
+ifdef LIBUNWIND_DIR
+  LIBUNWIND_CFLAGS  = -I$(LIBUNWIND_DIR)/include
+  LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
+  LIBUNWIND_ARCHS = x86 x86_64 arm aarch64 debug-frame-arm debug-frame-aarch64
+  $(foreach libunwind_arch,$(LIBUNWIND_ARCHS),$(call libunwind_arch_set_flags,$(libunwind_arch)))
+endif
+
+# Set per-feature check compilation flags
+FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
+FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
+
+ifdef CSINCLUDES
+  LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
+endif
+OPENCSDLIBS := -lopencsd_c_api -lopencsd
+ifdef CSLIBS
+  LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
+endif
+FEATURE_CHECK_CFLAGS-libopencsd := $(LIBOPENCSD_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libopencsd := $(LIBOPENCSD_LDFLAGS) $(OPENCSDLIBS)
+
+ifeq ($(NO_PERF_REGS),0)
+  CFLAGS += -DHAVE_PERF_REGS_SUPPORT
+endif
+
+# for linking with debug library, run like:
+# make DEBUG=1 LIBDW_DIR=/opt/libdw/
+ifdef LIBDW_DIR
+  LIBDW_CFLAGS  := -I$(LIBDW_DIR)/include
+  LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
+endif
+DWARFLIBS := -ldw
+ifeq ($(findstring -static,${LDFLAGS}),-static)
+  DWARFLIBS += -lelf -lebl -ldl -lz -llzma -lbz2
+endif
+FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS)
+
+# for linking with debug library, run like:
+# make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/
+ifdef LIBBABELTRACE_DIR
+  LIBBABELTRACE_CFLAGS  := -I$(LIBBABELTRACE_DIR)/include
+  LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib
+endif
+FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
+
+FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
+# include ARCH specific config
+-include $(src-perf)/arch/$(SRCARCH)/Makefile
+
+ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+  CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+endif
+
+include $(srctree)/tools/scripts/utilities.mak
+
+ifeq ($(call get-executable,$(FLEX)),)
+  dummy := $(error Error: $(FLEX) is missing on this system, please install it)
+endif
+
+ifeq ($(call get-executable,$(BISON)),)
+  dummy := $(error Error: $(BISON) is missing on this system, please install it)
+endif
+
+# Treat warnings as errors unless directed not to
+ifneq ($(WERROR),0)
+  CFLAGS += -Werror
+  CXXFLAGS += -Werror
+endif
+
+ifndef DEBUG
+  DEBUG := 0
+endif
+
+ifeq ($(DEBUG),0)
+ifeq ($(CC_NO_CLANG), 0)
+  CFLAGS += -O3
+else
+  CFLAGS += -O6
+endif
+endif
+
+ifdef PARSER_DEBUG
+  PARSER_DEBUG_BISON := -t
+  PARSER_DEBUG_FLEX  := -d
+  CFLAGS             += -DPARSER_DEBUG
+  $(call detected_var,PARSER_DEBUG_BISON)
+  $(call detected_var,PARSER_DEBUG_FLEX)
+endif
+
+# Try different combinations to accommodate systems that only have
+# python[2][-config] in weird combinations but always preferring
+# python2 and python2-config as per pep-0394. If we catch a
+# python[-config] in version 3, the version check will kill it.
+PYTHON2 := $(if $(call get-executable,python2),python2,python)
+override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2))
+PYTHON2_CONFIG := \
+  $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
+override PYTHON_CONFIG := \
+  $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
+
+grep-libs  = $(filter -l%,$(1))
+strip-libs  = $(filter-out -l%,$(1))
+
+PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
+
+ifdef PYTHON_CONFIG
+  PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+  PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
+  PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
+  PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
+  PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
+  FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
+endif
+
+FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
+FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
+FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
+FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
+
+CFLAGS += -fno-omit-frame-pointer
+CFLAGS += -ggdb3
+CFLAGS += -funwind-tables
+CFLAGS += -Wall
+CFLAGS += -Wextra
+CFLAGS += -std=gnu99
+
+CXXFLAGS += -std=gnu++11 -fno-exceptions -fno-rtti
+CXXFLAGS += -Wall
+CXXFLAGS += -fno-omit-frame-pointer
+CXXFLAGS += -ggdb3
+CXXFLAGS += -funwind-tables
+CXXFLAGS += -Wno-strict-aliasing
+
+# Enforce a non-executable stack, as we may regress (again) in the future by
+# adding assembler files missing the .GNU-stack linker note.
+LDFLAGS += -Wl,-z,noexecstack
+
+EXTLIBS = -lpthread -lrt -lm -ldl
+
+ifeq ($(FEATURES_DUMP),)
+include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+
+ifeq ($(feature-stackprotector-all), 1)
+  CFLAGS += -fstack-protector-all
+endif
+
+ifeq ($(DEBUG),0)
+  ifeq ($(feature-fortify-source), 1)
+    CFLAGS += -D_FORTIFY_SOURCE=2
+  endif
+endif
+
+INC_FLAGS += -I$(src-perf)/util/include
+INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
+INC_FLAGS += -I$(srctree)/tools/include/uapi
+INC_FLAGS += -I$(srctree)/tools/include/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/
+
+# $(obj-perf)      for generated common-cmds.h
+# $(obj-perf)/util for generated bison/flex headers
+ifneq ($(OUTPUT),)
+INC_FLAGS += -I$(obj-perf)/util
+INC_FLAGS += -I$(obj-perf)
+endif
+
+INC_FLAGS += -I$(src-perf)/util
+INC_FLAGS += -I$(src-perf)
+INC_FLAGS += -I$(srctree)/tools/lib/
+
+CFLAGS   += $(INC_FLAGS)
+CXXFLAGS += $(INC_FLAGS)
+
+CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+
+ifeq ($(feature-sync-compare-and-swap), 1)
+  CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT
+endif
+
+ifeq ($(feature-pthread-attr-setaffinity-np), 1)
+  CFLAGS += -DHAVE_PTHREAD_ATTR_SETAFFINITY_NP
+endif
+
+ifeq ($(feature-pthread-barrier), 1)
+  CFLAGS += -DHAVE_PTHREAD_BARRIER
+endif
+
+ifndef NO_BIONIC
+  $(call feature_check,bionic)
+  ifeq ($(feature-bionic), 1)
+    BIONIC := 1
+    EXTLIBS := $(filter-out -lrt,$(EXTLIBS))
+    EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
+  endif
+endif
+
+ifdef NO_LIBELF
+  NO_DWARF := 1
+  NO_DEMANGLE := 1
+  NO_LIBUNWIND := 1
+  NO_LIBDW_DWARF_UNWIND := 1
+  NO_LIBBPF := 1
+  NO_JVMTI := 1
+else
+  ifeq ($(feature-libelf), 0)
+    ifeq ($(feature-glibc), 1)
+      LIBC_SUPPORT := 1
+    endif
+    ifeq ($(BIONIC),1)
+      LIBC_SUPPORT := 1
+    endif
+    ifeq ($(LIBC_SUPPORT),1)
+      msg := $(warning No libelf found. Disables 'probe' tool, jvmti and BPF support in 'perf record'. Please install libelf-dev, libelf-devel or elfutils-libelf-devel);
+
+      NO_LIBELF := 1
+      NO_DWARF := 1
+      NO_DEMANGLE := 1
+      NO_LIBUNWIND := 1
+      NO_LIBDW_DWARF_UNWIND := 1
+      NO_LIBBPF := 1
+      NO_JVMTI := 1
+    else
+      ifneq ($(filter s% -static%,$(LDFLAGS),),)
+        msg := $(error No static glibc found, please install glibc-static);
+      else
+        msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]);
+      endif
+    endif
+  else
+    ifndef NO_LIBDW_DWARF_UNWIND
+      ifneq ($(feature-libdw-dwarf-unwind),1)
+        NO_LIBDW_DWARF_UNWIND := 1
+        msg := $(warning No libdw DWARF unwind found, Please install elfutils-devel/libdw-dev >= 0.158 and/or set LIBDW_DIR);
+      endif
+    endif
+    ifneq ($(feature-dwarf), 1)
+      ifndef NO_DWARF
+        msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
+        NO_DWARF := 1
+      endif
+    else
+      ifneq ($(feature-dwarf_getlocations), 1)
+        msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157);
+      else
+        CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT
+      endif # dwarf_getlocations
+    endif # Dwarf support
+  endif # libelf support
+endif # NO_LIBELF
+
+ifeq ($(feature-glibc), 1)
+  CFLAGS += -DHAVE_GLIBC_SUPPORT
+endif
+
+ifdef NO_DWARF
+  NO_LIBDW_DWARF_UNWIND := 1
+endif
+
+ifeq ($(feature-sched_getcpu), 1)
+  CFLAGS += -DHAVE_SCHED_GETCPU_SUPPORT
+endif
+
+ifeq ($(feature-setns), 1)
+  CFLAGS += -DHAVE_SETNS_SUPPORT
+  $(call detected,CONFIG_SETNS)
+endif
+
+ifndef NO_CORESIGHT
+  ifeq ($(feature-libopencsd), 1)
+    CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS)
+    LDFLAGS += $(LIBOPENCSD_LDFLAGS)
+    EXTLIBS += $(OPENCSDLIBS)
+    $(call detected,CONFIG_LIBOPENCSD)
+    ifdef CSTRACE_RAW
+      CFLAGS += -DCS_DEBUG_RAW
+      ifeq (${CSTRACE_RAW}, packed)
+        CFLAGS += -DCS_RAW_PACKED
+      endif
+    endif
+  endif
+endif
+
+ifndef NO_LIBELF
+  CFLAGS += -DHAVE_LIBELF_SUPPORT
+  EXTLIBS += -lelf
+  $(call detected,CONFIG_LIBELF)
+
+  ifeq ($(feature-libelf-mmap), 1)
+    CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
+  endif
+
+  ifeq ($(feature-libelf-getphdrnum), 1)
+    CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
+  endif
+
+  ifeq ($(feature-libelf-gelf_getnote), 1)
+    CFLAGS += -DHAVE_GELF_GETNOTE_SUPPORT
+  else
+    msg := $(warning gelf_getnote() not found on libelf, SDT support disabled);
+  endif
+
+  ifeq ($(feature-libelf-getshdrstrndx), 1)
+    CFLAGS += -DHAVE_ELF_GETSHDRSTRNDX_SUPPORT
+  endif
+
+  ifndef NO_DWARF
+    ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
+      msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
+      NO_DWARF := 1
+    else
+      CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
+      LDFLAGS += $(LIBDW_LDFLAGS)
+      EXTLIBS += ${DWARFLIBS}
+      $(call detected,CONFIG_DWARF)
+    endif # PERF_HAVE_DWARF_REGS
+  endif # NO_DWARF
+
+  ifndef NO_LIBBPF
+    ifeq ($(feature-bpf), 1)
+      CFLAGS += -DHAVE_LIBBPF_SUPPORT
+      $(call detected,CONFIG_LIBBPF)
+    endif
+
+    ifndef NO_DWARF
+      ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+        CFLAGS += -DHAVE_BPF_PROLOGUE
+        $(call detected,CONFIG_BPF_PROLOGUE)
+      else
+        msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset());
+      endif
+    else
+      msg := $(warning DWARF support is off, BPF prologue is disabled);
+    endif
+
+  endif # NO_LIBBPF
+endif # NO_LIBELF
+
+ifndef NO_SDT
+  ifneq ($(feature-sdt), 1)
+    msg := $(warning No sys/sdt.h found, no SDT events are defined, please install systemtap-sdt-devel or systemtap-sdt-dev);
+    NO_SDT := 1
+  else
+    CFLAGS += -DHAVE_SDT_EVENT
+    $(call detected,CONFIG_SDT_EVENT)
+  endif
+endif
+
+ifdef PERF_HAVE_JITDUMP
+  ifndef NO_LIBELF
+    $(call detected,CONFIG_JITDUMP)
+    CFLAGS += -DHAVE_JITDUMP
+  endif
+endif
+
+ifeq ($(SRCARCH),powerpc)
+  ifndef NO_DWARF
+    CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
+  endif
+endif
+
+ifndef NO_LIBUNWIND
+  have_libunwind :=
+
+  ifeq ($(feature-libunwind-x86), 1)
+    $(call detected,CONFIG_LIBUNWIND_X86)
+    CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT
+    LDFLAGS += -lunwind-x86
+    EXTLIBS_LIBUNWIND += -lunwind-x86
+    have_libunwind = 1
+  endif
+
+  ifeq ($(feature-libunwind-aarch64), 1)
+    $(call detected,CONFIG_LIBUNWIND_AARCH64)
+    CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT
+    LDFLAGS += -lunwind-aarch64
+    EXTLIBS_LIBUNWIND += -lunwind-aarch64
+    have_libunwind = 1
+    $(call feature_check,libunwind-debug-frame-aarch64)
+    ifneq ($(feature-libunwind-debug-frame-aarch64), 1)
+      msg := $(warning No debug_frame support found in libunwind-aarch64);
+      CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME_AARCH64
+    endif
+  endif
+
+  ifneq ($(feature-libunwind), 1)
+    msg := $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR);
+    NO_LOCAL_LIBUNWIND := 1
+  else
+    have_libunwind := 1
+    $(call detected,CONFIG_LOCAL_LIBUNWIND)
+  endif
+
+  ifneq ($(have_libunwind), 1)
+    NO_LIBUNWIND := 1
+  endif
+else
+  NO_LOCAL_LIBUNWIND := 1
+endif
+
+ifndef NO_LIBBPF
+  ifneq ($(feature-bpf), 1)
+    msg := $(warning BPF API too old. Please install recent kernel headers. BPF support in 'perf record' is disabled.)
+    NO_LIBBPF := 1
+  endif
+endif
+
+dwarf-post-unwind := 1
+dwarf-post-unwind-text := BUG
+
+# setup DWARF post unwinder
+ifdef NO_LIBUNWIND
+  ifdef NO_LIBDW_DWARF_UNWIND
+    msg := $(warning Disabling post unwind, no support found.);
+    dwarf-post-unwind := 0
+  else
+    dwarf-post-unwind-text := libdw
+    $(call detected,CONFIG_LIBDW_DWARF_UNWIND)
+  endif
+else
+  dwarf-post-unwind-text := libunwind
+  $(call detected,CONFIG_LIBUNWIND)
+  # Enable libunwind support by default.
+  ifndef NO_LIBDW_DWARF_UNWIND
+    NO_LIBDW_DWARF_UNWIND := 1
+  endif
+endif
+
+ifeq ($(dwarf-post-unwind),1)
+  CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT
+  $(call detected,CONFIG_DWARF_UNWIND)
+else
+  NO_DWARF_UNWIND := 1
+endif
+
+ifndef NO_LOCAL_LIBUNWIND
+  ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
+    $(call feature_check,libunwind-debug-frame)
+    ifneq ($(feature-libunwind-debug-frame), 1)
+      msg := $(warning No debug_frame support found in libunwind);
+      CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
+    endif
+  else
+    # non-ARM has no dwarf_find_debug_frame() function:
+    CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
+  endif
+  EXTLIBS += $(LIBUNWIND_LIBS)
+  LDFLAGS += $(LIBUNWIND_LIBS)
+endif
+ifeq ($(findstring -static,${LDFLAGS}),-static)
+  # gcc -static links libgcc_eh which contains a piece of libunwind
+  LIBUNWIND_LDFLAGS += -Wl,--allow-multiple-definition
+endif
+
+ifndef NO_LIBUNWIND
+  CFLAGS  += -DHAVE_LIBUNWIND_SUPPORT
+  CFLAGS  += $(LIBUNWIND_CFLAGS)
+  LDFLAGS += $(LIBUNWIND_LDFLAGS)
+  EXTLIBS += $(EXTLIBS_LIBUNWIND)
+endif
+
+ifeq ($(NO_SYSCALL_TABLE),0)
+  $(call detected,CONFIG_TRACE)
+else
+  ifndef NO_LIBAUDIT
+    ifneq ($(feature-libaudit), 1)
+      msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
+      NO_LIBAUDIT := 1
+    else
+      CFLAGS += -DHAVE_LIBAUDIT_SUPPORT
+      EXTLIBS += -laudit
+      $(call detected,CONFIG_TRACE)
+    endif
+  endif
+endif
+
+ifndef NO_LIBCRYPTO
+  ifneq ($(feature-libcrypto), 1)
+    msg := $(warning No libcrypto.h found, disables jitted code injection, please install libssl-devel or libssl-dev);
+    NO_LIBCRYPTO := 1
+  else
+    CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT
+    EXTLIBS += -lcrypto
+    $(call detected,CONFIG_CRYPTO)
+  endif
+endif
+
+ifdef NO_NEWT
+  NO_SLANG=1
+endif
+
+ifndef NO_SLANG
+  ifneq ($(feature-libslang), 1)
+    msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev);
+    NO_SLANG := 1
+  else
+    # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
+    CFLAGS += -I/usr/include/slang
+    CFLAGS += -DHAVE_SLANG_SUPPORT
+    EXTLIBS += -lslang
+    $(call detected,CONFIG_SLANG)
+  endif
+endif
+
+ifndef NO_GTK2
+  FLAGS_GTK2=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
+  ifneq ($(feature-gtk2), 1)
+    msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);
+    NO_GTK2 := 1
+  else
+    ifeq ($(feature-gtk2-infobar), 1)
+      GTK_CFLAGS := -DHAVE_GTK_INFO_BAR_SUPPORT
+    endif
+    CFLAGS += -DHAVE_GTK2_SUPPORT
+    GTK_CFLAGS += $(shell $(PKG_CONFIG) --cflags gtk+-2.0 2>/dev/null)
+    GTK_LIBS := $(shell $(PKG_CONFIG) --libs gtk+-2.0 2>/dev/null)
+    EXTLIBS += -ldl
+  endif
+endif
+
+ifdef NO_LIBPERL
+  CFLAGS += -DNO_LIBPERL
+else
+  PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
+  PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
+  PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
+  PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
+  PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+  PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
+  FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
+
+  ifneq ($(feature-libperl), 1)
+    CFLAGS += -DNO_LIBPERL
+    NO_LIBPERL := 1
+    msg := $(warning Missing perl devel files. Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev);
+  else
+    LDFLAGS += $(PERL_EMBED_LDFLAGS)
+    EXTLIBS += $(PERL_EMBED_LIBADD)
+    CFLAGS += -DHAVE_LIBPERL_SUPPORT
+    $(call detected,CONFIG_LIBPERL)
+  endif
+endif
+
+ifeq ($(feature-timerfd), 1)
+  CFLAGS += -DHAVE_TIMERFD_SUPPORT
+else
+  msg := $(warning No timerfd support. Disables 'perf kvm stat live');
+endif
+
+disable-python = $(eval $(disable-python_code))
+define disable-python_code
+  CFLAGS += -DNO_LIBPYTHON
+  $(warning $1)
+  NO_LIBPYTHON := 1
+endef
+
+ifdef NO_LIBPYTHON
+  $(call disable-python,Python support disabled by user)
+else
+
+  ifndef PYTHON
+    $(call disable-python,No python interpreter was found: disables Python support - please install python-devel/python-dev)
+  else
+    PYTHON_WORD := $(call shell-wordify,$(PYTHON))
+
+    ifndef PYTHON_CONFIG
+      $(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev)
+    else
+
+      ifneq ($(feature-libpython), 1)
+        $(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev)
+      else
+         LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
+         EXTLIBS += $(PYTHON_EMBED_LIBADD)
+         LANG_BINDINGS += $(obj-perf)python/perf.so
+         CFLAGS += -DHAVE_LIBPYTHON_SUPPORT
+         $(call detected,CONFIG_LIBPYTHON)
+      endif
+    endif
+  endif
+endif
+
+ifeq ($(feature-libbfd), 1)
+  EXTLIBS += -lbfd
+
+  # call all detections now so we get correct
+  # status in VF output
+  $(call feature_check,liberty)
+  $(call feature_check,liberty-z)
+  $(call feature_check,cplus-demangle)
+
+  ifeq ($(feature-liberty), 1)
+    EXTLIBS += -liberty
+  else
+    ifeq ($(feature-liberty-z), 1)
+      EXTLIBS += -liberty -lz
+    endif
+  endif
+endif
+
+ifdef NO_DEMANGLE
+  CFLAGS += -DNO_DEMANGLE
+else
+  ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
+    EXTLIBS += -liberty
+    CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
+  else
+    ifneq ($(feature-libbfd), 1)
+      ifneq ($(feature-liberty), 1)
+        ifneq ($(feature-liberty-z), 1)
+          # we have neither HAVE_CPLUS_DEMANGLE_SUPPORT
+          # nor any of the 'bfd iberty z' trinity
+          ifeq ($(feature-cplus-demangle), 1)
+            EXTLIBS += -liberty
+            CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
+          else
+            msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling)
+            CFLAGS += -DNO_DEMANGLE
+          endif
+        endif
+      endif
+    endif
+  endif
+endif
+
+ifneq ($(filter -lbfd,$(EXTLIBS)),)
+  CFLAGS += -DHAVE_LIBBFD_SUPPORT
+endif
+
+ifndef NO_ZLIB
+  ifeq ($(feature-zlib), 1)
+    CFLAGS += -DHAVE_ZLIB_SUPPORT
+    EXTLIBS += -lz
+    $(call detected,CONFIG_ZLIB)
+  else
+    NO_ZLIB := 1
+  endif
+endif
+
+ifndef NO_LZMA
+  ifeq ($(feature-lzma), 1)
+    CFLAGS += -DHAVE_LZMA_SUPPORT
+    EXTLIBS += -llzma
+    $(call detected,CONFIG_LZMA)
+  else
+    msg := $(warning No liblzma found, disables xz kernel module decompression, please install xz-devel/liblzma-dev);
+    NO_LZMA := 1
+  endif
+endif
+
+ifndef NO_BACKTRACE
+  ifeq ($(feature-backtrace), 1)
+    CFLAGS += -DHAVE_BACKTRACE_SUPPORT
+  endif
+endif
+
+ifndef NO_LIBNUMA
+  ifeq ($(feature-libnuma), 0)
+    msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev);
+    NO_LIBNUMA := 1
+  else
+    ifeq ($(feature-numa_num_possible_cpus), 0)
+      msg := $(warning Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8);
+      NO_LIBNUMA := 1
+    else
+      CFLAGS += -DHAVE_LIBNUMA_SUPPORT
+      EXTLIBS += -lnuma
+      $(call detected,CONFIG_NUMA)
+    endif
+  endif
+endif
+
+ifdef HAVE_KVM_STAT_SUPPORT
+    CFLAGS += -DHAVE_KVM_STAT_SUPPORT
+endif
+
+ifeq (${IS_64_BIT}, 1)
+  ifndef NO_PERF_READ_VDSO32
+    $(call feature_check,compile-32)
+    ifeq ($(feature-compile-32), 1)
+      CFLAGS += -DHAVE_PERF_READ_VDSO32
+    else
+      NO_PERF_READ_VDSO32 := 1
+    endif
+  endif
+  ifneq ($(SRCARCH), x86)
+    NO_PERF_READ_VDSOX32 := 1
+  endif
+  ifndef NO_PERF_READ_VDSOX32
+    $(call feature_check,compile-x32)
+    ifeq ($(feature-compile-x32), 1)
+      CFLAGS += -DHAVE_PERF_READ_VDSOX32
+    else
+      NO_PERF_READ_VDSOX32 := 1
+    endif
+  endif
+else
+  NO_PERF_READ_VDSO32 := 1
+  NO_PERF_READ_VDSOX32 := 1
+endif
+
+ifndef NO_LIBBABELTRACE
+  $(call feature_check,libbabeltrace)
+  ifeq ($(feature-libbabeltrace), 1)
+    CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS)
+    LDFLAGS += $(LIBBABELTRACE_LDFLAGS)
+    EXTLIBS += -lbabeltrace-ctf
+    $(call detected,CONFIG_LIBBABELTRACE)
+  else
+    msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev);
+  endif
+endif
+
+ifndef NO_AUXTRACE
+  ifeq ($(SRCARCH),x86)
+    ifeq ($(feature-get_cpuid), 0)
+      msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
+      NO_AUXTRACE := 1
+    endif
+  endif
+  ifndef NO_AUXTRACE
+    $(call detected,CONFIG_AUXTRACE)
+    CFLAGS += -DHAVE_AUXTRACE_SUPPORT
+  endif
+endif
+
+ifndef NO_JVMTI
+  ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
+    JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
+  else
+    ifneq (,$(wildcard /usr/sbin/alternatives))
+      JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
+    endif
+  endif
+  ifndef JDIR
+    $(warning No alternatives command found, you need to set JDIR= to point to the root of your Java directory)
+    NO_JVMTI := 1
+  endif
+endif
+
+ifndef NO_JVMTI
+  FEATURE_CHECK_CFLAGS-jvmti := -I$(JDIR)/include -I$(JDIR)/include/linux
+  $(call feature_check,jvmti)
+  ifeq ($(feature-jvmti), 1)
+    $(call detected_var,JDIR)
+  else
+    $(warning No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel)
+    NO_JVMTI := 1
+  endif
+endif
+
+USE_CXX = 0
+USE_CLANGLLVM = 0
+ifdef LIBCLANGLLVM
+  $(call feature_check,cxx)
+  ifneq ($(feature-cxx), 1)
+    msg := $(warning No g++ found, disable clang and llvm support. Please install g++)
+  else
+    $(call feature_check,llvm)
+    $(call feature_check,llvm-version)
+    ifneq ($(feature-llvm), 1)
+      msg := $(warning No suitable libLLVM found, disabling builtin clang and LLVM support. Please install llvm-dev(el) (>= 3.9.0))
+    else
+      $(call feature_check,clang)
+      ifneq ($(feature-clang), 1)
+        msg := $(warning No suitable libclang found, disabling builtin clang and LLVM support. Please install libclang-dev(el) (>= 3.9.0))
+      else
+        CFLAGS += -DHAVE_LIBCLANGLLVM_SUPPORT
+        CXXFLAGS += -DHAVE_LIBCLANGLLVM_SUPPORT -I$(shell $(LLVM_CONFIG) --includedir)
+        $(call detected,CONFIG_CXX)
+        $(call detected,CONFIG_CLANGLLVM)
+        USE_CXX = 1
+        USE_LLVM = 1
+        USE_CLANG = 1
+        ifneq ($(feature-llvm-version),1)
+          msg := $(warning This version of LLVM is not tested. May cause build errors)
+        endif
+      endif
+    endif
+  endif
+endif
+
+# Among the variables below, these:
+#   perfexecdir
+#   template_dir
+#   mandir
+#   infodir
+#   htmldir
+#   ETC_PERFCONFIG (but not sysconfdir)
+# can be specified as a relative path some/where/else;
+# this is interpreted as relative to $(prefix) and "perf" at
+# runtime figures out where they are based on the path to the executable.
+# This can help installing the suite in a relocatable way.
+
+# Make the path relative to DESTDIR, not to prefix
+ifndef DESTDIR
+prefix ?= $(HOME)
+endif
+bindir_relative = bin
+bindir = $(abspath $(prefix)/$(bindir_relative))
+mandir = share/man
+infodir = share/info
+perfexecdir = libexec/perf-core
+sharedir = $(prefix)/share
+template_dir = share/perf-core/templates
+STRACE_GROUPS_DIR = share/perf-core/strace/groups
+htmldir = share/doc/perf-doc
+tipdir = share/doc/perf-tip
+srcdir = $(srctree)/tools/perf
+ifeq ($(prefix),/usr)
+sysconfdir = /etc
+ETC_PERFCONFIG = $(sysconfdir)/perfconfig
+else
+sysconfdir = $(prefix)/etc
+ETC_PERFCONFIG = etc/perfconfig
+endif
+ifndef lib
+ifeq ($(SRCARCH)$(IS_64_BIT), x861)
+lib = lib64
+else
+lib = lib
+endif
+endif # lib
+libdir = $(prefix)/$(lib)
+
+# Shell quote (do not use $(call) to accommodate ancient setups):
+ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
+STRACE_GROUPS_DIR_SQ = $(subst ','\'',$(STRACE_GROUPS_DIR))
+DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
+bindir_SQ = $(subst ','\'',$(bindir))
+mandir_SQ = $(subst ','\'',$(mandir))
+infodir_SQ = $(subst ','\'',$(infodir))
+perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
+template_dir_SQ = $(subst ','\'',$(template_dir))
+htmldir_SQ = $(subst ','\'',$(htmldir))
+tipdir_SQ = $(subst ','\'',$(tipdir))
+prefix_SQ = $(subst ','\'',$(prefix))
+sysconfdir_SQ = $(subst ','\'',$(sysconfdir))
+libdir_SQ = $(subst ','\'',$(libdir))
+srcdir_SQ = $(subst ','\'',$(srcdir))
+
+ifneq ($(filter /%,$(firstword $(perfexecdir))),)
+perfexec_instdir = $(perfexecdir)
+STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR)
+tip_instdir = $(tipdir)
+else
+perfexec_instdir = $(prefix)/$(perfexecdir)
+STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR)
+tip_instdir = $(prefix)/$(tipdir)
+endif
+perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
+STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR))
+tip_instdir_SQ = $(subst ','\'',$(tip_instdir))
+
+# If we install to $(HOME) we keep the traceevent default:
+# $(HOME)/.traceevent/plugins
+# Otherwise we install plugins into the global $(libdir).
+ifdef DESTDIR
+plugindir=$(libdir)/traceevent/plugins
+plugindir_SQ= $(subst ','\'',$(plugindir))
+endif
+
+print_var = $(eval $(print_var_code)) $(info $(MSG))
+define print_var_code
+    MSG = $(shell printf '...%30s: %s' $(1) $($(1)))
+endef
+
+ifeq ($(VF),1)
+  # Display EXTRA features which are detected manually
+  # from here with a feature_check call and thus cannot
+  # be part of the global state output.
+  $(foreach feat,$(FEATURE_TESTS_EXTRA),$(call feature_print_status,$(feat),))
+  $(call print_var,prefix)
+  $(call print_var,bindir)
+  $(call print_var,libdir)
+  $(call print_var,sysconfdir)
+  $(call print_var,LIBUNWIND_DIR)
+  $(call print_var,LIBDW_DIR)
+  $(call print_var,JDIR)
+
+  ifeq ($(dwarf-post-unwind),1)
+    $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))
+  endif
+  $(info )
+endif
+
+$(call detected_var,bindir_SQ)
+$(call detected_var,PYTHON_WORD)
+ifneq ($(OUTPUT),)
+$(call detected_var,OUTPUT)
+endif
+$(call detected_var,htmldir_SQ)
+$(call detected_var,infodir_SQ)
+$(call detected_var,mandir_SQ)
+$(call detected_var,ETC_PERFCONFIG_SQ)
+$(call detected_var,STRACE_GROUPS_DIR_SQ)
+$(call detected_var,prefix_SQ)
+$(call detected_var,perfexecdir_SQ)
+$(call detected_var,tipdir_SQ)
+$(call detected_var,srcdir_SQ)
+$(call detected_var,LIBDIR)
+$(call detected_var,GTK_CFLAGS)
+$(call detected_var,PERL_EMBED_CCOPTS)
+$(call detected_var,PYTHON_EMBED_CCOPTS)
diff --git a/Makefile.perf b/Makefile.perf
new file mode 100644
index 0000000..83e453d
--- /dev/null
+++ b/Makefile.perf
@@ -0,0 +1,887 @@
+include ../scripts/Makefile.include
+
+# The default target of this Makefile is...
+all:
+
+include ../scripts/utilities.mak
+
+# Define V to have a more verbose compile.
+#
+# Define VF to have a more verbose feature check output.
+#
+# Define O to save output files in a separate directory.
+#
+# Define ARCH as name of target architecture if you want cross-builds.
+#
+# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
+#
+# Define NO_LIBPERL to disable perl script extension.
+#
+# Define NO_LIBPYTHON to disable python script extension.
+#
+# Define PYTHON to point to the python binary if the default
+# `python' is not correct; for example: PYTHON=python2
+#
+# Define PYTHON_CONFIG to point to the python-config binary if
+# the default `$(PYTHON)-config' is not correct.
+#
+# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
+#
+# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
+#
+# Define LDFLAGS=-static to build a static binary.
+#
+# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
+#
+# Define EXCLUDE_EXTLIBS=-lmylib to exclude libmylib from the auto-generated
+# EXTLIBS.
+#
+# Define EXTRA_PERFLIBS to pass extra libraries to PERFLIBS.
+#
+# Define NO_DWARF if you do not want debug-info analysis feature at all.
+#
+# Define WERROR=0 to disable treating any warnings as errors.
+#
+# Define NO_NEWT if you do not want TUI support. (deprecated)
+#
+# Define NO_SLANG if you do not want TUI support.
+#
+# Define NO_GTK2 if you do not want GTK+ GUI support.
+#
+# Define NO_DEMANGLE if you do not want C++ symbol demangling.
+#
+# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds)
+#
+# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf
+# backtrace post unwind.
+#
+# Define NO_BACKTRACE if you do not want stack backtrace debug feature
+#
+# Define NO_LIBNUMA if you do not want numa perf benchmark
+#
+# Define NO_LIBAUDIT if you do not want libaudit support
+#
+# Define NO_LIBBIONIC if you do not want bionic support
+#
+# Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support
+# used for generating build-ids for ELFs generated by jitdump.
+#
+# Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support
+# for dwarf backtrace post unwind.
+#
+# Define NO_PERF_READ_VDSO32 if you do not want to build perf-read-vdso32
+# for reading the 32-bit compatibility VDSO in 64-bit mode
+#
+# Define NO_PERF_READ_VDSOX32 if you do not want to build perf-read-vdsox32
+# for reading the x32 mode 32-bit compatibility VDSO in 64-bit mode
+#
+# Define NO_ZLIB if you do not want to support compressed kernel modules
+#
+# Define NO_LIBBABELTRACE if you do not want libbabeltrace support
+# for CTF data format.
+#
+# Define NO_LZMA if you do not want to support compressed (xz) kernel modules
+#
+# Define NO_AUXTRACE if you do not want AUX area tracing support
+#
+# Define NO_LIBBPF if you do not want BPF support
+#
+# Define NO_SDT if you do not want to define SDT event in perf tools,
+# note that it doesn't disable SDT scanning support.
+#
+# Define FEATURES_DUMP to provide features detection dump file
+# and bypass the feature detection
+#
+# Define NO_JVMTI if you do not want jvmti agent built
+#
+# Define LIBCLANGLLVM if you DO want builtin clang and llvm support.
+# When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if
+# llvm-config is not in $PATH.
+
+# Define NO_CORESIGHT if you do not want support for CoreSight trace decoding.
+
+# As per kernel Makefile, avoid funny character set dependencies
+unexport LC_ALL
+LC_COLLATE=C
+LC_NUMERIC=C
+export LC_COLLATE LC_NUMERIC
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+ifneq ($(objtree),)
+#$(info Determined 'objtree' to be $(objtree))
+endif
+
+ifneq ($(OUTPUT),)
+#$(info Determined 'OUTPUT' to be $(OUTPUT))
+# Adding $(OUTPUT) as a directory to look for source files,
+# because we use generated output files as source dependencies
+# for flex/bison parsers.
+VPATH += $(OUTPUT)
+export VPATH
+endif
+
+ifeq ($(V),1)
+  Q =
+else
+  Q = @
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+LD += $(EXTRA_LDFLAGS)
+
+HOSTCC  ?= gcc
+HOSTLD  ?= ld
+HOSTAR  ?= ar
+
+PKG_CONFIG = $(CROSS_COMPILE)pkg-config
+LLVM_CONFIG ?= llvm-config
+
+RM      = rm -f
+LN      = ln -f
+MKDIR   = mkdir
+FIND    = find
+INSTALL = install
+FLEX    ?= flex
+BISON   ?= bison
+STRIP   = strip
+AWK     = awk
+
+# include Makefile.config by default and rule out
+# non-config cases
+config := 1
+
+NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf
+
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
+  config := 0
+endif
+endif
+
+# The fixdep build - we force the fixdep tool to be built as
+# the first target in a separate make session so that it is not
+# disturbed by any parallel make jobs. Once fixdep is done
+# we issue the requested build with the FIXDEP=1 variable.
+#
+# The fixdep build is disabled for $(NON_CONFIG_TARGETS)
+# targets, because it's not necessary.
+
+ifdef FIXDEP
+  force_fixdep := 0
+else
+  force_fixdep := $(config)
+endif
+
+export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
+export HOSTCC HOSTLD HOSTAR
+
+include $(srctree)/tools/build/Makefile.include
+
+ifeq ($(force_fixdep),1)
+goals := $(filter-out all sub-make, $(MAKECMDGOALS))
+
+$(goals) all: sub-make
+
+sub-make: fixdep
+	@./check-headers.sh
+	$(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals)
+
+else # force_fixdep
+
+LIB_DIR         = $(srctree)/tools/lib/api/
+TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
+BPF_DIR         = $(srctree)/tools/lib/bpf/
+SUBCMD_DIR      = $(srctree)/tools/lib/subcmd/
+
+# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
+# Without this setting the output feature dump file misses some features, for
+# example, liberty. Select all checkers so we won't get an incomplete feature
+# dump file.
+ifeq ($(config),1)
+ifdef MAKECMDGOALS
+ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump)
+FEATURE_TESTS := all
+endif
+endif
+include Makefile.config
+endif
+
+ifeq ($(config),0)
+include $(srctree)/tools/scripts/Makefile.arch
+-include arch/$(SRCARCH)/Makefile
+endif
+
+# The FEATURE_DUMP_EXPORT holds location of the actual
+# FEATURE_DUMP file to be used to bypass feature detection
+# (for bpf or any other subproject)
+ifeq ($(FEATURES_DUMP),)
+FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP)
+else
+FEATURE_DUMP_EXPORT := $(realpath $(FEATURES_DUMP))
+endif
+
+export prefix bindir sharedir sysconfdir DESTDIR
+
+# sparse is architecture-neutral, which means that we need to tell it
+# explicitly what architecture to check for. Fix this up for yours..
+SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
+
+# Guard against environment variables
+PYRF_OBJS =
+SCRIPT_SH =
+
+SCRIPT_SH += perf-archive.sh
+SCRIPT_SH += perf-with-kcore.sh
+
+grep-libs = $(filter -l%,$(1))
+strip-libs = $(filter-out -l%,$(1))
+
+ifneq ($(OUTPUT),)
+  TE_PATH=$(OUTPUT)
+  BPF_PATH=$(OUTPUT)
+  SUBCMD_PATH=$(OUTPUT)
+ifneq ($(subdir),)
+  API_PATH=$(OUTPUT)/../lib/api/
+else
+  API_PATH=$(OUTPUT)
+endif
+else
+  TE_PATH=$(TRACE_EVENT_DIR)
+  API_PATH=$(LIB_DIR)
+  BPF_PATH=$(BPF_DIR)
+  SUBCMD_PATH=$(SUBCMD_DIR)
+endif
+
+LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
+export LIBTRACEEVENT
+
+LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
+
+#
+# The static build has no dynsym table, so this does not work for
+# static build. Looks like linker starts to scream about that now
+# (in Fedora 26) so we need to switch it off for static build.
+DYNAMIC_LIST_LDFLAGS               = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
+LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS))
+
+LIBAPI = $(API_PATH)libapi.a
+export LIBAPI
+
+LIBBPF = $(BPF_PATH)libbpf.a
+
+LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a
+
+# python extension build directories
+PYTHON_EXTBUILD     := $(OUTPUT)python_ext_build/
+PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
+PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
+export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+
+python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf*.so
+
+PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
+
+SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
+
+PROGRAMS += $(OUTPUT)perf
+
+ifndef NO_PERF_READ_VDSO32
+PROGRAMS += $(OUTPUT)perf-read-vdso32
+endif
+
+ifndef NO_PERF_READ_VDSOX32
+PROGRAMS += $(OUTPUT)perf-read-vdsox32
+endif
+
+LIBJVMTI = libperf-jvmti.so
+
+ifndef NO_JVMTI
+PROGRAMS += $(OUTPUT)$(LIBJVMTI)
+endif
+
+# what 'all' will build and 'install' will install, in perfexecdir
+ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
+
+# what 'all' will build but not install in perfexecdir
+OTHER_PROGRAMS = $(OUTPUT)perf
+
+# Set paths to tools early so that they can be used for version tests.
+ifndef SHELL_PATH
+  SHELL_PATH = /bin/sh
+endif
+ifndef PERL_PATH
+  PERL_PATH = /usr/bin/perl
+endif
+
+export PERL_PATH
+
+LIB_FILE=$(OUTPUT)libperf.a
+
+PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD)
+ifndef NO_LIBBPF
+  PERFLIBS += $(LIBBPF)
+endif
+
+# We choose to avoid "if .. else if .. else .. endif endif"
+# because maintaining the nesting to match is a pain.  If
+# we had "elif" things would have been much nicer...
+
+ifneq ($(OUTPUT),)
+  CFLAGS += -I$(OUTPUT)
+endif
+
+ifndef NO_GTK2
+  ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so
+  GTK_IN := $(OUTPUT)gtk-in.o
+endif
+
+ifdef ASCIIDOC8
+  export ASCIIDOC8
+endif
+
+EXTLIBS := $(call filter-out,$(EXCLUDE_EXTLIBS),$(EXTLIBS))
+LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
+
+ifeq ($(USE_CLANG), 1)
+  CLANGLIBS_LIST = AST Basic CodeGen Driver Frontend Lex Tooling Edit Sema Analysis Parse Serialization
+  CLANGLIBS_NOEXT_LIST = $(foreach l,$(CLANGLIBS_LIST),$(shell $(LLVM_CONFIG) --libdir)/libclang$(l))
+  LIBCLANG = $(foreach l,$(CLANGLIBS_NOEXT_LIST),$(wildcard $(l).a $(l).so))
+  LIBS += -Wl,--start-group $(LIBCLANG) -Wl,--end-group
+endif
+
+ifeq ($(USE_LLVM), 1)
+  LIBLLVM = $(shell $(LLVM_CONFIG) --libs all) $(shell $(LLVM_CONFIG) --system-libs)
+  LIBS += -L$(shell $(LLVM_CONFIG) --libdir) $(LIBLLVM)
+endif
+
+ifeq ($(USE_CXX), 1)
+  LIBS += -lstdc++
+endif
+
+export INSTALL SHELL_PATH
+
+### Build rules
+
+SHELL = $(SHELL_PATH)
+
+beauty_outdir := $(OUTPUT)trace/beauty/generated
+beauty_ioctl_outdir := $(beauty_outdir)/ioctl
+drm_ioctl_array := $(beauty_ioctl_outdir)/drm_ioctl_array.c
+drm_hdr_dir := $(srctree)/tools/include/uapi/drm
+drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_outdir)')
+
+$(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl)
+	$(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@
+
+pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c
+asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
+pkey_alloc_access_rights_tbl := $(srctree)/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
+
+$(pkey_alloc_access_rights_array): $(asm_generic_hdr_dir)/mman-common.h $(pkey_alloc_access_rights_tbl)
+	$(Q)$(SHELL) '$(pkey_alloc_access_rights_tbl)' $(asm_generic_hdr_dir) > $@
+
+sndrv_ctl_ioctl_array := $(beauty_ioctl_outdir)/sndrv_ctl_ioctl_array.c
+sndrv_ctl_hdr_dir := $(srctree)/tools/include/uapi/sound
+sndrv_ctl_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
+
+$(sndrv_ctl_ioctl_array): $(sndrv_ctl_hdr_dir)/asound.h $(sndrv_ctl_ioctl_tbl)
+	$(Q)$(SHELL) '$(sndrv_ctl_ioctl_tbl)' $(sndrv_ctl_hdr_dir) > $@
+
+sndrv_pcm_ioctl_array := $(beauty_ioctl_outdir)/sndrv_pcm_ioctl_array.c
+sndrv_pcm_hdr_dir := $(srctree)/tools/include/uapi/sound
+sndrv_pcm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
+
+$(sndrv_pcm_ioctl_array): $(sndrv_pcm_hdr_dir)/asound.h $(sndrv_pcm_ioctl_tbl)
+	$(Q)$(SHELL) '$(sndrv_pcm_ioctl_tbl)' $(sndrv_pcm_hdr_dir) > $@
+
+kcmp_type_array := $(beauty_outdir)/kcmp_type_array.c
+kcmp_hdr_dir := $(srctree)/tools/include/uapi/linux/
+kcmp_type_tbl := $(srctree)/tools/perf/trace/beauty/kcmp_type.sh
+
+$(kcmp_type_array): $(kcmp_hdr_dir)/kcmp.h $(kcmp_type_tbl)
+	$(Q)$(SHELL) '$(kcmp_type_tbl)' $(kcmp_hdr_dir) > $@
+
+kvm_ioctl_array := $(beauty_ioctl_outdir)/kvm_ioctl_array.c
+kvm_hdr_dir := $(srctree)/tools/include/uapi/linux
+kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh
+
+$(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl)
+	$(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@
+
+vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
+vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
+vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
+
+$(vhost_virtio_ioctl_array): $(vhost_virtio_hdr_dir)/vhost.h $(vhost_virtio_ioctl_tbl)
+	$(Q)$(SHELL) '$(vhost_virtio_ioctl_tbl)' $(vhost_virtio_hdr_dir) > $@
+
+perf_ioctl_array := $(beauty_ioctl_outdir)/perf_ioctl_array.c
+perf_hdr_dir := $(srctree)/tools/include/uapi/linux
+perf_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/perf_ioctl.sh
+
+$(perf_ioctl_array): $(perf_hdr_dir)/perf_event.h $(perf_ioctl_tbl)
+	$(Q)$(SHELL) '$(perf_ioctl_tbl)' $(perf_hdr_dir) > $@
+
+madvise_behavior_array := $(beauty_outdir)/madvise_behavior_array.c
+madvise_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
+madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh
+
+$(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl)
+	$(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@
+
+prctl_option_array := $(beauty_outdir)/prctl_option_array.c
+prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
+prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
+
+$(prctl_option_array): $(prctl_hdr_dir)/prctl.h $(prctl_option_tbl)
+	$(Q)$(SHELL) '$(prctl_option_tbl)' $(prctl_hdr_dir) > $@
+
+arch_errno_name_array := $(beauty_outdir)/arch_errno_name_array.c
+arch_errno_hdr_dir := $(srctree)/tools
+arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
+
+$(arch_errno_name_array): $(arch_errno_tbl)
+	$(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@
+
+all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
+
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
+	$(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \
+        CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
+	  $(PYTHON_WORD) util/setup.py \
+	  --quiet build_ext; \
+	mkdir -p $(OUTPUT)python && \
+	cp $(PYTHON_EXTBUILD_LIB)perf*.so $(OUTPUT)python/
+
+please_set_SHELL_PATH_to_a_more_modern_shell:
+	$(Q)$$(:)
+
+shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
+
+strip: $(PROGRAMS) $(OUTPUT)perf
+	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
+
+PERF_IN := $(OUTPUT)perf-in.o
+
+JEVENTS       := $(OUTPUT)pmu-events/jevents
+JEVENTS_IN    := $(OUTPUT)pmu-events/jevents-in.o
+
+PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o
+
+export JEVENTS
+
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+$(PERF_IN): prepare FORCE
+	$(Q)$(MAKE) $(build)=perf
+
+$(JEVENTS_IN): FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=jevents
+
+$(JEVENTS): $(JEVENTS_IN)
+	$(QUIET_LINK)$(HOSTCC) $(JEVENTS_IN) -o $@
+
+$(PMU_EVENTS_IN): $(JEVENTS) FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
+
+$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
+		$(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@
+
+$(GTK_IN): FORCE
+	$(Q)$(MAKE) $(build)=gtk
+
+$(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS)
+	$(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS)
+
+$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
+
+$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
+	$(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
+
+$(SCRIPTS) : % : %.sh
+	$(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
+
+$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
+	$(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
+	$(Q)touch $(OUTPUT)PERF-VERSION-FILE
+
+# These can record PERF_VERSION
+perf.spec $(SCRIPTS) \
+	: $(OUTPUT)PERF-VERSION-FILE
+
+.SUFFIXES:
+
+#
+# If a target does not match any of the later rules then prefix it by $(OUTPUT)
+# This makes targets like 'make O=/tmp/perf perf.o' work in a natural way.
+#
+ifneq ($(OUTPUT),)
+%.o: $(OUTPUT)%.o
+	@echo "    # Redirected target $@ => $(OUTPUT)$@"
+pmu-events/%.o: $(OUTPUT)pmu-events/%.o
+	@echo "    # Redirected target $@ => $(OUTPUT)$@"
+util/%.o: $(OUTPUT)util/%.o
+	@echo "    # Redirected target $@ => $(OUTPUT)$@"
+bench/%.o: $(OUTPUT)bench/%.o
+	@echo "    # Redirected target $@ => $(OUTPUT)$@"
+tests/%.o: $(OUTPUT)tests/%.o
+	@echo "    # Redirected target $@ => $(OUTPUT)$@"
+endif
+
+# These two need to be here so that when O= is not used they take precedence
+# over the general rule for .o
+
+# get relative building directory (to $(OUTPUT))
+# and '.' if it's $(OUTPUT) itself
+__build-dir = $(subst $(OUTPUT),,$(dir $@))
+build-dir   = $(if $(__build-dir),$(__build-dir),.)
+
+prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \
+	$(pkey_alloc_access_rights_array) \
+	$(sndrv_pcm_ioctl_array) \
+	$(sndrv_ctl_ioctl_array) \
+	$(kcmp_type_array) \
+	$(kvm_ioctl_array) \
+	$(vhost_virtio_ioctl_array) \
+	$(madvise_behavior_array) \
+	$(perf_ioctl_array) \
+	$(prctl_option_array) \
+	$(arch_errno_name_array)
+
+$(OUTPUT)%.o: %.c prepare FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)%.i: %.c prepare FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)%.s: %.c prepare FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)%-bison.o: %.c prepare FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)%-flex.o: %.c prepare FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)%.o: %.S prepare FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)%.i: %.S prepare FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)perf-%: %.o $(PERFLIBS)
+	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
+
+ifndef NO_PERF_READ_VDSO32
+$(OUTPUT)perf-read-vdso32: perf-read-vdso.c util/find-vdso-map.c
+	$(QUIET_CC)$(CC) -m32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
+endif
+
+ifndef NO_PERF_READ_VDSOX32
+$(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c
+	$(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
+endif
+
+ifndef NO_JVMTI
+LIBJVMTI_IN := $(OUTPUT)jvmti/jvmti-in.o
+
+$(LIBJVMTI_IN): FORCE
+	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti
+
+$(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN)
+	$(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $<
+endif
+
+$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
+
+LIBPERF_IN := $(OUTPUT)libperf-in.o
+
+$(LIBPERF_IN): prepare FORCE
+	$(Q)$(MAKE) $(build)=libperf
+
+$(LIB_FILE): $(LIBPERF_IN)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS)
+
+LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
+
+$(LIBTRACEEVENT): FORCE
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
+
+libtraceevent_plugins: FORCE
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
+
+$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
+
+$(LIBTRACEEVENT)-clean:
+	$(call QUIET_CLEAN, libtraceevent)
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
+
+install-traceevent-plugins: libtraceevent_plugins
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
+
+$(LIBAPI): FORCE
+	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
+
+$(LIBAPI)-clean:
+	$(call QUIET_CLEAN, libapi)
+	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
+
+$(LIBBPF): FORCE
+	$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
+
+$(LIBBPF)-clean:
+	$(call QUIET_CLEAN, libbpf)
+	$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null
+
+$(LIBSUBCMD): FORCE
+	$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
+
+$(LIBSUBCMD)-clean:
+	$(call QUIET_CLEAN, libsubcmd)
+	$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean
+
+help:
+	@echo 'Perf make targets:'
+	@echo '  doc		- make *all* documentation (see below)'
+	@echo '  man		- make manpage documentation (access with man <foo>)'
+	@echo '  html		- make html documentation'
+	@echo '  info		- make GNU info documentation (access with info <foo>)'
+	@echo '  pdf		- make pdf documentation'
+	@echo '  TAGS		- use etags to make tag information for source browsing'
+	@echo '  tags		- use ctags to make tag information for source browsing'
+	@echo '  cscope	- use cscope to make interactive browsing database'
+	@echo ''
+	@echo 'Perf install targets:'
+	@echo '  NOTE: documentation build requires asciidoc, xmlto packages to be installed'
+	@echo '  HINT: use "prefix" or "DESTDIR" to install to a particular'
+	@echo '        path like "make prefix=/usr/local install install-doc"'
+	@echo '  install	- install compiled binaries'
+	@echo '  install-doc	- install *all* documentation'
+	@echo '  install-man	- install manpage documentation'
+	@echo '  install-html	- install html documentation'
+	@echo '  install-info	- install GNU info documentation'
+	@echo '  install-pdf	- install pdf documentation'
+	@echo ''
+	@echo '  quick-install-doc	- alias for quick-install-man'
+	@echo '  quick-install-man	- install the documentation quickly'
+	@echo '  quick-install-html	- install the html documentation quickly'
+	@echo ''
+	@echo 'Perf maintainer targets:'
+	@echo '  clean			- clean all binary objects and build output'
+
+
+DOC_TARGETS := doc man html info pdf
+
+INSTALL_DOC_TARGETS := $(patsubst %,install-%,$(DOC_TARGETS)) try-install-man
+INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
+
+# 'make doc' should call 'make -C Documentation all'
+$(DOC_TARGETS):
+	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
+
+TAG_FOLDERS= . ../lib ../include
+TAG_FILES= ../../include/uapi/linux/perf_event.h
+
+TAGS:
+	$(QUIET_GEN)$(RM) TAGS; \
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs etags -a $(TAG_FILES)
+
+tags:
+	$(QUIET_GEN)$(RM) tags; \
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs ctags -a $(TAG_FILES)
+
+cscope:
+	$(QUIET_GEN)$(RM) cscope*; \
+	$(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs cscope -b $(TAG_FILES)
+
+### Testing rules
+
+# GNU make supports exporting all variables by "export" without parameters.
+# However, the environment gets quite big, and some programs have problems
+# with that.
+
+check: $(OUTPUT)common-cmds.h
+	if sparse; \
+	then \
+		for i in *.c */*.c; \
+		do \
+			sparse $(CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
+		done; \
+	else \
+		exit 1; \
+	fi
+
+### Installation rules
+
+ifndef NO_GTK2
+install-gtk: $(OUTPUT)libperf-gtk.so
+	$(call QUIET_INSTALL, 'GTK UI') \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
+		$(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)'
+else
+install-gtk:
+endif
+
+install-tools: all install-gtk
+	$(call QUIET_INSTALL, binaries) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
+		$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
+		$(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'
+ifndef NO_PERF_READ_VDSO32
+	$(call QUIET_INSTALL, perf-read-vdso32) \
+		$(INSTALL) $(OUTPUT)perf-read-vdso32 '$(DESTDIR_SQ)$(bindir_SQ)';
+endif
+ifndef NO_PERF_READ_VDSOX32
+	$(call QUIET_INSTALL, perf-read-vdsox32) \
+		$(INSTALL) $(OUTPUT)perf-read-vdsox32 '$(DESTDIR_SQ)$(bindir_SQ)';
+endif
+ifndef NO_JVMTI
+	$(call QUIET_INSTALL, $(LIBJVMTI)) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
+		$(INSTALL) $(OUTPUT)$(LIBJVMTI) '$(DESTDIR_SQ)$(libdir_SQ)';
+endif
+	$(call QUIET_INSTALL, libexec) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+	$(call QUIET_INSTALL, perf-archive) \
+		$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+	$(call QUIET_INSTALL, perf-with-kcore) \
+		$(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifndef NO_LIBAUDIT
+	$(call QUIET_INSTALL, strace/groups) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \
+		$(INSTALL) trace/strace/groups/* -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'
+endif
+ifndef NO_LIBPERL
+	$(call QUIET_INSTALL, perl-scripts) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+		$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+		$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'; \
+		$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+endif
+ifndef NO_LIBPYTHON
+	$(call QUIET_INSTALL, python-scripts) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \
+		$(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
+		$(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \
+		$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
+endif
+	$(call QUIET_INSTALL, perf_completion-script) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
+		$(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+	$(call QUIET_INSTALL, perf-tip) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(tip_instdir_SQ)'; \
+		$(INSTALL) Documentation/tips.txt -t '$(DESTDIR_SQ)$(tip_instdir_SQ)'
+
+install-tests: all install-gtk
+	$(call QUIET_INSTALL, tests) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+		$(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
+		$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
+		$(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
+		$(INSTALL) tests/shell/lib/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'
+
+install-bin: install-tools install-tests install-traceevent-plugins
+
+install: install-bin try-install-man
+
+install-python_ext:
+	$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
+
+# 'make install-doc' should call 'make -C Documentation install'
+$(INSTALL_DOC_TARGETS):
+	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
+
+### Cleaning rules
+
+#
+# This is here, not in Makefile.config, because Makefile.config does
+# not get included for the clean target:
+#
+config-clean:
+	$(call QUIET_CLEAN, config)
+	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
+
+python-clean:
+	$(python-clean)
+
+# The generated *_array paths already start with $(OUTPUT) and $(LIBJVMTI) already ends in .so, so neither is
+# decorated again; the llvm-src-* list is expanded by make since $(SHELL) need not support {a,b} brace expansion.
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean
+	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
+	$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+	$(Q)$(RM) $(OUTPUT).config-detected
+	$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI)
+	$(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
+		$(OUTPUT)util/intel-pt-decoder/inat-tables.c \
+		$(addprefix $(OUTPUT)tests/llvm-src-,base.c kbuild.c prologue.c relocation.c) \
+		$(OUTPUT)pmu-events/pmu-events.c \
+		$(madvise_behavior_array) \
+		$(drm_ioctl_array) \
+		$(pkey_alloc_access_rights_array) \
+		$(sndrv_ctl_ioctl_array) $(sndrv_pcm_ioctl_array) \
+		$(kvm_ioctl_array) $(vhost_virtio_ioctl_array) \
+		$(kcmp_type_array) \
+		$(perf_ioctl_array) \
+		$(prctl_option_array) \
+		$(arch_errno_name_array)
+	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
+
+#
+# Copy FEATURE-DUMP to the $(FEATURE_DUMP_COPY) file, if that
+# variable is defined; otherwise just report where FEATURE-DUMP is.
+feature-dump:
+ifdef FEATURE_DUMP_COPY
+	@cp $(OUTPUT)FEATURE-DUMP $(FEATURE_DUMP_COPY)
+	@echo "FEATURE-DUMP file copied into $(FEATURE_DUMP_COPY)"
+else
+	@echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
+endif
+
+#
+# Trick: if ../../.git does not exist - we are building out of tree for example,
+# then force version regeneration:
+#
+ifeq ($(wildcard ../../.git/HEAD),)
+    GIT-HEAD-PHONY = ../../.git/HEAD
+else
+    GIT-HEAD-PHONY =
+endif
+
+FORCE:
+
+.PHONY: all install clean config-clean strip install-gtk
+.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
+.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
+.PHONY: libtraceevent_plugins archheaders
+
+endif # force_fixdep
diff --git a/arch/Build b/arch/Build
new file mode 100644
index 0000000..d9b6af8
--- /dev/null
+++ b/arch/Build
@@ -0,0 +1,2 @@
+libperf-y += common.o
+libperf-y += $(SRCARCH)/
diff --git a/arch/alpha/Build b/arch/alpha/Build
new file mode 100644
index 0000000..1bb8bf6
--- /dev/null
+++ b/arch/alpha/Build
@@ -0,0 +1 @@
+# empty
diff --git a/arch/arm/Build b/arch/arm/Build
new file mode 100644
index 0000000..41bf61d
--- /dev/null
+++ b/arch/arm/Build
@@ -0,0 +1,2 @@
+libperf-y += util/
+libperf-$(CONFIG_DWARF_UNWIND) += tests/
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
new file mode 100644
index 0000000..18b1351
--- /dev/null
+++ b/arch/arm/Makefile
@@ -0,0 +1,4 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
+PERF_HAVE_JITDUMP := 1
diff --git a/arch/arm/annotate/instructions.c b/arch/arm/annotate/instructions.c
new file mode 100644
index 0000000..f64516d
--- /dev/null
+++ b/arch/arm/annotate/instructions.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <regex.h>
+
+struct arm_annotate {
+	regex_t call_insn,
+		jump_insn;
+};
+
+/* Classify an ARM mnemonic as call or jump with the precompiled regexes. */
+static struct ins_ops *arm__associate_instruction_ops(struct arch *arch, const char *name)
+{
+	struct arm_annotate *priv = arch->priv;
+	struct ins_ops *ops = NULL;
+	regmatch_t m[2];
+
+	if (regexec(&priv->call_insn, name, 2, m, 0) == 0)
+		ops = &call_ops;
+	else if (regexec(&priv->jump_insn, name, 2, m, 0) == 0)
+		ops = &jump_ops;
+
+	if (ops != NULL)
+		arch__associate_ins_ops(arch, name, ops);
+	return ops;
+}
+
+/*
+ * One-time ARM annotate setup: compile the branch regexes, install the hooks.
+ */
+static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+	struct arm_annotate *priv;
+
+	if (arch->initialized)
+		return 0;
+
+	priv = zalloc(sizeof(*priv));
+	if (priv == NULL)
+		return -1;
+
+#define ARM_CONDS "(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl|vc|vs)"
+	if (regcomp(&priv->call_insn, "^blx?" ARM_CONDS "?$", REG_EXTENDED))
+		goto out_free_arm;
+	if (regcomp(&priv->jump_insn, "^bx?" ARM_CONDS "?$", REG_EXTENDED))
+		goto out_free_call;
+#undef ARM_CONDS
+
+	arch->priv	  = priv;
+	arch->associate_instruction_ops   = arm__associate_instruction_ops;
+	arch->objdump.comment_char	  = ';';
+	arch->objdump.skip_functions_char = '+';
+	arch->initialized = true;
+	return 0;
+
+out_free_call:
+	regfree(&priv->call_insn);
+out_free_arm:
+	free(priv);
+	return -1;
+}
diff --git a/arch/arm/include/arch-tests.h b/arch/arm/include/arch-tests.h
new file mode 100644
index 0000000..90ec4c8
--- /dev/null
+++ b/arch/arm/include/arch-tests.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+#endif
+
+extern struct test arch_tests[];
+
+#endif
diff --git a/arch/arm/include/dwarf-regs-table.h b/arch/arm/include/dwarf-regs-table.h
new file mode 100644
index 0000000..5a45046
--- /dev/null
+++ b/arch/arm/include/dwarf-regs-table.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const arm_regstr_tbl[] = {
+	"%r0", "%r1", "%r2", "%r3", "%r4",
+	"%r5", "%r6", "%r7", "%r8", "%r9", "%r10",
+	"%fp", "%ip", "%sp", "%lr", "%pc",
+};
+#endif
diff --git a/arch/arm/include/perf_regs.h b/arch/arm/include/perf_regs.h
new file mode 100644
index 0000000..ed20e02
--- /dev/null
+++ b/arch/arm/include/perf_regs.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK	((1ULL << PERF_REG_ARM_MAX) - 1)
+#define PERF_REGS_MAX	PERF_REG_ARM_MAX
+#define PERF_SAMPLE_REGS_ABI	PERF_SAMPLE_REGS_ABI_32
+
+#define PERF_REG_IP	PERF_REG_ARM_PC
+#define PERF_REG_SP	PERF_REG_ARM_SP
+
+static inline const char *perf_reg_name(int id)
+{
+	switch (id) {
+	case PERF_REG_ARM_R0:
+		return "r0";
+	case PERF_REG_ARM_R1:
+		return "r1";
+	case PERF_REG_ARM_R2:
+		return "r2";
+	case PERF_REG_ARM_R3:
+		return "r3";
+	case PERF_REG_ARM_R4:
+		return "r4";
+	case PERF_REG_ARM_R5:
+		return "r5";
+	case PERF_REG_ARM_R6:
+		return "r6";
+	case PERF_REG_ARM_R7:
+		return "r7";
+	case PERF_REG_ARM_R8:
+		return "r8";
+	case PERF_REG_ARM_R9:
+		return "r9";
+	case PERF_REG_ARM_R10:
+		return "r10";
+	case PERF_REG_ARM_FP:
+		return "fp";
+	case PERF_REG_ARM_IP:
+		return "ip";
+	case PERF_REG_ARM_SP:
+		return "sp";
+	case PERF_REG_ARM_LR:
+		return "lr";
+	case PERF_REG_ARM_PC:
+		return "pc";
+	default:
+		return NULL;
+	}
+	/* not reached: every switch arm above returns */
+	return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/arch/arm/tests/Build b/arch/arm/tests/Build
new file mode 100644
index 0000000..883c57f
--- /dev/null
+++ b/arch/arm/tests/Build
@@ -0,0 +1,4 @@
+libperf-y += regs_load.o
+libperf-y += dwarf-unwind.o
+
+libperf-y += arch-tests.o
diff --git a/arch/arm/tests/arch-tests.c b/arch/arm/tests/arch-tests.c
new file mode 100644
index 0000000..5b1543c
--- /dev/null
+++ b/arch/arm/tests/arch-tests.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+	{
+		.desc = "DWARF unwind",
+		.func = test__dwarf_unwind,
+	},
+#endif
+	{
+		.func = NULL,
+	},
+};
diff --git a/arch/arm/tests/dwarf-unwind.c b/arch/arm/tests/dwarf-unwind.c
new file mode 100644
index 0000000..8cb3477
--- /dev/null
+++ b/arch/arm/tests/dwarf-unwind.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+/*
+ * Snapshot up to STACK_SIZE bytes at regs' SP into sample->user_stack.
+ */
+static int sample_ustack(struct perf_sample *sample,
+			 struct thread *thread, u64 *regs)
+{
+	unsigned long sp = (unsigned long) regs[PERF_REG_ARM_SP];
+	u64 len, *buf = malloc(STACK_SIZE);
+	struct map *map;
+
+	if (buf == NULL) {
+		pr_debug("failed to allocate sample uregs data\n");
+		return -1;
+	}
+
+	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	if (map == NULL) {
+		pr_debug("failed to get stack map\n");
+		free(buf);
+		return -1;
+	}
+
+	len = map->end - sp;
+	if (len > STACK_SIZE)
+		len = STACK_SIZE;
+
+	memcpy(buf, (void *) sp, len);
+	sample->user_stack.data = (char *) buf;
+	sample->user_stack.size = len;
+	return 0;
+}
+
+/* Fill sample->user_regs from the live registers, then capture the stack. */
+int test__arch_unwind_sample(struct perf_sample *sample,
+			     struct thread *thread)
+{
+	/* perf_regs_load() needs a zeroed buffer, hence calloc(). */
+	u64 *regbuf = calloc(PERF_REGS_MAX, sizeof(*regbuf));
+
+	if (regbuf == NULL) {
+		pr_debug("failed to allocate sample uregs data\n");
+		return -1;
+	}
+
+	perf_regs_load(regbuf);
+	sample->user_regs.abi  = PERF_SAMPLE_REGS_ABI;
+	sample->user_regs.regs = regbuf;
+	sample->user_regs.mask = PERF_REGS_MASK;
+
+	return sample_ustack(sample, thread, regbuf);
+}
diff --git a/arch/arm/tests/regs_load.S b/arch/arm/tests/regs_load.S
new file mode 100644
index 0000000..6e2495c
--- /dev/null
+++ b/arch/arm/tests/regs_load.S
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
+#define R0 0x00
+#define R1 0x08
+#define R2 0x10
+#define R3 0x18
+#define R4 0x20
+#define R5 0x28
+#define R6 0x30
+#define R7 0x38
+#define R8 0x40
+#define R9 0x48
+#define SL 0x50
+#define FP 0x58
+#define IP 0x60
+#define SP 0x68
+#define LR 0x70
+#define PC 0x78
+
+/*
+ * Implementation of void perf_regs_load(u64 *regs);
+ *
+ * This function fills in the 'regs' buffer from the actual register values,
+ * in the way the perf built-in unwinding test expects them:
+ * - the PC at the time of the call to this function. Since this function
+ *   is called using a bl instruction, the PC value is taken from LR.
+ * The built-in unwinding test then unwinds the call stack from the dwarf
+ * information in unwind__get_entries.
+ *
+ * Notes:
+ * - the 8 bytes stride in the registers offsets comes from the fact
+ * that the registers are stored in an u64 array (u64 *regs),
+ * - the regs buffer needs to be zeroed before the call to this function,
+ * in this case using a calloc in dwarf-unwind.c.
+ */
+
+.text
+.type perf_regs_load,%function
+ENTRY(perf_regs_load)
+	str r0, [r0, #R0]
+	str r1, [r0, #R1]
+	str r2, [r0, #R2]
+	str r3, [r0, #R3]
+	str r4, [r0, #R4]
+	str r5, [r0, #R5]
+	str r6, [r0, #R6]
+	str r7, [r0, #R7]
+	str r8, [r0, #R8]
+	str r9, [r0, #R9]
+	str sl, [r0, #SL]
+	str fp, [r0, #FP]
+	str ip, [r0, #IP]
+	str sp, [r0, #SP]
+	str lr, [r0, #LR]
+	str lr, [r0, #PC]	// store pc as lr in order to skip the call
+	                        //  to this function
+	mov pc, lr
+ENDPROC(perf_regs_load)
diff --git a/arch/arm/util/Build b/arch/arm/util/Build
new file mode 100644
index 0000000..e64c5f2
--- /dev/null
+++ b/arch/arm/util/Build
@@ -0,0 +1,6 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+
+libperf-$(CONFIG_LOCAL_LIBUNWIND)    += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o
diff --git a/arch/arm/util/auxtrace.c b/arch/arm/util/auxtrace.c
new file mode 100644
index 0000000..1ce6bdb
--- /dev/null
+++ b/arch/arm/util/auxtrace.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <stdbool.h>
+#include <linux/coresight-pmu.h>
+
+#include "../../util/auxtrace.h"
+#include "../../util/evlist.h"
+#include "../../util/pmu.h"
+#include "cs-etm.h"
+#include "arm-spe.h"
+
+/* Find all SPE PMUs; on failure: NULL, *err set, *nr_spes left untouched. */
+static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
+{
+	int i, first = *nr_spes, nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	/* arm_spe_xxxxxxxxx\0 */
+	char arm_spe_pmu_name[sizeof(ARM_SPE_PMU_NAME) + 10];
+	struct perf_pmu **pmus = zalloc(sizeof(*pmus) * nr_cpus);
+
+	if (!pmus) {
+		pr_err("spes alloc failed\n");
+		*err = -ENOMEM;
+		return NULL;
+	}
+
+	for (i = 0; i < nr_cpus; i++) {
+		if (sprintf(arm_spe_pmu_name, "%s%d", ARM_SPE_PMU_NAME, i) < 0) {
+			pr_err("sprintf failed\n");
+			free(pmus);		/* don't leak the array */
+			*nr_spes = first;	/* nor report entries we dropped */
+			*err = -ENOMEM;
+			return NULL;
+		}
+
+		pmus[*nr_spes] = perf_pmu__find(arm_spe_pmu_name);
+		if (pmus[*nr_spes]) {
+			pr_debug2("%s %d: arm_spe_pmu %d type %d name %s\n",
+				 __func__, __LINE__, *nr_spes,
+				 pmus[*nr_spes]->type, pmus[*nr_spes]->name);
+			(*nr_spes)++;
+		}
+	}
+
+	return pmus;
+}
+
+/* Return the ETM or (arm64 only) SPE recorder matching an event in evlist. */
+struct auxtrace_record
+*auxtrace_record__init(struct perf_evlist *evlist, int *err)
+{
+	struct perf_pmu	*cs_etm_pmu;
+	struct perf_evsel *evsel;
+	bool found_etm = false, found_spe = false;
+	static struct perf_pmu **arm_spe_pmus = NULL;
+	static int nr_spes = 0;
+	int i = 0;
+
+	if (!evlist)
+		return NULL;
+
+	cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
+
+	if (!arm_spe_pmus)
+		arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (cs_etm_pmu && evsel->attr.type == cs_etm_pmu->type)
+			found_etm = true;
+
+		/* Stop scanning once matched so 'i' keeps indexing the SPE PMU */
+		if (!arm_spe_pmus || !nr_spes || found_spe)
+			continue;
+
+		for (i = 0; i < nr_spes; i++) {
+			if (evsel->attr.type == arm_spe_pmus[i]->type) {
+				found_spe = true;
+				break;
+			}
+		}
+	}
+
+	if (found_etm && found_spe) {
+		pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
+		*err = -EOPNOTSUPP;
+		return NULL;
+	}
+
+	if (found_etm)
+		return cs_etm_record_init(err);
+
+#if defined(__aarch64__)
+	if (found_spe)
+		return arm_spe_recording_init(err, arm_spe_pmus[i]);
+#endif
+
+	/*
+	 * Clear 'err' even if we haven't found an event - that way perf
+	 * record can still be used even if tracers aren't present.  The NULL
+	 * return value will take care of telling the infrastructure HW tracing
+	 * isn't available.
+	 */
+	*err = 0;
+	return NULL;
+}
diff --git a/arch/arm/util/cs-etm.c b/arch/arm/util/cs-etm.c
new file mode 100644
index 0000000..2f595cd
--- /dev/null
+++ b/arch/arm/util/cs-etm.c
@@ -0,0 +1,651 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <api/fs/fs.h>
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/coresight-pmu.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+
+#include "cs-etm.h"
+#include "../../perf.h"
+#include "../../util/auxtrace.h"
+#include "../../util/cpumap.h"
+#include "../../util/evlist.h"
+#include "../../util/evsel.h"
+#include "../../util/pmu.h"
+#include "../../util/thread_map.h"
+#include "../../util/cs-etm.h"
+
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#define ENABLE_SINK_MAX	128
+#define CS_BUS_DEVICE_PATH "/bus/coresight/devices/"
+
+struct cs_etm_recording {
+	struct auxtrace_record	itr;
+	struct perf_pmu		*cs_etm_pmu;
+	struct perf_evlist	*evlist;
+	bool			snapshot_mode;
+	size_t			snapshot_size;
+};
+
+static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu);
+
+/* Parse the optional snapshot size string and turn snapshot mode on in opts. */
+static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr,
+					 struct record_opts *opts,
+					 const char *str)
+{
+	struct cs_etm_recording *rec;
+	unsigned long long size = 0;
+	char *end;
+
+	if (str != NULL) {
+		size = strtoull(str, &end, 0);
+		if (*end != '\0' || size > SIZE_MAX)
+			return -1;
+	}
+
+	rec = container_of(itr, struct cs_etm_recording, itr);
+	rec->snapshot_size = size;
+	opts->auxtrace_snapshot_mode = true;
+	opts->auxtrace_snapshot_size = size;
+	return 0;
+}
+
+static int cs_etm_recording_options(struct auxtrace_record *itr,
+				    struct perf_evlist *evlist,
+				    struct record_opts *opts)
+{
+	struct cs_etm_recording *ptr =
+				container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+	struct perf_evsel *evsel, *cs_etm_evsel = NULL;
+	const struct cpu_map *cpus = evlist->cpus;
+	bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0);
+
+	ptr->evlist = evlist;
+	ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == cs_etm_pmu->type) {
+			if (cs_etm_evsel) {
+				pr_err("There may be only one %s event\n",
+				       CORESIGHT_ETM_PMU_NAME);
+				return -EINVAL;
+			}
+			evsel->attr.freq = 0;
+			evsel->attr.sample_period = 1;
+			cs_etm_evsel = evsel;
+			opts->full_auxtrace = true;
+		}
+	}
+
+	/* no need to continue if no event of interest was found */
+	if (!cs_etm_evsel)
+		return 0;
+
+	if (opts->use_clockid) {
+		pr_err("Cannot use clockid (-k option) with %s\n",
+		       CORESIGHT_ETM_PMU_NAME);
+		return -EINVAL;
+	}
+
+	/* we are in snapshot mode */
+	if (opts->auxtrace_snapshot_mode) {
+		/*
+		 * No size was given to '-S' or '-m,', so go with
+		 * the default
+		 */
+		if (!opts->auxtrace_snapshot_size &&
+		    !opts->auxtrace_mmap_pages) {
+			if (privileged) {
+				opts->auxtrace_mmap_pages = MiB(4) / page_size;
+			} else {
+				opts->auxtrace_mmap_pages =
+							KiB(128) / page_size;
+				if (opts->mmap_pages == UINT_MAX)
+					opts->mmap_pages = KiB(256) / page_size;
+			}
+		} else if (!opts->auxtrace_mmap_pages && !privileged &&
+						opts->mmap_pages == UINT_MAX) {
+			opts->mmap_pages = KiB(256) / page_size;
+		}
+
+		/*
+		 * '-m,xyz' was specified but no snapshot size, so make the
+		 * snapshot size as big as the auxtrace mmap area.
+		 */
+		if (!opts->auxtrace_snapshot_size) {
+			opts->auxtrace_snapshot_size =
+				opts->auxtrace_mmap_pages * (size_t)page_size;
+		}
+
+		/*
+		 * -Sxyz was specified but no auxtrace mmap area, so make the
+		 * auxtrace mmap area big enough to fit the requested snapshot
+		 * size.
+		 */
+		if (!opts->auxtrace_mmap_pages) {
+			size_t sz = opts->auxtrace_snapshot_size;
+
+			sz = round_up(sz, page_size) / page_size;
+			opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+		}
+
+		/* Snapshot size can't be bigger than the auxtrace area */
+		if (opts->auxtrace_snapshot_size >
+				opts->auxtrace_mmap_pages * (size_t)page_size) {
+			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+			       opts->auxtrace_snapshot_size,
+			       opts->auxtrace_mmap_pages * (size_t)page_size);
+			return -EINVAL;
+		}
+
+		/* Something went wrong somewhere - this shouldn't happen */
+		if (!opts->auxtrace_snapshot_size ||
+		    !opts->auxtrace_mmap_pages) {
+			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+			return -EINVAL;
+		}
+	}
+
+	/* We are in full trace mode but '-m,xyz' wasn't specified */
+	if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+		if (privileged) {
+			opts->auxtrace_mmap_pages = MiB(4) / page_size;
+		} else {
+			opts->auxtrace_mmap_pages = KiB(128) / page_size;
+			if (opts->mmap_pages == UINT_MAX)
+				opts->mmap_pages = KiB(256) / page_size;
+		}
+
+	}
+
+	/* Validate auxtrace_mmap_pages provided by user */
+	if (opts->auxtrace_mmap_pages) {
+		unsigned int max_page = (KiB(128) / page_size);
+		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+
+		if (!privileged &&
+		    opts->auxtrace_mmap_pages > max_page) {
+			opts->auxtrace_mmap_pages = max_page;
+			pr_err("auxtrace too big, truncating to %d\n",
+			       max_page);
+		}
+
+		if (!is_power_of_2(sz)) {
+			pr_err("Invalid mmap size for %s: must be a power of 2\n",
+			       CORESIGHT_ETM_PMU_NAME);
+			return -EINVAL;
+		}
+	}
+
+	if (opts->auxtrace_snapshot_mode)
+		pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME,
+			  opts->auxtrace_snapshot_size);
+
+	/*
+	 * To obtain the auxtrace buffer file descriptor, the auxtrace
+	 * event must come first.
+	 */
+	perf_evlist__to_front(evlist, cs_etm_evsel);
+
+	/*
+	 * In the case of per-cpu mmaps, we need the CPU on the
+	 * AUX event.
+	 */
+	if (!cpu_map__empty(cpus))
+		perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
+
+	/* Add dummy event to keep tracking */
+	if (opts->full_auxtrace) {
+		struct perf_evsel *tracking_evsel;
+		int err;
+
+		err = parse_events(evlist, "dummy:u", NULL);
+		if (err)
+			return err;
+
+		tracking_evsel = perf_evlist__last(evlist);
+		perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+		tracking_evsel->attr.freq = 0;
+		tracking_evsel->attr.sample_period = 1;
+
+		/* In per-cpu case, always need the time of mmap events etc */
+		if (!cpu_map__empty(cpus))
+			perf_evsel__set_sample_bit(tracking_evsel, TIME);
+	}
+
+	return 0;
+}
+
+/*
+ * Return perf_event_attr::config of the first CoreSight ETM event in the
+ * evlist, or 0 when the evlist holds no such event.
+ */
+static u64 cs_etm_get_config(struct auxtrace_record *itr)
+{
+	struct cs_etm_recording *rec =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(rec->evlist, pos) {
+		if (pos->attr.type == rec->cs_etm_pmu->type) {
+			/*
+			 * Variable perf_event_attr::config is assigned to
+			 * ETMv3/PTM.  The bit fields have been made to match
+			 * the ETMv3.5 ETRMCR register specification.  See the
+			 * PMU_FORMAT_ATTR() declarations in
+			 * drivers/hwtracing/coresight/coresight-perf.c for
+			 * details.
+			 */
+			return pos->attr.config;
+		}
+	}
+
+	return 0;
+}
+
+#ifndef BIT
+#define BIT(N) (1UL << (N))
+#endif
+
+/* Remap the ETMv3-style option bits of the event config to ETMv4 bits. */
+static u64 cs_etmv4_get_config(struct auxtrace_record *itr)
+{
+	/*
+	 * The perf event variable config bits represent both
+	 * the command line options and register programming
+	 * bits in ETMv3/PTM. For ETMv4 we must remap options
+	 * to real bits
+	 */
+	const u64 opts = cs_etm_get_config(itr);
+	u64 etmv4_cfg = 0;
+
+	if (opts & BIT(ETM_OPT_CYCACC))
+		etmv4_cfg |= BIT(ETM4_CFG_BIT_CYCACC);
+	if (opts & BIT(ETM_OPT_TS))
+		etmv4_cfg |= BIT(ETM4_CFG_BIT_TS);
+	if (opts & BIT(ETM_OPT_RETSTK))
+		etmv4_cfg |= BIT(ETM4_CFG_BIT_RETSTK);
+
+	return etmv4_cfg;
+}
+
+/*
+ * Compute the size of the private area of the auxtrace info event.
+ *
+ * The area is sized for the common header plus one ETMv3 or ETMv4 record per
+ * CPU.  A CPU is counted when it is online and, if the evlist restricts the
+ * session to specific CPUs, when it is also part of evlist->cpus.
+ * Note that both parameters are used despite their __maybe_unused marking.
+ */
+static size_t
+cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+		      struct perf_evlist *evlist __maybe_unused)
+{
+	struct cpu_map *wanted = evlist->cpus;
+	struct cpu_map *online = cpu_map__new(NULL);
+	const bool filter = !cpu_map__empty(wanted);
+	int nr_v3 = 0;
+	int nr_v4 = 0;
+	int cpu;
+
+	for (cpu = 0; cpu < cpu__max_cpu(); cpu++) {
+		/* cpu map is not empty, we have specific CPUs to work with */
+		if (filter && !cpu_map__has(wanted, cpu))
+			continue;
+
+		/* only online CPUs get a record */
+		if (!cpu_map__has(online, cpu))
+			continue;
+
+		if (cs_etm_is_etmv4(itr, cpu))
+			nr_v4++;
+		else
+			nr_v3++;
+	}
+
+	cpu_map__put(online);
+
+	return (CS_ETM_HEADER_SIZE +
+	       (nr_v4 * CS_ETMV4_PRIV_SIZE) +
+	       (nr_v3 * CS_ETMV3_PRIV_SIZE));
+}
+
+static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = {
+	[CS_ETM_ETMCCER]	= "mgmt/etmccer",
+	[CS_ETM_ETMIDR]		= "mgmt/etmidr",
+};
+
+static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = {
+	[CS_ETMV4_TRCIDR0]		= "trcidr/trcidr0",
+	[CS_ETMV4_TRCIDR1]		= "trcidr/trcidr1",
+	[CS_ETMV4_TRCIDR2]		= "trcidr/trcidr2",
+	[CS_ETMV4_TRCIDR8]		= "trcidr/trcidr8",
+	[CS_ETMV4_TRCAUTHSTATUS]	= "mgmt/trcauthstatus",
+};
+
+/*
+ * Report whether @cpu has an ETMv4 tracer: true when the ETMv4 read-only file
+ * trcidr/trcidr0 of that CPU can be scanned through the ETM PMU.
+ */
+static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu)
+{
+	struct cs_etm_recording *rec =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *pmu = rec->cs_etm_pmu;
+	char path[PATH_MAX];
+	unsigned int val;
+	int scan;
+
+	/* Take any of the RO files for ETMv4 and see if it is present */
+	snprintf(path, sizeof(path), "cpu%d/%s",
+		 cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
+	scan = perf_pmu__scan_file(pmu, path, "%x", &val);
+
+	/* The file was read successfully, we have a winner */
+	return scan == 1;
+}
+
+/* Read the hex value stored in the PMU file cpu<cpu>/<path>. */
+static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path)
+{
+	unsigned int value = 0;
+	char file[PATH_MAX];
+
+	/* Get RO metadata from sysfs */
+	snprintf(file, sizeof(file), "cpu%d/%s", cpu, path);
+
+	/* A failed read is only logged, it is not reported to the caller */
+	if (perf_pmu__scan_file(pmu, file, "%x", &value) != 1)
+		pr_err("%s: error reading: %s\n", __func__, file);
+
+	return value;
+}
+
+static void cs_etm_get_metadata(int cpu, u32 *offset,
+				struct auxtrace_record *itr,
+				struct auxtrace_info_event *info)
+{
+	u32 increment;
+	u64 magic;
+	struct cs_etm_recording *ptr =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+
+	/* First see which kind of tracer (ETMv4 or not) this CPU is affined to */
+	if (cs_etm_is_etmv4(itr, cpu)) {
+		magic = __perf_cs_etmv4_magic;
+		/* Get trace configuration register */
+		info->priv[*offset + CS_ETMV4_TRCCONFIGR] =
+						cs_etmv4_get_config(itr);
+		/* Get traceID from the framework */
+		info->priv[*offset + CS_ETMV4_TRCTRACEIDR] =
+						coresight_get_trace_id(cpu);
+		/* Get read-only information from sysfs */
+		info->priv[*offset + CS_ETMV4_TRCIDR0] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
+		info->priv[*offset + CS_ETMV4_TRCIDR1] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv4_ro[CS_ETMV4_TRCIDR1]);
+		info->priv[*offset + CS_ETMV4_TRCIDR2] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv4_ro[CS_ETMV4_TRCIDR2]);
+		info->priv[*offset + CS_ETMV4_TRCIDR8] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv4_ro[CS_ETMV4_TRCIDR8]);
+		info->priv[*offset + CS_ETMV4_TRCAUTHSTATUS] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv4_ro
+				      [CS_ETMV4_TRCAUTHSTATUS]);
+
+		/* How many priv[] slots this CPU used */
+		increment = CS_ETMV4_PRIV_MAX;
+	} else {
+		magic = __perf_cs_etmv3_magic;
+		/* Get configuration register */
+		info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
+		/* Get traceID from the framework */
+		info->priv[*offset + CS_ETM_ETMTRACEIDR] =
+						coresight_get_trace_id(cpu);
+		/* Get read-only information from sysfs */
+		info->priv[*offset + CS_ETM_ETMCCER] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv3_ro[CS_ETM_ETMCCER]);
+		info->priv[*offset + CS_ETM_ETMIDR] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv3_ro[CS_ETM_ETMIDR]);
+
+		/* How many priv[] slots this CPU used */
+		increment = CS_ETM_PRIV_MAX;
+	}
+
+	/* Build generic header portion */
+	info->priv[*offset + CS_ETM_MAGIC] = magic;
+	info->priv[*offset + CS_ETM_CPU] = cpu;
+	/* Where the next CPU entry should start from */
+	*offset += increment;
+}
+
+/* Fill the auxtrace info event: session header, then per-CPU ETM metadata. */
+static int cs_etm_info_fill(struct auxtrace_record *itr,
+			    struct perf_session *session,
+			    struct auxtrace_info_event *info,
+			    size_t priv_size)
+{
+	int i, ret = -EINVAL;
+	u32 offset = CS_ETM_SNAPSHOT + 1;
+	u64 nr_cpu, type;
+	struct cpu_map *cpu_map;
+	struct cpu_map *event_cpus = session->evlist->cpus;
+	struct cpu_map *online_cpus = cpu_map__new(NULL);
+	struct cs_etm_recording *ptr =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+
+	/* Every exit goes through 'out' so the online_cpus reference is dropped */
+	if (priv_size != cs_etm_info_priv_size(itr, session->evlist))
+		goto out;
+
+	if (!session->evlist->nr_mmaps)
+		goto out;
+
+	/* If the cpu_map is empty all online CPUs are involved */
+	if (cpu_map__empty(event_cpus)) {
+		cpu_map = online_cpus;
+	} else {
+		/* Make sure all specified CPUs are online */
+		for (i = 0; i < cpu_map__nr(event_cpus); i++) {
+			if (cpu_map__has(event_cpus, i) &&
+			    !cpu_map__has(online_cpus, i))
+				goto out;
+		}
+
+		cpu_map = event_cpus;
+	}
+
+	nr_cpu = cpu_map__nr(cpu_map);
+	/* Get PMU type as dynamically assigned by the core */
+	type = cs_etm_pmu->type;
+
+	/* First fill out the session header */
+	info->type = PERF_AUXTRACE_CS_ETM;
+	info->priv[CS_HEADER_VERSION_0] = 0;
+	info->priv[CS_PMU_TYPE_CPUS] = (type << 32) | nr_cpu;
+	info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode;
+
+	for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++)
+		if (cpu_map__has(cpu_map, i))
+			cs_etm_get_metadata(i, &offset, itr, info);
+
+	ret = 0;
+out:
+	cpu_map__put(online_cpus);
+	return ret;
+}
+
+/* Set *old to the current head and advance *head by the AUX mmap length. */
+static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused,
+				int idx, struct auxtrace_mmap *mm,
+				unsigned char *data __maybe_unused,
+				u64 *head, u64 *old)
+{
+	pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
+		  __func__, idx, (size_t)*old, (size_t)*head, mm->len);
+
+	*old = *head;
+	*head = *old + mm->len;
+	return 0;
+}
+
+/* Disable the first ETM event of the evlist; -EINVAL if there is none. */
+static int cs_etm_snapshot_start(struct auxtrace_record *itr)
+{
+	struct cs_etm_recording *rec =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(rec->evlist, pos)
+		if (pos->attr.type == rec->cs_etm_pmu->type)
+			return perf_evsel__disable(pos);
+	return -EINVAL;
+}
+
+/* Enable the first ETM event of the evlist; -EINVAL if there is none. */
+static int cs_etm_snapshot_finish(struct auxtrace_record *itr)
+{
+	struct cs_etm_recording *rec =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(rec->evlist, pos)
+		if (pos->attr.type == rec->cs_etm_pmu->type)
+			return perf_evsel__enable(pos);
+	return -EINVAL;
+}
+
+static u64 cs_etm_reference(struct auxtrace_record *itr __maybe_unused)
+{
+	return (((u64) rand() <<  0) & 0x00000000FFFFFFFFull) |	/* low word */
+		(((u64) rand() << 32) & 0xFFFFFFFF00000000ull);	/* high word */
+}
+
+/* Release the cs_etm_recording that embeds @itr. */
+static void cs_etm_recording_free(struct auxtrace_record *itr)
+{
+	/* itr is a member of the recording, so free its container */
+	free(container_of(itr, struct cs_etm_recording, itr));
+}
+
+/* Enable the first ETM event of the evlist for @idx; -EINVAL if none. */
+static int cs_etm_read_finish(struct auxtrace_record *itr, int idx)
+{
+	struct cs_etm_recording *rec =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(rec->evlist, pos)
+		if (pos->attr.type == rec->cs_etm_pmu->type)
+			return perf_evlist__enable_event_idx(rec->evlist,
+							     pos, idx);
+
+	return -EINVAL;
+}
+
+/*
+ * Allocate a CoreSight ETM recorder and wire up its auxtrace callbacks.
+ * Returns NULL with *err set to -EINVAL (no ETM PMU) or -ENOMEM on failure.
+ */
+struct auxtrace_record *cs_etm_record_init(int *err)
+{
+	struct perf_pmu *pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
+	struct cs_etm_recording *rec;
+
+	if (pmu == NULL) {
+		*err = -EINVAL;
+		return NULL;
+	}
+
+	rec = zalloc(sizeof(*rec));
+	if (rec == NULL) {
+		*err = -ENOMEM;
+		return NULL;
+	}
+
+	rec->cs_etm_pmu			= pmu;
+	rec->itr.parse_snapshot_options	= cs_etm_parse_snapshot_options;
+	rec->itr.recording_options	= cs_etm_recording_options;
+	rec->itr.info_priv_size		= cs_etm_info_priv_size;
+	rec->itr.info_fill		= cs_etm_info_fill;
+	rec->itr.find_snapshot		= cs_etm_find_snapshot;
+	rec->itr.snapshot_start		= cs_etm_snapshot_start;
+	rec->itr.snapshot_finish	= cs_etm_snapshot_finish;
+	rec->itr.reference		= cs_etm_reference;
+	rec->itr.free			= cs_etm_recording_free;
+	rec->itr.read_finish		= cs_etm_read_finish;
+
+	*err = 0;
+	return &rec->itr;
+}
+
+/* Open <sysfs>/bus/coresight/devices/<name> for writing; NULL on failure. */
+static FILE *cs_device__open_file(const char *name)
+{
+	const char *sysfs = sysfs__mountpoint();
+	char path[PATH_MAX];
+	struct stat st;
+
+	if (sysfs == NULL)
+		return NULL;
+
+	snprintf(path, sizeof(path),
+		 "%s" CS_BUS_DEVICE_PATH "%s", sysfs, name);
+
+	/* fopen("w") would create a missing file, so require it to exist */
+	if (stat(path, &st) < 0)
+		return NULL;
+
+	return fopen(path, "w");
+}
+
+/* printf() to a coresight device file; <0 if open, format or flush fails. */
+static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...)
+{
+	FILE *file = cs_device__open_file(name);
+	va_list args;
+	int ret;
+
+	if (!file)
+		return -EINVAL;
+	va_start(args, fmt);
+	ret = vfprintf(file, fmt, args);
+	va_end(args);
+	/* stdio buffers the data, so a rejected write only shows up in fclose() */
+	return (fclose(file) && ret >= 0) ? -EIO : ret;
+}
+
+int cs_etm_set_drv_config(struct perf_evsel_config_term *term)
+{
+	int ret;
+	char enable_sink[ENABLE_SINK_MAX];
+
+	snprintf(enable_sink, ENABLE_SINK_MAX, "%s/%s",
+		 term->val.drv_cfg, "enable_sink");
+
+	ret = cs_device__print_file(enable_sink, "%d", 1);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
diff --git a/arch/arm/util/cs-etm.h b/arch/arm/util/cs-etm.h
new file mode 100644
index 0000000..1a12e64
--- /dev/null
+++ b/arch/arm/util/cs-etm.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef INCLUDE__PERF_CS_ETM_H__
+#define INCLUDE__PERF_CS_ETM_H__
+
+#include "../../util/evsel.h"
+
+struct auxtrace_record *cs_etm_record_init(int *err);
+int cs_etm_set_drv_config(struct perf_evsel_config_term *term);
+
+#endif
diff --git a/arch/arm/util/dwarf-regs.c b/arch/arm/util/dwarf-regs.c
new file mode 100644
index 0000000..8bb176a
--- /dev/null
+++ b/arch/arm/util/dwarf-regs.c
@@ -0,0 +1,64 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Will Deacon, ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stddef.h>
+#include <linux/stringify.h>
+#include <dwarf-regs.h>
+
+struct pt_regs_dwarfnum {
+	const char *name;
+	unsigned int dwarfnum;
+};
+
+#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
+#define GPR_DWARFNUM_NAME(num) \
+	{.name = __stringify(%r##num), .dwarfnum = num}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+
+/*
+ * Reference:
+ * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0040a/IHI0040A_aadwarf.pdf
+ */
+static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
+	GPR_DWARFNUM_NAME(0),
+	GPR_DWARFNUM_NAME(1),
+	GPR_DWARFNUM_NAME(2),
+	GPR_DWARFNUM_NAME(3),
+	GPR_DWARFNUM_NAME(4),
+	GPR_DWARFNUM_NAME(5),
+	GPR_DWARFNUM_NAME(6),
+	GPR_DWARFNUM_NAME(7),
+	GPR_DWARFNUM_NAME(8),
+	GPR_DWARFNUM_NAME(9),
+	GPR_DWARFNUM_NAME(10),
+	REG_DWARFNUM_NAME("%fp", 11),
+	REG_DWARFNUM_NAME("%ip", 12),
+	REG_DWARFNUM_NAME("%sp", 13),
+	REG_DWARFNUM_NAME("%lr", 14),
+	REG_DWARFNUM_NAME("%pc", 15),
+	REG_DWARFNUM_END,
+};
+
+/**
+ * get_arch_regstr() - look up a register name from its DWARF register number
+ * @n:	the DWARF register number
+ *
+ * Walks regdwarfnum_table up to its NULL-name terminator and returns the name
+ * of the entry whose dwarfnum equals @n. If no entry matches, this returns
+ * NULL.
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+	const struct pt_regs_dwarfnum *entry = regdwarfnum_table;
+
+	while (entry->name != NULL && entry->dwarfnum != n)
+		entry++;
+	return entry->name;
+}
diff --git a/arch/arm/util/pmu.c b/arch/arm/util/pmu.c
new file mode 100644
index 0000000..e047571
--- /dev/null
+++ b/arch/arm/util/pmu.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <string.h>
+#include <linux/coresight-pmu.h>
+#include <linux/perf_event.h>
+
+#include "cs-etm.h"
+#include "arm-spe.h"
+#include "../../util/pmu.h"
+
+struct perf_event_attr
+*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+{
+#ifdef HAVE_AUXTRACE_SUPPORT
+	if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
+		/* ETM: no default attr is built yet; only flag and hook the PMU */
+		pmu->selectable = true;
+		pmu->set_drv_config = cs_etm_set_drv_config;
+#if defined(__aarch64__)
+	} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
+		return arm_spe_pmu_default_config(pmu);	/* SPE builds its own attr */
+#endif
+	}
+
+#endif
+	return NULL;	/* no default attr for this PMU, or no auxtrace support */
+}
diff --git a/arch/arm/util/unwind-libdw.c b/arch/arm/util/unwind-libdw.c
new file mode 100644
index 0000000..36ba4c6
--- /dev/null
+++ b/arch/arm/util/unwind-libdw.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct regs_dump *user_regs = &ui->sample->user_regs;
+	Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
+	/* REG(r): sampled PERF_REG_ARM_<r>; stays 0 if perf_reg_value() sets nothing */
+#define REG(r) ({						\
+	Dwarf_Word val = 0;					\
+	perf_reg_value(&val, user_regs, PERF_REG_ARM_##r);	\
+	val;							\
+})
+
+	dwarf_regs[0]  = REG(R0);
+	dwarf_regs[1]  = REG(R1);
+	dwarf_regs[2]  = REG(R2);
+	dwarf_regs[3]  = REG(R3);
+	dwarf_regs[4]  = REG(R4);
+	dwarf_regs[5]  = REG(R5);
+	dwarf_regs[6]  = REG(R6);
+	dwarf_regs[7]  = REG(R7);
+	dwarf_regs[8]  = REG(R8);
+	dwarf_regs[9]  = REG(R9);
+	dwarf_regs[10] = REG(R10);
+	dwarf_regs[11] = REG(FP);
+	dwarf_regs[12] = REG(IP);
+	dwarf_regs[13] = REG(SP);
+	dwarf_regs[14] = REG(LR);
+	dwarf_regs[15] = REG(PC);
+	/* pass the whole array to libdwfl, starting at DWARF register 0 */
+	return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
+					   dwarf_regs);
+}
diff --git a/arch/arm/util/unwind-libunwind.c b/arch/arm/util/unwind-libunwind.c
new file mode 100644
index 0000000..3a55022
--- /dev/null
+++ b/arch/arm/util/unwind-libunwind.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+#include "../../util/debug.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+	switch (regnum) {	/* libunwind UNW_ARM_* id -> perf PERF_REG_ARM_* id */
+	case UNW_ARM_R0:
+		return PERF_REG_ARM_R0;
+	case UNW_ARM_R1:
+		return PERF_REG_ARM_R1;
+	case UNW_ARM_R2:
+		return PERF_REG_ARM_R2;
+	case UNW_ARM_R3:
+		return PERF_REG_ARM_R3;
+	case UNW_ARM_R4:
+		return PERF_REG_ARM_R4;
+	case UNW_ARM_R5:
+		return PERF_REG_ARM_R5;
+	case UNW_ARM_R6:
+		return PERF_REG_ARM_R6;
+	case UNW_ARM_R7:
+		return PERF_REG_ARM_R7;
+	case UNW_ARM_R8:
+		return PERF_REG_ARM_R8;
+	case UNW_ARM_R9:
+		return PERF_REG_ARM_R9;
+	case UNW_ARM_R10:
+		return PERF_REG_ARM_R10;
+	case UNW_ARM_R11:	/* r11..r15 use perf's named ids */
+		return PERF_REG_ARM_FP;
+	case UNW_ARM_R12:
+		return PERF_REG_ARM_IP;
+	case UNW_ARM_R13:
+		return PERF_REG_ARM_SP;
+	case UNW_ARM_R14:
+		return PERF_REG_ARM_LR;
+	case UNW_ARM_R15:
+		return PERF_REG_ARM_PC;
+	default:
+		pr_err("unwind: invalid reg id %d\n", regnum);
+		return -EINVAL;
+	}
+
+	return -EINVAL;	/* not reached: every switch path returns */
+}
diff --git a/arch/arm64/Build b/arch/arm64/Build
new file mode 100644
index 0000000..41bf61d
--- /dev/null
+++ b/arch/arm64/Build
@@ -0,0 +1,2 @@
+libperf-y += util/
+libperf-$(CONFIG_DWARF_UNWIND) += tests/
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
new file mode 100644
index 0000000..91de486
--- /dev/null
+++ b/arch/arm64/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
+PERF_HAVE_JITDUMP := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
diff --git a/arch/arm64/annotate/instructions.c b/arch/arm64/annotate/instructions.c
new file mode 100644
index 0000000..6688977
--- /dev/null
+++ b/arch/arm64/annotate/instructions.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <regex.h>
+
+struct arm64_annotate {
+	regex_t call_insn,	/* compiled pattern for call mnemonics */
+		jump_insn;	/* compiled pattern for jump mnemonics */
+};
+
+static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const char *name)
+{
+	struct arm64_annotate *priv = arch->priv;
+	struct ins_ops *found = NULL;
+	regmatch_t pmatch[2];
+	/* jump pattern is tried first, then the call pattern, then literal "ret" */
+	if (regexec(&priv->jump_insn, name, 2, pmatch, 0) == 0)
+		found = &jump_ops;
+	else if (regexec(&priv->call_insn, name, 2, pmatch, 0) == 0)
+		found = &call_ops;
+	else if (strcmp(name, "ret") == 0)
+		found = &ret_ops;
+	if (found == NULL)
+		return NULL;
+	/* register the mnemonic with the ops that matched */
+	arch__associate_ins_ops(arch, name, found);
+	return found;
+}
+
+static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+	struct arm64_annotate *arm;
+	int err;
+
+	if (arch->initialized)
+		return 0;	/* patterns already compiled on an earlier call */
+
+	arm = zalloc(sizeof(*arm));
+	if (!arm)
+		return -1;
+
+	/* bl, blr */
+	err = regcomp(&arm->call_insn, "^blr?$", REG_EXTENDED);
+	if (err)
+		goto out_free_arm;
+	/* b, b.cond (including hs/lo/vc/vs), br, cbz/cbnz, tbz/tbnz */
+	err = regcomp(&arm->jump_insn, "^[ct]?br?\\.?(cc|cs|eq|ge|gt|hi|hs|le|lo|ls|lt|mi|ne|pl|vc|vs)?n?z?$",
+		      REG_EXTENDED);
+	if (err)
+		goto out_free_call;
+	/* both patterns compiled: publish them together with the objdump settings */
+	arch->initialized = true;
+	arch->priv	  = arm;
+	arch->associate_instruction_ops   = arm64__associate_instruction_ops;
+	arch->objdump.comment_char	  = '/';
+	arch->objdump.skip_functions_char = '+';
+	return 0;
+
+out_free_call:
+	regfree(&arm->call_insn);	/* only the call pattern was compiled */
+out_free_arm:
+	free(arm);
+	return -1;
+}
diff --git a/arch/arm64/include/arch-tests.h b/arch/arm64/include/arch-tests.h
new file mode 100644
index 0000000..90ec4c8
--- /dev/null
+++ b/arch/arm64/include/arch-tests.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;		/* forward declarations only */
+struct perf_sample;
+#endif
+
+extern struct test arch_tests[];	/* defined in arch/arm64/tests/arch-tests.c */
+
+#endif /* ARCH_TESTS_H */
diff --git a/arch/arm64/include/dwarf-regs-table.h b/arch/arm64/include/dwarf-regs-table.h
new file mode 100644
index 0000000..177b285
--- /dev/null
+++ b/arch/arm64/include/dwarf-regs-table.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const aarch64_regstr_tbl[] = {	/* entry i is at index i */
+	"%x0", "%x1", "%x2", "%x3", "%x4",
+	"%x5", "%x6", "%x7", "%x8", "%x9",
+	"%x10", "%x11", "%x12", "%x13", "%x14",
+	"%x15", "%x16", "%x17", "%x18", "%x19",
+	"%x20", "%x21", "%x22", "%x23", "%x24",
+	"%x25", "%x26", "%x27", "%x28", "%x29",
+	"%lr", "%sp",	/* indices 30 and 31 */
+};
+#endif
diff --git a/arch/arm64/include/perf_regs.h b/arch/arm64/include/perf_regs.h
new file mode 100644
index 0000000..baaa5e6
--- /dev/null
+++ b/arch/arm64/include/perf_regs.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK	((1ULL << PERF_REG_ARM64_MAX) - 1)	/* low PERF_REG_ARM64_MAX bits set */
+#define PERF_REGS_MAX	PERF_REG_ARM64_MAX
+#define PERF_SAMPLE_REGS_ABI	PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP	PERF_REG_ARM64_PC	/* generic IP alias -> pc */
+#define PERF_REG_SP	PERF_REG_ARM64_SP	/* generic SP alias -> sp */
+
+static inline const char *perf_reg_name(int id)
+{
+	switch (id) {	/* PERF_REG_ARM64_* id -> lowercase register name */
+	case PERF_REG_ARM64_X0:
+		return "x0";
+	case PERF_REG_ARM64_X1:
+		return "x1";
+	case PERF_REG_ARM64_X2:
+		return "x2";
+	case PERF_REG_ARM64_X3:
+		return "x3";
+	case PERF_REG_ARM64_X4:
+		return "x4";
+	case PERF_REG_ARM64_X5:
+		return "x5";
+	case PERF_REG_ARM64_X6:
+		return "x6";
+	case PERF_REG_ARM64_X7:
+		return "x7";
+	case PERF_REG_ARM64_X8:
+		return "x8";
+	case PERF_REG_ARM64_X9:
+		return "x9";
+	case PERF_REG_ARM64_X10:
+		return "x10";
+	case PERF_REG_ARM64_X11:
+		return "x11";
+	case PERF_REG_ARM64_X12:
+		return "x12";
+	case PERF_REG_ARM64_X13:
+		return "x13";
+	case PERF_REG_ARM64_X14:
+		return "x14";
+	case PERF_REG_ARM64_X15:
+		return "x15";
+	case PERF_REG_ARM64_X16:
+		return "x16";
+	case PERF_REG_ARM64_X17:
+		return "x17";
+	case PERF_REG_ARM64_X18:
+		return "x18";
+	case PERF_REG_ARM64_X19:
+		return "x19";
+	case PERF_REG_ARM64_X20:
+		return "x20";
+	case PERF_REG_ARM64_X21:
+		return "x21";
+	case PERF_REG_ARM64_X22:
+		return "x22";
+	case PERF_REG_ARM64_X23:
+		return "x23";
+	case PERF_REG_ARM64_X24:
+		return "x24";
+	case PERF_REG_ARM64_X25:
+		return "x25";
+	case PERF_REG_ARM64_X26:
+		return "x26";
+	case PERF_REG_ARM64_X27:
+		return "x27";
+	case PERF_REG_ARM64_X28:
+		return "x28";
+	case PERF_REG_ARM64_X29:
+		return "x29";
+	case PERF_REG_ARM64_SP:
+		return "sp";
+	case PERF_REG_ARM64_LR:
+		return "lr";
+	case PERF_REG_ARM64_PC:
+		return "pc";
+	default:
+		return NULL;	/* id has no name here */
+	}
+
+	return NULL;	/* not reached: every switch path returns */
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/arch/arm64/tests/Build b/arch/arm64/tests/Build
new file mode 100644
index 0000000..883c57f
--- /dev/null
+++ b/arch/arm64/tests/Build
@@ -0,0 +1,4 @@
+libperf-y += regs_load.o
+libperf-y += dwarf-unwind.o
+
+libperf-y += arch-tests.o
diff --git a/arch/arm64/tests/arch-tests.c b/arch/arm64/tests/arch-tests.c
new file mode 100644
index 0000000..5b1543c
--- /dev/null
+++ b/arch/arm64/tests/arch-tests.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+	{
+		.desc = "DWARF unwind",
+		.func = test__dwarf_unwind,
+	},
+#endif
+	{
+		.func = NULL,	/* last entry; presumably the list terminator */
+	},
+};
diff --git a/arch/arm64/tests/dwarf-unwind.c b/arch/arm64/tests/dwarf-unwind.c
new file mode 100644
index 0000000..e907f0f
--- /dev/null
+++ b/arch/arm64/tests/dwarf-unwind.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+		struct thread *thread, u64 *regs)
+{
+	struct stack_dump *stack = &sample->user_stack;
+	struct map *map;
+	unsigned long sp;
+	u64 stack_size, *buf;
+
+	buf = malloc(STACK_SIZE);
+	if (!buf) {
+		pr_debug("failed to allocate sample ustack data\n");
+		return -1;
+	}
+
+	sp = (unsigned long) regs[PERF_REG_ARM64_SP];
+	/* look up the mapping containing sp; its end bounds the copy below */
+	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	if (!map) {
+		pr_debug("failed to get stack map\n");
+		free(buf);
+		return -1;
+	}
+	/* copy from sp to the end of the mapping, capped at STACK_SIZE bytes */
+	stack_size = map->end - sp;
+	stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+	memcpy(buf, (void *) sp, stack_size);
+	stack->data = (char *) buf;	/* buf now belongs to the sample */
+	stack->size = stack_size;
+	return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+		struct thread *thread)
+{
+	struct regs_dump *uregs = &sample->user_regs;
+	u64 *values = calloc(PERF_REGS_MAX, sizeof(u64));
+
+	if (values == NULL) {
+		pr_debug("failed to allocate sample uregs data\n");
+		return -1;
+	}
+
+	/* capture the current registers, then describe them in the sample */
+	perf_regs_load(values);
+	uregs->regs = values;
+	uregs->mask = PERF_REGS_MASK;
+	uregs->abi  = PERF_SAMPLE_REGS_ABI;
+	/* the stack copy starts at the sp value captured above */
+	return sample_ustack(sample, thread, values);
+}
diff --git a/arch/arm64/tests/regs_load.S b/arch/arm64/tests/regs_load.S
new file mode 100644
index 0000000..0704251
--- /dev/null
+++ b/arch/arm64/tests/regs_load.S
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
+.text
+.type perf_regs_load,%function
+#define STR_REG(r)	str x##r, [x0, 8 * r]	/* store x<r> into 8-byte slot r */
+#define LDR_REG(r)	ldr x##r, [x0, 8 * r]	/* load x<r> back from slot r */
+#define SP	(8 * 31)	/* byte offset of slot 31 */
+#define PC	(8 * 32)	/* byte offset of slot 32 */
+ENTRY(perf_regs_load)
+	STR_REG(0)
+	STR_REG(1)
+	STR_REG(2)
+	STR_REG(3)
+	STR_REG(4)
+	STR_REG(5)
+	STR_REG(6)
+	STR_REG(7)
+	STR_REG(8)
+	STR_REG(9)
+	STR_REG(10)
+	STR_REG(11)
+	STR_REG(12)
+	STR_REG(13)
+	STR_REG(14)
+	STR_REG(15)
+	STR_REG(16)
+	STR_REG(17)
+	STR_REG(18)
+	STR_REG(19)
+	STR_REG(20)
+	STR_REG(21)
+	STR_REG(22)
+	STR_REG(23)
+	STR_REG(24)
+	STR_REG(25)
+	STR_REG(26)
+	STR_REG(27)
+	STR_REG(28)
+	STR_REG(29)
+	STR_REG(30)
+	mov x1, sp		/* x1 is used as scratch to read sp */
+	str x1, [x0, #SP]
+	str x30, [x0, #PC]	/* x30 is written into the pc slot */
+	LDR_REG(1)		/* reload x1, clobbered as scratch above */
+	ret
+ENDPROC(perf_regs_load)
diff --git a/arch/arm64/util/Build b/arch/arm64/util/Build
new file mode 100644
index 0000000..68f8a8e
--- /dev/null
+++ b/arch/arm64/util/Build
@@ -0,0 +1,10 @@
+libperf-y += header.o
+libperf-y += sym-handling.o
+libperf-$(CONFIG_DWARF)     += dwarf-regs.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
+			      ../../arm/util/auxtrace.o \
+			      ../../arm/util/cs-etm.o \
+			      arm-spe.o
diff --git a/arch/arm64/util/arm-spe.c b/arch/arm64/util/arm-spe.c
new file mode 100644
index 0000000..1120e39
--- /dev/null
+++ b/arch/arm64/util/arm-spe.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <time.h>
+
+#include "../../util/cpumap.h"
+#include "../../util/evsel.h"
+#include "../../util/evlist.h"
+#include "../../util/session.h"
+#include "../../util/util.h"
+#include "../../util/pmu.h"
+#include "../../util/debug.h"
+#include "../../util/auxtrace.h"
+#include "../../util/arm-spe.h"
+
+#define KiB(x) ((x) * 1024)	/* x KiB expressed in bytes */
+#define MiB(x) ((x) * 1024 * 1024)	/* x MiB expressed in bytes */
+
+struct arm_spe_recording {
+	struct auxtrace_record		itr;	/* callbacks recover the wrapper via container_of() */
+	struct perf_pmu			*arm_spe_pmu;	/* set in arm_spe_recording_init() */
+	struct perf_evlist		*evlist;	/* set in arm_spe_recording_options() */
+};
+
+static size_t
+arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+		       struct perf_evlist *evlist __maybe_unused)
+{
+	return ARM_SPE_AUXTRACE_PRIV_SIZE;	/* constant; both arguments are ignored */
+}
+
+static int arm_spe_info_fill(struct auxtrace_record *itr,
+			     struct perf_session *session,
+			     struct auxtrace_info_event *auxtrace_info,
+			     size_t priv_size)
+{
+	struct arm_spe_recording *sper =
+			container_of(itr, struct arm_spe_recording, itr);
+	struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
+	/* priv_size must be the value arm_spe_info_priv_size() reports */
+	if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
+		return -EINVAL;
+
+	if (!session->evlist->nr_mmaps)
+		return -EINVAL;	/* rejected when the evlist has no mmaps */
+	/* record the trace type and the PMU type in the info event */
+	auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
+	auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
+
+	return 0;
+}
+
+static int arm_spe_recording_options(struct auxtrace_record *itr,
+				     struct perf_evlist *evlist,
+				     struct record_opts *opts)
+{
+	struct arm_spe_recording *sper =
+			container_of(itr, struct arm_spe_recording, itr);
+	struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
+	struct perf_evsel *evsel, *arm_spe_evsel = NULL;
+	bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+	struct perf_evsel *tracking_evsel;
+	int err;
+
+	sper->evlist = evlist;
+	/* find the SPE event (at most one is allowed) and fix its period to 1 */
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == arm_spe_pmu->type) {
+			if (arm_spe_evsel) {
+				pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n");
+				return -EINVAL;
+			}
+			evsel->attr.freq = 0;
+			evsel->attr.sample_period = 1;
+			arm_spe_evsel = evsel;
+			opts->full_auxtrace = true;
+		}
+	}
+
+	if (!opts->full_auxtrace)
+		return 0;	/* full AUX tracing not requested: nothing to set up */
+
+	/* We are in full trace mode but '-m,xyz' wasn't specified */
+	if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+		if (privileged) {
+			opts->auxtrace_mmap_pages = MiB(4) / page_size;
+		} else {
+			opts->auxtrace_mmap_pages = KiB(128) / page_size;
+			if (opts->mmap_pages == UINT_MAX)
+				opts->mmap_pages = KiB(256) / page_size;
+		}
+	}
+
+	/* Validate auxtrace_mmap_pages: at least 8KiB and a power of 2 in bytes */
+	if (opts->auxtrace_mmap_pages) {
+		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+		size_t min_sz = KiB(8);
+
+		if (sz < min_sz || !is_power_of_2(sz)) {
+			pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
+			       min_sz / 1024);
+			return -EINVAL;
+		}
+	}
+
+
+	/*
+	 * To obtain the auxtrace buffer file descriptor, the auxtrace event
+	 * must come first.
+	 */
+	perf_evlist__to_front(evlist, arm_spe_evsel);
+	/* every SPE sample carries CPU, TIME and TID */
+	perf_evsel__set_sample_bit(arm_spe_evsel, CPU);
+	perf_evsel__set_sample_bit(arm_spe_evsel, TIME);
+	perf_evsel__set_sample_bit(arm_spe_evsel, TID);
+
+	/* Add dummy event to keep tracking */
+	err = parse_events(evlist, "dummy:u", NULL);
+	if (err)
+		return err;
+	/* the dummy event was appended last; make it the tracking event */
+	tracking_evsel = perf_evlist__last(evlist);
+	perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+	tracking_evsel->attr.freq = 0;
+	tracking_evsel->attr.sample_period = 1;
+	perf_evsel__set_sample_bit(tracking_evsel, TIME);
+	perf_evsel__set_sample_bit(tracking_evsel, CPU);
+	perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+
+	return 0;
+}
+
+static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC_RAW, &ts);	/* return value is not checked */
+	/* seconds XOR nanoseconds of the raw monotonic clock */
+	return ts.tv_sec ^ ts.tv_nsec;
+}
+
+static void arm_spe_recording_free(struct auxtrace_record *itr)
+{
+	struct arm_spe_recording *sper =
+			container_of(itr, struct arm_spe_recording, itr);
+	/* itr is embedded in sper, so freeing the wrapper releases both */
+	free(sper);
+}
+
+static int arm_spe_read_finish(struct auxtrace_record *itr, int idx)
+{
+	struct arm_spe_recording *sper =
+			container_of(itr, struct arm_spe_recording, itr);
+	struct perf_evsel *evsel;
+	/* enable index idx of the first event whose type is the SPE PMU's */
+	evlist__for_each_entry(sper->evlist, evsel) {
+		if (evsel->attr.type == sper->arm_spe_pmu->type)
+			return perf_evlist__enable_event_idx(sper->evlist,
+							     evsel, idx);
+	}
+	return -EINVAL;	/* no SPE event in the evlist */
+}
+
+struct auxtrace_record *arm_spe_recording_init(int *err,
+					       struct perf_pmu *arm_spe_pmu)
+{
+	struct arm_spe_recording *sper;
+	/* *err is written only on the two failure paths below */
+	if (!arm_spe_pmu) {
+		*err = -ENODEV;
+		return NULL;
+	}
+
+	sper = zalloc(sizeof(struct arm_spe_recording));
+	if (!sper) {
+		*err = -ENOMEM;
+		return NULL;
+	}
+	/* remember the PMU and install the SPE callbacks */
+	sper->arm_spe_pmu = arm_spe_pmu;
+	sper->itr.recording_options = arm_spe_recording_options;
+	sper->itr.info_priv_size = arm_spe_info_priv_size;
+	sper->itr.info_fill = arm_spe_info_fill;
+	sper->itr.free = arm_spe_recording_free;
+	sper->itr.reference = arm_spe_reference;
+	sper->itr.read_finish = arm_spe_read_finish;
+	sper->itr.alignment = 0;
+
+	return &sper->itr;	/* released through itr.free (arm_spe_recording_free) */
+}
+
+struct perf_event_attr
+*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
+{
+	struct perf_event_attr *attr;
+	/* returns a zeroed, heap-allocated attr, or NULL if allocation fails */
+	attr = zalloc(sizeof(struct perf_event_attr));
+	if (!attr) {
+		pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
+		return NULL;
+	}
+
+	/*
+	 * If kernel driver doesn't advertise a minimum,
+	 * use max allowable by PMSIDR_EL1.INTERVAL
+	 */
+	if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
+				  &attr->sample_period) != 1) {
+		pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
+		attr->sample_period = 4096;
+	}
+	/* as a side effect the PMU is marked selectable and not uncore */
+	arm_spe_pmu->selectable = true;
+	arm_spe_pmu->is_uncore = false;
+
+	return attr;
+}
diff --git a/arch/arm64/util/dwarf-regs.c b/arch/arm64/util/dwarf-regs.c
new file mode 100644
index 0000000..cd764a9
--- /dev/null
+++ b/arch/arm64/util/dwarf-regs.c
@@ -0,0 +1,96 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Will Deacon, ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <dwarf-regs.h>
+#include <linux/ptrace.h> /* for struct user_pt_regs */
+#include <linux/stringify.h>
+#include "util.h"
+
+struct pt_regs_dwarfnum {
+	const char *name;	/* register name string, e.g. "%sp" */
+	unsigned int dwarfnum;	/* DWARF register number paired with that name */
+};
+
+#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}	/* explicit name */
+#define GPR_DWARFNUM_NAME(num) \
+	{.name = __stringify(%x##num), .dwarfnum = num}	/* name is "%x" followed by num */
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}	/* NULL-name terminator */
+#define DWARFNUM2OFFSET(index) \
+	((index) * sizeof((struct user_pt_regs *)0)->regs[0])	/* index times one regs[] element */
+
+/*
+ * Reference (the table below ends with a NULL-name entry):
+ * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf
+ */
+static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
+	GPR_DWARFNUM_NAME(0),
+	GPR_DWARFNUM_NAME(1),
+	GPR_DWARFNUM_NAME(2),
+	GPR_DWARFNUM_NAME(3),
+	GPR_DWARFNUM_NAME(4),
+	GPR_DWARFNUM_NAME(5),
+	GPR_DWARFNUM_NAME(6),
+	GPR_DWARFNUM_NAME(7),
+	GPR_DWARFNUM_NAME(8),
+	GPR_DWARFNUM_NAME(9),
+	GPR_DWARFNUM_NAME(10),
+	GPR_DWARFNUM_NAME(11),
+	GPR_DWARFNUM_NAME(12),
+	GPR_DWARFNUM_NAME(13),
+	GPR_DWARFNUM_NAME(14),
+	GPR_DWARFNUM_NAME(15),
+	GPR_DWARFNUM_NAME(16),
+	GPR_DWARFNUM_NAME(17),
+	GPR_DWARFNUM_NAME(18),
+	GPR_DWARFNUM_NAME(19),
+	GPR_DWARFNUM_NAME(20),
+	GPR_DWARFNUM_NAME(21),
+	GPR_DWARFNUM_NAME(22),
+	GPR_DWARFNUM_NAME(23),
+	GPR_DWARFNUM_NAME(24),
+	GPR_DWARFNUM_NAME(25),
+	GPR_DWARFNUM_NAME(26),
+	GPR_DWARFNUM_NAME(27),
+	GPR_DWARFNUM_NAME(28),
+	GPR_DWARFNUM_NAME(29),
+	REG_DWARFNUM_NAME("%lr", 30),
+	REG_DWARFNUM_NAME("%sp", 31),
+	REG_DWARFNUM_END,	/* stops the scans in the lookup functions below */
+};
+
+/**
+ * get_arch_regstr() - look up a register name by its DWARF register number
+ * @n:	the DWARF register number
+ *
+ * Walks regdwarfnum_table up to its NULL-name terminator and returns the
+ * name of the first entry whose DWARF number equals @n, or NULL when no
+ * entry matches.
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+	const struct pt_regs_dwarfnum *entry = regdwarfnum_table;
+	while (entry->name != NULL && entry->dwarfnum != n)
+		entry++;
+	/* on a miss this is the terminator, whose name is NULL */
+	return entry->name;
+}
+
+int regs_query_register_offset(const char *name)
+{
+	const struct pt_regs_dwarfnum *roff;
+	/* exact-match scan by name; returns DWARFNUM2OFFSET() of the hit */
+	for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+		if (!strcmp(roff->name, name))
+			return DWARFNUM2OFFSET(roff->dwarfnum);
+	return -EINVAL;	/* name is not in the table */
+}
diff --git a/arch/arm64/util/header.c b/arch/arm64/util/header.c
new file mode 100644
index 0000000..534cd25
--- /dev/null
+++ b/arch/arm64/util/header.c
@@ -0,0 +1,65 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <api/fs/fs.h>
+#include "header.h"
+
+#define MIDR "/regs/identification/midr_el1"	/* appended to the per-cpu sysfs directory */
+#define MIDR_SIZE 19	/* "0x" + 16 hex digits + NUL */
+#define MIDR_REVISION_MASK      0xf
+#define MIDR_VARIANT_SHIFT      20
+#define MIDR_VARIANT_MASK       (0xf << MIDR_VARIANT_SHIFT)
+
+char *get_cpuid_str(struct perf_pmu *pmu)
+{
+	char *buf = NULL;
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+	int cpu;
+	u64 midr = 0;
+	struct cpu_map *cpus;
+	FILE *file;
+	/* returns a malloc()ed "0x..." string, or NULL on any failure */
+	if (!sysfs || !pmu || !pmu->cpus)
+		return NULL;
+
+	buf = malloc(MIDR_SIZE);
+	if (!buf)
+		return NULL;
+
+	/* read midr from list of cpus mapped to this pmu */
+	cpus = cpu_map__get(pmu->cpus);
+	for (cpu = 0; cpu < cpus->nr; cpu++) {
+		scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR,
+				sysfs, cpus->map[cpu]);
+
+		file = fopen(path, "r");
+		if (!file) {
+			pr_debug("fopen failed for file %s\n", path);
+			continue;	/* try the next cpu in the map */
+		}
+
+		if (!fgets(buf, MIDR_SIZE, file)) {
+			fclose(file);
+			continue;
+		}
+		fclose(file);
+
+		/* Ignore/clear Variant[23:20] and
+		 * Revision[3:0] of MIDR
+		 */
+		midr = strtoul(buf, NULL, 16);
+		midr &= (~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK));
+		scnprintf(buf, MIDR_SIZE, "0x%016lx", midr);
+		/* got midr break loop */
+		break;
+	}
+	/* midr is still 0 when no cpu produced a readable, non-zero value */
+	if (!midr) {
+		pr_err("failed to get cpuid string for PMU %s\n", pmu->name);
+		free(buf);
+		buf = NULL;
+	}
+
+	cpu_map__put(cpus);	/* drop the reference taken by cpu_map__get() */
+	return buf;
+}
diff --git a/arch/arm64/util/sym-handling.c b/arch/arm64/util/sym-handling.c
new file mode 100644
index 0000000..0051b1e
--- /dev/null
+++ b/arch/arm64/util/sym-handling.c
@@ -0,0 +1,22 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
+ */
+
+#include "debug.h"
+#include "symbol.h"
+#include "map.h"
+#include "probe-event.h"
+#include "probe-file.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+	const unsigned int type = ehdr.e_type;
+
+	return type == ET_EXEC || type == ET_REL || type == ET_DYN;
+}
+#endif
diff --git a/arch/arm64/util/unwind-libdw.c b/arch/arm64/util/unwind-libdw.c
new file mode 100644
index 0000000..7623d85
--- /dev/null
+++ b/arch/arm64/util/unwind-libdw.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct regs_dump *user_regs = &ui->sample->user_regs;
+	Dwarf_Word dwarf_regs[PERF_REG_ARM64_MAX] = { 0 }, dwarf_pc;
+	/* zeroed: all PERF_REG_ARM64_MAX slots are passed on, only 0..31 are filled */
+#define REG(r) ({						\
+	Dwarf_Word val = 0;					\
+	perf_reg_value(&val, user_regs, PERF_REG_ARM64_##r);	\
+	val;							\
+})
+
+	dwarf_regs[0]  = REG(X0);
+	dwarf_regs[1]  = REG(X1);
+	dwarf_regs[2]  = REG(X2);
+	dwarf_regs[3]  = REG(X3);
+	dwarf_regs[4]  = REG(X4);
+	dwarf_regs[5]  = REG(X5);
+	dwarf_regs[6]  = REG(X6);
+	dwarf_regs[7]  = REG(X7);
+	dwarf_regs[8]  = REG(X8);
+	dwarf_regs[9]  = REG(X9);
+	dwarf_regs[10] = REG(X10);
+	dwarf_regs[11] = REG(X11);
+	dwarf_regs[12] = REG(X12);
+	dwarf_regs[13] = REG(X13);
+	dwarf_regs[14] = REG(X14);
+	dwarf_regs[15] = REG(X15);
+	dwarf_regs[16] = REG(X16);
+	dwarf_regs[17] = REG(X17);
+	dwarf_regs[18] = REG(X18);
+	dwarf_regs[19] = REG(X19);
+	dwarf_regs[20] = REG(X20);
+	dwarf_regs[21] = REG(X21);
+	dwarf_regs[22] = REG(X22);
+	dwarf_regs[23] = REG(X23);
+	dwarf_regs[24] = REG(X24);
+	dwarf_regs[25] = REG(X25);
+	dwarf_regs[26] = REG(X26);
+	dwarf_regs[27] = REG(X27);
+	dwarf_regs[28] = REG(X28);
+	dwarf_regs[29] = REG(X29);
+	dwarf_regs[30] = REG(LR);
+	dwarf_regs[31] = REG(SP);
+	/* hand the whole array to libdwfl, starting at DWARF register 0 */
+	if (!dwfl_thread_state_registers(thread, 0, PERF_REG_ARM64_MAX,
+					 dwarf_regs))
+		return false;
+	/* the pc is reported separately from the numbered registers */
+	dwarf_pc = REG(PC);
+	dwfl_thread_state_register_pc(thread, dwarf_pc);
+
+	return true;
+}
diff --git a/arch/arm64/util/unwind-libunwind.c b/arch/arm64/util/unwind-libunwind.c
new file mode 100644
index 0000000..002520d
--- /dev/null
+++ b/arch/arm64/util/unwind-libunwind.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+
+#ifndef REMOTE_UNWIND_LIBUNWIND
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+#include "../../util/debug.h"
+#endif
+
+int LIBUNWIND__ARCH_REG_ID(int regnum)
+{
+	switch (regnum) {	/* libunwind UNW_AARCH64_* id -> perf PERF_REG_ARM64_* id */
+	case UNW_AARCH64_X0:
+		return PERF_REG_ARM64_X0;
+	case UNW_AARCH64_X1:
+		return PERF_REG_ARM64_X1;
+	case UNW_AARCH64_X2:
+		return PERF_REG_ARM64_X2;
+	case UNW_AARCH64_X3:
+		return PERF_REG_ARM64_X3;
+	case UNW_AARCH64_X4:
+		return PERF_REG_ARM64_X4;
+	case UNW_AARCH64_X5:
+		return PERF_REG_ARM64_X5;
+	case UNW_AARCH64_X6:
+		return PERF_REG_ARM64_X6;
+	case UNW_AARCH64_X7:
+		return PERF_REG_ARM64_X7;
+	case UNW_AARCH64_X8:
+		return PERF_REG_ARM64_X8;
+	case UNW_AARCH64_X9:
+		return PERF_REG_ARM64_X9;
+	case UNW_AARCH64_X10:
+		return PERF_REG_ARM64_X10;
+	case UNW_AARCH64_X11:
+		return PERF_REG_ARM64_X11;
+	case UNW_AARCH64_X12:
+		return PERF_REG_ARM64_X12;
+	case UNW_AARCH64_X13:
+		return PERF_REG_ARM64_X13;
+	case UNW_AARCH64_X14:
+		return PERF_REG_ARM64_X14;
+	case UNW_AARCH64_X15:
+		return PERF_REG_ARM64_X15;
+	case UNW_AARCH64_X16:
+		return PERF_REG_ARM64_X16;
+	case UNW_AARCH64_X17:
+		return PERF_REG_ARM64_X17;
+	case UNW_AARCH64_X18:
+		return PERF_REG_ARM64_X18;
+	case UNW_AARCH64_X19:
+		return PERF_REG_ARM64_X19;
+	case UNW_AARCH64_X20:
+		return PERF_REG_ARM64_X20;
+	case UNW_AARCH64_X21:
+		return PERF_REG_ARM64_X21;
+	case UNW_AARCH64_X22:
+		return PERF_REG_ARM64_X22;
+	case UNW_AARCH64_X23:
+		return PERF_REG_ARM64_X23;
+	case UNW_AARCH64_X24:
+		return PERF_REG_ARM64_X24;
+	case UNW_AARCH64_X25:
+		return PERF_REG_ARM64_X25;
+	case UNW_AARCH64_X26:
+		return PERF_REG_ARM64_X26;
+	case UNW_AARCH64_X27:
+		return PERF_REG_ARM64_X27;
+	case UNW_AARCH64_X28:
+		return PERF_REG_ARM64_X28;
+	case UNW_AARCH64_X29:
+		return PERF_REG_ARM64_X29;
+	case UNW_AARCH64_X30:	/* x30 maps to perf's LR id */
+		return PERF_REG_ARM64_LR;
+	case UNW_AARCH64_SP:
+		return PERF_REG_ARM64_SP;
+	case UNW_AARCH64_PC:
+		return PERF_REG_ARM64_PC;
+	default:
+		pr_err("unwind: invalid reg id %d\n", regnum);
+		return -EINVAL;
+	}
+
+	return -EINVAL;	/* not reached: every switch path returns */
+}
diff --git a/arch/common.c b/arch/common.c
new file mode 100644
index 0000000..c6f3735
--- /dev/null
+++ b/arch/common.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "common.h"
+#include "../util/env.h"
+#include "../util/util.h"
+#include "../util/debug.h"
+
+const char *const arm_triplets[] = {	/* tool-name prefixes tried for arch "arm" */
+	"arm-eabi-",
+	"arm-linux-androideabi-",
+	"arm-unknown-linux-",
+	"arm-unknown-linux-gnu-",
+	"arm-unknown-linux-gnueabi-",
+	"arm-linux-gnu-",
+	"arm-linux-gnueabihf-",
+	"arm-none-eabi-",
+	NULL	/* terminator expected by lookup_triplets() */
+};
+
+const char *const arm64_triplets[] = {	/* tool-name prefixes tried for arch "arm64" */
+	"aarch64-linux-android-",
+	"aarch64-linux-gnu-",
+	NULL	/* terminator expected by lookup_triplets() */
+};
+
+const char *const powerpc_triplets[] = {	/* tool-name prefixes tried for arch "powerpc" */
+	"powerpc-unknown-linux-gnu-",
+	"powerpc-linux-gnu-",
+	"powerpc64-unknown-linux-gnu-",
+	"powerpc64-linux-gnu-",
+	"powerpc64le-linux-gnu-",
+	NULL	/* terminator expected by lookup_triplets() */
+};
+
+const char *const s390_triplets[] = {	/* tool-name prefixes tried for arch "s390" */
+	"s390-ibm-linux-",
+	"s390x-linux-gnu-",
+	NULL	/* terminator expected by lookup_triplets() */
+};
+
+const char *const sh_triplets[] = {	/* tool-name prefixes tried for arch "sh" */
+	"sh-unknown-linux-gnu-",
+	"sh64-unknown-linux-gnu-",
+	"sh-linux-gnu-",
+	"sh64-linux-gnu-",
+	NULL	/* terminator expected by lookup_triplets() */
+};
+
+const char *const sparc_triplets[] = {	/* tool-name prefixes tried for arch "sparc" */
+	"sparc-unknown-linux-gnu-",
+	"sparc64-unknown-linux-gnu-",
+	"sparc64-linux-gnu-",
+	NULL	/* terminator expected by lookup_triplets() */
+};
+
+const char *const x86_triplets[] = {	/* tool-name prefixes tried for arch "x86" */
+	"x86_64-pc-linux-gnu-",
+	"x86_64-unknown-linux-gnu-",
+	"i686-pc-linux-gnu-",
+	"i586-pc-linux-gnu-",
+	"i486-pc-linux-gnu-",
+	"i386-pc-linux-gnu-",
+	"i686-linux-android-",
+	"i686-android-linux-",
+	"x86_64-linux-gnu-",
+	"i586-linux-gnu-",
+	NULL	/* terminator expected by lookup_triplets() */
+};
+
+const char *const mips_triplets[] = {	/* tool-name prefixes tried for arch "mips" */
+	"mips-unknown-linux-gnu-",
+	"mipsel-linux-android-",
+	"mips-linux-gnu-",
+	"mips64-linux-gnu-",
+	"mips64el-linux-gnuabi64-",
+	"mips64-linux-gnuabi64-",
+	"mipsel-linux-gnu-",
+	NULL	/* terminator expected by lookup_triplets() */
+};
+
+/* Check every $PATH directory for an existing file called @name. */
+static bool lookup_path(char *name)
+{
+	char candidate[PATH_MAX];
+	char *dirs, *dir, *save = NULL;
+	const char *path_env = getenv("PATH");
+	bool found = false;
+
+	if (!path_env)
+		return false;
+
+	/* strtok_r() modifies its input, so split a private copy */
+	dirs = strdup(path_env);
+	if (!dirs)
+		return false;
+
+	for (dir = strtok_r(dirs, ":", &save); dir != NULL;
+	     dir = strtok_r(NULL, ":", &save)) {
+		scnprintf(candidate, sizeof(candidate), "%s/%s", dir, name);
+		found = access(candidate, F_OK) == 0;
+		if (found)
+			break;
+	}
+	free(dirs);
+	return found;
+}
+
+static int lookup_triplets(const char *const *triplets, const char *name)
+{
+	char tool[PATH_MAX];	/* holds "<prefix><name>" for the current probe */
+	int idx = 0;		/* index of the prefix being probed */
+	while (triplets[idx] != NULL) {
+		scnprintf(tool, sizeof(tool), "%s%s", triplets[idx], name);
+		if (lookup_path(tool))
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
+static int perf_env__lookup_binutils_path(struct perf_env *env,
+					  const char *name, const char **path)
+{
+	int idx;
+	const char *arch = perf_env__arch(env), *cross_env;
+	const char *const *path_list;
+	char *buf = NULL;
+
+	/*
+	 * We don't need to try to find objdump path for native system.
+	 * Just use default binutils path (e.g.: "objdump").
+	 */
+	if (!strcmp(perf_env__arch(NULL), arch))
+		goto out;	/* buf is still NULL, so *path becomes NULL */
+
+	cross_env = getenv("CROSS_COMPILE");
+	if (cross_env) {
+		if (asprintf(&buf, "%s%s", cross_env, name) < 0)
+			goto out_error;
+		if (buf[0] == '/') {	/* absolute prefix: check the file directly */
+			if (access(buf, F_OK) == 0)
+				goto out;
+			goto out_error;
+		}
+		if (lookup_path(buf))
+			goto out;
+		zfree(&buf);	/* not in $PATH: fall back to the triplet lists */
+	}
+
+	if (!strcmp(arch, "arm"))
+		path_list = arm_triplets;
+	else if (!strcmp(arch, "arm64"))
+		path_list = arm64_triplets;
+	else if (!strcmp(arch, "powerpc"))
+		path_list = powerpc_triplets;
+	else if (!strcmp(arch, "sh"))
+		path_list = sh_triplets;
+	else if (!strcmp(arch, "s390"))
+		path_list = s390_triplets;
+	else if (!strcmp(arch, "sparc"))
+		path_list = sparc_triplets;
+	else if (!strcmp(arch, "x86"))
+		path_list = x86_triplets;
+	else if (!strcmp(arch, "mips"))
+		path_list = mips_triplets;
+	else {
+		ui__error("binutils for %s not supported.\n", arch);
+		goto out_error;
+	}
+	/* idx is the first prefix for which "<prefix><name>" exists in $PATH */
+	idx = lookup_triplets(path_list, name);
+	if (idx < 0) {
+		ui__error("Please install %s for %s.\n"
+			  "You can add it to PATH, set CROSS_COMPILE or "
+			  "override the default using --%s.\n",
+			  name, arch, name);
+		goto out_error;
+	}
+
+	if (asprintf(&buf, "%s%s", path_list[idx], name) < 0)
+		goto out_error;
+
+out:
+	*path = buf;	/* heap string, or NULL on the native-arch path */
+	return 0;
+out_error:
+	free(buf);
+	*path = NULL;
+	return -1;
+}
+
+int perf_env__lookup_objdump(struct perf_env *env)
+{
+	/*
+	 * For live mode, env->arch will be NULL and we can use
+	 * the native objdump tool.
+	 */
+	if (env->arch == NULL)
+		return 0;	/* objdump_path is left untouched */
+	/* otherwise the result (possibly NULL) is stored in objdump_path */
+	return perf_env__lookup_binutils_path(env, "objdump", &objdump_path);
+}
diff --git a/arch/common.h b/arch/common.h
new file mode 100644
index 0000000..2d875ba
--- /dev/null
+++ b/arch/common.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_COMMON_H
+#define ARCH_PERF_COMMON_H
+
+#include "../util/env.h"
+
+extern const char *objdump_path;
+
+int perf_env__lookup_objdump(struct perf_env *env);
+
+#endif /* ARCH_PERF_COMMON_H */
diff --git a/arch/mips/Build b/arch/mips/Build
new file mode 100644
index 0000000..1bb8bf6
--- /dev/null
+++ b/arch/mips/Build
@@ -0,0 +1 @@
+# empty
diff --git a/arch/parisc/Build b/arch/parisc/Build
new file mode 100644
index 0000000..1bb8bf6
--- /dev/null
+++ b/arch/parisc/Build
@@ -0,0 +1 @@
+# empty
diff --git a/arch/powerpc/Build b/arch/powerpc/Build
new file mode 100644
index 0000000..db52fa2
--- /dev/null
+++ b/arch/powerpc/Build
@@ -0,0 +1,2 @@
+libperf-y += util/
+libperf-y += tests/
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
new file mode 100644
index 0000000..a111239
--- /dev/null
+++ b/arch/powerpc/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
+
+HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+PERF_HAVE_JITDUMP := 1
+
+# Syscall table generation for perf: mksyscalltbl is run over the
+# powerpc unistd.h once per word size (32 and 64) and its output is
+# captured as syscalls_32.c / syscalls_64.c under $(out).
+
+out    := $(OUTPUT)arch/powerpc/include/generated/asm
+header32 := $(out)/syscalls_32.c
+header64 := $(out)/syscalls_64.c
+sysdef := $(srctree)/tools/arch/powerpc/include/uapi/asm/unistd.h
+sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls/
+systbl := $(sysprf)/mksyscalltbl
+
+# Create the output directory while the makefile is parsed, if it is missing
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header64): $(sysdef) $(systbl)
+	$(Q)$(SHELL) '$(systbl)' '64' '$(CC)' $(sysdef) > $@
+
+$(header32): $(sysdef) $(systbl)
+	$(Q)$(SHELL) '$(systbl)' '32' '$(CC)' $(sysdef) > $@
+
+clean::
+	$(call QUIET_CLEAN, powerpc) $(RM) $(header32) $(header64)
+
+archheaders: $(header32) $(header64)
diff --git a/arch/powerpc/annotate/instructions.c b/arch/powerpc/annotate/instructions.c
new file mode 100644
index 0000000..a3f423c
--- /dev/null
+++ b/arch/powerpc/annotate/instructions.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+
+/*
+ * Pick jump, call or ret ops for a powerpc branch mnemonic; NULL otherwise.
+ */
+static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, const char *name)
+{
+	struct ins_ops *ops = &jump_ops;
+	int last;
+
+	/* Anything that does not begin with 'b' is of no interest here. */
+	if (name[0] != 'b')
+		return NULL;
+
+	/* A few mnemonics begin with 'b' without being branches. */
+	if (strncmp(name, "bcd", 3) == 0 ||
+	    strncmp(name, "brinc", 5) == 0 ||
+	    strncmp(name, "bper", 4) == 0)
+		return NULL;
+
+	last = strlen(name) - 1;
+	if (last < 0)
+		return NULL;
+
+	/* Step over an optional '+' / '-' hint at the end of the mnemonic. */
+	if (name[last] == '+' || name[last] == '-')
+		last--;
+
+	if (name[last] == 'l' || (name[last] == 'a' && name[last - 1] == 'l')) {
+		/*
+		 * A trailing 'l' or 'la' means LR is updated, so treat the
+		 * instruction as a call -- unless it is 'bnl' (branch if
+		 * not less than) or its absolute form, hinted or not.
+		 */
+		int is_bnl = !strcmp(name, "bnl") || !strcmp(name, "bnl+") ||
+			     !strcmp(name, "bnl-") || !strcmp(name, "bnla") ||
+			     !strcmp(name, "bnla+") || !strcmp(name, "bnla-");
+
+		if (!is_bnl)
+			ops = &call_ops;
+	}
+
+	/* A trailing 'lr' marks a return instruction. */
+	if (name[last] == 'r' && name[last - 1] == 'l')
+		ops = &ret_ops;
+
+	arch__associate_ins_ops(arch, name, ops);
+	return ops;
+}
+
+static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+	/* The hooks only need installing once per arch instance. */
+	if (arch->initialized)
+		return 0;
+	arch->initialized = true;
+	arch->associate_instruction_ops = powerpc__associate_instruction_ops;
+	arch->objdump.comment_char      = '#';
+	return 0;
+}
diff --git a/arch/powerpc/entry/syscalls/mksyscalltbl b/arch/powerpc/entry/syscalls/mksyscalltbl
new file mode 100755
index 0000000..ef52e1d
--- /dev/null
+++ b/arch/powerpc/entry/syscalls/mksyscalltbl
@@ -0,0 +1,37 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate system call table for perf. Derived from
+# s390 script.
+#
+# Copyright IBM Corp. 2017
+# Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+
+wordsize=$1
+gcc=$2
+input=$3
+
+if ! test -r "$input"; then	# quoted so that an empty $input fails the check
+	echo "Could not read input file" >&2
+	exit 1
+fi
+
+create_table()	# turn "<name> <number>" lines on stdin into a C string table
+{
+	local wordsize=$1
+	local max_nr
+
+	echo "static const char *syscalltbl_powerpc_${wordsize}[] = {"
+	while read sc nr; do	# sc: syscall name, nr: its number
+		printf '\t[%d] = "%s",\n' $nr $sc
+		max_nr=$nr	# ends up holding the number from the last line read
+	done
+	echo '};'
+	echo "#define SYSCALLTBL_POWERPC_${wordsize}_MAX_ID $max_nr"
+}
+
+# Dump the __NR_* macros, keep "<name> <nr>", sort by number, build the table.
+# $gcc is left unquoted so a multi-word compiler command still word-splits.
+$gcc -m${wordsize} -E -dM -x c "$input" | sed -ne 's/^#define __NR_//p' \
+	| sort -t' ' -k2 -nu | create_table ${wordsize}
diff --git a/arch/powerpc/include/arch-tests.h b/arch/powerpc/include/arch-tests.h
new file mode 100644
index 0000000..1c7be75
--- /dev/null
+++ b/arch/powerpc/include/arch-tests.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+int test__arch_unwind_sample(struct perf_sample *sample,
+			     struct thread *thread);
+#endif
+
+extern struct test arch_tests[];
+
+#endif
diff --git a/arch/powerpc/include/dwarf-regs-table.h b/arch/powerpc/include/dwarf-regs-table.h
new file mode 100644
index 0000000..66dc015
--- /dev/null
+++ b/arch/powerpc/include/dwarf-regs-table.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+/*
+ * Reference:
+ * http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html
+ * http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf
+ */
+#define REG_DWARFNUM_NAME(reg, idx)	[idx] = "%" #reg /* designated entry: slot idx holds "%reg" */
+
+static const char * const powerpc_regstr_tbl[] = {	/* the first 32 slots are filled positionally */
+	"%gpr0", "%gpr1", "%gpr2", "%gpr3", "%gpr4",
+	"%gpr5", "%gpr6", "%gpr7", "%gpr8", "%gpr9",
+	"%gpr10", "%gpr11", "%gpr12", "%gpr13", "%gpr14",
+	"%gpr15", "%gpr16", "%gpr17", "%gpr18", "%gpr19",
+	"%gpr20", "%gpr21", "%gpr22", "%gpr23", "%gpr24",
+	"%gpr25", "%gpr26", "%gpr27", "%gpr28", "%gpr29",
+	"%gpr30", "%gpr31",	/* positional entries end at index 31 */
+	REG_DWARFNUM_NAME(msr,   66),
+	REG_DWARFNUM_NAME(ctr,   109),
+	REG_DWARFNUM_NAME(link,  108),
+	REG_DWARFNUM_NAME(xer,   101),
+	REG_DWARFNUM_NAME(dar,   119),
+	REG_DWARFNUM_NAME(dsisr, 118),
+};
+
+#endif
diff --git a/arch/powerpc/include/perf_regs.h b/arch/powerpc/include/perf_regs.h
new file mode 100644
index 0000000..00e37b1
--- /dev/null
+++ b/arch/powerpc/include/perf_regs.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
+#ifdef __powerpc64__
+	#define PERF_SAMPLE_REGS_ABI	PERF_SAMPLE_REGS_ABI_64
+#else
+	#define PERF_SAMPLE_REGS_ABI	PERF_SAMPLE_REGS_ABI_32
+#endif
+
+#define PERF_REG_IP     PERF_REG_POWERPC_NIP
+#define PERF_REG_SP     PERF_REG_POWERPC_R1
+
+static const char *reg_names[] = {
+	[PERF_REG_POWERPC_R0] = "r0",
+	[PERF_REG_POWERPC_R1] = "r1",
+	[PERF_REG_POWERPC_R2] = "r2",
+	[PERF_REG_POWERPC_R3] = "r3",
+	[PERF_REG_POWERPC_R4] = "r4",
+	[PERF_REG_POWERPC_R5] = "r5",
+	[PERF_REG_POWERPC_R6] = "r6",
+	[PERF_REG_POWERPC_R7] = "r7",
+	[PERF_REG_POWERPC_R8] = "r8",
+	[PERF_REG_POWERPC_R9] = "r9",
+	[PERF_REG_POWERPC_R10] = "r10",
+	[PERF_REG_POWERPC_R11] = "r11",
+	[PERF_REG_POWERPC_R12] = "r12",
+	[PERF_REG_POWERPC_R13] = "r13",
+	[PERF_REG_POWERPC_R14] = "r14",
+	[PERF_REG_POWERPC_R15] = "r15",
+	[PERF_REG_POWERPC_R16] = "r16",
+	[PERF_REG_POWERPC_R17] = "r17",
+	[PERF_REG_POWERPC_R18] = "r18",
+	[PERF_REG_POWERPC_R19] = "r19",
+	[PERF_REG_POWERPC_R20] = "r20",
+	[PERF_REG_POWERPC_R21] = "r21",
+	[PERF_REG_POWERPC_R22] = "r22",
+	[PERF_REG_POWERPC_R23] = "r23",
+	[PERF_REG_POWERPC_R24] = "r24",
+	[PERF_REG_POWERPC_R25] = "r25",
+	[PERF_REG_POWERPC_R26] = "r26",
+	[PERF_REG_POWERPC_R27] = "r27",
+	[PERF_REG_POWERPC_R28] = "r28",
+	[PERF_REG_POWERPC_R29] = "r29",
+	[PERF_REG_POWERPC_R30] = "r30",
+	[PERF_REG_POWERPC_R31] = "r31",
+	[PERF_REG_POWERPC_NIP] = "nip",
+	[PERF_REG_POWERPC_MSR] = "msr",
+	[PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
+	[PERF_REG_POWERPC_CTR] = "ctr",
+	[PERF_REG_POWERPC_LINK] = "link",
+	[PERF_REG_POWERPC_XER] = "xer",
+	[PERF_REG_POWERPC_CCR] = "ccr",
+	[PERF_REG_POWERPC_SOFTE] = "softe",
+	[PERF_REG_POWERPC_TRAP] = "trap",
+	[PERF_REG_POWERPC_DAR] = "dar",
+	[PERF_REG_POWERPC_DSISR] = "dsisr"
+};
+
+static inline const char *perf_reg_name(int id)	/* name stored in reg_names[] for id */
+{
+	return reg_names[id];	/* NOTE(review): id is not range-checked here */
+}
+#endif /* ARCH_PERF_REGS_H */
diff --git a/arch/powerpc/tests/Build b/arch/powerpc/tests/Build
new file mode 100644
index 0000000..d827ef3
--- /dev/null
+++ b/arch/powerpc/tests/Build
@@ -0,0 +1,4 @@
+libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o
+libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
+
+libperf-y += arch-tests.o
diff --git a/arch/powerpc/tests/arch-tests.c b/arch/powerpc/tests/arch-tests.c
new file mode 100644
index 0000000..8c3fbd4
--- /dev/null
+++ b/arch/powerpc/tests/arch-tests.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+	{
+		.desc = "Test dwarf unwind",
+		.func = test__dwarf_unwind,
+	},
+#endif
+	{
+		.func = NULL,	/* presumably the end-of-table marker -- confirm against the test runner */
+	},
+};
diff --git a/arch/powerpc/tests/dwarf-unwind.c b/arch/powerpc/tests/dwarf-unwind.c
new file mode 100644
index 0000000..30cbbd6
--- /dev/null
+++ b/arch/powerpc/tests/dwarf-unwind.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+			 struct thread *thread, u64 *regs)
+{
+	/* r1 is the stack pointer (see PERF_REG_SP). */
+	unsigned long sp = (unsigned long) regs[PERF_REG_POWERPC_R1];
+	struct map *stack_map;
+	u64 len, *copy;
+
+	copy = malloc(STACK_SIZE);
+	if (copy == NULL) {
+		pr_debug("failed to allocate sample uregs data\n");
+		return -1;
+	}
+
+	stack_map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	if (stack_map == NULL) {
+		pr_debug("failed to get stack map\n");
+		free(copy);
+		return -1;
+	}
+
+	/* Snapshot from sp to the end of its map, at most STACK_SIZE bytes. */
+	len = stack_map->end - sp;
+	if (len > STACK_SIZE)
+		len = STACK_SIZE;
+	memcpy(copy, (void *) sp, len);
+
+	sample->user_stack.data = (char *) copy;
+	sample->user_stack.size = len;
+	return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+			     struct thread *thread)
+{
+	u64 *uregs = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+
+	if (uregs == NULL) {
+		pr_debug("failed to allocate sample uregs data\n");
+		return -1;
+	}
+
+	/* Capture the live registers and record them in the sample. */
+	perf_regs_load(uregs);
+	sample->user_regs.abi  = PERF_SAMPLE_REGS_ABI;
+	sample->user_regs.regs = uregs;
+	sample->user_regs.mask = PERF_REGS_MASK;
+
+	/* The saved r1 value tells sample_ustack() where to copy from. */
+	return sample_ustack(sample, thread, uregs);
+}
diff --git a/arch/powerpc/tests/regs_load.S b/arch/powerpc/tests/regs_load.S
new file mode 100644
index 0000000..36a20b0
--- /dev/null
+++ b/arch/powerpc/tests/regs_load.S
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
+/* Offset is based on macros from arch/powerpc/include/uapi/asm/ptrace.h. */
+#define R0	 0
+#define R1	 1 * 8
+#define R2	 2 * 8
+#define R3	 3 * 8
+#define R4	 4 * 8
+#define R5	 5 * 8
+#define R6	 6 * 8
+#define R7	 7 * 8
+#define R8	 8 * 8
+#define R9	 9 * 8
+#define R10	10 * 8
+#define R11	11 * 8
+#define R12	12 * 8
+#define R13	13 * 8
+#define R14	14 * 8
+#define R15	15 * 8
+#define R16	16 * 8
+#define R17	17 * 8
+#define R18	18 * 8
+#define R19	19 * 8
+#define R20	20 * 8
+#define R21	21 * 8
+#define R22	22 * 8
+#define R23	23 * 8
+#define R24	24 * 8
+#define R25	25 * 8
+#define R26	26 * 8
+#define R27	27 * 8
+#define R28	28 * 8
+#define R29	29 * 8
+#define R30	30 * 8
+#define R31	31 * 8
+#define NIP	32 * 8
+#define CTR	35 * 8
+#define LINK	36 * 8
+#define XER	37 * 8
+
+.globl perf_regs_load
+perf_regs_load:	/* r3 is the base address for every store below */
+	std 0, R0(3)
+	std 1, R1(3)
+	std 2, R2(3)
+	std 3, R3(3)
+	std 4, R4(3)
+	std 5, R5(3)
+	std 6, R6(3)
+	std 7, R7(3)
+	std 8, R8(3)
+	std 9, R9(3)
+	std 10, R10(3)
+	std 11, R11(3)
+	std 12, R12(3)
+	std 13, R13(3)
+	std 14, R14(3)
+	std 15, R15(3)
+	std 16, R16(3)
+	std 17, R17(3)
+	std 18, R18(3)
+	std 19, R19(3)
+	std 20, R20(3)
+	std 21, R21(3)
+	std 22, R22(3)
+	std 23, R23(3)
+	std 24, R24(3)
+	std 25, R25(3)
+	std 26, R26(3)
+	std 27, R27(3)
+	std 28, R28(3)
+	std 29, R29(3)
+	std 30, R30(3)
+	std 31, R31(3)
+
+	/* Store NIP: the value written is LR, read into r4 as scratch */
+	mflr 4
+	std 4, NIP(3)
+
+	/* Store LR (r4 still holds it from the mflr above) */
+	std 4, LINK(3)
+
+	/* Store XER */
+	mfxer 4
+	std 4, XER(3)
+
+	/* Store CTR */
+	mfctr 4
+	std 4, CTR(3)
+
+	/* Restore original value of r4 from the slot saved at the top */
+	ld 4, R4(3)
+
+	blr
diff --git a/arch/powerpc/util/Build b/arch/powerpc/util/Build
new file mode 100644
index 0000000..2e65953
--- /dev/null
+++ b/arch/powerpc/util/Build
@@ -0,0 +1,10 @@
+libperf-y += header.o
+libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
+libperf-y += perf_regs.o
+
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
+
+libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/arch/powerpc/util/book3s_hcalls.h b/arch/powerpc/util/book3s_hcalls.h
new file mode 100644
index 0000000..54cfa05
--- /dev/null
+++ b/arch/powerpc/util/book3s_hcalls.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H
+#define ARCH_PERF_BOOK3S_HV_HCALLS_H
+
+/*
+ * PowerPC HCALL codes : hcall code to name mapping
+ */
+#define kvm_trace_symbol_hcall \
+	{0x4, "H_REMOVE"},					\
+	{0x8, "H_ENTER"},					\
+	{0xc, "H_READ"},					\
+	{0x10, "H_CLEAR_MOD"},					\
+	{0x14, "H_CLEAR_REF"},					\
+	{0x18, "H_PROTECT"},					\
+	{0x1c, "H_GET_TCE"},					\
+	{0x20, "H_PUT_TCE"},					\
+	{0x24, "H_SET_SPRG0"},					\
+	{0x28, "H_SET_DABR"},					\
+	{0x2c, "H_PAGE_INIT"},					\
+	{0x30, "H_SET_ASR"},					\
+	{0x34, "H_ASR_ON"},					\
+	{0x38, "H_ASR_OFF"},					\
+	{0x3c, "H_LOGICAL_CI_LOAD"},				\
+	{0x40, "H_LOGICAL_CI_STORE"},				\
+	{0x44, "H_LOGICAL_CACHE_LOAD"},				\
+	{0x48, "H_LOGICAL_CACHE_STORE"},			\
+	{0x4c, "H_LOGICAL_ICBI"},				\
+	{0x50, "H_LOGICAL_DCBF"},				\
+	{0x54, "H_GET_TERM_CHAR"},				\
+	{0x58, "H_PUT_TERM_CHAR"},				\
+	{0x5c, "H_REAL_TO_LOGICAL"},				\
+	{0x60, "H_HYPERVISOR_DATA"},				\
+	{0x64, "H_EOI"},					\
+	{0x68, "H_CPPR"},					\
+	{0x6c, "H_IPI"},					\
+	{0x70, "H_IPOLL"},					\
+	{0x74, "H_XIRR"},					\
+	{0x78, "H_MIGRATE_DMA"},				\
+	{0x7c, "H_PERFMON"},					\
+	{0xdc, "H_REGISTER_VPA"},				\
+	{0xe0, "H_CEDE"},					\
+	{0xe4, "H_CONFER"},					\
+	{0xe8, "H_PROD"},					\
+	{0xec, "H_GET_PPP"},					\
+	{0xf0, "H_SET_PPP"},					\
+	{0xf4, "H_PURR"},					\
+	{0xf8, "H_PIC"},					\
+	{0xfc, "H_REG_CRQ"},					\
+	{0x100, "H_FREE_CRQ"},					\
+	{0x104, "H_VIO_SIGNAL"},				\
+	{0x108, "H_SEND_CRQ"},					\
+	{0x110, "H_COPY_RDMA"},					\
+	{0x114, "H_REGISTER_LOGICAL_LAN"},			\
+	{0x118, "H_FREE_LOGICAL_LAN"},				\
+	{0x11c, "H_ADD_LOGICAL_LAN_BUFFER"},			\
+	{0x120, "H_SEND_LOGICAL_LAN"},				\
+	{0x124, "H_BULK_REMOVE"},				\
+	{0x130, "H_MULTICAST_CTRL"},				\
+	{0x134, "H_SET_XDABR"},					\
+	{0x138, "H_STUFF_TCE"},					\
+	{0x13c, "H_PUT_TCE_INDIRECT"},				\
+	{0x14c, "H_CHANGE_LOGICAL_LAN_MAC"},			\
+	{0x150, "H_VTERM_PARTNER_INFO"},			\
+	{0x154, "H_REGISTER_VTERM"},				\
+	{0x158, "H_FREE_VTERM"},				\
+	{0x15c, "H_RESET_EVENTS"},				\
+	{0x160, "H_ALLOC_RESOURCE"},				\
+	{0x164, "H_FREE_RESOURCE"},				\
+	{0x168, "H_MODIFY_QP"},					\
+	{0x16c, "H_QUERY_QP"},					\
+	{0x170, "H_REREGISTER_PMR"},				\
+	{0x174, "H_REGISTER_SMR"},				\
+	{0x178, "H_QUERY_MR"},					\
+	{0x17c, "H_QUERY_MW"},					\
+	{0x180, "H_QUERY_HCA"},					\
+	{0x184, "H_QUERY_PORT"},				\
+	{0x188, "H_MODIFY_PORT"},				\
+	{0x18c, "H_DEFINE_AQP1"},				\
+	{0x190, "H_GET_TRACE_BUFFER"},				\
+	{0x194, "H_DEFINE_AQP0"},				\
+	{0x198, "H_RESIZE_MR"},					\
+	{0x19c, "H_ATTACH_MCQP"},				\
+	{0x1a0, "H_DETACH_MCQP"},				\
+	{0x1a4, "H_CREATE_RPT"},				\
+	{0x1a8, "H_REMOVE_RPT"},				\
+	{0x1ac, "H_REGISTER_RPAGES"},				\
+	{0x1b0, "H_DISABLE_AND_GETC"},				\
+	{0x1b4, "H_ERROR_DATA"},				\
+	{0x1b8, "H_GET_HCA_INFO"},				\
+	{0x1bc, "H_GET_PERF_COUNT"},				\
+	{0x1c0, "H_MANAGE_TRACE"},				\
+	{0x1d4, "H_FREE_LOGICAL_LAN_BUFFER"},			\
+	{0x1d8, "H_POLL_PENDING"},				\
+	{0x1e4, "H_QUERY_INT_STATE"},				\
+	{0x244, "H_ILLAN_ATTRIBUTES"},				\
+	{0x250, "H_MODIFY_HEA_QP"},				\
+	{0x254, "H_QUERY_HEA_QP"},				\
+	{0x258, "H_QUERY_HEA"},					\
+	{0x25c, "H_QUERY_HEA_PORT"},				\
+	{0x260, "H_MODIFY_HEA_PORT"},				\
+	{0x264, "H_REG_BCMC"},					\
+	{0x268, "H_DEREG_BCMC"},				\
+	{0x26c, "H_REGISTER_HEA_RPAGES"},			\
+	{0x270, "H_DISABLE_AND_GET_HEA"},			\
+	{0x274, "H_GET_HEA_INFO"},				\
+	{0x278, "H_ALLOC_HEA_RESOURCE"},			\
+	{0x284, "H_ADD_CONN"},					\
+	{0x288, "H_DEL_CONN"},					\
+	{0x298, "H_JOIN"},					\
+	{0x2a4, "H_VASI_STATE"},				\
+	{0x2b0, "H_ENABLE_CRQ"},				\
+	{0x2b8, "H_GET_EM_PARMS"},				\
+	{0x2d0, "H_SET_MPP"},					\
+	{0x2d4, "H_GET_MPP"},					\
+	{0x2ec, "H_HOME_NODE_ASSOCIATIVITY"},			\
+	{0x2f4, "H_BEST_ENERGY"},				\
+	{0x2fc, "H_XIRR_X"},					\
+	{0x300, "H_RANDOM"},					\
+	{0x304, "H_COP"},					\
+	{0x314, "H_GET_MPP_X"},					\
+	{0x31c, "H_SET_MODE"},					\
+	{0xf000, "H_RTAS"}					\
+
+#endif
diff --git a/arch/powerpc/util/book3s_hv_exits.h b/arch/powerpc/util/book3s_hv_exits.h
new file mode 100644
index 0000000..853b95d
--- /dev/null
+++ b/arch/powerpc/util/book3s_hv_exits.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H
+#define ARCH_PERF_BOOK3S_HV_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors : exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+	{0x0,	"RETURN_TO_HOST"}, \
+	{0x100, "SYSTEM_RESET"}, \
+	{0x200, "MACHINE_CHECK"}, \
+	{0x300, "DATA_STORAGE"}, \
+	{0x380, "DATA_SEGMENT"}, \
+	{0x400, "INST_STORAGE"}, \
+	{0x480, "INST_SEGMENT"}, \
+	{0x500, "EXTERNAL"}, \
+	{0x501, "EXTERNAL_LEVEL"}, \
+	{0x502, "EXTERNAL_HV"}, \
+	{0x600, "ALIGNMENT"}, \
+	{0x700, "PROGRAM"}, \
+	{0x800, "FP_UNAVAIL"}, \
+	{0x900, "DECREMENTER"}, \
+	{0x980, "HV_DECREMENTER"}, \
+	{0xc00, "SYSCALL"}, \
+	{0xd00, "TRACE"}, \
+	{0xe00, "H_DATA_STORAGE"}, \
+	{0xe20, "H_INST_STORAGE"}, \
+	{0xe40, "H_EMUL_ASSIST"}, \
+	{0xf00, "PERFMON"}, \
+	{0xf20, "ALTIVEC"}, \
+	{0xf40, "VSX"}
+
+#endif
diff --git a/arch/powerpc/util/dwarf-regs.c b/arch/powerpc/util/dwarf-regs.c
new file mode 100644
index 0000000..98ac870
--- /dev/null
+++ b/arch/powerpc/util/dwarf-regs.c
@@ -0,0 +1,105 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Ian Munsie, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <errno.h>
+#include <string.h>
+#include <dwarf-regs.h>
+#include <linux/ptrace.h>
+#include <linux/kernel.h>
+#include <linux/stringify.h>
+#include "util.h"
+
+struct pt_regs_dwarfnum {
+	const char *name;		/* register name string, e.g. "%gpr0" */
+	unsigned int dwarfnum;		/* DWARF register number */
+	unsigned int ptregs_offset;	/* offsetof() the register within struct pt_regs */
+};
+
+#define REG_DWARFNUM_NAME(r, num)					\
+		{.name = __stringify(%)__stringify(r), .dwarfnum = num,			\
+		.ptregs_offset = offsetof(struct pt_regs, r)}
+#define GPR_DWARFNUM_NAME(num)						\
+		{.name = __stringify(%gpr##num), .dwarfnum = num,		\
+		.ptregs_offset = offsetof(struct pt_regs, gpr[num])}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0}
+
+/*
+ * Reference:
+ * http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html
+ */
+static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
+	GPR_DWARFNUM_NAME(0),
+	GPR_DWARFNUM_NAME(1),
+	GPR_DWARFNUM_NAME(2),
+	GPR_DWARFNUM_NAME(3),
+	GPR_DWARFNUM_NAME(4),
+	GPR_DWARFNUM_NAME(5),
+	GPR_DWARFNUM_NAME(6),
+	GPR_DWARFNUM_NAME(7),
+	GPR_DWARFNUM_NAME(8),
+	GPR_DWARFNUM_NAME(9),
+	GPR_DWARFNUM_NAME(10),
+	GPR_DWARFNUM_NAME(11),
+	GPR_DWARFNUM_NAME(12),
+	GPR_DWARFNUM_NAME(13),
+	GPR_DWARFNUM_NAME(14),
+	GPR_DWARFNUM_NAME(15),
+	GPR_DWARFNUM_NAME(16),
+	GPR_DWARFNUM_NAME(17),
+	GPR_DWARFNUM_NAME(18),
+	GPR_DWARFNUM_NAME(19),
+	GPR_DWARFNUM_NAME(20),
+	GPR_DWARFNUM_NAME(21),
+	GPR_DWARFNUM_NAME(22),
+	GPR_DWARFNUM_NAME(23),
+	GPR_DWARFNUM_NAME(24),
+	GPR_DWARFNUM_NAME(25),
+	GPR_DWARFNUM_NAME(26),
+	GPR_DWARFNUM_NAME(27),
+	GPR_DWARFNUM_NAME(28),
+	GPR_DWARFNUM_NAME(29),
+	GPR_DWARFNUM_NAME(30),
+	GPR_DWARFNUM_NAME(31),
+	REG_DWARFNUM_NAME(msr,   66),
+	REG_DWARFNUM_NAME(ctr,   109),
+	REG_DWARFNUM_NAME(link,  108),
+	REG_DWARFNUM_NAME(xer,   101),
+	REG_DWARFNUM_NAME(dar,   119),
+	REG_DWARFNUM_NAME(dsisr, 118),
+	REG_DWARFNUM_END,
+};
+
+/**
+ * get_arch_regstr() - look up a register name by its DWARF register number
+ * @n:	the DWARF register number
+ *
+ * Scans regdwarfnum_table for an entry whose DWARF number equals @n and
+ * returns that entry's name, or NULL when no entry matches.
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+	size_t i;
+
+	for (i = 0; regdwarfnum_table[i].name != NULL; i++)
+		if (regdwarfnum_table[i].dwarfnum == n)
+			return regdwarfnum_table[i].name;
+	return NULL;
+}
+
+int regs_query_register_offset(const char *name)
+{
+	size_t i = 0;	/* walk until the NULL-named end marker */
+	for (; regdwarfnum_table[i].name != NULL; i++)
+		if (strcmp(regdwarfnum_table[i].name, name) == 0)
+			return regdwarfnum_table[i].ptregs_offset;
+	return -EINVAL;
+}
diff --git a/arch/powerpc/util/header.c b/arch/powerpc/util/header.c
new file mode 100644
index 0000000..0b24266
--- /dev/null
+++ b/arch/powerpc/util/header.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/stringify.h>
+#include "header.h"
+#include "util.h"
+
+#define mfspr(rn)       ({unsigned long rval; \
+			 asm volatile("mfspr %0," __stringify(rn) \
+				      : "=r" (rval)); rval; })
+
+#define SPRN_PVR        0x11F	/* Processor Version Register */
+#define PVR_VER(pvr)    (((pvr) >>  16) & 0xFFFF) /* Version field */
+#define PVR_REV(pvr)    (((pvr) >>   0) & 0xFFFF) /* Revision field */
+
+int
+get_cpuid(char *buffer, size_t sz)
+{
+	unsigned long pvr = mfspr(SPRN_PVR);
+	int len;
+
+	/* "<version>,<revision>" followed by a '$' end marker */
+	len = scnprintf(buffer, sz, "%lu,%lu$", PVR_VER(pvr), PVR_REV(pvr));
+
+	/* no marker means the output was cut short */
+	if (strchr(buffer, '$') == NULL)
+		return -1;
+
+	/* the marker is the last character written; drop it */
+	buffer[len - 1] = '\0';
+	return 0;
+}
+
+char *
+get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+	char *pvr_str = NULL;
+
+	/* only trust the output pointer when asprintf() reports success */
+	if (asprintf(&pvr_str, "%.8lx", mfspr(SPRN_PVR)) >= 0)
+		return pvr_str;
+	return NULL;
+}
diff --git a/arch/powerpc/util/kvm-stat.c b/arch/powerpc/util/kvm-stat.c
new file mode 100644
index 0000000..596ad6a
--- /dev/null
+++ b/arch/powerpc/util/kvm-stat.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include "util/kvm-stat.h"
+#include "util/parse-events.h"
+#include "util/debug.h"
+
+#include "book3s_hv_exits.h"
+#include "book3s_hcalls.h"
+
+#define NR_TPS 4
+
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 40;
+const char *kvm_entry_trace = "kvm_hv:kvm_guest_enter";
+const char *kvm_exit_trace = "kvm_hv:kvm_guest_exit";
+
+define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall);
+
+/* Tracepoints specific to ppc_book3s_hv */
+const char *ppc_book3s_hv_kvm_tp[] = {
+	"kvm_hv:kvm_guest_enter",
+	"kvm_hv:kvm_guest_exit",
+	"kvm_hv:kvm_hcall_enter",
+	"kvm_hv:kvm_hcall_exit",
+	NULL,
+};
+
+/* 1 extra placeholder for NULL */
+const char *kvm_events_tp[NR_TPS + 1];
+const char *kvm_exit_reason;
+
+static void hcall_event_get_key(struct perf_evsel *evsel,
+				struct perf_sample *sample,
+				struct event_key *key)
+{
+	key->key = perf_evsel__intval(evsel, sample, "req"); /* hcall code */
+	key->info = 0;
+}
+
+/* Map an hcall code to its name from hcall_reasons; "UNKNOWN" if absent. */
+static const char *get_hcall_exit_reason(u64 exit_code)
+{
+	struct exit_reasons_table *tbl;
+
+	for (tbl = hcall_reasons; tbl->reason != NULL; tbl++)
+		if (tbl->exit_code == exit_code)
+			return tbl->reason;
+
+	/* the argument is cast to unsigned long long, hence %llu */
+	pr_debug("Unknown hcall code: %llu\n",
+	       (unsigned long long)exit_code);
+	return "UNKNOWN";
+}
+
+static bool hcall_event_end(struct perf_evsel *evsel,
+			    struct perf_sample *sample __maybe_unused,
+			    struct event_key *key __maybe_unused)
+{
+	return strcmp(evsel->name, kvm_events_tp[3]) == 0; /* [3]: hcall exit */
+}
+
+static bool hcall_event_begin(struct perf_evsel *evsel,
+			      struct perf_sample *sample, struct event_key *key)
+{
+	/* kvm_events_tp[2] is the hcall-enter tracepoint */
+	if (strcmp(evsel->name, kvm_events_tp[2]) != 0)
+		return false;
+
+	hcall_event_get_key(evsel, sample, key);
+	return true;
+}
+static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+				   struct event_key *key,
+				   char *decode)
+{
+	/* at most decode_str_len bytes of decode are written */
+	scnprintf(decode, decode_str_len, "%s",
+		  get_hcall_exit_reason(key->key));
+}
+
+static struct kvm_events_ops hcall_events = {
+	.is_begin_event = hcall_event_begin,
+	.is_end_event = hcall_event_end,
+	.decode_key = hcall_event_decode_key,
+	.name = "HCALL-EVENT",
+};
+
+static struct kvm_events_ops exit_events = {
+	.is_begin_event = exit_event_begin,
+	.is_end_event = exit_event_end,
+	.decode_key = exit_event_decode_key,
+	.name = "VM-EXIT"
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+	{ .name = "vmexit", .ops = &exit_events },
+	{ .name = "hcall", .ops = &hcall_events },
+	{ NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+	NULL,
+};
+
+
+static int is_tracepoint_available(const char *str, struct perf_evlist *evlist)
+{
+	struct parse_events_error err;
+	int ret;
+
+	err.str = NULL;	/* only .str is initialised and inspected here */
+	ret = parse_events(evlist, str, &err);
+	if (err.str)
+		pr_err("%s : %s\n", str, err.str);
+	return ret;	/* parse_events() result; the caller treats non-zero as failure */
+}
+
+static int ppc__setup_book3s_hv(struct perf_kvm_stat *kvm,
+				struct perf_evlist *evlist)
+{
+	int idx, nr_tp;
+
+	/* Every book3s_hv tracepoint must parse, or the setup is refused. */
+	for (nr_tp = 0; ppc_book3s_hv_kvm_tp[nr_tp] != NULL; nr_tp++) {
+		if (is_tracepoint_available(ppc_book3s_hv_kvm_tp[nr_tp],
+					    evlist))
+			return -1;
+	}
+
+	/* Publish the tracepoint names, followed by the NULL terminator. */
+	for (idx = 0; idx < nr_tp; idx++)
+		kvm_events_tp[idx] = ppc_book3s_hv_kvm_tp[idx];
+	kvm_events_tp[nr_tp] = NULL;
+
+	/* Select the HV exit-reason table and its labels. */
+	kvm_exit_reason = "trap";
+	kvm->exit_reasons = hv_exit_reasons;
+	kvm->exit_reasons_isa = "HV";
+
+	return 0;
+}
+
+/* Wrapper to setup kvm tracepoints */
+static int ppc__setup_kvm_tp(struct perf_kvm_stat *kvm)
+{
+	struct perf_evlist *evlist = perf_evlist__new();
+	/* Right now, only supported on book3s_hv */
+	int ret = evlist ? ppc__setup_book3s_hv(kvm, evlist) : -ENOMEM;
+
+	if (evlist != NULL)
+		perf_evlist__delete(evlist); /* probe-only list, not kept */
+	return ret;
+}
+
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm)
+{
+	return ppc__setup_kvm_tp(kvm);	/* 0 on success, negative on failure */
+}
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+	int err = ppc__setup_kvm_tp(kvm);
+
+	if (err == 0)
+		return 0;
+
+	/* Setup failed: leave no exit-reason table selected. */
+	kvm->exit_reasons = NULL;
+	kvm->exit_reasons_isa = NULL;
+	return err;
+}
diff --git a/arch/powerpc/util/perf_regs.c b/arch/powerpc/util/perf_regs.c
new file mode 100644
index 0000000..ec50939
--- /dev/null
+++ b/arch/powerpc/util/perf_regs.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <string.h>
+#include <regex.h>
+
+#include "../../perf.h"
+#include "../../util/util.h"
+#include "../../util/perf_regs.h"
+#include "../../util/debug.h"
+
+const struct sample_reg sample_reg_masks[] = {
+	SMPL_REG(r0, PERF_REG_POWERPC_R0),
+	SMPL_REG(r1, PERF_REG_POWERPC_R1),
+	SMPL_REG(r2, PERF_REG_POWERPC_R2),
+	SMPL_REG(r3, PERF_REG_POWERPC_R3),
+	SMPL_REG(r4, PERF_REG_POWERPC_R4),
+	SMPL_REG(r5, PERF_REG_POWERPC_R5),
+	SMPL_REG(r6, PERF_REG_POWERPC_R6),
+	SMPL_REG(r7, PERF_REG_POWERPC_R7),
+	SMPL_REG(r8, PERF_REG_POWERPC_R8),
+	SMPL_REG(r9, PERF_REG_POWERPC_R9),
+	SMPL_REG(r10, PERF_REG_POWERPC_R10),
+	SMPL_REG(r11, PERF_REG_POWERPC_R11),
+	SMPL_REG(r12, PERF_REG_POWERPC_R12),
+	SMPL_REG(r13, PERF_REG_POWERPC_R13),
+	SMPL_REG(r14, PERF_REG_POWERPC_R14),
+	SMPL_REG(r15, PERF_REG_POWERPC_R15),
+	SMPL_REG(r16, PERF_REG_POWERPC_R16),
+	SMPL_REG(r17, PERF_REG_POWERPC_R17),
+	SMPL_REG(r18, PERF_REG_POWERPC_R18),
+	SMPL_REG(r19, PERF_REG_POWERPC_R19),
+	SMPL_REG(r20, PERF_REG_POWERPC_R20),
+	SMPL_REG(r21, PERF_REG_POWERPC_R21),
+	SMPL_REG(r22, PERF_REG_POWERPC_R22),
+	SMPL_REG(r23, PERF_REG_POWERPC_R23),
+	SMPL_REG(r24, PERF_REG_POWERPC_R24),
+	SMPL_REG(r25, PERF_REG_POWERPC_R25),
+	SMPL_REG(r26, PERF_REG_POWERPC_R26),
+	SMPL_REG(r27, PERF_REG_POWERPC_R27),
+	SMPL_REG(r28, PERF_REG_POWERPC_R28),
+	SMPL_REG(r29, PERF_REG_POWERPC_R29),
+	SMPL_REG(r30, PERF_REG_POWERPC_R30),
+	SMPL_REG(r31, PERF_REG_POWERPC_R31),
+	SMPL_REG(nip, PERF_REG_POWERPC_NIP),
+	SMPL_REG(msr, PERF_REG_POWERPC_MSR),
+	SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3),
+	SMPL_REG(ctr, PERF_REG_POWERPC_CTR),
+	SMPL_REG(link, PERF_REG_POWERPC_LINK),
+	SMPL_REG(xer, PERF_REG_POWERPC_XER),
+	SMPL_REG(ccr, PERF_REG_POWERPC_CCR),
+	SMPL_REG(softe, PERF_REG_POWERPC_SOFTE),
+	SMPL_REG(trap, PERF_REG_POWERPC_TRAP),
+	SMPL_REG(dar, PERF_REG_POWERPC_DAR),
+	SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
+	SMPL_REG_END
+};
+
+/* REG or %rREG */
+#define SDT_OP_REGEX1  "^(%r)?([1-2]?[0-9]|3[0-1])$"
+
+/* -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) */
+#define SDT_OP_REGEX2  "^(\\-)?([0-9]+)\\((%r)?([1-2]?[0-9]|3[0-1])\\)$"
+
+static regex_t sdt_op_regex1, sdt_op_regex2;
+
+/* Compile the SDT operand patterns on first use; 0 on success. */
+static int sdt_init_op_regex(void)
+{
+	static int initialized;
+	int err;
+
+	/* Both patterns are compiled once and then kept for later calls. */
+	if (initialized)
+		return 0;
+
+	err = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED);
+	if (err == 0) {
+		err = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED);
+		if (err == 0) {
+			initialized = 1;
+			return 0;
+		}
+		/* the second pattern failed: release the first one */
+		regfree(&sdt_op_regex1);
+	}
+
+	/* err is regcomp()'s own non-zero error code */
+	pr_debug4("Regex compilation error.\n");
+	return err;
+}
+
+/*
+ * Parse OP and convert it into uprobe format, which is, +/-NUM(%gprREG).
+ * Possible variants of OP are:
+ *	Format		Example
+ *	-------------------------
+ *	NUM(REG)	48(18)
+ *	-NUM(REG)	-48(18)
+ *	NUM(%rREG)	48(%r18)
+ *	-NUM(%rREG)	-48(%r18)
+ *	REG		18
+ *	%rREG		%r18
+ *	iNUM		i0	(constant: skipped)
+ *	i-NUM		i-1	(constant: skipped)
+ *
+ * REG is a general purpose register, 0 to 31; SDT markers use the %rREG
+ * form with -mregnames and the bare REG form with -mno-regnames. Returns
+ * SDT_ARG_VALID with *new_op allocated, SDT_ARG_SKIP, or a negative error.
+ */
+int arch_sdt_arg_parse_op(char *old_op, char **new_op)
+{
+	int ret, new_len;
+	regmatch_t rm[5];
+	char prefix;
+
+	/* Constant argument. Uprobe does not support it */
+	if (old_op[0] == 'i') {
+		pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+		return SDT_ARG_SKIP;
+	}
+
+	/* a regcomp() failure is non-zero but not necessarily negative */
+	ret = sdt_init_op_regex();
+	if (ret)
+		return ret < 0 ? ret : -EINVAL;
+
+	if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) {
+		/* REG or %rREG --> %gprREG */
+		new_len = 5;	/* % g p r NUL */
+		new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
+
+		*new_op = zalloc(new_len);
+		if (!*new_op)
+			return -ENOMEM;
+
+		scnprintf(*new_op, new_len, "%%gpr%.*s",
+			(int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so);
+	} else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) {
+		/*
+		 * -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) -->
+		 *	+/-NUM(%gprREG)
+		 */
+		prefix = (rm[1].rm_so == -1) ? '+' : '-';
+
+		new_len = 8;	/* +/- ( % g p r ) NUL */
+		new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
+		new_len += (int)(rm[4].rm_eo - rm[4].rm_so);
+
+		*new_op = zalloc(new_len);
+		if (!*new_op)
+			return -ENOMEM;
+
+		scnprintf(*new_op, new_len, "%c%.*s(%%gpr%.*s)", prefix,
+			(int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
+			(int)(rm[4].rm_eo - rm[4].rm_so), old_op + rm[4].rm_so);
+	} else {
+		pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+		return SDT_ARG_SKIP;
+	}
+
+	return SDT_ARG_VALID;
+}
diff --git a/arch/powerpc/util/skip-callchain-idx.c b/arch/powerpc/util/skip-callchain-idx.c
new file mode 100644
index 0000000..0c370f8
--- /dev/null
+++ b/arch/powerpc/util/skip-callchain-idx.c
@@ -0,0 +1,280 @@
+/*
+ * Use DWARF Debug information to skip unnecessary callchain entries.
+ *
+ * Copyright (C) 2014 Sukadev Bhattiprolu, IBM Corporation.
+ * Copyright (C) 2014 Ulrich Weigand, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <inttypes.h>
+#include <stdlib.h>
+#include <dwarf.h>
+#include <elfutils/libdwfl.h>
+#include "util/thread.h"
+#include "util/callchain.h"
+#include "util/debug.h"
+
+/*
+ * When saving the callchain on Power, the kernel conservatively saves
+ * excess entries in the callchain. A few of these entries are needed
+ * in some cases but not others. If the unnecessary entries are not
+ * ignored, we end up with duplicate arcs in the call-graphs. Use
+ * DWARF debug information to skip over any unnecessary callchain
+ * entries.
+ *
+ * See function header for arch_skip_callchain_idx() below for more details.
+ *
+ * The libdwfl code in this file is based on code from elfutils
+ * (libdwfl/argp-std.c, libdwfl/tests/addrcfi.c, etc).
+ */
+static char *debuginfo_path;	/* never assigned in this file, so stays NULL */
+
+static const Dwfl_Callbacks offline_callbacks = {
+	.debuginfo_path = &debuginfo_path,
+	.find_debuginfo = dwfl_standard_find_debuginfo,
+	.section_address = dwfl_offline_section_address,
+};	/* handed to dwfl_begin() in check_return_addr() */
+
+
+/*
+ * Inspect the DWARF rules of @frame to tell whether the return address
+ * still lives in LR and, if it does, whether a new frame was allocated.
+ */
+static int check_return_reg(int ra_regno, Dwarf_Frame *frame)
+{
+	Dwarf_Op scratch[2];
+	Dwarf_Op sentinel;
+	Dwarf_Op *ops = &sentinel;
+	size_t nops;
+	int err;
+
+	err = dwarf_frame_register(frame, ra_regno, scratch, &ops, &nops);
+	if (err < 0) {
+		pr_debug("dwarf_frame_register() %s\n", dwarf_errmsg(-1));
+		return -1;
+	}
+
+	/*
+	 * A non-empty rule for the RA register is treated as "on the stack".
+	 */
+	if (ops != NULL || nops != 0)
+		return 0;
+
+	/*
+	 * The return address is still in LR: look at the CFA rule to see
+	 * whether a frame has been allocated but not used yet.
+	 */
+	err = dwarf_frame_cfa(frame, &ops, &nops);
+	if (err < 0) {
+		pr_debug("dwarf_frame_cfa() returns %d, %s\n", err,
+					dwarf_errmsg(-1));
+		return -1;
+	}
+
+	/*
+	 * Anything but a CFA of exactly r1+0 means a new frame was allocated.
+	 */
+	if (nops != 1 || ops[0].atom != DW_OP_bregx || ops[0].number != 1 ||
+				ops[0].number2 != 0)
+		return 2;
+
+	/*
+	 * The call frame address is r1 itself: no new frame was allocated.
+	 */
+	return 1;
+}
+
+/*
+ * Get the DWARF frame for @pc from the module's .eh_frame section, or NULL
+ * (after a debug message) on failure. The caller must free() the frame.
+ */
+static Dwarf_Frame *get_eh_frame(Dwfl_Module *mod, Dwarf_Addr pc)
+{
+	Dwarf_Addr	bias;
+	Dwarf_CFI	*cfi;
+	Dwarf_Frame	*frame;
+
+	cfi = dwfl_module_eh_cfi(mod, &bias);
+	if (!cfi) {
+		pr_debug("%s(): no CFI - %s\n", __func__, dwfl_errmsg(-1));
+		return NULL;
+	}
+
+	/* dwarf_cfi_addrframe() is a libdw call: report via dwarf_errmsg() */
+	if (dwarf_cfi_addrframe(cfi, pc - bias, &frame)) {
+		pr_debug("%s(): %s\n", __func__, dwarf_errmsg(-1));
+		return NULL;
+	}
+
+	return frame;
+}
+
+/*
+ * Get the DWARF frame for @pc from the module's .debug_frame section, or
+ * NULL (after a debug message) on failure. The caller must free() the frame.
+ */
+static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc)
+{
+	Dwarf_CFI       *cfi;
+	Dwarf_Addr      bias;
+	Dwarf_Frame     *frame;
+
+	cfi = dwfl_module_dwarf_cfi(mod, &bias);
+	if (!cfi) {
+		pr_debug("%s(): no CFI - %s\n", __func__, dwfl_errmsg(-1));
+		return NULL;
+	}
+
+	/* dwarf_cfi_addrframe() is a libdw call: report via dwarf_errmsg() */
+	if (dwarf_cfi_addrframe(cfi, pc - bias, &frame)) {
+		pr_debug("%s(): %s\n", __func__, dwarf_errmsg(-1));
+		return NULL;
+	}
+
+	return frame;
+}
+
+/*
+ * Classify where the return address lives for @pc inside @dso (mapped at
+ * @map_start), caching the Dwfl handle in dso->dwfl on first use. Return:
+ *	0 if the return address is on the stack
+ *	1 if it is in LR and no new stack frame was allocated
+ *	2 if it is in LR and a new frame was allocated (but not yet used)
+ *	-1 in case of errors
+ */
+static int check_return_addr(struct dso *dso, u64 map_start, Dwarf_Addr pc)
+{
+	int		rc = -1;
+	Dwfl		*dwfl;
+	Dwfl_Module	*mod;
+	Dwarf_Frame	*frame;
+	int		ra_regno;
+	Dwarf_Addr	start = pc;
+	Dwarf_Addr	end = pc;
+	bool		signalp;
+	const char	*exec_file = dso->long_name;
+
+	dwfl = dso->dwfl;
+
+	if (!dwfl) {
+		dwfl = dwfl_begin(&offline_callbacks);
+		if (!dwfl) {
+			pr_debug("dwfl_begin() failed: %s\n", dwfl_errmsg(-1));
+			return -1;
+		}
+
+		mod = dwfl_report_elf(dwfl, exec_file, exec_file, -1,
+						map_start, false);
+		if (!mod) {
+			pr_debug("dwfl_report_elf() failed %s\n",
+						dwfl_errmsg(-1));
+			/*
+			 * We normally cache the DWARF debug info and never
+			 * call dwfl_end(). But to prevent fd leak, free in
+			 * case of error.
+			 */
+			dwfl_end(dwfl);
+			goto out;
+		}
+		dso->dwfl = dwfl;
+	}
+
+	mod = dwfl_addrmodule(dwfl, pc);
+	if (!mod) {
+		pr_debug("dwfl_addrmodule() failed, %s\n", dwfl_errmsg(-1));
+		goto out;
+	}
+
+	/*
+	 * To work with split debug info files (eg: glibc), try the .eh_frame
+	 * section first and fall back to the .debug_frame section.
+	 */
+	frame = get_eh_frame(mod, pc);
+	if (!frame) {
+		frame = get_dwarf_frame(mod, pc);
+		if (!frame)
+			goto out;
+	}
+
+	ra_regno = dwarf_frame_info(frame, &start, &end, &signalp);
+	if (ra_regno < 0)
+		pr_debug("Return address register unavailable: %s\n",
+				dwarf_errmsg(-1));
+	else
+		rc = check_return_reg(ra_regno, frame);
+
+	/* The frame was malloc()ed by dwarf_cfi_addrframe(): release it */
+	free(frame);
+out:
+	return rc;
+}
+
+/*
+ * The callchain saved by the kernel always includes the link register (LR).
+ *
+ *	0:	PERF_CONTEXT_USER
+ *	1:	Program counter (Next instruction pointer)
+ *	2:	LR value
+ *	3:	Caller's caller
+ *	4:	...
+ *
+ * The value in LR is only needed when it holds a return address. If the
+ * return address is on the stack, we should ignore the LR value.
+ *
+ * Further, when the return address is in the LR, if a new frame was just
+ * allocated but the LR was not saved into it, then the LR contains the
+ * caller, slot 4: contains the caller's caller and the contents of slot 3:
+ * (chain->ips[3]) is undefined and must be ignored.
+ *
+ * Use DWARF debug information to determine if any entries need to be skipped.
+ *
+ * Return:
+ *	index:	of callchain entry that needs to be ignored (if any)
+ *	-1	if no entry needs to be ignored or in case of errors
+ */
+int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
+{
+	struct addr_location al;
+	struct dso *dso = NULL;
+	int rc;
+	u64 ip;
+	int skip_slot = -1;	/* int, like the return type: -1 means "none" */
+
+	if (chain->nr < 3)
+		return skip_slot;
+
+	ip = chain->ips[2];
+
+	thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
+			MAP__FUNCTION, ip, &al);
+
+	if (al.map)
+		dso = al.map->dso;
+
+	if (!dso) {
+		pr_debug("%" PRIx64 " dso is NULL\n", ip);
+		return skip_slot;
+	}
+
+	rc = check_return_addr(dso, al.map->start, ip);
+	/* al.sym is not checked anywhere above, so guard against NULL here */
+	pr_debug("[DSO %s, sym %s, ip 0x%" PRIx64 "] rc %d\n", dso->long_name,
+		 al.sym ? al.sym->name : "<unknown>", ip, rc);
+
+	if (rc == 0) {
+		/*
+		 * Return address on stack. Ignore LR value in callchain
+		 */
+		skip_slot = 2;
+	} else if (rc == 2) {
+		/*
+		 * New frame allocated but return address still in LR.
+		 * Ignore the caller's caller entry in callchain.
+		 */
+		skip_slot = 3;
+	}
+	return skip_slot;
+}
diff --git a/arch/powerpc/util/sym-handling.c b/arch/powerpc/util/sym-handling.c
new file mode 100644
index 0000000..53d83d7
--- /dev/null
+++ b/arch/powerpc/util/sym-handling.c
@@ -0,0 +1,151 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
+ */
+
+#include "debug.h"
+#include "symbol.h"
+#include "map.h"
+#include "probe-event.h"
+#include "probe-file.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+	if (ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL)
+		return true;
+	return ehdr.e_type == ET_DYN;	/* shared objects qualify too */
+}
+
+#endif
+
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+int arch__choose_best_symbol(struct symbol *syma,
+			     struct symbol *symb __maybe_unused)
+{
+	const char *name = syma->name;
+
+	/* One leading dot is not taken into account */
+	if (name[0] == '.')
+		name++;
+
+	/* Prefer symbol B over the "SyS"/"compat_SyS" syscall aliases */
+	if (strncmp(name, "SyS", 3) == 0)
+		return SYMBOL_B;
+	if (strncmp(name, "compat_SyS", 10) == 0)
+		return SYMBOL_B;
+
+	return SYMBOL_A;
+}
+
+/* Compare symbol names, ignoring one leading dot on either of them */
+int arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+	const char *lhs = namea;
+	const char *rhs = nameb;
+
+	/* Dot variants compare equal to their plain counterparts */
+	lhs += (*lhs == '.');
+	rhs += (*rhs == '.');
+	return strcmp(lhs, rhs);
+}
+
+int arch__compare_symbol_names_n(const char *namea, const char *nameb,
+				 unsigned int n)
+{
+	const char *lhs = (*namea == '.') ? namea + 1 : namea;
+	const char *rhs = (*nameb == '.') ? nameb + 1 : nameb;
+
+	/*
+	 * One leading dot per name is skipped; @n bounds what remains.
+	 */
+	return strncmp(lhs, rhs, n);
+}
+
+const char *arch__normalize_symbol_name(const char *name)
+{
+	/* NULL and dot-less names are handed back untouched */
+	if (!name || name[0] != '.')
+		return name;
+	return name + 1;
+}
+#endif
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+
+#ifdef HAVE_LIBELF_SUPPORT
+void arch__sym_update(struct symbol *s, GElf_Sym *sym)
+{
+	s->arch_sym = sym->st_other;	/* copy the ELF symbol's st_other field */
+}
+#endif
+
+#define PPC64LE_LEP_OFFSET	8
+
+void arch__fix_tev_from_maps(struct perf_probe_event *pev,
+			     struct probe_trace_event *tev, struct map *map,
+			     struct symbol *sym)
+{
+	int lep_offset;
+
+	/*
+	 * When probing at a function entry point, we normally want the local
+	 * entry point (LEP), since calls through both the global entry point
+	 * (GEP) and the LEP pass through it. Hence, probe at the LEP offset
+	 * (a fixed 8 bytes for kallsyms symbols) if the user only specified
+	 * the function entry.
+	 *
+	 * However, if the user specifies an offset, fall back to the GEP since
+	 * userspace tools (objdump/readelf) show offsets from the GEP.
+	 */
+	if (pev->point.offset || !map || !sym)
+		return;
+
+	/* For kretprobes, add an offset only if the kernel supports it */
+	if (!pev->uprobes && pev->point.retprobe) {
+#ifdef HAVE_LIBELF_SUPPORT
+		if (!kretprobe_offset_is_supported())
+#endif /* HAVE_LIBELF_SUPPORT */
+			return;	/* unconditional when built without libelf */
+	}
+
+	lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
+
+	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
+		tev->point.offset += PPC64LE_LEP_OFFSET;
+	else if (lep_offset) {
+		if (pev->uprobes)
+			tev->point.address += lep_offset;
+		else
+			tev->point.offset += lep_offset;
+	}
+}
+
+#ifdef HAVE_LIBELF_SUPPORT
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+					   int ntevs)
+{
+	struct probe_trace_event *tev = pev->tevs;
+	struct map *map;
+	struct symbol *sym = NULL;
+	struct rb_node *tmp;
+	int i = 0;
+
+	map = get_target_map(pev->target, pev->nsi, pev->uprobes);
+	if (!map || map__load(map) < 0)
+		return;
+	for (i = 0; i < ntevs; i++, tev++) {
+		map__for_each_symbol(map, sym, tmp) {
+			if (map->unmap_ip(map, sym->start) == tev->point.address) {
+				arch__fix_tev_from_maps(pev, tev, map, sym);
+				break;	/* aliases share an address: fix up once */
+			}
+		}
+	}
+}
+#endif /* HAVE_LIBELF_SUPPORT */
+
+#endif
diff --git a/arch/powerpc/util/unwind-libdw.c b/arch/powerpc/util/unwind-libdw.c
new file mode 100644
index 0000000..7a1f05e
--- /dev/null
+++ b/arch/powerpc/util/unwind-libdw.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+
+/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils.  */
+static const int special_regs[3][2] = {
+	{ 65, PERF_REG_POWERPC_LINK },	/* { DWARF regno, perf regno } */
+	{ 101, PERF_REG_POWERPC_XER },
+	{ 109, PERF_REG_POWERPC_CTR },
+};
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct regs_dump *user_regs = &ui->sample->user_regs;
+	Dwarf_Word dwarf_regs[32], dwarf_nip;
+	size_t i;
+
+#define REG(r) ({						\
+	Dwarf_Word val = 0;					\
+	perf_reg_value(&val, user_regs, PERF_REG_POWERPC_##r);	\
+	val;							\
+})
+
+	dwarf_regs[0]  = REG(R0);
+	dwarf_regs[1]  = REG(R1);
+	dwarf_regs[2]  = REG(R2);
+	dwarf_regs[3]  = REG(R3);
+	dwarf_regs[4]  = REG(R4);
+	dwarf_regs[5]  = REG(R5);
+	dwarf_regs[6]  = REG(R6);
+	dwarf_regs[7]  = REG(R7);
+	dwarf_regs[8]  = REG(R8);
+	dwarf_regs[9]  = REG(R9);
+	dwarf_regs[10] = REG(R10);
+	dwarf_regs[11] = REG(R11);
+	dwarf_regs[12] = REG(R12);
+	dwarf_regs[13] = REG(R13);
+	dwarf_regs[14] = REG(R14);
+	dwarf_regs[15] = REG(R15);
+	dwarf_regs[16] = REG(R16);
+	dwarf_regs[17] = REG(R17);
+	dwarf_regs[18] = REG(R18);
+	dwarf_regs[19] = REG(R19);
+	dwarf_regs[20] = REG(R20);
+	dwarf_regs[21] = REG(R21);
+	dwarf_regs[22] = REG(R22);
+	dwarf_regs[23] = REG(R23);
+	dwarf_regs[24] = REG(R24);
+	dwarf_regs[25] = REG(R25);
+	dwarf_regs[26] = REG(R26);
+	dwarf_regs[27] = REG(R27);
+	dwarf_regs[28] = REG(R28);
+	dwarf_regs[29] = REG(R29);
+	dwarf_regs[30] = REG(R30);
+	dwarf_regs[31] = REG(R31);
+	if (!dwfl_thread_state_registers(thread, 0, 32, dwarf_regs))
+		return false;	/* r0-r31 could not be handed to libdwfl */
+
+	dwarf_nip = REG(NIP);	/* NIP becomes the initial unwind PC */
+	dwfl_thread_state_register_pc(thread, dwarf_nip);
+	for (i = 0; i < ARRAY_SIZE(special_regs); i++) {
+		Dwarf_Word val = 0;	/* perf_reg_value() result is ignored */
+		perf_reg_value(&val, user_regs, special_regs[i][1]);
+		if (!dwfl_thread_state_registers(thread,
+						 special_regs[i][0], 1,
+						 &val))
+			return false;
+	}
+
+	return true;
+}
diff --git a/arch/powerpc/util/unwind-libunwind.c b/arch/powerpc/util/unwind-libunwind.c
new file mode 100644
index 0000000..9e15f92
--- /dev/null
+++ b/arch/powerpc/util/unwind-libunwind.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2016 Chandan Kumar, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <errno.h>
+#include <libunwind.h>
+#include <asm/perf_regs.h>
+#include "../../util/unwind.h"
+#include "../../util/debug.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+	switch (regnum) {	/* UNW_PPC64_* -> PERF_REG_POWERPC_* */
+	case UNW_PPC64_R0:
+		return PERF_REG_POWERPC_R0;
+	case UNW_PPC64_R1:
+		return PERF_REG_POWERPC_R1;
+	case UNW_PPC64_R2:
+		return PERF_REG_POWERPC_R2;
+	case UNW_PPC64_R3:
+		return PERF_REG_POWERPC_R3;
+	case UNW_PPC64_R4:
+		return PERF_REG_POWERPC_R4;
+	case UNW_PPC64_R5:
+		return PERF_REG_POWERPC_R5;
+	case UNW_PPC64_R6:
+		return PERF_REG_POWERPC_R6;
+	case UNW_PPC64_R7:
+		return PERF_REG_POWERPC_R7;
+	case UNW_PPC64_R8:
+		return PERF_REG_POWERPC_R8;
+	case UNW_PPC64_R9:
+		return PERF_REG_POWERPC_R9;
+	case UNW_PPC64_R10:
+		return PERF_REG_POWERPC_R10;
+	case UNW_PPC64_R11:
+		return PERF_REG_POWERPC_R11;
+	case UNW_PPC64_R12:
+		return PERF_REG_POWERPC_R12;
+	case UNW_PPC64_R13:
+		return PERF_REG_POWERPC_R13;
+	case UNW_PPC64_R14:
+		return PERF_REG_POWERPC_R14;
+	case UNW_PPC64_R15:
+		return PERF_REG_POWERPC_R15;
+	case UNW_PPC64_R16:
+		return PERF_REG_POWERPC_R16;
+	case UNW_PPC64_R17:
+		return PERF_REG_POWERPC_R17;
+	case UNW_PPC64_R18:
+		return PERF_REG_POWERPC_R18;
+	case UNW_PPC64_R19:
+		return PERF_REG_POWERPC_R19;
+	case UNW_PPC64_R20:
+		return PERF_REG_POWERPC_R20;
+	case UNW_PPC64_R21:
+		return PERF_REG_POWERPC_R21;
+	case UNW_PPC64_R22:
+		return PERF_REG_POWERPC_R22;
+	case UNW_PPC64_R23:
+		return PERF_REG_POWERPC_R23;
+	case UNW_PPC64_R24:
+		return PERF_REG_POWERPC_R24;
+	case UNW_PPC64_R25:
+		return PERF_REG_POWERPC_R25;
+	case UNW_PPC64_R26:
+		return PERF_REG_POWERPC_R26;
+	case UNW_PPC64_R27:
+		return PERF_REG_POWERPC_R27;
+	case UNW_PPC64_R28:
+		return PERF_REG_POWERPC_R28;
+	case UNW_PPC64_R29:
+		return PERF_REG_POWERPC_R29;
+	case UNW_PPC64_R30:
+		return PERF_REG_POWERPC_R30;
+	case UNW_PPC64_R31:
+		return PERF_REG_POWERPC_R31;
+	case UNW_PPC64_LR:
+		return PERF_REG_POWERPC_LINK;
+	case UNW_PPC64_CTR:
+		return PERF_REG_POWERPC_CTR;
+	case UNW_PPC64_XER:
+		return PERF_REG_POWERPC_XER;
+	case UNW_PPC64_NIP:
+		return PERF_REG_POWERPC_NIP;
+	default:
+		pr_err("unwind: invalid reg id %d\n", regnum);
+		return -EINVAL;
+	}
+	return -EINVAL;	/* not reached: every switch path returns */
+}
diff --git a/arch/s390/Build b/arch/s390/Build
new file mode 100644
index 0000000..54afe4a
--- /dev/null
+++ b/arch/s390/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
new file mode 100644
index 0000000..dfa6e31
--- /dev/null
+++ b/arch/s390/Makefile
@@ -0,0 +1,30 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
+HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+
+#
+# Syscall table generation for perf: $(systbl) converts perf's copy of
+# the s390 syscall.tbl ($(sysdef)) into $(header).
+
+out    := $(OUTPUT)arch/s390/include/generated/asm
+header := $(out)/syscalls_64.c
+syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl
+sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls
+sysdef := $(sysprf)/syscall.tbl
+systbl := $(sysprf)/mksyscalltbl
+
+# Create the output directory while this file is parsed, if not yet present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header): $(sysdef) $(systbl)
+	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+        (diff -B $(sysdef) $(syskrn) >/dev/null) \
+        || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
+	$(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
+
+clean::
+	$(call QUIET_CLEAN, s390) $(RM) $(header)
+
+archheaders: $(header)
diff --git a/arch/s390/annotate/instructions.c b/arch/s390/annotate/instructions.c
new file mode 100644
index 0000000..cee4e2f
--- /dev/null
+++ b/arch/s390/annotate/instructions.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+
+static int s390_call__parse(struct arch *arch, struct ins_operands *ops,
+			    struct map_symbol *ms)
+{
+	struct map *map = ms->map;
+	struct addr_map_symbol target = {
+		.map = map,
+	};
+	char *comma = strchr(ops->raw, ',');
+	char *after_addr, *name, *name_end;
+
+	if (comma == NULL)
+		return -1;
+
+	ops->target.addr = strtoull(comma + 1, &after_addr, 16);
+
+	/* The symbol name is expected in <...> after the hex address */
+	name = strchr(after_addr, '<');
+	if (!name)
+		return -1;
+
+	name++;
+	if (arch->objdump.skip_functions_char &&
+	    strchr(name, arch->objdump.skip_functions_char))
+		return -1;
+
+	name_end = strchr(name, '>');
+	if (!name_end)
+		return -1;
+
+	/* Cut the string at '>' just long enough to duplicate the name */
+	*name_end = '\0';
+	ops->target.name = strdup(name);
+	*name_end = '>';
+	if (!ops->target.name)
+		return -1;
+
+	target.addr = map__objdump_2mem(map, ops->target.addr);
+	if (map_groups__find_ams(&target) == 0 &&
+	    map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr)
+		ops->target.sym = target.sym;
+
+	return 0;
+}
+
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops);	/* body not in this file */
+
+static struct ins_ops s390_call_ops = {
+	.parse	   = s390_call__parse,	/* s390-specific operand parser */
+	.scnprintf = call__scnprintf,
+};
+
+static int s390_mov__parse(struct arch *arch __maybe_unused,
+			   struct ins_operands *ops,
+			   struct map_symbol *ms __maybe_unused)
+{
+	char *comma, *target, *after_addr, *lt, *gt;
+
+	comma = strchr(ops->raw, ',');
+	if (!comma)
+		return -1;
+
+	/* Split at the first comma: source in front, target behind */
+	*comma = '\0';
+	ops->source.raw = strdup(ops->raw);
+	*comma = ',';
+	if (!ops->source.raw)
+		return -1;
+
+	target = comma + 1;
+	ops->target.raw = strdup(target);
+	if (!ops->target.raw)
+		goto out_free_source;
+
+	ops->target.addr = strtoull(target, &after_addr, 16);
+	if (after_addr == target)
+		goto out_free_target;
+
+	/* The symbol name sits between '<' and the following '>' */
+	lt = strchr(after_addr, '<');
+	gt = lt ? strchr(lt + 1, '>') : NULL;
+	if (!gt)
+		goto out_free_target;
+
+	*gt = '\0';
+	ops->target.name = strdup(lt + 1);
+	*gt = '>';
+	if (!ops->target.name)
+		goto out_free_target;
+
+	return 0;
+
+out_free_target:
+	zfree(&ops->target.raw);
+out_free_source:
+	zfree(&ops->source.raw);
+	return -1;
+}
+
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+			  struct ins_operands *ops);	/* body not in this file */
+
+static struct ins_ops s390_mov_ops = {
+	.parse	   = s390_mov__parse,	/* s390-specific operand parser */
+	.scnprintf = mov__scnprintf,
+};
+
+static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name)
+{
+	struct ins_ops *ops;
+
+	/* Most specific match first; the generic jump test comes last */
+	if (!strcmp(name, "lrl") ||
+	    !strcmp(name, "lgrl") ||
+	    !strcmp(name, "lgfrl") ||
+	    !strcmp(name, "llgfrl") ||
+	    !strcmp(name, "strl") ||
+	    !strcmp(name, "stgrl"))
+		ops = &s390_mov_ops;	/* load/store relative to PC */
+	else if (!strcmp(name, "br"))
+		ops = &ret_ops;
+	else if (!strcmp(name, "bras") ||
+		 !strcmp(name, "brasl") ||
+		 !strcmp(name, "basr"))
+		ops = &s390_call_ops;
+	else if (strchr(name, 'j') ||
+		 !strncmp(name, "bct", 3) ||
+		 !strncmp(name, "br", 2))
+		ops = &jump_ops;	/* catch all kind of jumps */
+	else
+		ops = NULL;
+
+	if (ops)
+		arch__associate_ins_ops(arch, name, ops);
+	return ops;
+}
+
+static int s390__cpuid_parse(struct arch *arch, char *cpuid)
+{
+	unsigned int family;
+	char model[16], model_c[16], cpumf_v[16], cpumf_a[16];
+	int ret;
+
+	/*
+	 * "IBM,family,model-capacity,model[,cpum_cf-version,cpum_cf-authorization]"
+	 * Field widths are capped at 15 so each fits its 16-byte buffer.
+	 */
+	ret = sscanf(cpuid, "%*[^,],%u,%15[^,],%15[^,],%15[^,],%15s", &family,
+		     model_c, model, cpumf_v, cpumf_a);
+	if (ret >= 2) {
+		arch->family = family;
+		arch->model = 0;
+		return 0;
+	}
+
+	return -1;
+}
+
+static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+	/* One-time setup: later calls are no-ops that report success */
+	if (arch->initialized)
+		return 0;
+
+	arch->initialized = true;
+	arch->associate_instruction_ops = s390__associate_ins_ops;
+	if (!cpuid)
+		return 0;
+
+	return s390__cpuid_parse(arch, cpuid);
+}
diff --git a/arch/s390/entry/syscalls/mksyscalltbl b/arch/s390/entry/syscalls/mksyscalltbl
new file mode 100755
index 0000000..72ecbb6
--- /dev/null
+++ b/arch/s390/entry/syscalls/mksyscalltbl
@@ -0,0 +1,32 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate system call table for perf
+#
+# Copyright IBM Corp. 2017, 2018
+# Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+#
+
+SYSCALL_TBL=$1	# syscall.tbl to convert (required)
+
+if ! test -r "$SYSCALL_TBL"; then	# quoted: an empty $1 must fail here
+	echo "Could not read input file" >&2
+	exit 1
+fi
+
+create_table()
+{
+	local max_nr nr abi sc discard
+
+	echo 'static const char *syscalltbl_s390_64[] = {'
+	while read nr abi sc discard; do	# discard: the remaining columns
+		printf '\t[%d] = "%s",\n' "$nr" "$sc"
+		max_nr=$nr	# rows arrive sorted, so the last one is the highest
+	done
+	echo '};'
+	echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr"
+}
+
+grep -E "^[[:digit:]]+[[:space:]]+(common|64)" "$SYSCALL_TBL"	\
+	|sort -k1 -n					\
+	|create_table
diff --git a/arch/s390/entry/syscalls/syscall.tbl b/arch/s390/entry/syscalls/syscall.tbl
new file mode 100644
index 0000000..b38d484
--- /dev/null
+++ b/arch/s390/entry/syscalls/syscall.tbl
@@ -0,0 +1,390 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# System call table for s390
+#
+# Format:
+#
+# <nr> <abi> <syscall> <entry-64bit> <compat-entry>
+#
+# where <abi> can be common, 64, or 32
+
+1    common	exit			sys_exit			sys_exit
+2    common	fork			sys_fork			sys_fork
+3    common	read			sys_read			compat_sys_s390_read
+4    common	write			sys_write			compat_sys_s390_write
+5    common	open			sys_open			compat_sys_open
+6    common	close			sys_close			sys_close
+7    common	restart_syscall		sys_restart_syscall		sys_restart_syscall
+8    common	creat			sys_creat			compat_sys_creat
+9    common	link			sys_link			compat_sys_link
+10   common	unlink			sys_unlink			compat_sys_unlink
+11   common	execve			sys_execve			compat_sys_execve
+12   common	chdir			sys_chdir			compat_sys_chdir
+13   32		time			-				compat_sys_time
+14   common	mknod			sys_mknod			compat_sys_mknod
+15   common	chmod			sys_chmod			compat_sys_chmod
+16   32		lchown			-				compat_sys_s390_lchown16
+19   common	lseek			sys_lseek			compat_sys_lseek
+20   common	getpid			sys_getpid			sys_getpid
+21   common	mount			sys_mount			compat_sys_mount
+22   common	umount			sys_oldumount			compat_sys_oldumount
+23   32		setuid			-				compat_sys_s390_setuid16
+24   32		getuid			-				compat_sys_s390_getuid16
+25   32		stime			-				compat_sys_stime
+26   common	ptrace			sys_ptrace			compat_sys_ptrace
+27   common	alarm			sys_alarm			sys_alarm
+29   common	pause			sys_pause			sys_pause
+30   common	utime			sys_utime			compat_sys_utime
+33   common	access			sys_access			compat_sys_access
+34   common	nice			sys_nice			sys_nice
+36   common	sync			sys_sync			sys_sync
+37   common	kill			sys_kill			sys_kill
+38   common	rename			sys_rename			compat_sys_rename
+39   common	mkdir			sys_mkdir			compat_sys_mkdir
+40   common	rmdir			sys_rmdir			compat_sys_rmdir
+41   common	dup			sys_dup				sys_dup
+42   common	pipe			sys_pipe			compat_sys_pipe
+43   common	times			sys_times			compat_sys_times
+45   common	brk			sys_brk				compat_sys_brk
+46   32		setgid			-				compat_sys_s390_setgid16
+47   32		getgid			-				compat_sys_s390_getgid16
+48   common	signal			sys_signal			compat_sys_signal
+49   32		geteuid			-				compat_sys_s390_geteuid16
+50   32		getegid			-				compat_sys_s390_getegid16
+51   common	acct			sys_acct			compat_sys_acct
+52   common	umount2			sys_umount			compat_sys_umount
+54   common	ioctl			sys_ioctl			compat_sys_ioctl
+55   common	fcntl			sys_fcntl			compat_sys_fcntl
+57   common	setpgid			sys_setpgid			sys_setpgid
+60   common	umask			sys_umask			sys_umask
+61   common	chroot			sys_chroot			compat_sys_chroot
+62   common	ustat			sys_ustat			compat_sys_ustat
+63   common	dup2			sys_dup2			sys_dup2
+64   common	getppid			sys_getppid			sys_getppid
+65   common	getpgrp			sys_getpgrp			sys_getpgrp
+66   common	setsid			sys_setsid			sys_setsid
+67   common	sigaction		sys_sigaction			compat_sys_sigaction
+70   32		setreuid		-				compat_sys_s390_setreuid16
+71   32		setregid		-				compat_sys_s390_setregid16
+72   common	sigsuspend		sys_sigsuspend			compat_sys_sigsuspend
+73   common	sigpending		sys_sigpending			compat_sys_sigpending
+74   common	sethostname		sys_sethostname			compat_sys_sethostname
+75   common	setrlimit		sys_setrlimit			compat_sys_setrlimit
+76   32		getrlimit		-				compat_sys_old_getrlimit
+77   common	getrusage		sys_getrusage			compat_sys_getrusage
+78   common	gettimeofday		sys_gettimeofday		compat_sys_gettimeofday
+79   common	settimeofday		sys_settimeofday		compat_sys_settimeofday
+80   32		getgroups		-				compat_sys_s390_getgroups16
+81   32		setgroups		-				compat_sys_s390_setgroups16
+83   common	symlink			sys_symlink			compat_sys_symlink
+85   common	readlink		sys_readlink			compat_sys_readlink
+86   common	uselib			sys_uselib			compat_sys_uselib
+87   common	swapon			sys_swapon			compat_sys_swapon
+88   common	reboot			sys_reboot			compat_sys_reboot
+89   common	readdir			-				compat_sys_old_readdir
+90   common	mmap			sys_old_mmap			compat_sys_s390_old_mmap
+91   common	munmap			sys_munmap			compat_sys_munmap
+92   common	truncate		sys_truncate			compat_sys_truncate
+93   common	ftruncate		sys_ftruncate			compat_sys_ftruncate
+94   common	fchmod			sys_fchmod			sys_fchmod
+95   32		fchown			-				compat_sys_s390_fchown16
+96   common	getpriority		sys_getpriority			sys_getpriority
+97   common	setpriority		sys_setpriority			sys_setpriority
+99   common	statfs			sys_statfs			compat_sys_statfs
+100  common	fstatfs			sys_fstatfs			compat_sys_fstatfs
+101  32		ioperm			-				-
+102  common	socketcall		sys_socketcall			compat_sys_socketcall
+103  common	syslog			sys_syslog			compat_sys_syslog
+104  common	setitimer		sys_setitimer			compat_sys_setitimer
+105  common	getitimer		sys_getitimer			compat_sys_getitimer
+106  common	stat			sys_newstat			compat_sys_newstat
+107  common	lstat			sys_newlstat			compat_sys_newlstat
+108  common	fstat			sys_newfstat			compat_sys_newfstat
+110  common	lookup_dcookie		sys_lookup_dcookie		compat_sys_lookup_dcookie
+111  common	vhangup			sys_vhangup			sys_vhangup
+112  common	idle			-				-
+114  common	wait4			sys_wait4			compat_sys_wait4
+115  common	swapoff			sys_swapoff			compat_sys_swapoff
+116  common	sysinfo			sys_sysinfo			compat_sys_sysinfo
+117  common	ipc			sys_s390_ipc			compat_sys_s390_ipc
+118  common	fsync			sys_fsync			sys_fsync
+119  common	sigreturn		sys_sigreturn			compat_sys_sigreturn
+120  common	clone			sys_clone			compat_sys_clone
+121  common	setdomainname		sys_setdomainname		compat_sys_setdomainname
+122  common	uname			sys_newuname			compat_sys_newuname
+124  common	adjtimex		sys_adjtimex			compat_sys_adjtimex
+125  common	mprotect		sys_mprotect			compat_sys_mprotect
+126  common	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
+127  common	create_module		-				-
+128  common	init_module		sys_init_module			compat_sys_init_module
+129  common	delete_module		sys_delete_module		compat_sys_delete_module
+130  common	get_kernel_syms		-				-
+131  common	quotactl		sys_quotactl			compat_sys_quotactl
+132  common	getpgid			sys_getpgid			sys_getpgid
+133  common	fchdir			sys_fchdir			sys_fchdir
+134  common	bdflush			sys_bdflush			compat_sys_bdflush
+135  common	sysfs			sys_sysfs			compat_sys_sysfs
+136  common	personality		sys_s390_personality		sys_s390_personality
+137  common	afs_syscall		-				-
+138  32		setfsuid		-				compat_sys_s390_setfsuid16
+139  32		setfsgid		-				compat_sys_s390_setfsgid16
+140  32		_llseek			-				compat_sys_llseek
+141  common	getdents		sys_getdents			compat_sys_getdents
+142  32		_newselect		-				compat_sys_select
+142  64		select			sys_select			-
+143  common	flock			sys_flock			sys_flock
+144  common	msync			sys_msync			compat_sys_msync
+145  common	readv			sys_readv			compat_sys_readv
+146  common	writev			sys_writev			compat_sys_writev
+147  common	getsid			sys_getsid			sys_getsid
+148  common	fdatasync		sys_fdatasync			sys_fdatasync
+149  common	_sysctl			sys_sysctl			compat_sys_sysctl
+150  common	mlock			sys_mlock			compat_sys_mlock
+151  common	munlock			sys_munlock			compat_sys_munlock
+152  common	mlockall		sys_mlockall			sys_mlockall
+153  common	munlockall		sys_munlockall			sys_munlockall
+154  common	sched_setparam		sys_sched_setparam		compat_sys_sched_setparam
+155  common	sched_getparam		sys_sched_getparam		compat_sys_sched_getparam
+156  common	sched_setscheduler	sys_sched_setscheduler		compat_sys_sched_setscheduler
+157  common	sched_getscheduler	sys_sched_getscheduler		sys_sched_getscheduler
+158  common	sched_yield		sys_sched_yield			sys_sched_yield
+159  common	sched_get_priority_max	sys_sched_get_priority_max	sys_sched_get_priority_max
+160  common	sched_get_priority_min	sys_sched_get_priority_min	sys_sched_get_priority_min
+161  common	sched_rr_get_interval	sys_sched_rr_get_interval	compat_sys_sched_rr_get_interval
+162  common	nanosleep		sys_nanosleep			compat_sys_nanosleep
+163  common	mremap			sys_mremap			compat_sys_mremap
+164  32		setresuid		-				compat_sys_s390_setresuid16
+165  32		getresuid		-				compat_sys_s390_getresuid16
+167  common	query_module		-				-
+168  common	poll			sys_poll			compat_sys_poll
+169  common	nfsservctl		-				-
+170  32		setresgid		-				compat_sys_s390_setresgid16
+171  32		getresgid		-				compat_sys_s390_getresgid16
+172  common	prctl			sys_prctl			compat_sys_prctl
+173  common	rt_sigreturn		sys_rt_sigreturn		compat_sys_rt_sigreturn
+174  common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
+175  common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
+176  common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
+177  common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait
+178  common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
+179  common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
+180  common	pread64			sys_pread64			compat_sys_s390_pread64
+181  common	pwrite64		sys_pwrite64			compat_sys_s390_pwrite64
+182  32		chown			-				compat_sys_s390_chown16
+183  common	getcwd			sys_getcwd			compat_sys_getcwd
+184  common	capget			sys_capget			compat_sys_capget
+185  common	capset			sys_capset			compat_sys_capset
+186  common	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
+187  common	sendfile		sys_sendfile64			compat_sys_sendfile
+188  common	getpmsg			-				-
+189  common	putpmsg			-				-
+190  common	vfork			sys_vfork			sys_vfork
+191  32		ugetrlimit		-				compat_sys_getrlimit
+191  64		getrlimit		sys_getrlimit			-
+192  32		mmap2			-				compat_sys_s390_mmap2
+193  32		truncate64		-				compat_sys_s390_truncate64
+194  32		ftruncate64		-				compat_sys_s390_ftruncate64
+195  32		stat64			-				compat_sys_s390_stat64
+196  32		lstat64			-				compat_sys_s390_lstat64
+197  32		fstat64			-				compat_sys_s390_fstat64
+198  32		lchown32		-				compat_sys_lchown
+198  64		lchown			sys_lchown			-
+199  32		getuid32		-				sys_getuid
+199  64		getuid			sys_getuid			-
+200  32		getgid32		-				sys_getgid
+200  64		getgid			sys_getgid			-
+201  32		geteuid32		-				sys_geteuid
+201  64		geteuid			sys_geteuid			-
+202  32		getegid32		-				sys_getegid
+202  64		getegid			sys_getegid			-
+203  32		setreuid32		-				sys_setreuid
+203  64		setreuid		sys_setreuid			-
+204  32		setregid32		-				sys_setregid
+204  64		setregid		sys_setregid			-
+205  32		getgroups32		-				compat_sys_getgroups
+205  64		getgroups		sys_getgroups			-
+206  32		setgroups32		-				compat_sys_setgroups
+206  64		setgroups		sys_setgroups			-
+207  32		fchown32		-				sys_fchown
+207  64		fchown			sys_fchown			-
+208  32		setresuid32		-				sys_setresuid
+208  64		setresuid		sys_setresuid			-
+209  32		getresuid32		-				compat_sys_getresuid
+209  64		getresuid		sys_getresuid			-
+210  32		setresgid32		-				sys_setresgid
+210  64		setresgid		sys_setresgid			-
+211  32		getresgid32		-				compat_sys_getresgid
+211  64		getresgid		sys_getresgid			-
+212  32		chown32			-				compat_sys_chown
+212  64		chown			sys_chown			-
+213  32		setuid32		-				sys_setuid
+213  64		setuid			sys_setuid			-
+214  32		setgid32		-				sys_setgid
+214  64		setgid			sys_setgid			-
+215  32		setfsuid32		-				sys_setfsuid
+215  64		setfsuid		sys_setfsuid			-
+216  32		setfsgid32		-				sys_setfsgid
+216  64		setfsgid		sys_setfsgid			-
+217  common	pivot_root		sys_pivot_root			compat_sys_pivot_root
+218  common	mincore			sys_mincore			compat_sys_mincore
+219  common	madvise			sys_madvise			compat_sys_madvise
+220  common	getdents64		sys_getdents64			compat_sys_getdents64
+221  32		fcntl64			-				compat_sys_fcntl64
+222  common	readahead		sys_readahead			compat_sys_s390_readahead
+223  32		sendfile64		-				compat_sys_sendfile64
+224  common	setxattr		sys_setxattr			compat_sys_setxattr
+225  common	lsetxattr		sys_lsetxattr			compat_sys_lsetxattr
+226  common	fsetxattr		sys_fsetxattr			compat_sys_fsetxattr
+227  common	getxattr		sys_getxattr			compat_sys_getxattr
+228  common	lgetxattr		sys_lgetxattr			compat_sys_lgetxattr
+229  common	fgetxattr		sys_fgetxattr			compat_sys_fgetxattr
+230  common	listxattr		sys_listxattr			compat_sys_listxattr
+231  common	llistxattr		sys_llistxattr			compat_sys_llistxattr
+232  common	flistxattr		sys_flistxattr			compat_sys_flistxattr
+233  common	removexattr		sys_removexattr			compat_sys_removexattr
+234  common	lremovexattr		sys_lremovexattr		compat_sys_lremovexattr
+235  common	fremovexattr		sys_fremovexattr		compat_sys_fremovexattr
+236  common	gettid			sys_gettid			sys_gettid
+237  common	tkill			sys_tkill			sys_tkill
+238  common	futex			sys_futex			compat_sys_futex
+239  common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
+240  common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
+241  common	tgkill			sys_tgkill			sys_tgkill
+243  common	io_setup		sys_io_setup			compat_sys_io_setup
+244  common	io_destroy		sys_io_destroy			compat_sys_io_destroy
+245  common	io_getevents		sys_io_getevents		compat_sys_io_getevents
+246  common	io_submit		sys_io_submit			compat_sys_io_submit
+247  common	io_cancel		sys_io_cancel			compat_sys_io_cancel
+248  common	exit_group		sys_exit_group			sys_exit_group
+249  common	epoll_create		sys_epoll_create		sys_epoll_create
+250  common	epoll_ctl		sys_epoll_ctl			compat_sys_epoll_ctl
+251  common	epoll_wait		sys_epoll_wait			compat_sys_epoll_wait
+252  common	set_tid_address		sys_set_tid_address		compat_sys_set_tid_address
+253  common	fadvise64		sys_fadvise64_64		compat_sys_s390_fadvise64
+254  common	timer_create		sys_timer_create		compat_sys_timer_create
+255  common	timer_settime		sys_timer_settime		compat_sys_timer_settime
+256  common	timer_gettime		sys_timer_gettime		compat_sys_timer_gettime
+257  common	timer_getoverrun	sys_timer_getoverrun		sys_timer_getoverrun
+258  common	timer_delete		sys_timer_delete		sys_timer_delete
+259  common	clock_settime		sys_clock_settime		compat_sys_clock_settime
+260  common	clock_gettime		sys_clock_gettime		compat_sys_clock_gettime
+261  common	clock_getres		sys_clock_getres		compat_sys_clock_getres
+262  common	clock_nanosleep		sys_clock_nanosleep		compat_sys_clock_nanosleep
+264  32		fadvise64_64		-				compat_sys_s390_fadvise64_64
+265  common	statfs64		sys_statfs64			compat_sys_statfs64
+266  common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
+267  common	remap_file_pages	sys_remap_file_pages		compat_sys_remap_file_pages
+268  common	mbind			sys_mbind			compat_sys_mbind
+269  common	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
+270  common	set_mempolicy		sys_set_mempolicy		compat_sys_set_mempolicy
+271  common	mq_open			sys_mq_open			compat_sys_mq_open
+272  common	mq_unlink		sys_mq_unlink			compat_sys_mq_unlink
+273  common	mq_timedsend		sys_mq_timedsend		compat_sys_mq_timedsend
+274  common	mq_timedreceive		sys_mq_timedreceive		compat_sys_mq_timedreceive
+275  common	mq_notify		sys_mq_notify			compat_sys_mq_notify
+276  common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
+277  common	kexec_load		sys_kexec_load			compat_sys_kexec_load
+278  common	add_key			sys_add_key			compat_sys_add_key
+279  common	request_key		sys_request_key			compat_sys_request_key
+280  common	keyctl			sys_keyctl			compat_sys_keyctl
+281  common	waitid			sys_waitid			compat_sys_waitid
+282  common	ioprio_set		sys_ioprio_set			sys_ioprio_set
+283  common	ioprio_get		sys_ioprio_get			sys_ioprio_get
+284  common	inotify_init		sys_inotify_init		sys_inotify_init
+285  common	inotify_add_watch	sys_inotify_add_watch		compat_sys_inotify_add_watch
+286  common	inotify_rm_watch	sys_inotify_rm_watch		sys_inotify_rm_watch
+287  common	migrate_pages		sys_migrate_pages		compat_sys_migrate_pages
+288  common	openat			sys_openat			compat_sys_openat
+289  common	mkdirat			sys_mkdirat			compat_sys_mkdirat
+290  common	mknodat			sys_mknodat			compat_sys_mknodat
+291  common	fchownat		sys_fchownat			compat_sys_fchownat
+292  common	futimesat		sys_futimesat			compat_sys_futimesat
+293  32		fstatat64		-				compat_sys_s390_fstatat64
+293  64		newfstatat		sys_newfstatat			-
+294  common	unlinkat		sys_unlinkat			compat_sys_unlinkat
+295  common	renameat		sys_renameat			compat_sys_renameat
+296  common	linkat			sys_linkat			compat_sys_linkat
+297  common	symlinkat		sys_symlinkat			compat_sys_symlinkat
+298  common	readlinkat		sys_readlinkat			compat_sys_readlinkat
+299  common	fchmodat		sys_fchmodat			compat_sys_fchmodat
+300  common	faccessat		sys_faccessat			compat_sys_faccessat
+301  common	pselect6		sys_pselect6			compat_sys_pselect6
+302  common	ppoll			sys_ppoll			compat_sys_ppoll
+303  common	unshare			sys_unshare			compat_sys_unshare
+304  common	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
+305  common	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
+306  common	splice			sys_splice			compat_sys_splice
+307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
+308  common	tee			sys_tee				compat_sys_tee
+309  common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+310  common	move_pages		sys_move_pages			compat_sys_move_pages
+311  common	getcpu			sys_getcpu			compat_sys_getcpu
+312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
+313  common	utimes			sys_utimes			compat_sys_utimes
+314  common	fallocate		sys_fallocate			compat_sys_s390_fallocate
+315  common	utimensat		sys_utimensat			compat_sys_utimensat
+316  common	signalfd		sys_signalfd			compat_sys_signalfd
+317  common	timerfd			-				-
+318  common	eventfd			sys_eventfd			sys_eventfd
+319  common	timerfd_create		sys_timerfd_create		sys_timerfd_create
+320  common	timerfd_settime		sys_timerfd_settime		compat_sys_timerfd_settime
+321  common	timerfd_gettime		sys_timerfd_gettime		compat_sys_timerfd_gettime
+322  common	signalfd4		sys_signalfd4			compat_sys_signalfd4
+323  common	eventfd2		sys_eventfd2			sys_eventfd2
+324  common	inotify_init1		sys_inotify_init1		sys_inotify_init1
+325  common	pipe2			sys_pipe2			compat_sys_pipe2
+326  common	dup3			sys_dup3			sys_dup3
+327  common	epoll_create1		sys_epoll_create1		sys_epoll_create1
+328  common	preadv			sys_preadv			compat_sys_preadv
+329  common	pwritev			sys_pwritev			compat_sys_pwritev
+330  common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
+331  common	perf_event_open		sys_perf_event_open		compat_sys_perf_event_open
+332  common	fanotify_init		sys_fanotify_init		sys_fanotify_init
+333  common	fanotify_mark		sys_fanotify_mark		compat_sys_fanotify_mark
+334  common	prlimit64		sys_prlimit64			compat_sys_prlimit64
+335  common	name_to_handle_at	sys_name_to_handle_at		compat_sys_name_to_handle_at
+336  common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
+337  common	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
+338  common	syncfs			sys_syncfs			sys_syncfs
+339  common	setns			sys_setns			sys_setns
+340  common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
+341  common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+342  common	s390_runtime_instr	sys_s390_runtime_instr		sys_s390_runtime_instr
+343  common	kcmp			sys_kcmp			compat_sys_kcmp
+344  common	finit_module		sys_finit_module		compat_sys_finit_module
+345  common	sched_setattr		sys_sched_setattr		compat_sys_sched_setattr
+346  common	sched_getattr		sys_sched_getattr		compat_sys_sched_getattr
+347  common	renameat2		sys_renameat2			compat_sys_renameat2
+348  common	seccomp			sys_seccomp			compat_sys_seccomp
+349  common	getrandom		sys_getrandom			compat_sys_getrandom
+350  common	memfd_create		sys_memfd_create		compat_sys_memfd_create
+351  common	bpf			sys_bpf				compat_sys_bpf
+352  common	s390_pci_mmio_write	sys_s390_pci_mmio_write		compat_sys_s390_pci_mmio_write
+353  common	s390_pci_mmio_read	sys_s390_pci_mmio_read		compat_sys_s390_pci_mmio_read
+354  common	execveat		sys_execveat			compat_sys_execveat
+355  common	userfaultfd		sys_userfaultfd			sys_userfaultfd
+356  common	membarrier		sys_membarrier			sys_membarrier
+357  common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg
+358  common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
+359  common	socket			sys_socket			sys_socket
+360  common	socketpair		sys_socketpair			compat_sys_socketpair
+361  common	bind			sys_bind			compat_sys_bind
+362  common	connect			sys_connect			compat_sys_connect
+363  common	listen			sys_listen			sys_listen
+364  common	accept4			sys_accept4			compat_sys_accept4
+365  common	getsockopt		sys_getsockopt			compat_sys_getsockopt
+366  common	setsockopt		sys_setsockopt			compat_sys_setsockopt
+367  common	getsockname		sys_getsockname			compat_sys_getsockname
+368  common	getpeername		sys_getpeername			compat_sys_getpeername
+369  common	sendto			sys_sendto			compat_sys_sendto
+370  common	sendmsg			sys_sendmsg			compat_sys_sendmsg
+371  common	recvfrom		sys_recvfrom			compat_sys_recvfrom
+372  common	recvmsg			sys_recvmsg			compat_sys_recvmsg
+373  common	shutdown		sys_shutdown			sys_shutdown
+374  common	mlock2			sys_mlock2			compat_sys_mlock2
+375  common	copy_file_range		sys_copy_file_range		compat_sys_copy_file_range
+376  common	preadv2			sys_preadv2			compat_sys_preadv2
+377  common	pwritev2		sys_pwritev2			compat_sys_pwritev2
+378  common	s390_guarded_storage	sys_s390_guarded_storage	compat_sys_s390_guarded_storage
+379  common	statx			sys_statx			compat_sys_statx
+380  common	s390_sthyi		sys_s390_sthyi			compat_sys_s390_sthyi
diff --git a/arch/s390/include/dwarf-regs-table.h b/arch/s390/include/dwarf-regs-table.h
new file mode 100644
index 0000000..6715535
--- /dev/null
+++ b/arch/s390/include/dwarf-regs-table.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef S390_DWARF_REGS_TABLE_H
+#define S390_DWARF_REGS_TABLE_H
+
+#define REG_DWARFNUM_NAME(reg, idx)	[idx] = "%" #reg
+
+/*
+ * Names indexed by DWARF register number (FPRs are not numbered in order), see
+ * http://refspecs.linuxfoundation.org/ELF/zSeries/lzsabi0_s390/x1542.html
+ */
+static const char * const s390_dwarf_regs[] = {
+	"%r0", "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
+	"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+	REG_DWARFNUM_NAME(f0, 16),
+	REG_DWARFNUM_NAME(f2, 17),
+	REG_DWARFNUM_NAME(f4, 18),
+	REG_DWARFNUM_NAME(f6, 19),
+	REG_DWARFNUM_NAME(f1, 20),
+	REG_DWARFNUM_NAME(f3, 21),
+	REG_DWARFNUM_NAME(f5, 22),
+	REG_DWARFNUM_NAME(f7, 23),
+	REG_DWARFNUM_NAME(f8, 24),
+	REG_DWARFNUM_NAME(f10, 25),
+	REG_DWARFNUM_NAME(f12, 26),
+	REG_DWARFNUM_NAME(f14, 27),
+	REG_DWARFNUM_NAME(f9, 28),
+	REG_DWARFNUM_NAME(f11, 29),
+	REG_DWARFNUM_NAME(f13, 30),
+	REG_DWARFNUM_NAME(f15, 31),
+	REG_DWARFNUM_NAME(c0, 32),
+	REG_DWARFNUM_NAME(c1, 33),
+	REG_DWARFNUM_NAME(c2, 34),
+	REG_DWARFNUM_NAME(c3, 35),
+	REG_DWARFNUM_NAME(c4, 36),
+	REG_DWARFNUM_NAME(c5, 37),
+	REG_DWARFNUM_NAME(c6, 38),
+	REG_DWARFNUM_NAME(c7, 39),
+	REG_DWARFNUM_NAME(c8, 40),
+	REG_DWARFNUM_NAME(c9, 41),
+	REG_DWARFNUM_NAME(c10, 42),
+	REG_DWARFNUM_NAME(c11, 43),
+	REG_DWARFNUM_NAME(c12, 44),
+	REG_DWARFNUM_NAME(c13, 45),
+	REG_DWARFNUM_NAME(c14, 46),
+	REG_DWARFNUM_NAME(c15, 47),
+	REG_DWARFNUM_NAME(a0, 48),
+	REG_DWARFNUM_NAME(a1, 49),
+	REG_DWARFNUM_NAME(a2, 50),
+	REG_DWARFNUM_NAME(a3, 51),
+	REG_DWARFNUM_NAME(a4, 52),
+	REG_DWARFNUM_NAME(a5, 53),
+	REG_DWARFNUM_NAME(a6, 54),
+	REG_DWARFNUM_NAME(a7, 55),
+	REG_DWARFNUM_NAME(a8, 56),
+	REG_DWARFNUM_NAME(a9, 57),
+	REG_DWARFNUM_NAME(a10, 58),
+	REG_DWARFNUM_NAME(a11, 59),
+	REG_DWARFNUM_NAME(a12, 60),
+	REG_DWARFNUM_NAME(a13, 61),
+	REG_DWARFNUM_NAME(a14, 62),
+	REG_DWARFNUM_NAME(a15, 63),
+	REG_DWARFNUM_NAME(pswm, 64),
+	REG_DWARFNUM_NAME(pswa, 65),
+};
+
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+#define s390_regstr_tbl s390_dwarf_regs
+
+#endif	/* DEFINE_DWARF_REGSTR_TABLE */
+#endif	/* S390_DWARF_REGS_TABLE_H */
diff --git a/arch/s390/include/perf_regs.h b/arch/s390/include/perf_regs.h
new file mode 100644
index 0000000..bcfbaed
--- /dev/null
+++ b/arch/s390/include/perf_regs.h
@@ -0,0 +1,95 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_S390_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_S390_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP PERF_REG_S390_PC
+#define PERF_REG_SP PERF_REG_S390_R15
+
+/* Translate a PERF_REG_S390_* register id into its printable name. */
+static inline const char *perf_reg_name(int id)
+{
+	switch (id) {
+	case PERF_REG_S390_R0:
+		return "R0";
+	case PERF_REG_S390_R1:
+		return "R1";
+	case PERF_REG_S390_R2:
+		return "R2";
+	case PERF_REG_S390_R3:
+		return "R3";
+	case PERF_REG_S390_R4:
+		return "R4";
+	case PERF_REG_S390_R5:
+		return "R5";
+	case PERF_REG_S390_R6:
+		return "R6";
+	case PERF_REG_S390_R7:
+		return "R7";
+	case PERF_REG_S390_R8:
+		return "R8";
+	case PERF_REG_S390_R9:
+		return "R9";
+	case PERF_REG_S390_R10:
+		return "R10";
+	case PERF_REG_S390_R11:
+		return "R11";
+	case PERF_REG_S390_R12:
+		return "R12";
+	case PERF_REG_S390_R13:
+		return "R13";
+	case PERF_REG_S390_R14:
+		return "R14";
+	case PERF_REG_S390_R15:
+		return "R15";
+	case PERF_REG_S390_FP0:
+		return "FP0";
+	case PERF_REG_S390_FP1:
+		return "FP1";
+	case PERF_REG_S390_FP2:
+		return "FP2";
+	case PERF_REG_S390_FP3:
+		return "FP3";
+	case PERF_REG_S390_FP4:
+		return "FP4";
+	case PERF_REG_S390_FP5:
+		return "FP5";
+	case PERF_REG_S390_FP6:
+		return "FP6";
+	case PERF_REG_S390_FP7:
+		return "FP7";
+	case PERF_REG_S390_FP8:
+		return "FP8";
+	case PERF_REG_S390_FP9:
+		return "FP9";
+	case PERF_REG_S390_FP10:
+		return "FP10";
+	case PERF_REG_S390_FP11:
+		return "FP11";
+	case PERF_REG_S390_FP12:
+		return "FP12";
+	case PERF_REG_S390_FP13:
+		return "FP13";
+	case PERF_REG_S390_FP14:
+		return "FP14";
+	case PERF_REG_S390_FP15:
+		return "FP15";
+	case PERF_REG_S390_MASK:
+		return "MASK";
+	case PERF_REG_S390_PC:
+		return "PC";
+	default:
+		/* Not a register id handled above. */
+		return NULL;
+	}
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/arch/s390/util/Build b/arch/s390/util/Build
new file mode 100644
index 0000000..4a23368
--- /dev/null
+++ b/arch/s390/util/Build
@@ -0,0 +1,9 @@
+libperf-y += header.o
+libperf-y += kvm-stat.o
+
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-y += machine.o
+
+libperf-$(CONFIG_AUXTRACE) += auxtrace.o
diff --git a/arch/s390/util/auxtrace.c b/arch/s390/util/auxtrace.c
new file mode 100644
index 0000000..3afe825
--- /dev/null
+++ b/arch/s390/util/auxtrace.c
@@ -0,0 +1,119 @@
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "../../util/evlist.h"
+#include "../../util/auxtrace.h"
+#include "../../util/evsel.h"
+
+#define PERF_EVENT_CPUM_SF		0xB0000 /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG		0xBD000 /* Event: Combined-sampling */
+#define DEFAULT_AUX_PAGES		128
+#define DEFAULT_FREQ			4000
+
+/* ->free callback: release the record allocated in auxtrace_record__init(). */
+static void cpumsf_free(struct auxtrace_record *itr)
+{
+	free(itr);
+}
+
+/* The callbacks below are stubs: each ignores its arguments and returns 0. */
+static size_t cpumsf_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+				    struct perf_evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
+static int cpumsf_info_fill(struct auxtrace_record *itr __maybe_unused,
+			    struct perf_session *session __maybe_unused,
+			    struct auxtrace_info_event *auxtrace_info __maybe_unused,
+			    size_t priv_size __maybe_unused)
+{
+	return 0;
+}
+
+static unsigned long cpumsf_reference(struct auxtrace_record *itr __maybe_unused)
+{
+	return 0;
+}
+
+/* ->recording_options callback: set full_auxtrace, default the AUX buffer size. */
+static int cpumsf_recording_options(struct auxtrace_record *ar __maybe_unused,
+				    struct perf_evlist *evlist __maybe_unused,
+				    struct record_opts *opts)
+{
+	unsigned int factor = 1;
+	unsigned int pages;
+
+	opts->full_auxtrace = true;
+
+	/*
+	 * The AUX buffer size should be set properly to avoid
+	 * overflow of samples if it is not set explicitly.
+	 * DEFAULT_AUX_PAGES is a proper size when sampling frequency
+	 * is DEFAULT_FREQ. It is expected to hold about 1/2 second
+	 * of sampling data. The size used for AUX buffer will scale
+	 * according to the specified frequency and DEFAULT_FREQ.
+	 */
+	if (opts->auxtrace_mmap_pages)
+		return 0;
+
+	/* A frequency of 0 must not lead to roundup_pow_of_two(0). */
+	if (opts->user_freq != UINT_MAX && opts->user_freq)
+		factor = (opts->user_freq + DEFAULT_FREQ - 1) / DEFAULT_FREQ;
+	pages = DEFAULT_AUX_PAGES * factor;
+	opts->auxtrace_mmap_pages = roundup_pow_of_two(pages);
+	return 0;
+}
+
+/* ->parse_snapshot_options callback: ignores its arguments and returns 0. */
+static int cpumsf_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+					 struct record_opts *opts __maybe_unused,
+					 const char *str __maybe_unused)
+{
+	return 0;
+}
+
+/* True if @evlist contains the combined-sampling (diagnostic) event. */
+static bool cpumsf_has_diag_event(struct perf_evlist *evlist)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (pos->attr.config == PERF_EVENT_CPUM_SF_DIAG)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Called by perf record to check whether the events really need auxtrace.
+ * A record is allocated only for combined-sampling (diagnostic) mode;
+ * otherwise NULL is returned. *err is 0 unless the allocation failed.
+ */
+struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
+					      int *err)
+{
+	struct auxtrace_record *aux;
+
+	*err = 0;
+	if (evlist->nr_entries == 0 || !cpumsf_has_diag_event(evlist))
+		return NULL;
+
+	/* sampling in diagnose mode. alloc aux buffer */
+	aux = zalloc(sizeof(*aux));
+	if (aux == NULL) {
+		*err = -ENOMEM;
+		return NULL;
+	}
+
+	aux->parse_snapshot_options = cpumsf_parse_snapshot_options;
+	aux->recording_options = cpumsf_recording_options;
+	aux->info_priv_size = cpumsf_info_priv_size;
+	aux->info_fill = cpumsf_info_fill;
+	aux->free = cpumsf_free;
+	aux->reference = cpumsf_reference;
+	return aux;
+}
diff --git a/arch/s390/util/dwarf-regs.c b/arch/s390/util/dwarf-regs.c
new file mode 100644
index 0000000..a8ace5c
--- /dev/null
+++ b/arch/s390/util/dwarf-regs.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright IBM Corp. 2010, 2017
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *	      Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <linux/kernel.h>
+#include <asm/ptrace.h>
+#include <string.h>
+#include <dwarf-regs.h>
+#include "dwarf-regs-table.h"
+
+const char *get_arch_regstr(unsigned int n)
+{
+	return n < ARRAY_SIZE(s390_dwarf_regs) ? s390_dwarf_regs[n] : NULL;
+}
+
+/*
+ * Convert the register name into an offset to struct pt_regs (kernel).
+ * This is required by the BPF prologue generator.  The BPF program is
+ * called in the BPF overflow handler in the perf core.
+ * Only "%r0" .. "%r15" are accepted; anything else yields -EINVAL.
+ */
+int regs_query_register_offset(const char *name)
+{
+	unsigned long gpr;
+	char *end;
+
+	/* Require "%r" plus a decimal number with nothing before or after it. */
+	if (!name || strncmp(name, "%r", 2) || name[2] < '0' || name[2] > '9')
+		return -EINVAL;
+	errno = 0;
+	gpr = strtoul(name + 2, &end, 10);
+	if (errno || *end != '\0' || gpr >= 16)
+		return -EINVAL;
+	return offsetof(user_pt_regs, gprs) + 8 * gpr;
+}
diff --git a/arch/s390/util/header.c b/arch/s390/util/header.c
new file mode 100644
index 0000000..163b92f
--- /dev/null
+++ b/arch/s390/util/header.c
@@ -0,0 +1,148 @@
+/*
+ * Implementation of get_cpuid().
+ *
+ * Copyright IBM Corp. 2014, 2018
+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
+ *	      Thomas Richter <tmricht@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "../../util/header.h"
+#include "../../util/util.h"
+
+#define SYSINFO_MANU	"Manufacturer:"
+#define SYSINFO_TYPE	"Type:"
+#define SYSINFO_MODEL	"Model:"
+#define SRVLVL_CPUMF	"CPU-MF:"
+#define SRVLVL_VERSION	"version="
+#define SRVLVL_AUTHORIZATION	"authorization="
+#define SYSINFO		"/proc/sysinfo"
+#define SRVLVL		"/proc/service_levels"
+
+/*
+ * Write "manufacturer,type,model[,version,authorization]" into @buffer.
+ * The values come from /proc/sysinfo and, optionally, /proc/service_levels.
+ * Returns 0 on success, -1 on error or when @sz bytes are not enough.
+ */
+int get_cpuid(char *buffer, size_t sz)
+{
+	char *cp, *line = NULL, *line2;
+	char type[8] = "", model[33] = "", version[8] = "";
+	char manufacturer[32] = "", authorization[8] = "";
+	int tpsize = 0, mdsize = 0, vssize = 0, mfsize = 0, atsize = 0;
+	size_t line_sz = 0, nbytes;
+	FILE *sysinfo;
+
+	/*
+	 * Scan /proc/sysinfo line by line and read out values for
+	 * Manufacturer:, Type: and Model:, for example:
+	 * Manufacturer:    IBM
+	 * Type:            2964
+	 * Model:           702              N96
+	 * The first word is the Model Capacity and the second word is
+	 * Model (can be omitted). Both words have a maximum size of 16
+	 * bytes.
+	 */
+	sysinfo = fopen(SYSINFO, "r");
+	if (sysinfo == NULL)
+		return -1;
+
+	while (getline(&line, &line_sz, sysinfo) != -1) {
+		if (!strncmp(line, SYSINFO_MANU, strlen(SYSINFO_MANU))) {
+			line2 = line + strlen(SYSINFO_MANU);
+
+			while ((cp = strtok_r(line2, "\n ", &line2))) {
+				mfsize += scnprintf(manufacturer + mfsize,
+						    sizeof(manufacturer) - mfsize, "%s", cp);
+			}
+		}
+
+		if (!strncmp(line, SYSINFO_TYPE, strlen(SYSINFO_TYPE))) {
+			line2 = line + strlen(SYSINFO_TYPE);
+
+			while ((cp = strtok_r(line2, "\n ", &line2))) {
+				tpsize += scnprintf(type + tpsize,
+						    sizeof(type) - tpsize, "%s", cp);
+			}
+		}
+
+		if (!strncmp(line, SYSINFO_MODEL, strlen(SYSINFO_MODEL))) {
+			line2 = line + strlen(SYSINFO_MODEL);
+
+			while ((cp = strtok_r(line2, "\n ", &line2))) {
+				mdsize += scnprintf(model + mdsize, sizeof(model) - mdsize,
+						    "%s%s", model[0] ? "," : "", cp);
+			}
+			break;
+		}
+	}
+	fclose(sysinfo);
+
+	/* Missing manufacturer, type or model information should not happen */
+	if (!manufacturer[0] || !type[0] || !model[0]) {
+		free(line);
+		return -1;
+	}
+
+	/*
+	 * Scan /proc/service_levels and return the CPU-MF counter facility
+	 * version number and authorization level.
+	 * Optional, does not exist on z/VM guests.
+	 */
+	sysinfo = fopen(SRVLVL, "r");
+	if (sysinfo == NULL)
+		goto skip_sysinfo;
+	while (getline(&line, &line_sz, sysinfo) != -1) {
+		if (strncmp(line, SRVLVL_CPUMF, strlen(SRVLVL_CPUMF)))
+			continue;
+
+		line2 = line + strlen(SRVLVL_CPUMF);
+		while ((cp = strtok_r(line2, "\n ", &line2))) {
+			if (!strncmp(cp, SRVLVL_VERSION,
+				     strlen(SRVLVL_VERSION))) {
+				char *sep = strchr(cp, '=');
+
+				vssize += scnprintf(version + vssize,
+						    sizeof(version) - vssize, "%s", sep + 1);
+			}
+			if (!strncmp(cp, SRVLVL_AUTHORIZATION,
+				     strlen(SRVLVL_AUTHORIZATION))) {
+				char *sep = strchr(cp, '=');
+
+				atsize += scnprintf(authorization + atsize,
+						    sizeof(authorization) - atsize, "%s", sep + 1);
+			}
+		}
+	}
+	fclose(sysinfo);
+
+skip_sysinfo:
+	free(line);
+
+	if (version[0] && authorization[0])
+		nbytes = snprintf(buffer, sz, "%s,%s,%s,%s,%s",
+				  manufacturer, type, model, version,
+				  authorization);
+	else
+		nbytes = snprintf(buffer, sz, "%s,%s,%s", manufacturer, type,
+				  model);
+	return (nbytes >= sz) ? -1 : 0;
+}
+
+char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+	char *cpuid = malloc(128); /* room for the get_cpuid() string */
+
+	if (cpuid != NULL && get_cpuid(cpuid, 128) < 0)
+		zfree(&cpuid);
+	return cpuid;
+}
diff --git a/arch/s390/util/kvm-stat.c b/arch/s390/util/kvm-stat.c
new file mode 100644
index 0000000..d233e2e
--- /dev/null
+++ b/arch/s390/util/kvm-stat.c
@@ -0,0 +1,112 @@
+/*
+ * Arch specific functions for perf kvm stat.
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#include <errno.h>
+#include "../../util/kvm-stat.h"
+#include <asm/sie.h>
+
+define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
+define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
+define_exit_reasons_table(sie_sigp_order_codes, sigp_order_codes);
+define_exit_reasons_table(sie_diagnose_codes, diagnose_codes);
+define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes);
+
+const char *vcpu_id_str = "id";
+const int decode_str_len = 40;
+const char *kvm_exit_reason = "icptcode";
+const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter";
+const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit";
+
+/* Key: the "instruction" field run through icpt_insn_decoder(). */
+static void event_icpt_insn_get_key(struct perf_evsel *evsel,
+				    struct perf_sample *sample,
+				    struct event_key *key)
+{
+	unsigned long raw = perf_evsel__intval(evsel, sample, "instruction");
+
+	key->exit_reasons = sie_icpt_insn_codes;
+	key->key = icpt_insn_decoder(raw);
+}
+
+static void event_sigp_get_key(struct perf_evsel *evsel,
+			       struct perf_sample *sample,
+			       struct event_key *key)
+{
+	key->exit_reasons = sie_sigp_order_codes; /* SIGP order codes */
+	key->key = perf_evsel__intval(evsel, sample, "order_code");
+}
+
+static void event_diag_get_key(struct perf_evsel *evsel,
+			       struct perf_sample *sample,
+			       struct event_key *key)
+{
+	key->exit_reasons = sie_diagnose_codes; /* diagnose codes */
+	key->key = perf_evsel__intval(evsel, sample, "code");
+}
+
+static void event_icpt_prog_get_key(struct perf_evsel *evsel,
+				    struct perf_sample *sample,
+				    struct event_key *key)
+{
+	key->exit_reasons = sie_icpt_prog_codes; /* intercept prog codes */
+	key->key = perf_evsel__intval(evsel, sample, "code");
+}
+
+static struct child_event_ops child_events[] = {
+	{ .name = "kvm:kvm_s390_intercept_instruction",
+	  .get_key = event_icpt_insn_get_key },
+	{ .name = "kvm:kvm_s390_handle_sigp",
+	  .get_key = event_sigp_get_key },
+	{ .name = "kvm:kvm_s390_handle_diag",
+	  .get_key = event_diag_get_key },
+	{ .name = "kvm:kvm_s390_intercept_prog",
+	  .get_key = event_icpt_prog_get_key },
+	{ NULL, NULL },
+};
+
+static struct kvm_events_ops exit_events = {
+	.is_begin_event = exit_event_begin,
+	.is_end_event = exit_event_end,
+	.child_ops = child_events,
+	.decode_key = exit_event_decode_key,
+	.name = "VM-EXIT"
+};
+
+const char *kvm_events_tp[] = {
+	"kvm:kvm_s390_sie_enter",
+	"kvm:kvm_s390_sie_exit",
+	"kvm:kvm_s390_intercept_instruction",
+	"kvm:kvm_s390_handle_sigp",
+	"kvm:kvm_s390_handle_diag",
+	"kvm:kvm_s390_intercept_prog",
+	NULL,
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+	{ .name = "vmexit", .ops = &exit_events },
+	{ NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+	"Wait state",
+	NULL,
+};
+
+/* Pick SIE exit reasons; s390 get_cpuid() gives e.g. "IBM,2964,702,N96". */
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid)
+{
+	if (!strstr(cpuid, "IBM"))
+		return -ENOTSUP;
+
+	kvm->exit_reasons = sie_exit_reasons;
+	kvm->exit_reasons_isa = "SIE";
+	return 0;
+}
diff --git a/arch/s390/util/machine.c b/arch/s390/util/machine.c
new file mode 100644
index 0000000..0b20540
--- /dev/null
+++ b/arch/s390/util/machine.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include "util.h"
+#include "machine.h"
+#include "api/fs/fs.h"
+
+/* Set *start from sysfs module/<name>/sections/.text; -1 if unreadable. */
+int arch__fix_module_text_start(u64 *start, const char *name)
+{
+	char path[PATH_MAX];
+	int len = (int)strlen(name) - 2;	/* drop first/last char */
+
+	snprintf(path, PATH_MAX, "module/%.*s/sections/.text", len, name + 1);
+	if (sysfs__read_ull(path, (unsigned long long *)start) < 0)
+		return -1;
+
+	return 0;
+}
diff --git a/arch/s390/util/unwind-libdw.c b/arch/s390/util/unwind-libdw.c
new file mode 100644
index 0000000..387c698
--- /dev/null
+++ b/arch/s390/util/unwind-libdw.c
@@ -0,0 +1,63 @@
+#include <linux/kernel.h>
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+#include "dwarf-regs-table.h"
+
+/* Give libdw the sampled user PC and DWARF registers 0-31 (GPRs, FPRs). */
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct regs_dump *user_regs = &ui->sample->user_regs;
+	Dwarf_Word dwarf_regs[ARRAY_SIZE(s390_dwarf_regs)];
+
+#define REG(r) ({						\
+	Dwarf_Word val = 0;					\
+	perf_reg_value(&val, user_regs, PERF_REG_S390_##r);	\
+	val;							\
+})
+	/*
+	 * dwarf_regs[] is indexed by DWARF register number (note the FP
+	 * order); see perf/arch/s390/include/dwarf-regs-table.h for details.
+	 */
+	dwarf_regs[0]  = REG(R0);
+	dwarf_regs[1]  = REG(R1);
+	dwarf_regs[2]  = REG(R2);
+	dwarf_regs[3]  = REG(R3);
+	dwarf_regs[4]  = REG(R4);
+	dwarf_regs[5]  = REG(R5);
+	dwarf_regs[6]  = REG(R6);
+	dwarf_regs[7]  = REG(R7);
+	dwarf_regs[8]  = REG(R8);
+	dwarf_regs[9]  = REG(R9);
+	dwarf_regs[10] = REG(R10);
+	dwarf_regs[11] = REG(R11);
+	dwarf_regs[12] = REG(R12);
+	dwarf_regs[13] = REG(R13);
+	dwarf_regs[14] = REG(R14);
+	dwarf_regs[15] = REG(R15);
+
+	dwarf_regs[16] = REG(FP0);
+	dwarf_regs[17] = REG(FP2);
+	dwarf_regs[18] = REG(FP4);
+	dwarf_regs[19] = REG(FP6);
+	dwarf_regs[20] = REG(FP1);
+	dwarf_regs[21] = REG(FP3);
+	dwarf_regs[22] = REG(FP5);
+	dwarf_regs[23] = REG(FP7);
+	dwarf_regs[24] = REG(FP8);
+	dwarf_regs[25] = REG(FP10);
+	dwarf_regs[26] = REG(FP12);
+	dwarf_regs[27] = REG(FP14);
+	dwarf_regs[28] = REG(FP9);
+	dwarf_regs[29] = REG(FP11);
+	dwarf_regs[30] = REG(FP13);
+	dwarf_regs[31] = REG(FP15);
+
+	dwarf_regs[64] = REG(MASK);
+	dwarf_regs[65] = REG(PC);
+
+	dwfl_thread_state_register_pc(thread, dwarf_regs[65]);
+	return dwfl_thread_state_registers(thread, 0, 32, dwarf_regs);
+}
diff --git a/arch/sh/Build b/arch/sh/Build
new file mode 100644
index 0000000..54afe4a
--- /dev/null
+++ b/arch/sh/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
new file mode 100644
index 0000000..7fbca17
--- /dev/null
+++ b/arch/sh/Makefile
@@ -0,0 +1,3 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
diff --git a/arch/sh/include/dwarf-regs-table.h b/arch/sh/include/dwarf-regs-table.h
new file mode 100644
index 0000000..900e696
--- /dev/null
+++ b/arch/sh/include/dwarf-regs-table.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+const char * const sh_regstr_tbl[] = {
+	"r0",
+	"r1",
+	"r2",
+	"r3",
+	"r4",
+	"r5",
+	"r6",
+	"r7",
+	"r8",
+	"r9",
+	"r10",
+	"r11",
+	"r12",
+	"r13",
+	"r14",
+	"r15",
+	"pc",
+	"pr",
+};
+
+#endif
diff --git a/arch/sh/util/Build b/arch/sh/util/Build
new file mode 100644
index 0000000..954e287
--- /dev/null
+++ b/arch/sh/util/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/arch/sh/util/dwarf-regs.c b/arch/sh/util/dwarf-regs.c
new file mode 100644
index 0000000..f8dfa89
--- /dev/null
+++ b/arch/sh/util/dwarf-regs.c
@@ -0,0 +1,55 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Matt Fleming <matt@console-pimps.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+/*
+ * Generic dwarf analysis helpers
+ */
+
+#define SH_MAX_REGS 18
+const char *sh_regs_table[SH_MAX_REGS] = {
+	"r0",
+	"r1",
+	"r2",
+	"r3",
+	"r4",
+	"r5",
+	"r6",
+	"r7",
+	"r8",
+	"r9",
+	"r10",
+	"r11",
+	"r12",
+	"r13",
+	"r14",
+	"r15",
+	"pc",
+	"pr",
+};
+
+/* SH name of DWARF register @n (for kprobe-tracer), NULL if out of range */
+const char *get_arch_regstr(unsigned int n)
+{
+	return (n >= SH_MAX_REGS) ? NULL : sh_regs_table[n];
+}
diff --git a/arch/sparc/Build b/arch/sparc/Build
new file mode 100644
index 0000000..54afe4a
--- /dev/null
+++ b/arch/sparc/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
new file mode 100644
index 0000000..7fbca17
--- /dev/null
+++ b/arch/sparc/Makefile
@@ -0,0 +1,3 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
diff --git a/arch/sparc/include/dwarf-regs-table.h b/arch/sparc/include/dwarf-regs-table.h
new file mode 100644
index 0000000..35ede84
--- /dev/null
+++ b/arch/sparc/include/dwarf-regs-table.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const sparc_regstr_tbl[] = {
+	"%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7",
+	"%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7",
+	"%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7",
+	"%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7",
+	"%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
+	"%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
+	"%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
+	"%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
+	"%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39",
+	"%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47",
+	"%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55",
+	"%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63",
+};
+#endif
diff --git a/arch/sparc/util/Build b/arch/sparc/util/Build
new file mode 100644
index 0000000..954e287
--- /dev/null
+++ b/arch/sparc/util/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/arch/sparc/util/dwarf-regs.c b/arch/sparc/util/dwarf-regs.c
new file mode 100644
index 0000000..b704fdb
--- /dev/null
+++ b/arch/sparc/util/dwarf-regs.c
@@ -0,0 +1,43 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 David S. Miller <davem@davemloft.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+#define SPARC_MAX_REGS	96
+
+const char *sparc_regs_table[SPARC_MAX_REGS] = {
+	"%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7",
+	"%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7",
+	"%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7",
+	"%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7",
+	"%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
+	"%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
+	"%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
+	"%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
+	"%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39",
+	"%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47",
+	"%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55",
+	"%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63",
+};
+
+/**
+ * get_arch_regstr() - look up a register name by its DWARF register number
+ * @n:	the DWARF register number
+ *
+ * Return: sparc_regs_table[@n], or NULL when @n >= SPARC_MAX_REGS.
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+	if (n >= SPARC_MAX_REGS)
+		return NULL;
+	return sparc_regs_table[n];
+}
diff --git a/arch/x86/Build b/arch/x86/Build
new file mode 100644
index 0000000..db52fa2
--- /dev/null
+++ b/arch/x86/Build
@@ -0,0 +1,2 @@
+libperf-y += util/
+libperf-y += tests/
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
new file mode 100644
index 0000000..1a38e78
--- /dev/null
+++ b/arch/x86/Makefile
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
+HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+PERF_HAVE_JITDUMP := 1
+
+###
+# Syscall table generation
+#
+
+out    := $(OUTPUT)arch/x86/include/generated/asm
+header := $(out)/syscalls_64.c
+sys    := $(srctree)/tools/perf/arch/x86/entry/syscalls
+systbl := $(sys)/syscalltbl.sh
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header): $(sys)/syscall_64.tbl $(systbl)
+	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+        (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \
+        || echo "Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true
+	$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
+
+clean::
+	$(call QUIET_CLEAN, x86) $(RM) $(header)
+
+archheaders: $(header)
diff --git a/arch/x86/annotate/instructions.c b/arch/x86/annotate/instructions.c
new file mode 100644
index 0000000..44f5aba
--- /dev/null
+++ b/arch/x86/annotate/instructions.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+static struct ins x86__instructions[] = {
+	{ .name = "adc",	.ops = &mov_ops,  },
+	{ .name = "adcb",	.ops = &mov_ops,  },
+	{ .name = "adcl",	.ops = &mov_ops,  },
+	{ .name = "add",	.ops = &mov_ops,  },
+	{ .name = "addl",	.ops = &mov_ops,  },
+	{ .name = "addq",	.ops = &mov_ops,  },
+	{ .name = "addsd",	.ops = &mov_ops,  },
+	{ .name = "addw",	.ops = &mov_ops,  },
+	{ .name = "and",	.ops = &mov_ops,  },
+	{ .name = "andb",	.ops = &mov_ops,  },
+	{ .name = "andl",	.ops = &mov_ops,  },
+	{ .name = "andpd",	.ops = &mov_ops,  },
+	{ .name = "andps",	.ops = &mov_ops,  },
+	{ .name = "andq",	.ops = &mov_ops,  },
+	{ .name = "andw",	.ops = &mov_ops,  },
+	{ .name = "bsr",	.ops = &mov_ops,  },
+	{ .name = "bt",		.ops = &mov_ops,  },
+	{ .name = "btr",	.ops = &mov_ops,  },
+	{ .name = "bts",	.ops = &mov_ops,  },
+	{ .name = "btsq",	.ops = &mov_ops,  },
+	{ .name = "call",	.ops = &call_ops, },
+	{ .name = "callq",	.ops = &call_ops, },
+	{ .name = "cmovbe",	.ops = &mov_ops,  },
+	{ .name = "cmove",	.ops = &mov_ops,  },
+	{ .name = "cmovae",	.ops = &mov_ops,  },
+	{ .name = "cmp",	.ops = &mov_ops,  },
+	{ .name = "cmpb",	.ops = &mov_ops,  },
+	{ .name = "cmpl",	.ops = &mov_ops,  },
+	{ .name = "cmpq",	.ops = &mov_ops,  },
+	{ .name = "cmpw",	.ops = &mov_ops,  },
+	{ .name = "cmpxch",	.ops = &mov_ops,  },
+	{ .name = "cmpxchg",	.ops = &mov_ops,  },
+	{ .name = "cs",		.ops = &mov_ops,  },
+	{ .name = "dec",	.ops = &dec_ops,  },
+	{ .name = "decl",	.ops = &dec_ops,  },
+	{ .name = "divsd",	.ops = &mov_ops,  },
+	{ .name = "divss",	.ops = &mov_ops,  },
+	{ .name = "gs",		.ops = &mov_ops,  },
+	{ .name = "imul",	.ops = &mov_ops,  },
+	{ .name = "inc",	.ops = &dec_ops,  },
+	{ .name = "incl",	.ops = &dec_ops,  },
+	{ .name = "ja",		.ops = &jump_ops, },
+	{ .name = "jae",	.ops = &jump_ops, },
+	{ .name = "jb",		.ops = &jump_ops, },
+	{ .name = "jbe",	.ops = &jump_ops, },
+	{ .name = "jc",		.ops = &jump_ops, },
+	{ .name = "jcxz",	.ops = &jump_ops, },
+	{ .name = "je",		.ops = &jump_ops, },
+	{ .name = "jecxz",	.ops = &jump_ops, },
+	{ .name = "jg",		.ops = &jump_ops, },
+	{ .name = "jge",	.ops = &jump_ops, },
+	{ .name = "jl",		.ops = &jump_ops, },
+	{ .name = "jle",	.ops = &jump_ops, },
+	{ .name = "jmp",	.ops = &jump_ops, },
+	{ .name = "jmpq",	.ops = &jump_ops, },
+	{ .name = "jna",	.ops = &jump_ops, },
+	{ .name = "jnae",	.ops = &jump_ops, },
+	{ .name = "jnb",	.ops = &jump_ops, },
+	{ .name = "jnbe",	.ops = &jump_ops, },
+	{ .name = "jnc",	.ops = &jump_ops, },
+	{ .name = "jne",	.ops = &jump_ops, },
+	{ .name = "jng",	.ops = &jump_ops, },
+	{ .name = "jnge",	.ops = &jump_ops, },
+	{ .name = "jnl",	.ops = &jump_ops, },
+	{ .name = "jnle",	.ops = &jump_ops, },
+	{ .name = "jno",	.ops = &jump_ops, },
+	{ .name = "jnp",	.ops = &jump_ops, },
+	{ .name = "jns",	.ops = &jump_ops, },
+	{ .name = "jnz",	.ops = &jump_ops, },
+	{ .name = "jo",		.ops = &jump_ops, },
+	{ .name = "jp",		.ops = &jump_ops, },
+	{ .name = "jpe",	.ops = &jump_ops, },
+	{ .name = "jpo",	.ops = &jump_ops, },
+	{ .name = "jrcxz",	.ops = &jump_ops, },
+	{ .name = "js",		.ops = &jump_ops, },
+	{ .name = "jz",		.ops = &jump_ops, },
+	{ .name = "lea",	.ops = &mov_ops,  },
+	{ .name = "lock",	.ops = &lock_ops, },
+	{ .name = "mov",	.ops = &mov_ops,  },
+	{ .name = "movapd",	.ops = &mov_ops,  },
+	{ .name = "movaps",	.ops = &mov_ops,  },
+	{ .name = "movb",	.ops = &mov_ops,  },
+	{ .name = "movdqa",	.ops = &mov_ops,  },
+	{ .name = "movdqu",	.ops = &mov_ops,  },
+	{ .name = "movl",	.ops = &mov_ops,  },
+	{ .name = "movq",	.ops = &mov_ops,  },
+	{ .name = "movsd",	.ops = &mov_ops,  },
+	{ .name = "movslq",	.ops = &mov_ops,  },
+	{ .name = "movss",	.ops = &mov_ops,  },
+	{ .name = "movupd",	.ops = &mov_ops,  },
+	{ .name = "movups",	.ops = &mov_ops,  },
+	{ .name = "movw",	.ops = &mov_ops,  },
+	{ .name = "movzbl",	.ops = &mov_ops,  },
+	{ .name = "movzwl",	.ops = &mov_ops,  },
+	{ .name = "mulsd",	.ops = &mov_ops,  },
+	{ .name = "mulss",	.ops = &mov_ops,  },
+	{ .name = "nop",	.ops = &nop_ops,  },
+	{ .name = "nopl",	.ops = &nop_ops,  },
+	{ .name = "nopw",	.ops = &nop_ops,  },
+	{ .name = "or",		.ops = &mov_ops,  },
+	{ .name = "orb",	.ops = &mov_ops,  },
+	{ .name = "orl",	.ops = &mov_ops,  },
+	{ .name = "orps",	.ops = &mov_ops,  },
+	{ .name = "orq",	.ops = &mov_ops,  },
+	{ .name = "pand",	.ops = &mov_ops,  },
+	{ .name = "paddq",	.ops = &mov_ops,  },
+	{ .name = "pcmpeqb",	.ops = &mov_ops,  },
+	{ .name = "por",	.ops = &mov_ops,  },
+	{ .name = "rclb",	.ops = &mov_ops,  },
+	{ .name = "rcll",	.ops = &mov_ops,  },
+	{ .name = "retq",	.ops = &ret_ops,  },
+	{ .name = "sbb",	.ops = &mov_ops,  },
+	{ .name = "sbbl",	.ops = &mov_ops,  },
+	{ .name = "sete",	.ops = &mov_ops,  },
+	{ .name = "sub",	.ops = &mov_ops,  },
+	{ .name = "subl",	.ops = &mov_ops,  },
+	{ .name = "subq",	.ops = &mov_ops,  },
+	{ .name = "subsd",	.ops = &mov_ops,  },
+	{ .name = "subw",	.ops = &mov_ops,  },
+	{ .name = "test",	.ops = &mov_ops,  },
+	{ .name = "testb",	.ops = &mov_ops,  },
+	{ .name = "testl",	.ops = &mov_ops,  },
+	{ .name = "ucomisd",	.ops = &mov_ops,  },
+	{ .name = "ucomiss",	.ops = &mov_ops,  },
+	{ .name = "vaddsd",	.ops = &mov_ops,  },
+	{ .name = "vandpd",	.ops = &mov_ops,  },
+	{ .name = "vmovdqa",	.ops = &mov_ops,  },
+	{ .name = "vmovq",	.ops = &mov_ops,  },
+	{ .name = "vmovsd",	.ops = &mov_ops,  },
+	{ .name = "vmulsd",	.ops = &mov_ops,  },
+	{ .name = "vorpd",	.ops = &mov_ops,  },
+	{ .name = "vsubsd",	.ops = &mov_ops,  },
+	{ .name = "vucomisd",	.ops = &mov_ops,  },
+	{ .name = "xadd",	.ops = &mov_ops,  },
+	{ .name = "xbeginl",	.ops = &jump_ops, },
+	{ .name = "xbeginq",	.ops = &jump_ops, },
+	{ .name = "xchg",	.ops = &mov_ops,  },
+	{ .name = "xor",	.ops = &mov_ops, },
+	{ .name = "xorb",	.ops = &mov_ops, },
+	{ .name = "xorpd",	.ops = &mov_ops, },
+	{ .name = "xorps",	.ops = &mov_ops, },
+};
+
+/*
+ * Tell whether @ins1 counts as fused with the following instruction @ins2.
+ * Only family 6, model 0x1e (Nehalem) or newer is considered, and @ins2
+ * must not contain "jmp".
+ */
+static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
+			      const char *ins2)
+{
+	if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp"))
+		return false;
+
+	/* cmp (anything but cmpxchg) and test qualify on every such model */
+	if (strstr(ins1, "test") ||
+	    (strstr(ins1, "cmp") && !strstr(ins1, "xchg")))
+		return true;
+
+	/* Nehalem knows no further candidates */
+	if (arch->model == 0x1e)
+		return false;
+
+	/* Newer platform */
+	return strstr(ins1, "add") ||
+	       strstr(ins1, "sub") ||
+	       strstr(ins1, "and") ||
+	       strstr(ins1, "inc") ||
+	       strstr(ins1, "dec");
+}
+
+/*
+ * Fill arch->family and arch->model from a cpuid string of the form
+ * "GenuineIntel,family,model,stepping".  Returns 0 on success, -1 when
+ * the string does not carry all three numbers.
+ */
+static int x86__cpuid_parse(struct arch *arch, char *cpuid)
+{
+	unsigned int fam, mod, step;
+
+	/* The vendor field is skipped; stepping is parsed but not stored */
+	if (sscanf(cpuid, "%*[^,],%u,%u,%u", &fam, &mod, &step) != 3)
+		return -1;
+
+	arch->family = fam;
+	arch->model = mod;
+	return 0;
+}
+
+/* One-time setup: parse @cpuid (if given) and mark @arch initialized. */
+static int x86__annotate_init(struct arch *arch, char *cpuid)
+{
+	int rc;
+
+	if (arch->initialized)
+		return 0;
+
+	rc = cpuid ? x86__cpuid_parse(arch, cpuid) : 0;
+	/* Set even if parsing failed; the error is returned on this call only */
+	arch->initialized = true;
+	return rc;
+}
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
new file mode 100644
index 0000000..4dfe426
--- /dev/null
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -0,0 +1,386 @@
+#
+# 64-bit system call numbers and entry vectors
+#
+# The format is:
+# <number> <abi> <name> <entry point>
+#
+# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
+#
+# The abi is "common", "64" or "x32" for this file.
+#
+0	common	read			__x64_sys_read
+1	common	write			__x64_sys_write
+2	common	open			__x64_sys_open
+3	common	close			__x64_sys_close
+4	common	stat			__x64_sys_newstat
+5	common	fstat			__x64_sys_newfstat
+6	common	lstat			__x64_sys_newlstat
+7	common	poll			__x64_sys_poll
+8	common	lseek			__x64_sys_lseek
+9	common	mmap			__x64_sys_mmap
+10	common	mprotect		__x64_sys_mprotect
+11	common	munmap			__x64_sys_munmap
+12	common	brk			__x64_sys_brk
+13	64	rt_sigaction		__x64_sys_rt_sigaction
+14	common	rt_sigprocmask		__x64_sys_rt_sigprocmask
+15	64	rt_sigreturn		__x64_sys_rt_sigreturn/ptregs
+16	64	ioctl			__x64_sys_ioctl
+17	common	pread64			__x64_sys_pread64
+18	common	pwrite64		__x64_sys_pwrite64
+19	64	readv			__x64_sys_readv
+20	64	writev			__x64_sys_writev
+21	common	access			__x64_sys_access
+22	common	pipe			__x64_sys_pipe
+23	common	select			__x64_sys_select
+24	common	sched_yield		__x64_sys_sched_yield
+25	common	mremap			__x64_sys_mremap
+26	common	msync			__x64_sys_msync
+27	common	mincore			__x64_sys_mincore
+28	common	madvise			__x64_sys_madvise
+29	common	shmget			__x64_sys_shmget
+30	common	shmat			__x64_sys_shmat
+31	common	shmctl			__x64_sys_shmctl
+32	common	dup			__x64_sys_dup
+33	common	dup2			__x64_sys_dup2
+34	common	pause			__x64_sys_pause
+35	common	nanosleep		__x64_sys_nanosleep
+36	common	getitimer		__x64_sys_getitimer
+37	common	alarm			__x64_sys_alarm
+38	common	setitimer		__x64_sys_setitimer
+39	common	getpid			__x64_sys_getpid
+40	common	sendfile		__x64_sys_sendfile64
+41	common	socket			__x64_sys_socket
+42	common	connect			__x64_sys_connect
+43	common	accept			__x64_sys_accept
+44	common	sendto			__x64_sys_sendto
+45	64	recvfrom		__x64_sys_recvfrom
+46	64	sendmsg			__x64_sys_sendmsg
+47	64	recvmsg			__x64_sys_recvmsg
+48	common	shutdown		__x64_sys_shutdown
+49	common	bind			__x64_sys_bind
+50	common	listen			__x64_sys_listen
+51	common	getsockname		__x64_sys_getsockname
+52	common	getpeername		__x64_sys_getpeername
+53	common	socketpair		__x64_sys_socketpair
+54	64	setsockopt		__x64_sys_setsockopt
+55	64	getsockopt		__x64_sys_getsockopt
+56	common	clone			__x64_sys_clone/ptregs
+57	common	fork			__x64_sys_fork/ptregs
+58	common	vfork			__x64_sys_vfork/ptregs
+59	64	execve			__x64_sys_execve/ptregs
+60	common	exit			__x64_sys_exit
+61	common	wait4			__x64_sys_wait4
+62	common	kill			__x64_sys_kill
+63	common	uname			__x64_sys_newuname
+64	common	semget			__x64_sys_semget
+65	common	semop			__x64_sys_semop
+66	common	semctl			__x64_sys_semctl
+67	common	shmdt			__x64_sys_shmdt
+68	common	msgget			__x64_sys_msgget
+69	common	msgsnd			__x64_sys_msgsnd
+70	common	msgrcv			__x64_sys_msgrcv
+71	common	msgctl			__x64_sys_msgctl
+72	common	fcntl			__x64_sys_fcntl
+73	common	flock			__x64_sys_flock
+74	common	fsync			__x64_sys_fsync
+75	common	fdatasync		__x64_sys_fdatasync
+76	common	truncate		__x64_sys_truncate
+77	common	ftruncate		__x64_sys_ftruncate
+78	common	getdents		__x64_sys_getdents
+79	common	getcwd			__x64_sys_getcwd
+80	common	chdir			__x64_sys_chdir
+81	common	fchdir			__x64_sys_fchdir
+82	common	rename			__x64_sys_rename
+83	common	mkdir			__x64_sys_mkdir
+84	common	rmdir			__x64_sys_rmdir
+85	common	creat			__x64_sys_creat
+86	common	link			__x64_sys_link
+87	common	unlink			__x64_sys_unlink
+88	common	symlink			__x64_sys_symlink
+89	common	readlink		__x64_sys_readlink
+90	common	chmod			__x64_sys_chmod
+91	common	fchmod			__x64_sys_fchmod
+92	common	chown			__x64_sys_chown
+93	common	fchown			__x64_sys_fchown
+94	common	lchown			__x64_sys_lchown
+95	common	umask			__x64_sys_umask
+96	common	gettimeofday		__x64_sys_gettimeofday
+97	common	getrlimit		__x64_sys_getrlimit
+98	common	getrusage		__x64_sys_getrusage
+99	common	sysinfo			__x64_sys_sysinfo
+100	common	times			__x64_sys_times
+101	64	ptrace			__x64_sys_ptrace
+102	common	getuid			__x64_sys_getuid
+103	common	syslog			__x64_sys_syslog
+104	common	getgid			__x64_sys_getgid
+105	common	setuid			__x64_sys_setuid
+106	common	setgid			__x64_sys_setgid
+107	common	geteuid			__x64_sys_geteuid
+108	common	getegid			__x64_sys_getegid
+109	common	setpgid			__x64_sys_setpgid
+110	common	getppid			__x64_sys_getppid
+111	common	getpgrp			__x64_sys_getpgrp
+112	common	setsid			__x64_sys_setsid
+113	common	setreuid		__x64_sys_setreuid
+114	common	setregid		__x64_sys_setregid
+115	common	getgroups		__x64_sys_getgroups
+116	common	setgroups		__x64_sys_setgroups
+117	common	setresuid		__x64_sys_setresuid
+118	common	getresuid		__x64_sys_getresuid
+119	common	setresgid		__x64_sys_setresgid
+120	common	getresgid		__x64_sys_getresgid
+121	common	getpgid			__x64_sys_getpgid
+122	common	setfsuid		__x64_sys_setfsuid
+123	common	setfsgid		__x64_sys_setfsgid
+124	common	getsid			__x64_sys_getsid
+125	common	capget			__x64_sys_capget
+126	common	capset			__x64_sys_capset
+127	64	rt_sigpending		__x64_sys_rt_sigpending
+128	64	rt_sigtimedwait		__x64_sys_rt_sigtimedwait
+129	64	rt_sigqueueinfo		__x64_sys_rt_sigqueueinfo
+130	common	rt_sigsuspend		__x64_sys_rt_sigsuspend
+131	64	sigaltstack		__x64_sys_sigaltstack
+132	common	utime			__x64_sys_utime
+133	common	mknod			__x64_sys_mknod
+134	64	uselib
+135	common	personality		__x64_sys_personality
+136	common	ustat			__x64_sys_ustat
+137	common	statfs			__x64_sys_statfs
+138	common	fstatfs			__x64_sys_fstatfs
+139	common	sysfs			__x64_sys_sysfs
+140	common	getpriority		__x64_sys_getpriority
+141	common	setpriority		__x64_sys_setpriority
+142	common	sched_setparam		__x64_sys_sched_setparam
+143	common	sched_getparam		__x64_sys_sched_getparam
+144	common	sched_setscheduler	__x64_sys_sched_setscheduler
+145	common	sched_getscheduler	__x64_sys_sched_getscheduler
+146	common	sched_get_priority_max	__x64_sys_sched_get_priority_max
+147	common	sched_get_priority_min	__x64_sys_sched_get_priority_min
+148	common	sched_rr_get_interval	__x64_sys_sched_rr_get_interval
+149	common	mlock			__x64_sys_mlock
+150	common	munlock			__x64_sys_munlock
+151	common	mlockall		__x64_sys_mlockall
+152	common	munlockall		__x64_sys_munlockall
+153	common	vhangup			__x64_sys_vhangup
+154	common	modify_ldt		__x64_sys_modify_ldt
+155	common	pivot_root		__x64_sys_pivot_root
+156	64	_sysctl			__x64_sys_sysctl
+157	common	prctl			__x64_sys_prctl
+158	common	arch_prctl		__x64_sys_arch_prctl
+159	common	adjtimex		__x64_sys_adjtimex
+160	common	setrlimit		__x64_sys_setrlimit
+161	common	chroot			__x64_sys_chroot
+162	common	sync			__x64_sys_sync
+163	common	acct			__x64_sys_acct
+164	common	settimeofday		__x64_sys_settimeofday
+165	common	mount			__x64_sys_mount
+166	common	umount2			__x64_sys_umount
+167	common	swapon			__x64_sys_swapon
+168	common	swapoff			__x64_sys_swapoff
+169	common	reboot			__x64_sys_reboot
+170	common	sethostname		__x64_sys_sethostname
+171	common	setdomainname		__x64_sys_setdomainname
+172	common	iopl			__x64_sys_iopl/ptregs
+173	common	ioperm			__x64_sys_ioperm
+174	64	create_module
+175	common	init_module		__x64_sys_init_module
+176	common	delete_module		__x64_sys_delete_module
+177	64	get_kernel_syms
+178	64	query_module
+179	common	quotactl		__x64_sys_quotactl
+180	64	nfsservctl
+181	common	getpmsg
+182	common	putpmsg
+183	common	afs_syscall
+184	common	tuxcall
+185	common	security
+186	common	gettid			__x64_sys_gettid
+187	common	readahead		__x64_sys_readahead
+188	common	setxattr		__x64_sys_setxattr
+189	common	lsetxattr		__x64_sys_lsetxattr
+190	common	fsetxattr		__x64_sys_fsetxattr
+191	common	getxattr		__x64_sys_getxattr
+192	common	lgetxattr		__x64_sys_lgetxattr
+193	common	fgetxattr		__x64_sys_fgetxattr
+194	common	listxattr		__x64_sys_listxattr
+195	common	llistxattr		__x64_sys_llistxattr
+196	common	flistxattr		__x64_sys_flistxattr
+197	common	removexattr		__x64_sys_removexattr
+198	common	lremovexattr		__x64_sys_lremovexattr
+199	common	fremovexattr		__x64_sys_fremovexattr
+200	common	tkill			__x64_sys_tkill
+201	common	time			__x64_sys_time
+202	common	futex			__x64_sys_futex
+203	common	sched_setaffinity	__x64_sys_sched_setaffinity
+204	common	sched_getaffinity	__x64_sys_sched_getaffinity
+205	64	set_thread_area
+206	64	io_setup		__x64_sys_io_setup
+207	common	io_destroy		__x64_sys_io_destroy
+208	common	io_getevents		__x64_sys_io_getevents
+209	64	io_submit		__x64_sys_io_submit
+210	common	io_cancel		__x64_sys_io_cancel
+211	64	get_thread_area
+212	common	lookup_dcookie		__x64_sys_lookup_dcookie
+213	common	epoll_create		__x64_sys_epoll_create
+214	64	epoll_ctl_old
+215	64	epoll_wait_old
+216	common	remap_file_pages	__x64_sys_remap_file_pages
+217	common	getdents64		__x64_sys_getdents64
+218	common	set_tid_address		__x64_sys_set_tid_address
+219	common	restart_syscall		__x64_sys_restart_syscall
+220	common	semtimedop		__x64_sys_semtimedop
+221	common	fadvise64		__x64_sys_fadvise64
+222	64	timer_create		__x64_sys_timer_create
+223	common	timer_settime		__x64_sys_timer_settime
+224	common	timer_gettime		__x64_sys_timer_gettime
+225	common	timer_getoverrun	__x64_sys_timer_getoverrun
+226	common	timer_delete		__x64_sys_timer_delete
+227	common	clock_settime		__x64_sys_clock_settime
+228	common	clock_gettime		__x64_sys_clock_gettime
+229	common	clock_getres		__x64_sys_clock_getres
+230	common	clock_nanosleep		__x64_sys_clock_nanosleep
+231	common	exit_group		__x64_sys_exit_group
+232	common	epoll_wait		__x64_sys_epoll_wait
+233	common	epoll_ctl		__x64_sys_epoll_ctl
+234	common	tgkill			__x64_sys_tgkill
+235	common	utimes			__x64_sys_utimes
+236	64	vserver
+237	common	mbind			__x64_sys_mbind
+238	common	set_mempolicy		__x64_sys_set_mempolicy
+239	common	get_mempolicy		__x64_sys_get_mempolicy
+240	common	mq_open			__x64_sys_mq_open
+241	common	mq_unlink		__x64_sys_mq_unlink
+242	common	mq_timedsend		__x64_sys_mq_timedsend
+243	common	mq_timedreceive		__x64_sys_mq_timedreceive
+244	64	mq_notify		__x64_sys_mq_notify
+245	common	mq_getsetattr		__x64_sys_mq_getsetattr
+246	64	kexec_load		__x64_sys_kexec_load
+247	64	waitid			__x64_sys_waitid
+248	common	add_key			__x64_sys_add_key
+249	common	request_key		__x64_sys_request_key
+250	common	keyctl			__x64_sys_keyctl
+251	common	ioprio_set		__x64_sys_ioprio_set
+252	common	ioprio_get		__x64_sys_ioprio_get
+253	common	inotify_init		__x64_sys_inotify_init
+254	common	inotify_add_watch	__x64_sys_inotify_add_watch
+255	common	inotify_rm_watch	__x64_sys_inotify_rm_watch
+256	common	migrate_pages		__x64_sys_migrate_pages
+257	common	openat			__x64_sys_openat
+258	common	mkdirat			__x64_sys_mkdirat
+259	common	mknodat			__x64_sys_mknodat
+260	common	fchownat		__x64_sys_fchownat
+261	common	futimesat		__x64_sys_futimesat
+262	common	newfstatat		__x64_sys_newfstatat
+263	common	unlinkat		__x64_sys_unlinkat
+264	common	renameat		__x64_sys_renameat
+265	common	linkat			__x64_sys_linkat
+266	common	symlinkat		__x64_sys_symlinkat
+267	common	readlinkat		__x64_sys_readlinkat
+268	common	fchmodat		__x64_sys_fchmodat
+269	common	faccessat		__x64_sys_faccessat
+270	common	pselect6		__x64_sys_pselect6
+271	common	ppoll			__x64_sys_ppoll
+272	common	unshare			__x64_sys_unshare
+273	64	set_robust_list		__x64_sys_set_robust_list
+274	64	get_robust_list		__x64_sys_get_robust_list
+275	common	splice			__x64_sys_splice
+276	common	tee			__x64_sys_tee
+277	common	sync_file_range		__x64_sys_sync_file_range
+278	64	vmsplice		__x64_sys_vmsplice
+279	64	move_pages		__x64_sys_move_pages
+280	common	utimensat		__x64_sys_utimensat
+281	common	epoll_pwait		__x64_sys_epoll_pwait
+282	common	signalfd		__x64_sys_signalfd
+283	common	timerfd_create		__x64_sys_timerfd_create
+284	common	eventfd			__x64_sys_eventfd
+285	common	fallocate		__x64_sys_fallocate
+286	common	timerfd_settime		__x64_sys_timerfd_settime
+287	common	timerfd_gettime		__x64_sys_timerfd_gettime
+288	common	accept4			__x64_sys_accept4
+289	common	signalfd4		__x64_sys_signalfd4
+290	common	eventfd2		__x64_sys_eventfd2
+291	common	epoll_create1		__x64_sys_epoll_create1
+292	common	dup3			__x64_sys_dup3
+293	common	pipe2			__x64_sys_pipe2
+294	common	inotify_init1		__x64_sys_inotify_init1
+295	64	preadv			__x64_sys_preadv
+296	64	pwritev			__x64_sys_pwritev
+297	64	rt_tgsigqueueinfo	__x64_sys_rt_tgsigqueueinfo
+298	common	perf_event_open		__x64_sys_perf_event_open
+299	64	recvmmsg		__x64_sys_recvmmsg
+300	common	fanotify_init		__x64_sys_fanotify_init
+301	common	fanotify_mark		__x64_sys_fanotify_mark
+302	common	prlimit64		__x64_sys_prlimit64
+303	common	name_to_handle_at	__x64_sys_name_to_handle_at
+304	common	open_by_handle_at	__x64_sys_open_by_handle_at
+305	common	clock_adjtime		__x64_sys_clock_adjtime
+306	common	syncfs			__x64_sys_syncfs
+307	64	sendmmsg		__x64_sys_sendmmsg
+308	common	setns			__x64_sys_setns
+309	common	getcpu			__x64_sys_getcpu
+310	64	process_vm_readv	__x64_sys_process_vm_readv
+311	64	process_vm_writev	__x64_sys_process_vm_writev
+312	common	kcmp			__x64_sys_kcmp
+313	common	finit_module		__x64_sys_finit_module
+314	common	sched_setattr		__x64_sys_sched_setattr
+315	common	sched_getattr		__x64_sys_sched_getattr
+316	common	renameat2		__x64_sys_renameat2
+317	common	seccomp			__x64_sys_seccomp
+318	common	getrandom		__x64_sys_getrandom
+319	common	memfd_create		__x64_sys_memfd_create
+320	common	kexec_file_load		__x64_sys_kexec_file_load
+321	common	bpf			__x64_sys_bpf
+322	64	execveat		__x64_sys_execveat/ptregs
+323	common	userfaultfd		__x64_sys_userfaultfd
+324	common	membarrier		__x64_sys_membarrier
+325	common	mlock2			__x64_sys_mlock2
+326	common	copy_file_range		__x64_sys_copy_file_range
+327	64	preadv2			__x64_sys_preadv2
+328	64	pwritev2		__x64_sys_pwritev2
+329	common	pkey_mprotect		__x64_sys_pkey_mprotect
+330	common	pkey_alloc		__x64_sys_pkey_alloc
+331	common	pkey_free		__x64_sys_pkey_free
+332	common	statx			__x64_sys_statx
+
+#
+# x32-specific system call numbers start at 512 to avoid cache impact
+# for native 64-bit operation. The __x32_compat_sys stubs are created
+# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
+# is defined.
+#
+512	x32	rt_sigaction		__x32_compat_sys_rt_sigaction
+513	x32	rt_sigreturn		sys32_x32_rt_sigreturn
+514	x32	ioctl			__x32_compat_sys_ioctl
+515	x32	readv			__x32_compat_sys_readv
+516	x32	writev			__x32_compat_sys_writev
+517	x32	recvfrom		__x32_compat_sys_recvfrom
+518	x32	sendmsg			__x32_compat_sys_sendmsg
+519	x32	recvmsg			__x32_compat_sys_recvmsg
+520	x32	execve			__x32_compat_sys_execve/ptregs
+521	x32	ptrace			__x32_compat_sys_ptrace
+522	x32	rt_sigpending		__x32_compat_sys_rt_sigpending
+523	x32	rt_sigtimedwait		__x32_compat_sys_rt_sigtimedwait
+524	x32	rt_sigqueueinfo		__x32_compat_sys_rt_sigqueueinfo
+525	x32	sigaltstack		__x32_compat_sys_sigaltstack
+526	x32	timer_create		__x32_compat_sys_timer_create
+527	x32	mq_notify		__x32_compat_sys_mq_notify
+528	x32	kexec_load		__x32_compat_sys_kexec_load
+529	x32	waitid			__x32_compat_sys_waitid
+530	x32	set_robust_list		__x32_compat_sys_set_robust_list
+531	x32	get_robust_list		__x32_compat_sys_get_robust_list
+532	x32	vmsplice		__x32_compat_sys_vmsplice
+533	x32	move_pages		__x32_compat_sys_move_pages
+534	x32	preadv			__x32_compat_sys_preadv64
+535	x32	pwritev			__x32_compat_sys_pwritev64
+536	x32	rt_tgsigqueueinfo	__x32_compat_sys_rt_tgsigqueueinfo
+537	x32	recvmmsg		__x32_compat_sys_recvmmsg
+538	x32	sendmmsg		__x32_compat_sys_sendmmsg
+539	x32	process_vm_readv	__x32_compat_sys_process_vm_readv
+540	x32	process_vm_writev	__x32_compat_sys_process_vm_writev
+541	x32	setsockopt		__x32_compat_sys_setsockopt
+542	x32	getsockopt		__x32_compat_sys_getsockopt
+543	x32	io_setup		__x32_compat_sys_io_setup
+544	x32	io_submit		__x32_compat_sys_io_submit
+545	x32	execveat		__x32_compat_sys_execveat/ptregs
+546	x32	preadv2			__x32_compat_sys_preadv64v2
+547	x32	pwritev2		__x32_compat_sys_pwritev64v2
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
new file mode 100755
index 0000000..029a72c
--- /dev/null
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+in="$1"
+arch="$2"
+
+syscall_macro() { # print one designated-initializer line: <tab>[nr] = "name",
+    nr="$1"       # first argument: used as the array index
+    name="$2"     # second argument: emitted as the quoted string
+
+    echo "	[$nr] = \"$name\","
+}
+
+emit() { # thin wrapper: forwards both arguments unchanged to syscall_macro
+    nr="$1"    # first argument, passed through as the index
+    entry="$2" # second argument, passed through as the string to emit
+
+    syscall_macro "$nr" "$entry"
+}
+
+echo "static const char *syscalltbl_${arch}[] = {"
+
+sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
+grep '^[0-9]' "$in" | sort -n > "$sorted_table" # digit-led lines only, numeric order
+
+max_nr=0 # last syscall number emitted; reported in the #define below
+while read nr abi name entry compat; do
+    if [ "$nr" -ge 512 ] ; then # discard compat syscalls
+        break # input is sorted, so every later line is >= 512 as well
+    fi
+
+    emit "$nr" "$name"
+    max_nr=$nr
+done < "$sorted_table"
+
+rm -f "$sorted_table"
+
+echo "};"
+
+echo "#define SYSCALLTBL_${arch}_MAX_ID ${max_nr}"
diff --git a/arch/x86/include/arch-tests.h b/arch/x86/include/arch-tests.h
new file mode 100644
index 0000000..c1bd979
--- /dev/null
+++ b/arch/x86/include/arch-tests.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#include <linux/compiler.h>
+struct test; /* forward declaration: only pointers to it are used here */
+
+/* Test entry points; each takes the test descriptor and an int subtest */
+int test__rdpmc(struct test *test __maybe_unused, int subtest);
+int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
+int test__insn_x86(struct test *test __maybe_unused, int subtest);
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+int test__arch_unwind_sample(struct perf_sample *sample,
+			     struct thread *thread);
+#endif /* HAVE_DWARF_UNWIND_SUPPORT */
+
+extern struct test arch_tests[]; /* table of struct test entries, defined elsewhere */
+
+#endif /* ARCH_TESTS_H */
diff --git a/arch/x86/include/dwarf-regs-table.h b/arch/x86/include/dwarf-regs-table.h
new file mode 100644
index 0000000..b9bd5dc
--- /dev/null
+++ b/arch/x86/include/dwarf-regs-table.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+/* NOTE(review): both tables are presumably indexed by DWARF register number - confirm there */
+static const char * const x86_32_regstr_tbl[] = {
+	"%ax", "%cx", "%dx", "%bx", "$stack",/* Stack address instead of %sp */
+	"%bp", "%si", "%di",
+};
+
+static const char * const x86_64_regstr_tbl[] = { /* note: "%dx" precedes "%cx" here, unlike the 32-bit table */
+	"%ax", "%dx", "%cx", "%bx", "%si", "%di",
+	"%bp", "%sp", "%r8", "%r9", "%r10", "%r11",
+	"%r12", "%r13", "%r14", "%r15",
+};
+#endif /* DEFINE_DWARF_REGSTR_TABLE */
diff --git a/arch/x86/include/perf_regs.h b/arch/x86/include/perf_regs.h
new file mode 100644
index 0000000..7f6d538
--- /dev/null
+++ b/arch/x86/include/perf_regs.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#ifndef HAVE_ARCH_X86_64_SUPPORT /* no x86-64 support: mask has one bit per id below PERF_REG_X86_32_MAX */
+#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_X86_32_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
+#else /* HAVE_ARCH_X86_64_SUPPORT: ids below PERF_REG_X86_64_MAX, with DS/ES/FS/GS cleared from the mask */
+#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
+		       (1ULL << PERF_REG_X86_ES) | \
+		       (1ULL << PERF_REG_X86_FS) | \
+		       (1ULL << PERF_REG_X86_GS))
+#define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT)
+#define PERF_REGS_MAX PERF_REG_X86_64_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
+#define PERF_REG_IP PERF_REG_X86_IP /* arch-neutral alias for the x86 IP register id */
+#define PERF_REG_SP PERF_REG_X86_SP /* arch-neutral alias for the x86 SP register id */
+
+static inline const char *perf_reg_name(int id)
+{
+	switch (id) { /* map a PERF_REG_X86_* id to its printable name */
+	case PERF_REG_X86_AX:
+		return "AX";
+	case PERF_REG_X86_BX:
+		return "BX";
+	case PERF_REG_X86_CX:
+		return "CX";
+	case PERF_REG_X86_DX:
+		return "DX";
+	case PERF_REG_X86_SI:
+		return "SI";
+	case PERF_REG_X86_DI:
+		return "DI";
+	case PERF_REG_X86_BP:
+		return "BP";
+	case PERF_REG_X86_SP:
+		return "SP";
+	case PERF_REG_X86_IP:
+		return "IP";
+	case PERF_REG_X86_FLAGS:
+		return "FLAGS";
+	case PERF_REG_X86_CS:
+		return "CS";
+	case PERF_REG_X86_SS:
+		return "SS";
+	case PERF_REG_X86_DS:
+		return "DS";
+	case PERF_REG_X86_ES:
+		return "ES";
+	case PERF_REG_X86_FS:
+		return "FS";
+	case PERF_REG_X86_GS:
+		return "GS";
+#ifdef HAVE_ARCH_X86_64_SUPPORT /* R8-R15 are only named when x86-64 support is enabled */
+	case PERF_REG_X86_R8:
+		return "R8";
+	case PERF_REG_X86_R9:
+		return "R9";
+	case PERF_REG_X86_R10:
+		return "R10";
+	case PERF_REG_X86_R11:
+		return "R11";
+	case PERF_REG_X86_R12:
+		return "R12";
+	case PERF_REG_X86_R13:
+		return "R13";
+	case PERF_REG_X86_R14:
+		return "R14";
+	case PERF_REG_X86_R15:
+		return "R15";
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
+	default: /* any id not listed above has no name */
+		return NULL;
+	}
+
+	return NULL; /* not reached: every switch arm returns */
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/arch/x86/tests/Build b/arch/x86/tests/Build
new file mode 100644
index 0000000..8e2c5a3
--- /dev/null
+++ b/arch/x86/tests/Build
@@ -0,0 +1,7 @@
+libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o
+libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
+
+libperf-y += arch-tests.o
+libperf-y += rdpmc.o
+libperf-y += perf-time-to-tsc.o
+libperf-$(CONFIG_AUXTRACE) += insn-x86.o
diff --git a/arch/x86/tests/arch-tests.c b/arch/x86/tests/arch-tests.c
new file mode 100644
index 0000000..cc1802f
--- /dev/null
+++ b/arch/x86/tests/arch-tests.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = { /* x86 test table; two entries depend on build options */
+	{
+		.desc = "x86 rdpmc",
+		.func = test__rdpmc,
+	},
+	{
+		.desc = "Convert perf time to TSC",
+		.func = test__perf_time_to_tsc,
+	},
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+	{
+		.desc = "DWARF unwind",
+		.func = test__dwarf_unwind,
+	},
+#endif /* HAVE_DWARF_UNWIND_SUPPORT */
+#ifdef HAVE_AUXTRACE_SUPPORT
+	{
+		.desc = "x86 instruction decoder - new instructions",
+		.func = test__insn_x86,
+	},
+#endif /* HAVE_AUXTRACE_SUPPORT */
+	{
+		.func = NULL, /* NOTE(review): presumably the end-of-table sentinel - confirm in the code that walks arch_tests */
+	},
+
+};
diff --git a/arch/x86/tests/dwarf-unwind.c b/arch/x86/tests/dwarf-unwind.c
new file mode 100644
index 0000000..95036c7
--- /dev/null
+++ b/arch/x86/tests/dwarf-unwind.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+			 struct thread *thread, u64 *regs)
+{
+	struct stack_dump *stack = &sample->user_stack;
+	struct map *map;
+	unsigned long sp;
+	u64 stack_size, *buf;
+
+	buf = malloc(STACK_SIZE);
+	if (!buf) {
+		pr_debug("failed to allocate sample ustack data\n");
+		return -1;
+	}
+
+	sp = (unsigned long) regs[PERF_REG_X86_SP];
+	/* Find the map for sp; without one, buf is released and we fail. */
+	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	if (!map) {
+		pr_debug("failed to get stack map\n");
+		free(buf);
+		return -1;
+	}
+	/* Copy from sp up to the end of that map, capped at STACK_SIZE. */
+	stack_size = map->end - sp;
+	stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+	memcpy(buf, (void *) sp, stack_size);
+	stack->data = (char *) buf;	/* buf is handed to the sample, not freed here */
+	stack->size = stack_size;
+	return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+			     struct thread *thread)
+{
+	struct regs_dump *uregs = &sample->user_regs;
+	u64 *reg_vals = malloc(PERF_REGS_MAX * sizeof(*reg_vals));
+
+	if (reg_vals == NULL) {
+		pr_debug("failed to allocate sample uregs data\n");
+		return -1;
+	}
+
+	/* Let perf_regs_load() populate the buffer, then attach it below. */
+	perf_regs_load(reg_vals);
+	uregs->regs = reg_vals;
+	uregs->mask = PERF_REGS_MASK;
+	uregs->abi = PERF_SAMPLE_REGS_ABI;
+
+	return sample_ustack(sample, thread, reg_vals);
+}
diff --git a/arch/x86/tests/gen-insn-x86-dat.awk b/arch/x86/tests/gen-insn-x86-dat.awk
new file mode 100644
index 0000000..a214548
--- /dev/null
+++ b/arch/x86/tests/gen-insn-x86-dat.awk
@@ -0,0 +1,75 @@
+#!/bin/awk -f
+# gen-insn-x86-dat.awk: script to convert data for the insn-x86 test
+# Copyright (c) 2015, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+
+BEGIN { # print the generated-file banner and reset the parser state
+	print "/*"
+	print " * Generated by gen-insn-x86-dat.sh and gen-insn-x86-dat.awk"
+	print " * from insn-x86-dat-src.c for inclusion by insn-x86.c"
+	print " * Do not change this code."
+	print "*/\n"
+	op = ""     # set by " Expecting: " lines, printed with the next record
+	branch = "" # likewise
+	rel = 0     # likewise
+	going = 0   # non-zero only between " Start here " and " Stop here "
+}
+
+/ Start here / { # marker line: begin emitting records
+	going = 1
+}
+
+/ Stop here / { # marker line: stop emitting records
+	going = 0
+}
+
+/^[ \t]*[0-9a-fA-F]+:/ { # disassembly line: optional blanks, hex address, colon
+	if (going) {
+		colon_pos = index($0, ":")
+		useful_line = substr($0, colon_pos + 1)
+		first_pos = match(useful_line, "[0-9a-fA-F]")
+		useful_line = substr(useful_line, first_pos) # from the first hex digit after the colon
+		gsub("\t", "\\t", useful_line) # tabs become the two characters \t in the C string
+		printf "{{"
+		len = 0 # number of instruction bytes emitted
+		for (i = 2; i <= NF; i++) { # fields after the address, while they are 2-digit hex bytes
+			if (match($i, "^[0-9a-fA-F][0-9a-fA-F]$")) {
+				printf "0x%s, ", $i
+				len += 1
+			} else {
+				break
+			}
+		}
+		printf "}, %d, %s, \"%s\", \"%s\",", len, rel, op, branch
+		printf "\n\"%s\",},\n", useful_line
+		op = ""     # expectations apply to one record only
+		branch = ""
+		rel = 0
+	}
+}
+
+/ Expecting: / { # remember the three words after "Expecting:" for the next record
+	expecting_str = " Expecting: "
+	expecting_len = length(expecting_str)
+	expecting_pos = index($0, expecting_str)
+	useful_line = substr($0, expecting_pos + expecting_len) # not read here; the disassembly rule reassigns it before use
+	for (i = 1; i <= NF; i++) {
+		if ($i == "Expecting:") {
+			i++
+			op = $i     # 1st word after the marker
+			i++
+			branch = $i # 2nd word
+			i++
+			rel = $i    # 3rd word, printed unquoted via %s
+			break
+		}
+	}
+}
diff --git a/arch/x86/tests/gen-insn-x86-dat.sh b/arch/x86/tests/gen-insn-x86-dat.sh
new file mode 100755
index 0000000..2d4ef94
--- /dev/null
+++ b/arch/x86/tests/gen-insn-x86-dat.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+# gen-insn-x86-dat: generate data for the insn-x86 test
+# Copyright (c) 2015, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+
+set -e # abort on the first failing command
+
+if [ "$(uname -m)" != "x86_64" ]; then
+	echo "ERROR: This script only works on x86_64"
+	exit 1
+fi
+
+cd "$(dirname "$0")" # quoted so a script path containing spaces still works
+
+trap 'echo "Might need a more recent version of binutils"' EXIT # hint shown if a step below aborts
+
+echo "Compiling insn-x86-dat-src.c to 64-bit object"
+
+gcc -g -c insn-x86-dat-src.c
+
+objdump -dSw insn-x86-dat-src.o | awk -f gen-insn-x86-dat.awk > insn-x86-dat-64.c
+
+rm -f insn-x86-dat-src.o
+
+echo "Compiling insn-x86-dat-src.c to 32-bit object"
+
+gcc -g -c -m32 insn-x86-dat-src.c
+
+objdump -dSw insn-x86-dat-src.o | awk -f gen-insn-x86-dat.awk > insn-x86-dat-32.c
+
+rm -f insn-x86-dat-src.o
+
+trap - EXIT # success: disarm the hint
+
+echo "Done (use git diff to see the changes)"
diff --git a/arch/x86/tests/insn-x86-dat-32.c b/arch/x86/tests/insn-x86-dat-32.c
new file mode 100644
index 0000000..fab3c6d
--- /dev/null
+++ b/arch/x86/tests/insn-x86-dat-32.c
@@ -0,0 +1,1679 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generated by gen-insn-x86-dat.sh and gen-insn-x86-dat.awk
+ * from insn-x86-dat-src.c for inclusion by insn-x86.c
+ * Do not change this code.
+*/
+
+{{0x0f, 0x31, }, 2, 0, "", "",
+"0f 31                \trdtsc  ",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "",
+"c4 e2 7d 13 eb       \tvcvtph2ps %xmm3,%ymm5",},
+{{0x62, 0x81, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 81 78 56 34 12    \tbound  %eax,0x12345678(%ecx)",},
+{{0x62, 0x88, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 88 78 56 34 12    \tbound  %ecx,0x12345678(%eax)",},
+{{0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 90 78 56 34 12    \tbound  %edx,0x12345678(%eax)",},
+{{0x62, 0x98, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 98 78 56 34 12    \tbound  %ebx,0x12345678(%eax)",},
+{{0x62, 0xa0, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 a0 78 56 34 12    \tbound  %esp,0x12345678(%eax)",},
+{{0x62, 0xa8, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 a8 78 56 34 12    \tbound  %ebp,0x12345678(%eax)",},
+{{0x62, 0xb0, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 b0 78 56 34 12    \tbound  %esi,0x12345678(%eax)",},
+{{0x62, 0xb8, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 b8 78 56 34 12    \tbound  %edi,0x12345678(%eax)",},
+{{0x62, 0x08, }, 2, 0, "", "",
+"62 08                \tbound  %ecx,(%eax)",},
+{{0x62, 0x05, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 05 78 56 34 12    \tbound  %eax,0x12345678",},
+{{0x62, 0x14, 0x01, }, 3, 0, "", "",
+"62 14 01             \tbound  %edx,(%ecx,%eax,1)",},
+{{0x62, 0x14, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 14 05 78 56 34 12 \tbound  %edx,0x12345678(,%eax,1)",},
+{{0x62, 0x14, 0x08, }, 3, 0, "", "",
+"62 14 08             \tbound  %edx,(%eax,%ecx,1)",},
+{{0x62, 0x14, 0xc8, }, 3, 0, "", "",
+"62 14 c8             \tbound  %edx,(%eax,%ecx,8)",},
+{{0x62, 0x50, 0x12, }, 3, 0, "", "",
+"62 50 12             \tbound  %edx,0x12(%eax)",},
+{{0x62, 0x55, 0x12, }, 3, 0, "", "",
+"62 55 12             \tbound  %edx,0x12(%ebp)",},
+{{0x62, 0x54, 0x01, 0x12, }, 4, 0, "", "",
+"62 54 01 12          \tbound  %edx,0x12(%ecx,%eax,1)",},
+{{0x62, 0x54, 0x05, 0x12, }, 4, 0, "", "",
+"62 54 05 12          \tbound  %edx,0x12(%ebp,%eax,1)",},
+{{0x62, 0x54, 0x08, 0x12, }, 4, 0, "", "",
+"62 54 08 12          \tbound  %edx,0x12(%eax,%ecx,1)",},
+{{0x62, 0x54, 0xc8, 0x12, }, 4, 0, "", "",
+"62 54 c8 12          \tbound  %edx,0x12(%eax,%ecx,8)",},
+{{0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 90 78 56 34 12    \tbound  %edx,0x12345678(%eax)",},
+{{0x62, 0x95, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 95 78 56 34 12    \tbound  %edx,0x12345678(%ebp)",},
+{{0x62, 0x94, 0x01, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 94 01 78 56 34 12 \tbound  %edx,0x12345678(%ecx,%eax,1)",},
+{{0x62, 0x94, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 94 05 78 56 34 12 \tbound  %edx,0x12345678(%ebp,%eax,1)",},
+{{0x62, 0x94, 0x08, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 94 08 78 56 34 12 \tbound  %edx,0x12345678(%eax,%ecx,1)",},
+{{0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 94 c8 78 56 34 12 \tbound  %edx,0x12345678(%eax,%ecx,8)",},
+{{0x66, 0x62, 0x81, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 81 78 56 34 12 \tbound  %ax,0x12345678(%ecx)",},
+{{0x66, 0x62, 0x88, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 88 78 56 34 12 \tbound  %cx,0x12345678(%eax)",},
+{{0x66, 0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 90 78 56 34 12 \tbound  %dx,0x12345678(%eax)",},
+{{0x66, 0x62, 0x98, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 98 78 56 34 12 \tbound  %bx,0x12345678(%eax)",},
+{{0x66, 0x62, 0xa0, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 a0 78 56 34 12 \tbound  %sp,0x12345678(%eax)",},
+{{0x66, 0x62, 0xa8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 a8 78 56 34 12 \tbound  %bp,0x12345678(%eax)",},
+{{0x66, 0x62, 0xb0, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 b0 78 56 34 12 \tbound  %si,0x12345678(%eax)",},
+{{0x66, 0x62, 0xb8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 b8 78 56 34 12 \tbound  %di,0x12345678(%eax)",},
+{{0x66, 0x62, 0x08, }, 3, 0, "", "",
+"66 62 08             \tbound  %cx,(%eax)",},
+{{0x66, 0x62, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 05 78 56 34 12 \tbound  %ax,0x12345678",},
+{{0x66, 0x62, 0x14, 0x01, }, 4, 0, "", "",
+"66 62 14 01          \tbound  %dx,(%ecx,%eax,1)",},
+{{0x66, 0x62, 0x14, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 14 05 78 56 34 12 \tbound  %dx,0x12345678(,%eax,1)",},
+{{0x66, 0x62, 0x14, 0x08, }, 4, 0, "", "",
+"66 62 14 08          \tbound  %dx,(%eax,%ecx,1)",},
+{{0x66, 0x62, 0x14, 0xc8, }, 4, 0, "", "",
+"66 62 14 c8          \tbound  %dx,(%eax,%ecx,8)",},
+{{0x66, 0x62, 0x50, 0x12, }, 4, 0, "", "",
+"66 62 50 12          \tbound  %dx,0x12(%eax)",},
+{{0x66, 0x62, 0x55, 0x12, }, 4, 0, "", "",
+"66 62 55 12          \tbound  %dx,0x12(%ebp)",},
+{{0x66, 0x62, 0x54, 0x01, 0x12, }, 5, 0, "", "",
+"66 62 54 01 12       \tbound  %dx,0x12(%ecx,%eax,1)",},
+{{0x66, 0x62, 0x54, 0x05, 0x12, }, 5, 0, "", "",
+"66 62 54 05 12       \tbound  %dx,0x12(%ebp,%eax,1)",},
+{{0x66, 0x62, 0x54, 0x08, 0x12, }, 5, 0, "", "",
+"66 62 54 08 12       \tbound  %dx,0x12(%eax,%ecx,1)",},
+{{0x66, 0x62, 0x54, 0xc8, 0x12, }, 5, 0, "", "",
+"66 62 54 c8 12       \tbound  %dx,0x12(%eax,%ecx,8)",},
+{{0x66, 0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 90 78 56 34 12 \tbound  %dx,0x12345678(%eax)",},
+{{0x66, 0x62, 0x95, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 95 78 56 34 12 \tbound  %dx,0x12345678(%ebp)",},
+{{0x66, 0x62, 0x94, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 94 01 78 56 34 12 \tbound  %dx,0x12345678(%ecx,%eax,1)",},
+{{0x66, 0x62, 0x94, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 94 05 78 56 34 12 \tbound  %dx,0x12345678(%ebp,%eax,1)",},
+{{0x66, 0x62, 0x94, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 94 08 78 56 34 12 \tbound  %dx,0x12345678(%eax,%ecx,1)",},
+{{0x66, 0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 94 c8 78 56 34 12 \tbound  %dx,0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0x41, 0xd8, }, 3, 0, "", "",
+"0f 41 d8             \tcmovno %eax,%ebx",},
+{{0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 41 88 78 56 34 12 \tcmovno 0x12345678(%eax),%ecx",},
+{{0x66, 0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 41 88 78 56 34 12 \tcmovno 0x12345678(%eax),%cx",},
+{{0x0f, 0x44, 0xd8, }, 3, 0, "", "",
+"0f 44 d8             \tcmove  %eax,%ebx",},
+{{0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 44 88 78 56 34 12 \tcmove  0x12345678(%eax),%ecx",},
+{{0x66, 0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 44 88 78 56 34 12 \tcmove  0x12345678(%eax),%cx",},
+{{0x0f, 0x90, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 90 80 78 56 34 12 \tseto   0x12345678(%eax)",},
+{{0x0f, 0x91, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 91 80 78 56 34 12 \tsetno  0x12345678(%eax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb   0x12345678(%eax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb   0x12345678(%eax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb   0x12345678(%eax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae  0x12345678(%eax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae  0x12345678(%eax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae  0x12345678(%eax)",},
+{{0x0f, 0x98, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 98 80 78 56 34 12 \tsets   0x12345678(%eax)",},
+{{0x0f, 0x99, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 99 80 78 56 34 12 \tsetns  0x12345678(%eax)",},
+{{0xc5, 0xcc, 0x41, 0xef, }, 4, 0, "", "",
+"c5 cc 41 ef          \tkandw  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x41, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 41 ef       \tkandq  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x41, 0xef, }, 4, 0, "", "",
+"c5 cd 41 ef          \tkandb  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x41, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 41 ef       \tkandd  %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x42, 0xef, }, 4, 0, "", "",
+"c5 cc 42 ef          \tkandnw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x42, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 42 ef       \tkandnq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x42, 0xef, }, 4, 0, "", "",
+"c5 cd 42 ef          \tkandnb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x42, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 42 ef       \tkandnd %k7,%k6,%k5",},
+{{0xc5, 0xf8, 0x44, 0xf7, }, 4, 0, "", "",
+"c5 f8 44 f7          \tknotw  %k7,%k6",},
+{{0xc4, 0xe1, 0xf8, 0x44, 0xf7, }, 5, 0, "", "",
+"c4 e1 f8 44 f7       \tknotq  %k7,%k6",},
+{{0xc5, 0xf9, 0x44, 0xf7, }, 4, 0, "", "",
+"c5 f9 44 f7          \tknotb  %k7,%k6",},
+{{0xc4, 0xe1, 0xf9, 0x44, 0xf7, }, 5, 0, "", "",
+"c4 e1 f9 44 f7       \tknotd  %k7,%k6",},
+{{0xc5, 0xcc, 0x45, 0xef, }, 4, 0, "", "",
+"c5 cc 45 ef          \tkorw   %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x45, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 45 ef       \tkorq   %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x45, 0xef, }, 4, 0, "", "",
+"c5 cd 45 ef          \tkorb   %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x45, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 45 ef       \tkord   %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x46, 0xef, }, 4, 0, "", "",
+"c5 cc 46 ef          \tkxnorw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x46, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 46 ef       \tkxnorq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x46, 0xef, }, 4, 0, "", "",
+"c5 cd 46 ef          \tkxnorb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x46, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 46 ef       \tkxnord %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x47, 0xef, }, 4, 0, "", "",
+"c5 cc 47 ef          \tkxorw  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x47, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 47 ef       \tkxorq  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x47, 0xef, }, 4, 0, "", "",
+"c5 cd 47 ef          \tkxorb  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x47, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 47 ef       \tkxord  %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x4a, 0xef, }, 4, 0, "", "",
+"c5 cc 4a ef          \tkaddw  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x4a, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 4a ef       \tkaddq  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x4a, 0xef, }, 4, 0, "", "",
+"c5 cd 4a ef          \tkaddb  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x4a, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 4a ef       \tkaddd  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x4b, 0xef, }, 4, 0, "", "",
+"c5 cd 4b ef          \tkunpckbw %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x4b, 0xef, }, 4, 0, "", "",
+"c5 cc 4b ef          \tkunpckwd %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x4b, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 4b ef       \tkunpckdq %k7,%k6,%k5",},
+{{0xc5, 0xf8, 0x90, 0xee, }, 4, 0, "", "",
+"c5 f8 90 ee          \tkmovw  %k6,%k5",},
+{{0xc5, 0xf8, 0x90, 0x29, }, 4, 0, "", "",
+"c5 f8 90 29          \tkmovw  (%ecx),%k5",},
+{{0xc5, 0xf8, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "",
+"c5 f8 90 ac c8 23 01 00 00 \tkmovw  0x123(%eax,%ecx,8),%k5",},
+{{0xc5, 0xf8, 0x91, 0x29, }, 4, 0, "", "",
+"c5 f8 91 29          \tkmovw  %k5,(%ecx)",},
+{{0xc5, 0xf8, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "",
+"c5 f8 91 ac c8 23 01 00 00 \tkmovw  %k5,0x123(%eax,%ecx,8)",},
+{{0xc5, 0xf8, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 f8 92 e8          \tkmovw  %eax,%k5",},
+{{0xc5, 0xf8, 0x92, 0xed, }, 4, 0, "", "",
+"c5 f8 92 ed          \tkmovw  %ebp,%k5",},
+{{0xc5, 0xf8, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 f8 93 c5          \tkmovw  %k5,%eax",},
+{{0xc5, 0xf8, 0x93, 0xed, }, 4, 0, "", "",
+"c5 f8 93 ed          \tkmovw  %k5,%ebp",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 90 ee       \tkmovq  %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0x29, }, 5, 0, "", "",
+"c4 e1 f8 90 29       \tkmovq  (%ecx),%k5",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 e1 f8 90 ac c8 23 01 00 00 \tkmovq  0x123(%eax,%ecx,8),%k5",},
+{{0xc4, 0xe1, 0xf8, 0x91, 0x29, }, 5, 0, "", "",
+"c4 e1 f8 91 29       \tkmovq  %k5,(%ecx)",},
+{{0xc4, 0xe1, 0xf8, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 e1 f8 91 ac c8 23 01 00 00 \tkmovq  %k5,0x123(%eax,%ecx,8)",},
+{{0xc5, 0xf9, 0x90, 0xee, }, 4, 0, "", "",
+"c5 f9 90 ee          \tkmovb  %k6,%k5",},
+{{0xc5, 0xf9, 0x90, 0x29, }, 4, 0, "", "",
+"c5 f9 90 29          \tkmovb  (%ecx),%k5",},
+{{0xc5, 0xf9, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "",
+"c5 f9 90 ac c8 23 01 00 00 \tkmovb  0x123(%eax,%ecx,8),%k5",},
+{{0xc5, 0xf9, 0x91, 0x29, }, 4, 0, "", "",
+"c5 f9 91 29          \tkmovb  %k5,(%ecx)",},
+{{0xc5, 0xf9, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "",
+"c5 f9 91 ac c8 23 01 00 00 \tkmovb  %k5,0x123(%eax,%ecx,8)",},
+{{0xc5, 0xf9, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 f9 92 e8          \tkmovb  %eax,%k5",},
+{{0xc5, 0xf9, 0x92, 0xed, }, 4, 0, "", "",
+"c5 f9 92 ed          \tkmovb  %ebp,%k5",},
+{{0xc5, 0xf9, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 f9 93 c5          \tkmovb  %k5,%eax",},
+{{0xc5, 0xf9, 0x93, 0xed, }, 4, 0, "", "",
+"c5 f9 93 ed          \tkmovb  %k5,%ebp",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 90 ee       \tkmovd  %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0x29, }, 5, 0, "", "",
+"c4 e1 f9 90 29       \tkmovd  (%ecx),%k5",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 e1 f9 90 ac c8 23 01 00 00 \tkmovd  0x123(%eax,%ecx,8),%k5",},
+{{0xc4, 0xe1, 0xf9, 0x91, 0x29, }, 5, 0, "", "",
+"c4 e1 f9 91 29       \tkmovd  %k5,(%ecx)",},
+{{0xc4, 0xe1, 0xf9, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 e1 f9 91 ac c8 23 01 00 00 \tkmovd  %k5,0x123(%eax,%ecx,8)",},
+{{0xc5, 0xfb, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 fb 92 e8          \tkmovd  %eax,%k5",},
+{{0xc5, 0xfb, 0x92, 0xed, }, 4, 0, "", "",
+"c5 fb 92 ed          \tkmovd  %ebp,%k5",},
+{{0xc5, 0xfb, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 fb 93 c5          \tkmovd  %k5,%eax",},
+{{0xc5, 0xfb, 0x93, 0xed, }, 4, 0, "", "",
+"c5 fb 93 ed          \tkmovd  %k5,%ebp",},
+{{0xc5, 0xf8, 0x98, 0xee, }, 4, 0, "", "",
+"c5 f8 98 ee          \tkortestw %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x98, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 98 ee       \tkortestq %k6,%k5",},
+{{0xc5, 0xf9, 0x98, 0xee, }, 4, 0, "", "",
+"c5 f9 98 ee          \tkortestb %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x98, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 98 ee       \tkortestd %k6,%k5",},
+{{0xc5, 0xf8, 0x99, 0xee, }, 4, 0, "", "",
+"c5 f8 99 ee          \tktestw %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x99, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 99 ee       \tktestq %k6,%k5",},
+{{0xc5, 0xf9, 0x99, 0xee, }, 4, 0, "", "",
+"c5 f9 99 ee          \tktestb %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x99, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 99 ee       \tktestd %k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x30, 0xee, 0x12, }, 6, 0, "", "",
+"c4 e3 f9 30 ee 12    \tkshiftrw $0x12,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x31, 0xee, 0x5b, }, 6, 0, "", "",
+"c4 e3 f9 31 ee 5b    \tkshiftrq $0x5b,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x32, 0xee, 0x12, }, 6, 0, "", "",
+"c4 e3 f9 32 ee 12    \tkshiftlw $0x12,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x33, 0xee, 0x5b, }, 6, 0, "", "",
+"c4 e3 f9 33 ee 5b    \tkshiftlq $0x5b,%k6,%k5",},
+{{0xc5, 0xf8, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 f8 5b f5          \tvcvtdq2ps %xmm5,%xmm6",},
+{{0x62, 0xf1, 0xfc, 0x4f, 0x5b, 0xf5, }, 6, 0, "", "",
+"62 f1 fc 4f 5b f5    \tvcvtqq2ps %zmm5,%ymm6{%k7}",},
+{{0xc5, 0xf9, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 f9 5b f5          \tvcvtps2dq %xmm5,%xmm6",},
+{{0xc5, 0xfa, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 fa 5b f5          \tvcvttps2dq %xmm5,%xmm6",},
+{{0x0f, 0x6f, 0xe0, }, 3, 0, "", "",
+"0f 6f e0             \tmovq   %mm0,%mm4",},
+{{0xc5, 0xfd, 0x6f, 0xf4, }, 4, 0, "", "",
+"c5 fd 6f f4          \tvmovdqa %ymm4,%ymm6",},
+{{0x62, 0xf1, 0x7d, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 48 6f f5    \tvmovdqa32 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 6f f5    \tvmovdqa64 %zmm5,%zmm6",},
+{{0xc5, 0xfe, 0x6f, 0xf4, }, 4, 0, "", "",
+"c5 fe 6f f4          \tvmovdqu %ymm4,%ymm6",},
+{{0x62, 0xf1, 0x7e, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 7e 48 6f f5    \tvmovdqu32 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfe, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 fe 48 6f f5    \tvmovdqu64 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x7f, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 7f 48 6f f5    \tvmovdqu8 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xff, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 ff 48 6f f5    \tvmovdqu16 %zmm5,%zmm6",},
+{{0x0f, 0x78, 0xc3, }, 3, 0, "", "",
+"0f 78 c3             \tvmread %eax,%ebx",},
+{{0x62, 0xf1, 0x7c, 0x48, 0x78, 0xf5, }, 6, 0, "", "",
+"62 f1 7c 48 78 f5    \tvcvttps2udq %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfc, 0x4f, 0x78, 0xf5, }, 6, 0, "", "",
+"62 f1 fc 4f 78 f5    \tvcvttpd2udq %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf1, 0x7f, 0x08, 0x78, 0xc6, }, 6, 0, "", "",
+"62 f1 7f 08 78 c6    \tvcvttsd2usi %xmm6,%eax",},
+{{0x62, 0xf1, 0x7e, 0x08, 0x78, 0xc6, }, 6, 0, "", "",
+"62 f1 7e 08 78 c6    \tvcvttss2usi %xmm6,%eax",},
+{{0x62, 0xf1, 0x7d, 0x4f, 0x78, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 4f 78 f5    \tvcvttps2uqq %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x78, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 78 f5    \tvcvttpd2uqq %zmm5,%zmm6",},
+{{0x0f, 0x79, 0xd8, }, 3, 0, "", "",
+"0f 79 d8             \tvmwrite %eax,%ebx",},
+{{0x62, 0xf1, 0x7c, 0x48, 0x79, 0xf5, }, 6, 0, "", "",
+"62 f1 7c 48 79 f5    \tvcvtps2udq %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfc, 0x4f, 0x79, 0xf5, }, 6, 0, "", "",
+"62 f1 fc 4f 79 f5    \tvcvtpd2udq %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf1, 0x7f, 0x08, 0x79, 0xc6, }, 6, 0, "", "",
+"62 f1 7f 08 79 c6    \tvcvtsd2usi %xmm6,%eax",},
+{{0x62, 0xf1, 0x7e, 0x08, 0x79, 0xc6, }, 6, 0, "", "",
+"62 f1 7e 08 79 c6    \tvcvtss2usi %xmm6,%eax",},
+{{0x62, 0xf1, 0x7d, 0x4f, 0x79, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 4f 79 f5    \tvcvtps2uqq %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x79, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 79 f5    \tvcvtpd2uqq %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x7e, 0x4f, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 7e 4f 7a f5    \tvcvtudq2pd %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfe, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 fe 48 7a f5    \tvcvtuqq2pd %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x7f, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 7f 48 7a f5    \tvcvtudq2ps %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xff, 0x4f, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 ff 4f 7a f5    \tvcvtuqq2ps %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf1, 0x7d, 0x4f, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 4f 7a f5    \tvcvttps2qq %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 7a f5    \tvcvttpd2qq %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x57, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 f1 57 08 7b f0    \tvcvtusi2sd %eax,%xmm5,%xmm6",},
+{{0x62, 0xf1, 0x56, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 f1 56 08 7b f0    \tvcvtusi2ss %eax,%xmm5,%xmm6",},
+{{0x62, 0xf1, 0x7d, 0x4f, 0x7b, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 4f 7b f5    \tvcvtps2qq %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x7b, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 7b f5    \tvcvtpd2qq %zmm5,%zmm6",},
+{{0x0f, 0x7f, 0xc4, }, 3, 0, "", "",
+"0f 7f c4             \tmovq   %mm0,%mm4",},
+{{0xc5, 0xfd, 0x7f, 0xee, }, 4, 0, "", "",
+"c5 fd 7f ee          \tvmovdqa %ymm5,%ymm6",},
+{{0x62, 0xf1, 0x7d, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 7d 48 7f ee    \tvmovdqa32 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 fd 48 7f ee    \tvmovdqa64 %zmm5,%zmm6",},
+{{0xc5, 0xfe, 0x7f, 0xee, }, 4, 0, "", "",
+"c5 fe 7f ee          \tvmovdqu %ymm5,%ymm6",},
+{{0x62, 0xf1, 0x7e, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 7e 48 7f ee    \tvmovdqu32 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfe, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 fe 48 7f ee    \tvmovdqu64 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x7f, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 7f 48 7f ee    \tvmovdqu8 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xff, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 ff 48 7f ee    \tvmovdqu16 %zmm5,%zmm6",},
+{{0x0f, 0xdb, 0xd1, }, 3, 0, "", "",
+"0f db d1             \tpand   %mm1,%mm2",},
+{{0x66, 0x0f, 0xdb, 0xd1, }, 4, 0, "", "",
+"66 0f db d1          \tpand   %xmm1,%xmm2",},
+{{0xc5, 0xcd, 0xdb, 0xd4, }, 4, 0, "", "",
+"c5 cd db d4          \tvpand  %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x55, 0x48, 0xdb, 0xf4, }, 6, 0, "", "",
+"62 f1 55 48 db f4    \tvpandd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0xdb, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 db f4    \tvpandq %zmm4,%zmm5,%zmm6",},
+{{0x0f, 0xdf, 0xd1, }, 3, 0, "", "",
+"0f df d1             \tpandn  %mm1,%mm2",},
+{{0x66, 0x0f, 0xdf, 0xd1, }, 4, 0, "", "",
+"66 0f df d1          \tpandn  %xmm1,%xmm2",},
+{{0xc5, 0xcd, 0xdf, 0xd4, }, 4, 0, "", "",
+"c5 cd df d4          \tvpandn %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x55, 0x48, 0xdf, 0xf4, }, 6, 0, "", "",
+"62 f1 55 48 df f4    \tvpandnd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0xdf, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 df f4    \tvpandnq %zmm4,%zmm5,%zmm6",},
+{{0xc5, 0xf9, 0xe6, 0xd1, }, 4, 0, "", "",
+"c5 f9 e6 d1          \tvcvttpd2dq %xmm1,%xmm2",},
+{{0xc5, 0xfa, 0xe6, 0xf5, }, 4, 0, "", "",
+"c5 fa e6 f5          \tvcvtdq2pd %xmm5,%xmm6",},
+{{0x62, 0xf1, 0x7e, 0x4f, 0xe6, 0xf5, }, 6, 0, "", "",
+"62 f1 7e 4f e6 f5    \tvcvtdq2pd %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfe, 0x48, 0xe6, 0xf5, }, 6, 0, "", "",
+"62 f1 fe 48 e6 f5    \tvcvtqq2pd %zmm5,%zmm6",},
+{{0xc5, 0xfb, 0xe6, 0xd1, }, 4, 0, "", "",
+"c5 fb e6 d1          \tvcvtpd2dq %xmm1,%xmm2",},
+{{0x0f, 0xeb, 0xf4, }, 3, 0, "", "",
+"0f eb f4             \tpor    %mm4,%mm6",},
+{{0xc5, 0xcd, 0xeb, 0xd4, }, 4, 0, "", "",
+"c5 cd eb d4          \tvpor   %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x55, 0x48, 0xeb, 0xf4, }, 6, 0, "", "",
+"62 f1 55 48 eb f4    \tvpord  %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0xeb, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 eb f4    \tvporq  %zmm4,%zmm5,%zmm6",},
+{{0x0f, 0xef, 0xf4, }, 3, 0, "", "",
+"0f ef f4             \tpxor   %mm4,%mm6",},
+{{0xc5, 0xcd, 0xef, 0xd4, }, 4, 0, "", "",
+"c5 cd ef d4          \tvpxor  %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x55, 0x48, 0xef, 0xf4, }, 6, 0, "", "",
+"62 f1 55 48 ef f4    \tvpxord %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0xef, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 ef f4    \tvpxorq %zmm4,%zmm5,%zmm6",},
+{{0x66, 0x0f, 0x38, 0x10, 0xc1, }, 5, 0, "", "",
+"66 0f 38 10 c1       \tpblendvb %xmm0,%xmm1,%xmm0",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x10, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 10 f4    \tvpsrlvw %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x10, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 10 ee    \tvpmovuswb %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x11, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 11 ee    \tvpmovusdb %zmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x11, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 11 f4    \tvpsravw %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x12, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 12 ee    \tvpmovusqb %zmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x12, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 12 f4    \tvpsllvw %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "",
+"c4 e2 7d 13 eb       \tvcvtph2ps %xmm3,%ymm5",},
+{{0x62, 0xf2, 0x7d, 0x4f, 0x13, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 4f 13 f5    \tvcvtph2ps %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x13, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 13 ee    \tvpmovusdw %zmm5,%ymm6{%k7}",},
+{{0x66, 0x0f, 0x38, 0x14, 0xc1, }, 5, 0, "", "",
+"66 0f 38 14 c1       \tblendvps %xmm0,%xmm1,%xmm0",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x14, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 14 ee    \tvpmovusqw %zmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0x55, 0x48, 0x14, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 14 f4    \tvprorvd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x14, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 14 f4    \tvprorvq %zmm4,%zmm5,%zmm6",},
+{{0x66, 0x0f, 0x38, 0x15, 0xc1, }, 5, 0, "", "",
+"66 0f 38 15 c1       \tblendvpd %xmm0,%xmm1,%xmm0",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x15, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 15 ee    \tvpmovusqd %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf2, 0x55, 0x48, 0x15, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 15 f4    \tvprolvd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x15, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 15 f4    \tvprolvq %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x16, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 16 d4       \tvpermps %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x4d, 0x2f, 0x16, 0xd4, }, 6, 0, "", "",
+"62 f2 4d 2f 16 d4    \tvpermps %ymm4,%ymm6,%ymm2{%k7}",},
+{{0x62, 0xf2, 0xcd, 0x2f, 0x16, 0xd4, }, 6, 0, "", "",
+"62 f2 cd 2f 16 d4    \tvpermpd %ymm4,%ymm6,%ymm2{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x19, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 19 f4       \tvbroadcastsd %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x19, 0xf7, }, 6, 0, "", "",
+"62 f2 7d 48 19 f7    \tvbroadcastf32x2 %xmm7,%zmm6",},
+{{0xc4, 0xe2, 0x7d, 0x1a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 1a 21       \tvbroadcastf128 (%ecx),%ymm4",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x1a, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 1a 31    \tvbroadcastf32x4 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x1a, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 1a 31    \tvbroadcastf64x2 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x1b, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 1b 31    \tvbroadcastf32x8 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x1b, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 1b 31    \tvbroadcastf64x4 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x1f, 0xf4, }, 6, 0, "", "",
+"62 f2 fd 48 1f f4    \tvpabsq %zmm4,%zmm6",},
+{{0xc4, 0xe2, 0x79, 0x20, 0xec, }, 5, 0, "", "",
+"c4 e2 79 20 ec       \tvpmovsxbw %xmm4,%xmm5",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x20, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 20 ee    \tvpmovswb %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x21, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 21 f4       \tvpmovsxbd %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x21, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 21 ee    \tvpmovsdb %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x22, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 22 e4       \tvpmovsxbq %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x22, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 22 ee    \tvpmovsqb %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x23, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 23 e4       \tvpmovsxwd %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x23, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 23 ee    \tvpmovsdw %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x24, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 24 f4       \tvpmovsxwq %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x24, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 24 ee    \tvpmovsqw %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x25, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 25 e4       \tvpmovsxdq %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x25, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 25 ee    \tvpmovsqd %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf2, 0x4d, 0x48, 0x26, 0xed, }, 6, 0, "", "",
+"62 f2 4d 48 26 ed    \tvptestmb %zmm5,%zmm6,%k5",},
+{{0x62, 0xf2, 0xcd, 0x48, 0x26, 0xed, }, 6, 0, "", "",
+"62 f2 cd 48 26 ed    \tvptestmw %zmm5,%zmm6,%k5",},
+{{0x62, 0xf2, 0x56, 0x48, 0x26, 0xec, }, 6, 0, "", "",
+"62 f2 56 48 26 ec    \tvptestnmb %zmm4,%zmm5,%k5",},
+{{0x62, 0xf2, 0xd6, 0x48, 0x26, 0xec, }, 6, 0, "", "",
+"62 f2 d6 48 26 ec    \tvptestnmw %zmm4,%zmm5,%k5",},
+{{0x62, 0xf2, 0x4d, 0x48, 0x27, 0xed, }, 6, 0, "", "",
+"62 f2 4d 48 27 ed    \tvptestmd %zmm5,%zmm6,%k5",},
+{{0x62, 0xf2, 0xcd, 0x48, 0x27, 0xed, }, 6, 0, "", "",
+"62 f2 cd 48 27 ed    \tvptestmq %zmm5,%zmm6,%k5",},
+{{0x62, 0xf2, 0x56, 0x48, 0x27, 0xec, }, 6, 0, "", "",
+"62 f2 56 48 27 ec    \tvptestnmd %zmm4,%zmm5,%k5",},
+{{0x62, 0xf2, 0xd6, 0x48, 0x27, 0xec, }, 6, 0, "", "",
+"62 f2 d6 48 27 ec    \tvptestnmq %zmm4,%zmm5,%k5",},
+{{0xc4, 0xe2, 0x4d, 0x28, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 28 d4       \tvpmuldq %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x28, 0xf5, }, 6, 0, "", "",
+"62 f2 7e 48 28 f5    \tvpmovm2b %k5,%zmm6",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x28, 0xf5, }, 6, 0, "", "",
+"62 f2 fe 48 28 f5    \tvpmovm2w %k5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x29, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 29 d4       \tvpcmpeqq %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x29, 0xee, }, 6, 0, "", "",
+"62 f2 7e 48 29 ee    \tvpmovb2m %zmm6,%k5",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x29, 0xee, }, 6, 0, "", "",
+"62 f2 fe 48 29 ee    \tvpmovw2m %zmm6,%k5",},
+{{0xc4, 0xe2, 0x7d, 0x2a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 2a 21       \tvmovntdqa (%ecx),%ymm4",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x2a, 0xce, }, 6, 0, "", "",
+"62 f2 fe 48 2a ce    \tvpbroadcastmb2q %k6,%zmm1",},
+{{0xc4, 0xe2, 0x5d, 0x2c, 0x31, }, 5, 0, "", "",
+"c4 e2 5d 2c 31       \tvmaskmovps (%ecx),%ymm4,%ymm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x2c, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 2c f4    \tvscalefps %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x2c, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 2c f4    \tvscalefpd %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x5d, 0x2d, 0x31, }, 5, 0, "", "",
+"c4 e2 5d 2d 31       \tvmaskmovpd (%ecx),%ymm4,%ymm6",},
+{{0x62, 0xf2, 0x55, 0x0f, 0x2d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 0f 2d f4    \tvscalefss %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x0f, 0x2d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 0f 2d f4    \tvscalefsd %xmm4,%xmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x30, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 30 e4       \tvpmovzxbw %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x30, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 30 ee    \tvpmovwb %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x31, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 31 f4       \tvpmovzxbd %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x31, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 31 ee    \tvpmovdb %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x32, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 32 e4       \tvpmovzxbq %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x32, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 32 ee    \tvpmovqb %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x33, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 33 e4       \tvpmovzxwd %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x33, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 33 ee    \tvpmovdw %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x34, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 34 f4       \tvpmovzxwq %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x34, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 34 ee    \tvpmovqw %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x35, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 35 e4       \tvpmovzxdq %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x35, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 35 ee    \tvpmovqd %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x4d, 0x36, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 36 d4       \tvpermd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x4d, 0x2f, 0x36, 0xd4, }, 6, 0, "", "",
+"62 f2 4d 2f 36 d4    \tvpermd %ymm4,%ymm6,%ymm2{%k7}",},
+{{0x62, 0xf2, 0xcd, 0x2f, 0x36, 0xd4, }, 6, 0, "", "",
+"62 f2 cd 2f 36 d4    \tvpermq %ymm4,%ymm6,%ymm2{%k7}",},
+{{0xc4, 0xe2, 0x4d, 0x38, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 38 d4       \tvpminsb %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x38, 0xf5, }, 6, 0, "", "",
+"62 f2 7e 48 38 f5    \tvpmovm2d %k5,%zmm6",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x38, 0xf5, }, 6, 0, "", "",
+"62 f2 fe 48 38 f5    \tvpmovm2q %k5,%zmm6",},
+{{0xc4, 0xe2, 0x69, 0x39, 0xd9, }, 5, 0, "", "",
+"c4 e2 69 39 d9       \tvpminsd %xmm1,%xmm2,%xmm3",},
+{{0x62, 0xf2, 0x55, 0x48, 0x39, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 39 f4    \tvpminsd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x39, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 39 f4    \tvpminsq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x39, 0xee, }, 6, 0, "", "",
+"62 f2 7e 48 39 ee    \tvpmovd2m %zmm6,%k5",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x39, 0xee, }, 6, 0, "", "",
+"62 f2 fe 48 39 ee    \tvpmovq2m %zmm6,%k5",},
+{{0xc4, 0xe2, 0x4d, 0x3a, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3a d4       \tvpminuw %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x3a, 0xf6, }, 6, 0, "", "",
+"62 f2 7e 48 3a f6    \tvpbroadcastmw2d %k6,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x3b, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3b d4       \tvpminud %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x3b, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 3b f4    \tvpminud %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x3b, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 3b f4    \tvpminuq %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x3d, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3d d4       \tvpmaxsd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x3d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 3d f4    \tvpmaxsd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x3d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 3d f4    \tvpmaxsq %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x3f, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3f d4       \tvpmaxud %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x3f, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 3f f4    \tvpmaxud %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x3f, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 3f f4    \tvpmaxuq %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x40, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 40 d4       \tvpmulld %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x40, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 40 f4    \tvpmulld %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x40, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 40 f4    \tvpmullq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x42, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 42 f5    \tvgetexpps %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x42, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 42 f5    \tvgetexppd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x0f, 0x43, 0xf4, }, 6, 0, "", "",
+"62 f2 55 0f 43 f4    \tvgetexpss %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xe5, 0x0f, 0x43, 0xe2, }, 6, 0, "", "",
+"62 f2 e5 0f 43 e2    \tvgetexpsd %xmm2,%xmm3,%xmm4{%k7}",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x44, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 44 f5    \tvplzcntd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x44, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 44 f5    \tvplzcntq %zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x46, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 46 d4       \tvpsravd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x46, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 46 f4    \tvpsravd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x46, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 46 f4    \tvpsravq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x4c, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 4c f5    \tvrcp14ps %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x4c, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 4c f5    \tvrcp14pd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x0f, 0x4d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 0f 4d f4    \tvrcp14ss %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x0f, 0x4d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 0f 4d f4    \tvrcp14sd %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x4e, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 4e f5    \tvrsqrt14ps %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x4e, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 4e f5    \tvrsqrt14pd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x0f, 0x4f, 0xf4, }, 6, 0, "", "",
+"62 f2 55 0f 4f f4    \tvrsqrt14ss %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x0f, 0x4f, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 0f 4f f4    \tvrsqrt14sd %xmm4,%xmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x79, 0x59, 0xf4, }, 5, 0, "", "",
+"c4 e2 79 59 f4       \tvpbroadcastq %xmm4,%xmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x59, 0xf7, }, 6, 0, "", "",
+"62 f2 7d 48 59 f7    \tvbroadcasti32x2 %xmm7,%zmm6",},
+{{0xc4, 0xe2, 0x7d, 0x5a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 5a 21       \tvbroadcasti128 (%ecx),%ymm4",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x5a, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 5a 31    \tvbroadcasti32x4 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x5a, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 5a 31    \tvbroadcasti64x2 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x5b, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 5b 31    \tvbroadcasti32x8 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x5b, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 5b 31    \tvbroadcasti64x4 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x64, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 64 f4    \tvpblendmd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x64, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 64 f4    \tvpblendmq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x65, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 65 f4    \tvblendmps %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x65, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 65 f4    \tvblendmpd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x66, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 66 f4    \tvpblendmb %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x66, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 66 f4    \tvpblendmw %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x75, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 75 f4    \tvpermi2b %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x75, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 75 f4    \tvpermi2w %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x76, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 76 f4    \tvpermi2d %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x76, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 76 f4    \tvpermi2q %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x77, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 77 f4    \tvpermi2ps %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x77, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 77 f4    \tvpermi2pd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x08, 0x7a, 0xd8, }, 6, 0, "", "",
+"62 f2 7d 08 7a d8    \tvpbroadcastb %eax,%xmm3",},
+{{0x62, 0xf2, 0x7d, 0x08, 0x7b, 0xd8, }, 6, 0, "", "",
+"62 f2 7d 08 7b d8    \tvpbroadcastw %eax,%xmm3",},
+{{0x62, 0xf2, 0x7d, 0x08, 0x7c, 0xd8, }, 6, 0, "", "",
+"62 f2 7d 08 7c d8    \tvpbroadcastd %eax,%xmm3",},
+{{0x62, 0xf2, 0x55, 0x48, 0x7d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 7d f4    \tvpermt2b %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x7d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 7d f4    \tvpermt2w %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x7e, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 7e f4    \tvpermt2d %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x7e, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 7e f4    \tvpermt2q %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x7f, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 7f f4    \tvpermt2ps %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x7f, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 7f f4    \tvpermt2pd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x83, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 83 f4    \tvpmultishiftqb %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x88, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 88 31    \tvexpandps (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x88, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 88 31    \tvexpandpd (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x89, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 89 31    \tvpexpandd (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x89, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 89 31    \tvpexpandq (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x8a, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 8a 31    \tvcompressps %zmm6,(%ecx)",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x8a, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 8a 31    \tvcompresspd %zmm6,(%ecx)",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x8b, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 8b 31    \tvpcompressd %zmm6,(%ecx)",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x8b, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 8b 31    \tvpcompressq %zmm6,(%ecx)",},
+{{0x62, 0xf2, 0x55, 0x48, 0x8d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 8d f4    \tvpermb %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x8d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 8d f4    \tvpermw %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x69, 0x90, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 69 90 4c 7d 02 \tvpgatherdd %xmm2,0x2(%ebp,%xmm7,2),%xmm1",},
+{{0xc4, 0xe2, 0xe9, 0x90, 0x4c, 0x7d, 0x04, }, 7, 0, "", "",
+"c4 e2 e9 90 4c 7d 04 \tvpgatherdq %xmm2,0x4(%ebp,%xmm7,2),%xmm1",},
+{{0x62, 0xf2, 0x7d, 0x49, 0x90, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 90 b4 fd 7b 00 00 00 \tvpgatherdd 0x7b(%ebp,%zmm7,8),%zmm6{%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0x90, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 90 b4 fd 7b 00 00 00 \tvpgatherdq 0x7b(%ebp,%ymm7,8),%zmm6{%k1}",},
+{{0xc4, 0xe2, 0x69, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 69 91 4c 7d 02 \tvpgatherqd %xmm2,0x2(%ebp,%xmm7,2),%xmm1",},
+{{0xc4, 0xe2, 0xe9, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 e9 91 4c 7d 02 \tvpgatherqq %xmm2,0x2(%ebp,%xmm7,2),%xmm1",},
+{{0x62, 0xf2, 0x7d, 0x49, 0x91, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 91 b4 fd 7b 00 00 00 \tvpgatherqd 0x7b(%ebp,%zmm7,8),%ymm6{%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0x91, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 91 b4 fd 7b 00 00 00 \tvpgatherqq 0x7b(%ebp,%zmm7,8),%zmm6{%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xa0, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 a0 b4 fd 7b 00 00 00 \tvpscatterdd %zmm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xa0, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 a0 b4 fd 7b 00 00 00 \tvpscatterdq %zmm6,0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xa1, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 a1 b4 fd 7b 00 00 00 \tvpscatterqd %ymm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x29, 0xa1, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 29 a1 b4 fd 7b 00 00 00 \tvpscatterqq %ymm6,0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xa2, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 a2 b4 fd 7b 00 00 00 \tvscatterdps %zmm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xa2, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 a2 b4 fd 7b 00 00 00 \tvscatterdpd %zmm6,0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xa3, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 a3 b4 fd 7b 00 00 00 \tvscatterqps %ymm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xa3, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 a3 b4 fd 7b 00 00 00 \tvscatterqpd %zmm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xd5, 0x48, 0xb4, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 b4 f4    \tvpmadd52luq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0xb5, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 b5 f4    \tvpmadd52huq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0xc4, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 c4 f5    \tvpconflictd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0xc4, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 c4 f5    \tvpconflictq %zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0xc8, 0xfe, }, 6, 0, "", "",
+"62 f2 7d 48 c8 fe    \tvexp2ps %zmm6,%zmm7",},
+{{0x62, 0xf2, 0xfd, 0x48, 0xc8, 0xfe, }, 6, 0, "", "",
+"62 f2 fd 48 c8 fe    \tvexp2pd %zmm6,%zmm7",},
+{{0x62, 0xf2, 0x7d, 0x48, 0xca, 0xfe, }, 6, 0, "", "",
+"62 f2 7d 48 ca fe    \tvrcp28ps %zmm6,%zmm7",},
+{{0x62, 0xf2, 0xfd, 0x48, 0xca, 0xfe, }, 6, 0, "", "",
+"62 f2 fd 48 ca fe    \tvrcp28pd %zmm6,%zmm7",},
+{{0x62, 0xf2, 0x4d, 0x0f, 0xcb, 0xfd, }, 6, 0, "", "",
+"62 f2 4d 0f cb fd    \tvrcp28ss %xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf2, 0xcd, 0x0f, 0xcb, 0xfd, }, 6, 0, "", "",
+"62 f2 cd 0f cb fd    \tvrcp28sd %xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf2, 0x7d, 0x48, 0xcc, 0xfe, }, 6, 0, "", "",
+"62 f2 7d 48 cc fe    \tvrsqrt28ps %zmm6,%zmm7",},
+{{0x62, 0xf2, 0xfd, 0x48, 0xcc, 0xfe, }, 6, 0, "", "",
+"62 f2 fd 48 cc fe    \tvrsqrt28pd %zmm6,%zmm7",},
+{{0x62, 0xf2, 0x4d, 0x0f, 0xcd, 0xfd, }, 6, 0, "", "",
+"62 f2 4d 0f cd fd    \tvrsqrt28ss %xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf2, 0xcd, 0x0f, 0xcd, 0xfd, }, 6, 0, "", "",
+"62 f2 cd 0f cd fd    \tvrsqrt28sd %xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x03, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 03 fd 12 \tvalignd $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x03, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 03 fd 12 \tvalignq $0x12,%zmm5,%zmm6,%zmm7",},
+{{0xc4, 0xe3, 0x7d, 0x08, 0xd6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 08 d6 05    \tvroundps $0x5,%ymm6,%ymm2",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x08, 0xf5, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 08 f5 12 \tvrndscaleps $0x12,%zmm5,%zmm6",},
+{{0xc4, 0xe3, 0x7d, 0x09, 0xd6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 09 d6 05    \tvroundpd $0x5,%ymm6,%ymm2",},
+{{0x62, 0xf3, 0xfd, 0x48, 0x09, 0xf5, 0x12, }, 7, 0, "", "",
+"62 f3 fd 48 09 f5 12 \tvrndscalepd $0x12,%zmm5,%zmm6",},
+{{0xc4, 0xe3, 0x49, 0x0a, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 49 0a d4 05    \tvroundss $0x5,%xmm4,%xmm6,%xmm2",},
+{{0x62, 0xf3, 0x55, 0x0f, 0x0a, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 0f 0a f4 12 \tvrndscaless $0x12,%xmm4,%xmm5,%xmm6{%k7}",},
+{{0xc4, 0xe3, 0x49, 0x0b, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 49 0b d4 05    \tvroundsd $0x5,%xmm4,%xmm6,%xmm2",},
+{{0x62, 0xf3, 0xd5, 0x0f, 0x0b, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 d5 0f 0b f4 12 \tvrndscalesd $0x12,%xmm4,%xmm5,%xmm6{%k7}",},
+{{0xc4, 0xe3, 0x5d, 0x18, 0xf4, 0x05, }, 6, 0, "", "",
+"c4 e3 5d 18 f4 05    \tvinsertf128 $0x5,%xmm4,%ymm4,%ymm6",},
+{{0x62, 0xf3, 0x55, 0x4f, 0x18, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 4f 18 f4 12 \tvinsertf32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}",},
+{{0x62, 0xf3, 0xd5, 0x4f, 0x18, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 d5 4f 18 f4 12 \tvinsertf64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}",},
+{{0xc4, 0xe3, 0x7d, 0x19, 0xe4, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 19 e4 05    \tvextractf128 $0x5,%ymm4,%xmm4",},
+{{0x62, 0xf3, 0x7d, 0x4f, 0x19, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 7d 4f 19 ee 12 \tvextractf32x4 $0x12,%zmm5,%xmm6{%k7}",},
+{{0x62, 0xf3, 0xfd, 0x4f, 0x19, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 fd 4f 19 ee 12 \tvextractf64x2 $0x12,%zmm5,%xmm6{%k7}",},
+{{0x62, 0xf3, 0x4d, 0x4f, 0x1a, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 4f 1a fd 12 \tvinsertf32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}",},
+{{0x62, 0xf3, 0xcd, 0x4f, 0x1a, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 4f 1a fd 12 \tvinsertf64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}",},
+{{0x62, 0xf3, 0x7d, 0x4f, 0x1b, 0xf7, 0x12, }, 7, 0, "", "",
+"62 f3 7d 4f 1b f7 12 \tvextractf32x8 $0x12,%zmm6,%ymm7{%k7}",},
+{{0x62, 0xf3, 0xfd, 0x4f, 0x1b, 0xf7, 0x12, }, 7, 0, "", "",
+"62 f3 fd 4f 1b f7 12 \tvextractf64x4 $0x12,%zmm6,%ymm7{%k7}",},
+{{0x62, 0xf3, 0x45, 0x48, 0x1e, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 45 48 1e ee 12 \tvpcmpud $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0xc5, 0x48, 0x1e, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 c5 48 1e ee 12 \tvpcmpuq $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0x45, 0x48, 0x1f, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 45 48 1f ee 12 \tvpcmpd $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0xc5, 0x48, 0x1f, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 c5 48 1f ee 12 \tvpcmpq $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x23, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 23 fd 12 \tvshuff32x4 $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x23, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 23 fd 12 \tvshuff64x2 $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x25, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 25 fd 12 \tvpternlogd $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x25, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 25 fd 12 \tvpternlogq $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x26, 0xfe, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 26 fe 12 \tvgetmantps $0x12,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xfd, 0x48, 0x26, 0xfe, 0x12, }, 7, 0, "", "",
+"62 f3 fd 48 26 fe 12 \tvgetmantpd $0x12,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x0f, 0x27, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 0f 27 fd 12 \tvgetmantss $0x12,%xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf3, 0xcd, 0x0f, 0x27, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 0f 27 fd 12 \tvgetmantsd $0x12,%xmm5,%xmm6,%xmm7{%k7}",},
+{{0xc4, 0xe3, 0x5d, 0x38, 0xf4, 0x05, }, 6, 0, "", "",
+"c4 e3 5d 38 f4 05    \tvinserti128 $0x5,%xmm4,%ymm4,%ymm6",},
+{{0x62, 0xf3, 0x55, 0x4f, 0x38, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 4f 38 f4 12 \tvinserti32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}",},
+{{0x62, 0xf3, 0xd5, 0x4f, 0x38, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 d5 4f 38 f4 12 \tvinserti64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}",},
+{{0xc4, 0xe3, 0x7d, 0x39, 0xe6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 39 e6 05    \tvextracti128 $0x5,%ymm4,%xmm6",},
+{{0x62, 0xf3, 0x7d, 0x4f, 0x39, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 7d 4f 39 ee 12 \tvextracti32x4 $0x12,%zmm5,%xmm6{%k7}",},
+{{0x62, 0xf3, 0xfd, 0x4f, 0x39, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 fd 4f 39 ee 12 \tvextracti64x2 $0x12,%zmm5,%xmm6{%k7}",},
+{{0x62, 0xf3, 0x4d, 0x4f, 0x3a, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 4f 3a fd 12 \tvinserti32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}",},
+{{0x62, 0xf3, 0xcd, 0x4f, 0x3a, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 4f 3a fd 12 \tvinserti64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}",},
+{{0x62, 0xf3, 0x7d, 0x4f, 0x3b, 0xf7, 0x12, }, 7, 0, "", "",
+"62 f3 7d 4f 3b f7 12 \tvextracti32x8 $0x12,%zmm6,%ymm7{%k7}",},
+{{0x62, 0xf3, 0xfd, 0x4f, 0x3b, 0xf7, 0x12, }, 7, 0, "", "",
+"62 f3 fd 4f 3b f7 12 \tvextracti64x4 $0x12,%zmm6,%ymm7{%k7}",},
+{{0x62, 0xf3, 0x45, 0x48, 0x3e, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 45 48 3e ee 12 \tvpcmpub $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0xc5, 0x48, 0x3e, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 c5 48 3e ee 12 \tvpcmpuw $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0x45, 0x48, 0x3f, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 45 48 3f ee 12 \tvpcmpb $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0xc5, 0x48, 0x3f, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 c5 48 3f ee 12 \tvpcmpw $0x12,%zmm6,%zmm7,%k5",},
+{{0xc4, 0xe3, 0x4d, 0x42, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 4d 42 d4 05    \tvmpsadbw $0x5,%ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf3, 0x55, 0x48, 0x42, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 48 42 f4 12 \tvdbpsadbw $0x12,%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x43, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 43 fd 12 \tvshufi32x4 $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x43, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 43 fd 12 \tvshufi64x2 $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x50, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 50 fd 12 \tvrangeps $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x50, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 50 fd 12 \tvrangepd $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x08, 0x51, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 08 51 fd 12 \tvrangess $0x12,%xmm5,%xmm6,%xmm7",},
+{{0x62, 0xf3, 0xcd, 0x08, 0x51, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 08 51 fd 12 \tvrangesd $0x12,%xmm5,%xmm6,%xmm7",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x54, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 54 fd 12 \tvfixupimmps $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x54, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 54 fd 12 \tvfixupimmpd $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x0f, 0x55, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 0f 55 fd 12 \tvfixupimmss $0x12,%xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf3, 0xcd, 0x0f, 0x55, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 0f 55 fd 12 \tvfixupimmsd $0x12,%xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x56, 0xfe, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 56 fe 12 \tvreduceps $0x12,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xfd, 0x48, 0x56, 0xfe, 0x12, }, 7, 0, "", "",
+"62 f3 fd 48 56 fe 12 \tvreducepd $0x12,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x08, 0x57, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 08 57 fd 12 \tvreducess $0x12,%xmm5,%xmm6,%xmm7",},
+{{0x62, 0xf3, 0xcd, 0x08, 0x57, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 08 57 fd 12 \tvreducesd $0x12,%xmm5,%xmm6,%xmm7",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x66, 0xef, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 66 ef 12 \tvfpclassps $0x12,%zmm7,%k5",},
+{{0x62, 0xf3, 0xfd, 0x48, 0x66, 0xef, 0x12, }, 7, 0, "", "",
+"62 f3 fd 48 66 ef 12 \tvfpclasspd $0x12,%zmm7,%k5",},
+{{0x62, 0xf3, 0x7d, 0x08, 0x67, 0xef, 0x12, }, 7, 0, "", "",
+"62 f3 7d 08 67 ef 12 \tvfpclassss $0x12,%xmm7,%k5",},
+{{0x62, 0xf3, 0xfd, 0x08, 0x67, 0xef, 0x12, }, 7, 0, "", "",
+"62 f3 fd 08 67 ef 12 \tvfpclasssd $0x12,%xmm7,%k5",},
+{{0x62, 0xf1, 0x4d, 0x48, 0x72, 0xc5, 0x12, }, 7, 0, "", "",
+"62 f1 4d 48 72 c5 12 \tvprord $0x12,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xcd, 0x48, 0x72, 0xc5, 0x12, }, 7, 0, "", "",
+"62 f1 cd 48 72 c5 12 \tvprorq $0x12,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0x4d, 0x48, 0x72, 0xcd, 0x12, }, 7, 0, "", "",
+"62 f1 4d 48 72 cd 12 \tvprold $0x12,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xcd, 0x48, 0x72, 0xcd, 0x12, }, 7, 0, "", "",
+"62 f1 cd 48 72 cd 12 \tvprolq $0x12,%zmm5,%zmm6",},
+{{0x0f, 0x72, 0xe6, 0x02, }, 4, 0, "", "",
+"0f 72 e6 02          \tpsrad  $0x2,%mm6",},
+{{0xc5, 0xed, 0x72, 0xe6, 0x05, }, 5, 0, "", "",
+"c5 ed 72 e6 05       \tvpsrad $0x5,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x6d, 0x48, 0x72, 0xe6, 0x05, }, 7, 0, "", "",
+"62 f1 6d 48 72 e6 05 \tvpsrad $0x5,%zmm6,%zmm2",},
+{{0x62, 0xf1, 0xed, 0x48, 0x72, 0xe6, 0x05, }, 7, 0, "", "",
+"62 f1 ed 48 72 e6 05 \tvpsraq $0x5,%zmm6,%zmm2",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c6 8c fd 7b 00 00 00 \tvgatherpf0dps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c6 8c fd 7b 00 00 00 \tvgatherpf0dpd 0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c6 94 fd 7b 00 00 00 \tvgatherpf1dps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c6 94 fd 7b 00 00 00 \tvgatherpf1dpd 0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c6 ac fd 7b 00 00 00 \tvscatterpf0dps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c6 ac fd 7b 00 00 00 \tvscatterpf0dpd 0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c6 b4 fd 7b 00 00 00 \tvscatterpf1dps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c6 b4 fd 7b 00 00 00 \tvscatterpf1dpd 0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c7 8c fd 7b 00 00 00 \tvgatherpf0qps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c7 8c fd 7b 00 00 00 \tvgatherpf0qpd 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c7 94 fd 7b 00 00 00 \tvgatherpf1qps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c7 94 fd 7b 00 00 00 \tvgatherpf1qpd 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c7 ac fd 7b 00 00 00 \tvscatterpf0qps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c7 ac fd 7b 00 00 00 \tvscatterpf0qpd 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c7 b4 fd 7b 00 00 00 \tvscatterpf1qps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c7 b4 fd 7b 00 00 00 \tvscatterpf1qpd 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 58 f4    \tvaddpd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x4f, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 4f 58 f4    \tvaddpd %zmm4,%zmm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xd5, 0xcf, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 cf 58 f4    \tvaddpd %zmm4,%zmm5,%zmm6{%k7}{z}",},
+{{0x62, 0xf1, 0xd5, 0x18, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 18 58 f4    \tvaddpd {rn-sae},%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x58, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 58 58 f4    \tvaddpd {ru-sae},%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x38, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 38 58 f4    \tvaddpd {rd-sae},%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x78, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 78 58 f4    \tvaddpd {rz-sae},%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0x31, }, 6, 0, "", "",
+"62 f1 d5 48 58 31    \tvaddpd (%ecx),%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0xb4, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 11, 0, "", "",
+"62 f1 d5 48 58 b4 c8 23 01 00 00 \tvaddpd 0x123(%eax,%ecx,8),%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x58, 0x58, 0x31, }, 6, 0, "", "",
+"62 f1 d5 58 58 31    \tvaddpd (%ecx){1to8},%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0x72, 0x7f, }, 7, 0, "", "",
+"62 f1 d5 48 58 72 7f \tvaddpd 0x1fc0(%edx),%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x58, 0x58, 0x72, 0x7f, }, 7, 0, "", "",
+"62 f1 d5 58 58 72 7f \tvaddpd 0x3f8(%edx){1to8},%zmm5,%zmm6",},
+{{0x62, 0xf1, 0x4c, 0x58, 0xc2, 0x6a, 0x7f, 0x08, }, 8, 0, "", "",
+"62 f1 4c 58 c2 6a 7f 08 \tvcmpeq_uqps 0x1fc(%edx){1to16},%zmm6,%k5",},
+{{0x62, 0xf1, 0xe7, 0x0f, 0xc2, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, 0x01, }, 12, 0, "", "",
+"62 f1 e7 0f c2 ac c8 23 01 00 00 01 \tvcmpltsd 0x123(%eax,%ecx,8),%xmm3,%k5{%k7}",},
+{{0x62, 0xf1, 0xd7, 0x1f, 0xc2, 0xec, 0x02, }, 7, 0, "", "",
+"62 f1 d7 1f c2 ec 02 \tvcmplesd {sae},%xmm4,%xmm5,%k5{%k7}",},
+{{0x62, 0xf3, 0x5d, 0x0f, 0x27, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, 0x5b, }, 12, 0, "", "",
+"62 f3 5d 0f 27 ac c8 23 01 00 00 5b \tvgetmantss $0x5b,0x123(%eax,%ecx,8),%xmm4,%xmm5{%k7}",},
+{{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"f3 0f 1b 00          \tbndmk  (%eax),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1b 05 78 56 34 12 \tbndmk  0x12345678,%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x18, }, 4, 0, "", "",
+"f3 0f 1b 18          \tbndmk  (%eax),%bnd3",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "",
+"f3 0f 1b 04 01       \tbndmk  (%ecx,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 04 05 78 56 34 12 \tbndmk  0x12345678(,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "",
+"f3 0f 1b 04 08       \tbndmk  (%eax,%ecx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "",
+"f3 0f 1b 04 c8       \tbndmk  (%eax,%ecx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "",
+"f3 0f 1b 40 12       \tbndmk  0x12(%eax),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "",
+"f3 0f 1b 45 12       \tbndmk  0x12(%ebp),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 01 12    \tbndmk  0x12(%ecx,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 05 12    \tbndmk  0x12(%ebp,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 08 12    \tbndmk  0x12(%eax,%ecx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 c8 12    \tbndmk  0x12(%eax,%ecx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1b 80 78 56 34 12 \tbndmk  0x12345678(%eax),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1b 85 78 56 34 12 \tbndmk  0x12345678(%ebp),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 01 78 56 34 12 \tbndmk  0x12345678(%ecx,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 05 78 56 34 12 \tbndmk  0x12345678(%ebp,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 08 78 56 34 12 \tbndmk  0x12345678(%eax,%ecx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 c8 78 56 34 12 \tbndmk  0x12345678(%eax,%ecx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"f3 0f 1a 00          \tbndcl  (%eax),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1a 05 78 56 34 12 \tbndcl  0x12345678,%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x18, }, 4, 0, "", "",
+"f3 0f 1a 18          \tbndcl  (%eax),%bnd3",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "",
+"f3 0f 1a 04 01       \tbndcl  (%ecx,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 04 05 78 56 34 12 \tbndcl  0x12345678(,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "",
+"f3 0f 1a 04 08       \tbndcl  (%eax,%ecx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "",
+"f3 0f 1a 04 c8       \tbndcl  (%eax,%ecx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "",
+"f3 0f 1a 40 12       \tbndcl  0x12(%eax),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "",
+"f3 0f 1a 45 12       \tbndcl  0x12(%ebp),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 01 12    \tbndcl  0x12(%ecx,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 05 12    \tbndcl  0x12(%ebp,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 08 12    \tbndcl  0x12(%eax,%ecx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 c8 12    \tbndcl  0x12(%eax,%ecx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1a 80 78 56 34 12 \tbndcl  0x12345678(%eax),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1a 85 78 56 34 12 \tbndcl  0x12345678(%ebp),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 01 78 56 34 12 \tbndcl  0x12345678(%ecx,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 05 78 56 34 12 \tbndcl  0x12345678(%ebp,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 08 78 56 34 12 \tbndcl  0x12345678(%eax,%ecx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 c8 78 56 34 12 \tbndcl  0x12345678(%eax,%ecx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "",
+"f3 0f 1a c0          \tbndcl  %eax,%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"f2 0f 1a 00          \tbndcu  (%eax),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1a 05 78 56 34 12 \tbndcu  0x12345678,%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x18, }, 4, 0, "", "",
+"f2 0f 1a 18          \tbndcu  (%eax),%bnd3",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "",
+"f2 0f 1a 04 01       \tbndcu  (%ecx,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 04 05 78 56 34 12 \tbndcu  0x12345678(,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "",
+"f2 0f 1a 04 08       \tbndcu  (%eax,%ecx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "",
+"f2 0f 1a 04 c8       \tbndcu  (%eax,%ecx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "",
+"f2 0f 1a 40 12       \tbndcu  0x12(%eax),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "",
+"f2 0f 1a 45 12       \tbndcu  0x12(%ebp),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 01 12    \tbndcu  0x12(%ecx,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 05 12    \tbndcu  0x12(%ebp,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 08 12    \tbndcu  0x12(%eax,%ecx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 c8 12    \tbndcu  0x12(%eax,%ecx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1a 80 78 56 34 12 \tbndcu  0x12345678(%eax),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1a 85 78 56 34 12 \tbndcu  0x12345678(%ebp),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 01 78 56 34 12 \tbndcu  0x12345678(%ecx,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 05 78 56 34 12 \tbndcu  0x12345678(%ebp,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 08 78 56 34 12 \tbndcu  0x12345678(%eax,%ecx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 c8 78 56 34 12 \tbndcu  0x12345678(%eax,%ecx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "",
+"f2 0f 1a c0          \tbndcu  %eax,%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"f2 0f 1b 00          \tbndcn  (%eax),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1b 05 78 56 34 12 \tbndcn  0x12345678,%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x18, }, 4, 0, "", "",
+"f2 0f 1b 18          \tbndcn  (%eax),%bnd3",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "",
+"f2 0f 1b 04 01       \tbndcn  (%ecx,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 04 05 78 56 34 12 \tbndcn  0x12345678(,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "",
+"f2 0f 1b 04 08       \tbndcn  (%eax,%ecx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "",
+"f2 0f 1b 04 c8       \tbndcn  (%eax,%ecx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "",
+"f2 0f 1b 40 12       \tbndcn  0x12(%eax),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "",
+"f2 0f 1b 45 12       \tbndcn  0x12(%ebp),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 01 12    \tbndcn  0x12(%ecx,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 05 12    \tbndcn  0x12(%ebp,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 08 12    \tbndcn  0x12(%eax,%ecx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 c8 12    \tbndcn  0x12(%eax,%ecx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1b 80 78 56 34 12 \tbndcn  0x12345678(%eax),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1b 85 78 56 34 12 \tbndcn  0x12345678(%ebp),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 01 78 56 34 12 \tbndcn  0x12345678(%ecx,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 05 78 56 34 12 \tbndcn  0x12345678(%ebp,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 08 78 56 34 12 \tbndcn  0x12345678(%eax,%ecx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 c8 78 56 34 12 \tbndcn  0x12345678(%eax,%ecx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0xc0, }, 4, 0, "", "",
+"f2 0f 1b c0          \tbndcn  %eax,%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"66 0f 1a 00          \tbndmov (%eax),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1a 05 78 56 34 12 \tbndmov 0x12345678,%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x18, }, 4, 0, "", "",
+"66 0f 1a 18          \tbndmov (%eax),%bnd3",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "",
+"66 0f 1a 04 01       \tbndmov (%ecx,%eax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 04 05 78 56 34 12 \tbndmov 0x12345678(,%eax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "",
+"66 0f 1a 04 08       \tbndmov (%eax,%ecx,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "",
+"66 0f 1a 04 c8       \tbndmov (%eax,%ecx,8),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "",
+"66 0f 1a 40 12       \tbndmov 0x12(%eax),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "",
+"66 0f 1a 45 12       \tbndmov 0x12(%ebp),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 01 12    \tbndmov 0x12(%ecx,%eax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 05 12    \tbndmov 0x12(%ebp,%eax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 08 12    \tbndmov 0x12(%eax,%ecx,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 c8 12    \tbndmov 0x12(%eax,%ecx,8),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1a 80 78 56 34 12 \tbndmov 0x12345678(%eax),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1a 85 78 56 34 12 \tbndmov 0x12345678(%ebp),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 01 78 56 34 12 \tbndmov 0x12345678(%ecx,%eax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 05 78 56 34 12 \tbndmov 0x12345678(%ebp,%eax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 08 78 56 34 12 \tbndmov 0x12345678(%eax,%ecx,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 c8 78 56 34 12 \tbndmov 0x12345678(%eax,%ecx,8),%bnd0",},
+{{0x66, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"66 0f 1b 00          \tbndmov %bnd0,(%eax)",},
+{{0x66, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1b 05 78 56 34 12 \tbndmov %bnd0,0x12345678",},
+{{0x66, 0x0f, 0x1b, 0x18, }, 4, 0, "", "",
+"66 0f 1b 18          \tbndmov %bnd3,(%eax)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "",
+"66 0f 1b 04 01       \tbndmov %bnd0,(%ecx,%eax,1)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 04 05 78 56 34 12 \tbndmov %bnd0,0x12345678(,%eax,1)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "",
+"66 0f 1b 04 08       \tbndmov %bnd0,(%eax,%ecx,1)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "",
+"66 0f 1b 04 c8       \tbndmov %bnd0,(%eax,%ecx,8)",},
+{{0x66, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "",
+"66 0f 1b 40 12       \tbndmov %bnd0,0x12(%eax)",},
+{{0x66, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "",
+"66 0f 1b 45 12       \tbndmov %bnd0,0x12(%ebp)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 01 12    \tbndmov %bnd0,0x12(%ecx,%eax,1)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 05 12    \tbndmov %bnd0,0x12(%ebp,%eax,1)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 08 12    \tbndmov %bnd0,0x12(%eax,%ecx,1)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 c8 12    \tbndmov %bnd0,0x12(%eax,%ecx,8)",},
+{{0x66, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1b 80 78 56 34 12 \tbndmov %bnd0,0x12345678(%eax)",},
+{{0x66, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1b 85 78 56 34 12 \tbndmov %bnd0,0x12345678(%ebp)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 01 78 56 34 12 \tbndmov %bnd0,0x12345678(%ecx,%eax,1)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 05 78 56 34 12 \tbndmov %bnd0,0x12345678(%ebp,%eax,1)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 08 78 56 34 12 \tbndmov %bnd0,0x12345678(%eax,%ecx,1)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 c8 78 56 34 12 \tbndmov %bnd0,0x12345678(%eax,%ecx,8)",},
+{{0x66, 0x0f, 0x1a, 0xc8, }, 4, 0, "", "",
+"66 0f 1a c8          \tbndmov %bnd0,%bnd1",},
+{{0x66, 0x0f, 0x1a, 0xc1, }, 4, 0, "", "",
+"66 0f 1a c1          \tbndmov %bnd1,%bnd0",},
+{{0x0f, 0x1a, 0x00, }, 3, 0, "", "",
+"0f 1a 00             \tbndldx (%eax),%bnd0",},
+{{0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1a 05 78 56 34 12 \tbndldx 0x12345678,%bnd0",},
+{{0x0f, 0x1a, 0x18, }, 3, 0, "", "",
+"0f 1a 18             \tbndldx (%eax),%bnd3",},
+{{0x0f, 0x1a, 0x04, 0x01, }, 4, 0, "", "",
+"0f 1a 04 01          \tbndldx (%ecx,%eax,1),%bnd0",},
+{{0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 04 05 78 56 34 12 \tbndldx 0x12345678(,%eax,1),%bnd0",},
+{{0x0f, 0x1a, 0x04, 0x08, }, 4, 0, "", "",
+"0f 1a 04 08          \tbndldx (%eax,%ecx,1),%bnd0",},
+{{0x0f, 0x1a, 0x40, 0x12, }, 4, 0, "", "",
+"0f 1a 40 12          \tbndldx 0x12(%eax),%bnd0",},
+{{0x0f, 0x1a, 0x45, 0x12, }, 4, 0, "", "",
+"0f 1a 45 12          \tbndldx 0x12(%ebp),%bnd0",},
+{{0x0f, 0x1a, 0x44, 0x01, 0x12, }, 5, 0, "", "",
+"0f 1a 44 01 12       \tbndldx 0x12(%ecx,%eax,1),%bnd0",},
+{{0x0f, 0x1a, 0x44, 0x05, 0x12, }, 5, 0, "", "",
+"0f 1a 44 05 12       \tbndldx 0x12(%ebp,%eax,1),%bnd0",},
+{{0x0f, 0x1a, 0x44, 0x08, 0x12, }, 5, 0, "", "",
+"0f 1a 44 08 12       \tbndldx 0x12(%eax,%ecx,1),%bnd0",},
+{{0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1a 80 78 56 34 12 \tbndldx 0x12345678(%eax),%bnd0",},
+{{0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1a 85 78 56 34 12 \tbndldx 0x12345678(%ebp),%bnd0",},
+{{0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 84 01 78 56 34 12 \tbndldx 0x12345678(%ecx,%eax,1),%bnd0",},
+{{0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 84 05 78 56 34 12 \tbndldx 0x12345678(%ebp,%eax,1),%bnd0",},
+{{0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 84 08 78 56 34 12 \tbndldx 0x12345678(%eax,%ecx,1),%bnd0",},
+{{0x0f, 0x1b, 0x00, }, 3, 0, "", "",
+"0f 1b 00             \tbndstx %bnd0,(%eax)",},
+{{0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1b 05 78 56 34 12 \tbndstx %bnd0,0x12345678",},
+{{0x0f, 0x1b, 0x18, }, 3, 0, "", "",
+"0f 1b 18             \tbndstx %bnd3,(%eax)",},
+{{0x0f, 0x1b, 0x04, 0x01, }, 4, 0, "", "",
+"0f 1b 04 01          \tbndstx %bnd0,(%ecx,%eax,1)",},
+{{0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 04 05 78 56 34 12 \tbndstx %bnd0,0x12345678(,%eax,1)",},
+{{0x0f, 0x1b, 0x04, 0x08, }, 4, 0, "", "",
+"0f 1b 04 08          \tbndstx %bnd0,(%eax,%ecx,1)",},
+{{0x0f, 0x1b, 0x40, 0x12, }, 4, 0, "", "",
+"0f 1b 40 12          \tbndstx %bnd0,0x12(%eax)",},
+{{0x0f, 0x1b, 0x45, 0x12, }, 4, 0, "", "",
+"0f 1b 45 12          \tbndstx %bnd0,0x12(%ebp)",},
+{{0x0f, 0x1b, 0x44, 0x01, 0x12, }, 5, 0, "", "",
+"0f 1b 44 01 12       \tbndstx %bnd0,0x12(%ecx,%eax,1)",},
+{{0x0f, 0x1b, 0x44, 0x05, 0x12, }, 5, 0, "", "",
+"0f 1b 44 05 12       \tbndstx %bnd0,0x12(%ebp,%eax,1)",},
+{{0x0f, 0x1b, 0x44, 0x08, 0x12, }, 5, 0, "", "",
+"0f 1b 44 08 12       \tbndstx %bnd0,0x12(%eax,%ecx,1)",},
+{{0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1b 80 78 56 34 12 \tbndstx %bnd0,0x12345678(%eax)",},
+{{0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1b 85 78 56 34 12 \tbndstx %bnd0,0x12345678(%ebp)",},
+{{0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 84 01 78 56 34 12 \tbndstx %bnd0,0x12345678(%ecx,%eax,1)",},
+{{0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 84 05 78 56 34 12 \tbndstx %bnd0,0x12345678(%ebp,%eax,1)",},
+{{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%eax,%ecx,1)",},
+{{0xf2, 0xe8, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "call", "unconditional",
+"f2 e8 fc ff ff ff    \tbnd call fce <main+0xfce>",},
+{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"f2 ff 10             \tbnd call *(%eax)",},
+{{0xf2, 0xc3, }, 2, 0, "ret", "indirect",
+"f2 c3                \tbnd ret ",},
+{{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional",
+"f2 e9 fc ff ff ff    \tbnd jmp fd9 <main+0xfd9>",},
+{{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional",
+"f2 e9 fc ff ff ff    \tbnd jmp fdf <main+0xfdf>",},
+{{0xf2, 0xff, 0x21, }, 3, 0, "jmp", "indirect",
+"f2 ff 21             \tbnd jmp *(%ecx)",},
+{{0xf2, 0x0f, 0x85, 0xfc, 0xff, 0xff, 0xff, }, 7, 0xfffffffc, "jcc", "conditional",
+"f2 0f 85 fc ff ff ff \tbnd jne fe9 <main+0xfe9>",},
+{{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "",
+"0f 3a cc c1 00       \tsha1rnds4 $0x0,%xmm1,%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "",
+"0f 3a cc d7 91       \tsha1rnds4 $0x91,%xmm7,%xmm2",},
+{{0x0f, 0x3a, 0xcc, 0x00, 0x91, }, 5, 0, "", "",
+"0f 3a cc 00 91       \tsha1rnds4 $0x91,(%eax),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "",
+"0f 3a cc 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678,%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x18, 0x91, }, 5, 0, "", "",
+"0f 3a cc 18 91       \tsha1rnds4 $0x91,(%eax),%xmm3",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x01, 0x91, }, 6, 0, "", "",
+"0f 3a cc 04 01 91    \tsha1rnds4 $0x91,(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 04 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(,%eax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x08, 0x91, }, 6, 0, "", "",
+"0f 3a cc 04 08 91    \tsha1rnds4 $0x91,(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0xc8, 0x91, }, 6, 0, "", "",
+"0f 3a cc 04 c8 91    \tsha1rnds4 $0x91,(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x40, 0x12, 0x91, }, 6, 0, "", "",
+"0f 3a cc 40 12 91    \tsha1rnds4 $0x91,0x12(%eax),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x45, 0x12, 0x91, }, 6, 0, "", "",
+"0f 3a cc 45 12 91    \tsha1rnds4 $0x91,0x12(%ebp),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0x01, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 01 12 91 \tsha1rnds4 $0x91,0x12(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0x05, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 05 12 91 \tsha1rnds4 $0x91,0x12(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0x08, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 08 12 91 \tsha1rnds4 $0x91,0x12(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0xc8, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 c8 12 91 \tsha1rnds4 $0x91,0x12(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "",
+"0f 3a cc 80 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%eax),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "",
+"0f 3a cc 85 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%ebp),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 01 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 08 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 c8 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0xc1, }, 4, 0, "", "",
+"0f 38 c8 c1          \tsha1nexte %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xc8, 0xd7, }, 4, 0, "", "",
+"0f 38 c8 d7          \tsha1nexte %xmm7,%xmm2",},
+{{0x0f, 0x38, 0xc8, 0x00, }, 4, 0, "", "",
+"0f 38 c8 00          \tsha1nexte (%eax),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c8 05 78 56 34 12 \tsha1nexte 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x18, }, 4, 0, "", "",
+"0f 38 c8 18          \tsha1nexte (%eax),%xmm3",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 c8 04 01       \tsha1nexte (%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 04 05 78 56 34 12 \tsha1nexte 0x12345678(,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 c8 04 08       \tsha1nexte (%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 c8 04 c8       \tsha1nexte (%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 c8 40 12       \tsha1nexte 0x12(%eax),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 c8 45 12       \tsha1nexte 0x12(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 01 12    \tsha1nexte 0x12(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 05 12    \tsha1nexte 0x12(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 08 12    \tsha1nexte 0x12(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 c8 12    \tsha1nexte 0x12(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c8 80 78 56 34 12 \tsha1nexte 0x12345678(%eax),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c8 85 78 56 34 12 \tsha1nexte 0x12345678(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 01 78 56 34 12 \tsha1nexte 0x12345678(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 05 78 56 34 12 \tsha1nexte 0x12345678(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 08 78 56 34 12 \tsha1nexte 0x12345678(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 c8 78 56 34 12 \tsha1nexte 0x12345678(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0xc1, }, 4, 0, "", "",
+"0f 38 c9 c1          \tsha1msg1 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xc9, 0xd7, }, 4, 0, "", "",
+"0f 38 c9 d7          \tsha1msg1 %xmm7,%xmm2",},
+{{0x0f, 0x38, 0xc9, 0x00, }, 4, 0, "", "",
+"0f 38 c9 00          \tsha1msg1 (%eax),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c9 05 78 56 34 12 \tsha1msg1 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x18, }, 4, 0, "", "",
+"0f 38 c9 18          \tsha1msg1 (%eax),%xmm3",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 c9 04 01       \tsha1msg1 (%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 04 05 78 56 34 12 \tsha1msg1 0x12345678(,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 c9 04 08       \tsha1msg1 (%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 c9 04 c8       \tsha1msg1 (%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 c9 40 12       \tsha1msg1 0x12(%eax),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 c9 45 12       \tsha1msg1 0x12(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 01 12    \tsha1msg1 0x12(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 05 12    \tsha1msg1 0x12(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 08 12    \tsha1msg1 0x12(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 c8 12    \tsha1msg1 0x12(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c9 80 78 56 34 12 \tsha1msg1 0x12345678(%eax),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c9 85 78 56 34 12 \tsha1msg1 0x12345678(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 01 78 56 34 12 \tsha1msg1 0x12345678(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 05 78 56 34 12 \tsha1msg1 0x12345678(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 08 78 56 34 12 \tsha1msg1 0x12345678(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 c8 78 56 34 12 \tsha1msg1 0x12345678(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xca, 0xc1, }, 4, 0, "", "",
+"0f 38 ca c1          \tsha1msg2 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xca, 0xd7, }, 4, 0, "", "",
+"0f 38 ca d7          \tsha1msg2 %xmm7,%xmm2",},
+{{0x0f, 0x38, 0xca, 0x00, }, 4, 0, "", "",
+"0f 38 ca 00          \tsha1msg2 (%eax),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 ca 05 78 56 34 12 \tsha1msg2 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xca, 0x18, }, 4, 0, "", "",
+"0f 38 ca 18          \tsha1msg2 (%eax),%xmm3",},
+{{0x0f, 0x38, 0xca, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 ca 04 01       \tsha1msg2 (%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 04 05 78 56 34 12 \tsha1msg2 0x12345678(,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 ca 04 08       \tsha1msg2 (%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 ca 04 c8       \tsha1msg2 (%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 ca 40 12       \tsha1msg2 0x12(%eax),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 ca 45 12       \tsha1msg2 0x12(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 01 12    \tsha1msg2 0x12(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 05 12    \tsha1msg2 0x12(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 08 12    \tsha1msg2 0x12(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 c8 12    \tsha1msg2 0x12(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 ca 80 78 56 34 12 \tsha1msg2 0x12345678(%eax),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 ca 85 78 56 34 12 \tsha1msg2 0x12345678(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 01 78 56 34 12 \tsha1msg2 0x12345678(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 05 78 56 34 12 \tsha1msg2 0x12345678(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 08 78 56 34 12 \tsha1msg2 0x12345678(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 c8 78 56 34 12 \tsha1msg2 0x12345678(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xcb, 0xcc, }, 4, 0, "", "",
+"0f 38 cb cc          \tsha256rnds2 %xmm0,%xmm4,%xmm1",},
+{{0x0f, 0x38, 0xcb, 0xd7, }, 4, 0, "", "",
+"0f 38 cb d7          \tsha256rnds2 %xmm0,%xmm7,%xmm2",},
+{{0x0f, 0x38, 0xcb, 0x08, }, 4, 0, "", "",
+"0f 38 cb 08          \tsha256rnds2 %xmm0,(%eax),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cb 0d 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678,%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x18, }, 4, 0, "", "",
+"0f 38 cb 18          \tsha256rnds2 %xmm0,(%eax),%xmm3",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x01, }, 5, 0, "", "",
+"0f 38 cb 0c 01       \tsha256rnds2 %xmm0,(%ecx,%eax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 0c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(,%eax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x08, }, 5, 0, "", "",
+"0f 38 cb 0c 08       \tsha256rnds2 %xmm0,(%eax,%ecx,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0xc8, }, 5, 0, "", "",
+"0f 38 cb 0c c8       \tsha256rnds2 %xmm0,(%eax,%ecx,8),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x48, 0x12, }, 5, 0, "", "",
+"0f 38 cb 48 12       \tsha256rnds2 %xmm0,0x12(%eax),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4d, 0x12, }, 5, 0, "", "",
+"0f 38 cb 4d 12       \tsha256rnds2 %xmm0,0x12(%ebp),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c 01 12    \tsha256rnds2 %xmm0,0x12(%ecx,%eax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c 05 12    \tsha256rnds2 %xmm0,0x12(%ebp,%eax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c 08 12    \tsha256rnds2 %xmm0,0x12(%eax,%ecx,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c c8 12    \tsha256rnds2 %xmm0,0x12(%eax,%ecx,8),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cb 88 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%eax),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cb 8d 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%ebp),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c 01 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%ecx,%eax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%ebp,%eax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c 08 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%eax,%ecx,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c c8 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x0f, 0x38, 0xcc, 0xc1, }, 4, 0, "", "",
+"0f 38 cc c1          \tsha256msg1 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xcc, 0xd7, }, 4, 0, "", "",
+"0f 38 cc d7          \tsha256msg1 %xmm7,%xmm2",},
+{{0x0f, 0x38, 0xcc, 0x00, }, 4, 0, "", "",
+"0f 38 cc 00          \tsha256msg1 (%eax),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cc 05 78 56 34 12 \tsha256msg1 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x18, }, 4, 0, "", "",
+"0f 38 cc 18          \tsha256msg1 (%eax),%xmm3",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 cc 04 01       \tsha256msg1 (%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 04 05 78 56 34 12 \tsha256msg1 0x12345678(,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 cc 04 08       \tsha256msg1 (%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 cc 04 c8       \tsha256msg1 (%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 cc 40 12       \tsha256msg1 0x12(%eax),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 cc 45 12       \tsha256msg1 0x12(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 01 12    \tsha256msg1 0x12(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 05 12    \tsha256msg1 0x12(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 08 12    \tsha256msg1 0x12(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 c8 12    \tsha256msg1 0x12(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cc 80 78 56 34 12 \tsha256msg1 0x12345678(%eax),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cc 85 78 56 34 12 \tsha256msg1 0x12345678(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 01 78 56 34 12 \tsha256msg1 0x12345678(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 05 78 56 34 12 \tsha256msg1 0x12345678(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 08 78 56 34 12 \tsha256msg1 0x12345678(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 c8 78 56 34 12 \tsha256msg1 0x12345678(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0xc1, }, 4, 0, "", "",
+"0f 38 cd c1          \tsha256msg2 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xcd, 0xd7, }, 4, 0, "", "",
+"0f 38 cd d7          \tsha256msg2 %xmm7,%xmm2",},
+{{0x0f, 0x38, 0xcd, 0x00, }, 4, 0, "", "",
+"0f 38 cd 00          \tsha256msg2 (%eax),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cd 05 78 56 34 12 \tsha256msg2 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x18, }, 4, 0, "", "",
+"0f 38 cd 18          \tsha256msg2 (%eax),%xmm3",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 cd 04 01       \tsha256msg2 (%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 04 05 78 56 34 12 \tsha256msg2 0x12345678(,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 cd 04 08       \tsha256msg2 (%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 cd 04 c8       \tsha256msg2 (%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 cd 40 12       \tsha256msg2 0x12(%eax),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 cd 45 12       \tsha256msg2 0x12(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 01 12    \tsha256msg2 0x12(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 05 12    \tsha256msg2 0x12(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 08 12    \tsha256msg2 0x12(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 c8 12    \tsha256msg2 0x12(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cd 80 78 56 34 12 \tsha256msg2 0x12345678(%eax),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cd 85 78 56 34 12 \tsha256msg2 0x12345678(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 01 78 56 34 12 \tsha256msg2 0x12345678(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 05 78 56 34 12 \tsha256msg2 0x12345678(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 08 78 56 34 12 \tsha256msg2 0x12345678(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 c8 78 56 34 12 \tsha256msg2 0x12345678(%eax,%ecx,8),%xmm0",},
+{{0x66, 0x0f, 0xae, 0x38, }, 4, 0, "", "",
+"66 0f ae 38          \tclflushopt (%eax)",},
+{{0x66, 0x0f, 0xae, 0x3d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f ae 3d 78 56 34 12 \tclflushopt 0x12345678",},
+{{0x66, 0x0f, 0xae, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f ae bc c8 78 56 34 12 \tclflushopt 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0xae, 0x38, }, 3, 0, "", "",
+"0f ae 38             \tclflush (%eax)",},
+{{0x0f, 0xae, 0xf8, }, 3, 0, "", "",
+"0f ae f8             \tsfence ",},
+{{0x66, 0x0f, 0xae, 0x30, }, 4, 0, "", "",
+"66 0f ae 30          \tclwb   (%eax)",},
+{{0x66, 0x0f, 0xae, 0x35, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f ae 35 78 56 34 12 \tclwb   0x12345678",},
+{{0x66, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f ae b4 c8 78 56 34 12 \tclwb   0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0xae, 0x30, }, 3, 0, "", "",
+"0f ae 30             \txsaveopt (%eax)",},
+{{0x0f, 0xae, 0xf0, }, 3, 0, "", "",
+"0f ae f0             \tmfence ",},
+{{0x0f, 0xc7, 0x20, }, 3, 0, "", "",
+"0f c7 20             \txsavec (%eax)",},
+{{0x0f, 0xc7, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f c7 25 78 56 34 12 \txsavec 0x12345678",},
+{{0x0f, 0xc7, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 a4 c8 78 56 34 12 \txsavec 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0xc7, 0x28, }, 3, 0, "", "",
+"0f c7 28             \txsaves (%eax)",},
+{{0x0f, 0xc7, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f c7 2d 78 56 34 12 \txsaves 0x12345678",},
+{{0x0f, 0xc7, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 ac c8 78 56 34 12 \txsaves 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0xc7, 0x18, }, 3, 0, "", "",
+"0f c7 18             \txrstors (%eax)",},
+{{0x0f, 0xc7, 0x1d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f c7 1d 78 56 34 12 \txrstors 0x12345678",},
+{{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20          \tptwritel (%eax)",},
+{{0xf3, 0x0f, 0xae, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f ae 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20          \tptwritel (%eax)",},
+{{0xf3, 0x0f, 0xae, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f ae 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%eax,%ecx,8)",},
diff --git a/arch/x86/tests/insn-x86-dat-64.c b/arch/x86/tests/insn-x86-dat-64.c
new file mode 100644
index 0000000..c57f346
--- /dev/null
+++ b/arch/x86/tests/insn-x86-dat-64.c
@@ -0,0 +1,1729 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generated by gen-insn-x86-dat.sh and gen-insn-x86-dat.awk
+ * from insn-x86-dat-src.c for inclusion by insn-x86.c
+ * Do not change this code.
+*/
+
+{{0x0f, 0x31, }, 2, 0, "", "",
+"0f 31                \trdtsc  ",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "",
+"c4 e2 7d 13 eb       \tvcvtph2ps %xmm3,%ymm5",},
+{{0x48, 0x0f, 0x41, 0xd8, }, 4, 0, "", "",
+"48 0f 41 d8          \tcmovno %rax,%rbx",},
+{{0x48, 0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"48 0f 41 88 78 56 34 12 \tcmovno 0x12345678(%rax),%rcx",},
+{{0x66, 0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 41 88 78 56 34 12 \tcmovno 0x12345678(%rax),%cx",},
+{{0x48, 0x0f, 0x44, 0xd8, }, 4, 0, "", "",
+"48 0f 44 d8          \tcmove  %rax,%rbx",},
+{{0x48, 0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"48 0f 44 88 78 56 34 12 \tcmove  0x12345678(%rax),%rcx",},
+{{0x66, 0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 44 88 78 56 34 12 \tcmove  0x12345678(%rax),%cx",},
+{{0x0f, 0x90, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 90 80 78 56 34 12 \tseto   0x12345678(%rax)",},
+{{0x0f, 0x91, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 91 80 78 56 34 12 \tsetno  0x12345678(%rax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb   0x12345678(%rax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb   0x12345678(%rax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb   0x12345678(%rax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae  0x12345678(%rax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae  0x12345678(%rax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae  0x12345678(%rax)",},
+{{0x0f, 0x98, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 98 80 78 56 34 12 \tsets   0x12345678(%rax)",},
+{{0x0f, 0x99, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 99 80 78 56 34 12 \tsetns  0x12345678(%rax)",},
+{{0xc5, 0xcc, 0x41, 0xef, }, 4, 0, "", "",
+"c5 cc 41 ef          \tkandw  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x41, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 41 ef       \tkandq  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x41, 0xef, }, 4, 0, "", "",
+"c5 cd 41 ef          \tkandb  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x41, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 41 ef       \tkandd  %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x42, 0xef, }, 4, 0, "", "",
+"c5 cc 42 ef          \tkandnw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x42, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 42 ef       \tkandnq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x42, 0xef, }, 4, 0, "", "",
+"c5 cd 42 ef          \tkandnb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x42, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 42 ef       \tkandnd %k7,%k6,%k5",},
+{{0xc5, 0xf8, 0x44, 0xf7, }, 4, 0, "", "",
+"c5 f8 44 f7          \tknotw  %k7,%k6",},
+{{0xc4, 0xe1, 0xf8, 0x44, 0xf7, }, 5, 0, "", "",
+"c4 e1 f8 44 f7       \tknotq  %k7,%k6",},
+{{0xc5, 0xf9, 0x44, 0xf7, }, 4, 0, "", "",
+"c5 f9 44 f7          \tknotb  %k7,%k6",},
+{{0xc4, 0xe1, 0xf9, 0x44, 0xf7, }, 5, 0, "", "",
+"c4 e1 f9 44 f7       \tknotd  %k7,%k6",},
+{{0xc5, 0xcc, 0x45, 0xef, }, 4, 0, "", "",
+"c5 cc 45 ef          \tkorw   %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x45, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 45 ef       \tkorq   %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x45, 0xef, }, 4, 0, "", "",
+"c5 cd 45 ef          \tkorb   %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x45, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 45 ef       \tkord   %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x46, 0xef, }, 4, 0, "", "",
+"c5 cc 46 ef          \tkxnorw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x46, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 46 ef       \tkxnorq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x46, 0xef, }, 4, 0, "", "",
+"c5 cd 46 ef          \tkxnorb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x46, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 46 ef       \tkxnord %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x47, 0xef, }, 4, 0, "", "",
+"c5 cc 47 ef          \tkxorw  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x47, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 47 ef       \tkxorq  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x47, 0xef, }, 4, 0, "", "",
+"c5 cd 47 ef          \tkxorb  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x47, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 47 ef       \tkxord  %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x4a, 0xef, }, 4, 0, "", "",
+"c5 cc 4a ef          \tkaddw  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x4a, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 4a ef       \tkaddq  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x4a, 0xef, }, 4, 0, "", "",
+"c5 cd 4a ef          \tkaddb  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x4a, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 4a ef       \tkaddd  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x4b, 0xef, }, 4, 0, "", "",
+"c5 cd 4b ef          \tkunpckbw %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x4b, 0xef, }, 4, 0, "", "",
+"c5 cc 4b ef          \tkunpckwd %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x4b, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 4b ef       \tkunpckdq %k7,%k6,%k5",},
+{{0xc5, 0xf8, 0x90, 0xee, }, 4, 0, "", "",
+"c5 f8 90 ee          \tkmovw  %k6,%k5",},
+{{0xc5, 0xf8, 0x90, 0x29, }, 4, 0, "", "",
+"c5 f8 90 29          \tkmovw  (%rcx),%k5",},
+{{0xc4, 0xa1, 0x78, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 78 90 ac f0 23 01 00 00 \tkmovw  0x123(%rax,%r14,8),%k5",},
+{{0xc5, 0xf8, 0x91, 0x29, }, 4, 0, "", "",
+"c5 f8 91 29          \tkmovw  %k5,(%rcx)",},
+{{0xc4, 0xa1, 0x78, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 78 91 ac f0 23 01 00 00 \tkmovw  %k5,0x123(%rax,%r14,8)",},
+{{0xc5, 0xf8, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 f8 92 e8          \tkmovw  %eax,%k5",},
+{{0xc5, 0xf8, 0x92, 0xed, }, 4, 0, "", "",
+"c5 f8 92 ed          \tkmovw  %ebp,%k5",},
+{{0xc4, 0xc1, 0x78, 0x92, 0xed, }, 5, 0, "", "",
+"c4 c1 78 92 ed       \tkmovw  %r13d,%k5",},
+{{0xc5, 0xf8, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 f8 93 c5          \tkmovw  %k5,%eax",},
+{{0xc5, 0xf8, 0x93, 0xed, }, 4, 0, "", "",
+"c5 f8 93 ed          \tkmovw  %k5,%ebp",},
+{{0xc5, 0x78, 0x93, 0xed, }, 4, 0, "", "",
+"c5 78 93 ed          \tkmovw  %k5,%r13d",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 90 ee       \tkmovq  %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0x29, }, 5, 0, "", "",
+"c4 e1 f8 90 29       \tkmovq  (%rcx),%k5",},
+{{0xc4, 0xa1, 0xf8, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 f8 90 ac f0 23 01 00 00 \tkmovq  0x123(%rax,%r14,8),%k5",},
+{{0xc4, 0xe1, 0xf8, 0x91, 0x29, }, 5, 0, "", "",
+"c4 e1 f8 91 29       \tkmovq  %k5,(%rcx)",},
+{{0xc4, 0xa1, 0xf8, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 f8 91 ac f0 23 01 00 00 \tkmovq  %k5,0x123(%rax,%r14,8)",},
+{{0xc4, 0xe1, 0xfb, 0x92, 0xe8, }, 5, 0, "", "",
+"c4 e1 fb 92 e8       \tkmovq  %rax,%k5",},
+{{0xc4, 0xe1, 0xfb, 0x92, 0xed, }, 5, 0, "", "",
+"c4 e1 fb 92 ed       \tkmovq  %rbp,%k5",},
+{{0xc4, 0xc1, 0xfb, 0x92, 0xed, }, 5, 0, "", "",
+"c4 c1 fb 92 ed       \tkmovq  %r13,%k5",},
+{{0xc4, 0xe1, 0xfb, 0x93, 0xc5, }, 5, 0, "", "",
+"c4 e1 fb 93 c5       \tkmovq  %k5,%rax",},
+{{0xc4, 0xe1, 0xfb, 0x93, 0xed, }, 5, 0, "", "",
+"c4 e1 fb 93 ed       \tkmovq  %k5,%rbp",},
+{{0xc4, 0x61, 0xfb, 0x93, 0xed, }, 5, 0, "", "",
+"c4 61 fb 93 ed       \tkmovq  %k5,%r13",},
+{{0xc5, 0xf9, 0x90, 0xee, }, 4, 0, "", "",
+"c5 f9 90 ee          \tkmovb  %k6,%k5",},
+{{0xc5, 0xf9, 0x90, 0x29, }, 4, 0, "", "",
+"c5 f9 90 29          \tkmovb  (%rcx),%k5",},
+{{0xc4, 0xa1, 0x79, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 79 90 ac f0 23 01 00 00 \tkmovb  0x123(%rax,%r14,8),%k5",},
+{{0xc5, 0xf9, 0x91, 0x29, }, 4, 0, "", "",
+"c5 f9 91 29          \tkmovb  %k5,(%rcx)",},
+{{0xc4, 0xa1, 0x79, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 79 91 ac f0 23 01 00 00 \tkmovb  %k5,0x123(%rax,%r14,8)",},
+{{0xc5, 0xf9, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 f9 92 e8          \tkmovb  %eax,%k5",},
+{{0xc5, 0xf9, 0x92, 0xed, }, 4, 0, "", "",
+"c5 f9 92 ed          \tkmovb  %ebp,%k5",},
+{{0xc4, 0xc1, 0x79, 0x92, 0xed, }, 5, 0, "", "",
+"c4 c1 79 92 ed       \tkmovb  %r13d,%k5",},
+{{0xc5, 0xf9, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 f9 93 c5          \tkmovb  %k5,%eax",},
+{{0xc5, 0xf9, 0x93, 0xed, }, 4, 0, "", "",
+"c5 f9 93 ed          \tkmovb  %k5,%ebp",},
+{{0xc5, 0x79, 0x93, 0xed, }, 4, 0, "", "",
+"c5 79 93 ed          \tkmovb  %k5,%r13d",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 90 ee       \tkmovd  %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0x29, }, 5, 0, "", "",
+"c4 e1 f9 90 29       \tkmovd  (%rcx),%k5",},
+{{0xc4, 0xa1, 0xf9, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 f9 90 ac f0 23 01 00 00 \tkmovd  0x123(%rax,%r14,8),%k5",},
+{{0xc4, 0xe1, 0xf9, 0x91, 0x29, }, 5, 0, "", "",
+"c4 e1 f9 91 29       \tkmovd  %k5,(%rcx)",},
+{{0xc4, 0xa1, 0xf9, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 f9 91 ac f0 23 01 00 00 \tkmovd  %k5,0x123(%rax,%r14,8)",},
+{{0xc5, 0xfb, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 fb 92 e8          \tkmovd  %eax,%k5",},
+{{0xc5, 0xfb, 0x92, 0xed, }, 4, 0, "", "",
+"c5 fb 92 ed          \tkmovd  %ebp,%k5",},
+{{0xc4, 0xc1, 0x7b, 0x92, 0xed, }, 5, 0, "", "",
+"c4 c1 7b 92 ed       \tkmovd  %r13d,%k5",},
+{{0xc5, 0xfb, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 fb 93 c5          \tkmovd  %k5,%eax",},
+{{0xc5, 0xfb, 0x93, 0xed, }, 4, 0, "", "",
+"c5 fb 93 ed          \tkmovd  %k5,%ebp",},
+{{0xc5, 0x7b, 0x93, 0xed, }, 4, 0, "", "",
+"c5 7b 93 ed          \tkmovd  %k5,%r13d",},
+{{0xc5, 0xf8, 0x98, 0xee, }, 4, 0, "", "",
+"c5 f8 98 ee          \tkortestw %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x98, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 98 ee       \tkortestq %k6,%k5",},
+{{0xc5, 0xf9, 0x98, 0xee, }, 4, 0, "", "",
+"c5 f9 98 ee          \tkortestb %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x98, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 98 ee       \tkortestd %k6,%k5",},
+{{0xc5, 0xf8, 0x99, 0xee, }, 4, 0, "", "",
+"c5 f8 99 ee          \tktestw %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x99, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 99 ee       \tktestq %k6,%k5",},
+{{0xc5, 0xf9, 0x99, 0xee, }, 4, 0, "", "",
+"c5 f9 99 ee          \tktestb %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x99, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 99 ee       \tktestd %k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x30, 0xee, 0x12, }, 6, 0, "", "",
+"c4 e3 f9 30 ee 12    \tkshiftrw $0x12,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x31, 0xee, 0x5b, }, 6, 0, "", "",
+"c4 e3 f9 31 ee 5b    \tkshiftrq $0x5b,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x32, 0xee, 0x12, }, 6, 0, "", "",
+"c4 e3 f9 32 ee 12    \tkshiftlw $0x12,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x33, 0xee, 0x5b, }, 6, 0, "", "",
+"c4 e3 f9 33 ee 5b    \tkshiftlq $0x5b,%k6,%k5",},
+{{0xc5, 0xf8, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 f8 5b f5          \tvcvtdq2ps %xmm5,%xmm6",},
+{{0x62, 0x91, 0xfc, 0x4f, 0x5b, 0xf5, }, 6, 0, "", "",
+"62 91 fc 4f 5b f5    \tvcvtqq2ps %zmm29,%ymm6{%k7}",},
+{{0xc5, 0xf9, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 f9 5b f5          \tvcvtps2dq %xmm5,%xmm6",},
+{{0xc5, 0xfa, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 fa 5b f5          \tvcvttps2dq %xmm5,%xmm6",},
+{{0x0f, 0x6f, 0xe0, }, 3, 0, "", "",
+"0f 6f e0             \tmovq   %mm0,%mm4",},
+{{0xc5, 0xfd, 0x6f, 0xf4, }, 4, 0, "", "",
+"c5 fd 6f f4          \tvmovdqa %ymm4,%ymm6",},
+{{0x62, 0x01, 0x7d, 0x48, 0x6f, 0xd1, }, 6, 0, "", "",
+"62 01 7d 48 6f d1    \tvmovdqa32 %zmm25,%zmm26",},
+{{0x62, 0x01, 0xfd, 0x48, 0x6f, 0xd1, }, 6, 0, "", "",
+"62 01 fd 48 6f d1    \tvmovdqa64 %zmm25,%zmm26",},
+{{0xc5, 0xfe, 0x6f, 0xf4, }, 4, 0, "", "",
+"c5 fe 6f f4          \tvmovdqu %ymm4,%ymm6",},
+{{0x62, 0x01, 0x7e, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 01 7e 48 6f f5    \tvmovdqu32 %zmm29,%zmm30",},
+{{0x62, 0x01, 0xfe, 0x48, 0x6f, 0xd1, }, 6, 0, "", "",
+"62 01 fe 48 6f d1    \tvmovdqu64 %zmm25,%zmm26",},
+{{0x62, 0x01, 0x7f, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 01 7f 48 6f f5    \tvmovdqu8 %zmm29,%zmm30",},
+{{0x62, 0x01, 0xff, 0x48, 0x6f, 0xd1, }, 6, 0, "", "",
+"62 01 ff 48 6f d1    \tvmovdqu16 %zmm25,%zmm26",},
+{{0x0f, 0x78, 0xc3, }, 3, 0, "", "",
+"0f 78 c3             \tvmread %rax,%rbx",},
+{{0x62, 0x01, 0x7c, 0x48, 0x78, 0xd1, }, 6, 0, "", "",
+"62 01 7c 48 78 d1    \tvcvttps2udq %zmm25,%zmm26",},
+{{0x62, 0x91, 0xfc, 0x4f, 0x78, 0xf5, }, 6, 0, "", "",
+"62 91 fc 4f 78 f5    \tvcvttpd2udq %zmm29,%ymm6{%k7}",},
+{{0x62, 0xf1, 0xff, 0x08, 0x78, 0xc6, }, 6, 0, "", "",
+"62 f1 ff 08 78 c6    \tvcvttsd2usi %xmm6,%rax",},
+{{0x62, 0xf1, 0xfe, 0x08, 0x78, 0xc6, }, 6, 0, "", "",
+"62 f1 fe 08 78 c6    \tvcvttss2usi %xmm6,%rax",},
+{{0x62, 0x61, 0x7d, 0x4f, 0x78, 0xd5, }, 6, 0, "", "",
+"62 61 7d 4f 78 d5    \tvcvttps2uqq %ymm5,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfd, 0x48, 0x78, 0xf5, }, 6, 0, "", "",
+"62 01 fd 48 78 f5    \tvcvttpd2uqq %zmm29,%zmm30",},
+{{0x0f, 0x79, 0xd8, }, 3, 0, "", "",
+"0f 79 d8             \tvmwrite %rax,%rbx",},
+{{0x62, 0x01, 0x7c, 0x48, 0x79, 0xd1, }, 6, 0, "", "",
+"62 01 7c 48 79 d1    \tvcvtps2udq %zmm25,%zmm26",},
+{{0x62, 0x91, 0xfc, 0x4f, 0x79, 0xf5, }, 6, 0, "", "",
+"62 91 fc 4f 79 f5    \tvcvtpd2udq %zmm29,%ymm6{%k7}",},
+{{0x62, 0xf1, 0xff, 0x08, 0x79, 0xc6, }, 6, 0, "", "",
+"62 f1 ff 08 79 c6    \tvcvtsd2usi %xmm6,%rax",},
+{{0x62, 0xf1, 0xfe, 0x08, 0x79, 0xc6, }, 6, 0, "", "",
+"62 f1 fe 08 79 c6    \tvcvtss2usi %xmm6,%rax",},
+{{0x62, 0x61, 0x7d, 0x4f, 0x79, 0xd5, }, 6, 0, "", "",
+"62 61 7d 4f 79 d5    \tvcvtps2uqq %ymm5,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfd, 0x48, 0x79, 0xf5, }, 6, 0, "", "",
+"62 01 fd 48 79 f5    \tvcvtpd2uqq %zmm29,%zmm30",},
+{{0x62, 0x61, 0x7e, 0x4f, 0x7a, 0xed, }, 6, 0, "", "",
+"62 61 7e 4f 7a ed    \tvcvtudq2pd %ymm5,%zmm29{%k7}",},
+{{0x62, 0x01, 0xfe, 0x48, 0x7a, 0xd1, }, 6, 0, "", "",
+"62 01 fe 48 7a d1    \tvcvtuqq2pd %zmm25,%zmm26",},
+{{0x62, 0x01, 0x7f, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 01 7f 48 7a f5    \tvcvtudq2ps %zmm29,%zmm30",},
+{{0x62, 0x01, 0xff, 0x4f, 0x7a, 0xd1, }, 6, 0, "", "",
+"62 01 ff 4f 7a d1    \tvcvtuqq2ps %zmm25,%ymm26{%k7}",},
+{{0x62, 0x01, 0x7d, 0x4f, 0x7a, 0xd1, }, 6, 0, "", "",
+"62 01 7d 4f 7a d1    \tvcvttps2qq %ymm25,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfd, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 01 fd 48 7a f5    \tvcvttpd2qq %zmm29,%zmm30",},
+{{0x62, 0xf1, 0x57, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 f1 57 08 7b f0    \tvcvtusi2sd %eax,%xmm5,%xmm6",},
+{{0x62, 0xf1, 0x56, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 f1 56 08 7b f0    \tvcvtusi2ss %eax,%xmm5,%xmm6",},
+{{0x62, 0x61, 0x7d, 0x4f, 0x7b, 0xd5, }, 6, 0, "", "",
+"62 61 7d 4f 7b d5    \tvcvtps2qq %ymm5,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfd, 0x48, 0x7b, 0xf5, }, 6, 0, "", "",
+"62 01 fd 48 7b f5    \tvcvtpd2qq %zmm29,%zmm30",},
+{{0x0f, 0x7f, 0xc4, }, 3, 0, "", "",
+"0f 7f c4             \tmovq   %mm0,%mm4",},
+{{0xc5, 0x7d, 0x7f, 0xc6, }, 4, 0, "", "",
+"c5 7d 7f c6          \tvmovdqa %ymm8,%ymm6",},
+{{0x62, 0x01, 0x7d, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 7d 48 7f ca    \tvmovdqa32 %zmm25,%zmm26",},
+{{0x62, 0x01, 0xfd, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 fd 48 7f ca    \tvmovdqa64 %zmm25,%zmm26",},
+{{0xc5, 0x7e, 0x7f, 0xc6, }, 4, 0, "", "",
+"c5 7e 7f c6          \tvmovdqu %ymm8,%ymm6",},
+{{0x62, 0x01, 0x7e, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 7e 48 7f ca    \tvmovdqu32 %zmm25,%zmm26",},
+{{0x62, 0x01, 0xfe, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 fe 48 7f ca    \tvmovdqu64 %zmm25,%zmm26",},
+{{0x62, 0x61, 0x7f, 0x48, 0x7f, 0x31, }, 6, 0, "", "",
+"62 61 7f 48 7f 31    \tvmovdqu8 %zmm30,(%rcx)",},
+{{0x62, 0x01, 0xff, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 ff 48 7f ca    \tvmovdqu16 %zmm25,%zmm26",},
+{{0x0f, 0xdb, 0xd1, }, 3, 0, "", "",
+"0f db d1             \tpand   %mm1,%mm2",},
+{{0x66, 0x0f, 0xdb, 0xd1, }, 4, 0, "", "",
+"66 0f db d1          \tpand   %xmm1,%xmm2",},
+{{0xc5, 0xcd, 0xdb, 0xd4, }, 4, 0, "", "",
+"c5 cd db d4          \tvpand  %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x01, 0x35, 0x40, 0xdb, 0xd0, }, 6, 0, "", "",
+"62 01 35 40 db d0    \tvpandd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x01, 0xb5, 0x40, 0xdb, 0xd0, }, 6, 0, "", "",
+"62 01 b5 40 db d0    \tvpandq %zmm24,%zmm25,%zmm26",},
+{{0x0f, 0xdf, 0xd1, }, 3, 0, "", "",
+"0f df d1             \tpandn  %mm1,%mm2",},
+{{0x66, 0x0f, 0xdf, 0xd1, }, 4, 0, "", "",
+"66 0f df d1          \tpandn  %xmm1,%xmm2",},
+{{0xc5, 0xcd, 0xdf, 0xd4, }, 4, 0, "", "",
+"c5 cd df d4          \tvpandn %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x01, 0x35, 0x40, 0xdf, 0xd0, }, 6, 0, "", "",
+"62 01 35 40 df d0    \tvpandnd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x01, 0xb5, 0x40, 0xdf, 0xd0, }, 6, 0, "", "",
+"62 01 b5 40 df d0    \tvpandnq %zmm24,%zmm25,%zmm26",},
+{{0xc5, 0xf9, 0xe6, 0xd1, }, 4, 0, "", "",
+"c5 f9 e6 d1          \tvcvttpd2dq %xmm1,%xmm2",},
+{{0xc5, 0xfa, 0xe6, 0xf5, }, 4, 0, "", "",
+"c5 fa e6 f5          \tvcvtdq2pd %xmm5,%xmm6",},
+{{0x62, 0x61, 0x7e, 0x4f, 0xe6, 0xd5, }, 6, 0, "", "",
+"62 61 7e 4f e6 d5    \tvcvtdq2pd %ymm5,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfe, 0x48, 0xe6, 0xd1, }, 6, 0, "", "",
+"62 01 fe 48 e6 d1    \tvcvtqq2pd %zmm25,%zmm26",},
+{{0xc5, 0xfb, 0xe6, 0xd1, }, 4, 0, "", "",
+"c5 fb e6 d1          \tvcvtpd2dq %xmm1,%xmm2",},
+{{0x0f, 0xeb, 0xf4, }, 3, 0, "", "",
+"0f eb f4             \tpor    %mm4,%mm6",},
+{{0xc5, 0xcd, 0xeb, 0xd4, }, 4, 0, "", "",
+"c5 cd eb d4          \tvpor   %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x01, 0x35, 0x40, 0xeb, 0xd0, }, 6, 0, "", "",
+"62 01 35 40 eb d0    \tvpord  %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x01, 0xb5, 0x40, 0xeb, 0xd0, }, 6, 0, "", "",
+"62 01 b5 40 eb d0    \tvporq  %zmm24,%zmm25,%zmm26",},
+{{0x0f, 0xef, 0xf4, }, 3, 0, "", "",
+"0f ef f4             \tpxor   %mm4,%mm6",},
+{{0xc5, 0xcd, 0xef, 0xd4, }, 4, 0, "", "",
+"c5 cd ef d4          \tvpxor  %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x01, 0x35, 0x40, 0xef, 0xd0, }, 6, 0, "", "",
+"62 01 35 40 ef d0    \tvpxord %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x01, 0xb5, 0x40, 0xef, 0xd0, }, 6, 0, "", "",
+"62 01 b5 40 ef d0    \tvpxorq %zmm24,%zmm25,%zmm26",},
+{{0x66, 0x0f, 0x38, 0x10, 0xc1, }, 5, 0, "", "",
+"66 0f 38 10 c1       \tpblendvb %xmm0,%xmm1,%xmm0",},
+{{0x62, 0x02, 0x9d, 0x40, 0x10, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 10 eb    \tvpsrlvw %zmm27,%zmm28,%zmm29",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x10, 0xe6, }, 6, 0, "", "",
+"62 62 7e 4f 10 e6    \tvpmovuswb %zmm28,%ymm6{%k7}",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x11, 0xe6, }, 6, 0, "", "",
+"62 62 7e 4f 11 e6    \tvpmovusdb %zmm28,%xmm6{%k7}",},
+{{0x62, 0x02, 0x9d, 0x40, 0x11, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 11 eb    \tvpsravw %zmm27,%zmm28,%zmm29",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x12, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 12 de    \tvpmovusqb %zmm27,%xmm6{%k7}",},
+{{0x62, 0x02, 0x9d, 0x40, 0x12, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 12 eb    \tvpsllvw %zmm27,%zmm28,%zmm29",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "",
+"c4 e2 7d 13 eb       \tvcvtph2ps %xmm3,%ymm5",},
+{{0x62, 0x62, 0x7d, 0x4f, 0x13, 0xdd, }, 6, 0, "", "",
+"62 62 7d 4f 13 dd    \tvcvtph2ps %ymm5,%zmm27{%k7}",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x13, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 13 de    \tvpmovusdw %zmm27,%ymm6{%k7}",},
+{{0x66, 0x0f, 0x38, 0x14, 0xc1, }, 5, 0, "", "",
+"66 0f 38 14 c1       \tblendvps %xmm0,%xmm1,%xmm0",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x14, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 14 de    \tvpmovusqw %zmm27,%xmm6{%k7}",},
+{{0x62, 0x02, 0x1d, 0x40, 0x14, 0xeb, }, 6, 0, "", "",
+"62 02 1d 40 14 eb    \tvprorvd %zmm27,%zmm28,%zmm29",},
+{{0x62, 0x02, 0x9d, 0x40, 0x14, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 14 eb    \tvprorvq %zmm27,%zmm28,%zmm29",},
+{{0x66, 0x0f, 0x38, 0x15, 0xc1, }, 5, 0, "", "",
+"66 0f 38 15 c1       \tblendvpd %xmm0,%xmm1,%xmm0",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x15, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 15 de    \tvpmovusqd %zmm27,%ymm6{%k7}",},
+{{0x62, 0x02, 0x1d, 0x40, 0x15, 0xeb, }, 6, 0, "", "",
+"62 02 1d 40 15 eb    \tvprolvd %zmm27,%zmm28,%zmm29",},
+{{0x62, 0x02, 0x9d, 0x40, 0x15, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 15 eb    \tvprolvq %zmm27,%zmm28,%zmm29",},
+{{0xc4, 0xe2, 0x4d, 0x16, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 16 d4       \tvpermps %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x82, 0x2d, 0x27, 0x16, 0xf0, }, 6, 0, "", "",
+"62 82 2d 27 16 f0    \tvpermps %ymm24,%ymm26,%ymm22{%k7}",},
+{{0x62, 0x82, 0xad, 0x27, 0x16, 0xf0, }, 6, 0, "", "",
+"62 82 ad 27 16 f0    \tvpermpd %ymm24,%ymm26,%ymm22{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x19, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 19 f4       \tvbroadcastsd %xmm4,%ymm6",},
+{{0x62, 0x02, 0x7d, 0x48, 0x19, 0xd3, }, 6, 0, "", "",
+"62 02 7d 48 19 d3    \tvbroadcastf32x2 %xmm27,%zmm26",},
+{{0xc4, 0xe2, 0x7d, 0x1a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 1a 21       \tvbroadcastf128 (%rcx),%ymm4",},
+{{0x62, 0x62, 0x7d, 0x48, 0x1a, 0x11, }, 6, 0, "", "",
+"62 62 7d 48 1a 11    \tvbroadcastf32x4 (%rcx),%zmm26",},
+{{0x62, 0x62, 0xfd, 0x48, 0x1a, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 1a 11    \tvbroadcastf64x2 (%rcx),%zmm26",},
+{{0x62, 0x62, 0x7d, 0x48, 0x1b, 0x19, }, 6, 0, "", "",
+"62 62 7d 48 1b 19    \tvbroadcastf32x8 (%rcx),%zmm27",},
+{{0x62, 0x62, 0xfd, 0x48, 0x1b, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 1b 11    \tvbroadcastf64x4 (%rcx),%zmm26",},
+{{0x62, 0x02, 0xfd, 0x48, 0x1f, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 1f e3    \tvpabsq %zmm27,%zmm28",},
+{{0xc4, 0xe2, 0x79, 0x20, 0xec, }, 5, 0, "", "",
+"c4 e2 79 20 ec       \tvpmovsxbw %xmm4,%xmm5",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x20, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 20 de    \tvpmovswb %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x21, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 21 f4       \tvpmovsxbd %xmm4,%ymm6",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x21, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 21 de    \tvpmovsdb %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x22, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 22 e4       \tvpmovsxbq %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x22, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 22 de    \tvpmovsqb %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x23, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 23 e4       \tvpmovsxwd %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x23, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 23 de    \tvpmovsdw %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x24, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 24 f4       \tvpmovsxwq %xmm4,%ymm6",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x24, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 24 de    \tvpmovsqw %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x25, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 25 e4       \tvpmovsxdq %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x25, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 25 de    \tvpmovsqd %zmm27,%ymm6{%k7}",},
+{{0x62, 0x92, 0x1d, 0x40, 0x26, 0xeb, }, 6, 0, "", "",
+"62 92 1d 40 26 eb    \tvptestmb %zmm27,%zmm28,%k5",},
+{{0x62, 0x92, 0x9d, 0x40, 0x26, 0xeb, }, 6, 0, "", "",
+"62 92 9d 40 26 eb    \tvptestmw %zmm27,%zmm28,%k5",},
+{{0x62, 0x92, 0x26, 0x40, 0x26, 0xea, }, 6, 0, "", "",
+"62 92 26 40 26 ea    \tvptestnmb %zmm26,%zmm27,%k5",},
+{{0x62, 0x92, 0xa6, 0x40, 0x26, 0xea, }, 6, 0, "", "",
+"62 92 a6 40 26 ea    \tvptestnmw %zmm26,%zmm27,%k5",},
+{{0x62, 0x92, 0x1d, 0x40, 0x27, 0xeb, }, 6, 0, "", "",
+"62 92 1d 40 27 eb    \tvptestmd %zmm27,%zmm28,%k5",},
+{{0x62, 0x92, 0x9d, 0x40, 0x27, 0xeb, }, 6, 0, "", "",
+"62 92 9d 40 27 eb    \tvptestmq %zmm27,%zmm28,%k5",},
+{{0x62, 0x92, 0x26, 0x40, 0x27, 0xea, }, 6, 0, "", "",
+"62 92 26 40 27 ea    \tvptestnmd %zmm26,%zmm27,%k5",},
+{{0x62, 0x92, 0xa6, 0x40, 0x27, 0xea, }, 6, 0, "", "",
+"62 92 a6 40 27 ea    \tvptestnmq %zmm26,%zmm27,%k5",},
+{{0xc4, 0xe2, 0x4d, 0x28, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 28 d4       \tvpmuldq %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x62, 0x7e, 0x48, 0x28, 0xe5, }, 6, 0, "", "",
+"62 62 7e 48 28 e5    \tvpmovm2b %k5,%zmm28",},
+{{0x62, 0x62, 0xfe, 0x48, 0x28, 0xe5, }, 6, 0, "", "",
+"62 62 fe 48 28 e5    \tvpmovm2w %k5,%zmm28",},
+{{0xc4, 0xe2, 0x4d, 0x29, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 29 d4       \tvpcmpeqq %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x92, 0x7e, 0x48, 0x29, 0xec, }, 6, 0, "", "",
+"62 92 7e 48 29 ec    \tvpmovb2m %zmm28,%k5",},
+{{0x62, 0x92, 0xfe, 0x48, 0x29, 0xec, }, 6, 0, "", "",
+"62 92 fe 48 29 ec    \tvpmovw2m %zmm28,%k5",},
+{{0xc4, 0xe2, 0x7d, 0x2a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 2a 21       \tvmovntdqa (%rcx),%ymm4",},
+{{0x62, 0x62, 0xfe, 0x48, 0x2a, 0xf6, }, 6, 0, "", "",
+"62 62 fe 48 2a f6    \tvpbroadcastmb2q %k6,%zmm30",},
+{{0xc4, 0xe2, 0x5d, 0x2c, 0x31, }, 5, 0, "", "",
+"c4 e2 5d 2c 31       \tvmaskmovps (%rcx),%ymm4,%ymm6",},
+{{0x62, 0x02, 0x35, 0x40, 0x2c, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 2c d0    \tvscalefps %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x2c, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 2c d0    \tvscalefpd %zmm24,%zmm25,%zmm26",},
+{{0xc4, 0xe2, 0x5d, 0x2d, 0x31, }, 5, 0, "", "",
+"c4 e2 5d 2d 31       \tvmaskmovpd (%rcx),%ymm4,%ymm6",},
+{{0x62, 0x02, 0x35, 0x07, 0x2d, 0xd0, }, 6, 0, "", "",
+"62 02 35 07 2d d0    \tvscalefss %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0xb5, 0x07, 0x2d, 0xd0, }, 6, 0, "", "",
+"62 02 b5 07 2d d0    \tvscalefsd %xmm24,%xmm25,%xmm26{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x30, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 30 e4       \tvpmovzxbw %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x30, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 30 de    \tvpmovwb %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x31, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 31 f4       \tvpmovzxbd %xmm4,%ymm6",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x31, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 31 de    \tvpmovdb %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x32, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 32 e4       \tvpmovzxbq %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x32, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 32 de    \tvpmovqb %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x33, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 33 e4       \tvpmovzxwd %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x33, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 33 de    \tvpmovdw %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x34, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 34 f4       \tvpmovzxwq %xmm4,%ymm6",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x34, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 34 de    \tvpmovqw %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x35, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 35 e4       \tvpmovzxdq %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x35, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 35 de    \tvpmovqd %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x4d, 0x36, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 36 d4       \tvpermd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x82, 0x2d, 0x27, 0x36, 0xf0, }, 6, 0, "", "",
+"62 82 2d 27 36 f0    \tvpermd %ymm24,%ymm26,%ymm22{%k7}",},
+{{0x62, 0x82, 0xad, 0x27, 0x36, 0xf0, }, 6, 0, "", "",
+"62 82 ad 27 36 f0    \tvpermq %ymm24,%ymm26,%ymm22{%k7}",},
+{{0xc4, 0xe2, 0x4d, 0x38, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 38 d4       \tvpminsb %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x62, 0x7e, 0x48, 0x38, 0xe5, }, 6, 0, "", "",
+"62 62 7e 48 38 e5    \tvpmovm2d %k5,%zmm28",},
+{{0x62, 0x62, 0xfe, 0x48, 0x38, 0xe5, }, 6, 0, "", "",
+"62 62 fe 48 38 e5    \tvpmovm2q %k5,%zmm28",},
+{{0xc4, 0xe2, 0x69, 0x39, 0xd9, }, 5, 0, "", "",
+"c4 e2 69 39 d9       \tvpminsd %xmm1,%xmm2,%xmm3",},
+{{0x62, 0x02, 0x35, 0x40, 0x39, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 39 d0    \tvpminsd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x39, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 39 d0    \tvpminsq %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x92, 0x7e, 0x48, 0x39, 0xec, }, 6, 0, "", "",
+"62 92 7e 48 39 ec    \tvpmovd2m %zmm28,%k5",},
+{{0x62, 0x92, 0xfe, 0x48, 0x39, 0xec, }, 6, 0, "", "",
+"62 92 fe 48 39 ec    \tvpmovq2m %zmm28,%k5",},
+{{0xc4, 0xe2, 0x4d, 0x3a, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3a d4       \tvpminuw %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x62, 0x7e, 0x48, 0x3a, 0xe6, }, 6, 0, "", "",
+"62 62 7e 48 3a e6    \tvpbroadcastmw2d %k6,%zmm28",},
+{{0xc4, 0xe2, 0x4d, 0x3b, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3b d4       \tvpminud %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x3b, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 3b d0    \tvpminud %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x3b, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 3b d0    \tvpminuq %zmm24,%zmm25,%zmm26",},
+{{0xc4, 0xe2, 0x4d, 0x3d, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3d d4       \tvpmaxsd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x3d, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 3d d0    \tvpmaxsd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x3d, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 3d d0    \tvpmaxsq %zmm24,%zmm25,%zmm26",},
+{{0xc4, 0xe2, 0x4d, 0x3f, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3f d4       \tvpmaxud %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x3f, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 3f d0    \tvpmaxud %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x3f, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 3f d0    \tvpmaxuq %zmm24,%zmm25,%zmm26",},
+{{0xc4, 0xe2, 0x4d, 0x40, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 40 d4       \tvpmulld %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x40, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 40 d0    \tvpmulld %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x40, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 40 d0    \tvpmullq %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0x7d, 0x48, 0x42, 0xd1, }, 6, 0, "", "",
+"62 02 7d 48 42 d1    \tvgetexpps %zmm25,%zmm26",},
+{{0x62, 0x02, 0xfd, 0x48, 0x42, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 42 e3    \tvgetexppd %zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x07, 0x43, 0xd0, }, 6, 0, "", "",
+"62 02 35 07 43 d0    \tvgetexpss %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0x95, 0x07, 0x43, 0xf4, }, 6, 0, "", "",
+"62 02 95 07 43 f4    \tvgetexpsd %xmm28,%xmm29,%xmm30{%k7}",},
+{{0x62, 0x02, 0x7d, 0x48, 0x44, 0xe3, }, 6, 0, "", "",
+"62 02 7d 48 44 e3    \tvplzcntd %zmm27,%zmm28",},
+{{0x62, 0x02, 0xfd, 0x48, 0x44, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 44 e3    \tvplzcntq %zmm27,%zmm28",},
+{{0xc4, 0xe2, 0x4d, 0x46, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 46 d4       \tvpsravd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x46, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 46 d0    \tvpsravd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x46, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 46 d0    \tvpsravq %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0x7d, 0x48, 0x4c, 0xd1, }, 6, 0, "", "",
+"62 02 7d 48 4c d1    \tvrcp14ps %zmm25,%zmm26",},
+{{0x62, 0x02, 0xfd, 0x48, 0x4c, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 4c e3    \tvrcp14pd %zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x07, 0x4d, 0xd0, }, 6, 0, "", "",
+"62 02 35 07 4d d0    \tvrcp14ss %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0xb5, 0x07, 0x4d, 0xd0, }, 6, 0, "", "",
+"62 02 b5 07 4d d0    \tvrcp14sd %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0x7d, 0x48, 0x4e, 0xd1, }, 6, 0, "", "",
+"62 02 7d 48 4e d1    \tvrsqrt14ps %zmm25,%zmm26",},
+{{0x62, 0x02, 0xfd, 0x48, 0x4e, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 4e e3    \tvrsqrt14pd %zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x07, 0x4f, 0xd0, }, 6, 0, "", "",
+"62 02 35 07 4f d0    \tvrsqrt14ss %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0xb5, 0x07, 0x4f, 0xd0, }, 6, 0, "", "",
+"62 02 b5 07 4f d0    \tvrsqrt14sd %xmm24,%xmm25,%xmm26{%k7}",},
+{{0xc4, 0xe2, 0x79, 0x59, 0xf4, }, 5, 0, "", "",
+"c4 e2 79 59 f4       \tvpbroadcastq %xmm4,%xmm6",},
+{{0x62, 0x02, 0x7d, 0x48, 0x59, 0xd3, }, 6, 0, "", "",
+"62 02 7d 48 59 d3    \tvbroadcasti32x2 %xmm27,%zmm26",},
+{{0xc4, 0xe2, 0x7d, 0x5a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 5a 21       \tvbroadcasti128 (%rcx),%ymm4",},
+{{0x62, 0x62, 0x7d, 0x48, 0x5a, 0x11, }, 6, 0, "", "",
+"62 62 7d 48 5a 11    \tvbroadcasti32x4 (%rcx),%zmm26",},
+{{0x62, 0x62, 0xfd, 0x48, 0x5a, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 5a 11    \tvbroadcasti64x2 (%rcx),%zmm26",},
+{{0x62, 0x62, 0x7d, 0x48, 0x5b, 0x21, }, 6, 0, "", "",
+"62 62 7d 48 5b 21    \tvbroadcasti32x8 (%rcx),%zmm28",},
+{{0x62, 0x62, 0xfd, 0x48, 0x5b, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 5b 11    \tvbroadcasti64x4 (%rcx),%zmm26",},
+{{0x62, 0x02, 0x25, 0x40, 0x64, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 64 e2    \tvpblendmd %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x64, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 64 e2    \tvpblendmq %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x40, 0x65, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 65 d0    \tvblendmps %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xa5, 0x40, 0x65, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 65 e2    \tvblendmpd %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x66, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 66 e2    \tvpblendmb %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x66, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 66 e2    \tvpblendmw %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x40, 0x75, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 75 d0    \tvpermi2b %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xa5, 0x40, 0x75, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 75 e2    \tvpermi2w %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x76, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 76 e2    \tvpermi2d %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x76, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 76 e2    \tvpermi2q %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x77, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 77 e2    \tvpermi2ps %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x77, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 77 e2    \tvpermi2pd %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x62, 0x7d, 0x08, 0x7a, 0xf0, }, 6, 0, "", "",
+"62 62 7d 08 7a f0    \tvpbroadcastb %eax,%xmm30",},
+{{0x62, 0x62, 0x7d, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 62 7d 08 7b f0    \tvpbroadcastw %eax,%xmm30",},
+{{0x62, 0x62, 0x7d, 0x08, 0x7c, 0xf0, }, 6, 0, "", "",
+"62 62 7d 08 7c f0    \tvpbroadcastd %eax,%xmm30",},
+{{0x62, 0x62, 0xfd, 0x48, 0x7c, 0xf0, }, 6, 0, "", "",
+"62 62 fd 48 7c f0    \tvpbroadcastq %rax,%zmm30",},
+{{0x62, 0x02, 0x25, 0x40, 0x7d, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 7d e2    \tvpermt2b %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x7d, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 7d e2    \tvpermt2w %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x7e, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 7e e2    \tvpermt2d %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x7e, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 7e e2    \tvpermt2q %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x7f, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 7f e2    \tvpermt2ps %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x7f, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 7f e2    \tvpermt2pd %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x83, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 83 e2    \tvpmultishiftqb %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x62, 0x7d, 0x48, 0x88, 0x11, }, 6, 0, "", "",
+"62 62 7d 48 88 11    \tvexpandps (%rcx),%zmm26",},
+{{0x62, 0x62, 0xfd, 0x48, 0x88, 0x21, }, 6, 0, "", "",
+"62 62 fd 48 88 21    \tvexpandpd (%rcx),%zmm28",},
+{{0x62, 0x62, 0x7d, 0x48, 0x89, 0x21, }, 6, 0, "", "",
+"62 62 7d 48 89 21    \tvpexpandd (%rcx),%zmm28",},
+{{0x62, 0x62, 0xfd, 0x48, 0x89, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 89 11    \tvpexpandq (%rcx),%zmm26",},
+{{0x62, 0x62, 0x7d, 0x48, 0x8a, 0x21, }, 6, 0, "", "",
+"62 62 7d 48 8a 21    \tvcompressps %zmm28,(%rcx)",},
+{{0x62, 0x62, 0xfd, 0x48, 0x8a, 0x21, }, 6, 0, "", "",
+"62 62 fd 48 8a 21    \tvcompresspd %zmm28,(%rcx)",},
+{{0x62, 0x62, 0x7d, 0x48, 0x8b, 0x21, }, 6, 0, "", "",
+"62 62 7d 48 8b 21    \tvpcompressd %zmm28,(%rcx)",},
+{{0x62, 0x62, 0xfd, 0x48, 0x8b, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 8b 11    \tvpcompressq %zmm26,(%rcx)",},
+{{0x62, 0x02, 0x25, 0x40, 0x8d, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 8d e2    \tvpermb %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x8d, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 8d e2    \tvpermw %zmm26,%zmm27,%zmm28",},
+{{0xc4, 0xe2, 0x69, 0x90, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 69 90 4c 7d 02 \tvpgatherdd %xmm2,0x2(%rbp,%xmm7,2),%xmm1",},
+{{0xc4, 0xe2, 0xe9, 0x90, 0x4c, 0x7d, 0x04, }, 7, 0, "", "",
+"c4 e2 e9 90 4c 7d 04 \tvpgatherdq %xmm2,0x4(%rbp,%xmm7,2),%xmm1",},
+{{0x62, 0x22, 0x7d, 0x41, 0x90, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 7d 41 90 94 dd 7b 00 00 00 \tvpgatherdd 0x7b(%rbp,%zmm27,8),%zmm26{%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0x90, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 90 94 dd 7b 00 00 00 \tvpgatherdq 0x7b(%rbp,%ymm27,8),%zmm26{%k1}",},
+{{0xc4, 0xe2, 0x69, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 69 91 4c 7d 02 \tvpgatherqd %xmm2,0x2(%rbp,%xmm7,2),%xmm1",},
+{{0xc4, 0xe2, 0xe9, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 e9 91 4c 7d 02 \tvpgatherqq %xmm2,0x2(%rbp,%xmm7,2),%xmm1",},
+{{0x62, 0x22, 0x7d, 0x41, 0x91, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 7d 41 91 94 dd 7b 00 00 00 \tvpgatherqd 0x7b(%rbp,%zmm27,8),%ymm26{%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0x91, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 91 94 dd 7b 00 00 00 \tvpgatherqq 0x7b(%rbp,%zmm27,8),%zmm26{%k1}",},
+{{0x62, 0x22, 0x7d, 0x41, 0xa0, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 7d 41 a0 a4 ed 7b 00 00 00 \tvpscatterdd %zmm28,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0xa0, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 a0 94 dd 7b 00 00 00 \tvpscatterdq %zmm26,0x7b(%rbp,%ymm27,8){%k1}",},
+{{0x62, 0xb2, 0x7d, 0x41, 0xa1, 0xb4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 b2 7d 41 a1 b4 ed 7b 00 00 00 \tvpscatterqd %ymm6,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0xb2, 0xfd, 0x21, 0xa1, 0xb4, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 b2 fd 21 a1 b4 dd 7b 00 00 00 \tvpscatterqq %ymm6,0x7b(%rbp,%ymm27,8){%k1}",},
+{{0x62, 0x22, 0x7d, 0x41, 0xa2, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 7d 41 a2 a4 ed 7b 00 00 00 \tvscatterdps %zmm28,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0xa2, 0xa4, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 a2 a4 dd 7b 00 00 00 \tvscatterdpd %zmm28,0x7b(%rbp,%ymm27,8){%k1}",},
+{{0x62, 0xb2, 0x7d, 0x41, 0xa3, 0xb4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 b2 7d 41 a3 b4 ed 7b 00 00 00 \tvscatterqps %ymm6,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0xa3, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 a3 a4 ed 7b 00 00 00 \tvscatterqpd %zmm28,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0x02, 0xa5, 0x40, 0xb4, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 b4 e2    \tvpmadd52luq %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0xb5, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 b5 e2    \tvpmadd52huq %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x7d, 0x48, 0xc4, 0xda, }, 6, 0, "", "",
+"62 02 7d 48 c4 da    \tvpconflictd %zmm26,%zmm27",},
+{{0x62, 0x02, 0xfd, 0x48, 0xc4, 0xda, }, 6, 0, "", "",
+"62 02 fd 48 c4 da    \tvpconflictq %zmm26,%zmm27",},
+{{0x62, 0x02, 0x7d, 0x48, 0xc8, 0xf5, }, 6, 0, "", "",
+"62 02 7d 48 c8 f5    \tvexp2ps %zmm29,%zmm30",},
+{{0x62, 0x02, 0xfd, 0x48, 0xc8, 0xda, }, 6, 0, "", "",
+"62 02 fd 48 c8 da    \tvexp2pd %zmm26,%zmm27",},
+{{0x62, 0x02, 0x7d, 0x48, 0xca, 0xf5, }, 6, 0, "", "",
+"62 02 7d 48 ca f5    \tvrcp28ps %zmm29,%zmm30",},
+{{0x62, 0x02, 0xfd, 0x48, 0xca, 0xda, }, 6, 0, "", "",
+"62 02 fd 48 ca da    \tvrcp28pd %zmm26,%zmm27",},
+{{0x62, 0x02, 0x15, 0x07, 0xcb, 0xf4, }, 6, 0, "", "",
+"62 02 15 07 cb f4    \tvrcp28ss %xmm28,%xmm29,%xmm30{%k7}",},
+{{0x62, 0x02, 0xad, 0x07, 0xcb, 0xd9, }, 6, 0, "", "",
+"62 02 ad 07 cb d9    \tvrcp28sd %xmm25,%xmm26,%xmm27{%k7}",},
+{{0x62, 0x02, 0x7d, 0x48, 0xcc, 0xf5, }, 6, 0, "", "",
+"62 02 7d 48 cc f5    \tvrsqrt28ps %zmm29,%zmm30",},
+{{0x62, 0x02, 0xfd, 0x48, 0xcc, 0xda, }, 6, 0, "", "",
+"62 02 fd 48 cc da    \tvrsqrt28pd %zmm26,%zmm27",},
+{{0x62, 0x02, 0x15, 0x07, 0xcd, 0xf4, }, 6, 0, "", "",
+"62 02 15 07 cd f4    \tvrsqrt28ss %xmm28,%xmm29,%xmm30{%k7}",},
+{{0x62, 0x02, 0xad, 0x07, 0xcd, 0xd9, }, 6, 0, "", "",
+"62 02 ad 07 cd d9    \tvrsqrt28sd %xmm25,%xmm26,%xmm27{%k7}",},
+{{0x62, 0x03, 0x15, 0x40, 0x03, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 40 03 f4 12 \tvalignd $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0xad, 0x40, 0x03, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 40 03 d9 12 \tvalignq $0x12,%zmm25,%zmm26,%zmm27",},
+{{0xc4, 0xe3, 0x7d, 0x08, 0xd6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 08 d6 05    \tvroundps $0x5,%ymm6,%ymm2",},
+{{0x62, 0x03, 0x7d, 0x48, 0x08, 0xd1, 0x12, }, 7, 0, "", "",
+"62 03 7d 48 08 d1 12 \tvrndscaleps $0x12,%zmm25,%zmm26",},
+{{0xc4, 0xe3, 0x7d, 0x09, 0xd6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 09 d6 05    \tvroundpd $0x5,%ymm6,%ymm2",},
+{{0x62, 0x03, 0xfd, 0x48, 0x09, 0xd1, 0x12, }, 7, 0, "", "",
+"62 03 fd 48 09 d1 12 \tvrndscalepd $0x12,%zmm25,%zmm26",},
+{{0xc4, 0xe3, 0x49, 0x0a, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 49 0a d4 05    \tvroundss $0x5,%xmm4,%xmm6,%xmm2",},
+{{0x62, 0x03, 0x35, 0x07, 0x0a, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 35 07 0a d0 12 \tvrndscaless $0x12,%xmm24,%xmm25,%xmm26{%k7}",},
+{{0xc4, 0xe3, 0x49, 0x0b, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 49 0b d4 05    \tvroundsd $0x5,%xmm4,%xmm6,%xmm2",},
+{{0x62, 0x03, 0xb5, 0x07, 0x0b, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 b5 07 0b d0 12 \tvrndscalesd $0x12,%xmm24,%xmm25,%xmm26{%k7}",},
+{{0xc4, 0xe3, 0x5d, 0x18, 0xf4, 0x05, }, 6, 0, "", "",
+"c4 e3 5d 18 f4 05    \tvinsertf128 $0x5,%xmm4,%ymm4,%ymm6",},
+{{0x62, 0x03, 0x35, 0x47, 0x18, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 35 47 18 d0 12 \tvinsertf32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}",},
+{{0x62, 0x03, 0xb5, 0x47, 0x18, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 b5 47 18 d0 12 \tvinsertf64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}",},
+{{0xc4, 0xe3, 0x7d, 0x19, 0xe4, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 19 e4 05    \tvextractf128 $0x5,%ymm4,%xmm4",},
+{{0x62, 0x03, 0x7d, 0x4f, 0x19, 0xca, 0x12, }, 7, 0, "", "",
+"62 03 7d 4f 19 ca 12 \tvextractf32x4 $0x12,%zmm25,%xmm26{%k7}",},
+{{0x62, 0x03, 0xfd, 0x4f, 0x19, 0xca, 0x12, }, 7, 0, "", "",
+"62 03 fd 4f 19 ca 12 \tvextractf64x2 $0x12,%zmm25,%xmm26{%k7}",},
+{{0x62, 0x03, 0x2d, 0x47, 0x1a, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 47 1a d9 12 \tvinsertf32x8 $0x12,%ymm25,%zmm26,%zmm27{%k7}",},
+{{0x62, 0x03, 0x95, 0x47, 0x1a, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 47 1a f4 12 \tvinsertf64x4 $0x12,%ymm28,%zmm29,%zmm30{%k7}",},
+{{0x62, 0x03, 0x7d, 0x4f, 0x1b, 0xee, 0x12, }, 7, 0, "", "",
+"62 03 7d 4f 1b ee 12 \tvextractf32x8 $0x12,%zmm29,%ymm30{%k7}",},
+{{0x62, 0x03, 0xfd, 0x4f, 0x1b, 0xd3, 0x12, }, 7, 0, "", "",
+"62 03 fd 4f 1b d3 12 \tvextractf64x4 $0x12,%zmm26,%ymm27{%k7}",},
+{{0x62, 0x93, 0x0d, 0x40, 0x1e, 0xed, 0x12, }, 7, 0, "", "",
+"62 93 0d 40 1e ed 12 \tvpcmpud $0x12,%zmm29,%zmm30,%k5",},
+{{0x62, 0x93, 0xa5, 0x40, 0x1e, 0xea, 0x12, }, 7, 0, "", "",
+"62 93 a5 40 1e ea 12 \tvpcmpuq $0x12,%zmm26,%zmm27,%k5",},
+{{0x62, 0x93, 0x0d, 0x40, 0x1f, 0xed, 0x12, }, 7, 0, "", "",
+"62 93 0d 40 1f ed 12 \tvpcmpd $0x12,%zmm29,%zmm30,%k5",},
+{{0x62, 0x93, 0xa5, 0x40, 0x1f, 0xea, 0x12, }, 7, 0, "", "",
+"62 93 a5 40 1f ea 12 \tvpcmpq $0x12,%zmm26,%zmm27,%k5",},
+{{0x62, 0x03, 0x15, 0x40, 0x23, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 40 23 f4 12 \tvshuff32x4 $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0xad, 0x40, 0x23, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 40 23 d9 12 \tvshuff64x2 $0x12,%zmm25,%zmm26,%zmm27",},
+{{0x62, 0x03, 0x15, 0x40, 0x25, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 40 25 f4 12 \tvpternlogd $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x95, 0x40, 0x25, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 40 25 f4 12 \tvpternlogq $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x7d, 0x48, 0x26, 0xda, 0x12, }, 7, 0, "", "",
+"62 03 7d 48 26 da 12 \tvgetmantps $0x12,%zmm26,%zmm27",},
+{{0x62, 0x03, 0xfd, 0x48, 0x26, 0xf5, 0x12, }, 7, 0, "", "",
+"62 03 fd 48 26 f5 12 \tvgetmantpd $0x12,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x2d, 0x07, 0x27, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 07 27 d9 12 \tvgetmantss $0x12,%xmm25,%xmm26,%xmm27{%k7}",},
+{{0x62, 0x03, 0x95, 0x07, 0x27, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 07 27 f4 12 \tvgetmantsd $0x12,%xmm28,%xmm29,%xmm30{%k7}",},
+{{0xc4, 0xe3, 0x5d, 0x38, 0xf4, 0x05, }, 6, 0, "", "",
+"c4 e3 5d 38 f4 05    \tvinserti128 $0x5,%xmm4,%ymm4,%ymm6",},
+{{0x62, 0x03, 0x35, 0x47, 0x38, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 35 47 38 d0 12 \tvinserti32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}",},
+{{0x62, 0x03, 0xb5, 0x47, 0x38, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 b5 47 38 d0 12 \tvinserti64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}",},
+{{0xc4, 0xe3, 0x7d, 0x39, 0xe6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 39 e6 05    \tvextracti128 $0x5,%ymm4,%xmm6",},
+{{0x62, 0x03, 0x7d, 0x4f, 0x39, 0xca, 0x12, }, 7, 0, "", "",
+"62 03 7d 4f 39 ca 12 \tvextracti32x4 $0x12,%zmm25,%xmm26{%k7}",},
+{{0x62, 0x03, 0xfd, 0x4f, 0x39, 0xca, 0x12, }, 7, 0, "", "",
+"62 03 fd 4f 39 ca 12 \tvextracti64x2 $0x12,%zmm25,%xmm26{%k7}",},
+{{0x62, 0x03, 0x15, 0x47, 0x3a, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 47 3a f4 12 \tvinserti32x8 $0x12,%ymm28,%zmm29,%zmm30{%k7}",},
+{{0x62, 0x03, 0xad, 0x47, 0x3a, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 47 3a d9 12 \tvinserti64x4 $0x12,%ymm25,%zmm26,%zmm27{%k7}",},
+{{0x62, 0x03, 0x7d, 0x4f, 0x3b, 0xee, 0x12, }, 7, 0, "", "",
+"62 03 7d 4f 3b ee 12 \tvextracti32x8 $0x12,%zmm29,%ymm30{%k7}",},
+{{0x62, 0x03, 0xfd, 0x4f, 0x3b, 0xd3, 0x12, }, 7, 0, "", "",
+"62 03 fd 4f 3b d3 12 \tvextracti64x4 $0x12,%zmm26,%ymm27{%k7}",},
+{{0x62, 0x93, 0x0d, 0x40, 0x3e, 0xed, 0x12, }, 7, 0, "", "",
+"62 93 0d 40 3e ed 12 \tvpcmpub $0x12,%zmm29,%zmm30,%k5",},
+{{0x62, 0x93, 0xa5, 0x40, 0x3e, 0xea, 0x12, }, 7, 0, "", "",
+"62 93 a5 40 3e ea 12 \tvpcmpuw $0x12,%zmm26,%zmm27,%k5",},
+{{0x62, 0x93, 0x0d, 0x40, 0x3f, 0xed, 0x12, }, 7, 0, "", "",
+"62 93 0d 40 3f ed 12 \tvpcmpb $0x12,%zmm29,%zmm30,%k5",},
+{{0x62, 0x93, 0xa5, 0x40, 0x3f, 0xea, 0x12, }, 7, 0, "", "",
+"62 93 a5 40 3f ea 12 \tvpcmpw $0x12,%zmm26,%zmm27,%k5",},
+{{0xc4, 0xe3, 0x4d, 0x42, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 4d 42 d4 05    \tvmpsadbw $0x5,%ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf3, 0x55, 0x48, 0x42, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 48 42 f4 12 \tvdbpsadbw $0x12,%zmm4,%zmm5,%zmm6",},
+{{0x62, 0x03, 0x2d, 0x40, 0x43, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 40 43 d9 12 \tvshufi32x4 $0x12,%zmm25,%zmm26,%zmm27",},
+{{0x62, 0x03, 0x95, 0x40, 0x43, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 40 43 f4 12 \tvshufi64x2 $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x2d, 0x40, 0x50, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 40 50 d9 12 \tvrangeps $0x12,%zmm25,%zmm26,%zmm27",},
+{{0x62, 0x03, 0x95, 0x40, 0x50, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 40 50 f4 12 \tvrangepd $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x2d, 0x00, 0x51, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 00 51 d9 12 \tvrangess $0x12,%xmm25,%xmm26,%xmm27",},
+{{0x62, 0x03, 0x95, 0x00, 0x51, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 00 51 f4 12 \tvrangesd $0x12,%xmm28,%xmm29,%xmm30",},
+{{0x62, 0x03, 0x15, 0x40, 0x54, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 40 54 f4 12 \tvfixupimmps $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0xad, 0x40, 0x54, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 40 54 d9 12 \tvfixupimmpd $0x12,%zmm25,%zmm26,%zmm27",},
+{{0x62, 0x03, 0x15, 0x07, 0x55, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 07 55 f4 12 \tvfixupimmss $0x12,%xmm28,%xmm29,%xmm30{%k7}",},
+{{0x62, 0x03, 0xad, 0x07, 0x55, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 07 55 d9 12 \tvfixupimmsd $0x12,%xmm25,%xmm26,%xmm27{%k7}",},
+{{0x62, 0x03, 0x7d, 0x48, 0x56, 0xda, 0x12, }, 7, 0, "", "",
+"62 03 7d 48 56 da 12 \tvreduceps $0x12,%zmm26,%zmm27",},
+{{0x62, 0x03, 0xfd, 0x48, 0x56, 0xf5, 0x12, }, 7, 0, "", "",
+"62 03 fd 48 56 f5 12 \tvreducepd $0x12,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x2d, 0x00, 0x57, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 00 57 d9 12 \tvreducess $0x12,%xmm25,%xmm26,%xmm27",},
+{{0x62, 0x03, 0x95, 0x00, 0x57, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 00 57 f4 12 \tvreducesd $0x12,%xmm28,%xmm29,%xmm30",},
+{{0x62, 0x93, 0x7d, 0x48, 0x66, 0xeb, 0x12, }, 7, 0, "", "",
+"62 93 7d 48 66 eb 12 \tvfpclassps $0x12,%zmm27,%k5",},
+{{0x62, 0x93, 0xfd, 0x48, 0x66, 0xee, 0x12, }, 7, 0, "", "",
+"62 93 fd 48 66 ee 12 \tvfpclasspd $0x12,%zmm30,%k5",},
+{{0x62, 0x93, 0x7d, 0x08, 0x67, 0xeb, 0x12, }, 7, 0, "", "",
+"62 93 7d 08 67 eb 12 \tvfpclassss $0x12,%xmm27,%k5",},
+{{0x62, 0x93, 0xfd, 0x08, 0x67, 0xee, 0x12, }, 7, 0, "", "",
+"62 93 fd 08 67 ee 12 \tvfpclasssd $0x12,%xmm30,%k5",},
+{{0x62, 0x91, 0x2d, 0x40, 0x72, 0xc1, 0x12, }, 7, 0, "", "",
+"62 91 2d 40 72 c1 12 \tvprord $0x12,%zmm25,%zmm26",},
+{{0x62, 0x91, 0xad, 0x40, 0x72, 0xc1, 0x12, }, 7, 0, "", "",
+"62 91 ad 40 72 c1 12 \tvprorq $0x12,%zmm25,%zmm26",},
+{{0x62, 0x91, 0x0d, 0x40, 0x72, 0xcd, 0x12, }, 7, 0, "", "",
+"62 91 0d 40 72 cd 12 \tvprold $0x12,%zmm29,%zmm30",},
+{{0x62, 0x91, 0x8d, 0x40, 0x72, 0xcd, 0x12, }, 7, 0, "", "",
+"62 91 8d 40 72 cd 12 \tvprolq $0x12,%zmm29,%zmm30",},
+{{0x0f, 0x72, 0xe6, 0x02, }, 4, 0, "", "",
+"0f 72 e6 02          \tpsrad  $0x2,%mm6",},
+{{0xc5, 0xed, 0x72, 0xe6, 0x05, }, 5, 0, "", "",
+"c5 ed 72 e6 05       \tvpsrad $0x5,%ymm6,%ymm2",},
+{{0x62, 0x91, 0x4d, 0x40, 0x72, 0xe2, 0x05, }, 7, 0, "", "",
+"62 91 4d 40 72 e2 05 \tvpsrad $0x5,%zmm26,%zmm22",},
+{{0x62, 0x91, 0xcd, 0x40, 0x72, 0xe2, 0x05, }, 7, 0, "", "",
+"62 91 cd 40 72 e2 05 \tvpsraq $0x5,%zmm26,%zmm22",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c6 8c fe 7b 00 00 00 \tvgatherpf0dps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c6 8c fe 7b 00 00 00 \tvgatherpf0dpd 0x7b(%r14,%ymm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c6 94 fe 7b 00 00 00 \tvgatherpf1dps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c6 94 fe 7b 00 00 00 \tvgatherpf1dpd 0x7b(%r14,%ymm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c6 ac fe 7b 00 00 00 \tvscatterpf0dps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c6 ac fe 7b 00 00 00 \tvscatterpf0dpd 0x7b(%r14,%ymm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c6 b4 fe 7b 00 00 00 \tvscatterpf1dps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c6 b4 fe 7b 00 00 00 \tvscatterpf1dpd 0x7b(%r14,%ymm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c7 8c fe 7b 00 00 00 \tvgatherpf0qps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c7 8c fe 7b 00 00 00 \tvgatherpf0qpd 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c7 94 fe 7b 00 00 00 \tvgatherpf1qps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c7 94 fe 7b 00 00 00 \tvgatherpf1qpd 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c7 ac fe 7b 00 00 00 \tvscatterpf0qps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c7 ac fe 7b 00 00 00 \tvscatterpf0qpd 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c7 b4 fe 7b 00 00 00 \tvscatterpf1qps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c7 b4 fe 7b 00 00 00 \tvscatterpf1qpd 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x01, 0x95, 0x40, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 40 58 f4    \tvaddpd %zmm28,%zmm29,%zmm30",},
+{{0x62, 0x01, 0x95, 0x47, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 47 58 f4    \tvaddpd %zmm28,%zmm29,%zmm30{%k7}",},
+{{0x62, 0x01, 0x95, 0xc7, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 c7 58 f4    \tvaddpd %zmm28,%zmm29,%zmm30{%k7}{z}",},
+{{0x62, 0x01, 0x95, 0x10, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 10 58 f4    \tvaddpd {rn-sae},%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x01, 0x95, 0x50, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 50 58 f4    \tvaddpd {ru-sae},%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x01, 0x95, 0x30, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 30 58 f4    \tvaddpd {rd-sae},%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x01, 0x95, 0x70, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 70 58 f4    \tvaddpd {rz-sae},%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x61, 0x95, 0x40, 0x58, 0x31, }, 6, 0, "", "",
+"62 61 95 40 58 31    \tvaddpd (%rcx),%zmm29,%zmm30",},
+{{0x62, 0x21, 0x95, 0x40, 0x58, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 11, 0, "", "",
+"62 21 95 40 58 b4 f0 23 01 00 00 \tvaddpd 0x123(%rax,%r14,8),%zmm29,%zmm30",},
+{{0x62, 0x61, 0x95, 0x50, 0x58, 0x31, }, 6, 0, "", "",
+"62 61 95 50 58 31    \tvaddpd (%rcx){1to8},%zmm29,%zmm30",},
+{{0x62, 0x61, 0x95, 0x40, 0x58, 0x72, 0x7f, }, 7, 0, "", "",
+"62 61 95 40 58 72 7f \tvaddpd 0x1fc0(%rdx),%zmm29,%zmm30",},
+{{0x62, 0x61, 0x95, 0x50, 0x58, 0x72, 0x7f, }, 7, 0, "", "",
+"62 61 95 50 58 72 7f \tvaddpd 0x3f8(%rdx){1to8},%zmm29,%zmm30",},
+{{0x62, 0xf1, 0x0c, 0x50, 0xc2, 0x6a, 0x7f, 0x08, }, 8, 0, "", "",
+"62 f1 0c 50 c2 6a 7f 08 \tvcmpeq_uqps 0x1fc(%rdx){1to16},%zmm30,%k5",},
+{{0x62, 0xb1, 0x97, 0x07, 0xc2, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x01, }, 12, 0, "", "",
+"62 b1 97 07 c2 ac f0 23 01 00 00 01 \tvcmpltsd 0x123(%rax,%r14,8),%xmm29,%k5{%k7}",},
+{{0x62, 0x91, 0x97, 0x17, 0xc2, 0xec, 0x02, }, 7, 0, "", "",
+"62 91 97 17 c2 ec 02 \tvcmplesd {sae},%xmm28,%xmm29,%k5{%k7}",},
+{{0x62, 0x23, 0x15, 0x07, 0x27, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x5b, }, 12, 0, "", "",
+"62 23 15 07 27 b4 f0 23 01 00 00 5b \tvgetmantss $0x5b,0x123(%rax,%r14,8),%xmm29,%xmm30{%k7}",},
+{{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"f3 0f 1b 00          \tbndmk  (%rax),%bnd0",},
+{{0xf3, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "",
+"f3 41 0f 1b 00       \tbndmk  (%r8),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 04 25 78 56 34 12 \tbndmk  0x12345678,%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x18, }, 4, 0, "", "",
+"f3 0f 1b 18          \tbndmk  (%rax),%bnd3",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "",
+"f3 0f 1b 04 01       \tbndmk  (%rcx,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 04 05 78 56 34 12 \tbndmk  0x12345678(,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "",
+"f3 0f 1b 04 08       \tbndmk  (%rax,%rcx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "",
+"f3 0f 1b 04 c8       \tbndmk  (%rax,%rcx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "",
+"f3 0f 1b 40 12       \tbndmk  0x12(%rax),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "",
+"f3 0f 1b 45 12       \tbndmk  0x12(%rbp),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 01 12    \tbndmk  0x12(%rcx,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 05 12    \tbndmk  0x12(%rbp,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 08 12    \tbndmk  0x12(%rax,%rcx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 c8 12    \tbndmk  0x12(%rax,%rcx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1b 80 78 56 34 12 \tbndmk  0x12345678(%rax),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1b 85 78 56 34 12 \tbndmk  0x12345678(%rbp),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 01 78 56 34 12 \tbndmk  0x12345678(%rcx,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 05 78 56 34 12 \tbndmk  0x12345678(%rbp,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 08 78 56 34 12 \tbndmk  0x12345678(%rax,%rcx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 c8 78 56 34 12 \tbndmk  0x12345678(%rax,%rcx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"f3 0f 1a 00          \tbndcl  (%rax),%bnd0",},
+{{0xf3, 0x41, 0x0f, 0x1a, 0x00, }, 5, 0, "", "",
+"f3 41 0f 1a 00       \tbndcl  (%r8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 04 25 78 56 34 12 \tbndcl  0x12345678,%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x18, }, 4, 0, "", "",
+"f3 0f 1a 18          \tbndcl  (%rax),%bnd3",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "",
+"f3 0f 1a 04 01       \tbndcl  (%rcx,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 04 05 78 56 34 12 \tbndcl  0x12345678(,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "",
+"f3 0f 1a 04 08       \tbndcl  (%rax,%rcx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "",
+"f3 0f 1a 04 c8       \tbndcl  (%rax,%rcx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "",
+"f3 0f 1a 40 12       \tbndcl  0x12(%rax),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "",
+"f3 0f 1a 45 12       \tbndcl  0x12(%rbp),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 01 12    \tbndcl  0x12(%rcx,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 05 12    \tbndcl  0x12(%rbp,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 08 12    \tbndcl  0x12(%rax,%rcx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 c8 12    \tbndcl  0x12(%rax,%rcx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1a 80 78 56 34 12 \tbndcl  0x12345678(%rax),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1a 85 78 56 34 12 \tbndcl  0x12345678(%rbp),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 01 78 56 34 12 \tbndcl  0x12345678(%rcx,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 05 78 56 34 12 \tbndcl  0x12345678(%rbp,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 08 78 56 34 12 \tbndcl  0x12345678(%rax,%rcx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 c8 78 56 34 12 \tbndcl  0x12345678(%rax,%rcx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "",
+"f3 0f 1a c0          \tbndcl  %rax,%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"f2 0f 1a 00          \tbndcu  (%rax),%bnd0",},
+{{0xf2, 0x41, 0x0f, 0x1a, 0x00, }, 5, 0, "", "",
+"f2 41 0f 1a 00       \tbndcu  (%r8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 04 25 78 56 34 12 \tbndcu  0x12345678,%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x18, }, 4, 0, "", "",
+"f2 0f 1a 18          \tbndcu  (%rax),%bnd3",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "",
+"f2 0f 1a 04 01       \tbndcu  (%rcx,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 04 05 78 56 34 12 \tbndcu  0x12345678(,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "",
+"f2 0f 1a 04 08       \tbndcu  (%rax,%rcx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "",
+"f2 0f 1a 04 c8       \tbndcu  (%rax,%rcx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "",
+"f2 0f 1a 40 12       \tbndcu  0x12(%rax),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "",
+"f2 0f 1a 45 12       \tbndcu  0x12(%rbp),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 01 12    \tbndcu  0x12(%rcx,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 05 12    \tbndcu  0x12(%rbp,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 08 12    \tbndcu  0x12(%rax,%rcx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 c8 12    \tbndcu  0x12(%rax,%rcx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1a 80 78 56 34 12 \tbndcu  0x12345678(%rax),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1a 85 78 56 34 12 \tbndcu  0x12345678(%rbp),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 01 78 56 34 12 \tbndcu  0x12345678(%rcx,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 05 78 56 34 12 \tbndcu  0x12345678(%rbp,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 08 78 56 34 12 \tbndcu  0x12345678(%rax,%rcx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 c8 78 56 34 12 \tbndcu  0x12345678(%rax,%rcx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "",
+"f2 0f 1a c0          \tbndcu  %rax,%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"f2 0f 1b 00          \tbndcn  (%rax),%bnd0",},
+{{0xf2, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "",
+"f2 41 0f 1b 00       \tbndcn  (%r8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 04 25 78 56 34 12 \tbndcn  0x12345678,%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x18, }, 4, 0, "", "",
+"f2 0f 1b 18          \tbndcn  (%rax),%bnd3",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "",
+"f2 0f 1b 04 01       \tbndcn  (%rcx,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 04 05 78 56 34 12 \tbndcn  0x12345678(,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "",
+"f2 0f 1b 04 08       \tbndcn  (%rax,%rcx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "",
+"f2 0f 1b 04 c8       \tbndcn  (%rax,%rcx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "",
+"f2 0f 1b 40 12       \tbndcn  0x12(%rax),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "",
+"f2 0f 1b 45 12       \tbndcn  0x12(%rbp),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 01 12    \tbndcn  0x12(%rcx,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 05 12    \tbndcn  0x12(%rbp,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 08 12    \tbndcn  0x12(%rax,%rcx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 c8 12    \tbndcn  0x12(%rax,%rcx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1b 80 78 56 34 12 \tbndcn  0x12345678(%rax),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1b 85 78 56 34 12 \tbndcn  0x12345678(%rbp),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 01 78 56 34 12 \tbndcn  0x12345678(%rcx,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 05 78 56 34 12 \tbndcn  0x12345678(%rbp,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 08 78 56 34 12 \tbndcn  0x12345678(%rax,%rcx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 c8 78 56 34 12 \tbndcn  0x12345678(%rax,%rcx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0xc0, }, 4, 0, "", "",
+"f2 0f 1b c0          \tbndcn  %rax,%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"66 0f 1a 00          \tbndmov (%rax),%bnd0",},
+{{0x66, 0x41, 0x0f, 0x1a, 0x00, }, 5, 0, "", "",
+"66 41 0f 1a 00       \tbndmov (%r8),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 04 25 78 56 34 12 \tbndmov 0x12345678,%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x18, }, 4, 0, "", "",
+"66 0f 1a 18          \tbndmov (%rax),%bnd3",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "",
+"66 0f 1a 04 01       \tbndmov (%rcx,%rax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 04 05 78 56 34 12 \tbndmov 0x12345678(,%rax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "",
+"66 0f 1a 04 08       \tbndmov (%rax,%rcx,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "",
+"66 0f 1a 04 c8       \tbndmov (%rax,%rcx,8),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "",
+"66 0f 1a 40 12       \tbndmov 0x12(%rax),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "",
+"66 0f 1a 45 12       \tbndmov 0x12(%rbp),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 01 12    \tbndmov 0x12(%rcx,%rax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 05 12    \tbndmov 0x12(%rbp,%rax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 08 12    \tbndmov 0x12(%rax,%rcx,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 c8 12    \tbndmov 0x12(%rax,%rcx,8),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1a 80 78 56 34 12 \tbndmov 0x12345678(%rax),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1a 85 78 56 34 12 \tbndmov 0x12345678(%rbp),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 01 78 56 34 12 \tbndmov 0x12345678(%rcx,%rax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 05 78 56 34 12 \tbndmov 0x12345678(%rbp,%rax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 08 78 56 34 12 \tbndmov 0x12345678(%rax,%rcx,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 c8 78 56 34 12 \tbndmov 0x12345678(%rax,%rcx,8),%bnd0",},
+{{0x66, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"66 0f 1b 00          \tbndmov %bnd0,(%rax)",},
+{{0x66, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "",
+"66 41 0f 1b 00       \tbndmov %bnd0,(%r8)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 04 25 78 56 34 12 \tbndmov %bnd0,0x12345678",},
+{{0x66, 0x0f, 0x1b, 0x18, }, 4, 0, "", "",
+"66 0f 1b 18          \tbndmov %bnd3,(%rax)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "",
+"66 0f 1b 04 01       \tbndmov %bnd0,(%rcx,%rax,1)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 04 05 78 56 34 12 \tbndmov %bnd0,0x12345678(,%rax,1)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "",
+"66 0f 1b 04 08       \tbndmov %bnd0,(%rax,%rcx,1)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "",
+"66 0f 1b 04 c8       \tbndmov %bnd0,(%rax,%rcx,8)",},
+{{0x66, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "",
+"66 0f 1b 40 12       \tbndmov %bnd0,0x12(%rax)",},
+{{0x66, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "",
+"66 0f 1b 45 12       \tbndmov %bnd0,0x12(%rbp)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 01 12    \tbndmov %bnd0,0x12(%rcx,%rax,1)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 05 12    \tbndmov %bnd0,0x12(%rbp,%rax,1)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 08 12    \tbndmov %bnd0,0x12(%rax,%rcx,1)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 c8 12    \tbndmov %bnd0,0x12(%rax,%rcx,8)",},
+{{0x66, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1b 80 78 56 34 12 \tbndmov %bnd0,0x12345678(%rax)",},
+{{0x66, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1b 85 78 56 34 12 \tbndmov %bnd0,0x12345678(%rbp)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 01 78 56 34 12 \tbndmov %bnd0,0x12345678(%rcx,%rax,1)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 05 78 56 34 12 \tbndmov %bnd0,0x12345678(%rbp,%rax,1)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 08 78 56 34 12 \tbndmov %bnd0,0x12345678(%rax,%rcx,1)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 c8 78 56 34 12 \tbndmov %bnd0,0x12345678(%rax,%rcx,8)",},
+{{0x66, 0x0f, 0x1a, 0xc8, }, 4, 0, "", "",
+"66 0f 1a c8          \tbndmov %bnd0,%bnd1",},
+{{0x66, 0x0f, 0x1a, 0xc1, }, 4, 0, "", "",
+"66 0f 1a c1          \tbndmov %bnd1,%bnd0",},
+{{0x0f, 0x1a, 0x00, }, 3, 0, "", "",
+"0f 1a 00             \tbndldx (%rax),%bnd0",},
+{{0x41, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"41 0f 1a 00          \tbndldx (%r8),%bnd0",},
+{{0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 04 25 78 56 34 12 \tbndldx 0x12345678,%bnd0",},
+{{0x0f, 0x1a, 0x18, }, 3, 0, "", "",
+"0f 1a 18             \tbndldx (%rax),%bnd3",},
+{{0x0f, 0x1a, 0x04, 0x01, }, 4, 0, "", "",
+"0f 1a 04 01          \tbndldx (%rcx,%rax,1),%bnd0",},
+{{0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 04 05 78 56 34 12 \tbndldx 0x12345678(,%rax,1),%bnd0",},
+{{0x0f, 0x1a, 0x04, 0x08, }, 4, 0, "", "",
+"0f 1a 04 08          \tbndldx (%rax,%rcx,1),%bnd0",},
+{{0x0f, 0x1a, 0x40, 0x12, }, 4, 0, "", "",
+"0f 1a 40 12          \tbndldx 0x12(%rax),%bnd0",},
+{{0x0f, 0x1a, 0x45, 0x12, }, 4, 0, "", "",
+"0f 1a 45 12          \tbndldx 0x12(%rbp),%bnd0",},
+{{0x0f, 0x1a, 0x44, 0x01, 0x12, }, 5, 0, "", "",
+"0f 1a 44 01 12       \tbndldx 0x12(%rcx,%rax,1),%bnd0",},
+{{0x0f, 0x1a, 0x44, 0x05, 0x12, }, 5, 0, "", "",
+"0f 1a 44 05 12       \tbndldx 0x12(%rbp,%rax,1),%bnd0",},
+{{0x0f, 0x1a, 0x44, 0x08, 0x12, }, 5, 0, "", "",
+"0f 1a 44 08 12       \tbndldx 0x12(%rax,%rcx,1),%bnd0",},
+{{0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1a 80 78 56 34 12 \tbndldx 0x12345678(%rax),%bnd0",},
+{{0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1a 85 78 56 34 12 \tbndldx 0x12345678(%rbp),%bnd0",},
+{{0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 84 01 78 56 34 12 \tbndldx 0x12345678(%rcx,%rax,1),%bnd0",},
+{{0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 84 05 78 56 34 12 \tbndldx 0x12345678(%rbp,%rax,1),%bnd0",},
+{{0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 84 08 78 56 34 12 \tbndldx 0x12345678(%rax,%rcx,1),%bnd0",},
+{{0x0f, 0x1b, 0x00, }, 3, 0, "", "",
+"0f 1b 00             \tbndstx %bnd0,(%rax)",},
+{{0x41, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"41 0f 1b 00          \tbndstx %bnd0,(%r8)",},
+{{0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 04 25 78 56 34 12 \tbndstx %bnd0,0x12345678",},
+{{0x0f, 0x1b, 0x18, }, 3, 0, "", "",
+"0f 1b 18             \tbndstx %bnd3,(%rax)",},
+{{0x0f, 0x1b, 0x04, 0x01, }, 4, 0, "", "",
+"0f 1b 04 01          \tbndstx %bnd0,(%rcx,%rax,1)",},
+{{0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 04 05 78 56 34 12 \tbndstx %bnd0,0x12345678(,%rax,1)",},
+{{0x0f, 0x1b, 0x04, 0x08, }, 4, 0, "", "",
+"0f 1b 04 08          \tbndstx %bnd0,(%rax,%rcx,1)",},
+{{0x0f, 0x1b, 0x40, 0x12, }, 4, 0, "", "",
+"0f 1b 40 12          \tbndstx %bnd0,0x12(%rax)",},
+{{0x0f, 0x1b, 0x45, 0x12, }, 4, 0, "", "",
+"0f 1b 45 12          \tbndstx %bnd0,0x12(%rbp)",},
+{{0x0f, 0x1b, 0x44, 0x01, 0x12, }, 5, 0, "", "",
+"0f 1b 44 01 12       \tbndstx %bnd0,0x12(%rcx,%rax,1)",},
+{{0x0f, 0x1b, 0x44, 0x05, 0x12, }, 5, 0, "", "",
+"0f 1b 44 05 12       \tbndstx %bnd0,0x12(%rbp,%rax,1)",},
+{{0x0f, 0x1b, 0x44, 0x08, 0x12, }, 5, 0, "", "",
+"0f 1b 44 08 12       \tbndstx %bnd0,0x12(%rax,%rcx,1)",},
+{{0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1b 80 78 56 34 12 \tbndstx %bnd0,0x12345678(%rax)",},
+{{0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1b 85 78 56 34 12 \tbndstx %bnd0,0x12345678(%rbp)",},
+{{0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 84 01 78 56 34 12 \tbndstx %bnd0,0x12345678(%rcx,%rax,1)",},
+{{0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 84 05 78 56 34 12 \tbndstx %bnd0,0x12345678(%rbp,%rax,1)",},
+{{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%rax,%rcx,1)",},
+{{0xf2, 0xe8, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "call", "unconditional",
+"f2 e8 00 00 00 00    \tbnd callq f22 <main+0xf22>",},
+{{0x67, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"67 f2 ff 10          \tbnd callq *(%eax)",},
+{{0xf2, 0xc3, }, 2, 0, "ret", "indirect",
+"f2 c3                \tbnd retq ",},
+{{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional",
+"f2 e9 00 00 00 00    \tbnd jmpq f2e <main+0xf2e>",},
+{{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional",
+"f2 e9 00 00 00 00    \tbnd jmpq f34 <main+0xf34>",},
+{{0x67, 0xf2, 0xff, 0x21, }, 4, 0, "jmp", "indirect",
+"67 f2 ff 21          \tbnd jmpq *(%ecx)",},
+{{0xf2, 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, }, 7, 0, "jcc", "conditional",
+"f2 0f 85 00 00 00 00 \tbnd jne f3f <main+0xf3f>",},
+{{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "",
+"0f 3a cc c1 00       \tsha1rnds4 $0x0,%xmm1,%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "",
+"0f 3a cc d7 91       \tsha1rnds4 $0x91,%xmm7,%xmm2",},
+{{0x41, 0x0f, 0x3a, 0xcc, 0xc0, 0x91, }, 6, 0, "", "",
+"41 0f 3a cc c0 91    \tsha1rnds4 $0x91,%xmm8,%xmm0",},
+{{0x44, 0x0f, 0x3a, 0xcc, 0xc7, 0x91, }, 6, 0, "", "",
+"44 0f 3a cc c7 91    \tsha1rnds4 $0x91,%xmm7,%xmm8",},
+{{0x45, 0x0f, 0x3a, 0xcc, 0xc7, 0x91, }, 6, 0, "", "",
+"45 0f 3a cc c7 91    \tsha1rnds4 $0x91,%xmm15,%xmm8",},
+{{0x0f, 0x3a, 0xcc, 0x00, 0x91, }, 5, 0, "", "",
+"0f 3a cc 00 91       \tsha1rnds4 $0x91,(%rax),%xmm0",},
+{{0x41, 0x0f, 0x3a, 0xcc, 0x00, 0x91, }, 6, 0, "", "",
+"41 0f 3a cc 00 91    \tsha1rnds4 $0x91,(%r8),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 04 25 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678,%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x18, 0x91, }, 5, 0, "", "",
+"0f 3a cc 18 91       \tsha1rnds4 $0x91,(%rax),%xmm3",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x01, 0x91, }, 6, 0, "", "",
+"0f 3a cc 04 01 91    \tsha1rnds4 $0x91,(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 04 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(,%rax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x08, 0x91, }, 6, 0, "", "",
+"0f 3a cc 04 08 91    \tsha1rnds4 $0x91,(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0xc8, 0x91, }, 6, 0, "", "",
+"0f 3a cc 04 c8 91    \tsha1rnds4 $0x91,(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x40, 0x12, 0x91, }, 6, 0, "", "",
+"0f 3a cc 40 12 91    \tsha1rnds4 $0x91,0x12(%rax),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x45, 0x12, 0x91, }, 6, 0, "", "",
+"0f 3a cc 45 12 91    \tsha1rnds4 $0x91,0x12(%rbp),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0x01, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 01 12 91 \tsha1rnds4 $0x91,0x12(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0x05, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 05 12 91 \tsha1rnds4 $0x91,0x12(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0x08, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 08 12 91 \tsha1rnds4 $0x91,0x12(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0xc8, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 c8 12 91 \tsha1rnds4 $0x91,0x12(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "",
+"0f 3a cc 80 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "",
+"0f 3a cc 85 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rbp),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 01 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 08 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 c8 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax,%rcx,8),%xmm0",},
+{{0x44, 0x0f, 0x3a, 0xcc, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x91, }, 11, 0, "", "",
+"44 0f 3a cc bc c8 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x0f, 0x38, 0xc8, 0xc1, }, 4, 0, "", "",
+"0f 38 c8 c1          \tsha1nexte %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xc8, 0xd7, }, 4, 0, "", "",
+"0f 38 c8 d7          \tsha1nexte %xmm7,%xmm2",},
+{{0x41, 0x0f, 0x38, 0xc8, 0xc0, }, 5, 0, "", "",
+"41 0f 38 c8 c0       \tsha1nexte %xmm8,%xmm0",},
+{{0x44, 0x0f, 0x38, 0xc8, 0xc7, }, 5, 0, "", "",
+"44 0f 38 c8 c7       \tsha1nexte %xmm7,%xmm8",},
+{{0x45, 0x0f, 0x38, 0xc8, 0xc7, }, 5, 0, "", "",
+"45 0f 38 c8 c7       \tsha1nexte %xmm15,%xmm8",},
+{{0x0f, 0x38, 0xc8, 0x00, }, 4, 0, "", "",
+"0f 38 c8 00          \tsha1nexte (%rax),%xmm0",},
+{{0x41, 0x0f, 0x38, 0xc8, 0x00, }, 5, 0, "", "",
+"41 0f 38 c8 00       \tsha1nexte (%r8),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 04 25 78 56 34 12 \tsha1nexte 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x18, }, 4, 0, "", "",
+"0f 38 c8 18          \tsha1nexte (%rax),%xmm3",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 c8 04 01       \tsha1nexte (%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 04 05 78 56 34 12 \tsha1nexte 0x12345678(,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 c8 04 08       \tsha1nexte (%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 c8 04 c8       \tsha1nexte (%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 c8 40 12       \tsha1nexte 0x12(%rax),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 c8 45 12       \tsha1nexte 0x12(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 01 12    \tsha1nexte 0x12(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 05 12    \tsha1nexte 0x12(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 08 12    \tsha1nexte 0x12(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 c8 12    \tsha1nexte 0x12(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c8 80 78 56 34 12 \tsha1nexte 0x12345678(%rax),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c8 85 78 56 34 12 \tsha1nexte 0x12345678(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 01 78 56 34 12 \tsha1nexte 0x12345678(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 05 78 56 34 12 \tsha1nexte 0x12345678(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 08 78 56 34 12 \tsha1nexte 0x12345678(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 c8 78 56 34 12 \tsha1nexte 0x12345678(%rax,%rcx,8),%xmm0",},
+{{0x44, 0x0f, 0x38, 0xc8, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"44 0f 38 c8 bc c8 78 56 34 12 \tsha1nexte 0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x0f, 0x38, 0xc9, 0xc1, }, 4, 0, "", "",
+"0f 38 c9 c1          \tsha1msg1 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xc9, 0xd7, }, 4, 0, "", "",
+"0f 38 c9 d7          \tsha1msg1 %xmm7,%xmm2",},
+{{0x41, 0x0f, 0x38, 0xc9, 0xc0, }, 5, 0, "", "",
+"41 0f 38 c9 c0       \tsha1msg1 %xmm8,%xmm0",},
+{{0x44, 0x0f, 0x38, 0xc9, 0xc7, }, 5, 0, "", "",
+"44 0f 38 c9 c7       \tsha1msg1 %xmm7,%xmm8",},
+{{0x45, 0x0f, 0x38, 0xc9, 0xc7, }, 5, 0, "", "",
+"45 0f 38 c9 c7       \tsha1msg1 %xmm15,%xmm8",},
+{{0x0f, 0x38, 0xc9, 0x00, }, 4, 0, "", "",
+"0f 38 c9 00          \tsha1msg1 (%rax),%xmm0",},
+{{0x41, 0x0f, 0x38, 0xc9, 0x00, }, 5, 0, "", "",
+"41 0f 38 c9 00       \tsha1msg1 (%r8),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 04 25 78 56 34 12 \tsha1msg1 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x18, }, 4, 0, "", "",
+"0f 38 c9 18          \tsha1msg1 (%rax),%xmm3",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 c9 04 01       \tsha1msg1 (%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 04 05 78 56 34 12 \tsha1msg1 0x12345678(,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 c9 04 08       \tsha1msg1 (%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 c9 04 c8       \tsha1msg1 (%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 c9 40 12       \tsha1msg1 0x12(%rax),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 c9 45 12       \tsha1msg1 0x12(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 01 12    \tsha1msg1 0x12(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 05 12    \tsha1msg1 0x12(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 08 12    \tsha1msg1 0x12(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 c8 12    \tsha1msg1 0x12(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c9 80 78 56 34 12 \tsha1msg1 0x12345678(%rax),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c9 85 78 56 34 12 \tsha1msg1 0x12345678(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 01 78 56 34 12 \tsha1msg1 0x12345678(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 05 78 56 34 12 \tsha1msg1 0x12345678(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 08 78 56 34 12 \tsha1msg1 0x12345678(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 c8 78 56 34 12 \tsha1msg1 0x12345678(%rax,%rcx,8),%xmm0",},
+{{0x44, 0x0f, 0x38, 0xc9, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"44 0f 38 c9 bc c8 78 56 34 12 \tsha1msg1 0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x0f, 0x38, 0xca, 0xc1, }, 4, 0, "", "",
+"0f 38 ca c1          \tsha1msg2 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xca, 0xd7, }, 4, 0, "", "",
+"0f 38 ca d7          \tsha1msg2 %xmm7,%xmm2",},
+{{0x41, 0x0f, 0x38, 0xca, 0xc0, }, 5, 0, "", "",
+"41 0f 38 ca c0       \tsha1msg2 %xmm8,%xmm0",},
+{{0x44, 0x0f, 0x38, 0xca, 0xc7, }, 5, 0, "", "",
+"44 0f 38 ca c7       \tsha1msg2 %xmm7,%xmm8",},
+{{0x45, 0x0f, 0x38, 0xca, 0xc7, }, 5, 0, "", "",
+"45 0f 38 ca c7       \tsha1msg2 %xmm15,%xmm8",},
+{{0x0f, 0x38, 0xca, 0x00, }, 4, 0, "", "",
+"0f 38 ca 00          \tsha1msg2 (%rax),%xmm0",},
+{{0x41, 0x0f, 0x38, 0xca, 0x00, }, 5, 0, "", "",
+"41 0f 38 ca 00       \tsha1msg2 (%r8),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 04 25 78 56 34 12 \tsha1msg2 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xca, 0x18, }, 4, 0, "", "",
+"0f 38 ca 18          \tsha1msg2 (%rax),%xmm3",},
+{{0x0f, 0x38, 0xca, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 ca 04 01       \tsha1msg2 (%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 04 05 78 56 34 12 \tsha1msg2 0x12345678(,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 ca 04 08       \tsha1msg2 (%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 ca 04 c8       \tsha1msg2 (%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 ca 40 12       \tsha1msg2 0x12(%rax),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 ca 45 12       \tsha1msg2 0x12(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 01 12    \tsha1msg2 0x12(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 05 12    \tsha1msg2 0x12(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 08 12    \tsha1msg2 0x12(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 c8 12    \tsha1msg2 0x12(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 ca 80 78 56 34 12 \tsha1msg2 0x12345678(%rax),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 ca 85 78 56 34 12 \tsha1msg2 0x12345678(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 01 78 56 34 12 \tsha1msg2 0x12345678(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 05 78 56 34 12 \tsha1msg2 0x12345678(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 08 78 56 34 12 \tsha1msg2 0x12345678(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 c8 78 56 34 12 \tsha1msg2 0x12345678(%rax,%rcx,8),%xmm0",},
+{{0x44, 0x0f, 0x38, 0xca, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"44 0f 38 ca bc c8 78 56 34 12 \tsha1msg2 0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x0f, 0x38, 0xcb, 0xcc, }, 4, 0, "", "",
+"0f 38 cb cc          \tsha256rnds2 %xmm0,%xmm4,%xmm1",},
+{{0x0f, 0x38, 0xcb, 0xd7, }, 4, 0, "", "",
+"0f 38 cb d7          \tsha256rnds2 %xmm0,%xmm7,%xmm2",},
+{{0x41, 0x0f, 0x38, 0xcb, 0xc8, }, 5, 0, "", "",
+"41 0f 38 cb c8       \tsha256rnds2 %xmm0,%xmm8,%xmm1",},
+{{0x44, 0x0f, 0x38, 0xcb, 0xc7, }, 5, 0, "", "",
+"44 0f 38 cb c7       \tsha256rnds2 %xmm0,%xmm7,%xmm8",},
+{{0x45, 0x0f, 0x38, 0xcb, 0xc7, }, 5, 0, "", "",
+"45 0f 38 cb c7       \tsha256rnds2 %xmm0,%xmm15,%xmm8",},
+{{0x0f, 0x38, 0xcb, 0x08, }, 4, 0, "", "",
+"0f 38 cb 08          \tsha256rnds2 %xmm0,(%rax),%xmm1",},
+{{0x41, 0x0f, 0x38, 0xcb, 0x08, }, 5, 0, "", "",
+"41 0f 38 cb 08       \tsha256rnds2 %xmm0,(%r8),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 0c 25 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678,%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x18, }, 4, 0, "", "",
+"0f 38 cb 18          \tsha256rnds2 %xmm0,(%rax),%xmm3",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x01, }, 5, 0, "", "",
+"0f 38 cb 0c 01       \tsha256rnds2 %xmm0,(%rcx,%rax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 0c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(,%rax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x08, }, 5, 0, "", "",
+"0f 38 cb 0c 08       \tsha256rnds2 %xmm0,(%rax,%rcx,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0xc8, }, 5, 0, "", "",
+"0f 38 cb 0c c8       \tsha256rnds2 %xmm0,(%rax,%rcx,8),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x48, 0x12, }, 5, 0, "", "",
+"0f 38 cb 48 12       \tsha256rnds2 %xmm0,0x12(%rax),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4d, 0x12, }, 5, 0, "", "",
+"0f 38 cb 4d 12       \tsha256rnds2 %xmm0,0x12(%rbp),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c 01 12    \tsha256rnds2 %xmm0,0x12(%rcx,%rax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c 05 12    \tsha256rnds2 %xmm0,0x12(%rbp,%rax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c 08 12    \tsha256rnds2 %xmm0,0x12(%rax,%rcx,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c c8 12    \tsha256rnds2 %xmm0,0x12(%rax,%rcx,8),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cb 88 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cb 8d 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rbp),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c 01 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rcx,%rax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rbp,%rax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c 08 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax,%rcx,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c c8 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x44, 0x0f, 0x38, 0xcb, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"44 0f 38 cb bc c8 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x0f, 0x38, 0xcc, 0xc1, }, 4, 0, "", "",
+"0f 38 cc c1          \tsha256msg1 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xcc, 0xd7, }, 4, 0, "", "",
+"0f 38 cc d7          \tsha256msg1 %xmm7,%xmm2",},
+{{0x41, 0x0f, 0x38, 0xcc, 0xc0, }, 5, 0, "", "",
+"41 0f 38 cc c0       \tsha256msg1 %xmm8,%xmm0",},
+{{0x44, 0x0f, 0x38, 0xcc, 0xc7, }, 5, 0, "", "",
+"44 0f 38 cc c7       \tsha256msg1 %xmm7,%xmm8",},
+{{0x45, 0x0f, 0x38, 0xcc, 0xc7, }, 5, 0, "", "",
+"45 0f 38 cc c7       \tsha256msg1 %xmm15,%xmm8",},
+{{0x0f, 0x38, 0xcc, 0x00, }, 4, 0, "", "",
+"0f 38 cc 00          \tsha256msg1 (%rax),%xmm0",},
+{{0x41, 0x0f, 0x38, 0xcc, 0x00, }, 5, 0, "", "",
+"41 0f 38 cc 00       \tsha256msg1 (%r8),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 04 25 78 56 34 12 \tsha256msg1 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x18, }, 4, 0, "", "",
+"0f 38 cc 18          \tsha256msg1 (%rax),%xmm3",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 cc 04 01       \tsha256msg1 (%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 04 05 78 56 34 12 \tsha256msg1 0x12345678(,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 cc 04 08       \tsha256msg1 (%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 cc 04 c8       \tsha256msg1 (%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 cc 40 12       \tsha256msg1 0x12(%rax),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 cc 45 12       \tsha256msg1 0x12(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 01 12    \tsha256msg1 0x12(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 05 12    \tsha256msg1 0x12(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 08 12    \tsha256msg1 0x12(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 c8 12    \tsha256msg1 0x12(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cc 80 78 56 34 12 \tsha256msg1 0x12345678(%rax),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cc 85 78 56 34 12 \tsha256msg1 0x12345678(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 01 78 56 34 12 \tsha256msg1 0x12345678(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 05 78 56 34 12 \tsha256msg1 0x12345678(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 08 78 56 34 12 \tsha256msg1 0x12345678(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 c8 78 56 34 12 \tsha256msg1 0x12345678(%rax,%rcx,8),%xmm0",},
+{{0x44, 0x0f, 0x38, 0xcc, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"44 0f 38 cc bc c8 78 56 34 12 \tsha256msg1 0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x0f, 0x38, 0xcd, 0xc1, }, 4, 0, "", "",
+"0f 38 cd c1          \tsha256msg2 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xcd, 0xd7, }, 4, 0, "", "",
+"0f 38 cd d7          \tsha256msg2 %xmm7,%xmm2",},
+{{0x41, 0x0f, 0x38, 0xcd, 0xc0, }, 5, 0, "", "",
+"41 0f 38 cd c0       \tsha256msg2 %xmm8,%xmm0",},
+{{0x44, 0x0f, 0x38, 0xcd, 0xc7, }, 5, 0, "", "",
+"44 0f 38 cd c7       \tsha256msg2 %xmm7,%xmm8",},
+{{0x45, 0x0f, 0x38, 0xcd, 0xc7, }, 5, 0, "", "",
+"45 0f 38 cd c7       \tsha256msg2 %xmm15,%xmm8",},
+{{0x0f, 0x38, 0xcd, 0x00, }, 4, 0, "", "",
+"0f 38 cd 00          \tsha256msg2 (%rax),%xmm0",},
+{{0x41, 0x0f, 0x38, 0xcd, 0x00, }, 5, 0, "", "",
+"41 0f 38 cd 00       \tsha256msg2 (%r8),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 04 25 78 56 34 12 \tsha256msg2 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x18, }, 4, 0, "", "",
+"0f 38 cd 18          \tsha256msg2 (%rax),%xmm3",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 cd 04 01       \tsha256msg2 (%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 04 05 78 56 34 12 \tsha256msg2 0x12345678(,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 cd 04 08       \tsha256msg2 (%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 cd 04 c8       \tsha256msg2 (%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 cd 40 12       \tsha256msg2 0x12(%rax),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 cd 45 12       \tsha256msg2 0x12(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 01 12    \tsha256msg2 0x12(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 05 12    \tsha256msg2 0x12(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 08 12    \tsha256msg2 0x12(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 c8 12    \tsha256msg2 0x12(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cd 80 78 56 34 12 \tsha256msg2 0x12345678(%rax),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cd 85 78 56 34 12 \tsha256msg2 0x12345678(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 01 78 56 34 12 \tsha256msg2 0x12345678(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 05 78 56 34 12 \tsha256msg2 0x12345678(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 08 78 56 34 12 \tsha256msg2 0x12345678(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 c8 78 56 34 12 \tsha256msg2 0x12345678(%rax,%rcx,8),%xmm0",},
+{{0x44, 0x0f, 0x38, 0xcd, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"44 0f 38 cd bc c8 78 56 34 12 \tsha256msg2 0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x66, 0x0f, 0xae, 0x38, }, 4, 0, "", "",
+"66 0f ae 38          \tclflushopt (%rax)",},
+{{0x66, 0x41, 0x0f, 0xae, 0x38, }, 5, 0, "", "",
+"66 41 0f ae 38       \tclflushopt (%r8)",},
+{{0x66, 0x0f, 0xae, 0x3c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f ae 3c 25 78 56 34 12 \tclflushopt 0x12345678",},
+{{0x66, 0x0f, 0xae, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f ae bc c8 78 56 34 12 \tclflushopt 0x12345678(%rax,%rcx,8)",},
+{{0x66, 0x41, 0x0f, 0xae, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 41 0f ae bc c8 78 56 34 12 \tclflushopt 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0xae, 0x38, }, 3, 0, "", "",
+"0f ae 38             \tclflush (%rax)",},
+{{0x41, 0x0f, 0xae, 0x38, }, 4, 0, "", "",
+"41 0f ae 38          \tclflush (%r8)",},
+{{0x0f, 0xae, 0xf8, }, 3, 0, "", "",
+"0f ae f8             \tsfence ",},
+{{0x66, 0x0f, 0xae, 0x30, }, 4, 0, "", "",
+"66 0f ae 30          \tclwb   (%rax)",},
+{{0x66, 0x41, 0x0f, 0xae, 0x30, }, 5, 0, "", "",
+"66 41 0f ae 30       \tclwb   (%r8)",},
+{{0x66, 0x0f, 0xae, 0x34, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f ae 34 25 78 56 34 12 \tclwb   0x12345678",},
+{{0x66, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f ae b4 c8 78 56 34 12 \tclwb   0x12345678(%rax,%rcx,8)",},
+{{0x66, 0x41, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 41 0f ae b4 c8 78 56 34 12 \tclwb   0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0xae, 0x30, }, 3, 0, "", "",
+"0f ae 30             \txsaveopt (%rax)",},
+{{0x41, 0x0f, 0xae, 0x30, }, 4, 0, "", "",
+"41 0f ae 30          \txsaveopt (%r8)",},
+{{0x0f, 0xae, 0xf0, }, 3, 0, "", "",
+"0f ae f0             \tmfence ",},
+{{0x0f, 0xc7, 0x20, }, 3, 0, "", "",
+"0f c7 20             \txsavec (%rax)",},
+{{0x41, 0x0f, 0xc7, 0x20, }, 4, 0, "", "",
+"41 0f c7 20          \txsavec (%r8)",},
+{{0x0f, 0xc7, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 24 25 78 56 34 12 \txsavec 0x12345678",},
+{{0x0f, 0xc7, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 a4 c8 78 56 34 12 \txsavec 0x12345678(%rax,%rcx,8)",},
+{{0x41, 0x0f, 0xc7, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"41 0f c7 a4 c8 78 56 34 12 \txsavec 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0xc7, 0x28, }, 3, 0, "", "",
+"0f c7 28             \txsaves (%rax)",},
+{{0x41, 0x0f, 0xc7, 0x28, }, 4, 0, "", "",
+"41 0f c7 28          \txsaves (%r8)",},
+{{0x0f, 0xc7, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 2c 25 78 56 34 12 \txsaves 0x12345678",},
+{{0x0f, 0xc7, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 ac c8 78 56 34 12 \txsaves 0x12345678(%rax,%rcx,8)",},
+{{0x41, 0x0f, 0xc7, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"41 0f c7 ac c8 78 56 34 12 \txsaves 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0xc7, 0x18, }, 3, 0, "", "",
+"0f c7 18             \txrstors (%rax)",},
+{{0x41, 0x0f, 0xc7, 0x18, }, 4, 0, "", "",
+"41 0f c7 18          \txrstors (%r8)",},
+{{0x0f, 0xc7, 0x1c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 1c 25 78 56 34 12 \txrstors 0x12345678",},
+{{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",},
+{{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20          \tptwritel (%rax)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 41 0f ae 20       \tptwritel (%r8)",},
+{{0xf3, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae 24 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20          \tptwritel (%rax)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 41 0f ae 20       \tptwritel (%r8)",},
+{{0xf3, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae 24 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x48, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 48 0f ae 20       \tptwriteq (%rax)",},
+{{0xf3, 0x49, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 49 0f ae 20       \tptwriteq (%r8)",},
+{{0xf3, 0x48, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 48 0f ae 24 25 78 56 34 12 \tptwriteq 0x12345678",},
+{{0xf3, 0x48, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 48 0f ae a4 c8 78 56 34 12 \tptwriteq 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x49, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 49 0f ae a4 c8 78 56 34 12 \tptwriteq 0x12345678(%r8,%rcx,8)",},
diff --git a/arch/x86/tests/insn-x86-dat-src.c b/arch/x86/tests/insn-x86-dat-src.c
new file mode 100644
index 0000000..891415b
--- /dev/null
+++ b/arch/x86/tests/insn-x86-dat-src.c
@@ -0,0 +1,2693 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file contains instructions for testing by the test titled:
+ *
+ *         "Test x86 instruction decoder - new instructions"
+ *
+ * Note that the 'Expecting' comment lines are consumed by the
+ * gen-insn-x86-dat.awk script and have the format:
+ *
+ *         Expecting: <op> <branch> <rel>
+ *
+ * If this file is changed, remember to run the gen-insn-x86-dat.sh
+ * script and commit the result.
+ *
+ * Refer to insn-x86.c for more details.
+ */
+
+int main(void)
+{
+	/* Following line is a marker for the awk script - do not change */
+	asm volatile("rdtsc"); /* Start here */
+
+	/* Test fix for vcvtph2ps in x86-opcode-map.txt */
+
+	asm volatile("vcvtph2ps %xmm3,%ymm5");
+
+#ifdef __x86_64__
+
+	/* AVX-512: Instructions with the same op codes as Mask Instructions  */
+
+	asm volatile("cmovno %rax,%rbx");
+	asm volatile("cmovno 0x12345678(%rax),%rcx");
+	asm volatile("cmovno 0x12345678(%rax),%cx");
+
+	asm volatile("cmove  %rax,%rbx");
+	asm volatile("cmove 0x12345678(%rax),%rcx");
+	asm volatile("cmove 0x12345678(%rax),%cx");
+
+	asm volatile("seto    0x12345678(%rax)");
+	asm volatile("setno   0x12345678(%rax)");
+	asm volatile("setb    0x12345678(%rax)");
+	asm volatile("setc    0x12345678(%rax)");
+	asm volatile("setnae  0x12345678(%rax)");
+	asm volatile("setae   0x12345678(%rax)");
+	asm volatile("setnb   0x12345678(%rax)");
+	asm volatile("setnc   0x12345678(%rax)");
+	asm volatile("sets    0x12345678(%rax)");
+	asm volatile("setns   0x12345678(%rax)");
+
+	/* AVX-512: Mask Instructions */
+
+	asm volatile("kandw  %k7,%k6,%k5");
+	asm volatile("kandq  %k7,%k6,%k5");
+	asm volatile("kandb  %k7,%k6,%k5");
+	asm volatile("kandd  %k7,%k6,%k5");
+
+	asm volatile("kandnw  %k7,%k6,%k5");
+	asm volatile("kandnq  %k7,%k6,%k5");
+	asm volatile("kandnb  %k7,%k6,%k5");
+	asm volatile("kandnd  %k7,%k6,%k5");
+
+	asm volatile("knotw  %k7,%k6");
+	asm volatile("knotq  %k7,%k6");
+	asm volatile("knotb  %k7,%k6");
+	asm volatile("knotd  %k7,%k6");
+
+	asm volatile("korw  %k7,%k6,%k5");
+	asm volatile("korq  %k7,%k6,%k5");
+	asm volatile("korb  %k7,%k6,%k5");
+	asm volatile("kord  %k7,%k6,%k5");
+
+	asm volatile("kxnorw  %k7,%k6,%k5");
+	asm volatile("kxnorq  %k7,%k6,%k5");
+	asm volatile("kxnorb  %k7,%k6,%k5");
+	asm volatile("kxnord  %k7,%k6,%k5");
+
+	asm volatile("kxorw  %k7,%k6,%k5");
+	asm volatile("kxorq  %k7,%k6,%k5");
+	asm volatile("kxorb  %k7,%k6,%k5");
+	asm volatile("kxord  %k7,%k6,%k5");
+
+	asm volatile("kaddw  %k7,%k6,%k5");
+	asm volatile("kaddq  %k7,%k6,%k5");
+	asm volatile("kaddb  %k7,%k6,%k5");
+	asm volatile("kaddd  %k7,%k6,%k5");
+
+	asm volatile("kunpckbw %k7,%k6,%k5");
+	asm volatile("kunpckwd %k7,%k6,%k5");
+	asm volatile("kunpckdq %k7,%k6,%k5");
+
+	asm volatile("kmovw  %k6,%k5");
+	asm volatile("kmovw  (%rcx),%k5");
+	asm volatile("kmovw  0x123(%rax,%r14,8),%k5");
+	asm volatile("kmovw  %k5,(%rcx)");
+	asm volatile("kmovw  %k5,0x123(%rax,%r14,8)");
+	asm volatile("kmovw  %eax,%k5");
+	asm volatile("kmovw  %ebp,%k5");
+	asm volatile("kmovw  %r13d,%k5");
+	asm volatile("kmovw  %k5,%eax");
+	asm volatile("kmovw  %k5,%ebp");
+	asm volatile("kmovw  %k5,%r13d");
+
+	asm volatile("kmovq  %k6,%k5");
+	asm volatile("kmovq  (%rcx),%k5");
+	asm volatile("kmovq  0x123(%rax,%r14,8),%k5");
+	asm volatile("kmovq  %k5,(%rcx)");
+	asm volatile("kmovq  %k5,0x123(%rax,%r14,8)");
+	asm volatile("kmovq  %rax,%k5");
+	asm volatile("kmovq  %rbp,%k5");
+	asm volatile("kmovq  %r13,%k5");
+	asm volatile("kmovq  %k5,%rax");
+	asm volatile("kmovq  %k5,%rbp");
+	asm volatile("kmovq  %k5,%r13");
+
+	asm volatile("kmovb  %k6,%k5");
+	asm volatile("kmovb  (%rcx),%k5");
+	asm volatile("kmovb  0x123(%rax,%r14,8),%k5");
+	asm volatile("kmovb  %k5,(%rcx)");
+	asm volatile("kmovb  %k5,0x123(%rax,%r14,8)");
+	asm volatile("kmovb  %eax,%k5");
+	asm volatile("kmovb  %ebp,%k5");
+	asm volatile("kmovb  %r13d,%k5");
+	asm volatile("kmovb  %k5,%eax");
+	asm volatile("kmovb  %k5,%ebp");
+	asm volatile("kmovb  %k5,%r13d");
+
+	asm volatile("kmovd  %k6,%k5");
+	asm volatile("kmovd  (%rcx),%k5");
+	asm volatile("kmovd  0x123(%rax,%r14,8),%k5");
+	asm volatile("kmovd  %k5,(%rcx)");
+	asm volatile("kmovd  %k5,0x123(%rax,%r14,8)");
+	asm volatile("kmovd  %eax,%k5");
+	asm volatile("kmovd  %ebp,%k5");
+	asm volatile("kmovd  %r13d,%k5");
+	asm volatile("kmovd  %k5,%eax");
+	asm volatile("kmovd  %k5,%ebp");
+	asm volatile("kmovd %k5,%r13d");
+
+	asm volatile("kortestw %k6,%k5");
+	asm volatile("kortestq %k6,%k5");
+	asm volatile("kortestb %k6,%k5");
+	asm volatile("kortestd %k6,%k5");
+
+	asm volatile("ktestw %k6,%k5");
+	asm volatile("ktestq %k6,%k5");
+	asm volatile("ktestb %k6,%k5");
+	asm volatile("ktestd %k6,%k5");
+
+	asm volatile("kshiftrw $0x12,%k6,%k5");
+	asm volatile("kshiftrq $0x5b,%k6,%k5");
+	asm volatile("kshiftlw $0x12,%k6,%k5");
+	asm volatile("kshiftlq $0x5b,%k6,%k5");
+
+	/* AVX-512: Op code 0f 5b */
+	asm volatile("vcvtdq2ps %xmm5,%xmm6");
+	asm volatile("vcvtqq2ps %zmm29,%ymm6{%k7}");
+	asm volatile("vcvtps2dq %xmm5,%xmm6");
+	asm volatile("vcvttps2dq %xmm5,%xmm6");
+
+	/* AVX-512: Op code 0f 6f */
+
+	asm volatile("movq   %mm0,%mm4");
+	asm volatile("vmovdqa %ymm4,%ymm6");
+	asm volatile("vmovdqa32 %zmm25,%zmm26");
+	asm volatile("vmovdqa64 %zmm25,%zmm26");
+	asm volatile("vmovdqu %ymm4,%ymm6");
+	asm volatile("vmovdqu32 %zmm29,%zmm30");
+	asm volatile("vmovdqu64 %zmm25,%zmm26");
+	asm volatile("vmovdqu8 %zmm29,%zmm30");
+	asm volatile("vmovdqu16 %zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 78 */
+
+	asm volatile("vmread %rax,%rbx");
+	asm volatile("vcvttps2udq %zmm25,%zmm26");
+	asm volatile("vcvttpd2udq %zmm29,%ymm6{%k7}");
+	asm volatile("vcvttsd2usi %xmm6,%rax");
+	asm volatile("vcvttss2usi %xmm6,%rax");
+	asm volatile("vcvttps2uqq %ymm5,%zmm26{%k7}");
+	asm volatile("vcvttpd2uqq %zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 79 */
+
+	asm volatile("vmwrite %rax,%rbx");
+	asm volatile("vcvtps2udq %zmm25,%zmm26");
+	asm volatile("vcvtpd2udq %zmm29,%ymm6{%k7}");
+	asm volatile("vcvtsd2usi %xmm6,%rax");
+	asm volatile("vcvtss2usi %xmm6,%rax");
+	asm volatile("vcvtps2uqq %ymm5,%zmm26{%k7}");
+	asm volatile("vcvtpd2uqq %zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 7a */
+
+	asm volatile("vcvtudq2pd %ymm5,%zmm29{%k7}");
+	asm volatile("vcvtuqq2pd %zmm25,%zmm26");
+	asm volatile("vcvtudq2ps %zmm29,%zmm30");
+	asm volatile("vcvtuqq2ps %zmm25,%ymm26{%k7}");
+	asm volatile("vcvttps2qq %ymm25,%zmm26{%k7}");
+	asm volatile("vcvttpd2qq %zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 7b */
+
+	asm volatile("vcvtusi2sd %eax,%xmm5,%xmm6");
+	asm volatile("vcvtusi2ss %eax,%xmm5,%xmm6");
+	asm volatile("vcvtps2qq %ymm5,%zmm26{%k7}");
+	asm volatile("vcvtpd2qq %zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 7f */
+
+	asm volatile("movq.s  %mm0,%mm4");
+	asm volatile("vmovdqa %ymm8,%ymm6");
+	asm volatile("vmovdqa32.s %zmm25,%zmm26");
+	asm volatile("vmovdqa64.s %zmm25,%zmm26");
+	asm volatile("vmovdqu %ymm8,%ymm6");
+	asm volatile("vmovdqu32.s %zmm25,%zmm26");
+	asm volatile("vmovdqu64.s %zmm25,%zmm26");
+	asm volatile("vmovdqu8.s %zmm30,(%rcx)");
+	asm volatile("vmovdqu16.s %zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f db */
+
+	asm volatile("pand  %mm1,%mm2");
+	asm volatile("pand  %xmm1,%xmm2");
+	asm volatile("vpand  %ymm4,%ymm6,%ymm2");
+	asm volatile("vpandd %zmm24,%zmm25,%zmm26");
+	asm volatile("vpandq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f df */
+
+	asm volatile("pandn  %mm1,%mm2");
+	asm volatile("pandn  %xmm1,%xmm2");
+	asm volatile("vpandn %ymm4,%ymm6,%ymm2");
+	asm volatile("vpandnd %zmm24,%zmm25,%zmm26");
+	asm volatile("vpandnq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f e6 */
+
+	asm volatile("vcvttpd2dq %xmm1,%xmm2");
+	asm volatile("vcvtdq2pd %xmm5,%xmm6");
+	asm volatile("vcvtdq2pd %ymm5,%zmm26{%k7}");
+	asm volatile("vcvtqq2pd %zmm25,%zmm26");
+	asm volatile("vcvtpd2dq %xmm1,%xmm2");
+
+	/* AVX-512: Op code 0f eb */
+
+	asm volatile("por   %mm4,%mm6");
+	asm volatile("vpor   %ymm4,%ymm6,%ymm2");
+	asm volatile("vpord  %zmm24,%zmm25,%zmm26");
+	asm volatile("vporq  %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f ef */
+
+	asm volatile("pxor   %mm4,%mm6");
+	asm volatile("vpxor  %ymm4,%ymm6,%ymm2");
+	asm volatile("vpxord %zmm24,%zmm25,%zmm26");
+	asm volatile("vpxorq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 10 */
+
+	asm volatile("pblendvb %xmm1,%xmm0");
+	asm volatile("vpsrlvw %zmm27,%zmm28,%zmm29");
+	asm volatile("vpmovuswb %zmm28,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 11 */
+
+	asm volatile("vpmovusdb %zmm28,%xmm6{%k7}");
+	asm volatile("vpsravw %zmm27,%zmm28,%zmm29");
+
+	/* AVX-512: Op code 0f 38 12 */
+
+	asm volatile("vpmovusqb %zmm27,%xmm6{%k7}");
+	asm volatile("vpsllvw %zmm27,%zmm28,%zmm29");
+
+	/* AVX-512: Op code 0f 38 13 */
+
+	asm volatile("vcvtph2ps %xmm3,%ymm5");
+	asm volatile("vcvtph2ps %ymm5,%zmm27{%k7}");
+	asm volatile("vpmovusdw %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 14 */
+
+	asm volatile("blendvps %xmm1,%xmm0");
+	asm volatile("vpmovusqw %zmm27,%xmm6{%k7}");
+	asm volatile("vprorvd %zmm27,%zmm28,%zmm29");
+	asm volatile("vprorvq %zmm27,%zmm28,%zmm29");
+
+	/* AVX-512: Op code 0f 38 15 */
+
+	asm volatile("blendvpd %xmm1,%xmm0");
+	asm volatile("vpmovusqd %zmm27,%ymm6{%k7}");
+	asm volatile("vprolvd %zmm27,%zmm28,%zmm29");
+	asm volatile("vprolvq %zmm27,%zmm28,%zmm29");
+
+	/* AVX-512: Op code 0f 38 16 */
+
+	asm volatile("vpermps %ymm4,%ymm6,%ymm2");
+	asm volatile("vpermps %ymm24,%ymm26,%ymm22{%k7}");
+	asm volatile("vpermpd %ymm24,%ymm26,%ymm22{%k7}");
+
+	/* AVX-512: Op code 0f 38 19 */
+
+	asm volatile("vbroadcastsd %xmm4,%ymm6");
+	asm volatile("vbroadcastf32x2 %xmm27,%zmm26");
+
+	/* AVX-512: Op code 0f 38 1a */
+
+	asm volatile("vbroadcastf128 (%rcx),%ymm4");
+	asm volatile("vbroadcastf32x4 (%rcx),%zmm26");
+	asm volatile("vbroadcastf64x2 (%rcx),%zmm26");
+
+	/* AVX-512: Op code 0f 38 1b */
+
+	asm volatile("vbroadcastf32x8 (%rcx),%zmm27");
+	asm volatile("vbroadcastf64x4 (%rcx),%zmm26");
+
+	/* AVX-512: Op code 0f 38 1f */
+
+	asm volatile("vpabsq %zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 20 */
+
+	asm volatile("vpmovsxbw %xmm4,%xmm5");
+	asm volatile("vpmovswb %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 21 */
+
+	asm volatile("vpmovsxbd %xmm4,%ymm6");
+	asm volatile("vpmovsdb %zmm27,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 22 */
+
+	asm volatile("vpmovsxbq %xmm4,%ymm4");
+	asm volatile("vpmovsqb %zmm27,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 23 */
+
+	asm volatile("vpmovsxwd %xmm4,%ymm4");
+	asm volatile("vpmovsdw %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 24 */
+
+	asm volatile("vpmovsxwq %xmm4,%ymm6");
+	asm volatile("vpmovsqw %zmm27,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 25 */
+
+	asm volatile("vpmovsxdq %xmm4,%ymm4");
+	asm volatile("vpmovsqd %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 26 */
+
+	asm volatile("vptestmb %zmm27,%zmm28,%k5");
+	asm volatile("vptestmw %zmm27,%zmm28,%k5");
+	asm volatile("vptestnmb %zmm26,%zmm27,%k5");
+	asm volatile("vptestnmw %zmm26,%zmm27,%k5");
+
+	/* AVX-512: Op code 0f 38 27 */
+
+	asm volatile("vptestmd %zmm27,%zmm28,%k5");
+	asm volatile("vptestmq %zmm27,%zmm28,%k5");
+	asm volatile("vptestnmd %zmm26,%zmm27,%k5");
+	asm volatile("vptestnmq %zmm26,%zmm27,%k5");
+
+	/* AVX-512: Op code 0f 38 28 */
+
+	asm volatile("vpmuldq %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmovm2b %k5,%zmm28");
+	asm volatile("vpmovm2w %k5,%zmm28");
+
+	/* AVX-512: Op code 0f 38 29 */
+
+	asm volatile("vpcmpeqq %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmovb2m %zmm28,%k5");
+	asm volatile("vpmovw2m %zmm28,%k5");
+
+	/* AVX-512: Op code 0f 38 2a */
+
+	asm volatile("vmovntdqa (%rcx),%ymm4");
+	asm volatile("vpbroadcastmb2q %k6,%zmm30");
+
+	/* AVX-512: Op code 0f 38 2c */
+
+	asm volatile("vmaskmovps (%rcx),%ymm4,%ymm6");
+	asm volatile("vscalefps %zmm24,%zmm25,%zmm26");
+	asm volatile("vscalefpd %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 2d */
+
+	asm volatile("vmaskmovpd (%rcx),%ymm4,%ymm6");
+	asm volatile("vscalefss %xmm24,%xmm25,%xmm26{%k7}");
+	asm volatile("vscalefsd %xmm24,%xmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 38 30 */
+
+	asm volatile("vpmovzxbw %xmm4,%ymm4");
+	asm volatile("vpmovwb %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 31 */
+
+	asm volatile("vpmovzxbd %xmm4,%ymm6");
+	asm volatile("vpmovdb %zmm27,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 32 */
+
+	asm volatile("vpmovzxbq %xmm4,%ymm4");
+	asm volatile("vpmovqb %zmm27,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 33 */
+
+	asm volatile("vpmovzxwd %xmm4,%ymm4");
+	asm volatile("vpmovdw %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 34 */
+
+	asm volatile("vpmovzxwq %xmm4,%ymm6");
+	asm volatile("vpmovqw %zmm27,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 35 */
+
+	asm volatile("vpmovzxdq %xmm4,%ymm4");
+	asm volatile("vpmovqd %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 36 */
+
+	asm volatile("vpermd %ymm4,%ymm6,%ymm2");
+	asm volatile("vpermd %ymm24,%ymm26,%ymm22{%k7}");
+	asm volatile("vpermq %ymm24,%ymm26,%ymm22{%k7}");
+
+	/* AVX-512: Op code 0f 38 38 */
+
+	asm volatile("vpminsb %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmovm2d %k5,%zmm28");
+	asm volatile("vpmovm2q %k5,%zmm28");
+
+	/* AVX-512: Op code 0f 38 39 */
+
+	asm volatile("vpminsd %xmm1,%xmm2,%xmm3");
+	asm volatile("vpminsd %zmm24,%zmm25,%zmm26");
+	asm volatile("vpminsq %zmm24,%zmm25,%zmm26");
+	asm volatile("vpmovd2m %zmm28,%k5");
+	asm volatile("vpmovq2m %zmm28,%k5");
+
+	/* AVX-512: Op code 0f 38 3a */
+
+	asm volatile("vpminuw %ymm4,%ymm6,%ymm2");
+	asm volatile("vpbroadcastmw2d %k6,%zmm28");
+
+	/* AVX-512: Op code 0f 38 3b */
+
+	asm volatile("vpminud %ymm4,%ymm6,%ymm2");
+	asm volatile("vpminud %zmm24,%zmm25,%zmm26");
+	asm volatile("vpminuq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 3d */
+
+	asm volatile("vpmaxsd %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmaxsd %zmm24,%zmm25,%zmm26");
+	asm volatile("vpmaxsq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 3f */
+
+	asm volatile("vpmaxud %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmaxud %zmm24,%zmm25,%zmm26");
+	asm volatile("vpmaxuq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 40 */
+
+	asm volatile("vpmulld %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmulld %zmm24,%zmm25,%zmm26");
+	asm volatile("vpmullq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 42 */
+
+	asm volatile("vgetexpps %zmm25,%zmm26");
+	asm volatile("vgetexppd %zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 43 */
+
+	asm volatile("vgetexpss %xmm24,%xmm25,%xmm26{%k7}");
+	asm volatile("vgetexpsd %xmm28,%xmm29,%xmm30{%k7}");
+
+	/* AVX-512: Op code 0f 38 44 */
+
+	asm volatile("vplzcntd %zmm27,%zmm28");
+	asm volatile("vplzcntq %zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 46 */
+
+	asm volatile("vpsravd %ymm4,%ymm6,%ymm2");
+	asm volatile("vpsravd %zmm24,%zmm25,%zmm26");
+	asm volatile("vpsravq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 4c */
+
+	asm volatile("vrcp14ps %zmm25,%zmm26");
+	asm volatile("vrcp14pd %zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 4d */
+
+	asm volatile("vrcp14ss %xmm24,%xmm25,%xmm26{%k7}");
+	asm volatile("vrcp14sd %xmm24,%xmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 38 4e */
+
+	asm volatile("vrsqrt14ps %zmm25,%zmm26");
+	asm volatile("vrsqrt14pd %zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 4f */
+
+	asm volatile("vrsqrt14ss %xmm24,%xmm25,%xmm26{%k7}");
+	asm volatile("vrsqrt14sd %xmm24,%xmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 38 59 */
+
+	asm volatile("vpbroadcastq %xmm4,%xmm6");
+	asm volatile("vbroadcasti32x2 %xmm27,%zmm26");
+
+	/* AVX-512: Op code 0f 38 5a */
+
+	asm volatile("vbroadcasti128 (%rcx),%ymm4");
+	asm volatile("vbroadcasti32x4 (%rcx),%zmm26");
+	asm volatile("vbroadcasti64x2 (%rcx),%zmm26");
+
+	/* AVX-512: Op code 0f 38 5b */
+
+	asm volatile("vbroadcasti32x8 (%rcx),%zmm28");
+	asm volatile("vbroadcasti64x4 (%rcx),%zmm26");
+
+	/* AVX-512: Op code 0f 38 64 */
+
+	asm volatile("vpblendmd %zmm26,%zmm27,%zmm28");
+	asm volatile("vpblendmq %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 65 */
+
+	asm volatile("vblendmps %zmm24,%zmm25,%zmm26");
+	asm volatile("vblendmpd %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 66 */
+
+	asm volatile("vpblendmb %zmm26,%zmm27,%zmm28");
+	asm volatile("vpblendmw %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 75 */
+
+	asm volatile("vpermi2b %zmm24,%zmm25,%zmm26");
+	asm volatile("vpermi2w %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 76 */
+
+	asm volatile("vpermi2d %zmm26,%zmm27,%zmm28");
+	asm volatile("vpermi2q %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 77 */
+
+	asm volatile("vpermi2ps %zmm26,%zmm27,%zmm28");
+	asm volatile("vpermi2pd %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 7a */
+
+	asm volatile("vpbroadcastb %eax,%xmm30");
+
+	/* AVX-512: Op code 0f 38 7b */
+
+	asm volatile("vpbroadcastw %eax,%xmm30");
+
+	/* AVX-512: Op code 0f 38 7c */
+
+	asm volatile("vpbroadcastd %eax,%xmm30");
+	asm volatile("vpbroadcastq %rax,%zmm30");
+
+	/* AVX-512: Op code 0f 38 7d */
+
+	asm volatile("vpermt2b %zmm26,%zmm27,%zmm28");
+	asm volatile("vpermt2w %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 7e */
+
+	asm volatile("vpermt2d %zmm26,%zmm27,%zmm28");
+	asm volatile("vpermt2q %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 7f */
+
+	asm volatile("vpermt2ps %zmm26,%zmm27,%zmm28");
+	asm volatile("vpermt2pd %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 83 */
+
+	asm volatile("vpmultishiftqb %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 88 */
+
+	asm volatile("vexpandps (%rcx),%zmm26");
+	asm volatile("vexpandpd (%rcx),%zmm28");
+
+	/* AVX-512: Op code 0f 38 89 */
+
+	asm volatile("vpexpandd (%rcx),%zmm28");
+	asm volatile("vpexpandq (%rcx),%zmm26");
+
+	/* AVX-512: Op code 0f 38 8a */
+
+	asm volatile("vcompressps %zmm28,(%rcx)");
+	asm volatile("vcompresspd %zmm28,(%rcx)");
+
+	/* AVX-512: Op code 0f 38 8b */
+
+	asm volatile("vpcompressd %zmm28,(%rcx)");
+	asm volatile("vpcompressq %zmm26,(%rcx)");
+
+	/* AVX-512: Op code 0f 38 8d */
+
+	asm volatile("vpermb %zmm26,%zmm27,%zmm28");
+	asm volatile("vpermw %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 90 */
+
+	asm volatile("vpgatherdd %xmm2,0x02(%rbp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherdq %xmm2,0x04(%rbp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherdd 0x7b(%rbp,%zmm27,8),%zmm26{%k1}");
+	asm volatile("vpgatherdq 0x7b(%rbp,%ymm27,8),%zmm26{%k1}");
+
+	/* AVX-512: Op code 0f 38 91 */
+
+	asm volatile("vpgatherqd %xmm2,0x02(%rbp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherqq %xmm2,0x02(%rbp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherqd 0x7b(%rbp,%zmm27,8),%ymm26{%k1}");
+	asm volatile("vpgatherqq 0x7b(%rbp,%zmm27,8),%zmm26{%k1}");
+
+	/* AVX-512: Op code 0f 38 a0 */
+
+	asm volatile("vpscatterdd %zmm28,0x7b(%rbp,%zmm29,8){%k1}");
+	asm volatile("vpscatterdq %zmm26,0x7b(%rbp,%ymm27,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 a1 */
+
+	asm volatile("vpscatterqd %ymm6,0x7b(%rbp,%zmm29,8){%k1}");
+	asm volatile("vpscatterqq %ymm6,0x7b(%rbp,%ymm27,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 a2 */
+
+	asm volatile("vscatterdps %zmm28,0x7b(%rbp,%zmm29,8){%k1}");
+	asm volatile("vscatterdpd %zmm28,0x7b(%rbp,%ymm27,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 a3 */
+
+	asm volatile("vscatterqps %ymm6,0x7b(%rbp,%zmm29,8){%k1}");
+	asm volatile("vscatterqpd %zmm28,0x7b(%rbp,%zmm29,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 b4 */
+
+	asm volatile("vpmadd52luq %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 b5 */
+
+	asm volatile("vpmadd52huq %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 c4 */
+
+	asm volatile("vpconflictd %zmm26,%zmm27");
+	asm volatile("vpconflictq %zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 38 c8 */
+
+	asm volatile("vexp2ps %zmm29,%zmm30");
+	asm volatile("vexp2pd %zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 38 ca */
+
+	asm volatile("vrcp28ps %zmm29,%zmm30");
+	asm volatile("vrcp28pd %zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 38 cb */
+
+	asm volatile("vrcp28ss %xmm28,%xmm29,%xmm30{%k7}");
+	asm volatile("vrcp28sd %xmm25,%xmm26,%xmm27{%k7}");
+
+	/* AVX-512: Op code 0f 38 cc */
+
+	asm volatile("vrsqrt28ps %zmm29,%zmm30");
+	asm volatile("vrsqrt28pd %zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 38 cd */
+
+	asm volatile("vrsqrt28ss %xmm28,%xmm29,%xmm30{%k7}");
+	asm volatile("vrsqrt28sd %xmm25,%xmm26,%xmm27{%k7}");
+
+	/* AVX-512: Op code 0f 3a 03 */
+
+	asm volatile("valignd $0x12,%zmm28,%zmm29,%zmm30");
+	asm volatile("valignq $0x12,%zmm25,%zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 3a 08 */
+
+	asm volatile("vroundps $0x5,%ymm6,%ymm2");
+	asm volatile("vrndscaleps $0x12,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 3a 09 */
+
+	asm volatile("vroundpd $0x5,%ymm6,%ymm2");
+	asm volatile("vrndscalepd $0x12,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 3a 0a */
+
+	asm volatile("vroundss $0x5,%xmm4,%xmm6,%xmm2");
+	asm volatile("vrndscaless $0x12,%xmm24,%xmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 3a 0b */
+
+	asm volatile("vroundsd $0x5,%xmm4,%xmm6,%xmm2");
+	asm volatile("vrndscalesd $0x12,%xmm24,%xmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 3a 18 */
+
+	asm volatile("vinsertf128 $0x5,%xmm4,%ymm4,%ymm6");
+	asm volatile("vinsertf32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}");
+	asm volatile("vinsertf64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}");
+
+	/* AVX-512: Op code 0f 3a 19 */
+
+	asm volatile("vextractf128 $0x5,%ymm4,%xmm4");
+	asm volatile("vextractf32x4 $0x12,%zmm25,%xmm26{%k7}");
+	asm volatile("vextractf64x2 $0x12,%zmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 3a 1a */
+
+	asm volatile("vinsertf32x8 $0x12,%ymm25,%zmm26,%zmm27{%k7}");
+	asm volatile("vinsertf64x4 $0x12,%ymm28,%zmm29,%zmm30{%k7}");
+
+	/* AVX-512: Op code 0f 3a 1b */
+
+	asm volatile("vextractf32x8 $0x12,%zmm29,%ymm30{%k7}");
+	asm volatile("vextractf64x4 $0x12,%zmm26,%ymm27{%k7}");
+
+	/* AVX-512: Op code 0f 3a 1e */
+
+	asm volatile("vpcmpud $0x12,%zmm29,%zmm30,%k5");
+	asm volatile("vpcmpuq $0x12,%zmm26,%zmm27,%k5");
+
+	/* AVX-512: Op code 0f 3a 1f */
+
+	asm volatile("vpcmpd $0x12,%zmm29,%zmm30,%k5");
+	asm volatile("vpcmpq $0x12,%zmm26,%zmm27,%k5");
+
+	/* AVX-512: Op code 0f 3a 23 */
+
+	asm volatile("vshuff32x4 $0x12,%zmm28,%zmm29,%zmm30");
+	asm volatile("vshuff64x2 $0x12,%zmm25,%zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 3a 25 */
+
+	asm volatile("vpternlogd $0x12,%zmm28,%zmm29,%zmm30");
+	asm volatile("vpternlogq $0x12,%zmm28,%zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 3a 26 */
+
+	asm volatile("vgetmantps $0x12,%zmm26,%zmm27");
+	asm volatile("vgetmantpd $0x12,%zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 3a 27 */
+
+	asm volatile("vgetmantss $0x12,%xmm25,%xmm26,%xmm27{%k7}");
+	asm volatile("vgetmantsd $0x12,%xmm28,%xmm29,%xmm30{%k7}");
+
+	/* AVX-512: Op code 0f 3a 38 */
+
+	asm volatile("vinserti128 $0x5,%xmm4,%ymm4,%ymm6");
+	asm volatile("vinserti32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}");
+	asm volatile("vinserti64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}");
+
+	/* AVX-512: Op code 0f 3a 39 */
+
+	asm volatile("vextracti128 $0x5,%ymm4,%xmm6");
+	asm volatile("vextracti32x4 $0x12,%zmm25,%xmm26{%k7}");
+	asm volatile("vextracti64x2 $0x12,%zmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 3a 3a */
+
+	asm volatile("vinserti32x8 $0x12,%ymm28,%zmm29,%zmm30{%k7}");
+	asm volatile("vinserti64x4 $0x12,%ymm25,%zmm26,%zmm27{%k7}");
+
+	/* AVX-512: Op code 0f 3a 3b */
+
+	asm volatile("vextracti32x8 $0x12,%zmm29,%ymm30{%k7}");
+	asm volatile("vextracti64x4 $0x12,%zmm26,%ymm27{%k7}");
+
+	/* AVX-512: Op code 0f 3a 3e */
+
+	asm volatile("vpcmpub $0x12,%zmm29,%zmm30,%k5");
+	asm volatile("vpcmpuw $0x12,%zmm26,%zmm27,%k5");
+
+	/* AVX-512: Op code 0f 3a 3f */
+
+	asm volatile("vpcmpb $0x12,%zmm29,%zmm30,%k5");
+	asm volatile("vpcmpw $0x12,%zmm26,%zmm27,%k5");
+
+	/* AVX-512: Op code 0f 3a 42 */
+
+	asm volatile("vmpsadbw $0x5,%ymm4,%ymm6,%ymm2");
+	asm volatile("vdbpsadbw $0x12,%zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 3a 43 */
+
+	asm volatile("vshufi32x4 $0x12,%zmm25,%zmm26,%zmm27");
+	asm volatile("vshufi64x2 $0x12,%zmm28,%zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 3a 50 */
+
+	asm volatile("vrangeps $0x12,%zmm25,%zmm26,%zmm27");
+	asm volatile("vrangepd $0x12,%zmm28,%zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 3a 51 */
+
+	asm volatile("vrangess $0x12,%xmm25,%xmm26,%xmm27");
+	asm volatile("vrangesd $0x12,%xmm28,%xmm29,%xmm30");
+
+	/* AVX-512: Op code 0f 3a 54 */
+
+	asm volatile("vfixupimmps $0x12,%zmm28,%zmm29,%zmm30");
+	asm volatile("vfixupimmpd $0x12,%zmm25,%zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 3a 55 */
+
+	asm volatile("vfixupimmss $0x12,%xmm28,%xmm29,%xmm30{%k7}");
+	asm volatile("vfixupimmsd $0x12,%xmm25,%xmm26,%xmm27{%k7}");
+
+	/* AVX-512: Op code 0f 3a 56 */
+
+	asm volatile("vreduceps $0x12,%zmm26,%zmm27");
+	asm volatile("vreducepd $0x12,%zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 3a 57 */
+
+	asm volatile("vreducess $0x12,%xmm25,%xmm26,%xmm27");
+	asm volatile("vreducesd $0x12,%xmm28,%xmm29,%xmm30");
+
+	/* AVX-512: Op code 0f 3a 66 */
+
+	asm volatile("vfpclassps $0x12,%zmm27,%k5");
+	asm volatile("vfpclasspd $0x12,%zmm30,%k5");
+
+	/* AVX-512: Op code 0f 3a 67 */
+
+	asm volatile("vfpclassss $0x12,%xmm27,%k5");
+	asm volatile("vfpclasssd $0x12,%xmm30,%k5");
+
+	/* AVX-512: Op code 0f 72 (Grp13) */
+
+	asm volatile("vprord $0x12,%zmm25,%zmm26");
+	asm volatile("vprorq $0x12,%zmm25,%zmm26");
+	asm volatile("vprold $0x12,%zmm29,%zmm30");
+	asm volatile("vprolq $0x12,%zmm29,%zmm30");
+	asm volatile("psrad  $0x2,%mm6");
+	asm volatile("vpsrad $0x5,%ymm6,%ymm2");
+	asm volatile("vpsrad $0x5,%zmm26,%zmm22");
+	asm volatile("vpsraq $0x5,%zmm26,%zmm22");
+
+	/* AVX-512: Op code 0f 38 c6 (Grp18) */
+
+	asm volatile("vgatherpf0dps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vgatherpf0dpd 0x7b(%r14,%ymm31,8){%k1}");
+	asm volatile("vgatherpf1dps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vgatherpf1dpd 0x7b(%r14,%ymm31,8){%k1}");
+	asm volatile("vscatterpf0dps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vscatterpf0dpd 0x7b(%r14,%ymm31,8){%k1}");
+	asm volatile("vscatterpf1dps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vscatterpf1dpd 0x7b(%r14,%ymm31,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 c7 (Grp19) */
+
+	asm volatile("vgatherpf0qps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vgatherpf0qpd 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vgatherpf1qps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vgatherpf1qpd 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vscatterpf0qps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vscatterpf0qpd 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vscatterpf1qps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vscatterpf1qpd 0x7b(%r14,%zmm31,8){%k1}");
+
+	/* AVX-512: Examples */
+
+	asm volatile("vaddpd %zmm28,%zmm29,%zmm30");
+	asm volatile("vaddpd %zmm28,%zmm29,%zmm30{%k7}");
+	asm volatile("vaddpd %zmm28,%zmm29,%zmm30{%k7}{z}");
+	asm volatile("vaddpd {rn-sae},%zmm28,%zmm29,%zmm30");
+	asm volatile("vaddpd {ru-sae},%zmm28,%zmm29,%zmm30");
+	asm volatile("vaddpd {rd-sae},%zmm28,%zmm29,%zmm30");
+	asm volatile("vaddpd {rz-sae},%zmm28,%zmm29,%zmm30");
+	asm volatile("vaddpd (%rcx),%zmm29,%zmm30");
+	asm volatile("vaddpd 0x123(%rax,%r14,8),%zmm29,%zmm30");
+	asm volatile("vaddpd (%rcx){1to8},%zmm29,%zmm30");
+	asm volatile("vaddpd 0x1fc0(%rdx),%zmm29,%zmm30");
+	asm volatile("vaddpd 0x3f8(%rdx){1to8},%zmm29,%zmm30");
+	asm volatile("vcmpeq_uqps 0x1fc(%rdx){1to16},%zmm30,%k5");
+	asm volatile("vcmpltsd 0x123(%rax,%r14,8),%xmm29,%k5{%k7}");
+	asm volatile("vcmplesd {sae},%xmm28,%xmm29,%k5{%k7}");
+	asm volatile("vgetmantss $0x5b,0x123(%rax,%r14,8),%xmm29,%xmm30{%k7}");
+
+	/* bndmk m64, bnd */
+
+	asm volatile("bndmk (%rax), %bnd0");
+	asm volatile("bndmk (%r8), %bnd0");
+	asm volatile("bndmk (0x12345678), %bnd0");
+	asm volatile("bndmk (%rax), %bnd3");
+	asm volatile("bndmk (%rcx,%rax,1), %bnd0");
+	asm volatile("bndmk 0x12345678(,%rax,1), %bnd0");
+	asm volatile("bndmk (%rax,%rcx,1), %bnd0");
+	asm volatile("bndmk (%rax,%rcx,8), %bnd0");
+	asm volatile("bndmk 0x12(%rax), %bnd0");
+	asm volatile("bndmk 0x12(%rbp), %bnd0");
+	asm volatile("bndmk 0x12(%rcx,%rax,1), %bnd0");
+	asm volatile("bndmk 0x12(%rbp,%rax,1), %bnd0");
+	asm volatile("bndmk 0x12(%rax,%rcx,1), %bnd0");
+	asm volatile("bndmk 0x12(%rax,%rcx,8), %bnd0");
+	asm volatile("bndmk 0x12345678(%rax), %bnd0");
+	asm volatile("bndmk 0x12345678(%rbp), %bnd0");
+	asm volatile("bndmk 0x12345678(%rcx,%rax,1), %bnd0");
+	asm volatile("bndmk 0x12345678(%rbp,%rax,1), %bnd0");
+	asm volatile("bndmk 0x12345678(%rax,%rcx,1), %bnd0");
+	asm volatile("bndmk 0x12345678(%rax,%rcx,8), %bnd0");
+
+	/* bndcl r/m64, bnd */
+
+	asm volatile("bndcl (%rax), %bnd0");
+	asm volatile("bndcl (%r8), %bnd0");
+	asm volatile("bndcl (0x12345678), %bnd0");
+	asm volatile("bndcl (%rax), %bnd3");
+	asm volatile("bndcl (%rcx,%rax,1), %bnd0");
+	asm volatile("bndcl 0x12345678(,%rax,1), %bnd0");
+	asm volatile("bndcl (%rax,%rcx,1), %bnd0");
+	asm volatile("bndcl (%rax,%rcx,8), %bnd0");
+	asm volatile("bndcl 0x12(%rax), %bnd0");
+	asm volatile("bndcl 0x12(%rbp), %bnd0");
+	asm volatile("bndcl 0x12(%rcx,%rax,1), %bnd0");
+	asm volatile("bndcl 0x12(%rbp,%rax,1), %bnd0");
+	asm volatile("bndcl 0x12(%rax,%rcx,1), %bnd0");
+	asm volatile("bndcl 0x12(%rax,%rcx,8), %bnd0");
+	asm volatile("bndcl 0x12345678(%rax), %bnd0");
+	asm volatile("bndcl 0x12345678(%rbp), %bnd0");
+	asm volatile("bndcl 0x12345678(%rcx,%rax,1), %bnd0");
+	asm volatile("bndcl 0x12345678(%rbp,%rax,1), %bnd0");
+	asm volatile("bndcl 0x12345678(%rax,%rcx,1), %bnd0");
+	asm volatile("bndcl 0x12345678(%rax,%rcx,8), %bnd0");
+	asm volatile("bndcl %rax, %bnd0");
+
+	/* bndcu r/m64, bnd */
+
+	asm volatile("bndcu (%rax), %bnd0");
+	asm volatile("bndcu (%r8), %bnd0");
+	asm volatile("bndcu (0x12345678), %bnd0");
+	asm volatile("bndcu (%rax), %bnd3");
+	asm volatile("bndcu (%rcx,%rax,1), %bnd0");
+	asm volatile("bndcu 0x12345678(,%rax,1), %bnd0");
+	asm volatile("bndcu (%rax,%rcx,1), %bnd0");
+	asm volatile("bndcu (%rax,%rcx,8), %bnd0");
+	asm volatile("bndcu 0x12(%rax), %bnd0");
+	asm volatile("bndcu 0x12(%rbp), %bnd0");
+	asm volatile("bndcu 0x12(%rcx,%rax,1), %bnd0");
+	asm volatile("bndcu 0x12(%rbp,%rax,1), %bnd0");
+	asm volatile("bndcu 0x12(%rax,%rcx,1), %bnd0");
+	asm volatile("bndcu 0x12(%rax,%rcx,8), %bnd0");
+	asm volatile("bndcu 0x12345678(%rax), %bnd0");
+	asm volatile("bndcu 0x12345678(%rbp), %bnd0");
+	asm volatile("bndcu 0x12345678(%rcx,%rax,1), %bnd0");
+	asm volatile("bndcu 0x12345678(%rbp,%rax,1), %bnd0");
+	asm volatile("bndcu 0x12345678(%rax,%rcx,1), %bnd0");
+	asm volatile("bndcu 0x12345678(%rax,%rcx,8), %bnd0");
+	asm volatile("bndcu %rax, %bnd0");
+
+	/* bndcn r/m64, bnd */
+
+	asm volatile("bndcn (%rax), %bnd0");
+	asm volatile("bndcn (%r8), %bnd0");
+	asm volatile("bndcn (0x12345678), %bnd0");
+	asm volatile("bndcn (%rax), %bnd3");
+	asm volatile("bndcn (%rcx,%rax,1), %bnd0");
+	asm volatile("bndcn 0x12345678(,%rax,1), %bnd0");
+	asm volatile("bndcn (%rax,%rcx,1), %bnd0");
+	asm volatile("bndcn (%rax,%rcx,8), %bnd0");
+	asm volatile("bndcn 0x12(%rax), %bnd0");
+	asm volatile("bndcn 0x12(%rbp), %bnd0");
+	asm volatile("bndcn 0x12(%rcx,%rax,1), %bnd0");
+	asm volatile("bndcn 0x12(%rbp,%rax,1), %bnd0");
+	asm volatile("bndcn 0x12(%rax,%rcx,1), %bnd0");
+	asm volatile("bndcn 0x12(%rax,%rcx,8), %bnd0");
+	asm volatile("bndcn 0x12345678(%rax), %bnd0");
+	asm volatile("bndcn 0x12345678(%rbp), %bnd0");
+	asm volatile("bndcn 0x12345678(%rcx,%rax,1), %bnd0");
+	asm volatile("bndcn 0x12345678(%rbp,%rax,1), %bnd0");
+	asm volatile("bndcn 0x12345678(%rax,%rcx,1), %bnd0");
+	asm volatile("bndcn 0x12345678(%rax,%rcx,8), %bnd0");
+	asm volatile("bndcn %rax, %bnd0");
+
+	/* bndmov m128, bnd */
+
+	asm volatile("bndmov (%rax), %bnd0");
+	asm volatile("bndmov (%r8), %bnd0");
+	asm volatile("bndmov (0x12345678), %bnd0");
+	asm volatile("bndmov (%rax), %bnd3");
+	asm volatile("bndmov (%rcx,%rax,1), %bnd0");
+	asm volatile("bndmov 0x12345678(,%rax,1), %bnd0");
+	asm volatile("bndmov (%rax,%rcx,1), %bnd0");
+	asm volatile("bndmov (%rax,%rcx,8), %bnd0");
+	asm volatile("bndmov 0x12(%rax), %bnd0");
+	asm volatile("bndmov 0x12(%rbp), %bnd0");
+	asm volatile("bndmov 0x12(%rcx,%rax,1), %bnd0");
+	asm volatile("bndmov 0x12(%rbp,%rax,1), %bnd0");
+	asm volatile("bndmov 0x12(%rax,%rcx,1), %bnd0");
+	asm volatile("bndmov 0x12(%rax,%rcx,8), %bnd0");
+	asm volatile("bndmov 0x12345678(%rax), %bnd0");
+	asm volatile("bndmov 0x12345678(%rbp), %bnd0");
+	asm volatile("bndmov 0x12345678(%rcx,%rax,1), %bnd0");
+	asm volatile("bndmov 0x12345678(%rbp,%rax,1), %bnd0");
+	asm volatile("bndmov 0x12345678(%rax,%rcx,1), %bnd0");
+	asm volatile("bndmov 0x12345678(%rax,%rcx,8), %bnd0");
+
+	/* bndmov bnd, m128 */
+
+	asm volatile("bndmov %bnd0, (%rax)");
+	asm volatile("bndmov %bnd0, (%r8)");
+	asm volatile("bndmov %bnd0, (0x12345678)");
+	asm volatile("bndmov %bnd3, (%rax)");
+	asm volatile("bndmov %bnd0, (%rcx,%rax,1)");
+	asm volatile("bndmov %bnd0, 0x12345678(,%rax,1)");
+	asm volatile("bndmov %bnd0, (%rax,%rcx,1)");
+	asm volatile("bndmov %bnd0, (%rax,%rcx,8)");
+	asm volatile("bndmov %bnd0, 0x12(%rax)");
+	asm volatile("bndmov %bnd0, 0x12(%rbp)");
+	asm volatile("bndmov %bnd0, 0x12(%rcx,%rax,1)");
+	asm volatile("bndmov %bnd0, 0x12(%rbp,%rax,1)");
+	asm volatile("bndmov %bnd0, 0x12(%rax,%rcx,1)");
+	asm volatile("bndmov %bnd0, 0x12(%rax,%rcx,8)");
+	asm volatile("bndmov %bnd0, 0x12345678(%rax)");
+	asm volatile("bndmov %bnd0, 0x12345678(%rbp)");
+	asm volatile("bndmov %bnd0, 0x12345678(%rcx,%rax,1)");
+	asm volatile("bndmov %bnd0, 0x12345678(%rbp,%rax,1)");
+	asm volatile("bndmov %bnd0, 0x12345678(%rax,%rcx,1)");
+	asm volatile("bndmov %bnd0, 0x12345678(%rax,%rcx,8)");
+
+	/* bndmov bnd2, bnd1 */
+
+	asm volatile("bndmov %bnd0, %bnd1");
+	asm volatile("bndmov %bnd1, %bnd0");
+
+	/* bndldx mib, bnd */
+
+	asm volatile("bndldx (%rax), %bnd0");
+	asm volatile("bndldx (%r8), %bnd0");
+	asm volatile("bndldx (0x12345678), %bnd0");
+	asm volatile("bndldx (%rax), %bnd3");
+	asm volatile("bndldx (%rcx,%rax,1), %bnd0");
+	asm volatile("bndldx 0x12345678(,%rax,1), %bnd0");
+	asm volatile("bndldx (%rax,%rcx,1), %bnd0");
+	asm volatile("bndldx 0x12(%rax), %bnd0");
+	asm volatile("bndldx 0x12(%rbp), %bnd0");
+	asm volatile("bndldx 0x12(%rcx,%rax,1), %bnd0");
+	asm volatile("bndldx 0x12(%rbp,%rax,1), %bnd0");
+	asm volatile("bndldx 0x12(%rax,%rcx,1), %bnd0");
+	asm volatile("bndldx 0x12345678(%rax), %bnd0");
+	asm volatile("bndldx 0x12345678(%rbp), %bnd0");
+	asm volatile("bndldx 0x12345678(%rcx,%rax,1), %bnd0");
+	asm volatile("bndldx 0x12345678(%rbp,%rax,1), %bnd0");
+	asm volatile("bndldx 0x12345678(%rax,%rcx,1), %bnd0");
+
+	/* bndstx bnd, mib */
+
+	asm volatile("bndstx %bnd0, (%rax)");
+	asm volatile("bndstx %bnd0, (%r8)");
+	asm volatile("bndstx %bnd0, (0x12345678)");
+	asm volatile("bndstx %bnd3, (%rax)");
+	asm volatile("bndstx %bnd0, (%rcx,%rax,1)");
+	asm volatile("bndstx %bnd0, 0x12345678(,%rax,1)");
+	asm volatile("bndstx %bnd0, (%rax,%rcx,1)");
+	asm volatile("bndstx %bnd0, 0x12(%rax)");
+	asm volatile("bndstx %bnd0, 0x12(%rbp)");
+	asm volatile("bndstx %bnd0, 0x12(%rcx,%rax,1)");
+	asm volatile("bndstx %bnd0, 0x12(%rbp,%rax,1)");
+	asm volatile("bndstx %bnd0, 0x12(%rax,%rcx,1)");
+	asm volatile("bndstx %bnd0, 0x12345678(%rax)");
+	asm volatile("bndstx %bnd0, 0x12345678(%rbp)");
+	asm volatile("bndstx %bnd0, 0x12345678(%rcx,%rax,1)");
+	asm volatile("bndstx %bnd0, 0x12345678(%rbp,%rax,1)");
+	asm volatile("bndstx %bnd0, 0x12345678(%rax,%rcx,1)");
+
+	/* bnd prefix on call, ret, jmp and all jcc */
+
+	asm volatile("bnd call label1");  /* Expecting: call unconditional 0 */
+	asm volatile("bnd call *(%eax)"); /* Expecting: call indirect      0 */
+	asm volatile("bnd ret");          /* Expecting: ret  indirect      0 */
+	asm volatile("bnd jmp label1");   /* Expecting: jmp  unconditional 0 */
+	asm volatile("bnd jmp label1");   /* Expecting: jmp  unconditional 0 */
+	asm volatile("bnd jmp *(%ecx)");  /* Expecting: jmp  indirect      0 */
+	asm volatile("bnd jne label1");   /* Expecting: jcc  conditional   0 */
+
+	/* sha1rnds4 imm8, xmm2/m128, xmm1 */
+
+	asm volatile("sha1rnds4 $0x0, %xmm1, %xmm0");
+	asm volatile("sha1rnds4 $0x91, %xmm7, %xmm2");
+	asm volatile("sha1rnds4 $0x91, %xmm8, %xmm0");
+	asm volatile("sha1rnds4 $0x91, %xmm7, %xmm8");
+	asm volatile("sha1rnds4 $0x91, %xmm15, %xmm8");
+	asm volatile("sha1rnds4 $0x91, (%rax), %xmm0");
+	asm volatile("sha1rnds4 $0x91, (%r8), %xmm0");
+	asm volatile("sha1rnds4 $0x91, (0x12345678), %xmm0");
+	asm volatile("sha1rnds4 $0x91, (%rax), %xmm3");
+	asm volatile("sha1rnds4 $0x91, (%rcx,%rax,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(,%rax,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, (%rax,%rcx,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, (%rax,%rcx,8), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12(%rax), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12(%rbp), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12(%rcx,%rax,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12(%rbp,%rax,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12(%rax,%rcx,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12(%rax,%rcx,8), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%rax), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%rbp), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%rcx,%rax,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%rbp,%rax,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%rax,%rcx,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%rax,%rcx,8), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%rax,%rcx,8), %xmm15");
+
+	/* sha1nexte xmm2/m128, xmm1 */
+
+	asm volatile("sha1nexte %xmm1, %xmm0");
+	asm volatile("sha1nexte %xmm7, %xmm2");
+	asm volatile("sha1nexte %xmm8, %xmm0");
+	asm volatile("sha1nexte %xmm7, %xmm8");
+	asm volatile("sha1nexte %xmm15, %xmm8");
+	asm volatile("sha1nexte (%rax), %xmm0");
+	asm volatile("sha1nexte (%r8), %xmm0");
+	asm volatile("sha1nexte (0x12345678), %xmm0");
+	asm volatile("sha1nexte (%rax), %xmm3");
+	asm volatile("sha1nexte (%rcx,%rax,1), %xmm0");
+	asm volatile("sha1nexte 0x12345678(,%rax,1), %xmm0");
+	asm volatile("sha1nexte (%rax,%rcx,1), %xmm0");
+	asm volatile("sha1nexte (%rax,%rcx,8), %xmm0");
+	asm volatile("sha1nexte 0x12(%rax), %xmm0");
+	asm volatile("sha1nexte 0x12(%rbp), %xmm0");
+	asm volatile("sha1nexte 0x12(%rcx,%rax,1), %xmm0");
+	asm volatile("sha1nexte 0x12(%rbp,%rax,1), %xmm0");
+	asm volatile("sha1nexte 0x12(%rax,%rcx,1), %xmm0");
+	asm volatile("sha1nexte 0x12(%rax,%rcx,8), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%rax), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%rbp), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%rcx,%rax,1), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%rbp,%rax,1), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%rax,%rcx,1), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%rax,%rcx,8), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%rax,%rcx,8), %xmm15");
+
+	/* sha1msg1 xmm2/m128, xmm1 */
+
+	asm volatile("sha1msg1 %xmm1, %xmm0");
+	asm volatile("sha1msg1 %xmm7, %xmm2");
+	asm volatile("sha1msg1 %xmm8, %xmm0");
+	asm volatile("sha1msg1 %xmm7, %xmm8");
+	asm volatile("sha1msg1 %xmm15, %xmm8");
+	asm volatile("sha1msg1 (%rax), %xmm0");
+	asm volatile("sha1msg1 (%r8), %xmm0");
+	asm volatile("sha1msg1 (0x12345678), %xmm0");
+	asm volatile("sha1msg1 (%rax), %xmm3");
+	asm volatile("sha1msg1 (%rcx,%rax,1), %xmm0");
+	asm volatile("sha1msg1 0x12345678(,%rax,1), %xmm0");
+	asm volatile("sha1msg1 (%rax,%rcx,1), %xmm0");
+	asm volatile("sha1msg1 (%rax,%rcx,8), %xmm0");
+	asm volatile("sha1msg1 0x12(%rax), %xmm0");
+	asm volatile("sha1msg1 0x12(%rbp), %xmm0");
+	asm volatile("sha1msg1 0x12(%rcx,%rax,1), %xmm0");
+	asm volatile("sha1msg1 0x12(%rbp,%rax,1), %xmm0");
+	asm volatile("sha1msg1 0x12(%rax,%rcx,1), %xmm0");
+	asm volatile("sha1msg1 0x12(%rax,%rcx,8), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%rax), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%rbp), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%rcx,%rax,1), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%rbp,%rax,1), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%rax,%rcx,1), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%rax,%rcx,8), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%rax,%rcx,8), %xmm15");
+
+	/* sha1msg2 xmm2/m128, xmm1 */
+
+	asm volatile("sha1msg2 %xmm1, %xmm0");
+	asm volatile("sha1msg2 %xmm7, %xmm2");
+	asm volatile("sha1msg2 %xmm8, %xmm0");
+	asm volatile("sha1msg2 %xmm7, %xmm8");
+	asm volatile("sha1msg2 %xmm15, %xmm8");
+	asm volatile("sha1msg2 (%rax), %xmm0");
+	asm volatile("sha1msg2 (%r8), %xmm0");
+	asm volatile("sha1msg2 (0x12345678), %xmm0");
+	asm volatile("sha1msg2 (%rax), %xmm3");
+	asm volatile("sha1msg2 (%rcx,%rax,1), %xmm0");
+	asm volatile("sha1msg2 0x12345678(,%rax,1), %xmm0");
+	asm volatile("sha1msg2 (%rax,%rcx,1), %xmm0");
+	asm volatile("sha1msg2 (%rax,%rcx,8), %xmm0");
+	asm volatile("sha1msg2 0x12(%rax), %xmm0");
+	asm volatile("sha1msg2 0x12(%rbp), %xmm0");
+	asm volatile("sha1msg2 0x12(%rcx,%rax,1), %xmm0");
+	asm volatile("sha1msg2 0x12(%rbp,%rax,1), %xmm0");
+	asm volatile("sha1msg2 0x12(%rax,%rcx,1), %xmm0");
+	asm volatile("sha1msg2 0x12(%rax,%rcx,8), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%rax), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%rbp), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%rcx,%rax,1), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%rbp,%rax,1), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%rax,%rcx,1), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%rax,%rcx,8), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%rax,%rcx,8), %xmm15");
+
+	/* sha256rnds2 <XMM0>, xmm2/m128, xmm1 */
+	/* Note: sha256rnds2 has an implicit operand 'xmm0' */
+
+	asm volatile("sha256rnds2 %xmm4, %xmm1");
+	asm volatile("sha256rnds2 %xmm7, %xmm2");
+	asm volatile("sha256rnds2 %xmm8, %xmm1");
+	asm volatile("sha256rnds2 %xmm7, %xmm8");
+	asm volatile("sha256rnds2 %xmm15, %xmm8");
+	asm volatile("sha256rnds2 (%rax), %xmm1");
+	asm volatile("sha256rnds2 (%r8), %xmm1");
+	asm volatile("sha256rnds2 (0x12345678), %xmm1");
+	asm volatile("sha256rnds2 (%rax), %xmm3");
+	asm volatile("sha256rnds2 (%rcx,%rax,1), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(,%rax,1), %xmm1");
+	asm volatile("sha256rnds2 (%rax,%rcx,1), %xmm1");
+	asm volatile("sha256rnds2 (%rax,%rcx,8), %xmm1");
+	asm volatile("sha256rnds2 0x12(%rax), %xmm1");
+	asm volatile("sha256rnds2 0x12(%rbp), %xmm1");
+	asm volatile("sha256rnds2 0x12(%rcx,%rax,1), %xmm1");
+	asm volatile("sha256rnds2 0x12(%rbp,%rax,1), %xmm1");
+	asm volatile("sha256rnds2 0x12(%rax,%rcx,1), %xmm1");
+	asm volatile("sha256rnds2 0x12(%rax,%rcx,8), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%rax), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%rbp), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%rcx,%rax,1), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%rbp,%rax,1), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%rax,%rcx,1), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%rax,%rcx,8), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%rax,%rcx,8), %xmm15");
+
+	/* sha256msg1 xmm2/m128, xmm1 */
+
+	asm volatile("sha256msg1 %xmm1, %xmm0");
+	asm volatile("sha256msg1 %xmm7, %xmm2");
+	asm volatile("sha256msg1 %xmm8, %xmm0");
+	asm volatile("sha256msg1 %xmm7, %xmm8");
+	asm volatile("sha256msg1 %xmm15, %xmm8");
+	asm volatile("sha256msg1 (%rax), %xmm0");
+	asm volatile("sha256msg1 (%r8), %xmm0");
+	asm volatile("sha256msg1 (0x12345678), %xmm0");
+	asm volatile("sha256msg1 (%rax), %xmm3");
+	asm volatile("sha256msg1 (%rcx,%rax,1), %xmm0");
+	asm volatile("sha256msg1 0x12345678(,%rax,1), %xmm0");
+	asm volatile("sha256msg1 (%rax,%rcx,1), %xmm0");
+	asm volatile("sha256msg1 (%rax,%rcx,8), %xmm0");
+	asm volatile("sha256msg1 0x12(%rax), %xmm0");
+	asm volatile("sha256msg1 0x12(%rbp), %xmm0");
+	asm volatile("sha256msg1 0x12(%rcx,%rax,1), %xmm0");
+	asm volatile("sha256msg1 0x12(%rbp,%rax,1), %xmm0");
+	asm volatile("sha256msg1 0x12(%rax,%rcx,1), %xmm0");
+	asm volatile("sha256msg1 0x12(%rax,%rcx,8), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%rax), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%rbp), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%rcx,%rax,1), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%rbp,%rax,1), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%rax,%rcx,1), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%rax,%rcx,8), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%rax,%rcx,8), %xmm15");
+
+	/* sha256msg2 xmm2/m128, xmm1 */
+
+	asm volatile("sha256msg2 %xmm1, %xmm0");
+	asm volatile("sha256msg2 %xmm7, %xmm2");
+	asm volatile("sha256msg2 %xmm8, %xmm0");
+	asm volatile("sha256msg2 %xmm7, %xmm8");
+	asm volatile("sha256msg2 %xmm15, %xmm8");
+	asm volatile("sha256msg2 (%rax), %xmm0");
+	asm volatile("sha256msg2 (%r8), %xmm0");
+	asm volatile("sha256msg2 (0x12345678), %xmm0");
+	asm volatile("sha256msg2 (%rax), %xmm3");
+	asm volatile("sha256msg2 (%rcx,%rax,1), %xmm0");
+	asm volatile("sha256msg2 0x12345678(,%rax,1), %xmm0");
+	asm volatile("sha256msg2 (%rax,%rcx,1), %xmm0");
+	asm volatile("sha256msg2 (%rax,%rcx,8), %xmm0");
+	asm volatile("sha256msg2 0x12(%rax), %xmm0");
+	asm volatile("sha256msg2 0x12(%rbp), %xmm0");
+	asm volatile("sha256msg2 0x12(%rcx,%rax,1), %xmm0");
+	asm volatile("sha256msg2 0x12(%rbp,%rax,1), %xmm0");
+	asm volatile("sha256msg2 0x12(%rax,%rcx,1), %xmm0");
+	asm volatile("sha256msg2 0x12(%rax,%rcx,8), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%rax), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%rbp), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%rcx,%rax,1), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%rbp,%rax,1), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%rax,%rcx,1), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%rax,%rcx,8), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%rax,%rcx,8), %xmm15");
+
+	/* clflushopt m8 */
+
+	asm volatile("clflushopt (%rax)");
+	asm volatile("clflushopt (%r8)");
+	asm volatile("clflushopt (0x12345678)");
+	asm volatile("clflushopt 0x12345678(%rax,%rcx,8)");
+	asm volatile("clflushopt 0x12345678(%r8,%rcx,8)");
+	/* Also check instructions in the same group encoding as clflushopt */
+	asm volatile("clflush (%rax)");
+	asm volatile("clflush (%r8)");
+	asm volatile("sfence");
+
+	/* clwb m8 */
+
+	asm volatile("clwb (%rax)");
+	asm volatile("clwb (%r8)");
+	asm volatile("clwb (0x12345678)");
+	asm volatile("clwb 0x12345678(%rax,%rcx,8)");
+	asm volatile("clwb 0x12345678(%r8,%rcx,8)");
+	/* Also check instructions in the same group encoding as clwb */
+	asm volatile("xsaveopt (%rax)");
+	asm volatile("xsaveopt (%r8)");
+	asm volatile("mfence");
+
+	/* xsavec mem */
+
+	asm volatile("xsavec (%rax)");
+	asm volatile("xsavec (%r8)");
+	asm volatile("xsavec (0x12345678)");
+	asm volatile("xsavec 0x12345678(%rax,%rcx,8)");
+	asm volatile("xsavec 0x12345678(%r8,%rcx,8)");
+
+	/* xsaves mem */
+
+	asm volatile("xsaves (%rax)");
+	asm volatile("xsaves (%r8)");
+	asm volatile("xsaves (0x12345678)");
+	asm volatile("xsaves 0x12345678(%rax,%rcx,8)");
+	asm volatile("xsaves 0x12345678(%r8,%rcx,8)");
+
+	/* xrstors mem */
+
+	asm volatile("xrstors (%rax)");
+	asm volatile("xrstors (%r8)");
+	asm volatile("xrstors (0x12345678)");
+	asm volatile("xrstors 0x12345678(%rax,%rcx,8)");
+	asm volatile("xrstors 0x12345678(%r8,%rcx,8)");
+
+	/* ptwrite */
+
+	asm volatile("ptwrite (%rax)");
+	asm volatile("ptwrite (%r8)");
+	asm volatile("ptwrite (0x12345678)");
+	asm volatile("ptwrite 0x12345678(%rax,%rcx,8)");
+	asm volatile("ptwrite 0x12345678(%r8,%rcx,8)");
+
+	asm volatile("ptwritel (%rax)");
+	asm volatile("ptwritel (%r8)");
+	asm volatile("ptwritel (0x12345678)");
+	asm volatile("ptwritel 0x12345678(%rax,%rcx,8)");
+	asm volatile("ptwritel 0x12345678(%r8,%rcx,8)");
+
+	asm volatile("ptwriteq (%rax)");
+	asm volatile("ptwriteq (%r8)");
+	asm volatile("ptwriteq (0x12345678)");
+	asm volatile("ptwriteq 0x12345678(%rax,%rcx,8)");
+	asm volatile("ptwriteq 0x12345678(%r8,%rcx,8)");
+
+#else  /* #ifdef __x86_64__ */
+
+	/* bound r32, mem (same op code as EVEX prefix) */
+
+	asm volatile("bound %eax, 0x12345678(%ecx)");
+	asm volatile("bound %ecx, 0x12345678(%eax)");
+	asm volatile("bound %edx, 0x12345678(%eax)");
+	asm volatile("bound %ebx, 0x12345678(%eax)");
+	asm volatile("bound %esp, 0x12345678(%eax)");
+	asm volatile("bound %ebp, 0x12345678(%eax)");
+	asm volatile("bound %esi, 0x12345678(%eax)");
+	asm volatile("bound %edi, 0x12345678(%eax)");
+	asm volatile("bound %ecx, (%eax)");
+	asm volatile("bound %eax, (0x12345678)");
+	asm volatile("bound %edx, (%ecx,%eax,1)");
+	asm volatile("bound %edx, 0x12345678(,%eax,1)");
+	asm volatile("bound %edx, (%eax,%ecx,1)");
+	asm volatile("bound %edx, (%eax,%ecx,8)");
+	asm volatile("bound %edx, 0x12(%eax)");
+	asm volatile("bound %edx, 0x12(%ebp)");
+	asm volatile("bound %edx, 0x12(%ecx,%eax,1)");
+	asm volatile("bound %edx, 0x12(%ebp,%eax,1)");
+	asm volatile("bound %edx, 0x12(%eax,%ecx,1)");
+	asm volatile("bound %edx, 0x12(%eax,%ecx,8)");
+	asm volatile("bound %edx, 0x12345678(%eax)");
+	asm volatile("bound %edx, 0x12345678(%ebp)");
+	asm volatile("bound %edx, 0x12345678(%ecx,%eax,1)");
+	asm volatile("bound %edx, 0x12345678(%ebp,%eax,1)");
+	asm volatile("bound %edx, 0x12345678(%eax,%ecx,1)");
+	asm volatile("bound %edx, 0x12345678(%eax,%ecx,8)");
+
+	/* bound r16, mem (same op code as EVEX prefix) */
+
+	asm volatile("bound %ax, 0x12345678(%ecx)");
+	asm volatile("bound %cx, 0x12345678(%eax)");
+	asm volatile("bound %dx, 0x12345678(%eax)");
+	asm volatile("bound %bx, 0x12345678(%eax)");
+	asm volatile("bound %sp, 0x12345678(%eax)");
+	asm volatile("bound %bp, 0x12345678(%eax)");
+	asm volatile("bound %si, 0x12345678(%eax)");
+	asm volatile("bound %di, 0x12345678(%eax)");
+	asm volatile("bound %cx, (%eax)");
+	asm volatile("bound %ax, (0x12345678)");
+	asm volatile("bound %dx, (%ecx,%eax,1)");
+	asm volatile("bound %dx, 0x12345678(,%eax,1)");
+	asm volatile("bound %dx, (%eax,%ecx,1)");
+	asm volatile("bound %dx, (%eax,%ecx,8)");
+	asm volatile("bound %dx, 0x12(%eax)");
+	asm volatile("bound %dx, 0x12(%ebp)");
+	asm volatile("bound %dx, 0x12(%ecx,%eax,1)");
+	asm volatile("bound %dx, 0x12(%ebp,%eax,1)");
+	asm volatile("bound %dx, 0x12(%eax,%ecx,1)");
+	asm volatile("bound %dx, 0x12(%eax,%ecx,8)");
+	asm volatile("bound %dx, 0x12345678(%eax)");
+	asm volatile("bound %dx, 0x12345678(%ebp)");
+	asm volatile("bound %dx, 0x12345678(%ecx,%eax,1)");
+	asm volatile("bound %dx, 0x12345678(%ebp,%eax,1)");
+	asm volatile("bound %dx, 0x12345678(%eax,%ecx,1)");
+	asm volatile("bound %dx, 0x12345678(%eax,%ecx,8)");
+
+	/* AVX-512: Instructions with the same op codes as Mask Instructions */
+
+	asm volatile("cmovno %eax,%ebx");
+	asm volatile("cmovno 0x12345678(%eax),%ecx");
+	asm volatile("cmovno 0x12345678(%eax),%cx");
+
+	asm volatile("cmove  %eax,%ebx");
+	asm volatile("cmove 0x12345678(%eax),%ecx");
+	asm volatile("cmove 0x12345678(%eax),%cx");
+
+	asm volatile("seto    0x12345678(%eax)");
+	asm volatile("setno   0x12345678(%eax)");
+	asm volatile("setb    0x12345678(%eax)");
+	asm volatile("setc    0x12345678(%eax)");
+	asm volatile("setnae  0x12345678(%eax)");
+	asm volatile("setae   0x12345678(%eax)");
+	asm volatile("setnb   0x12345678(%eax)");
+	asm volatile("setnc   0x12345678(%eax)");
+	asm volatile("sets    0x12345678(%eax)");
+	asm volatile("setns   0x12345678(%eax)");
+
+	/* AVX-512: Mask Instructions */
+
+	asm volatile("kandw  %k7,%k6,%k5");
+	asm volatile("kandq  %k7,%k6,%k5");
+	asm volatile("kandb  %k7,%k6,%k5");
+	asm volatile("kandd  %k7,%k6,%k5");
+
+	asm volatile("kandnw  %k7,%k6,%k5");
+	asm volatile("kandnq  %k7,%k6,%k5");
+	asm volatile("kandnb  %k7,%k6,%k5");
+	asm volatile("kandnd  %k7,%k6,%k5");
+
+	asm volatile("knotw  %k7,%k6");
+	asm volatile("knotq  %k7,%k6");
+	asm volatile("knotb  %k7,%k6");
+	asm volatile("knotd  %k7,%k6");
+
+	asm volatile("korw  %k7,%k6,%k5");
+	asm volatile("korq  %k7,%k6,%k5");
+	asm volatile("korb  %k7,%k6,%k5");
+	asm volatile("kord  %k7,%k6,%k5");
+
+	asm volatile("kxnorw  %k7,%k6,%k5");
+	asm volatile("kxnorq  %k7,%k6,%k5");
+	asm volatile("kxnorb  %k7,%k6,%k5");
+	asm volatile("kxnord  %k7,%k6,%k5");
+
+	asm volatile("kxorw  %k7,%k6,%k5");
+	asm volatile("kxorq  %k7,%k6,%k5");
+	asm volatile("kxorb  %k7,%k6,%k5");
+	asm volatile("kxord  %k7,%k6,%k5");
+
+	asm volatile("kaddw  %k7,%k6,%k5");
+	asm volatile("kaddq  %k7,%k6,%k5");
+	asm volatile("kaddb  %k7,%k6,%k5");
+	asm volatile("kaddd  %k7,%k6,%k5");
+
+	asm volatile("kunpckbw %k7,%k6,%k5");
+	asm volatile("kunpckwd %k7,%k6,%k5");
+	asm volatile("kunpckdq %k7,%k6,%k5");
+
+	asm volatile("kmovw  %k6,%k5");
+	asm volatile("kmovw  (%ecx),%k5");
+	asm volatile("kmovw  0x123(%eax,%ecx,8),%k5");
+	asm volatile("kmovw  %k5,(%ecx)");
+	asm volatile("kmovw  %k5,0x123(%eax,%ecx,8)");
+	asm volatile("kmovw  %eax,%k5");
+	asm volatile("kmovw  %ebp,%k5");
+	asm volatile("kmovw  %k5,%eax");
+	asm volatile("kmovw  %k5,%ebp");
+
+	asm volatile("kmovq  %k6,%k5");
+	asm volatile("kmovq  (%ecx),%k5");
+	asm volatile("kmovq  0x123(%eax,%ecx,8),%k5");
+	asm volatile("kmovq  %k5,(%ecx)");
+	asm volatile("kmovq  %k5,0x123(%eax,%ecx,8)");
+
+	asm volatile("kmovb  %k6,%k5");
+	asm volatile("kmovb  (%ecx),%k5");
+	asm volatile("kmovb  0x123(%eax,%ecx,8),%k5");
+	asm volatile("kmovb  %k5,(%ecx)");
+	asm volatile("kmovb  %k5,0x123(%eax,%ecx,8)");
+	asm volatile("kmovb  %eax,%k5");
+	asm volatile("kmovb  %ebp,%k5");
+	asm volatile("kmovb  %k5,%eax");
+	asm volatile("kmovb  %k5,%ebp");
+
+	asm volatile("kmovd  %k6,%k5");
+	asm volatile("kmovd  (%ecx),%k5");
+	asm volatile("kmovd  0x123(%eax,%ecx,8),%k5");
+	asm volatile("kmovd  %k5,(%ecx)");
+	asm volatile("kmovd  %k5,0x123(%eax,%ecx,8)");
+	asm volatile("kmovd  %eax,%k5");
+	asm volatile("kmovd  %ebp,%k5");
+	asm volatile("kmovd  %k5,%eax");
+	asm volatile("kmovd  %k5,%ebp");
+
+	asm volatile("kortestw %k6,%k5");
+	asm volatile("kortestq %k6,%k5");
+	asm volatile("kortestb %k6,%k5");
+	asm volatile("kortestd %k6,%k5");
+
+	asm volatile("ktestw %k6,%k5");
+	asm volatile("ktestq %k6,%k5");
+	asm volatile("ktestb %k6,%k5");
+	asm volatile("ktestd %k6,%k5");
+
+	asm volatile("kshiftrw $0x12,%k6,%k5");
+	asm volatile("kshiftrq $0x5b,%k6,%k5");
+	asm volatile("kshiftlw $0x12,%k6,%k5");
+	asm volatile("kshiftlq $0x5b,%k6,%k5");
+
+	/* AVX-512: Op code 0f 5b */
+	asm volatile("vcvtdq2ps %xmm5,%xmm6");
+	asm volatile("vcvtqq2ps %zmm5,%ymm6{%k7}");
+	asm volatile("vcvtps2dq %xmm5,%xmm6");
+	asm volatile("vcvttps2dq %xmm5,%xmm6");
+
+	/* AVX-512: Op code 0f 6f */
+
+	asm volatile("movq   %mm0,%mm4");
+	asm volatile("vmovdqa %ymm4,%ymm6");
+	asm volatile("vmovdqa32 %zmm5,%zmm6");
+	asm volatile("vmovdqa64 %zmm5,%zmm6");
+	asm volatile("vmovdqu %ymm4,%ymm6");
+	asm volatile("vmovdqu32 %zmm5,%zmm6");
+	asm volatile("vmovdqu64 %zmm5,%zmm6");
+	asm volatile("vmovdqu8 %zmm5,%zmm6");
+	asm volatile("vmovdqu16 %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 78 */
+
+	asm volatile("vmread %eax,%ebx");
+	asm volatile("vcvttps2udq %zmm5,%zmm6");
+	asm volatile("vcvttpd2udq %zmm5,%ymm6{%k7}");
+	asm volatile("vcvttsd2usi %xmm6,%eax");
+	asm volatile("vcvttss2usi %xmm6,%eax");
+	asm volatile("vcvttps2uqq %ymm5,%zmm6{%k7}");
+	asm volatile("vcvttpd2uqq %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 79 */
+
+	asm volatile("vmwrite %eax,%ebx");
+	asm volatile("vcvtps2udq %zmm5,%zmm6");
+	asm volatile("vcvtpd2udq %zmm5,%ymm6{%k7}");
+	asm volatile("vcvtsd2usi %xmm6,%eax");
+	asm volatile("vcvtss2usi %xmm6,%eax");
+	asm volatile("vcvtps2uqq %ymm5,%zmm6{%k7}");
+	asm volatile("vcvtpd2uqq %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 7a */
+
+	asm volatile("vcvtudq2pd %ymm5,%zmm6{%k7}");
+	asm volatile("vcvtuqq2pd %zmm5,%zmm6");
+	asm volatile("vcvtudq2ps %zmm5,%zmm6");
+	asm volatile("vcvtuqq2ps %zmm5,%ymm6{%k7}");
+	asm volatile("vcvttps2qq %ymm5,%zmm6{%k7}");
+	asm volatile("vcvttpd2qq %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 7b */
+
+	asm volatile("vcvtusi2sd %eax,%xmm5,%xmm6");
+	asm volatile("vcvtusi2ss %eax,%xmm5,%xmm6");
+	asm volatile("vcvtps2qq %ymm5,%zmm6{%k7}");
+	asm volatile("vcvtpd2qq %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 7f */
+
+	asm volatile("movq.s  %mm0,%mm4");
+	asm volatile("vmovdqa.s %ymm5,%ymm6");
+	asm volatile("vmovdqa32.s %zmm5,%zmm6");
+	asm volatile("vmovdqa64.s %zmm5,%zmm6");
+	asm volatile("vmovdqu.s %ymm5,%ymm6");
+	asm volatile("vmovdqu32.s %zmm5,%zmm6");
+	asm volatile("vmovdqu64.s %zmm5,%zmm6");
+	asm volatile("vmovdqu8.s %zmm5,%zmm6");
+	asm volatile("vmovdqu16.s %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f db */
+
+	asm volatile("pand  %mm1,%mm2");
+	asm volatile("pand  %xmm1,%xmm2");
+	asm volatile("vpand  %ymm4,%ymm6,%ymm2");
+	asm volatile("vpandd %zmm4,%zmm5,%zmm6");
+	asm volatile("vpandq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f df */
+
+	asm volatile("pandn  %mm1,%mm2");
+	asm volatile("pandn  %xmm1,%xmm2");
+	asm volatile("vpandn %ymm4,%ymm6,%ymm2");
+	asm volatile("vpandnd %zmm4,%zmm5,%zmm6");
+	asm volatile("vpandnq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f e6 */
+
+	asm volatile("vcvttpd2dq %xmm1,%xmm2");
+	asm volatile("vcvtdq2pd %xmm5,%xmm6");
+	asm volatile("vcvtdq2pd %ymm5,%zmm6{%k7}");
+	asm volatile("vcvtqq2pd %zmm5,%zmm6");
+	asm volatile("vcvtpd2dq %xmm1,%xmm2");
+
+	/* AVX-512: Op code 0f eb */
+
+	asm volatile("por   %mm4,%mm6");
+	asm volatile("vpor   %ymm4,%ymm6,%ymm2");
+	asm volatile("vpord  %zmm4,%zmm5,%zmm6");
+	asm volatile("vporq  %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f ef */
+
+	asm volatile("pxor   %mm4,%mm6");
+	asm volatile("vpxor  %ymm4,%ymm6,%ymm2");
+	asm volatile("vpxord %zmm4,%zmm5,%zmm6");
+	asm volatile("vpxorq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 10 */
+
+	asm volatile("pblendvb %xmm1,%xmm0");
+	asm volatile("vpsrlvw %zmm4,%zmm5,%zmm6");
+	asm volatile("vpmovuswb %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 11 */
+
+	asm volatile("vpmovusdb %zmm5,%xmm6{%k7}");
+	asm volatile("vpsravw %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 12 */
+
+	asm volatile("vpmovusqb %zmm5,%xmm6{%k7}");
+	asm volatile("vpsllvw %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 13 */
+
+	asm volatile("vcvtph2ps %xmm3,%ymm5");
+	asm volatile("vcvtph2ps %ymm5,%zmm6{%k7}");
+	asm volatile("vpmovusdw %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 14 */
+
+	asm volatile("blendvps %xmm1,%xmm0");
+	asm volatile("vpmovusqw %zmm5,%xmm6{%k7}");
+	asm volatile("vprorvd %zmm4,%zmm5,%zmm6");
+	asm volatile("vprorvq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 15 */
+
+	asm volatile("blendvpd %xmm1,%xmm0");
+	asm volatile("vpmovusqd %zmm5,%ymm6{%k7}");
+	asm volatile("vprolvd %zmm4,%zmm5,%zmm6");
+	asm volatile("vprolvq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 16 */
+
+	asm volatile("vpermps %ymm4,%ymm6,%ymm2");
+	asm volatile("vpermps %ymm4,%ymm6,%ymm2{%k7}");
+	asm volatile("vpermpd %ymm4,%ymm6,%ymm2{%k7}");
+
+	/* AVX-512: Op code 0f 38 19 */
+
+	asm volatile("vbroadcastsd %xmm4,%ymm6");
+	asm volatile("vbroadcastf32x2 %xmm7,%zmm6");
+
+	/* AVX-512: Op code 0f 38 1a */
+
+	asm volatile("vbroadcastf128 (%ecx),%ymm4");
+	asm volatile("vbroadcastf32x4 (%ecx),%zmm6");
+	asm volatile("vbroadcastf64x2 (%ecx),%zmm6");
+
+	/* AVX-512: Op code 0f 38 1b */
+
+	asm volatile("vbroadcastf32x8 (%ecx),%zmm6");
+	asm volatile("vbroadcastf64x4 (%ecx),%zmm6");
+
+	/* AVX-512: Op code 0f 38 1f */
+
+	asm volatile("vpabsq %zmm4,%zmm6");
+
+	/* AVX-512: Op code 0f 38 20 */
+
+	asm volatile("vpmovsxbw %xmm4,%xmm5");
+	asm volatile("vpmovswb %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 21 */
+
+	asm volatile("vpmovsxbd %xmm4,%ymm6");
+	asm volatile("vpmovsdb %zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 22 */
+
+	asm volatile("vpmovsxbq %xmm4,%ymm4");
+	asm volatile("vpmovsqb %zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 23 */
+
+	asm volatile("vpmovsxwd %xmm4,%ymm4");
+	asm volatile("vpmovsdw %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 24 */
+
+	asm volatile("vpmovsxwq %xmm4,%ymm6");
+	asm volatile("vpmovsqw %zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 25 */
+
+	asm volatile("vpmovsxdq %xmm4,%ymm4");
+	asm volatile("vpmovsqd %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 26 */
+
+	asm volatile("vptestmb %zmm5,%zmm6,%k5");
+	asm volatile("vptestmw %zmm5,%zmm6,%k5");
+	asm volatile("vptestnmb %zmm4,%zmm5,%k5");
+	asm volatile("vptestnmw %zmm4,%zmm5,%k5");
+
+	/* AVX-512: Op code 0f 38 27 */
+
+	asm volatile("vptestmd %zmm5,%zmm6,%k5");
+	asm volatile("vptestmq %zmm5,%zmm6,%k5");
+	asm volatile("vptestnmd %zmm4,%zmm5,%k5");
+	asm volatile("vptestnmq %zmm4,%zmm5,%k5");
+
+	/* AVX-512: Op code 0f 38 28 */
+
+	asm volatile("vpmuldq %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmovm2b %k5,%zmm6");
+	asm volatile("vpmovm2w %k5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 29 */
+
+	asm volatile("vpcmpeqq %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmovb2m %zmm6,%k5");
+	asm volatile("vpmovw2m %zmm6,%k5");
+
+	/* AVX-512: Op code 0f 38 2a */
+
+	asm volatile("vmovntdqa (%ecx),%ymm4");
+	asm volatile("vpbroadcastmb2q %k6,%zmm1");
+
+	/* AVX-512: Op code 0f 38 2c */
+
+	asm volatile("vmaskmovps (%ecx),%ymm4,%ymm6");
+	asm volatile("vscalefps %zmm4,%zmm5,%zmm6");
+	asm volatile("vscalefpd %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 2d */
+
+	asm volatile("vmaskmovpd (%ecx),%ymm4,%ymm6");
+	asm volatile("vscalefss %xmm4,%xmm5,%xmm6{%k7}");
+	asm volatile("vscalefsd %xmm4,%xmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 30 */
+
+	asm volatile("vpmovzxbw %xmm4,%ymm4");
+	asm volatile("vpmovwb %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 31 */
+
+	asm volatile("vpmovzxbd %xmm4,%ymm6");
+	asm volatile("vpmovdb %zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 32 */
+
+	asm volatile("vpmovzxbq %xmm4,%ymm4");
+	asm volatile("vpmovqb %zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 33 */
+
+	asm volatile("vpmovzxwd %xmm4,%ymm4");
+	asm volatile("vpmovdw %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 34 */
+
+	asm volatile("vpmovzxwq %xmm4,%ymm6");
+	asm volatile("vpmovqw %zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 35 */
+
+	asm volatile("vpmovzxdq %xmm4,%ymm4");
+	asm volatile("vpmovqd %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 36 */
+
+	asm volatile("vpermd %ymm4,%ymm6,%ymm2");
+	asm volatile("vpermd %ymm4,%ymm6,%ymm2{%k7}");
+	asm volatile("vpermq %ymm4,%ymm6,%ymm2{%k7}");
+
+	/* AVX-512: Op code 0f 38 38 */
+
+	asm volatile("vpminsb %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmovm2d %k5,%zmm6");
+	asm volatile("vpmovm2q %k5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 39 */
+
+	asm volatile("vpminsd %xmm1,%xmm2,%xmm3");
+	asm volatile("vpminsd %zmm4,%zmm5,%zmm6");
+	asm volatile("vpminsq %zmm4,%zmm5,%zmm6");
+	asm volatile("vpmovd2m %zmm6,%k5");
+	asm volatile("vpmovq2m %zmm6,%k5");
+
+	/* AVX-512: Op code 0f 38 3a */
+
+	asm volatile("vpminuw %ymm4,%ymm6,%ymm2");
+	asm volatile("vpbroadcastmw2d %k6,%zmm6");
+
+	/* AVX-512: Op code 0f 38 3b */
+
+	asm volatile("vpminud %ymm4,%ymm6,%ymm2");
+	asm volatile("vpminud %zmm4,%zmm5,%zmm6");
+	asm volatile("vpminuq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 3d */
+
+	asm volatile("vpmaxsd %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmaxsd %zmm4,%zmm5,%zmm6");
+	asm volatile("vpmaxsq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 3f */
+
+	asm volatile("vpmaxud %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmaxud %zmm4,%zmm5,%zmm6");
+	asm volatile("vpmaxuq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 40 */
+
+	asm volatile("vpmulld %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmulld %zmm4,%zmm5,%zmm6");
+	asm volatile("vpmullq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 42 */
+
+	asm volatile("vgetexpps %zmm5,%zmm6");
+	asm volatile("vgetexppd %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 43 */
+
+	asm volatile("vgetexpss %xmm4,%xmm5,%xmm6{%k7}");
+	asm volatile("vgetexpsd %xmm2,%xmm3,%xmm4{%k7}");
+
+	/* AVX-512: Op code 0f 38 44 */
+
+	asm volatile("vplzcntd %zmm5,%zmm6");
+	asm volatile("vplzcntq %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 46 */
+
+	asm volatile("vpsravd %ymm4,%ymm6,%ymm2");
+	asm volatile("vpsravd %zmm4,%zmm5,%zmm6");
+	asm volatile("vpsravq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 4c */
+
+	asm volatile("vrcp14ps %zmm5,%zmm6");
+	asm volatile("vrcp14pd %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 4d */
+
+	asm volatile("vrcp14ss %xmm4,%xmm5,%xmm6{%k7}");
+	asm volatile("vrcp14sd %xmm4,%xmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 4e */
+
+	asm volatile("vrsqrt14ps %zmm5,%zmm6");
+	asm volatile("vrsqrt14pd %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 4f */
+
+	asm volatile("vrsqrt14ss %xmm4,%xmm5,%xmm6{%k7}");
+	asm volatile("vrsqrt14sd %xmm4,%xmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 59 */
+
+	asm volatile("vpbroadcastq %xmm4,%xmm6");
+	asm volatile("vbroadcasti32x2 %xmm7,%zmm6");
+
+	/* AVX-512: Op code 0f 38 5a */
+
+	asm volatile("vbroadcasti128 (%ecx),%ymm4");
+	asm volatile("vbroadcasti32x4 (%ecx),%zmm6");
+	asm volatile("vbroadcasti64x2 (%ecx),%zmm6");
+
+	/* AVX-512: Op code 0f 38 5b */
+
+	asm volatile("vbroadcasti32x8 (%ecx),%zmm6");
+	asm volatile("vbroadcasti64x4 (%ecx),%zmm6");
+
+	/* AVX-512: Op code 0f 38 64 */
+
+	asm volatile("vpblendmd %zmm4,%zmm5,%zmm6");
+	asm volatile("vpblendmq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 65 */
+
+	asm volatile("vblendmps %zmm4,%zmm5,%zmm6");
+	asm volatile("vblendmpd %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 66 */
+
+	asm volatile("vpblendmb %zmm4,%zmm5,%zmm6");
+	asm volatile("vpblendmw %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 75 */
+
+	asm volatile("vpermi2b %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermi2w %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 76 */
+
+	asm volatile("vpermi2d %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermi2q %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 77 */
+
+	asm volatile("vpermi2ps %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermi2pd %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 7a */
+
+	asm volatile("vpbroadcastb %eax,%xmm3");
+
+	/* AVX-512: Op code 0f 38 7b */
+
+	asm volatile("vpbroadcastw %eax,%xmm3");
+
+	/* AVX-512: Op code 0f 38 7c */
+
+	asm volatile("vpbroadcastd %eax,%xmm3");
+
+	/* AVX-512: Op code 0f 38 7d */
+
+	asm volatile("vpermt2b %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermt2w %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 7e */
+
+	asm volatile("vpermt2d %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermt2q %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 7f */
+
+	asm volatile("vpermt2ps %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermt2pd %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 83 */
+
+	asm volatile("vpmultishiftqb %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 88 */
+
+	asm volatile("vexpandps (%ecx),%zmm6");
+	asm volatile("vexpandpd (%ecx),%zmm6");
+
+	/* AVX-512: Op code 0f 38 89 */
+
+	asm volatile("vpexpandd (%ecx),%zmm6");
+	asm volatile("vpexpandq (%ecx),%zmm6");
+
+	/* AVX-512: Op code 0f 38 8a */
+
+	asm volatile("vcompressps %zmm6,(%ecx)");
+	asm volatile("vcompresspd %zmm6,(%ecx)");
+
+	/* AVX-512: Op code 0f 38 8b */
+
+	asm volatile("vpcompressd %zmm6,(%ecx)");
+	asm volatile("vpcompressq %zmm6,(%ecx)");
+
+	/* AVX-512: Op code 0f 38 8d */
+
+	asm volatile("vpermb %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermw %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 90 */
+
+	asm volatile("vpgatherdd %xmm2,0x02(%ebp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherdq %xmm2,0x04(%ebp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherdd 0x7b(%ebp,%zmm7,8),%zmm6{%k1}");
+	asm volatile("vpgatherdq 0x7b(%ebp,%ymm7,8),%zmm6{%k1}");
+
+	/* AVX-512: Op code 0f 38 91 */
+
+	asm volatile("vpgatherqd %xmm2,0x02(%ebp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherqq %xmm2,0x02(%ebp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherqd 0x7b(%ebp,%zmm7,8),%ymm6{%k1}");
+	asm volatile("vpgatherqq 0x7b(%ebp,%zmm7,8),%zmm6{%k1}");
+
+	/* AVX-512: Op code 0f 38 a0 */
+
+	asm volatile("vpscatterdd %zmm6,0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vpscatterdq %zmm6,0x7b(%ebp,%ymm7,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 a1 */
+
+	asm volatile("vpscatterqd %ymm6,0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vpscatterqq %ymm6,0x7b(%ebp,%ymm7,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 a2 */
+
+	asm volatile("vscatterdps %zmm6,0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterdpd %zmm6,0x7b(%ebp,%ymm7,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 a3 */
+
+	asm volatile("vscatterqps %ymm6,0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterqpd %zmm6,0x7b(%ebp,%zmm7,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 b4 */
+
+	asm volatile("vpmadd52luq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 b5 */
+
+	asm volatile("vpmadd52huq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 c4 */
+
+	asm volatile("vpconflictd %zmm5,%zmm6");
+	asm volatile("vpconflictq %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 c8 */
+
+	asm volatile("vexp2ps %zmm6,%zmm7");
+	asm volatile("vexp2pd %zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 38 ca */
+
+	asm volatile("vrcp28ps %zmm6,%zmm7");
+	asm volatile("vrcp28pd %zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 38 cb */
+
+	asm volatile("vrcp28ss %xmm5,%xmm6,%xmm7{%k7}");
+	asm volatile("vrcp28sd %xmm5,%xmm6,%xmm7{%k7}");
+
+	/* AVX-512: Op code 0f 38 cc */
+
+	asm volatile("vrsqrt28ps %zmm6,%zmm7");
+	asm volatile("vrsqrt28pd %zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 38 cd */
+
+	asm volatile("vrsqrt28ss %xmm5,%xmm6,%xmm7{%k7}");
+	asm volatile("vrsqrt28sd %xmm5,%xmm6,%xmm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 03 */
+
+	asm volatile("valignd $0x12,%zmm5,%zmm6,%zmm7");
+	asm volatile("valignq $0x12,%zmm5,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 08 */
+
+	asm volatile("vroundps $0x5,%ymm6,%ymm2");
+	asm volatile("vrndscaleps $0x12,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 3a 09 */
+
+	asm volatile("vroundpd $0x5,%ymm6,%ymm2");
+	asm volatile("vrndscalepd $0x12,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 3a 0a */
+
+	asm volatile("vroundss $0x5,%xmm4,%xmm6,%xmm2");
+	asm volatile("vrndscaless $0x12,%xmm4,%xmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 3a 0b */
+
+	asm volatile("vroundsd $0x5,%xmm4,%xmm6,%xmm2");
+	asm volatile("vrndscalesd $0x12,%xmm4,%xmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 3a 18 */
+
+	asm volatile("vinsertf128 $0x5,%xmm4,%ymm4,%ymm6");
+	asm volatile("vinsertf32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}");
+	asm volatile("vinsertf64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}");
+
+	/* AVX-512: Op code 0f 3a 19 */
+
+	asm volatile("vextractf128 $0x5,%ymm4,%xmm4");
+	asm volatile("vextractf32x4 $0x12,%zmm5,%xmm6{%k7}");
+	asm volatile("vextractf64x2 $0x12,%zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 3a 1a */
+
+	asm volatile("vinsertf32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}");
+	asm volatile("vinsertf64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 1b */
+
+	asm volatile("vextractf32x8 $0x12,%zmm6,%ymm7{%k7}");
+	asm volatile("vextractf64x4 $0x12,%zmm6,%ymm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 1e */
+
+	asm volatile("vpcmpud $0x12,%zmm6,%zmm7,%k5");
+	asm volatile("vpcmpuq $0x12,%zmm6,%zmm7,%k5");
+
+	/* AVX-512: Op code 0f 3a 1f */
+
+	asm volatile("vpcmpd $0x12,%zmm6,%zmm7,%k5");
+	asm volatile("vpcmpq $0x12,%zmm6,%zmm7,%k5");
+
+	/* AVX-512: Op code 0f 3a 23 */
+
+	asm volatile("vshuff32x4 $0x12,%zmm5,%zmm6,%zmm7");
+	asm volatile("vshuff64x2 $0x12,%zmm5,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 25 */
+
+	asm volatile("vpternlogd $0x12,%zmm5,%zmm6,%zmm7");
+	asm volatile("vpternlogq $0x12,%zmm5,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 26 */
+
+	asm volatile("vgetmantps $0x12,%zmm6,%zmm7");
+	asm volatile("vgetmantpd $0x12,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 27 */
+
+	asm volatile("vgetmantss $0x12,%xmm5,%xmm6,%xmm7{%k7}");
+	asm volatile("vgetmantsd $0x12,%xmm5,%xmm6,%xmm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 38 */
+
+	asm volatile("vinserti128 $0x5,%xmm4,%ymm4,%ymm6");
+	asm volatile("vinserti32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}");
+	asm volatile("vinserti64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}");
+
+	/* AVX-512: Op code 0f 3a 39 */
+
+	asm volatile("vextracti128 $0x5,%ymm4,%xmm6");
+	asm volatile("vextracti32x4 $0x12,%zmm5,%xmm6{%k7}");
+	asm volatile("vextracti64x2 $0x12,%zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 3a 3a */
+
+	asm volatile("vinserti32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}");
+	asm volatile("vinserti64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 3b */
+
+	asm volatile("vextracti32x8 $0x12,%zmm6,%ymm7{%k7}");
+	asm volatile("vextracti64x4 $0x12,%zmm6,%ymm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 3e */
+
+	asm volatile("vpcmpub $0x12,%zmm6,%zmm7,%k5");
+	asm volatile("vpcmpuw $0x12,%zmm6,%zmm7,%k5");
+
+	/* AVX-512: Op code 0f 3a 3f */
+
+	asm volatile("vpcmpb $0x12,%zmm6,%zmm7,%k5");
+	asm volatile("vpcmpw $0x12,%zmm6,%zmm7,%k5");
+
+	/* AVX-512: Op code 0f 3a 42 */
+
+	asm volatile("vmpsadbw $0x5,%ymm4,%ymm6,%ymm2");
+	asm volatile("vdbpsadbw $0x12,%zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 3a 43 */
+
+	asm volatile("vshufi32x4 $0x12,%zmm5,%zmm6,%zmm7");
+	asm volatile("vshufi64x2 $0x12,%zmm5,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 50 */
+
+	asm volatile("vrangeps $0x12,%zmm5,%zmm6,%zmm7");
+	asm volatile("vrangepd $0x12,%zmm5,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 51 */
+
+	asm volatile("vrangess $0x12,%xmm5,%xmm6,%xmm7");
+	asm volatile("vrangesd $0x12,%xmm5,%xmm6,%xmm7");
+
+	/* AVX-512: Op code 0f 3a 54 */
+
+	asm volatile("vfixupimmps $0x12,%zmm5,%zmm6,%zmm7");
+	asm volatile("vfixupimmpd $0x12,%zmm5,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 55 */
+
+	asm volatile("vfixupimmss $0x12,%xmm5,%xmm6,%xmm7{%k7}");
+	asm volatile("vfixupimmsd $0x12,%xmm5,%xmm6,%xmm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 56 */
+
+	asm volatile("vreduceps $0x12,%zmm6,%zmm7");
+	asm volatile("vreducepd $0x12,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 57 */
+
+	asm volatile("vreducess $0x12,%xmm5,%xmm6,%xmm7");
+	asm volatile("vreducesd $0x12,%xmm5,%xmm6,%xmm7");
+
+	/* AVX-512: Op code 0f 3a 66 */
+
+	asm volatile("vfpclassps $0x12,%zmm7,%k5");
+	asm volatile("vfpclasspd $0x12,%zmm7,%k5");
+
+	/* AVX-512: Op code 0f 3a 67 */
+
+	asm volatile("vfpclassss $0x12,%xmm7,%k5");
+	asm volatile("vfpclasssd $0x12,%xmm7,%k5");
+
+	/* AVX-512: Op code 0f 72 (Grp13) */
+
+	asm volatile("vprord $0x12,%zmm5,%zmm6");
+	asm volatile("vprorq $0x12,%zmm5,%zmm6");
+	asm volatile("vprold $0x12,%zmm5,%zmm6");
+	asm volatile("vprolq $0x12,%zmm5,%zmm6");
+	asm volatile("psrad  $0x2,%mm6");
+	asm volatile("vpsrad $0x5,%ymm6,%ymm2");
+	asm volatile("vpsrad $0x5,%zmm6,%zmm2");
+	asm volatile("vpsraq $0x5,%zmm6,%zmm2");
+
+	/* AVX-512: Op code 0f 38 c6 (Grp18) */
+
+	asm volatile("vgatherpf0dps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vgatherpf0dpd 0x7b(%ebp,%ymm7,8){%k1}");
+	asm volatile("vgatherpf1dps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vgatherpf1dpd 0x7b(%ebp,%ymm7,8){%k1}");
+	asm volatile("vscatterpf0dps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterpf0dpd 0x7b(%ebp,%ymm7,8){%k1}");
+	asm volatile("vscatterpf1dps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterpf1dpd 0x7b(%ebp,%ymm7,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 c7 (Grp19) */
+
+	asm volatile("vgatherpf0qps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vgatherpf0qpd 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vgatherpf1qps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vgatherpf1qpd 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterpf0qps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterpf0qpd 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterpf1qps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterpf1qpd 0x7b(%ebp,%zmm7,8){%k1}");
+
+	/* AVX-512: Examples */
+
+	asm volatile("vaddpd %zmm4,%zmm5,%zmm6");
+	asm volatile("vaddpd %zmm4,%zmm5,%zmm6{%k7}");
+	asm volatile("vaddpd %zmm4,%zmm5,%zmm6{%k7}{z}");
+	asm volatile("vaddpd {rn-sae},%zmm4,%zmm5,%zmm6");
+	asm volatile("vaddpd {ru-sae},%zmm4,%zmm5,%zmm6");
+	asm volatile("vaddpd {rd-sae},%zmm4,%zmm5,%zmm6");
+	asm volatile("vaddpd {rz-sae},%zmm4,%zmm5,%zmm6");
+	asm volatile("vaddpd (%ecx),%zmm5,%zmm6");
+	asm volatile("vaddpd 0x123(%eax,%ecx,8),%zmm5,%zmm6");
+	asm volatile("vaddpd (%ecx){1to8},%zmm5,%zmm6");
+	asm volatile("vaddpd 0x1fc0(%edx),%zmm5,%zmm6");
+	asm volatile("vaddpd 0x3f8(%edx){1to8},%zmm5,%zmm6");
+	asm volatile("vcmpeq_uqps 0x1fc(%edx){1to16},%zmm6,%k5");
+	asm volatile("vcmpltsd 0x123(%eax,%ecx,8),%xmm3,%k5{%k7}");
+	asm volatile("vcmplesd {sae},%xmm4,%xmm5,%k5{%k7}");
+	asm volatile("vgetmantss $0x5b,0x123(%eax,%ecx,8),%xmm4,%xmm5{%k7}");
+
+	/* bndmk m32, bnd */
+
+	asm volatile("bndmk (%eax), %bnd0");
+	asm volatile("bndmk (0x12345678), %bnd0");
+	asm volatile("bndmk (%eax), %bnd3");
+	asm volatile("bndmk (%ecx,%eax,1), %bnd0");
+	asm volatile("bndmk 0x12345678(,%eax,1), %bnd0");
+	asm volatile("bndmk (%eax,%ecx,1), %bnd0");
+	asm volatile("bndmk (%eax,%ecx,8), %bnd0");
+	asm volatile("bndmk 0x12(%eax), %bnd0");
+	asm volatile("bndmk 0x12(%ebp), %bnd0");
+	asm volatile("bndmk 0x12(%ecx,%eax,1), %bnd0");
+	asm volatile("bndmk 0x12(%ebp,%eax,1), %bnd0");
+	asm volatile("bndmk 0x12(%eax,%ecx,1), %bnd0");
+	asm volatile("bndmk 0x12(%eax,%ecx,8), %bnd0");
+	asm volatile("bndmk 0x12345678(%eax), %bnd0");
+	asm volatile("bndmk 0x12345678(%ebp), %bnd0");
+	asm volatile("bndmk 0x12345678(%ecx,%eax,1), %bnd0");
+	asm volatile("bndmk 0x12345678(%ebp,%eax,1), %bnd0");
+	asm volatile("bndmk 0x12345678(%eax,%ecx,1), %bnd0");
+	asm volatile("bndmk 0x12345678(%eax,%ecx,8), %bnd0");
+
+	/* bndcl r/m32, bnd */
+
+	asm volatile("bndcl (%eax), %bnd0");
+	asm volatile("bndcl (0x12345678), %bnd0");
+	asm volatile("bndcl (%eax), %bnd3");
+	asm volatile("bndcl (%ecx,%eax,1), %bnd0");
+	asm volatile("bndcl 0x12345678(,%eax,1), %bnd0");
+	asm volatile("bndcl (%eax,%ecx,1), %bnd0");
+	asm volatile("bndcl (%eax,%ecx,8), %bnd0");
+	asm volatile("bndcl 0x12(%eax), %bnd0");
+	asm volatile("bndcl 0x12(%ebp), %bnd0");
+	asm volatile("bndcl 0x12(%ecx,%eax,1), %bnd0");
+	asm volatile("bndcl 0x12(%ebp,%eax,1), %bnd0");
+	asm volatile("bndcl 0x12(%eax,%ecx,1), %bnd0");
+	asm volatile("bndcl 0x12(%eax,%ecx,8), %bnd0");
+	asm volatile("bndcl 0x12345678(%eax), %bnd0");
+	asm volatile("bndcl 0x12345678(%ebp), %bnd0");
+	asm volatile("bndcl 0x12345678(%ecx,%eax,1), %bnd0");
+	asm volatile("bndcl 0x12345678(%ebp,%eax,1), %bnd0");
+	asm volatile("bndcl 0x12345678(%eax,%ecx,1), %bnd0");
+	asm volatile("bndcl 0x12345678(%eax,%ecx,8), %bnd0");
+	asm volatile("bndcl %eax, %bnd0");
+
+	/* bndcu r/m32, bnd */
+
+	asm volatile("bndcu (%eax), %bnd0");
+	asm volatile("bndcu (0x12345678), %bnd0");
+	asm volatile("bndcu (%eax), %bnd3");
+	asm volatile("bndcu (%ecx,%eax,1), %bnd0");
+	asm volatile("bndcu 0x12345678(,%eax,1), %bnd0");
+	asm volatile("bndcu (%eax,%ecx,1), %bnd0");
+	asm volatile("bndcu (%eax,%ecx,8), %bnd0");
+	asm volatile("bndcu 0x12(%eax), %bnd0");
+	asm volatile("bndcu 0x12(%ebp), %bnd0");
+	asm volatile("bndcu 0x12(%ecx,%eax,1), %bnd0");
+	asm volatile("bndcu 0x12(%ebp,%eax,1), %bnd0");
+	asm volatile("bndcu 0x12(%eax,%ecx,1), %bnd0");
+	asm volatile("bndcu 0x12(%eax,%ecx,8), %bnd0");
+	asm volatile("bndcu 0x12345678(%eax), %bnd0");
+	asm volatile("bndcu 0x12345678(%ebp), %bnd0");
+	asm volatile("bndcu 0x12345678(%ecx,%eax,1), %bnd0");
+	asm volatile("bndcu 0x12345678(%ebp,%eax,1), %bnd0");
+	asm volatile("bndcu 0x12345678(%eax,%ecx,1), %bnd0");
+	asm volatile("bndcu 0x12345678(%eax,%ecx,8), %bnd0");
+	asm volatile("bndcu %eax, %bnd0");
+
+	/* bndcn r/m32, bnd */
+
+	asm volatile("bndcn (%eax), %bnd0");
+	asm volatile("bndcn (0x12345678), %bnd0");
+	asm volatile("bndcn (%eax), %bnd3");
+	asm volatile("bndcn (%ecx,%eax,1), %bnd0");
+	asm volatile("bndcn 0x12345678(,%eax,1), %bnd0");
+	asm volatile("bndcn (%eax,%ecx,1), %bnd0");
+	asm volatile("bndcn (%eax,%ecx,8), %bnd0");
+	asm volatile("bndcn 0x12(%eax), %bnd0");
+	asm volatile("bndcn 0x12(%ebp), %bnd0");
+	asm volatile("bndcn 0x12(%ecx,%eax,1), %bnd0");
+	asm volatile("bndcn 0x12(%ebp,%eax,1), %bnd0");
+	asm volatile("bndcn 0x12(%eax,%ecx,1), %bnd0");
+	asm volatile("bndcn 0x12(%eax,%ecx,8), %bnd0");
+	asm volatile("bndcn 0x12345678(%eax), %bnd0");
+	asm volatile("bndcn 0x12345678(%ebp), %bnd0");
+	asm volatile("bndcn 0x12345678(%ecx,%eax,1), %bnd0");
+	asm volatile("bndcn 0x12345678(%ebp,%eax,1), %bnd0");
+	asm volatile("bndcn 0x12345678(%eax,%ecx,1), %bnd0");
+	asm volatile("bndcn 0x12345678(%eax,%ecx,8), %bnd0");
+	asm volatile("bndcn %eax, %bnd0");
+
+	/* bndmov m64, bnd */
+
+	asm volatile("bndmov (%eax), %bnd0");
+	asm volatile("bndmov (0x12345678), %bnd0");
+	asm volatile("bndmov (%eax), %bnd3");
+	asm volatile("bndmov (%ecx,%eax,1), %bnd0");
+	asm volatile("bndmov 0x12345678(,%eax,1), %bnd0");
+	asm volatile("bndmov (%eax,%ecx,1), %bnd0");
+	asm volatile("bndmov (%eax,%ecx,8), %bnd0");
+	asm volatile("bndmov 0x12(%eax), %bnd0");
+	asm volatile("bndmov 0x12(%ebp), %bnd0");
+	asm volatile("bndmov 0x12(%ecx,%eax,1), %bnd0");
+	asm volatile("bndmov 0x12(%ebp,%eax,1), %bnd0");
+	asm volatile("bndmov 0x12(%eax,%ecx,1), %bnd0");
+	asm volatile("bndmov 0x12(%eax,%ecx,8), %bnd0");
+	asm volatile("bndmov 0x12345678(%eax), %bnd0");
+	asm volatile("bndmov 0x12345678(%ebp), %bnd0");
+	asm volatile("bndmov 0x12345678(%ecx,%eax,1), %bnd0");
+	asm volatile("bndmov 0x12345678(%ebp,%eax,1), %bnd0");
+	asm volatile("bndmov 0x12345678(%eax,%ecx,1), %bnd0");
+	asm volatile("bndmov 0x12345678(%eax,%ecx,8), %bnd0");
+
+	/* bndmov bnd, m64 */
+
+	asm volatile("bndmov %bnd0, (%eax)");
+	asm volatile("bndmov %bnd0, (0x12345678)");
+	asm volatile("bndmov %bnd3, (%eax)");
+	asm volatile("bndmov %bnd0, (%ecx,%eax,1)");
+	asm volatile("bndmov %bnd0, 0x12345678(,%eax,1)");
+	asm volatile("bndmov %bnd0, (%eax,%ecx,1)");
+	asm volatile("bndmov %bnd0, (%eax,%ecx,8)");
+	asm volatile("bndmov %bnd0, 0x12(%eax)");
+	asm volatile("bndmov %bnd0, 0x12(%ebp)");
+	asm volatile("bndmov %bnd0, 0x12(%ecx,%eax,1)");
+	asm volatile("bndmov %bnd0, 0x12(%ebp,%eax,1)");
+	asm volatile("bndmov %bnd0, 0x12(%eax,%ecx,1)");
+	asm volatile("bndmov %bnd0, 0x12(%eax,%ecx,8)");
+	asm volatile("bndmov %bnd0, 0x12345678(%eax)");
+	asm volatile("bndmov %bnd0, 0x12345678(%ebp)");
+	asm volatile("bndmov %bnd0, 0x12345678(%ecx,%eax,1)");
+	asm volatile("bndmov %bnd0, 0x12345678(%ebp,%eax,1)");
+	asm volatile("bndmov %bnd0, 0x12345678(%eax,%ecx,1)");
+	asm volatile("bndmov %bnd0, 0x12345678(%eax,%ecx,8)");
+
+	/* bndmov bnd2, bnd1 */
+
+	asm volatile("bndmov %bnd0, %bnd1");
+	asm volatile("bndmov %bnd1, %bnd0");
+
+	/* bndldx mib, bnd */
+
+	asm volatile("bndldx (%eax), %bnd0");
+	asm volatile("bndldx (0x12345678), %bnd0");
+	asm volatile("bndldx (%eax), %bnd3");
+	asm volatile("bndldx (%ecx,%eax,1), %bnd0");
+	asm volatile("bndldx 0x12345678(,%eax,1), %bnd0");
+	asm volatile("bndldx (%eax,%ecx,1), %bnd0");
+	asm volatile("bndldx 0x12(%eax), %bnd0");
+	asm volatile("bndldx 0x12(%ebp), %bnd0");
+	asm volatile("bndldx 0x12(%ecx,%eax,1), %bnd0");
+	asm volatile("bndldx 0x12(%ebp,%eax,1), %bnd0");
+	asm volatile("bndldx 0x12(%eax,%ecx,1), %bnd0");
+	asm volatile("bndldx 0x12345678(%eax), %bnd0");
+	asm volatile("bndldx 0x12345678(%ebp), %bnd0");
+	asm volatile("bndldx 0x12345678(%ecx,%eax,1), %bnd0");
+	asm volatile("bndldx 0x12345678(%ebp,%eax,1), %bnd0");
+	asm volatile("bndldx 0x12345678(%eax,%ecx,1), %bnd0");
+
+	/* bndstx bnd, mib */
+
+	asm volatile("bndstx %bnd0, (%eax)");
+	asm volatile("bndstx %bnd0, (0x12345678)");
+	asm volatile("bndstx %bnd3, (%eax)");
+	asm volatile("bndstx %bnd0, (%ecx,%eax,1)");
+	asm volatile("bndstx %bnd0, 0x12345678(,%eax,1)");
+	asm volatile("bndstx %bnd0, (%eax,%ecx,1)");
+	asm volatile("bndstx %bnd0, 0x12(%eax)");
+	asm volatile("bndstx %bnd0, 0x12(%ebp)");
+	asm volatile("bndstx %bnd0, 0x12(%ecx,%eax,1)");
+	asm volatile("bndstx %bnd0, 0x12(%ebp,%eax,1)");
+	asm volatile("bndstx %bnd0, 0x12(%eax,%ecx,1)");
+	asm volatile("bndstx %bnd0, 0x12345678(%eax)");
+	asm volatile("bndstx %bnd0, 0x12345678(%ebp)");
+	asm volatile("bndstx %bnd0, 0x12345678(%ecx,%eax,1)");
+	asm volatile("bndstx %bnd0, 0x12345678(%ebp,%eax,1)");
+	asm volatile("bndstx %bnd0, 0x12345678(%eax,%ecx,1)");
+
+	/* bnd prefix on call, ret, jmp and all jcc */
+
+	asm volatile("bnd call label1");  /* Expecting: call unconditional 0xfffffffc */
+	asm volatile("bnd call *(%eax)"); /* Expecting: call indirect      0 */
+	asm volatile("bnd ret");          /* Expecting: ret  indirect      0 */
+	asm volatile("bnd jmp label1");   /* Expecting: jmp  unconditional 0xfffffffc */
+	asm volatile("bnd jmp label1");   /* Expecting: jmp  unconditional 0xfffffffc */
+	asm volatile("bnd jmp *(%ecx)");  /* Expecting: jmp  indirect      0 */
+	asm volatile("bnd jne label1");   /* Expecting: jcc  conditional   0xfffffffc */
+
+	/* sha1rnds4 imm8, xmm2/m128, xmm1 */
+
+	asm volatile("sha1rnds4 $0x0, %xmm1, %xmm0");
+	asm volatile("sha1rnds4 $0x91, %xmm7, %xmm2");
+	asm volatile("sha1rnds4 $0x91, (%eax), %xmm0");
+	asm volatile("sha1rnds4 $0x91, (0x12345678), %xmm0");
+	asm volatile("sha1rnds4 $0x91, (%eax), %xmm3");
+	asm volatile("sha1rnds4 $0x91, (%ecx,%eax,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(,%eax,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, (%eax,%ecx,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, (%eax,%ecx,8), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12(%eax), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12(%ebp), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12(%ecx,%eax,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12(%ebp,%eax,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12(%eax,%ecx,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12(%eax,%ecx,8), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%eax), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%ebp), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%ecx,%eax,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%ebp,%eax,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%eax,%ecx,1), %xmm0");
+	asm volatile("sha1rnds4 $0x91, 0x12345678(%eax,%ecx,8), %xmm0");
+
+	/* sha1nexte xmm2/m128, xmm1 */
+
+	asm volatile("sha1nexte %xmm1, %xmm0");
+	asm volatile("sha1nexte %xmm7, %xmm2");
+	asm volatile("sha1nexte (%eax), %xmm0");
+	asm volatile("sha1nexte (0x12345678), %xmm0");
+	asm volatile("sha1nexte (%eax), %xmm3");
+	asm volatile("sha1nexte (%ecx,%eax,1), %xmm0");
+	asm volatile("sha1nexte 0x12345678(,%eax,1), %xmm0");
+	asm volatile("sha1nexte (%eax,%ecx,1), %xmm0");
+	asm volatile("sha1nexte (%eax,%ecx,8), %xmm0");
+	asm volatile("sha1nexte 0x12(%eax), %xmm0");
+	asm volatile("sha1nexte 0x12(%ebp), %xmm0");
+	asm volatile("sha1nexte 0x12(%ecx,%eax,1), %xmm0");
+	asm volatile("sha1nexte 0x12(%ebp,%eax,1), %xmm0");
+	asm volatile("sha1nexte 0x12(%eax,%ecx,1), %xmm0");
+	asm volatile("sha1nexte 0x12(%eax,%ecx,8), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%eax), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%ebp), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%ecx,%eax,1), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%ebp,%eax,1), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%eax,%ecx,1), %xmm0");
+	asm volatile("sha1nexte 0x12345678(%eax,%ecx,8), %xmm0");
+
+	/* sha1msg1 xmm2/m128, xmm1 */
+
+	asm volatile("sha1msg1 %xmm1, %xmm0");
+	asm volatile("sha1msg1 %xmm7, %xmm2");
+	asm volatile("sha1msg1 (%eax), %xmm0");
+	asm volatile("sha1msg1 (0x12345678), %xmm0");
+	asm volatile("sha1msg1 (%eax), %xmm3");
+	asm volatile("sha1msg1 (%ecx,%eax,1), %xmm0");
+	asm volatile("sha1msg1 0x12345678(,%eax,1), %xmm0");
+	asm volatile("sha1msg1 (%eax,%ecx,1), %xmm0");
+	asm volatile("sha1msg1 (%eax,%ecx,8), %xmm0");
+	asm volatile("sha1msg1 0x12(%eax), %xmm0");
+	asm volatile("sha1msg1 0x12(%ebp), %xmm0");
+	asm volatile("sha1msg1 0x12(%ecx,%eax,1), %xmm0");
+	asm volatile("sha1msg1 0x12(%ebp,%eax,1), %xmm0");
+	asm volatile("sha1msg1 0x12(%eax,%ecx,1), %xmm0");
+	asm volatile("sha1msg1 0x12(%eax,%ecx,8), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%eax), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%ebp), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%ecx,%eax,1), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%ebp,%eax,1), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%eax,%ecx,1), %xmm0");
+	asm volatile("sha1msg1 0x12345678(%eax,%ecx,8), %xmm0");
+
+	/* sha1msg2 xmm2/m128, xmm1 */
+
+	asm volatile("sha1msg2 %xmm1, %xmm0");
+	asm volatile("sha1msg2 %xmm7, %xmm2");
+	asm volatile("sha1msg2 (%eax), %xmm0");
+	asm volatile("sha1msg2 (0x12345678), %xmm0");
+	asm volatile("sha1msg2 (%eax), %xmm3");
+	asm volatile("sha1msg2 (%ecx,%eax,1), %xmm0");
+	asm volatile("sha1msg2 0x12345678(,%eax,1), %xmm0");
+	asm volatile("sha1msg2 (%eax,%ecx,1), %xmm0");
+	asm volatile("sha1msg2 (%eax,%ecx,8), %xmm0");
+	asm volatile("sha1msg2 0x12(%eax), %xmm0");
+	asm volatile("sha1msg2 0x12(%ebp), %xmm0");
+	asm volatile("sha1msg2 0x12(%ecx,%eax,1), %xmm0");
+	asm volatile("sha1msg2 0x12(%ebp,%eax,1), %xmm0");
+	asm volatile("sha1msg2 0x12(%eax,%ecx,1), %xmm0");
+	asm volatile("sha1msg2 0x12(%eax,%ecx,8), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%eax), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%ebp), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%ecx,%eax,1), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%ebp,%eax,1), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%eax,%ecx,1), %xmm0");
+	asm volatile("sha1msg2 0x12345678(%eax,%ecx,8), %xmm0");
+
+	/* sha256rnds2 <XMM0>, xmm2/m128, xmm1 */
+	/* Note sha256rnds2 has an implicit operand 'xmm0' */
+
+	asm volatile("sha256rnds2 %xmm4, %xmm1");
+	asm volatile("sha256rnds2 %xmm7, %xmm2");
+	asm volatile("sha256rnds2 (%eax), %xmm1");
+	asm volatile("sha256rnds2 (0x12345678), %xmm1");
+	asm volatile("sha256rnds2 (%eax), %xmm3");
+	asm volatile("sha256rnds2 (%ecx,%eax,1), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(,%eax,1), %xmm1");
+	asm volatile("sha256rnds2 (%eax,%ecx,1), %xmm1");
+	asm volatile("sha256rnds2 (%eax,%ecx,8), %xmm1");
+	asm volatile("sha256rnds2 0x12(%eax), %xmm1");
+	asm volatile("sha256rnds2 0x12(%ebp), %xmm1");
+	asm volatile("sha256rnds2 0x12(%ecx,%eax,1), %xmm1");
+	asm volatile("sha256rnds2 0x12(%ebp,%eax,1), %xmm1");
+	asm volatile("sha256rnds2 0x12(%eax,%ecx,1), %xmm1");
+	asm volatile("sha256rnds2 0x12(%eax,%ecx,8), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%eax), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%ebp), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%ecx,%eax,1), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%ebp,%eax,1), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%eax,%ecx,1), %xmm1");
+	asm volatile("sha256rnds2 0x12345678(%eax,%ecx,8), %xmm1");
+
+	/* sha256msg1 xmm2/m128, xmm1 */
+
+	asm volatile("sha256msg1 %xmm1, %xmm0");
+	asm volatile("sha256msg1 %xmm7, %xmm2");
+	asm volatile("sha256msg1 (%eax), %xmm0");
+	asm volatile("sha256msg1 (0x12345678), %xmm0");
+	asm volatile("sha256msg1 (%eax), %xmm3");
+	asm volatile("sha256msg1 (%ecx,%eax,1), %xmm0");
+	asm volatile("sha256msg1 0x12345678(,%eax,1), %xmm0");
+	asm volatile("sha256msg1 (%eax,%ecx,1), %xmm0");
+	asm volatile("sha256msg1 (%eax,%ecx,8), %xmm0");
+	asm volatile("sha256msg1 0x12(%eax), %xmm0");
+	asm volatile("sha256msg1 0x12(%ebp), %xmm0");
+	asm volatile("sha256msg1 0x12(%ecx,%eax,1), %xmm0");
+	asm volatile("sha256msg1 0x12(%ebp,%eax,1), %xmm0");
+	asm volatile("sha256msg1 0x12(%eax,%ecx,1), %xmm0");
+	asm volatile("sha256msg1 0x12(%eax,%ecx,8), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%eax), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%ebp), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%ecx,%eax,1), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%ebp,%eax,1), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%eax,%ecx,1), %xmm0");
+	asm volatile("sha256msg1 0x12345678(%eax,%ecx,8), %xmm0");
+
+	/* sha256msg2 xmm2/m128, xmm1 */
+
+	asm volatile("sha256msg2 %xmm1, %xmm0");
+	asm volatile("sha256msg2 %xmm7, %xmm2");
+	asm volatile("sha256msg2 (%eax), %xmm0");
+	asm volatile("sha256msg2 (0x12345678), %xmm0");
+	asm volatile("sha256msg2 (%eax), %xmm3");
+	asm volatile("sha256msg2 (%ecx,%eax,1), %xmm0");
+	asm volatile("sha256msg2 0x12345678(,%eax,1), %xmm0");
+	asm volatile("sha256msg2 (%eax,%ecx,1), %xmm0");
+	asm volatile("sha256msg2 (%eax,%ecx,8), %xmm0");
+	asm volatile("sha256msg2 0x12(%eax), %xmm0");
+	asm volatile("sha256msg2 0x12(%ebp), %xmm0");
+	asm volatile("sha256msg2 0x12(%ecx,%eax,1), %xmm0");
+	asm volatile("sha256msg2 0x12(%ebp,%eax,1), %xmm0");
+	asm volatile("sha256msg2 0x12(%eax,%ecx,1), %xmm0");
+	asm volatile("sha256msg2 0x12(%eax,%ecx,8), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%eax), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%ebp), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%ecx,%eax,1), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%ebp,%eax,1), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%eax,%ecx,1), %xmm0");
+	asm volatile("sha256msg2 0x12345678(%eax,%ecx,8), %xmm0");
+
+	/* clflushopt m8 */
+
+	asm volatile("clflushopt (%eax)");
+	asm volatile("clflushopt (0x12345678)");
+	asm volatile("clflushopt 0x12345678(%eax,%ecx,8)");
+	/* Also check instructions in the same group encoding as clflushopt */
+	asm volatile("clflush (%eax)");
+	asm volatile("sfence");
+
+	/* clwb m8 */
+
+	asm volatile("clwb (%eax)");
+	asm volatile("clwb (0x12345678)");
+	asm volatile("clwb 0x12345678(%eax,%ecx,8)");
+	/* Also check instructions in the same group encoding as clwb */
+	asm volatile("xsaveopt (%eax)");
+	asm volatile("mfence");
+
+	/* xsavec mem */
+
+	asm volatile("xsavec (%eax)");
+	asm volatile("xsavec (0x12345678)");
+	asm volatile("xsavec 0x12345678(%eax,%ecx,8)");
+
+	/* xsaves mem */
+
+	asm volatile("xsaves (%eax)");
+	asm volatile("xsaves (0x12345678)");
+	asm volatile("xsaves 0x12345678(%eax,%ecx,8)");
+
+	/* xrstors mem */
+
+	asm volatile("xrstors (%eax)");
+	asm volatile("xrstors (0x12345678)");
+	asm volatile("xrstors 0x12345678(%eax,%ecx,8)");
+
+	/* ptwrite */
+
+	asm volatile("ptwrite (%eax)");
+	asm volatile("ptwrite (0x12345678)");
+	asm volatile("ptwrite 0x12345678(%eax,%ecx,8)");
+
+	asm volatile("ptwritel (%eax)");
+	asm volatile("ptwritel (0x12345678)");
+	asm volatile("ptwritel 0x12345678(%eax,%ecx,8)");
+
+#endif /* #ifndef __x86_64__ */
+
+	/* Following line is a marker for the awk script - do not change */
+	asm volatile("rdtsc"); /* Stop here */
+
+	return 0;
+}
diff --git a/arch/x86/tests/insn-x86.c b/arch/x86/tests/insn-x86.c
new file mode 100644
index 0000000..a5d24ae
--- /dev/null
+++ b/arch/x86/tests/insn-x86.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#include "intel-pt-decoder/insn.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+
+struct test_data {	/* one instruction encoding plus its expected decode results */
+	u8 data[MAX_INSN_SIZE];		/* raw instruction bytes handed to both decoders */
+	int expected_length;		/* compared with insn.length; 0 ends a table */
+	int expected_rel;		/* compared with intel_pt_insn.rel */
+	const char *expected_op_str;	/* op name, translated by get_op() */
+	const char *expected_branch_str; /* branch name, translated by get_branch() */
+	const char *asm_rep;		/* text printed in the debug messages */
+};
+
+struct test_data test_data_32[] = {	/* decoded with x86_64 == 0 by test__insn_x86() */
+#include "insn-x86-dat-32.c"
+	{{0x0f, 0x01, 0xee}, 3, 0, NULL, NULL, "0f 01 ee             \trdpkru"},	/* listed here directly, after the included entries */
+	{{0x0f, 0x01, 0xef}, 3, 0, NULL, NULL, "0f 01 ef             \twrpkru"},
+	{{0}, 0, 0, NULL, NULL, NULL},	/* terminator: expected_length == 0 */
+};
+
+struct test_data test_data_64[] = {	/* decoded with x86_64 == 1 by test__insn_x86() */
+#include "insn-x86-dat-64.c"
+	{{0x0f, 0x01, 0xee}, 3, 0, NULL, NULL, "0f 01 ee             \trdpkru"},	/* listed here directly, after the included entries */
+	{{0x0f, 0x01, 0xef}, 3, 0, NULL, NULL, "0f 01 ef             \twrpkru"},
+	{{0}, 0, 0, NULL, NULL, NULL},	/* terminator: expected_length == 0 */
+};
+
+static int get_op(const char *op_str)
+{
+	/* Op names the test data may use, and the value each one stands for. */
+	const struct {
+		const char *name;
+		int val;
+	} ops[] = {
+		{"other",   INTEL_PT_OP_OTHER},
+		{"call",    INTEL_PT_OP_CALL},
+		{"ret",     INTEL_PT_OP_RET},
+		{"jcc",     INTEL_PT_OP_JCC},
+		{"jmp",     INTEL_PT_OP_JMP},
+		{"loop",    INTEL_PT_OP_LOOP},
+		{"iret",    INTEL_PT_OP_IRET},
+		{"int",     INTEL_PT_OP_INT},
+		{"syscall", INTEL_PT_OP_SYSCALL},
+		{"sysret",  INTEL_PT_OP_SYSRET},
+	};
+	unsigned int i;
+
+	/* A missing or empty name maps to 0; an unknown one yields -1. */
+	if (op_str == NULL || op_str[0] == '\0')
+		return 0;
+
+	for (i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) {
+		if (strcmp(ops[i].name, op_str) == 0)
+			return ops[i].val;
+	}
+
+	pr_debug("Failed to get op\n");
+	return -1;
+}
+
+static int get_branch(const char *branch_str)
+{
+	/* Branch names the test data may use, and the value of each one. */
+	const struct {
+		const char *name;
+		int val;
+	} branches[] = {
+		{"no_branch",     INTEL_PT_BR_NO_BRANCH},
+		{"indirect",      INTEL_PT_BR_INDIRECT},
+		{"conditional",   INTEL_PT_BR_CONDITIONAL},
+		{"unconditional", INTEL_PT_BR_UNCONDITIONAL},
+	};
+	unsigned int i;
+
+	/* A missing or empty name maps to 0; an unknown one yields -1. */
+	if (branch_str == NULL || branch_str[0] == '\0')
+		return 0;
+
+	for (i = 0; i < sizeof(branches) / sizeof(branches[0]); i++) {
+		if (strcmp(branches[i].name, branch_str) == 0)
+			return branches[i].val;
+	}
+
+	pr_debug("Failed to get branch\n");
+	return -1;
+}
+
+static int test_data_item(struct test_data *dat, int x86_64)	/* 0 if @dat decodes as expected, else -1 */
+{
+	struct intel_pt_insn intel_pt_insn;
+	struct insn insn;
+	int op, branch;
+
+	insn_init(&insn, dat->data, MAX_INSN_SIZE, x86_64);	/* step 1: the insn decoder */
+	insn_get_length(&insn);
+
+	if (!insn_complete(&insn)) {
+		pr_debug("Failed to decode: %s\n", dat->asm_rep);
+		return -1;
+	}
+
+	if (insn.length != dat->expected_length) {
+		pr_debug("Failed to decode length (%d vs expected %d): %s\n",
+			 insn.length, dat->expected_length, dat->asm_rep);
+		return -1;
+	}
+
+	op = get_op(dat->expected_op_str);	/* expected values, translated from their names */
+	branch = get_branch(dat->expected_branch_str);
+
+	if (intel_pt_get_insn(dat->data, MAX_INSN_SIZE, x86_64, &intel_pt_insn)) {	/* step 2: the Intel PT decoder */
+		pr_debug("Intel PT failed to decode: %s\n", dat->asm_rep);
+		return -1;
+	}
+
+	if ((int)intel_pt_insn.op != op) {
+		pr_debug("Failed to decode 'op' value (%d vs expected %d): %s\n",
+			 intel_pt_insn.op, op, dat->asm_rep);
+		return -1;
+	}
+
+	if ((int)intel_pt_insn.branch != branch) {
+		pr_debug("Failed to decode 'branch' value (%d vs expected %d): %s\n",
+			 intel_pt_insn.branch, branch, dat->asm_rep);
+		return -1;
+	}
+
+	if (intel_pt_insn.rel != dat->expected_rel) {
+		pr_debug("Failed to decode 'rel' value (%#x vs expected %#x): %s\n",
+			 intel_pt_insn.rel, dat->expected_rel, dat->asm_rep);
+		return -1;
+	}
+
+	pr_debug("Decoded ok: %s\n", dat->asm_rep);	/* every check above passed */
+
+	return 0;
+}
+
+static int test_data_set(struct test_data *dat_set, int x86_64)
+{
+	int failed = 0;		/* sticky: set once any entry fails */
+	unsigned int i;
+
+	/* Check every entry; the table ends at expected_length == 0. */
+	for (i = 0; dat_set[i].expected_length != 0; i++)
+		if (test_data_item(&dat_set[i], x86_64) != 0)
+			failed = 1;
+
+	return failed ? -1 : 0;
+}
+
+/**
+ * test__insn_x86 - test x86 instruction decoder - new instructions.
+ *
+ * Decodes every entry of the 32-bit and 64-bit instruction tables and checks
+ * the results.  The Intel PT function that further categorizes instructions
+ * (i.e. intel_pt_get_insn()) is checked against the same entries.
+ *
+ * The instructions originate in insn-x86-dat-src.c, which is processed by the
+ * scripts gen-insn-x86-dat.sh and gen-insn-x86-dat.awk to produce
+ * insn-x86-dat-32.c and insn-x86-dat-64.c, both included into this program.
+ * To add new instructions to the test, edit insn-x86-dat-src.c, run the
+ * gen-insn-x86-dat.sh script, make perf, and then run the test.
+ *
+ * Returns %0 if every entry decodes as expected, otherwise %-1.  Use the
+ * verbose (-v) option to see all the instructions and whether or not they
+ * decoded successfully.
+ */
+int test__insn_x86(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err32, err64;
+
+	/* Run both tables unconditionally so that every failure is reported. */
+	err32 = test_data_set(test_data_32, 0);
+	err64 = test_data_set(test_data_64, 1);
+
+	if (err32 || err64)
+		return -1;
+	return 0;
+}
diff --git a/arch/x86/tests/intel-cqm.c b/arch/x86/tests/intel-cqm.c
new file mode 100644
index 0000000..90a4a8c
--- /dev/null
+++ b/arch/x86/tests/intel-cqm.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tests/tests.h"
+#include "perf.h"
+#include "cloexec.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "arch-tests.h"
+
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
+
+static pid_t spawn(void)
+{
+	pid_t child = fork();
+
+	/* Parent (or fork() failure): hand the fork() result to the caller. */
+	if (child != 0)
+		return child;
+
+	for (;;)	/* child: do nothing but sleep, forever */
+		sleep(5);
+	return 0;
+}
+
+/*
+ * Create an event group that contains both a sampled hardware
+ * (cpu-cycles) and software (intel_cqm/llc_occupancy/) event, then wait
+ * for the hardware counter to overflow and generate a PMI, which triggers
+ * an event read for both events in the group.  Reading Intel CQM counters
+ * requires sending SMP IPIs, so the CQM pmu must handle this gracefully
+ * and return the last read value rather than trigger a WARN_ON_ONCE() in
+ * smp_call_function_many() by sending IPIs from NMI context.
+ *
+ * Returns TEST_OK, TEST_SKIP if the CQM event cannot be parsed, else TEST_FAIL.
+ */
+int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct perf_evlist *evlist = NULL;
+	struct perf_evsel *evsel = NULL;
+	struct perf_event_attr pe;
+	int i, fd[2], flag, ret;
+	size_t mmap_len;
+	void *event;
+	pid_t pid;
+	int err = TEST_FAIL;
+
+	flag = perf_event_open_cloexec_flag();
+
+	evlist = perf_evlist__new();
+	if (!evlist) {
+		pr_debug("perf_evlist__new failed\n");
+		return TEST_FAIL;
+	}
+
+	ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL);
+	if (ret) {
+		pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n");
+		err = TEST_SKIP;
+		goto out;
+	}
+
+	evsel = perf_evlist__first(evlist);
+	if (!evsel) {
+		pr_debug("perf_evlist__first failed\n");
+		goto out;
+	}
+
+	memset(&pe, 0, sizeof(pe));	/* group leader: sampled cpu-cycles */
+	pe.size = sizeof(pe);
+	pe.type = PERF_TYPE_HARDWARE;
+	pe.config = PERF_COUNT_HW_CPU_CYCLES;
+	pe.read_format = PERF_FORMAT_GROUP;
+	pe.sample_period = 128;
+	pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ;
+
+	pid = spawn();
+	if (pid < 0) {
+		pr_debug("fork failed\n");
+		goto out;
+	}
+
+	fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag);
+	if (fd[0] < 0) {
+		pr_debug("failed to open event\n");
+		goto out_kill;
+	}
+
+	memset(&pe, 0, sizeof(pe));	/* group member: the CQM event parsed above */
+	pe.size = sizeof(pe);
+	pe.type = evsel->attr.type;
+	pe.config = evsel->attr.config;
+
+	fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag);
+	if (fd[1] < 0) {
+		pr_debug("failed to open event\n");
+		goto out_close;
+	}
+
+	/*
+	 * Pick a power-of-two number of pages + 1 for the meta-data
+	 * page (struct perf_event_mmap_page). See tools/perf/design.txt.
+	 */
+	mmap_len = page_size * 65;
+
+	event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0);
+	if (event == MAP_FAILED) {
+		pr_debug("failed to mmap %d\n", errno);
+		goto out_close;
+	}
+
+	sleep(1);	/* let the event group run for a while */
+	err = TEST_OK;
+	munmap(event, mmap_len);
+out_close:
+	for (i = 0; i < 2; i++)
+		if (fd[i] >= 0)	/* fd[1] is negative if its open failed */
+			close(fd[i]);
+out_kill:
+	kill(pid, SIGKILL);	/* kill and reap the child on every path */
+	wait(NULL);
+out:
+	perf_evlist__delete(evlist);
+	return err;
+}
diff --git a/arch/x86/tests/perf-time-to-tsc.c b/arch/x86/tests/perf-time-to-tsc.c
new file mode 100644
index 0000000..7a77216
--- /dev/null
+++ b/arch/x86/tests/perf-time-to-tsc.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <linux/types.h>
+#include <sys/prctl.h>
+
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tsc.h"
+#include "tests/tests.h"
+
+#include "arch-tests.h"
+
+#define CHECK__(x) do {	/* on (x) < 0: log it, jump to out_err */	\
+	if ((x) < 0) {				\
+		pr_debug(#x " failed!\n");	\
+		goto out_err;			\
+	}					\
+} while (0)
+
+#define CHECK_NOT_NULL__(x) do { /* on (x) == NULL: log it, jump to out_err */ \
+	if ((x) == NULL) {			\
+		pr_debug(#x " failed!\n");	\
+		goto out_err;			\
+	}					\
+} while (0)
+
+/**
+ * test__perf_time_to_tsc - test converting perf time to TSC.
+ *
+ * This function implements a test that checks that the conversion of perf time
+ * to and from TSC is consistent with the order of events.  If the test passes
+ * %0 is returned, otherwise %-1 is returned.  If TSC conversion is not
+ * supported then the test passes but " (not supported)" is printed.
+ */
+int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct record_opts opts = {
+		.mmap_pages	     = UINT_MAX,
+		.user_freq	     = UINT_MAX,
+		.user_interval	     = ULLONG_MAX,
+		.target		     = {
+			.uses_mmap   = true,
+		},
+		.sample_time	     = true,
+	};
+	struct thread_map *threads = NULL;
+	struct cpu_map *cpus = NULL;
+	struct perf_evlist *evlist = NULL;
+	struct perf_evsel *evsel = NULL;
+	int err = -1, ret, i;
+	const char *comm1, *comm2;
+	struct perf_tsc_conversion tc;
+	struct perf_event_mmap_page *pc;
+	union perf_event *event;
+	u64 test_tsc, comm1_tsc, comm2_tsc;
+	u64 test_time, comm1_time = 0, comm2_time = 0;
+	struct perf_mmap *md;
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	CHECK_NOT_NULL__(threads);
+
+	cpus = cpu_map__new(NULL);
+	CHECK_NOT_NULL__(cpus);
+
+	evlist = perf_evlist__new();
+	CHECK_NOT_NULL__(evlist);
+
+	perf_evlist__set_maps(evlist, cpus, threads);
+
+	CHECK__(parse_events(evlist, "cycles:u", NULL));
+
+	perf_evlist__config(evlist, &opts, NULL);
+
+	evsel = perf_evlist__first(evlist);
+
+	evsel->attr.comm = 1;
+	evsel->attr.disabled = 1;
+	evsel->attr.enable_on_exec = 0;
+
+	CHECK__(perf_evlist__open(evlist));
+
+	CHECK__(perf_evlist__mmap(evlist, UINT_MAX));
+
+	pc = evlist->mmap[0].base;	/* page the conversion parameters are read from */
+	ret = perf_read_tsc_conversion(pc, &tc);
+	if (ret) {
+		if (ret == -EOPNOTSUPP) {
+			fprintf(stderr, " (not supported)");
+			err = 0;	/* counts as a pass; evlist is still released below */
+		}
+		goto out_err;
+	}
+
+	perf_evlist__enable(evlist);
+
+	comm1 = "Test COMM 1";
+	CHECK__(prctl(PR_SET_NAME, (unsigned long)comm1, 0, 0, 0));
+
+	test_tsc = rdtsc();	/* taken between the two renames */
+
+	comm2 = "Test COMM 2";
+	CHECK__(prctl(PR_SET_NAME, (unsigned long)comm2, 0, 0, 0));
+
+	perf_evlist__disable(evlist);
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		md = &evlist->mmap[i];
+		if (perf_mmap__read_init(md) < 0)
+			continue;
+
+		while ((event = perf_mmap__read_event(md)) != NULL) {
+			struct perf_sample sample;
+
+			if (event->header.type != PERF_RECORD_COMM ||
+			    (pid_t)event->comm.pid != getpid() ||
+			    (pid_t)event->comm.tid != getpid())
+				goto next_event;	/* not a COMM event of this thread */
+
+			if (strcmp(event->comm.comm, comm1) == 0) {
+				CHECK__(perf_evsel__parse_sample(evsel, event,
+								 &sample));
+				comm1_time = sample.time;
+			}
+			if (strcmp(event->comm.comm, comm2) == 0) {
+				CHECK__(perf_evsel__parse_sample(evsel, event,
+								 &sample));
+				comm2_time = sample.time;
+			}
+next_event:
+			perf_mmap__consume(md);
+		}
+		perf_mmap__read_done(md);
+	}
+
+	if (!comm1_time || !comm2_time)	/* a COMM event was not found */
+		goto out_err;
+
+	test_time = tsc_to_perf_time(test_tsc, &tc);
+	comm1_tsc = perf_time_to_tsc(comm1_time, &tc);
+	comm2_tsc = perf_time_to_tsc(comm2_time, &tc);
+
+	pr_debug("1st event perf time %"PRIu64" tsc %"PRIu64"\n",
+		 comm1_time, comm1_tsc);
+	pr_debug("rdtsc          time %"PRIu64" tsc %"PRIu64"\n",
+		 test_time, test_tsc);
+	pr_debug("2nd event perf time %"PRIu64" tsc %"PRIu64"\n",
+		 comm2_time, comm2_tsc);
+
+	if (test_time <= comm1_time ||	/* rdtsc must lie strictly between the events */
+	    test_time >= comm2_time)
+		goto out_err;
+
+	if (test_tsc <= comm1_tsc ||	/* ... and likewise after converting to TSC */
+	    test_tsc >= comm2_tsc)
+		goto out_err;
+
+	err = 0;
+
+out_err:
+	perf_evlist__delete(evlist);
+	return err;
+}
diff --git a/arch/x86/tests/rdpmc.c b/arch/x86/tests/rdpmc.c
new file mode 100644
index 0000000..7a11f02
--- /dev/null
+++ b/arch/x86/tests/rdpmc.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <linux/types.h>
+#include "perf.h"
+#include "debug.h"
+#include "tests/tests.h"
+#include "cloexec.h"
+#include "util.h"
+#include "arch-tests.h"
+
+static u64 rdpmc(unsigned int counter)	/* execute RDPMC for @counter; returns EDX:EAX as one u64 */
+{
+	unsigned int low, high;
+
+	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));	/* @counter is passed in ECX */
+
+	return low | ((u64)high) << 32;	/* '<<' binds tighter than '|' */
+}
+
+static u64 rdtsc(void)	/* execute RDTSC; returns EDX:EAX as one u64 */
+{
+	unsigned int low, high;
+
+	asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+	return low | ((u64)high) << 32;	/* '<<' binds tighter than '|' */
+}
+
+static u64 mmap_read_self(void *addr)	/* read the event count via the perf_event_mmap_page at @addr */
+{
+	struct perf_event_mmap_page *pc = addr;
+	u32 seq, idx, time_mult = 0, time_shift = 0;
+	u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
+
+	do {	/* repeat until pc->lock is unchanged across all the reads */
+		seq = pc->lock;
+		barrier();
+
+		enabled = pc->time_enabled;
+		running = pc->time_running;
+
+		if (enabled != running) {	/* the times differ: sample what the scaling below needs */
+			cyc = rdtsc();
+			time_mult = pc->time_mult;
+			time_shift = pc->time_shift;
+			time_offset = pc->time_offset;
+		}
+
+		idx = pc->index;
+		count = pc->offset;
+		if (idx)	/* only a non-zero index is read, as counter idx - 1 */
+			count += rdpmc(idx - 1);
+
+		barrier();
+	} while (pc->lock != seq);
+
+	if (enabled != running) {
+		u64 quot, rem;
+
+		quot = (cyc >> time_shift);	/* delta = time_offset + cyc * time_mult >> time_shift, in two parts */
+		rem = cyc & (((u64)1 << time_shift) - 1);
+		delta = time_offset + quot * time_mult +
+			((rem * time_mult) >> time_shift);
+
+		enabled += delta;
+		if (idx)
+			running += delta;
+
+		quot = count / running;	/* NOTE(review): divides by zero if running is still 0 here - confirm it cannot be */
+		rem = count % running;
+		count = quot * enabled + (rem * enabled) / running;	/* count * enabled / running, in two parts */
+	}
+
+	return count;
+}
+
+/*
+ * SIGSEGV handler: if RDPMC faults, exit(-1) signals it to the test parent task.
+ */
+static void segfault_handler(int sig __maybe_unused,
+			     siginfo_t *info __maybe_unused,
+			     void *uc __maybe_unused)
+{
+	exit(-1);	/* test__rdpmc() treats any non-zero wait status as failure */
+}
+
+/* Body of the rdpmc test: 0 if the self-read counts advanced at all, else -1. */
+static int __test__rdpmc(void)
+{
+	volatile int tmp = 0;
+	u64 i, loops = 1000;
+	int n, fd;
+	void *addr;
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_INSTRUCTIONS,
+		.exclude_kernel = 1,
+	};
+	u64 delta_sum = 0;
+	struct sigaction sa;
+	char sbuf[STRERR_BUFSIZE];
+
+	sigfillset(&sa.sa_mask);
+	sa.sa_sigaction = segfault_handler;
+	sa.sa_flags = SA_SIGINFO;	/* required for a sa_sigaction-style handler */
+	sigaction(SIGSEGV, &sa, NULL);
+
+	fd = sys_perf_event_open(&attr, 0, -1, -1,
+				 perf_event_open_cloexec_flag());
+	if (fd < 0) {
+		pr_err("Error: sys_perf_event_open() syscall returned "
+		       "with %d (%s)\n", fd,
+		       str_error_r(errno, sbuf, sizeof(sbuf)));
+		return -1;
+	}
+
+	addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
+	if (addr == MAP_FAILED) {
+		pr_err("Error: mmap() syscall returned with (%s)\n",
+		       str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_close;	/* delta_sum is still 0, so -1 is returned */
+	}
+
+	for (n = 0; n < 6; n++) {	/* six rounds, ten times more work each round */
+		u64 stamp, now, delta;
+
+		stamp = mmap_read_self(addr);
+
+		for (i = 0; i < loops; i++)
+			tmp++;	/* volatile: the busy loop is not optimized away */
+
+		now = mmap_read_self(addr);
+		loops *= 10;
+
+		delta = now - stamp;
+		pr_debug("%14d: %14llu\n", n, (unsigned long long)delta);
+
+		delta_sum += delta;
+	}
+
+	munmap(addr, page_size);
+	pr_debug("   ");
+out_close:
+	close(fd);
+
+	if (!delta_sum)	/* the count never moved */
+		return -1;
+
+	return 0;
+}
+
+int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int child_status = 0;
+	int child;
+
+	/*
+	 * Run the real test in a child process, so that a faulting RDPMC
+	 * only ends the child (see segfault_handler()).
+	 */
+	child = fork();
+	if (child < 0)
+		return -1;
+
+	/* Child: the exit status carries the result back to the parent. */
+	if (child == 0)
+		exit(__test__rdpmc());
+
+	/* Parent: any wait failure or non-zero wait status is a failure. */
+	if (waitpid(child, &child_status, 0) < 0)
+		return -1;
+
+	return child_status ? -1 : 0;
+}
diff --git a/arch/x86/tests/regs_load.S b/arch/x86/tests/regs_load.S
new file mode 100644
index 0000000..bbe5a0d
--- /dev/null
+++ b/arch/x86/tests/regs_load.S
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
+#define AX	 0	/* byte offsets into the buffer perf_regs_load fills: one 8-byte slot per register */
+#define BX	 1 * 8
+#define CX	 2 * 8
+#define DX	 3 * 8
+#define SI	 4 * 8
+#define DI	 5 * 8
+#define BP	 6 * 8
+#define SP	 7 * 8
+#define IP	 8 * 8
+#define FLAGS	 9 * 8
+#define CS	10 * 8
+#define SS	11 * 8
+#define DS	12 * 8
+#define ES	13 * 8
+#define FS	14 * 8
+#define GS	15 * 8
+#define R8	16 * 8	/* R8-R15 are only stored by the x86_64 variant */
+#define R9	17 * 8
+#define R10	18 * 8
+#define R11	19 * 8
+#define R12	20 * 8
+#define R13	21 * 8
+#define R14	22 * 8
+#define R15	23 * 8
+
+.text
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ENTRY(perf_regs_load)
+	movq %rax, AX(%rdi)	/* %rdi points at the buffer being filled */
+	movq %rbx, BX(%rdi)
+	movq %rcx, CX(%rdi)
+	movq %rdx, DX(%rdi)
+	movq %rsi, SI(%rdi)
+	movq %rdi, DI(%rdi)
+	movq %rbp, BP(%rdi)
+
+	leaq 8(%rsp), %rax /* exclude this call: skip the return address; %rax was stored above */
+	movq %rax, SP(%rdi)
+
+	movq 0(%rsp), %rax	/* return address of this call, recorded as IP */
+	movq %rax, IP(%rdi)
+
+	movq $0, FLAGS(%rdi)	/* flags and segment registers are stored as 0 */
+	movq $0, CS(%rdi)
+	movq $0, SS(%rdi)
+	movq $0, DS(%rdi)
+	movq $0, ES(%rdi)
+	movq $0, FS(%rdi)
+	movq $0, GS(%rdi)
+
+	movq %r8,  R8(%rdi)
+	movq %r9,  R9(%rdi)
+	movq %r10, R10(%rdi)
+	movq %r11, R11(%rdi)
+	movq %r12, R12(%rdi)
+	movq %r13, R13(%rdi)
+	movq %r14, R14(%rdi)
+	movq %r15, R15(%rdi)
+	ret
+ENDPROC(perf_regs_load)
+#else
+ENTRY(perf_regs_load)
+	push %edi	/* keep the caller's %edi value on the stack */
+	movl 8(%esp), %edi	/* stack argument above the saved %edi and the return address */
+	movl %eax, AX(%edi)	/* NOTE(review): movl fills only the low 4 bytes of each 8-byte slot - confirm the rest is pre-zeroed */
+	movl %ebx, BX(%edi)
+	movl %ecx, CX(%edi)
+	movl %edx, DX(%edi)
+	movl %esi, SI(%edi)
+	pop %eax	/* caller's %edi; NOTE(review): %edi itself is not restored before ret - confirm intended */
+	movl %eax, DI(%edi)
+	movl %ebp, BP(%edi)
+
+	leal 4(%esp), %eax /* exclude this call: skip the return address */
+	movl %eax, SP(%edi)
+
+	movl 0(%esp), %eax	/* return address of this call, recorded as IP */
+	movl %eax, IP(%edi)
+
+	movl $0, FLAGS(%edi)	/* flags and segment registers are stored as 0 */
+	movl $0, CS(%edi)
+	movl $0, SS(%edi)
+	movl $0, DS(%edi)
+	movl $0, ES(%edi)
+	movl $0, FS(%edi)
+	movl $0, GS(%edi)
+	ret
+ENDPROC(perf_regs_load)
+#endif
+
+/*
+ * We need to provide note.GNU-stack section, saying that we want
+ * NOT executable stack. Otherwise the final linking will assume that
+ * the ELF stack should not be restricted at all and set it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/arch/x86/util/Build b/arch/x86/util/Build
new file mode 100644
index 0000000..f95e6f4
--- /dev/null
+++ b/arch/x86/util/Build
@@ -0,0 +1,16 @@
+libperf-y += header.o
+libperf-y += tsc.o
+libperf-y += pmu.o
+libperf-y += kvm-stat.o
+libperf-y += perf_regs.o
+libperf-y += group.o
+
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
+
+libperf-$(CONFIG_LOCAL_LIBUNWIND)    += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-$(CONFIG_AUXTRACE) += auxtrace.o
+libperf-$(CONFIG_AUXTRACE) += intel-pt.o
+libperf-$(CONFIG_AUXTRACE) += intel-bts.o
diff --git a/arch/x86/util/auxtrace.c b/arch/x86/util/auxtrace.c
new file mode 100644
index 0000000..b135af6
--- /dev/null
+++ b/arch/x86/util/auxtrace.c
@@ -0,0 +1,80 @@
+/*
+ * auxtrace.c: AUX area tracing support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+
+#include "../../util/header.h"
+#include "../../util/debug.h"
+#include "../../util/pmu.h"
+#include "../../util/auxtrace.h"
+#include "../../util/intel-pt.h"
+#include "../../util/intel-bts.h"
+#include "../../util/evlist.h"
+
+static
+struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist,
+						    int *err)
+{
+	struct perf_pmu *pt = perf_pmu__find(INTEL_PT_PMU_NAME);
+	struct perf_pmu *bts = perf_pmu__find(INTEL_BTS_PMU_NAME);
+	struct perf_evsel *pos;
+	bool have_pt = false;
+	bool have_bts = false;
+
+	/* Note which of the two PMUs (if present) the event list uses. */
+	evlist__for_each_entry(evlist, pos) {
+		if (pt && pos->attr.type == pt->type)
+			have_pt = true;
+		if (bts && pos->attr.type == bts->type)
+			have_bts = true;
+	}
+
+	/* Events of both PMUs in one list are rejected. */
+	if (have_pt && have_bts) {
+		pr_err("intel_pt and intel_bts may not be used together\n");
+		*err = -EINVAL;
+		return NULL;
+	}
+
+	if (have_pt)
+		return intel_pt_recording_init(err);
+
+	if (have_bts)
+		return intel_bts_recording_init(err);
+
+	/* Neither PMU is in use: no record, and *err is left untouched. */
+	return NULL;
+}
+
+struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
+					      int *err)
+{
+	static const char intel_prefix[] = "GenuineIntel,";
+	char cpuid_str[64];
+
+	/*
+	 * *err ends up 0 when get_cpuid() succeeds, else its return value.
+	 */
+	*err = get_cpuid(cpuid_str, sizeof(cpuid_str));
+	if (*err)
+		return NULL;
+
+	/* Only a "GenuineIntel," cpuid string leads to a recording setup. */
+	if (strncmp(cpuid_str, intel_prefix, sizeof(intel_prefix) - 1) != 0)
+		return NULL;
+
+	return auxtrace_record__init_intel(evlist, err);
+}
diff --git a/arch/x86/util/dwarf-regs.c b/arch/x86/util/dwarf-regs.c
new file mode 100644
index 0000000..1f86ee8
--- /dev/null
+++ b/arch/x86/util/dwarf-regs.c
@@ -0,0 +1,129 @@
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ * Extracted from probe-finder.c
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <stddef.h>
+#include <errno.h> /* for EINVAL */
+#include <string.h> /* for strcmp */
+#include <linux/ptrace.h> /* for struct pt_regs */
+#include <linux/kernel.h> /* for offsetof */
+#include <dwarf-regs.h>
+
+/*
+ * See arch/x86/kernel/ptrace.c.
+ * Different from it:
+ *
+ *  - Since struct pt_regs is defined differently for user and kernel,
+ *    but we want to use 'ax, bx' instead of 'rax, rbx' (which is struct
+ *    field name of user's pt_regs), we make REG_OFFSET_NAME to accept
+ *    both string name and reg field name.
+ *
+ *  - Since accessing x86_32's pt_regs from x86_64 building is difficult
+ *    and vice versa, we simply fill offset with -1, so
+ *    get_arch_regstr() still works but regs_query_register_offset()
+ *    returns error.
+ *    The only inconvenience caused by it now is that we are not allowed
+ *    to generate BPF prologue for a x86_64 kernel if perf is built for
+ *    x86_32. This is really a rare usecase.
+ *
+ *  - Order is different from kernel's ptrace.c for get_arch_regstr(). Use
+ *    the order defined by dwarf.
+ */
+
+struct pt_regs_offset {
+	const char *name;	/* register name string; NULL in the terminating entry */
+	int offset;		/* offsetof() the register in struct pt_regs, or -1 */
+};
+
+#define REG_OFFSET_END {.name = NULL, .offset = 0}	/* table terminator: NULL name */
+
+#ifdef __x86_64__	/* only the variant matching the build computes a real offset */
+# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
+# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = -1}
+#else
+# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = -1}
+# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
+#endif
+
+/* TODO: switching by dwarf address size */
+#ifndef __x86_64__
+static const struct pt_regs_offset x86_32_regoffset_table[] = {	/* indexed by register number in get_arch_regstr() */
+	REG_OFFSET_NAME_32("%ax",	eax),
+	REG_OFFSET_NAME_32("%cx",	ecx),
+	REG_OFFSET_NAME_32("%dx",	edx),
+	REG_OFFSET_NAME_32("%bx",	ebx),
+	REG_OFFSET_NAME_32("$stack",	esp),	/* Stack address instead of %sp */
+	REG_OFFSET_NAME_32("%bp",	ebp),
+	REG_OFFSET_NAME_32("%si",	esi),
+	REG_OFFSET_NAME_32("%di",	edi),
+	REG_OFFSET_END,
+};
+
+#define regoffset_table x86_32_regoffset_table	/* the table used by the functions below */
+#else
+static const struct pt_regs_offset x86_64_regoffset_table[] = {	/* indexed by register number in get_arch_regstr() */
+	REG_OFFSET_NAME_64("%ax",	rax),
+	REG_OFFSET_NAME_64("%dx",	rdx),
+	REG_OFFSET_NAME_64("%cx",	rcx),
+	REG_OFFSET_NAME_64("%bx",	rbx),
+	REG_OFFSET_NAME_64("%si",	rsi),
+	REG_OFFSET_NAME_64("%di",	rdi),
+	REG_OFFSET_NAME_64("%bp",	rbp),
+	REG_OFFSET_NAME_64("%sp",	rsp),
+	REG_OFFSET_NAME_64("%r8",	r8),
+	REG_OFFSET_NAME_64("%r9",	r9),
+	REG_OFFSET_NAME_64("%r10",	r10),
+	REG_OFFSET_NAME_64("%r11",	r11),
+	REG_OFFSET_NAME_64("%r12",	r12),
+	REG_OFFSET_NAME_64("%r13",	r13),
+	REG_OFFSET_NAME_64("%r14",	r14),
+	REG_OFFSET_NAME_64("%r15",	r15),
+	REG_OFFSET_END,
+};
+
+#define regoffset_table x86_64_regoffset_table	/* the table used by the functions below */
+#endif
+
+/* Minus 1 for the ending REG_OFFSET_END */
+#define ARCH_MAX_REGS ((sizeof(regoffset_table) / sizeof(regoffset_table[0])) - 1)
+
+/* Return the register string for register number @n, or NULL (for kprobe-tracer) */
+const char *get_arch_regstr(unsigned int n)
+{
+	return (n >= ARCH_MAX_REGS) ? NULL : regoffset_table[n].name;
+}
+
+/* Reuse code from arch/x86/kernel/ptrace.c */
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name:	the name of a register, as spelled in regoffset_table
+ *
+ * Returns the table's offset for @name, or -EINVAL if @name is not listed.
+ */
+int regs_query_register_offset(const char *name)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARCH_MAX_REGS; i++)
+		if (strcmp(regoffset_table[i].name, name) == 0)
+			return regoffset_table[i].offset;
+	return -EINVAL;
+}
diff --git a/arch/x86/util/group.c b/arch/x86/util/group.c
new file mode 100644
index 0000000..e2f8034
--- /dev/null
+++ b/arch/x86/util/group.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "api/fs/fs.h"
+#include "util/group.h"
+
+/*
+ * Check whether we can use a group for top down; without a group we may get
+ * bad results due to multiplexing.  *warn is set when the NMI watchdog is on.
+ */
+bool arch_topdown_check_group(bool *warn)
+{
+	int watchdog;
+
+	/* Unreadable sysctl: refuse the group, but leave *warn alone. */
+	if (sysctl__read_int("kernel/nmi_watchdog", &watchdog) < 0)
+		return false;
+
+	if (watchdog > 0)
+		*warn = true;
+	return watchdog <= 0;
+}
+
+void arch_topdown_group_warn(void)
+{
+	/* Both lines go to stderr as one message. */
+	fputs("nmi_watchdog enabled with topdown. May give wrong results.\n"
+	      "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n", stderr);
+}
diff --git a/arch/x86/util/header.c b/arch/x86/util/header.c
new file mode 100644
index 0000000..fb0d71a
--- /dev/null
+++ b/arch/x86/util/header.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../util/header.h"
+
+static inline void
+cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
+      unsigned int *d)	/* run CPUID leaf @op; results land in *a, *b, *c, *d */
+{
+	__asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t"	/* 0x53 / 0x5b: one-byte push / pop of %ebx (%rbx in 64-bit mode) */
+			      "movl %%ebx, %%esi\n\t.byte 0x5b"	/* ebx result is copied to esi before the pop */
+			: "=a" (*a),
+			"=S" (*b),	/* so *b is read back from esi, not ebx */
+			"=c" (*c),
+			"=d" (*d)
+			: "a" (op));
+}
+
+static int
+__get_cpuid(char *buffer, size_t sz, const char *fmt)	/* 0 on success, -1 if the '$' end marker is missing */
+{
+	unsigned int a, b, c, d, lvl;
+	int family = -1, model = -1, step = -1;	/* stay -1 when lvl < 1 */
+	int nb;
+	char vendor[16];
+
+	cpuid(0, &lvl, &b, &c, &d);	/* leaf 0: level in eax; vendor text assembled from ebx, edx, ecx */
+	strncpy(&vendor[0], (char *)(&b), 4);
+	strncpy(&vendor[4], (char *)(&d), 4);
+	strncpy(&vendor[8], (char *)(&c), 4);
+	vendor[12] = '\0';
+
+	if (lvl >= 1) {
+		cpuid(1, &a, &b, &c, &d);
+
+		family = (a >> 8) & 0xf;  /* bits 11 - 8 */
+		model  = (a >> 4) & 0xf;  /* Bits  7 - 4 */
+		step   = a & 0xf;	/* bits 3 - 0 */
+
+		/* extended family */
+		if (family == 0xf)
+			family += (a >> 20) & 0xff;	/* adds bits 27 - 20 */
+
+		/* extended model */
+		if (family >= 0x6)
+			model += ((a >> 16) & 0xf) << 4;	/* bits 19 - 16 become the high nibble */
+	}
+	nb = scnprintf(buffer, sz, fmt, vendor, family, model, step);
+
+	/* look for end marker to ensure the entire data fit */
+	if (strchr(buffer, '$')) {
+		buffer[nb-1] = '\0';	/* drop the marker; both callers' formats end with it */
+		return 0;
+	}
+	return -1;
+}
+
+int
+get_cpuid(char *buffer, size_t sz)	/* fill @buffer with "vendor,family,model,stepping" */
+{
+	return __get_cpuid(buffer, sz, "%s,%u,%u,%u$");	/* '$' is the end marker __get_cpuid() strips */
+}
+
+char *
+get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+	char *str = malloc(128);	/* returned to the caller on success */
+
+	if (str && __get_cpuid(str, 128, "%s-%u-%X$") >= 0)
+		return str;
+
+	free(str);	/* free(NULL) is a no-op when malloc() failed */
+	return NULL;
+}
diff --git a/arch/x86/util/intel-bts.c b/arch/x86/util/intel-bts.c
new file mode 100644
index 0000000..781df40
--- /dev/null
+++ b/arch/x86/util/intel-bts.c
@@ -0,0 +1,462 @@
+/*
+ * intel-bts.c: Intel Branch Trace Store (BTS) support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <errno.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "../../util/cpumap.h"
+#include "../../util/evsel.h"
+#include "../../util/evlist.h"
+#include "../../util/session.h"
+#include "../../util/util.h"
+#include "../../util/pmu.h"
+#include "../../util/debug.h"
+#include "../../util/tsc.h"
+#include "../../util/auxtrace.h"
+#include "../../util/intel-bts.h"
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+#define KiB_MASK(x) (KiB(x) - 1)
+#define MiB_MASK(x) (MiB(x) - 1)
+
+struct intel_bts_snapshot_ref {	/* element of snapshot_refs[], indexed by mmap index */
+	void	*ref_buf;	/* zfree()d by intel_bts_free_snapshot_refs() */
+	size_t	ref_offset;
+	bool	wrapped;	/* latched once intel_bts_first_wrap() returns true */
+};
+
+struct intel_bts_recording {
+	struct auxtrace_record		itr;	/* callbacks get back here via container_of() */
+	struct perf_pmu			*intel_bts_pmu;	/* from perf_pmu__find(INTEL_BTS_PMU_NAME) */
+	struct perf_evlist		*evlist;	/* set by intel_bts_recording_options() */
+	bool				snapshot_mode;	/* copy of opts->auxtrace_snapshot_mode */
+	size_t				snapshot_size;	/* set by intel_bts_parse_snapshot_options() */
+	int				snapshot_ref_cnt;	/* entries allocated in snapshot_refs */
+	struct intel_bts_snapshot_ref	*snapshot_refs;	/* grown by intel_bts_alloc_snapshot_refs() */
+};
+
+struct branch {
+	u64 from;
+	u64 to;
+	u64 misc;
+};
+
+static size_t
+intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+			 struct perf_evlist *evlist __maybe_unused)
+{
+	return INTEL_BTS_AUXTRACE_PRIV_SIZE;
+}
+
+static int intel_bts_info_fill(struct auxtrace_record *itr,
+			       struct perf_session *session,
+			       struct auxtrace_info_event *auxtrace_info,
+			       size_t priv_size)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
+	struct perf_event_mmap_page *pc;
+	struct perf_tsc_conversion tc = { .time_mult = 0, };
+	bool cap_user_time_zero = false;
+	int err;
+
+	if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE)
+		return -EINVAL;
+
+	if (!session->evlist->nr_mmaps)
+		return -EINVAL;
+
+	pc = session->evlist->mmap[0].base;
+	if (pc) {
+		err = perf_read_tsc_conversion(pc, &tc);
+		if (err) {
+			if (err != -EOPNOTSUPP)
+				return err;
+		} else {
+			cap_user_time_zero = tc.time_mult != 0;
+		}
+		if (!cap_user_time_zero)
+			ui__warning("Intel BTS: TSC not available\n");
+	}
+
+	auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS;
+	auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type;
+	auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift;
+	auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult;
+	auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero;
+	auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero;
+	auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode;
+
+	return 0;
+}
+
+static int intel_bts_recording_options(struct auxtrace_record *itr,
+				       struct perf_evlist *evlist,
+				       struct record_opts *opts)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
+	struct perf_evsel *evsel, *intel_bts_evsel = NULL;
+	const struct cpu_map *cpus = evlist->cpus;
+	bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+
+	btsr->evlist = evlist;
+	btsr->snapshot_mode = opts->auxtrace_snapshot_mode;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == intel_bts_pmu->type) {
+			if (intel_bts_evsel) {
+				pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n");
+				return -EINVAL;
+			}
+			evsel->attr.freq = 0;
+			evsel->attr.sample_period = 1;
+			intel_bts_evsel = evsel;
+			opts->full_auxtrace = true;
+		}
+	}
+
+	if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
+		pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n");
+		return -EINVAL;
+	}
+
+	if (!opts->full_auxtrace)
+		return 0;
+
+	if (opts->full_auxtrace && !cpu_map__empty(cpus)) {
+		pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
+		return -EINVAL;
+	}
+
+	/* Set default sizes for snapshot mode */
+	if (opts->auxtrace_snapshot_mode) {
+		if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
+			if (privileged) {
+				opts->auxtrace_mmap_pages = MiB(4) / page_size;
+			} else {
+				opts->auxtrace_mmap_pages = KiB(128) / page_size;
+				if (opts->mmap_pages == UINT_MAX)
+					opts->mmap_pages = KiB(256) / page_size;
+			}
+		} else if (!opts->auxtrace_mmap_pages && !privileged &&
+			   opts->mmap_pages == UINT_MAX) {
+			opts->mmap_pages = KiB(256) / page_size;
+		}
+		if (!opts->auxtrace_snapshot_size)
+			opts->auxtrace_snapshot_size =
+				opts->auxtrace_mmap_pages * (size_t)page_size;
+		if (!opts->auxtrace_mmap_pages) {
+			size_t sz = opts->auxtrace_snapshot_size;
+
+			sz = round_up(sz, page_size) / page_size;
+			opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+		}
+		if (opts->auxtrace_snapshot_size >
+				opts->auxtrace_mmap_pages * (size_t)page_size) {
+			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+			       opts->auxtrace_snapshot_size,
+			       opts->auxtrace_mmap_pages * (size_t)page_size);
+			return -EINVAL;
+		}
+		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
+			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+			return -EINVAL;
+		}
+		pr_debug2("Intel BTS snapshot size: %zu\n",
+			  opts->auxtrace_snapshot_size);
+	}
+
+	/* Set default sizes for full trace mode */
+	if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+		if (privileged) {
+			opts->auxtrace_mmap_pages = MiB(4) / page_size;
+		} else {
+			opts->auxtrace_mmap_pages = KiB(128) / page_size;
+			if (opts->mmap_pages == UINT_MAX)
+				opts->mmap_pages = KiB(256) / page_size;
+		}
+	}
+
+	/* Validate auxtrace_mmap_pages */
+	if (opts->auxtrace_mmap_pages) {
+		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+		size_t min_sz;
+
+		if (opts->auxtrace_snapshot_mode)
+			min_sz = KiB(4);
+		else
+			min_sz = KiB(8);
+
+		if (sz < min_sz || !is_power_of_2(sz)) {
+			pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n",
+			       min_sz / 1024);
+			return -EINVAL;
+		}
+	}
+
+	if (intel_bts_evsel) {
+		/*
+		 * To obtain the auxtrace buffer file descriptor, the auxtrace event
+		 * must come first.
+		 */
+		perf_evlist__to_front(evlist, intel_bts_evsel);
+		/*
+		 * In the case of per-cpu mmaps, we need the CPU on the
+		 * AUX event.
+		 */
+		if (!cpu_map__empty(cpus))
+			perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
+	}
+
+	/* Add dummy event to keep tracking */
+	if (opts->full_auxtrace) {
+		struct perf_evsel *tracking_evsel;
+		int err;
+
+		err = parse_events(evlist, "dummy:u", NULL);
+		if (err)
+			return err;
+
+		tracking_evsel = perf_evlist__last(evlist);
+
+		perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+		tracking_evsel->attr.freq = 0;
+		tracking_evsel->attr.sample_period = 1;
+	}
+
+	return 0;
+}
+
+static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr,
+					    struct record_opts *opts,
+					    const char *str)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	unsigned long long snapshot_size = 0;
+	char *endptr;
+
+	if (str) {
+		snapshot_size = strtoull(str, &endptr, 0);
+		if (*endptr || snapshot_size > SIZE_MAX)
+			return -1;
+	}
+
+	opts->auxtrace_snapshot_mode = true;
+	opts->auxtrace_snapshot_size = snapshot_size;
+
+	btsr->snapshot_size = snapshot_size;
+
+	return 0;
+}
+
+static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused)
+{
+	return rdtsc();
+}
+
+static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr,
+					 int idx)
+{
+	const size_t sz = sizeof(struct intel_bts_snapshot_ref);
+	int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2;
+	struct intel_bts_snapshot_ref *refs;
+
+	if (!new_cnt)
+		new_cnt = 16;
+	/* Grow snapshot_refs (doubling from 16) until 'idx' is a valid index */
+	while (new_cnt <= idx)
+		new_cnt *= 2;
+
+	refs = calloc(new_cnt, sz);
+	if (!refs)
+		return -ENOMEM;	/* existing array and count are left untouched */
+	if (cnt)	/* first call has no old array: do not give memcpy() NULL */
+		memcpy(refs, btsr->snapshot_refs, cnt * sz);
+	free(btsr->snapshot_refs);	/* entries now live in 'refs'; free(NULL) is a no-op */
+	btsr->snapshot_refs = refs;
+	btsr->snapshot_ref_cnt = new_cnt;
+
+	return 0;
+}
+
+static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr)
+{
+	int i;
+
+	for (i = 0; i < btsr->snapshot_ref_cnt; i++)
+		zfree(&btsr->snapshot_refs[i].ref_buf);
+	zfree(&btsr->snapshot_refs);
+}
+
+static void intel_bts_recording_free(struct auxtrace_record *itr)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+
+	intel_bts_free_snapshot_refs(btsr);
+	free(btsr);
+}
+
+static int intel_bts_snapshot_start(struct auxtrace_record *itr)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(btsr->evlist, evsel) {
+		if (evsel->attr.type == btsr->intel_bts_pmu->type)
+			return perf_evsel__disable(evsel);
+	}
+	return -EINVAL;
+}
+
+static int intel_bts_snapshot_finish(struct auxtrace_record *itr)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(btsr->evlist, evsel) {
+		if (evsel->attr.type == btsr->intel_bts_pmu->type)
+			return perf_evsel__enable(evsel);
+	}
+	return -EINVAL;
+}
+
+static bool intel_bts_first_wrap(u64 *data, size_t buf_size)
+{
+	int i, a, b;
+
+	b = buf_size >> 3;	/* buffer length in u64 words */
+	a = b - 512;	/* only the last 512 words are examined */
+	if (a < 0)
+		a = 0;
+	/* Any non-zero word there makes the caller mark the buffer as wrapped */
+	for (i = a; i < b; i++) {
+		if (data[i])
+			return true;
+	}
+
+	return false;
+}
+
+static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx,
+				   struct auxtrace_mmap *mm, unsigned char *data,
+				   u64 *head, u64 *old)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	bool wrapped;
+	int err;
+
+	pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
+		  __func__, idx, (size_t)*old, (size_t)*head);
+
+	if (idx >= btsr->snapshot_ref_cnt) {
+		err = intel_bts_alloc_snapshot_refs(btsr, idx);
+		if (err)
+			goto out_err;
+	}
+
+	wrapped = btsr->snapshot_refs[idx].wrapped;
+	if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) {
+		btsr->snapshot_refs[idx].wrapped = true;
+		wrapped = true;
+	}
+
+	/*
+	 * In full trace mode 'head' continually increases.  However in snapshot
+	 * mode 'head' is an offset within the buffer.  Here 'old' and 'head'
+	 * are adjusted to match the full trace case which expects that 'old' is
+	 * always less than 'head'.
+	 */
+	if (wrapped) {
+		*old = *head;
+		*head += mm->len;
+	} else {
+		if (mm->mask)
+			*old &= mm->mask;
+		else
+			*old %= mm->len;
+		if (*old > *head)
+			*head += mm->len;
+	}
+
+	pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
+		  __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
+
+	return 0;
+
+out_err:
+	pr_err("%s: failed, error %d\n", __func__, err);
+	return err;
+}
+
+static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
+{
+	struct intel_bts_recording *btsr =
+			container_of(itr, struct intel_bts_recording, itr);
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(btsr->evlist, evsel) {
+		if (evsel->attr.type == btsr->intel_bts_pmu->type)
+			return perf_evlist__enable_event_idx(btsr->evlist,
+							     evsel, idx);
+	}
+	return -EINVAL;
+}
+
+struct auxtrace_record *intel_bts_recording_init(int *err)
+{
+	struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
+	struct intel_bts_recording *btsr;
+
+	if (!intel_bts_pmu)
+		return NULL;
+
+	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
+		*err = -errno;
+		return NULL;
+	}
+
+	btsr = zalloc(sizeof(struct intel_bts_recording));
+	if (!btsr) {
+		*err = -ENOMEM;
+		return NULL;
+	}
+
+	btsr->intel_bts_pmu = intel_bts_pmu;
+	btsr->itr.recording_options = intel_bts_recording_options;
+	btsr->itr.info_priv_size = intel_bts_info_priv_size;
+	btsr->itr.info_fill = intel_bts_info_fill;
+	btsr->itr.free = intel_bts_recording_free;
+	btsr->itr.snapshot_start = intel_bts_snapshot_start;
+	btsr->itr.snapshot_finish = intel_bts_snapshot_finish;
+	btsr->itr.find_snapshot = intel_bts_find_snapshot;
+	btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options;
+	btsr->itr.reference = intel_bts_reference;
+	btsr->itr.read_finish = intel_bts_read_finish;
+	btsr->itr.alignment = sizeof(struct branch);
+	return &btsr->itr;
+}
diff --git a/arch/x86/util/intel-pt.c b/arch/x86/util/intel-pt.c
new file mode 100644
index 0000000..db0ba8c
--- /dev/null
+++ b/arch/x86/util/intel-pt.c
@@ -0,0 +1,1111 @@
+/*
+ * intel-pt.c: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <cpuid.h>
+
+#include "../../perf.h"
+#include "../../util/session.h"
+#include "../../util/event.h"
+#include "../../util/evlist.h"
+#include "../../util/evsel.h"
+#include "../../util/cpumap.h"
+#include <subcmd/parse-options.h>
+#include "../../util/parse-events.h"
+#include "../../util/pmu.h"
+#include "../../util/debug.h"
+#include "../../util/auxtrace.h"
+#include "../../util/tsc.h"
+#include "../../util/intel-pt.h"
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+#define KiB_MASK(x) (KiB(x) - 1)
+#define MiB_MASK(x) (MiB(x) - 1)
+
+#define INTEL_PT_PSB_PERIOD_NEAR	256
+
+struct intel_pt_snapshot_ref {
+	void *ref_buf;
+	size_t ref_offset;
+	bool wrapped;
+};
+
+struct intel_pt_recording {
+	struct auxtrace_record		itr;	/* callbacks get back here via container_of() */
+	struct perf_pmu			*intel_pt_pmu;
+	int				have_sched_switch;	/* 0, or 1/2/3 as set in intel_pt_recording_options() */
+	struct perf_evlist		*evlist;	/* set by intel_pt_recording_options() */
+	bool				snapshot_mode;	/* copy of opts->auxtrace_snapshot_mode */
+	bool				snapshot_init_done;
+	size_t				snapshot_size;	/* set by intel_pt_parse_snapshot_options() */
+	size_t				snapshot_ref_buf_size;
+	int				snapshot_ref_cnt;
+	struct intel_pt_snapshot_ref	*snapshot_refs;
+	size_t				priv_size;	/* cached by intel_pt_info_priv_size(), checked in intel_pt_info_fill() */
+};
+
+static int intel_pt_parse_terms_with_default(struct list_head *formats,
+					     const char *str,
+					     u64 *config)
+{
+	struct list_head *terms;
+	struct perf_event_attr attr = { .size = 0, };
+	int err;
+
+	terms = malloc(sizeof(struct list_head));
+	if (!terms)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(terms);
+
+	err = parse_events_terms(terms, str);
+	if (err)
+		goto out_free;
+
+	attr.config = *config;
+	err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
+	if (err)
+		goto out_free;
+
+	*config = attr.config;
+out_free:
+	parse_events_terms__delete(terms);
+	return err;
+}
+
+static int intel_pt_parse_terms(struct list_head *formats, const char *str,
+				u64 *config)
+{
+	*config = 0;
+	return intel_pt_parse_terms_with_default(formats, str, config);
+}
+
+static u64 intel_pt_masked_bits(u64 mask, u64 bits)
+{
+	const u64 top_bit = 1ULL << 63;
+	u64 res = 0;
+	int i;
+
+	for (i = 0; i < 64; i++) {
+		if (mask & top_bit) {	/* mask selects this position: append its bit to res */
+			res <<= 1;
+			if (bits & top_bit)
+				res |= 1;
+		}
+		mask <<= 1;	/* bring the next lower position up to bit 63 */
+		bits <<= 1;
+	}
+	/* res = the bits of 'bits' under set mask bits, packed together in order */
+	return res;
+}
+
+static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
+				struct perf_evlist *evlist, u64 *res)
+{
+	struct perf_evsel *evsel;
+	u64 mask;
+
+	*res = 0;
+
+	mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
+	if (!mask)
+		return -EINVAL;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == intel_pt_pmu->type) {
+			*res = intel_pt_masked_bits(mask, evsel->attr.config);
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu,
+				  struct perf_evlist *evlist)
+{
+	u64 val;
+	int err, topa_multiple_entries;
+	size_t psb_period;
+
+	if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries",
+				"%d", &topa_multiple_entries) != 1)
+		topa_multiple_entries = 0;
+
+	/*
+	 * Use caps/topa_multiple_entries to indicate early hardware that had
+	 * extra frequent PSBs.
+	 */
+	if (!topa_multiple_entries) {
+		psb_period = 256;
+		goto out;
+	}
+
+	err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val);
+	if (err)
+		val = 0;
+
+	psb_period = 1 << (val + 11);
+out:
+	pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period);
+	return psb_period;
+}
+
+static int intel_pt_pick_bit(int bits, int target)
+{
+	int pos, pick = -1;
+
+	for (pos = 0; bits; bits >>= 1, pos++) {
+		if (bits & 1) {
+			if (pos <= target || pick < 0)	/* take bits up to target, or the first one seen */
+				pick = pos;
+			if (pos >= target)	/* reached or passed target: stop scanning */
+				break;
+		}
+	}
+	/* Highest set bit <= target, else the lowest set bit above it, else -1 */
+	return pick;
+}
+
+static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
+{
+	char buf[256];
+	int mtc, mtc_periods = 0, mtc_period;
+	int psb_cyc, psb_periods, psb_period;
+	int pos = 0;
+	u64 config;
+	char c;
+
+	pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");
+
+	if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d",
+				&mtc) != 1)
+		mtc = 1;
+
+	if (mtc) {
+		if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x",
+					&mtc_periods) != 1)
+			mtc_periods = 0;
+		if (mtc_periods) {
+			mtc_period = intel_pt_pick_bit(mtc_periods, 3);
+			pos += scnprintf(buf + pos, sizeof(buf) - pos,
+					 ",mtc,mtc_period=%d", mtc_period);
+		}
+	}
+
+	if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d",
+				&psb_cyc) != 1)
+		psb_cyc = 1;
+
+	if (psb_cyc && mtc_periods) {
+		if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x",
+					&psb_periods) != 1)
+			psb_periods = 0;
+		if (psb_periods) {
+			psb_period = intel_pt_pick_bit(psb_periods, 3);
+			pos += scnprintf(buf + pos, sizeof(buf) - pos,
+					 ",psb_period=%d", psb_period);
+		}
+	}
+
+	if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
+	    perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
+		pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");
+
+	pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
+
+	intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
+
+	return config;
+}
+
+static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
+					   struct record_opts *opts,
+					   const char *str)
+{
+	struct intel_pt_recording *ptr =
+			container_of(itr, struct intel_pt_recording, itr);
+	unsigned long long snapshot_size = 0;
+	char *endptr;
+
+	if (str) {
+		snapshot_size = strtoull(str, &endptr, 0);
+		if (*endptr || snapshot_size > SIZE_MAX)
+			return -1;
+	}
+
+	opts->auxtrace_snapshot_mode = true;
+	opts->auxtrace_snapshot_size = snapshot_size;
+
+	ptr->snapshot_size = snapshot_size;
+
+	return 0;
+}
+
+struct perf_event_attr *
+intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
+{
+	struct perf_event_attr *attr;
+
+	attr = zalloc(sizeof(struct perf_event_attr));
+	if (!attr)
+		return NULL;
+
+	attr->config = intel_pt_default_config(intel_pt_pmu);
+
+	intel_pt_pmu->selectable = true;
+
+	return attr;
+}
+
+static const char *intel_pt_find_filter(struct perf_evlist *evlist,
+					struct perf_pmu *intel_pt_pmu)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == intel_pt_pmu->type)
+			return evsel->filter;
+	}
+
+	return NULL;
+}
+
+static size_t intel_pt_filter_bytes(const char *filter)
+{
+	/* No filter, or an empty one, needs no space; else string + NUL, padded to 8 */
+	if (!filter || !*filter)
+		return 0;
+	return roundup(strlen(filter) + 1, 8);
+}
+
+static size_t
+intel_pt_info_priv_size(struct auxtrace_record *itr, struct perf_evlist *evlist)
+{
+	struct intel_pt_recording *ptr =
+			container_of(itr, struct intel_pt_recording, itr);
+	const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu);
+
+	ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
+			 intel_pt_filter_bytes(filter);
+
+	return ptr->priv_size;
+}
+
+static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
+{
+	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+	__get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
+	*n = ebx;
+	*d = eax;
+}
+
+static int intel_pt_info_fill(struct auxtrace_record *itr,
+			      struct perf_session *session,
+			      struct auxtrace_info_event *auxtrace_info,
+			      size_t priv_size)
+{
+	struct intel_pt_recording *ptr =
+			container_of(itr, struct intel_pt_recording, itr);
+	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
+	struct perf_event_mmap_page *pc;
+	struct perf_tsc_conversion tc = { .time_mult = 0, };
+	bool cap_user_time_zero = false, per_cpu_mmaps;
+	u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
+	u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
+	unsigned long max_non_turbo_ratio;
+	size_t filter_str_len;
+	const char *filter;
+	u64 *info;
+	int err;
+
+	if (priv_size != ptr->priv_size)
+		return -EINVAL;
+
+	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
+	intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
+			     &noretcomp_bit);
+	intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
+	mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
+					      "mtc_period");
+	intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);
+
+	intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
+
+	if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
+				"%lu", &max_non_turbo_ratio) != 1)
+		max_non_turbo_ratio = 0;
+
+	filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
+	filter_str_len = filter ? strlen(filter) : 0;
+
+	if (!session->evlist->nr_mmaps)
+		return -EINVAL;
+
+	pc = session->evlist->mmap[0].base;
+	if (pc) {
+		err = perf_read_tsc_conversion(pc, &tc);
+		if (err) {
+			if (err != -EOPNOTSUPP)
+				return err;
+		} else {
+			cap_user_time_zero = tc.time_mult != 0;
+		}
+		if (!cap_user_time_zero)
+			ui__warning("Intel Processor Trace: TSC not available\n");
+	}
+
+	per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus);
+
+	auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
+	auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
+	auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
+	auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
+	auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
+	auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
+	auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
+	auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
+	auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
+	auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
+	auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
+	auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit;
+	auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits;
+	auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
+	auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
+	auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
+	auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio;
+	auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len;
+
+	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
+
+	if (filter_str_len) {
+		size_t len = intel_pt_filter_bytes(filter);
+
+		strncpy((char *)info, filter, len);
+		info += len >> 3;
+	}
+
+	return 0;
+}
+
+static int intel_pt_track_switches(struct perf_evlist *evlist)
+{
+	const char *sched_switch = "sched:sched_switch";
+	struct perf_evsel *evsel;
+	int err;
+
+	if (!perf_evlist__can_select_event(evlist, sched_switch))
+		return -EPERM;
+
+	err = parse_events(evlist, sched_switch, NULL);
+	if (err) {
+		pr_debug2("%s: failed to parse %s, error %d\n",
+			  __func__, sched_switch, err);
+		return err;
+	}
+
+	evsel = perf_evlist__last(evlist);
+
+	perf_evsel__set_sample_bit(evsel, CPU);
+	perf_evsel__set_sample_bit(evsel, TIME);
+
+	evsel->system_wide = true;
+	evsel->no_aux_samples = true;
+	evsel->immediate = true;
+
+	return 0;
+}
+
+static void intel_pt_valid_str(char *str, size_t len, u64 valid)
+{
+	unsigned int val, last = 0, state = 1;
+	int p = 0;
+
+	str[0] = '\0';
+	/* List set bits of 'valid': "a" alone, "a,b" for two in a row, "a-b" for longer runs */
+	for (val = 0; val <= 64; val++, valid >>= 1) {	/* pass 64 sees 0 and flushes a run ending at bit 63 */
+		if (valid & 1) {
+			last = val;
+			switch (state) {
+			case 0:	/* something was printed and the run ended: separator first */
+				p += scnprintf(str + p, len - p, ",");
+				/* Fall through */
+			case 1:	/* start of a run: print its first value */
+				p += scnprintf(str + p, len - p, "%u", val);
+				state = 2;
+				break;
+			case 2:	/* second consecutive set bit */
+				state = 3;
+				break;
+			case 3:	/* third consecutive set bit */
+				state = 4;
+				break;
+			default:	/* state 4: run keeps growing, 'last' tracks its end */
+				break;
+			}
+		} else {
+			switch (state) {
+			case 3:	/* run of exactly two: print the second value */
+				p += scnprintf(str + p, len - p, ",%u", last);
+				state = 0;
+				break;
+			case 4:	/* run of three or more: print it as a range */
+				p += scnprintf(str + p, len - p, "-%u", last);
+				state = 0;
+				break;
+			default:
+				break;
+			}
+			if (state != 1)	/* state 1 (nothing printed yet) is kept; a lone value (2) ends here */
+				state = 0;
+		}
+	}
+}
+
+static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
+				    const char *caps, const char *name,
+				    const char *supported, u64 config)
+{
+	char valid_str[256];
+	unsigned int shift;
+	unsigned long long valid;
+	u64 bits;
+	int ok;
+
+	if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1)
+		valid = 0;
+	/* A 'supported' file that reads as 0 overrides the 'caps' bitmap */
+	if (supported &&
+	    perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok)
+		valid = 0;
+	/* The value 0 is always accepted */
+	valid |= 1;
+
+	bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);
+	/* Isolate the term's field in config and shift it down to a plain value */
+	config &= bits;
+
+	for (shift = 0; bits && !(bits & 1); shift++)
+		bits >>= 1;
+
+	config >>= shift;
+
+	if (config > 63)
+		goto out_err;
+	/* 64-bit shift: config may be up to 63, too wide for an int-typed 1 */
+	if (valid & (1ULL << config))
+		return 0;
+out_err:
+	intel_pt_valid_str(valid_str, sizeof(valid_str), valid);
+	pr_err("Invalid %s for %s. Valid values are: %s\n",
+	       name, INTEL_PT_PMU_NAME, valid_str);
+	return -EINVAL;
+}
+
+static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
+				    struct perf_evsel *evsel)
+{
+	int err;
+
+	if (!evsel)
+		return 0;
+
+	err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
+				       "cyc_thresh", "caps/psb_cyc",
+				       evsel->attr.config);
+	if (err)
+		return err;
+
+	err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods",
+				       "mtc_period", "caps/mtc",
+				       evsel->attr.config);
+	if (err)
+		return err;
+
+	return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods",
+					"psb_period", "caps/psb_cyc",
+					evsel->attr.config);
+}
+
+static int intel_pt_recording_options(struct auxtrace_record *itr,
+				      struct perf_evlist *evlist,
+				      struct record_opts *opts)
+{
+	struct intel_pt_recording *ptr =
+			container_of(itr, struct intel_pt_recording, itr);
+	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
+	bool have_timing_info, need_immediate = false;
+	struct perf_evsel *evsel, *intel_pt_evsel = NULL;
+	const struct cpu_map *cpus = evlist->cpus;
+	bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+	u64 tsc_bit;
+	int err;
+
+	ptr->evlist = evlist;
+	ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == intel_pt_pmu->type) {
+			if (intel_pt_evsel) {
+				pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
+				return -EINVAL;
+			}
+			evsel->attr.freq = 0;
+			evsel->attr.sample_period = 1;
+			intel_pt_evsel = evsel;
+			opts->full_auxtrace = true;
+		}
+	}
+
+	if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
+		pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
+		return -EINVAL;
+	}
+
+	if (opts->use_clockid) {
+		pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
+		return -EINVAL;
+	}
+
+	if (!opts->full_auxtrace)
+		return 0;
+
+	err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
+	if (err)
+		return err;
+
+	/* Set default sizes for snapshot mode */
+	if (opts->auxtrace_snapshot_mode) {
+		size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
+
+		if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
+			if (privileged) {
+				opts->auxtrace_mmap_pages = MiB(4) / page_size;
+			} else {
+				opts->auxtrace_mmap_pages = KiB(128) / page_size;
+				if (opts->mmap_pages == UINT_MAX)
+					opts->mmap_pages = KiB(256) / page_size;
+			}
+		} else if (!opts->auxtrace_mmap_pages && !privileged &&
+			   opts->mmap_pages == UINT_MAX) {
+			opts->mmap_pages = KiB(256) / page_size;
+		}
+		if (!opts->auxtrace_snapshot_size)
+			opts->auxtrace_snapshot_size =
+				opts->auxtrace_mmap_pages * (size_t)page_size;
+		if (!opts->auxtrace_mmap_pages) {
+			size_t sz = opts->auxtrace_snapshot_size;
+
+			sz = round_up(sz, page_size) / page_size;
+			opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+		}
+		if (opts->auxtrace_snapshot_size >
+				opts->auxtrace_mmap_pages * (size_t)page_size) {
+			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+			       opts->auxtrace_snapshot_size,
+			       opts->auxtrace_mmap_pages * (size_t)page_size);
+			return -EINVAL;
+		}
+		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
+			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+			return -EINVAL;
+		}
+		pr_debug2("Intel PT snapshot size: %zu\n",
+			  opts->auxtrace_snapshot_size);
+		if (psb_period &&
+		    opts->auxtrace_snapshot_size <= psb_period +
+						  INTEL_PT_PSB_PERIOD_NEAR)
+			ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
+				    opts->auxtrace_snapshot_size, psb_period);
+	}
+
+	/* Set default sizes for full trace mode */
+	if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+		if (privileged) {
+			opts->auxtrace_mmap_pages = MiB(4) / page_size;
+		} else {
+			opts->auxtrace_mmap_pages = KiB(128) / page_size;
+			if (opts->mmap_pages == UINT_MAX)
+				opts->mmap_pages = KiB(256) / page_size;
+		}
+	}
+
+	/* Validate auxtrace_mmap_pages */
+	if (opts->auxtrace_mmap_pages) {
+		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+		size_t min_sz;
+
+		if (opts->auxtrace_snapshot_mode)
+			min_sz = KiB(4);
+		else
+			min_sz = KiB(8);
+
+		if (sz < min_sz || !is_power_of_2(sz)) {
+			pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
+			       min_sz / 1024);
+			return -EINVAL;
+		}
+	}
+
+	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
+
+	if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit))
+		have_timing_info = true;
+	else
+		have_timing_info = false;
+
+	/*
+	 * Per-cpu recording needs sched_switch events to distinguish different
+	 * threads.
+	 */
+	if (have_timing_info && !cpu_map__empty(cpus)) {
+		if (perf_can_record_switch_events()) {
+			bool cpu_wide = !target__none(&opts->target) &&
+					!target__has_task(&opts->target);
+
+			if (!cpu_wide && perf_can_record_cpu_wide()) {
+				struct perf_evsel *switch_evsel;
+
+				err = parse_events(evlist, "dummy:u", NULL);
+				if (err)
+					return err;
+
+				switch_evsel = perf_evlist__last(evlist);
+
+				switch_evsel->attr.freq = 0;
+				switch_evsel->attr.sample_period = 1;
+				switch_evsel->attr.context_switch = 1;
+
+				switch_evsel->system_wide = true;
+				switch_evsel->no_aux_samples = true;
+				switch_evsel->immediate = true;
+
+				perf_evsel__set_sample_bit(switch_evsel, TID);
+				perf_evsel__set_sample_bit(switch_evsel, TIME);
+				perf_evsel__set_sample_bit(switch_evsel, CPU);
+				perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);
+
+				opts->record_switch_events = false;
+				ptr->have_sched_switch = 3;
+			} else {
+				opts->record_switch_events = true;
+				need_immediate = true;
+				if (cpu_wide)
+					ptr->have_sched_switch = 3;
+				else
+					ptr->have_sched_switch = 2;
+			}
+		} else {
+			err = intel_pt_track_switches(evlist);
+			if (err == -EPERM)
+				pr_debug2("Unable to select sched:sched_switch\n");
+			else if (err)
+				return err;
+			else
+				ptr->have_sched_switch = 1;
+		}
+	}
+
+	if (intel_pt_evsel) {
+		/*
+		 * To obtain the auxtrace buffer file descriptor, the auxtrace
+		 * event must come first.
+		 */
+		perf_evlist__to_front(evlist, intel_pt_evsel);
+		/*
+		 * In the case of per-cpu mmaps, we need the CPU on the
+		 * AUX event.
+		 */
+		if (!cpu_map__empty(cpus))
+			perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
+	}
+
+	/* Add dummy event to keep tracking */
+	if (opts->full_auxtrace) {
+		struct perf_evsel *tracking_evsel;
+
+		err = parse_events(evlist, "dummy:u", NULL);
+		if (err)
+			return err;
+
+		tracking_evsel = perf_evlist__last(evlist);
+
+		perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+		tracking_evsel->attr.freq = 0;
+		tracking_evsel->attr.sample_period = 1;
+
+		tracking_evsel->no_aux_samples = true;
+		if (need_immediate)
+			tracking_evsel->immediate = true;
+
+		/* In per-cpu case, always need the time of mmap events etc */
+		if (!cpu_map__empty(cpus)) {
+			perf_evsel__set_sample_bit(tracking_evsel, TIME);
+			/* And the CPU for switch events */
+			perf_evsel__set_sample_bit(tracking_evsel, CPU);
+		}
+		perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+	}
+
+	/*
+	 * Warn the user when we do not have enough information to decode i.e.
+	 * per-cpu with no sched_switch (except workload-only).
+	 */
+	if (!ptr->have_sched_switch && !cpu_map__empty(cpus) &&
+	    !target__none(&opts->target))
+		ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
+
+	return 0;
+}
+
+static int intel_pt_snapshot_start(struct auxtrace_record *itr)
+{
+	struct intel_pt_recording *ptr =
+			container_of(itr, struct intel_pt_recording, itr);
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(ptr->evlist, evsel) {
+		if (evsel->attr.type == ptr->intel_pt_pmu->type)
+			return perf_evsel__disable(evsel);
+	}
+	return -EINVAL;
+}
+
+static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
+{
+	struct intel_pt_recording *ptr =
+			container_of(itr, struct intel_pt_recording, itr);
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(ptr->evlist, evsel) {
+		if (evsel->attr.type == ptr->intel_pt_pmu->type)
+			return perf_evsel__enable(evsel);
+	}
+	return -EINVAL;
+}
+
+static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
+{
+	const size_t sz = sizeof(struct intel_pt_snapshot_ref);
+	int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
+	struct intel_pt_snapshot_ref *refs;
+
+	/* Double the capacity (starting at 16) until index idx fits. */
+	if (!new_cnt)
+		new_cnt = 16;
+	while (new_cnt <= idx)
+		new_cnt *= 2;
+	refs = calloc(new_cnt, sz);
+	if (!refs)
+		return -ENOMEM;
+
+	/* Carry over existing entries, then free the old array (it used to leak). */
+	if (cnt)
+		memcpy(refs, ptr->snapshot_refs, cnt * sz);
+	free(ptr->snapshot_refs);
+	ptr->snapshot_refs = refs;
+	ptr->snapshot_ref_cnt = new_cnt;
+	return 0;
+}
+
+static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
+{
+	int i;
+
+	for (i = 0; i < ptr->snapshot_ref_cnt; i++)
+		zfree(&ptr->snapshot_refs[i].ref_buf);
+	zfree(&ptr->snapshot_refs);
+}
+
+static void intel_pt_recording_free(struct auxtrace_record *itr)
+{
+	struct intel_pt_recording *ptr =
+			container_of(itr, struct intel_pt_recording, itr);
+
+	intel_pt_free_snapshot_refs(ptr);
+	free(ptr);
+}
+
+static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
+				       size_t snapshot_buf_size)
+{
+	size_t ref_buf_size = ptr->snapshot_ref_buf_size;
+	void *ref_buf;
+
+	ref_buf = zalloc(ref_buf_size);
+	if (!ref_buf)
+		return -ENOMEM;
+
+	ptr->snapshot_refs[idx].ref_buf = ref_buf;
+	ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;
+
+	return 0;
+}
+
+static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
+					     size_t snapshot_buf_size)
+{
+	const size_t max_size = 256 * 1024;
+	size_t buf_size = 0, psb_period;
+
+	if (ptr->snapshot_size <= 64 * 1024)
+		return 0;
+
+	psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
+	if (psb_period)
+		buf_size = psb_period * 2;
+
+	if (!buf_size || buf_size > max_size)
+		buf_size = max_size;
+
+	if (buf_size >= snapshot_buf_size)
+		return 0;
+
+	if (buf_size >= ptr->snapshot_size / 2)
+		return 0;
+
+	return buf_size;
+}
+
+static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
+				  size_t snapshot_buf_size)
+{
+	if (ptr->snapshot_init_done)
+		return 0;
+
+	ptr->snapshot_init_done = true;
+
+	ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
+							snapshot_buf_size);
+
+	return 0;
+}
+
+/**
+ * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
+ * @buf1: first buffer
+ * @compare_size: number of bytes to compare
+ * @buf2: second buffer (a circular buffer)
+ * @offs2: offset in second buffer
+ * @buf2_size: size of second buffer
+ *
+ * The comparison allows for the possibility that the bytes to compare in the
+ * circular buffer are not contiguous.  It is assumed that @compare_size <=
+ * @buf2_size.  This function returns %false if the bytes are identical, %true
+ * otherwise.
+ */
+static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
+				     void *buf2, size_t offs2, size_t buf2_size)
+{
+	size_t end2 = offs2 + compare_size, part_size;
+
+	if (end2 <= buf2_size)
+		return memcmp(buf1, buf2 + offs2, compare_size);
+
+	/* First part runs from offs2 to the end of buf2, the rest wraps to 0 */
+	part_size = buf2_size - offs2;
+	if (memcmp(buf1, buf2 + offs2, part_size))
+		return true;
+
+	compare_size -= part_size;
+	return memcmp(buf1 + part_size, buf2, compare_size);
+}
+
+static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
+				 size_t ref_size, size_t buf_size,
+				 void *data, size_t head)
+{
+	size_t ref_end = ref_offset + ref_size;
+
+	if (ref_end > buf_size) {
+		if (head > ref_offset || head < ref_end - buf_size)
+			return true;
+	} else if (head > ref_offset && head < ref_end) {
+		return true;
+	}
+
+	return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
+					buf_size);
+}
+
+static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
+			      void *data, size_t head)
+{
+	if (head >= ref_size) {
+		memcpy(ref_buf, data + head - ref_size, ref_size);
+	} else {
+		memcpy(ref_buf, data, head);
+		ref_size -= head;
+		memcpy(ref_buf + head, data + buf_size - ref_size, ref_size);
+	}
+}
+
+static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
+			     struct auxtrace_mmap *mm, unsigned char *data,
+			     u64 head)
+{
+	struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
+	bool wrapped;
+
+	wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
+				       ptr->snapshot_ref_buf_size, mm->len,
+				       data, head);
+
+	intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
+			  data, head);
+
+	return wrapped;
+}
+
+static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
+{
+	int i, a, b;
+
+	b = buf_size >> 3;	/* buffer size counted in 8-byte words */
+	a = b - 512;		/* start of the last 512 words ... */
+	if (a < 0)
+		a = 0;		/* ... or of the whole buffer if it is smaller */
+	/* Any non-zero word in that tail makes this return true */
+	for (i = a; i < b; i++) {
+		if (data[i])
+			return true;
+	}
+	/* The inspected tail was all zeroes */
+	return false;
+}
+
+static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
+				  struct auxtrace_mmap *mm, unsigned char *data,
+				  u64 *head, u64 *old)
+{
+	struct intel_pt_recording *ptr =
+			container_of(itr, struct intel_pt_recording, itr);
+	bool wrapped;
+	int err;
+
+	pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
+		  __func__, idx, (size_t)*old, (size_t)*head);
+
+	err = intel_pt_snapshot_init(ptr, mm->len);
+	if (err)
+		goto out_err;
+
+	if (idx >= ptr->snapshot_ref_cnt) {
+		err = intel_pt_alloc_snapshot_refs(ptr, idx);
+		if (err)
+			goto out_err;
+	}
+
+	if (ptr->snapshot_ref_buf_size) {
+		if (!ptr->snapshot_refs[idx].ref_buf) {
+			err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
+			if (err)
+				goto out_err;
+		}
+		wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
+	} else {
+		wrapped = ptr->snapshot_refs[idx].wrapped;
+		if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) {
+			ptr->snapshot_refs[idx].wrapped = true;
+			wrapped = true;
+		}
+	}
+
+	/*
+	 * In full trace mode 'head' continually increases.  However in snapshot
+	 * mode 'head' is an offset within the buffer.  Here 'old' and 'head'
+	 * are adjusted to match the full trace case which expects that 'old' is
+	 * always less than 'head'.
+	 */
+	if (wrapped) {
+		*old = *head;
+		*head += mm->len;
+	} else {
+		if (mm->mask)
+			*old &= mm->mask;
+		else
+			*old %= mm->len;
+		if (*old > *head)
+			*head += mm->len;
+	}
+
+	pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
+		  __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
+
+	return 0;
+
+out_err:
+	pr_err("%s: failed, error %d\n", __func__, err);
+	return err;
+}
+
+static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
+{
+	return rdtsc();
+}
+
+static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
+{
+	struct intel_pt_recording *ptr =
+			container_of(itr, struct intel_pt_recording, itr);
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(ptr->evlist, evsel) {
+		if (evsel->attr.type == ptr->intel_pt_pmu->type)
+			return perf_evlist__enable_event_idx(ptr->evlist, evsel,
+							     idx);
+	}
+	return -EINVAL;
+}
+
+struct auxtrace_record *intel_pt_recording_init(int *err)
+{
+	struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
+	struct intel_pt_recording *ptr;
+
+	if (!intel_pt_pmu)
+		return NULL;
+
+	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
+		*err = -errno;
+		return NULL;
+	}
+
+	ptr = zalloc(sizeof(struct intel_pt_recording));
+	if (!ptr) {
+		*err = -ENOMEM;
+		return NULL;
+	}
+
+	ptr->intel_pt_pmu = intel_pt_pmu;
+	ptr->itr.recording_options = intel_pt_recording_options;
+	ptr->itr.info_priv_size = intel_pt_info_priv_size;
+	ptr->itr.info_fill = intel_pt_info_fill;
+	ptr->itr.free = intel_pt_recording_free;
+	ptr->itr.snapshot_start = intel_pt_snapshot_start;
+	ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
+	ptr->itr.find_snapshot = intel_pt_find_snapshot;
+	ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
+	ptr->itr.reference = intel_pt_reference;
+	ptr->itr.read_finish = intel_pt_read_finish;
+	return &ptr->itr;
+}
diff --git a/arch/x86/util/kvm-stat.c b/arch/x86/util/kvm-stat.c
new file mode 100644
index 0000000..b32409a
--- /dev/null
+++ b/arch/x86/util/kvm-stat.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include "../../util/kvm-stat.h"
+#include <asm/svm.h>
+#include <asm/vmx.h>
+#include <asm/kvm.h>
+
+define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
+define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
+
+static struct kvm_events_ops exit_events = {
+	.is_begin_event = exit_event_begin,
+	.is_end_event = exit_event_end,
+	.decode_key = exit_event_decode_key,
+	.name = "VM-EXIT"
+};
+
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 20;
+const char *kvm_exit_reason = "exit_reason";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
+/*
+ * For the mmio events, we treat:
+ * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
+ * the time of MMIO read: kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...).
+ */
+static void mmio_event_get_key(struct perf_evsel *evsel, struct perf_sample *sample,
+			       struct event_key *key)
+{
+	key->key  = perf_evsel__intval(evsel, sample, "gpa");
+	key->info = perf_evsel__intval(evsel, sample, "type");
+}
+
+#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
+#define KVM_TRACE_MMIO_READ 1
+#define KVM_TRACE_MMIO_WRITE 2
+
+static bool mmio_event_begin(struct perf_evsel *evsel,
+			     struct perf_sample *sample, struct event_key *key)
+{
+	/* MMIO read begin event in kernel. */
+	if (kvm_exit_event(evsel))
+		return true;
+
+	/* MMIO write begin event in kernel. */
+	if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
+	    perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
+		mmio_event_get_key(evsel, sample, key);
+		return true;
+	}
+
+	return false;
+}
+
+static bool mmio_event_end(struct perf_evsel *evsel, struct perf_sample *sample,
+			   struct event_key *key)
+{
+	/* MMIO write end event in kernel. */
+	if (kvm_entry_event(evsel))
+		return true;
+
+	/* MMIO read end event in kernel.*/
+	if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
+	    perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
+		mmio_event_get_key(evsel, sample, key);
+		return true;
+	}
+
+	return false;
+}
+
+static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+				  struct event_key *key, char *decode)
+{
+	/* Print the gpa as 64-bit, like the ioport decoder; %lx may truncate */
+	scnprintf(decode, decode_str_len, "%#llx:%s",
+		  (unsigned long long)key->key,
+		  key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
+}
+
+static struct kvm_events_ops mmio_events = {
+	.is_begin_event = mmio_event_begin,
+	.is_end_event = mmio_event_end,
+	.decode_key = mmio_event_decode_key,
+	.name = "MMIO Access"
+};
+
+ /* The time of emulation pio access is from kvm_pio to kvm_entry. */
+static void ioport_event_get_key(struct perf_evsel *evsel,
+				 struct perf_sample *sample,
+				 struct event_key *key)
+{
+	key->key  = perf_evsel__intval(evsel, sample, "port");
+	key->info = perf_evsel__intval(evsel, sample, "rw");
+}
+
+static bool ioport_event_begin(struct perf_evsel *evsel,
+			       struct perf_sample *sample,
+			       struct event_key *key)
+{
+	if (!strcmp(evsel->name, "kvm:kvm_pio")) {
+		ioport_event_get_key(evsel, sample, key);
+		return true;
+	}
+
+	return false;
+}
+
+static bool ioport_event_end(struct perf_evsel *evsel,
+			     struct perf_sample *sample __maybe_unused,
+			     struct event_key *key __maybe_unused)
+{
+	return kvm_entry_event(evsel);
+}
+
+static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+				    struct event_key *key,
+				    char *decode)
+{
+	scnprintf(decode, decode_str_len, "%#llx:%s",
+		  (unsigned long long)key->key,
+		  key->info ? "POUT" : "PIN");
+}
+
+static struct kvm_events_ops ioport_events = {
+	.is_begin_event = ioport_event_begin,
+	.is_end_event = ioport_event_end,
+	.decode_key = ioport_event_decode_key,
+	.name = "IO Port Access"
+};
+
+const char *kvm_events_tp[] = {
+	"kvm:kvm_entry",
+	"kvm:kvm_exit",
+	"kvm:kvm_mmio",
+	"kvm:kvm_pio",
+	NULL,
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+	{ .name = "vmexit", .ops = &exit_events },
+	{ .name = "mmio", .ops = &mmio_events },
+	{ .name = "ioport", .ops = &ioport_events },
+	{ NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+	"HLT",
+	NULL,
+};
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid)
+{
+	if (strstr(cpuid, "Intel")) {
+		kvm->exit_reasons = vmx_exit_reasons;
+		kvm->exit_reasons_isa = "VMX";
+	} else if (strstr(cpuid, "AMD")) {
+		kvm->exit_reasons = svm_exit_reasons;
+		kvm->exit_reasons_isa = "SVM";
+	} else
+		return -ENOTSUP;
+
+	return 0;
+}
diff --git a/arch/x86/util/perf_regs.c b/arch/x86/util/perf_regs.c
new file mode 100644
index 0000000..4b2caf6
--- /dev/null
+++ b/arch/x86/util/perf_regs.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <string.h>
+#include <regex.h>
+
+#include "../../perf.h"
+#include "../../util/util.h"
+#include "../../util/perf_regs.h"
+#include "../../util/debug.h"
+
+const struct sample_reg sample_reg_masks[] = {
+	SMPL_REG(AX, PERF_REG_X86_AX),
+	SMPL_REG(BX, PERF_REG_X86_BX),
+	SMPL_REG(CX, PERF_REG_X86_CX),
+	SMPL_REG(DX, PERF_REG_X86_DX),
+	SMPL_REG(SI, PERF_REG_X86_SI),
+	SMPL_REG(DI, PERF_REG_X86_DI),
+	SMPL_REG(BP, PERF_REG_X86_BP),
+	SMPL_REG(SP, PERF_REG_X86_SP),
+	SMPL_REG(IP, PERF_REG_X86_IP),
+	SMPL_REG(FLAGS, PERF_REG_X86_FLAGS),
+	SMPL_REG(CS, PERF_REG_X86_CS),
+	SMPL_REG(SS, PERF_REG_X86_SS),
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+	SMPL_REG(R8, PERF_REG_X86_R8),
+	SMPL_REG(R9, PERF_REG_X86_R9),
+	SMPL_REG(R10, PERF_REG_X86_R10),
+	SMPL_REG(R11, PERF_REG_X86_R11),
+	SMPL_REG(R12, PERF_REG_X86_R12),
+	SMPL_REG(R13, PERF_REG_X86_R13),
+	SMPL_REG(R14, PERF_REG_X86_R14),
+	SMPL_REG(R15, PERF_REG_X86_R15),
+#endif
+	SMPL_REG_END
+};
+
+struct sdt_name_reg {
+	const char *sdt_name;
+	const char *uprobe_name;
+};
+#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m}
+#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL}
+
+static const struct sdt_name_reg sdt_reg_tbl[] = {
+	SDT_NAME_REG(eax, ax),
+	SDT_NAME_REG(rax, ax),
+	SDT_NAME_REG(al,  ax),
+	SDT_NAME_REG(ah,  ax),
+	SDT_NAME_REG(ebx, bx),
+	SDT_NAME_REG(rbx, bx),
+	SDT_NAME_REG(bl,  bx),
+	SDT_NAME_REG(bh,  bx),
+	SDT_NAME_REG(ecx, cx),
+	SDT_NAME_REG(rcx, cx),
+	SDT_NAME_REG(cl,  cx),
+	SDT_NAME_REG(ch,  cx),
+	SDT_NAME_REG(edx, dx),
+	SDT_NAME_REG(rdx, dx),
+	SDT_NAME_REG(dl,  dx),
+	SDT_NAME_REG(dh,  dx),
+	SDT_NAME_REG(esi, si),
+	SDT_NAME_REG(rsi, si),
+	SDT_NAME_REG(sil, si),
+	SDT_NAME_REG(edi, di),
+	SDT_NAME_REG(rdi, di),
+	SDT_NAME_REG(dil, di),
+	SDT_NAME_REG(ebp, bp),
+	SDT_NAME_REG(rbp, bp),
+	SDT_NAME_REG(bpl, bp),
+	SDT_NAME_REG(rsp, sp),
+	SDT_NAME_REG(esp, sp),
+	SDT_NAME_REG(spl, sp),
+
+	/* rNN registers */
+	SDT_NAME_REG(r8b,  r8),
+	SDT_NAME_REG(r8w,  r8),
+	SDT_NAME_REG(r8d,  r8),
+	SDT_NAME_REG(r9b,  r9),
+	SDT_NAME_REG(r9w,  r9),
+	SDT_NAME_REG(r9d,  r9),
+	SDT_NAME_REG(r10b, r10),
+	SDT_NAME_REG(r10w, r10),
+	SDT_NAME_REG(r10d, r10),
+	SDT_NAME_REG(r11b, r11),
+	SDT_NAME_REG(r11w, r11),
+	SDT_NAME_REG(r11d, r11),
+	SDT_NAME_REG(r12b, r12),
+	SDT_NAME_REG(r12w, r12),
+	SDT_NAME_REG(r12d, r12),
+	SDT_NAME_REG(r13b, r13),
+	SDT_NAME_REG(r13w, r13),
+	SDT_NAME_REG(r13d, r13),
+	SDT_NAME_REG(r14b, r14),
+	SDT_NAME_REG(r14w, r14),
+	SDT_NAME_REG(r14d, r14),
+	SDT_NAME_REG(r15b, r15),
+	SDT_NAME_REG(r15w, r15),
+	SDT_NAME_REG(r15d, r15),
+	SDT_NAME_REG_END,
+};
+
+/*
+ * Perf only supports OP which is in  +/-NUM(REG)  form.
+ * Here plus-minus sign, NUM and parenthesis are optional,
+ * only REG is mandatory.
+ *
+ * SDT events also support indirect addressing mode with a
+ * symbol as offset, scaled mode and constants in OP. But
+ * perf does not support them yet. Below are a few examples.
+ *
+ * OP with scaled mode:
+ *     (%rax,%rsi,8)
+ *     10(%rax,%rsi,8)
+ *
+ * OP with indirect addressing mode:
+ *     check_action(%rip)
+ *     mp_+52(%rip)
+ *     44+mp_(%rip)
+ *
+ * OP with constant values:
+ *     $0
+ *     $123
+ *     $-1
+ */
+#define SDT_OP_REGEX  "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$"
+
+static regex_t sdt_op_regex;
+
+static int sdt_init_op_regex(void)
+{
+	static int initialized;
+	int ret = 0;
+
+	if (initialized)
+		return 0;
+
+	/* regcomp() fails with a non-zero REG_* code, not a negative errno */
+	ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED);
+	if (ret) {
+		pr_debug4("Regex compilation error.\n");
+		return -EINVAL;
+	}
+	initialized = 1;
+	return 0;
+}
+
+/*
+ * Max x86 register name length is 5 (ex: %r15d). So, 6th char
+ * should always contain NUL. This helps to find register name
+ * length using strlen, instead of maintaining one more variable.
+ */
+#define SDT_REG_NAME_SIZE  6
+
+/*
+ * The uprobe parser does not support all gas register names;
+ * so, we have to replace them (ex. for x86_64: %rax -> %ax).
+ * Note: If register does not require renaming, just copy
+ * paste as it is, but don't leave it empty.
+ */
+static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg)
+{
+	int i = 0;
+
+	for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) {
+		if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) {
+			strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name);
+			return;
+		}
+	}
+
+	strncpy(uprobe_reg, sdt_reg, sdt_len);
+}
+
+int arch_sdt_arg_parse_op(char *old_op, char **new_op)
+{
+	char new_reg[SDT_REG_NAME_SIZE] = {0};
+	int new_len = 0, ret;
+	/*
+	 * rm[0]:  +/-NUM(REG)
+	 * rm[1]:  +/-
+	 * rm[2]:  NUM
+	 * rm[3]:  (
+	 * rm[4]:  REG
+	 * rm[5]:  )
+	 */
+	regmatch_t rm[6];
+	/*
+	 * Max prefix length is 2 as it may contain sign(+/-)
+	 * and displacement 0 (Both sign and displacement 0 are
+	 * optional so it may be empty). Use one more character
+	 * to hold last NUL so that strlen can be used to find
+	 * prefix length, instead of maintaining one more variable.
+	 */
+	char prefix[3] = {0};
+
+	ret = sdt_init_op_regex();
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * If unsupported OR does not match with regex OR register name
+	 * does not fit new_reg together with its NUL terminator, skip it.
+	 */
+	if (strchr(old_op, ',') || strchr(old_op, '$') ||
+	    regexec(&sdt_op_regex, old_op, 6, rm, 0)   ||
+	    rm[4].rm_eo - rm[4].rm_so >= SDT_REG_NAME_SIZE) {
+		pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+		return SDT_ARG_SKIP;
+	}
+
+	/*
+	 * Prepare prefix.
+	 * If SDT OP has parenthesis but does not provide
+	 * displacement, add 0 for displacement.
+	 *     SDT         Uprobe     Prefix
+	 *     -----------------------------
+	 *     +24(%rdi)   +24(%di)   +
+	 *     24(%rdi)    +24(%di)   +
+	 *     %rdi        %di
+	 *     (%rdi)      +0(%di)    +0
+	 *     -80(%rbx)   -80(%bx)   -
+	 */
+	if (rm[3].rm_so != rm[3].rm_eo) {
+		if (rm[1].rm_so != rm[1].rm_eo)
+			prefix[0] = *(old_op + rm[1].rm_so);
+		else if (rm[2].rm_so != rm[2].rm_eo)
+			prefix[0] = '+';
+		else
+			strncpy(prefix, "+0", 2);
+	}
+
+	/* Rename register */
+	sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so,
+			    new_reg);
+
+	/* Prepare final OP which should be valid for uprobe_events */
+	new_len = strlen(prefix)              +
+		  (rm[2].rm_eo - rm[2].rm_so) +
+		  (rm[3].rm_eo - rm[3].rm_so) +
+		  strlen(new_reg)             +
+		  (rm[5].rm_eo - rm[5].rm_so) +
+		  1;					/* NUL */
+
+	*new_op = zalloc(new_len);
+	if (!*new_op)
+		return -ENOMEM;
+
+	/* The precision argument of "%.*s" must be an int, hence the casts */
+	scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s",
+		  (int)strlen(prefix), prefix,
+		  (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
+		  (int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so,
+		  (int)strlen(new_reg), new_reg,
+		  (int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so);
+	return SDT_ARG_VALID;
+}
diff --git a/arch/x86/util/pmu.c b/arch/x86/util/pmu.c
new file mode 100644
index 0000000..63a74c3
--- /dev/null
+++ b/arch/x86/util/pmu.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+
+#include <linux/perf_event.h>
+
+#include "../../util/intel-pt.h"
+#include "../../util/intel-bts.h"
+#include "../../util/pmu.h"
+
+struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+{
+#ifdef HAVE_AUXTRACE_SUPPORT
+	if (!strcmp(pmu->name, INTEL_PT_PMU_NAME))
+		return intel_pt_pmu_default_config(pmu);
+	if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME))
+		pmu->selectable = true;
+#endif
+	return NULL;
+}
diff --git a/arch/x86/util/tsc.c b/arch/x86/util/tsc.c
new file mode 100644
index 0000000..06bae70
--- /dev/null
+++ b/arch/x86/util/tsc.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <errno.h>
+
+#include <linux/perf_event.h>
+
+#include "../../perf.h"
+#include <linux/types.h>
+#include "../../util/debug.h"
+#include "../../util/tsc.h"
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+			     struct perf_tsc_conversion *tc)
+{
+	bool cap_user_time_zero;
+	u32 seq;
+	int i = 0;
+
+	while (1) {
+		seq = pc->lock;	/* sequence value seen before the copy */
+		rmb();
+		tc->time_mult = pc->time_mult;
+		tc->time_shift = pc->time_shift;
+		tc->time_zero = pc->time_zero;
+		cap_user_time_zero = pc->cap_user_time_zero;
+		rmb();
+		if (pc->lock == seq && !(seq & 1))
+			break;	/* lock unchanged and even: keep this copy */
+		if (++i > 10000) {	/* bounded number of retries */
+			pr_debug("failed to get perf_event_mmap_page lock\n");
+			return -EINVAL;
+		}
+	}
+	/* Report -EOPNOTSUPP when the page does not set cap_user_time_zero */
+	if (!cap_user_time_zero)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+u64 rdtsc(void)
+{
+	unsigned int low, high;
+
+	asm volatile("rdtsc" : "=a" (low), "=d" (high));
+	/* Combine the two 32-bit outputs: 'high' supplies bits 63..32 */
+	return low | ((u64)high) << 32;
+}
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+				struct perf_tool *tool,
+				perf_event__handler_t process,
+				struct machine *machine)
+{
+	union perf_event event = {
+		.time_conv = {
+			.header = {
+				.type = PERF_RECORD_TIME_CONV,
+				.size = sizeof(struct time_conv_event),
+			},
+		},
+	};
+	struct perf_tsc_conversion tc;
+	int err;
+
+	if (!pc)
+		return 0;
+	err = perf_read_tsc_conversion(pc, &tc);
+	if (err == -EOPNOTSUPP)
+		return 0;
+	if (err)
+		return err;
+
+	pr_debug2("Synthesizing TSC conversion information\n");
+
+	event.time_conv.time_mult  = tc.time_mult;
+	event.time_conv.time_shift = tc.time_shift;
+	event.time_conv.time_zero  = tc.time_zero;
+
+	return process(tool, &event, NULL, machine);
+}
diff --git a/arch/x86/util/unwind-libdw.c b/arch/x86/util/unwind-libdw.c
new file mode 100644
index 0000000..fda8f42
--- /dev/null
+++ b/arch/x86/util/unwind-libdw.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct regs_dump *user_regs = &ui->sample->user_regs;
+	Dwarf_Word dwarf_regs[17];
+	unsigned nregs;
+
+#define REG(r) ({						\
+	Dwarf_Word val = 0;					\
+	perf_reg_value(&val, user_regs, PERF_REG_X86_##r);	\
+	val;							\
+})
+
+	if (user_regs->abi == PERF_SAMPLE_REGS_ABI_32) {
+		dwarf_regs[0] = REG(AX);
+		dwarf_regs[1] = REG(CX);
+		dwarf_regs[2] = REG(DX);
+		dwarf_regs[3] = REG(BX);
+		dwarf_regs[4] = REG(SP);
+		dwarf_regs[5] = REG(BP);
+		dwarf_regs[6] = REG(SI);
+		dwarf_regs[7] = REG(DI);
+		dwarf_regs[8] = REG(IP);
+		nregs = 9;
+	} else {
+		dwarf_regs[0]  = REG(AX);
+		dwarf_regs[1]  = REG(DX);
+		dwarf_regs[2]  = REG(CX);
+		dwarf_regs[3]  = REG(BX);
+		dwarf_regs[4]  = REG(SI);
+		dwarf_regs[5]  = REG(DI);
+		dwarf_regs[6]  = REG(BP);
+		dwarf_regs[7]  = REG(SP);
+		dwarf_regs[8]  = REG(R8);
+		dwarf_regs[9]  = REG(R9);
+		dwarf_regs[10] = REG(R10);
+		dwarf_regs[11] = REG(R11);
+		dwarf_regs[12] = REG(R12);
+		dwarf_regs[13] = REG(R13);
+		dwarf_regs[14] = REG(R14);
+		dwarf_regs[15] = REG(R15);
+		dwarf_regs[16] = REG(IP);
+		nregs = 17;
+	}
+
+	return dwfl_thread_state_registers(thread, 0, nregs, dwarf_regs);
+}
diff --git a/arch/x86/util/unwind-libunwind.c b/arch/x86/util/unwind-libunwind.c
new file mode 100644
index 0000000..05920e3
--- /dev/null
+++ b/arch/x86/util/unwind-libunwind.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#ifndef REMOTE_UNWIND_LIBUNWIND
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+#include "../../util/debug.h"
+#endif
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+int LIBUNWIND__ARCH_REG_ID(int regnum)
+{
+	int id;
+
+	switch (regnum) {
+	case UNW_X86_64_RAX:
+		id = PERF_REG_X86_AX;
+		break;
+	case UNW_X86_64_RDX:
+		id = PERF_REG_X86_DX;
+		break;
+	case UNW_X86_64_RCX:
+		id = PERF_REG_X86_CX;
+		break;
+	case UNW_X86_64_RBX:
+		id = PERF_REG_X86_BX;
+		break;
+	case UNW_X86_64_RSI:
+		id = PERF_REG_X86_SI;
+		break;
+	case UNW_X86_64_RDI:
+		id = PERF_REG_X86_DI;
+		break;
+	case UNW_X86_64_RBP:
+		id = PERF_REG_X86_BP;
+		break;
+	case UNW_X86_64_RSP:
+		id = PERF_REG_X86_SP;
+		break;
+	case UNW_X86_64_R8:
+		id = PERF_REG_X86_R8;
+		break;
+	case UNW_X86_64_R9:
+		id = PERF_REG_X86_R9;
+		break;
+	case UNW_X86_64_R10:
+		id = PERF_REG_X86_R10;
+		break;
+	case UNW_X86_64_R11:
+		id = PERF_REG_X86_R11;
+		break;
+	case UNW_X86_64_R12:
+		id = PERF_REG_X86_R12;
+		break;
+	case UNW_X86_64_R13:
+		id = PERF_REG_X86_R13;
+		break;
+	case UNW_X86_64_R14:
+		id = PERF_REG_X86_R14;
+		break;
+	case UNW_X86_64_R15:
+		id = PERF_REG_X86_R15;
+		break;
+	case UNW_X86_64_RIP:
+		id = PERF_REG_X86_IP;
+		break;
+	default:
+		pr_err("unwind: invalid reg id %d\n", regnum);
+		return -EINVAL;
+	}
+
+	return id;
+}
+#else
+int LIBUNWIND__ARCH_REG_ID(int regnum)
+{
+	int id;
+
+	switch (regnum) {
+	case UNW_X86_EAX:
+		id = PERF_REG_X86_AX;
+		break;
+	case UNW_X86_EDX:
+		id = PERF_REG_X86_DX;
+		break;
+	case UNW_X86_ECX:
+		id = PERF_REG_X86_CX;
+		break;
+	case UNW_X86_EBX:
+		id = PERF_REG_X86_BX;
+		break;
+	case UNW_X86_ESI:
+		id = PERF_REG_X86_SI;
+		break;
+	case UNW_X86_EDI:
+		id = PERF_REG_X86_DI;
+		break;
+	case UNW_X86_EBP:
+		id = PERF_REG_X86_BP;
+		break;
+	case UNW_X86_ESP:
+		id = PERF_REG_X86_SP;
+		break;
+	case UNW_X86_EIP:
+		id = PERF_REG_X86_IP;
+		break;
+	default:
+		pr_err("unwind: invalid reg id %d\n", regnum);
+		return -EINVAL;
+	}
+
+	return id;
+}
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
diff --git a/arch/xtensa/Build b/arch/xtensa/Build
new file mode 100644
index 0000000..54afe4a
--- /dev/null
+++ b/arch/xtensa/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile
new file mode 100644
index 0000000..7fbca17
--- /dev/null
+++ b/arch/xtensa/Makefile
@@ -0,0 +1,3 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
diff --git a/arch/xtensa/include/dwarf-regs-table.h b/arch/xtensa/include/dwarf-regs-table.h
new file mode 100644
index 0000000..d7c9f1f
--- /dev/null
+++ b/arch/xtensa/include/dwarf-regs-table.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const xtensa_regstr_tbl[] = {
+	"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+	"a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
+};
+#endif
diff --git a/arch/xtensa/util/Build b/arch/xtensa/util/Build
new file mode 100644
index 0000000..954e287
--- /dev/null
+++ b/arch/xtensa/util/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/arch/xtensa/util/dwarf-regs.c b/arch/xtensa/util/dwarf-regs.c
new file mode 100644
index 0000000..4dba76b
--- /dev/null
+++ b/arch/xtensa/util/dwarf-regs.c
@@ -0,0 +1,25 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (c) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+#define XTENSA_MAX_REGS 16
+
+const char *xtensa_regs_table[XTENSA_MAX_REGS] = {
+	"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+	"a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+	return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL;
+}
diff --git a/bench/Build b/bench/Build
new file mode 100644
index 0000000..60bf119
--- /dev/null
+++ b/bench/Build
@@ -0,0 +1,13 @@
+perf-y += sched-messaging.o
+perf-y += sched-pipe.o
+perf-y += mem-functions.o
+perf-y += futex-hash.o
+perf-y += futex-wake.o
+perf-y += futex-wake-parallel.o
+perf-y += futex-requeue.o
+perf-y += futex-lock-pi.o
+
+perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
+perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
+
+perf-$(CONFIG_NUMA) += numa.o
diff --git a/bench/bench.h b/bench/bench.h
new file mode 100644
index 0000000..6c9fcd7
--- /dev/null
+++ b/bench/bench.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BENCH_H
+#define BENCH_H
+
+/*
+ * glibc only gained the transparent hugepage madvise constants in 2.13.
+ * Provide fallback definitions for older C libraries; anything the
+ * system headers already define is left alone.
+ *
+ * PA-RISC numbers its madvise values differently from every other
+ * architecture, hence the __hppa__ special case for each constant.
+ */
+#ifndef MADV_HUGEPAGE
+# ifdef __hppa__
+#  define MADV_HUGEPAGE		67
+# else
+#  define MADV_HUGEPAGE		14
+# endif
+#endif
+
+#ifndef MADV_NOHUGEPAGE
+# ifdef __hppa__
+#  define MADV_NOHUGEPAGE	68
+# else
+#  define MADV_NOHUGEPAGE	15
+# endif
+#endif
+
+/*
+ * Entry points of the individual benchmarks.  Each one receives the
+ * argc/argv pair of its sub-command and returns an int status.
+ */
+int bench_numa(int argc, const char **argv);
+int bench_sched_messaging(int argc, const char **argv);
+int bench_sched_pipe(int argc, const char **argv);
+int bench_mem_memcpy(int argc, const char **argv);
+int bench_mem_memset(int argc, const char **argv);
+int bench_futex_hash(int argc, const char **argv);
+int bench_futex_wake(int argc, const char **argv);
+int bench_futex_wake_parallel(int argc, const char **argv);
+int bench_futex_requeue(int argc, const char **argv);
+/* pi futexes */
+int bench_futex_lock_pi(int argc, const char **argv);
+
+/*
+ * Output format selectors: each *_STR is the user-visible name of the
+ * numeric id that follows it.
+ * NOTE(review): presumably these ids are the values held by bench_format
+ * below -- confirm against the code that parses the format option.
+ */
+#define BENCH_FORMAT_DEFAULT_STR	"default"
+#define BENCH_FORMAT_DEFAULT		0
+#define BENCH_FORMAT_SIMPLE_STR		"simple"
+#define BENCH_FORMAT_SIMPLE		1
+
+/* Parenthesized so the negative literal is safe inside any expression. */
+#define BENCH_FORMAT_UNKNOWN		(-1)
+
+extern int bench_format;
+/* Iteration count used by the repeating futex benchmarks as their run-loop bound. */
+extern unsigned int bench_repeat;
+
+#endif
diff --git a/bench/futex-hash.c b/bench/futex-hash.c
new file mode 100644
index 0000000..9aa3a67
--- /dev/null
+++ b/bench/futex-hash.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013  Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing.
+ *
+ * This program is particularly useful for measuring the kernel's futex hash
+ * table/function implementation. In order for it to make sense, use with as
+ * many threads and futexes as possible.
+ */
+
+/* For the CLR_() macros */
+#include <string.h>
+#include <pthread.h>
+
+#include <errno.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <sys/time.h>
+
+#include "../util/stat.h"
+#include <subcmd/parse-options.h>
+#include "bench.h"
+#include "futex.h"
+#include "cpumap.h"
+
+#include <err.h>
+
+/* number of worker threads; 0 means "one per CPU in the map" */
+static unsigned int nthreads = 0;
+/* how long the workers run, in seconds */
+static unsigned int nsecs    = 10;
+/* amount of futexes per thread */
+static unsigned int nfutexes = 1024;
+static bool fshared = false, done = false, silent = false;
+/* FUTEX_PRIVATE_FLAG unless shared futexes were requested */
+static int futex_flag = 0;
+
+/*
+ * Benchmark start/end timestamps and their difference.  Kept file-local:
+ * futex-lock-pi.c has variables with the same names, and two external
+ * definitions collide at link time when building with -fno-common.
+ */
+static struct timeval start, end, runtime;
+/* thread_lock protects threads_starting and backs both condition variables */
+static pthread_mutex_t thread_lock;
+/* workers that have not yet reached the start line */
+static unsigned int threads_starting;
+static struct stats throughput_stats;
+/* thread_parent: "all workers ready"; thread_worker: "go" */
+static pthread_cond_t thread_parent, thread_worker;
+
+/* Per-thread state of the futex-hash benchmark. */
+struct worker {
+	int tid;		/* index of the thread, assigned at creation */
+	u_int32_t *futex;	/* this thread's array of nfutexes futex words */
+	pthread_t thread;
+	unsigned long ops;	/* futex_wait() calls issued, stored when the thread ends */
+};
+
+/* Command line options; each entry stores straight into the file-scope variable it names. */
+static const struct option options[] = {
+	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('r', "runtime", &nsecs,    "Specify runtime (in seconds)"),
+	OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
+	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_END()
+};
+
+/* NULL-terminated usage text passed to parse_options()/usage_with_options(). */
+static const char * const bench_futex_hash_usage[] = {
+	"perf bench futex hash <options>",
+	NULL
+};
+
+/*
+ * Worker thread body.
+ *
+ * Checks in with the parent, waits for the start broadcast, then keeps
+ * calling futex_wait() on each of its futexes until 'done' is set.  The
+ * futex words are zero (calloc'ed by the parent) while the expected value
+ * passed in is 1234, so every call is meant to fail immediately.
+ *
+ * @arg: the struct worker of this thread; its ->ops is updated on exit.
+ */
+static void *workerfn(void *arg)
+{
+	int ret;
+	struct worker *w = (struct worker *) arg;
+	unsigned int i;
+	unsigned long ops = w->ops; /* avoid cacheline bouncing */
+
+	/* start-line handshake: the last thread to arrive wakes the parent */
+	pthread_mutex_lock(&thread_lock);
+	threads_starting--;
+	if (!threads_starting)
+		pthread_cond_signal(&thread_parent);
+	pthread_cond_wait(&thread_worker, &thread_lock);
+	pthread_mutex_unlock(&thread_lock);
+
+	do {
+		for (i = 0; i < nfutexes; i++, ops++) {
+			/*
+			 * We want the futex calls to fail in order to stress
+			 * the hashing of uaddr and not measure other steps,
+			 * such as internal waitqueue handling, thus enlarging
+			 * the critical region protected by hb->lock.
+			 */
+			ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag);
+			/*
+			 * Only a failure with EAGAIN/EWOULDBLOCK is expected.
+			 * The two errno tests must be and-ed: or-ing them is
+			 * always true wherever the two values differ.
+			 */
+			if (!silent &&
+			    (!ret || (errno != EAGAIN && errno != EWOULDBLOCK)))
+				warn("Non-expected futex return call");
+		}
+	}  while (!done);
+
+	w->ops = ops;
+	return NULL;
+}
+
+/*
+ * Stop the benchmark: installed as the SIGINT handler and also called
+ * directly by bench_futex_hash() once the run time has elapsed.  Sets
+ * 'done' and stores the time elapsed since 'start' in 'runtime'.
+ * All three parameters are unused.
+ */
+static void toggle_done(int sig __maybe_unused,
+			siginfo_t *info __maybe_unused,
+			void *uc __maybe_unused)
+{
+	/* inform all threads that we're done for the day */
+	done = true;
+	gettimeofday(&end, NULL);
+	timersub(&end, &start, &runtime);
+}
+
+/*
+ * Print the averaged throughput collected in throughput_stats together
+ * with its relative standard deviation and the whole seconds the run took.
+ * A leading blank line separates it from per-thread output unless silent.
+ */
+static void print_summary(void)
+{
+	const char *lead = silent ? "" : "\n";
+	unsigned long avg = avg_stats(&throughput_stats);
+	double stddev = stddev_stats(&throughput_stats);
+	int secs = (int) runtime.tv_sec;
+
+	printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
+	       lead, avg, rel_stddev_stats(stddev, avg), secs);
+}
+
+/*
+ * bench_futex_hash - entry point of "perf bench futex hash".
+ *
+ * Spawns nthreads workers (default: the number of CPUs in the map), each
+ * pinned to one CPU and issuing futex_wait() calls on its own array of
+ * nfutexes futexes.  The workers run for nsecs seconds or until SIGINT,
+ * after which per-thread and averaged operations/sec are printed.
+ *
+ * Returns the result of the last pthread_join(), i.e. 0 on success; setup
+ * failures terminate the process through err().
+ */
+int bench_futex_hash(int argc, const char **argv)
+{
+	int ret = 0;
+	cpu_set_t cpuset;
+	struct sigaction act;
+	unsigned int i;
+	pthread_attr_t thread_attr;
+	struct worker *worker = NULL;
+	struct cpu_map *cpu;
+
+	argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
+	if (argc) {
+		usage_with_options(bench_futex_hash_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		goto errmem;
+
+	/*
+	 * Start from a zeroed sigaction so no stray flag bits are passed to
+	 * the kernel, and request the three-argument handler form that
+	 * toggle_done() implements.
+	 */
+	memset(&act, 0, sizeof(act));
+	sigfillset(&act.sa_mask);
+	act.sa_sigaction = toggle_done;
+	act.sa_flags = SA_SIGINFO;
+	sigaction(SIGINT, &act, NULL);
+
+	if (!nthreads) /* default to the number of CPUs */
+		nthreads = cpu->nr;
+
+	worker = calloc(nthreads, sizeof(*worker));
+	if (!worker)
+		goto errmem;
+
+	if (!fshared)
+		futex_flag = FUTEX_PRIVATE_FLAG;
+
+	printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
+	       getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
+
+	init_stats(&throughput_stats);
+	pthread_mutex_init(&thread_lock, NULL);
+	pthread_cond_init(&thread_parent, NULL);
+	pthread_cond_init(&thread_worker, NULL);
+
+	threads_starting = nthreads;
+	pthread_attr_init(&thread_attr);
+	gettimeofday(&start, NULL);
+	for (i = 0; i < nthreads; i++) {
+		worker[i].tid = i;
+		worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
+		if (!worker[i].futex)
+			goto errmem;
+
+		/* pin the threads round-robin over the CPUs of the map */
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+
+		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
+		if (ret)
+			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+		ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
+				     (void *)(struct worker *) &worker[i]);
+		if (ret)
+			err(EXIT_FAILURE, "pthread_create");
+
+	}
+	pthread_attr_destroy(&thread_attr);
+
+	/* wait until every worker reached the start line, then release them */
+	pthread_mutex_lock(&thread_lock);
+	while (threads_starting)
+		pthread_cond_wait(&thread_parent, &thread_lock);
+	pthread_cond_broadcast(&thread_worker);
+	pthread_mutex_unlock(&thread_lock);
+
+	sleep(nsecs);
+	toggle_done(0, NULL, NULL);
+
+	for (i = 0; i < nthreads; i++) {
+		ret = pthread_join(worker[i].thread, NULL);
+		if (ret)
+			err(EXIT_FAILURE, "pthread_join");
+	}
+
+	/* cleanup & report results */
+	pthread_cond_destroy(&thread_parent);
+	pthread_cond_destroy(&thread_worker);
+	pthread_mutex_destroy(&thread_lock);
+
+	for (i = 0; i < nthreads; i++) {
+		/*
+		 * runtime.tv_sec is 0 when the run lasted less than a second
+		 * (-r 0, or an early SIGINT); report 0 ops/sec rather than
+		 * dividing by zero.
+		 */
+		unsigned long t = runtime.tv_sec > 0 ?
+			worker[i].ops / runtime.tv_sec : 0;
+		update_stats(&throughput_stats, t);
+		if (!silent) {
+			if (nfutexes == 1)
+				printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
+				       worker[i].tid, &worker[i].futex[0], t);
+			else
+				printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n",
+				       worker[i].tid, &worker[i].futex[0],
+				       &worker[i].futex[nfutexes-1], t);
+		}
+
+		free(worker[i].futex);
+	}
+
+	print_summary();
+
+	free(worker);
+	free(cpu);
+	return ret;
+errmem:
+	err(EXIT_FAILURE, "calloc");
+}
diff --git a/bench/futex-lock-pi.c b/bench/futex-lock-pi.c
new file mode 100644
index 0000000..8e9c475
--- /dev/null
+++ b/bench/futex-lock-pi.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015 Davidlohr Bueso.
+ */
+
+/* For the CLR_() macros */
+#include <string.h>
+#include <pthread.h>
+
+#include <signal.h>
+#include "../util/stat.h"
+#include <subcmd/parse-options.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <errno.h>
+#include "bench.h"
+#include "futex.h"
+#include "cpumap.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+/* Per-thread state of the lock-pi benchmark. */
+struct worker {
+	int tid;		/* index of the thread, assigned at creation */
+	u_int32_t *futex;	/* own calloc'ed word in multi mode, else &global_futex */
+	pthread_t thread;
+	unsigned long ops;	/* completed lock/unlock pairs, stored when the thread ends */
+};
+
+/* futex shared by all workers unless "multi" gives each thread its own */
+static u_int32_t global_futex = 0;
+static struct worker *worker;
+/* how long the workers run, in seconds */
+static unsigned int nsecs = 10;
+static bool silent = false, multi = false;
+static bool done = false, fshared = false;
+/* number of worker threads; 0 means "one per CPU in the map" */
+static unsigned int nthreads = 0;
+/* FUTEX_PRIVATE_FLAG unless shared futexes were requested */
+static int futex_flag = 0;
+/*
+ * Benchmark start/end timestamps and their difference.  Kept file-local:
+ * futex-hash.c has variables with the same names, and two external
+ * definitions collide at link time when building with -fno-common.
+ */
+static struct timeval start, end, runtime;
+/* thread_lock protects threads_starting and backs both condition variables */
+static pthread_mutex_t thread_lock;
+/* workers that have not yet reached the start line */
+static unsigned int threads_starting;
+static struct stats throughput_stats;
+/* thread_parent: "all workers ready"; thread_worker: "go" */
+static pthread_cond_t thread_parent, thread_worker;
+
+/* Command line options; each entry stores straight into the file-scope variable it names. */
+static const struct option options[] = {
+	OPT_UINTEGER('t', "threads",  &nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('r', "runtime", &nsecs,     "Specify runtime (in seconds)"),
+	OPT_BOOLEAN( 'M', "multi",   &multi,     "Use multiple futexes"),
+	OPT_BOOLEAN( 's', "silent",  &silent,    "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &fshared,   "Use shared futexes instead of private ones"),
+	OPT_END()
+};
+
+/* NULL-terminated usage text passed to parse_options()/usage_with_options(). */
+static const char * const bench_futex_lock_pi_usage[] = {
+	"perf bench futex lock-pi <options>",
+	NULL
+};
+
+/*
+ * Print the averaged throughput collected in throughput_stats together
+ * with its relative standard deviation and the whole seconds the run took.
+ * A leading blank line separates it from per-thread output unless silent.
+ */
+static void print_summary(void)
+{
+	const char *lead = silent ? "" : "\n";
+	unsigned long avg = avg_stats(&throughput_stats);
+	double stddev = stddev_stats(&throughput_stats);
+	int secs = (int) runtime.tv_sec;
+
+	printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
+	       lead, avg, rel_stddev_stats(stddev, avg), secs);
+}
+
+/*
+ * Stop the benchmark: installed as the SIGINT handler and also called
+ * directly by bench_futex_lock_pi() once the run time has elapsed.  Sets
+ * 'done' and stores the time elapsed since 'start' in 'runtime'.
+ * All three parameters are unused.
+ */
+static void toggle_done(int sig __maybe_unused,
+			siginfo_t *info __maybe_unused,
+			void *uc __maybe_unused)
+{
+	/* inform all threads that we're done for the day */
+	done = true;
+	gettimeofday(&end, NULL);
+	timersub(&end, &start, &runtime);
+}
+
+/*
+ * Worker thread body: after the start-line handshake, repeatedly take and
+ * drop the PI futex of this thread, counting each completed pair, until
+ * 'done' is set.
+ *
+ * @arg: the struct worker of this thread; its ->ops is updated on exit.
+ */
+static void *workerfn(void *arg)
+{
+	struct worker *self = (struct worker *) arg;
+	unsigned long npairs = self->ops;
+
+	/* check in; the last thread to arrive wakes the parent */
+	pthread_mutex_lock(&thread_lock);
+	threads_starting--;
+	if (!threads_starting)
+		pthread_cond_signal(&thread_parent);
+	pthread_cond_wait(&thread_worker, &thread_lock);
+	pthread_mutex_unlock(&thread_lock);
+
+	do {
+		int rc;
+
+		/* keep retrying a failed acquisition until told to stop */
+		for (;;) {
+			rc = futex_lock_pi(self->futex, NULL, futex_flag);
+			if (!rc)
+				break;
+
+			if (!silent)
+				warn("thread %d: Could not lock pi-lock for %p (%d)",
+				     self->tid, self->futex, rc);
+			if (done)
+				goto out;
+		}
+
+		usleep(1);
+		rc = futex_unlock_pi(self->futex, futex_flag);
+		if (rc && !silent)
+			warn("thread %d: Could not unlock pi-lock for %p (%d)",
+			     self->tid, self->futex, rc);
+		npairs++; /* account for thread's share of work */
+	}  while (!done);
+
+out:
+	self->ops = npairs;
+	return NULL;
+}
+
+/*
+ * Create the nthreads workers described by @w, pinning them round-robin
+ * to the CPUs in @cpu.
+ *
+ * @w:           array of nthreads workers to set up and start
+ * @thread_attr: thread attributes; received by value, so the affinity set
+ *               here lands in this function's copy
+ * @cpu:         CPU map the threads are spread over
+ *
+ * Each worker gets its own calloc'ed futex in multi mode and the address
+ * of global_futex otherwise.  Any failure terminates the process.
+ */
+static void create_threads(struct worker *w, pthread_attr_t thread_attr,
+			   struct cpu_map *cpu)
+{
+	cpu_set_t cpuset;
+	unsigned int i;
+
+	threads_starting = nthreads;
+
+	for (i = 0; i < nthreads; i++) {
+		/* operate on the array we were handed, not on the global */
+		w[i].tid = i;
+
+		if (multi) {
+			w[i].futex = calloc(1, sizeof(u_int32_t));
+			if (!w[i].futex)
+				err(EXIT_FAILURE, "calloc");
+		} else
+			w[i].futex = &global_futex;
+
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+		if (pthread_create(&w[i].thread, &thread_attr, workerfn, &w[i]))
+			err(EXIT_FAILURE, "pthread_create");
+	}
+}
+
+/*
+ * bench_futex_lock_pi - entry point of "perf bench futex lock-pi".
+ *
+ * Spawns nthreads workers (default: the number of CPUs in the map) that
+ * lock and unlock a PI futex in a loop for nsecs seconds or until SIGINT,
+ * then prints per-thread and averaged operations/sec.
+ *
+ * Returns the result of the last pthread_join(), i.e. 0 on success; bad
+ * arguments and setup failures terminate the process.
+ */
+int bench_futex_lock_pi(int argc, const char **argv)
+{
+	int ret = 0;
+	unsigned int i;
+	struct sigaction act;
+	pthread_attr_t thread_attr;
+	struct cpu_map *cpu;
+
+	argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
+	if (argc)
+		goto err;
+
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		err(EXIT_FAILURE, "cpu_map__new");
+
+	/*
+	 * Start from a zeroed sigaction so no stray flag bits are passed to
+	 * the kernel, and request the three-argument handler form that
+	 * toggle_done() implements.
+	 */
+	memset(&act, 0, sizeof(act));
+	sigfillset(&act.sa_mask);
+	act.sa_sigaction = toggle_done;
+	act.sa_flags = SA_SIGINFO;
+	sigaction(SIGINT, &act, NULL);
+
+	if (!nthreads)
+		nthreads = cpu->nr;
+
+	worker = calloc(nthreads, sizeof(*worker));
+	if (!worker)
+		err(EXIT_FAILURE, "calloc");
+
+	if (!fshared)
+		futex_flag = FUTEX_PRIVATE_FLAG;
+
+	printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n",
+	       getpid(), nthreads, nsecs);
+
+	init_stats(&throughput_stats);
+	pthread_mutex_init(&thread_lock, NULL);
+	pthread_cond_init(&thread_parent, NULL);
+	pthread_cond_init(&thread_worker, NULL);
+
+	threads_starting = nthreads;
+	pthread_attr_init(&thread_attr);
+	gettimeofday(&start, NULL);
+
+	create_threads(worker, thread_attr, cpu);
+	pthread_attr_destroy(&thread_attr);
+
+	/* wait until every worker reached the start line, then release them */
+	pthread_mutex_lock(&thread_lock);
+	while (threads_starting)
+		pthread_cond_wait(&thread_parent, &thread_lock);
+	pthread_cond_broadcast(&thread_worker);
+	pthread_mutex_unlock(&thread_lock);
+
+	sleep(nsecs);
+	toggle_done(0, NULL, NULL);
+
+	for (i = 0; i < nthreads; i++) {
+		ret = pthread_join(worker[i].thread, NULL);
+		if (ret)
+			err(EXIT_FAILURE, "pthread_join");
+	}
+
+	/* cleanup & report results */
+	pthread_cond_destroy(&thread_parent);
+	pthread_cond_destroy(&thread_worker);
+	pthread_mutex_destroy(&thread_lock);
+
+	for (i = 0; i < nthreads; i++) {
+		/*
+		 * runtime.tv_sec is 0 when the run lasted less than a second
+		 * (-r 0, or an early SIGINT); report 0 ops/sec rather than
+		 * dividing by zero.
+		 */
+		unsigned long t = runtime.tv_sec > 0 ?
+			worker[i].ops / runtime.tv_sec : 0;
+
+		update_stats(&throughput_stats, t);
+		if (!silent)
+			printf("[thread %3d] futex: %p [ %ld ops/sec ]\n",
+			       worker[i].tid, worker[i].futex, t);
+
+		if (multi)
+			free(worker[i].futex);
+	}
+
+	print_summary();
+
+	free(worker);
+	/* release the CPU map the same way futex-hash.c does */
+	free(cpu);
+	return ret;
+err:
+	usage_with_options(bench_futex_lock_pi_usage, options);
+	exit(EXIT_FAILURE);
+}
diff --git a/bench/futex-requeue.c b/bench/futex-requeue.c
new file mode 100644
index 0000000..fc692ef
--- /dev/null
+++ b/bench/futex-requeue.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013  Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-requeue: Block a bunch of threads on futex1 and requeue them
+ *                on futex2, N at a time.
+ *
+ * This program is particularly useful to measure the latency of nthread
+ * requeues without waking up any tasks -- thus mimicking a regular futex_wait.
+ */
+
+/* For the CLR_() macros */
+#include <string.h>
+#include <pthread.h>
+
+#include <signal.h>
+#include "../util/stat.h"
+#include <subcmd/parse-options.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <errno.h>
+#include "bench.h"
+#include "futex.h"
+#include "cpumap.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+/* futex1: word the workers block on; futex2: word they get requeued to */
+static u_int32_t futex1 = 0;
+static u_int32_t futex2 = 0;
+
+/*
+ * Number of tasks moved by a single requeue call.
+ * The default of 1 maximises the work the kernel has to do.
+ */
+static unsigned int nrequeue = 1;
+
+static pthread_t *worker;
+static bool done = false;
+static bool silent = false;
+static bool fshared = false;
+/* thread_lock protects threads_starting and backs both condition variables */
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent;
+static pthread_cond_t thread_worker;
+static struct stats requeuetime_stats;
+static struct stats requeued_stats;
+static unsigned int threads_starting;
+static unsigned int nthreads = 0;
+static int futex_flag = 0;
+
+/* Command line options; each entry stores straight into the file-scope variable it names. */
+static const struct option options[] = {
+	OPT_UINTEGER('t', "threads",  &nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
+	OPT_BOOLEAN( 's', "silent",   &silent,   "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",   &fshared,  "Use shared futexes instead of private ones"),
+	OPT_END()
+};
+
+/* NULL-terminated usage text passed to parse_options()/usage_with_options(). */
+static const char * const bench_futex_requeue_usage[] = {
+	"perf bench futex requeue <options>",
+	NULL
+};
+
+/*
+ * Print the averages over all runs: threads requeued, and the requeue
+ * time in milliseconds with its relative standard deviation.
+ */
+static void print_summary(void)
+{
+	double avg_usec = avg_stats(&requeuetime_stats);
+	double stddev_usec = stddev_stats(&requeuetime_stats);
+	unsigned int avg_requeued = avg_stats(&requeued_stats);
+	double avg_msec = avg_usec / USEC_PER_MSEC;
+
+	printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
+	       avg_requeued, nthreads, avg_msec,
+	       rel_stddev_stats(stddev_usec, avg_usec));
+}
+
+/*
+ * Worker thread body: check in with the parent, wait for the start
+ * broadcast, then block on futex1 until woken.  @arg is unused.
+ */
+static void *workerfn(void *arg __maybe_unused)
+{
+	pthread_mutex_lock(&thread_lock);
+	/* the last thread to arrive tells the parent everybody is ready */
+	if (--threads_starting == 0)
+		pthread_cond_signal(&thread_parent);
+	pthread_cond_wait(&thread_worker, &thread_lock);
+	pthread_mutex_unlock(&thread_lock);
+
+	futex_wait(&futex1, 0, NULL, futex_flag);
+
+	return NULL;
+}
+
+/*
+ * Start the nthreads workers, pinning them round-robin to the CPUs in
+ * @cpu, and reset threads_starting so the caller can wait for them.
+ *
+ * @w:           array receiving the nthreads thread ids
+ * @thread_attr: thread attributes (received by value)
+ * @cpu:         CPU map the threads are spread over
+ *
+ * Any failure terminates the process.
+ */
+static void block_threads(pthread_t *w,
+			  pthread_attr_t thread_attr, struct cpu_map *cpu)
+{
+	unsigned int n;
+
+	threads_starting = nthreads;
+
+	for (n = 0; n < nthreads; n++) {
+		cpu_set_t mask;
+
+		CPU_ZERO(&mask);
+		CPU_SET(cpu->map[n % cpu->nr], &mask);
+
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &mask) != 0)
+			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+		if (pthread_create(&w[n], &thread_attr, workerfn, NULL) != 0)
+			err(EXIT_FAILURE, "pthread_create");
+	}
+}
+
+/*
+ * SIGINT handler: sets 'done', which the run loop in bench_futex_requeue()
+ * tests before starting another repetition.  All parameters are unused.
+ */
+static void toggle_done(int sig __maybe_unused,
+			siginfo_t *info __maybe_unused,
+			void *uc __maybe_unused)
+{
+	done = true;
+}
+
+/*
+ * bench_futex_requeue - entry point of "perf bench futex requeue".
+ *
+ * For each of bench_repeat runs (or until SIGINT): blocks nthreads workers
+ * on futex1, times how long it takes to requeue all of them to futex2,
+ * nrequeue at a time, then wakes and joins them.  Prints per-run figures
+ * unless silent, and the averages at the end.
+ *
+ * Returns the result of the last pthread_join(), i.e. 0 on success; bad
+ * arguments and setup or requeue failures terminate the process.
+ */
+int bench_futex_requeue(int argc, const char **argv)
+{
+	int ret = 0;
+	unsigned int i, j;
+	struct sigaction act;
+	pthread_attr_t thread_attr;
+	struct cpu_map *cpu;
+
+	argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
+	if (argc)
+		goto err;
+
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		err(EXIT_FAILURE, "cpu_map__new");
+
+	/*
+	 * Start from a zeroed sigaction so no stray flag bits are passed to
+	 * the kernel, and request the three-argument handler form that
+	 * toggle_done() implements.
+	 */
+	memset(&act, 0, sizeof(act));
+	sigfillset(&act.sa_mask);
+	act.sa_sigaction = toggle_done;
+	act.sa_flags = SA_SIGINFO;
+	sigaction(SIGINT, &act, NULL);
+
+	if (!nthreads)
+		nthreads = cpu->nr;
+
+	worker = calloc(nthreads, sizeof(*worker));
+	if (!worker)
+		err(EXIT_FAILURE, "calloc");
+
+	if (!fshared)
+		futex_flag = FUTEX_PRIVATE_FLAG;
+
+	if (nrequeue > nthreads)
+		nrequeue = nthreads;
+	/* requeueing 0 tasks per call would never make progress below */
+	if (!nrequeue)
+		nrequeue = 1;
+
+	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
+	       "%d at a time.\n\n",  getpid(), nthreads,
+	       fshared ? "shared":"private", &futex1, &futex2, nrequeue);
+
+	init_stats(&requeued_stats);
+	init_stats(&requeuetime_stats);
+	pthread_attr_init(&thread_attr);
+	pthread_mutex_init(&thread_lock, NULL);
+	pthread_cond_init(&thread_parent, NULL);
+	pthread_cond_init(&thread_worker, NULL);
+
+	for (j = 0; j < bench_repeat && !done; j++) {
+		unsigned int nrequeued = 0;
+		struct timeval start, end, runtime;
+
+		/* create, launch & block all threads */
+		block_threads(worker, thread_attr, cpu);
+
+		/* make sure all threads are already blocked */
+		pthread_mutex_lock(&thread_lock);
+		while (threads_starting)
+			pthread_cond_wait(&thread_parent, &thread_lock);
+		pthread_cond_broadcast(&thread_worker);
+		pthread_mutex_unlock(&thread_lock);
+
+		usleep(100000);
+
+		/* Ok, all threads are patiently blocked, start requeueing */
+		gettimeofday(&start, NULL);
+		while (nrequeued < nthreads) {
+			int r;
+
+			/*
+			 * Do not wakeup any tasks blocked on futex1, allowing
+			 * us to really measure futex_wait functionality.
+			 */
+			r = futex_cmp_requeue(&futex1, 0, &futex2, 0,
+					      nrequeue, futex_flag);
+			/*
+			 * A failed call must not be added to the unsigned
+			 * counter: it would wrap and corrupt the result.
+			 */
+			if (r < 0)
+				err(EXIT_FAILURE, "couldn't requeue from %p to %p",
+				    &futex1, &futex2);
+			nrequeued += r;
+		}
+
+		gettimeofday(&end, NULL);
+		timersub(&end, &start, &runtime);
+
+		update_stats(&requeued_stats, nrequeued);
+		update_stats(&requeuetime_stats, runtime.tv_usec);
+
+		if (!silent) {
+			printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
+			       j + 1, nrequeued, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
+		}
+
+		/* everybody should be blocked on futex2, wake'em up */
+		nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
+		if (nthreads != nrequeued)
+			warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
+
+		for (i = 0; i < nthreads; i++) {
+			ret = pthread_join(worker[i], NULL);
+			if (ret)
+				err(EXIT_FAILURE, "pthread_join");
+		}
+	}
+
+	/* cleanup & report results */
+	pthread_cond_destroy(&thread_parent);
+	pthread_cond_destroy(&thread_worker);
+	pthread_mutex_destroy(&thread_lock);
+	pthread_attr_destroy(&thread_attr);
+
+	print_summary();
+
+	free(worker);
+	/* release the CPU map the same way futex-hash.c does */
+	free(cpu);
+	return ret;
+err:
+	usage_with_options(bench_futex_requeue_usage, options);
+	exit(EXIT_FAILURE);
+}
diff --git a/bench/futex-wake-parallel.c b/bench/futex-wake-parallel.c
new file mode 100644
index 0000000..69d8fdc
--- /dev/null
+++ b/bench/futex-wake-parallel.c
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015 Davidlohr Bueso.
+ *
+ * Block a bunch of threads and let parallel waker threads wakeup an
+ * equal amount of them. The program output reflects the avg latency
+ * for each individual thread to service its share of work. Ultimately
+ * it can be used to measure futex_wake() changes.
+ */
+#include "bench.h"
+#include <linux/compiler.h>
+#include "../util/debug.h"
+
+#ifndef HAVE_PTHREAD_BARRIER
+/*
+ * Stub built when HAVE_PTHREAD_BARRIER is not defined: reports that the
+ * benchmark is unavailable and returns 0.  Both parameters are unused.
+ */
+int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused)
+{
+	pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__);
+	return 0;
+}
+#else /* HAVE_PTHREAD_BARRIER */
+/* For the CLR_() macros */
+#include <string.h>
+#include <pthread.h>
+
+#include <signal.h>
+#include "../util/stat.h"
+#include <subcmd/parse-options.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <errno.h>
+#include "futex.h"
+#include "cpumap.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+/* State of one waker thread. */
+struct thread_data {
+	pthread_t worker;
+	unsigned int nwoken;	/* value returned by this thread's futex_wake() call */
+	struct timeval runtime;	/* time spent in that futex_wake() call */
+};
+
+/* tasks each waker asks futex_wake() to wake; recomputed by the entry point */
+static unsigned int nwakes = 1;
+
+/* all threads will block on the same futex -- hash bucket chaos ;) */
+static u_int32_t futex = 0;
+
+static pthread_t *blocked_worker;
+static bool done = false;
+static bool silent = false;
+static bool fshared = false;
+static unsigned int nblocked_threads = 0;
+static unsigned int nwaking_threads = 0;
+/* thread_lock protects threads_starting and backs both condition variables */
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent;
+static pthread_cond_t thread_worker;
+/* lines up the wakers and their parent before the timed futex_wake() */
+static pthread_barrier_t barrier;
+static struct stats waketime_stats;
+static struct stats wakeup_stats;
+static unsigned int threads_starting;
+static int futex_flag = 0;
+
+/* Command line options; each entry stores straight into the file-scope variable it names. */
+static const struct option options[] = {
+	OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"),
+	OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
+	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_END()
+};
+
+/* NULL-terminated usage text passed to parse_options()/usage_with_options(). */
+static const char * const bench_futex_wake_parallel_usage[] = {
+	"perf bench futex wake-parallel <options>",
+	NULL
+};
+
+/*
+ * Waker thread body: wait at the barrier so all wakers start together,
+ * then issue one futex_wake() for nwakes tasks and record how many were
+ * woken and how long the call took.
+ *
+ * @arg: the struct thread_data of this waker.
+ */
+static void *waking_workerfn(void *arg)
+{
+	struct thread_data *self = (struct thread_data *) arg;
+	struct timeval t0, t1;
+
+	pthread_barrier_wait(&barrier);
+
+	gettimeofday(&t0, NULL);
+
+	self->nwoken = futex_wake(&futex, nwakes, futex_flag);
+	if (self->nwoken != nwakes) {
+		warnx("couldn't wakeup all tasks (%d/%d)",
+		      self->nwoken, nwakes);
+	}
+
+	gettimeofday(&t1, NULL);
+	timersub(&t1, &t0, &self->runtime);
+
+	pthread_exit(NULL);
+	return NULL;
+}
+
+/*
+ * Run the nwaking_threads wakers: create them, release them together
+ * through the barrier and join them all before returning.
+ *
+ * @td:          array of nwaking_threads entries, one per waker
+ * @thread_attr: thread attributes (received by value)
+ *
+ * Any failure to create or join a thread terminates the process.
+ */
+static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
+{
+	unsigned int n;
+
+	pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
+
+	/* one slot per waker plus one for this thread */
+	pthread_barrier_init(&barrier, NULL, nwaking_threads + 1);
+
+	/*
+	 * Create the wakers; each parks at the barrier.
+	 *
+	 * Thread creation order will impact per-thread latency
+	 * as it will affect the order to acquire the hb spinlock.
+	 * For now let the scheduler decide.
+	 */
+	for (n = 0; n < nwaking_threads; n++) {
+		struct thread_data *waker = &td[n];
+
+		if (pthread_create(&waker->worker, &thread_attr,
+				   waking_workerfn, (void *)waker) != 0)
+			err(EXIT_FAILURE, "pthread_create");
+	}
+
+	pthread_barrier_wait(&barrier);
+
+	for (n = 0; n < nwaking_threads; n++) {
+		if (pthread_join(td[n].worker, NULL) != 0)
+			err(EXIT_FAILURE, "pthread_join");
+	}
+
+	pthread_barrier_destroy(&barrier);
+}
+
+/*
+ * Blocked thread body: check in with the parent, wait for the start
+ * broadcast, then sleep on the shared futex until a waker wakes it.
+ * @arg is unused.
+ */
+static void *blocked_workerfn(void *arg __maybe_unused)
+{
+	pthread_mutex_lock(&thread_lock);
+	threads_starting--;
+	if (!threads_starting)
+		pthread_cond_signal(&thread_parent);
+	pthread_cond_wait(&thread_worker, &thread_lock);
+	pthread_mutex_unlock(&thread_lock);
+
+	while (1) { /* handle spurious wakeups */
+		/*
+		 * futex_wait() follows the syscall convention (-1 with errno
+		 * set), as the errno checks in futex-hash.c also assume, so
+		 * its return value can never equal EINTR.  Go back to sleep
+		 * only when the wait was interrupted by a signal.
+		 */
+		if (futex_wait(&futex, 0, NULL, futex_flag) == 0 ||
+		    errno != EINTR)
+			break;
+	}
+
+	pthread_exit(NULL);
+	return NULL;
+}
+
+/*
+ * Start the nblocked_threads sleepers, pinning them round-robin to the
+ * CPUs in @cpu, and reset threads_starting so the caller can wait for them.
+ *
+ * @w:           array receiving the nblocked_threads thread ids
+ * @thread_attr: thread attributes (received by value)
+ * @cpu:         CPU map the threads are spread over
+ *
+ * Any failure terminates the process.
+ */
+static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
+			  struct cpu_map *cpu)
+{
+	unsigned int n;
+
+	threads_starting = nblocked_threads;
+
+	for (n = 0; n < nblocked_threads; n++) {
+		cpu_set_t mask;
+
+		CPU_ZERO(&mask);
+		CPU_SET(cpu->map[n % cpu->nr], &mask);
+
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &mask) != 0)
+			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+		if (pthread_create(&w[n], &thread_attr, blocked_workerfn, NULL) != 0)
+			err(EXIT_FAILURE, "pthread_create");
+	}
+}
+
+/*
+ * Print the figures of a single run: average number of tasks woken per
+ * waker and average per-waker latency with its relative standard deviation.
+ *
+ * @waking_worker: the nwaking_threads wakers of this run
+ * @run_num:       zero-based run index (printed one-based)
+ */
+static void print_run(struct thread_data *waking_worker, unsigned int run_num)
+{
+	struct stats run_waketime, run_wakeups;
+	double avg_usec, stddev_usec;
+	unsigned int avg_woken, n;
+
+	init_stats(&run_wakeups);
+	init_stats(&run_waketime);
+
+	for (n = 0; n < nwaking_threads; n++) {
+		const struct thread_data *waker = &waking_worker[n];
+
+		update_stats(&run_waketime, waker->runtime.tv_usec);
+		update_stats(&run_wakeups, waker->nwoken);
+	}
+
+	avg_usec = avg_stats(&run_waketime);
+	stddev_usec = stddev_stats(&run_waketime);
+	avg_woken = avg_stats(&run_wakeups);
+
+	printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
+	       "in %.4f ms (+-%.2f%%)\n",
+	       run_num + 1, avg_woken, nblocked_threads,
+	       avg_usec / USEC_PER_MSEC,
+	       rel_stddev_stats(stddev_usec, avg_usec));
+}
+
+/*
+ * Print the figures accumulated over all runs in wakeup_stats and
+ * waketime_stats: average tasks woken per waker and average per-waker
+ * latency in milliseconds with its relative standard deviation.
+ */
+static void print_summary(void)
+{
+	double avg_usec = avg_stats(&waketime_stats);
+	double stddev_usec = stddev_stats(&waketime_stats);
+	unsigned int avg_woken = avg_stats(&wakeup_stats);
+
+	printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
+	       avg_woken, nblocked_threads,
+	       avg_usec / USEC_PER_MSEC,
+	       rel_stddev_stats(stddev_usec, avg_usec));
+}
+
+
+/*
+ * Fold the results of one run into the global statistics.
+ *
+ * @waking_worker: the nwaking_threads wakers of the run just finished
+ */
+static void do_run_stats(struct thread_data *waking_worker)
+{
+	struct thread_data *waker = waking_worker;
+	struct thread_data *stop = waking_worker + nwaking_threads;
+
+	for (; waker < stop; waker++) {
+		update_stats(&waketime_stats, waker->runtime.tv_usec);
+		update_stats(&wakeup_stats, waker->nwoken);
+	}
+}
+
+/*
+ * SIGINT handler: sets 'done', which the run loop in
+ * bench_futex_wake_parallel() tests before starting another repetition.
+ * All parameters are unused.
+ */
+static void toggle_done(int sig __maybe_unused,
+			siginfo_t *info __maybe_unused,
+			void *uc __maybe_unused)
+{
+	done = true;
+}
+
+/*
+ * bench_futex_wake_parallel - entry point of "perf bench futex wake-parallel".
+ *
+ * For each of bench_repeat runs (or until SIGINT): blocks nblocked_threads
+ * threads on one futex, then lets nwaking_threads wakers each wake an
+ * equal share of them in parallel and records the per-waker latency.
+ * Prints per-run figures unless silent, and the averages at the end.
+ *
+ * Returns the result of the last pthread_join(), i.e. 0 on success; bad
+ * arguments and setup failures terminate the process.
+ */
+int bench_futex_wake_parallel(int argc, const char **argv)
+{
+	int ret = 0;
+	unsigned int i, j;
+	struct sigaction act;
+	pthread_attr_t thread_attr;
+	struct thread_data *waking_worker;
+	struct cpu_map *cpu;
+
+	argc = parse_options(argc, argv, options,
+			     bench_futex_wake_parallel_usage, 0);
+	if (argc) {
+		usage_with_options(bench_futex_wake_parallel_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * Start from a zeroed sigaction so no stray flag bits are passed to
+	 * the kernel, and request the three-argument handler form that
+	 * toggle_done() implements.
+	 */
+	memset(&act, 0, sizeof(act));
+	sigfillset(&act.sa_mask);
+	act.sa_sigaction = toggle_done;
+	act.sa_flags = SA_SIGINFO;
+	sigaction(SIGINT, &act, NULL);
+
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		err(EXIT_FAILURE, "cpu_map__new");
+
+	if (!nblocked_threads)
+		nblocked_threads = cpu->nr;
+
+	/* some sanity checks */
+	if (nwaking_threads > nblocked_threads || !nwaking_threads)
+		nwaking_threads = nblocked_threads;
+
+	if (nblocked_threads % nwaking_threads)
+		errx(EXIT_FAILURE, "Must be perfectly divisible");
+	/*
+	 * Each thread will wakeup nwakes tasks in
+	 * a single futex_wake call.
+	 */
+	nwakes = nblocked_threads/nwaking_threads;
+
+	blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker));
+	if (!blocked_worker)
+		err(EXIT_FAILURE, "calloc");
+
+	if (!fshared)
+		futex_flag = FUTEX_PRIVATE_FLAG;
+
+	printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
+	       "futex %p), %d threads waking up %d at a time.\n\n",
+	       getpid(), nblocked_threads, fshared ? "shared":"private",
+	       &futex, nwaking_threads, nwakes);
+
+	init_stats(&wakeup_stats);
+	init_stats(&waketime_stats);
+
+	pthread_attr_init(&thread_attr);
+	pthread_mutex_init(&thread_lock, NULL);
+	pthread_cond_init(&thread_parent, NULL);
+	pthread_cond_init(&thread_worker, NULL);
+
+	for (j = 0; j < bench_repeat && !done; j++) {
+		waking_worker = calloc(nwaking_threads, sizeof(*waking_worker));
+		if (!waking_worker)
+			err(EXIT_FAILURE, "calloc");
+
+		/* create, launch & block all threads */
+		block_threads(blocked_worker, thread_attr, cpu);
+
+		/* make sure all threads are already blocked */
+		pthread_mutex_lock(&thread_lock);
+		while (threads_starting)
+			pthread_cond_wait(&thread_parent, &thread_lock);
+		pthread_cond_broadcast(&thread_worker);
+		pthread_mutex_unlock(&thread_lock);
+
+		usleep(100000);
+
+		/* Ok, all threads are patiently blocked, start waking folks up */
+		wakeup_threads(waking_worker, thread_attr);
+
+		for (i = 0; i < nblocked_threads; i++) {
+			ret = pthread_join(blocked_worker[i], NULL);
+			if (ret)
+				err(EXIT_FAILURE, "pthread_join");
+		}
+
+		do_run_stats(waking_worker);
+		if (!silent)
+			print_run(waking_worker, j);
+
+		free(waking_worker);
+	}
+
+	/* cleanup & report results */
+	pthread_cond_destroy(&thread_parent);
+	pthread_cond_destroy(&thread_worker);
+	pthread_mutex_destroy(&thread_lock);
+	pthread_attr_destroy(&thread_attr);
+
+	print_summary();
+
+	free(blocked_worker);
+	/* release the CPU map the same way futex-hash.c does */
+	free(cpu);
+	return ret;
+}
+#endif /* HAVE_PTHREAD_BARRIER */
diff --git a/bench/futex-wake.c b/bench/futex-wake.c
new file mode 100644
index 0000000..e8181ad
--- /dev/null
+++ b/bench/futex-wake.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013  Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-wake: Block a bunch of threads on a futex and wake'em up, N at a time.
+ *
+ * This program is particularly useful to measure the latency of nthread wakeups
+ * in non-error situations:  all waiters are queued and all wake calls wakeup
+ * one or more tasks, and thus the waitqueue is never empty.
+ */
+
+/* For the CPU_*() macros (CPU_ZERO, CPU_SET) */
+#include <string.h>
+#include <pthread.h>
+
+#include <signal.h>
+#include "../util/stat.h"
+#include <subcmd/parse-options.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <errno.h>
+#include "bench.h"
+#include "futex.h"
+#include "cpumap.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+/* all threads will block on the same futex */
+static u_int32_t futex1 = 0;
+
+/*
+ * How many wakeups to do at a time.
+ * Default to 1 in order to make the kernel work more.
+ */
+static unsigned int nwakes = 1;
+
+/* Handles of the blocked worker threads; allocated in bench_futex_wake(). */
+pthread_t *worker;
+/*
+ * done is set from the SIGINT handler and stops the repeat loop;
+ * silent and fshared mirror the -s and -S command line options.
+ */
+static bool done = false, silent = false, fshared = false;
+/* thread_lock guards threads_starting and is used with both condvars below. */
+static pthread_mutex_t thread_lock;
+/*
+ * thread_parent: signalled by the last worker to start, waited on by the parent.
+ * thread_worker: broadcast by the parent, waited on by every worker.
+ */
+static pthread_cond_t thread_parent, thread_worker;
+/* Per-run wake time and number of woken threads, accumulated over all runs. */
+static struct stats waketime_stats, wakeup_stats;
+/* nthreads is set by -t; when left at 0 it falls back to ncpus. */
+static unsigned int ncpus, threads_starting, nthreads = 0;
+/* OR'd into every futex op; FUTEX_PRIVATE_FLAG unless -S was given. */
+static int futex_flag = 0;
+
+/* Command line options accepted by the futex wake benchmark. */
+static const struct option options[] = {
+	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+	OPT_UINTEGER('w', "nwakes",  &nwakes,   "Specify amount of threads to wake at once"),
+	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_END()
+};
+
+/* Usage lines handed to parse_options()/usage_with_options(); NULL terminated. */
+static const char * const bench_futex_wake_usage[] = {
+	"perf bench futex wake <options>",
+	NULL
+};
+
+/*
+ * Worker thread body.
+ *
+ * Reports itself as started (the last worker to do so signals the parent),
+ * parks on thread_worker until the parent broadcasts, and then blocks on
+ * futex1 until it is woken.
+ *
+ * @arg is unused. Always returns NULL (via pthread_exit()).
+ */
+static void *workerfn(void *arg __maybe_unused)
+{
+	pthread_mutex_lock(&thread_lock);
+	threads_starting--;
+	if (!threads_starting)
+		pthread_cond_signal(&thread_parent);
+	pthread_cond_wait(&thread_worker, &thread_lock);
+	pthread_mutex_unlock(&thread_lock);
+
+	/*
+	 * futex_wait() is a thin syscall() wrapper: on failure it returns -1
+	 * and reports the reason in errno, so the return value itself can
+	 * never equal EINTR. Retry only when the wait was interrupted by a
+	 * signal; any other outcome (woken, or a different error) ends the
+	 * wait.
+	 */
+	while (futex_wait(&futex1, 0, NULL, futex_flag) < 0 && errno == EINTR)
+		;
+
+	pthread_exit(NULL);
+	return NULL;
+}
+
+/*
+ * Print the aggregate result over all runs: average number of threads woken,
+ * average wake time in milliseconds and its relative standard deviation.
+ */
+static void print_summary(void)
+{
+	double avg_usec = avg_stats(&waketime_stats);
+	double stddev_usec = stddev_stats(&waketime_stats);
+	unsigned int avg_woken = avg_stats(&wakeup_stats);
+	double rel_stddev = rel_stddev_stats(stddev_usec, avg_usec);
+
+	printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
+	       avg_woken, nthreads, avg_usec / USEC_PER_MSEC, rel_stddev);
+}
+
+/*
+ * Create nthreads workers running workerfn(), storing their handles in @w.
+ * Worker number t gets its affinity set to the single CPU
+ * cpu->map[t % cpu->nr]. Any failure terminates the program through err().
+ *
+ * threads_starting is reset to nthreads first so that the workers can count
+ * themselves down.
+ */
+static void block_threads(pthread_t *w,
+			  pthread_attr_t thread_attr, struct cpu_map *cpu)
+{
+	unsigned int t;
+
+	threads_starting = nthreads;
+
+	for (t = 0; t < nthreads; t++) {
+		cpu_set_t affinity;
+
+		/* pin this worker to one CPU, cycling through the map */
+		CPU_ZERO(&affinity);
+		CPU_SET(cpu->map[t % cpu->nr], &affinity);
+
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &affinity) != 0)
+			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+		if (pthread_create(&w[t], &thread_attr, workerfn, NULL) != 0)
+			err(EXIT_FAILURE, "pthread_create");
+	}
+}
+
+/*
+ * Signal handler (installed for SIGINT by bench_futex_wake()): flags the
+ * benchmark as done so the repeat loop stops. All arguments are unused.
+ */
+static void toggle_done(int sig __maybe_unused,
+			siginfo_t *info __maybe_unused,
+			void *uc __maybe_unused)
+{
+	done = true;
+}
+
+/*
+ * Entry point of the futex wake benchmark.
+ *
+ * For each of bench_repeat runs (or until SIGINT sets done): start nthreads
+ * workers, wait until all of them have started, give them time to block on
+ * futex1, then time how long it takes to wake them nwakes at a time. Per-run
+ * results are printed unless -s was given; a summary is printed at the end.
+ *
+ * @argc/@argv: benchmark arguments; any leftover non-option argument prints
+ *              the usage and exits with EXIT_FAILURE.
+ *
+ * Returns the result of the last pthread_join() (0 on success); allocation
+ * and thread failures terminate the program through err().
+ */
+int bench_futex_wake(int argc, const char **argv)
+{
+	int ret = 0;
+	unsigned int i, j;
+	struct sigaction act;
+	pthread_attr_t thread_attr;
+	struct cpu_map *cpu;
+
+	argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
+	if (argc) {
+		usage_with_options(bench_futex_wake_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	cpu = cpu_map__new(NULL);
+	if (!cpu)
+		err(EXIT_FAILURE, "calloc");
+
+	/*
+	 * Clear the whole struct first: sa_flags would otherwise be whatever
+	 * happened to be on the stack. toggle_done() has the three-argument
+	 * signature, so request SA_SIGINFO delivery explicitly.
+	 */
+	memset(&act, 0, sizeof(act));
+	sigfillset(&act.sa_mask);
+	act.sa_sigaction = toggle_done;
+	act.sa_flags = SA_SIGINFO;
+	sigaction(SIGINT, &act, NULL);
+
+	/* default thread count: one worker per CPU in the map */
+	ncpus = cpu->nr;
+	if (!nthreads)
+		nthreads = ncpus;
+
+	worker = calloc(nthreads, sizeof(*worker));
+	if (!worker)
+		err(EXIT_FAILURE, "calloc");
+
+	if (!fshared)
+		futex_flag = FUTEX_PRIVATE_FLAG;
+
+	printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), "
+	       "waking up %d at a time.\n\n",
+	       getpid(), nthreads, fshared ? "shared":"private",  &futex1, nwakes);
+
+	init_stats(&wakeup_stats);
+	init_stats(&waketime_stats);
+	pthread_attr_init(&thread_attr);
+	pthread_mutex_init(&thread_lock, NULL);
+	pthread_cond_init(&thread_parent, NULL);
+	pthread_cond_init(&thread_worker, NULL);
+
+	for (j = 0; j < bench_repeat && !done; j++) {
+		unsigned int nwoken = 0;
+		unsigned long runtime_us;
+		struct timeval start, end, runtime;
+
+		/* create, launch & block all threads */
+		block_threads(worker, thread_attr, cpu);
+
+		/* make sure all threads are already blocked */
+		pthread_mutex_lock(&thread_lock);
+		while (threads_starting)
+			pthread_cond_wait(&thread_parent, &thread_lock);
+		pthread_cond_broadcast(&thread_worker);
+		pthread_mutex_unlock(&thread_lock);
+
+		usleep(100000);
+
+		/* Ok, all threads are patiently blocked, start waking folks up */
+		gettimeofday(&start, NULL);
+		while (nwoken != nthreads)
+			nwoken += futex_wake(&futex1, nwakes, futex_flag);
+		gettimeofday(&end, NULL);
+		timersub(&end, &start, &runtime);
+
+		/*
+		 * timersub() keeps tv_usec below one second, so fold tv_sec
+		 * back in; otherwise runs longer than a second are
+		 * under-reported.
+		 */
+		runtime_us = runtime.tv_sec * USEC_PER_SEC + runtime.tv_usec;
+
+		update_stats(&wakeup_stats, nwoken);
+		update_stats(&waketime_stats, runtime_us);
+
+		if (!silent) {
+			printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
+			       j + 1, nwoken, nthreads, runtime_us / (double)USEC_PER_MSEC);
+		}
+
+		for (i = 0; i < nthreads; i++) {
+			ret = pthread_join(worker[i], NULL);
+			if (ret)
+				err(EXIT_FAILURE, "pthread_join");
+		}
+
+	}
+
+	/* cleanup & report results */
+	pthread_cond_destroy(&thread_parent);
+	pthread_cond_destroy(&thread_worker);
+	pthread_mutex_destroy(&thread_lock);
+	pthread_attr_destroy(&thread_attr);
+
+	print_summary();
+
+	free(worker);
+	return ret;
+}
diff --git a/bench/futex.h b/bench/futex.h
new file mode 100644
index 0000000..db4853f
--- /dev/null
+++ b/bench/futex.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Glibc independent futex library for testing kernel functionality.
+ * Shamelessly stolen from Darren Hart <dvhltc@us.ibm.com>
+ *    http://git.kernel.org/cgit/linux/kernel/git/dvhart/futextest.git/
+ */
+
+#ifndef _FUTEX_H
+#define _FUTEX_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <linux/futex.h>
+
+/**
+ * futex() - SYS_futex syscall wrapper
+ * @uaddr:	address of first futex
+ * @op:		futex op code
+ * @val:	typically expected value of uaddr, but varies by op
+ * @timeout:	typically an absolute struct timespec (except where noted
+ *		otherwise). Overloaded by some ops
+ * @uaddr2:	address of second futex for some ops
+ * @val3:	varies by op
+ * @opflags:	flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
+ *
+ * futex() is used by all the following futex op wrappers. It can also be
+ * used for misuse and abuse testing. Generally, the specific op wrappers
+ * should be used instead. It is a macro instead of a static inline function as
+ * some of the types are overloaded (timeout is used for nr_requeue for
+ * example).
+ *
+ * These argument descriptions are the defaults for all
+ * like-named arguments in the following wrappers except where noted below.
+ */
+#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
+	syscall(SYS_futex, uaddr, (op) | (opflags), val, timeout, uaddr2, val3)
+
+/**
+ * futex_wait() - block on uaddr with optional timeout
+ * @uaddr:	futex word to wait on
+ * @val:	value handed to FUTEX_WAIT as the expected content of uaddr
+ * @timeout:	relative timeout
+ * @opflags:	flags OR'd into the op, such as FUTEX_PRIVATE_FLAG
+ *
+ * Returns the result of the underlying syscall(), converted to int.
+ */
+static inline int
+futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags)
+{
+	return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake() - wake one or more tasks blocked on uaddr
+ * @uaddr:	futex word the tasks are blocked on
+ * @nr_wake:	wake up to this many tasks
+ * @opflags:	flags OR'd into the op, such as FUTEX_PRIVATE_FLAG
+ *
+ * Returns the result of the underlying syscall(), converted to int; callers
+ * in this directory add it to their count of woken threads.
+ */
+static inline int
+futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
+{
+	return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_lock_pi() - block on uaddr as a PI mutex
+ * @uaddr:	futex word used as the PI mutex
+ * @timeout:	timeout passed through to FUTEX_LOCK_PI
+ * @opflags:	flags OR'd into the op, such as FUTEX_PRIVATE_FLAG
+ *
+ * The val argument of the op is always passed as 0.
+ */
+static inline int
+futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags)
+{
+	return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
+ * @uaddr:	futex word used as the PI mutex
+ * @opflags:	flags OR'd into the op, such as FUTEX_PRIVATE_FLAG
+ *
+ * No value, timeout or second futex is passed for this op.
+ */
+static inline int
+futex_unlock_pi(u_int32_t *uaddr, int opflags)
+{
+	return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
+ * @uaddr:	futex the tasks are currently queued on
+ * @val:	value handed over in the val3 slot of the syscall
+ * @uaddr2:	futex the tasks are requeued to
+ * @nr_wake:	wake up to this many tasks
+ * @nr_requeue:	requeue up to this many tasks
+ * @opflags:	flags OR'd into the op, such as FUTEX_PRIVATE_FLAG
+ *
+ * Note the argument shuffling: nr_wake travels in the val slot and
+ * nr_requeue in the timeout slot of futex().
+ */
+static inline int
+futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake,
+		 int nr_requeue, int opflags)
+{
+	return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
+		 val, opflags);
+}
+
+/*
+ * Fallback for builds where HAVE_PTHREAD_ATTR_SETAFFINITY_NP is not defined:
+ * a stub that ignores all of its arguments and reports success, so callers
+ * compile and run without any affinity being applied.
+ */
+#ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
+#include <pthread.h>
+#include <linux/compiler.h>
+static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr __maybe_unused,
+					      size_t cpusetsize __maybe_unused,
+					      cpu_set_t *cpuset __maybe_unused)
+{
+	return 0;
+}
+#endif
+
+#endif /* _FUTEX_H */
diff --git a/bench/mem-functions.c b/bench/mem-functions.c
new file mode 100644
index 0000000..0251dd3
--- /dev/null
+++ b/bench/mem-functions.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * mem-functions.c
+ *
+ * Simple memcpy() and memset() benchmarks
+ *
+ * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+
+#include "debug.h"
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../util/header.h"
+#include "../util/cloexec.h"
+#include "../util/string2.h"
+#include "bench.h"
+#include "mem-memcpy-arch.h"
+#include "mem-memset-arch.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+#include <linux/time64.h>
+
+/* Unit step used when scaling throughput in print_bps(). */
+#define K 1024
+
+/* Values behind the -s, -f, -l and -c options (defaults shown). */
+static const char	*size_str	= "1MB";
+static const char	*function_str	= "all";
+static int		nr_loops	= 1;
+static bool		use_cycles;
+/* File descriptor of the cycles event; opened by init_cycles(). */
+static int		cycles_fd;
+
+/* Command line options shared by the memcpy and memset benchmarks. */
+static const struct option options[] = {
+	OPT_STRING('s', "size", &size_str, "1MB",
+		    "Specify the size of the memory buffers. "
+		    "Available units: B, KB, MB, GB and TB (case insensitive)"),
+
+	OPT_STRING('f', "function", &function_str, "all",
+		    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
+
+	OPT_INTEGER('l', "nr_loops", &nr_loops,
+		    "Specify the number of loops to run. (default: 1)"),
+
+	OPT_BOOLEAN('c', "cycles", &use_cycles,
+		    "Use a cycles event instead of gettimeofday() to measure performance"),
+
+	OPT_END()
+};
+
+/* Signatures of the routines being benchmarked. */
+typedef void *(*memcpy_t)(void *, const void *, size_t);
+typedef void *(*memset_t)(void *, int, size_t);
+
+/*
+ * One benchmarkable routine: name is what -f is matched against, desc is the
+ * text shown next to it, and fn holds either a memcpy-style or a memset-style
+ * function pointer depending on which table the entry lives in.
+ */
+struct function {
+	const char *name;
+	const char *desc;
+	union {
+		memcpy_t memcpy;
+		memset_t memset;
+	} fn;
+};
+
+/* Event description passed to sys_perf_event_open() by init_cycles(). */
+static struct perf_event_attr cycle_attr = {
+	.type		= PERF_TYPE_HARDWARE,
+	.config		= PERF_COUNT_HW_CPU_CYCLES
+};
+
+/*
+ * Open the cycles event for the current process and remember its file
+ * descriptor in cycles_fd.
+ *
+ * Returns the descriptor, which is negative when the open failed. An ENOSYS
+ * failure additionally logs a hint about missing kernel support and is
+ * reported as -1.
+ */
+static int init_cycles(void)
+{
+	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
+
+	/* success, or a failure other than "syscall not implemented" */
+	if (cycles_fd >= 0 || errno != ENOSYS)
+		return cycles_fd;
+
+	pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+	return -1;
+}
+
+/*
+ * Read the current value of the cycles event from cycles_fd.
+ * A short or failed read trips BUG_ON().
+ */
+static u64 get_cycles(void)
+{
+	u64 count;
+	int nread;
+
+	nread = read(cycles_fd, &count, sizeof(count));
+	BUG_ON(nread != sizeof(count));
+
+	return count;
+}
+
+/* Convert a struct timeval to seconds as a double. */
+static double timeval2double(struct timeval *ts)
+{
+	double whole = (double)ts->tv_sec;
+	double fraction = (double)ts->tv_usec / (double)USEC_PER_SEC;
+
+	return whole + fraction;
+}
+
+/*
+ * Print a throughput given in bytes per second, scaled to bytes, KB, MB or
+ * GB per second depending on its magnitude (steps of K).
+ *
+ * The argument is expanded several times, so pass a plain value rather than
+ * an expression with side effects.
+ */
+#define print_bps(x) do {						\
+		if ((x) < K)						\
+			printf(" %14lf bytes/sec\n", (x));		\
+		else if ((x) < K * K)					\
+			printf(" %14lf KB/sec\n", (x) / K);		\
+		else if ((x) < K * K * K)				\
+			printf(" %14lf MB/sec\n", (x) / K / K);		\
+		else							\
+			printf(" %14lf GB/sec\n", (x) / K / K / K);	\
+	} while (0)
+
+/*
+ * Description of one benchmark family (memcpy or memset) handed to
+ * bench_mem_common():
+ *   functions       - table of routines, terminated by a NULL name
+ *   do_cycles       - runs a routine and returns the elapsed cycle count
+ *   do_gettimeofday - runs a routine and returns bytes per second
+ *   usage           - usage strings for option parsing
+ *   alloc_src       - whether a source buffer must be allocated as well
+ */
+struct bench_mem_info {
+	const struct function *functions;
+	u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
+	double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
+	const char *const *usage;
+	bool alloc_src;
+};
+
+/*
+ * Benchmark one routine from info->functions and print the result.
+ *
+ * @info:       benchmark family description
+ * @r_idx:      index of the routine inside info->functions
+ * @size:       size in bytes of the buffer(s) to allocate
+ * @size_total: total number of bytes processed, used for cycles/byte
+ *
+ * A destination buffer is always allocated, a source buffer only when
+ * info->alloc_src is set. If an allocation fails a message is printed and
+ * nothing is measured. Both buffers are freed before returning.
+ */
+static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
+{
+	const struct function *r = info->functions + r_idx;
+	void *dst = zalloc(size);
+	void *src = NULL;
+	double bps = 0.0;
+	u64 cycles = 0;
+	bool have_bufs;
+
+	printf("# function '%s' (%s)\n", r->name, r->desc);
+
+	have_bufs = (dst != NULL);
+	if (have_bufs && info->alloc_src) {
+		src = zalloc(size);
+		have_bufs = (src != NULL);
+	}
+
+	if (!have_bufs) {
+		printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
+		goto out_free;
+	}
+
+	if (bench_format == BENCH_FORMAT_DEFAULT)
+		printf("# Copying %s bytes ...\n\n", size_str);
+
+	/* exactly one of the two measurements is taken */
+	if (use_cycles)
+		cycles = info->do_cycles(r, size, src, dst);
+	else
+		bps = info->do_gettimeofday(r, size, src, dst);
+
+	if (bench_format == BENCH_FORMAT_DEFAULT) {
+		if (use_cycles) {
+			printf(" %14lf cycles/byte\n", (double)cycles/size_total);
+		} else {
+			print_bps(bps);
+		}
+	} else if (bench_format == BENCH_FORMAT_SIMPLE) {
+		if (use_cycles) {
+			printf("%lf\n", (double)cycles/size_total);
+		} else {
+			printf("%lf\n", bps);
+		}
+	} else {
+		/* unknown output format */
+		BUG_ON(1);
+	}
+
+out_free:
+	free(src);
+	free(dst);
+}
+
+/*
+ * Shared driver for the memcpy and memset benchmarks.
+ *
+ * Parses the options, opens the cycles event when -c was given, validates
+ * the requested size and then runs either every routine of the family
+ * (function string starting with "all") or the single routine whose name
+ * matches the -f argument.
+ *
+ * Returns 0 after running at least the selected routine(s), 1 for an invalid
+ * size or an unknown/"help" function name (the available routines are listed
+ * in that case), or the negative value from init_cycles() on failure.
+ */
+static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
+{
+	const struct function *fn;
+	double size_total;
+	size_t size;
+	int i;
+
+	argc = parse_options(argc, argv, options, info->usage, 0);
+
+	if (use_cycles) {
+		i = init_cycles();
+		if (i < 0) {
+			fprintf(stderr, "Failed to open cycles counter\n");
+			return i;
+		}
+	}
+
+	size = (size_t)perf_atoll((char *)size_str);
+	if ((s64)size <= 0) {
+		fprintf(stderr, "Invalid size:%s\n", size_str);
+		return 1;
+	}
+	size_total = (double)size * nr_loops;
+
+	if (strncmp(function_str, "all", 3) == 0) {
+		for (i = 0; info->functions[i].name; i++)
+			__bench_mem_function(info, i, size, size_total);
+		return 0;
+	}
+
+	/* run the one routine whose name matches exactly */
+	for (i = 0; info->functions[i].name; i++) {
+		if (strcmp(info->functions[i].name, function_str) == 0) {
+			__bench_mem_function(info, i, size, size_total);
+			return 0;
+		}
+	}
+
+	/* nothing matched: complain unless help was asked for, then list */
+	if (strcmp(function_str, "help") != 0 && strcmp(function_str, "h") != 0)
+		printf("Unknown function: %s\n", function_str);
+	printf("Available functions:\n");
+	for (fn = info->functions; fn->name; fn++)
+		printf("\t%s ... %s\n", fn->name, fn->desc);
+
+	return 1;
+}
+
+/*
+ * Run the memcpy routine described by @r nr_loops times, copying @size bytes
+ * from @src to @dst, and return the number of cycles the loop took.
+ */
+static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+{
+	memcpy_t copy = r->fn.memcpy;
+	u64 before, after;
+	int remaining;
+
+	/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
+	memset(src, 0, size);
+
+	/* One untimed copy so page faults on the fresh buffers are not measured. */
+	copy(dst, src, size);
+
+	before = get_cycles();
+	for (remaining = nr_loops; remaining > 0; remaining--)
+		copy(dst, src, size);
+	after = get_cycles();
+
+	return after - before;
+}
+
+/*
+ * Run the memcpy routine described by @r nr_loops times, copying @size bytes
+ * from @src to @dst, and return the throughput in bytes per second as
+ * measured with gettimeofday().
+ */
+static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
+{
+	memcpy_t copy = r->fn.memcpy;
+	struct timeval begin, finish, elapsed;
+	double bytes;
+	int remaining;
+
+	/* One untimed copy so page faults on the fresh buffers are not measured. */
+	copy(dst, src, size);
+
+	BUG_ON(gettimeofday(&begin, NULL));
+	for (remaining = nr_loops; remaining > 0; remaining--)
+		copy(dst, src, size);
+	BUG_ON(gettimeofday(&finish, NULL));
+
+	timersub(&finish, &begin, &elapsed);
+
+	bytes = (double)size * nr_loops;
+	return bytes / timeval2double(&elapsed);
+}
+
+/*
+ * Table of memcpy() implementations to benchmark, terminated by an entry
+ * with a NULL name. With HAVE_ARCH_X86_64_SUPPORT the entries listed in
+ * mem-memcpy-x86-64-asm-def.h are added by expanding MEMCPY_FN into
+ * initializers.
+ */
+struct function memcpy_functions[] = {
+	{ .name		= "default",
+	  .desc		= "Default memcpy() provided by glibc",
+	  .fn.memcpy	= memcpy },
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
+# include "mem-memcpy-x86-64-asm-def.h"
+# undef MEMCPY_FN
+#endif
+
+	{ .name = NULL, }
+};
+
+/* Usage lines for the memcpy benchmark; NULL terminated. */
+static const char * const bench_mem_memcpy_usage[] = {
+	"perf bench mem memcpy <options>",
+	NULL
+};
+
+/*
+ * Entry point of the memcpy benchmark: describes the memcpy family and hands
+ * over to bench_mem_common(), whose return value is passed through.
+ */
+int bench_mem_memcpy(int argc, const char **argv)
+{
+	/* memcpy needs a source buffer in addition to the destination */
+	struct bench_mem_info info = {
+		.alloc_src		= true,
+		.usage			= bench_mem_memcpy_usage,
+		.functions		= memcpy_functions,
+		.do_gettimeofday	= do_memcpy_gettimeofday,
+		.do_cycles		= do_memcpy_cycles,
+	};
+
+	return bench_mem_common(argc, argv, &info);
+}
+
+/*
+ * Run the memset routine described by @r nr_loops times over @size bytes of
+ * @dst and return the number of cycles the loop took. The fill value of each
+ * pass is the pass number. @src is unused.
+ */
+static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
+{
+	memset_t fill = r->fn.memset;
+	u64 before, after;
+	int pass = 0;
+
+	/* One untimed fill so page faults on the fresh buffer are not measured. */
+	fill(dst, -1, size);
+
+	before = get_cycles();
+	while (pass < nr_loops) {
+		fill(dst, pass, size);
+		++pass;
+	}
+	after = get_cycles();
+
+	return after - before;
+}
+
+/*
+ * Run the memset routine described by @r nr_loops times over @size bytes of
+ * @dst and return the throughput in bytes per second as measured with
+ * gettimeofday(). The fill value of each pass is the pass number. @src is
+ * unused.
+ */
+static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
+{
+	memset_t fill = r->fn.memset;
+	struct timeval begin, finish, elapsed;
+	double bytes;
+	int pass = 0;
+
+	/* One untimed fill so page faults on the fresh buffer are not measured. */
+	fill(dst, -1, size);
+
+	BUG_ON(gettimeofday(&begin, NULL));
+	while (pass < nr_loops) {
+		fill(dst, pass, size);
+		++pass;
+	}
+	BUG_ON(gettimeofday(&finish, NULL));
+
+	timersub(&finish, &begin, &elapsed);
+
+	bytes = (double)size * nr_loops;
+	return bytes / timeval2double(&elapsed);
+}
+
+/* Usage lines for the memset benchmark; NULL terminated. */
+static const char * const bench_mem_memset_usage[] = {
+	"perf bench mem memset <options>",
+	NULL
+};
+
+/*
+ * Table of memset() implementations to benchmark, terminated by an entry
+ * with a NULL name. With HAVE_ARCH_X86_64_SUPPORT the entries listed in
+ * mem-memset-x86-64-asm-def.h are added by expanding MEMSET_FN into
+ * initializers.
+ */
+static const struct function memset_functions[] = {
+	{ .name		= "default",
+	  .desc		= "Default memset() provided by glibc",
+	  .fn.memset	= memset },
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
+# include "mem-memset-x86-64-asm-def.h"
+# undef MEMSET_FN
+#endif
+
+	{ .name = NULL, }
+};
+
+/*
+ * Entry point of the memset benchmark: describes the memset family and hands
+ * over to bench_mem_common(), whose return value is passed through.
+ */
+int bench_mem_memset(int argc, const char **argv)
+{
+	/* alloc_src is left unset: memset only needs the destination buffer */
+	struct bench_mem_info info = {
+		.usage			= bench_mem_memset_usage,
+		.functions		= memset_functions,
+		.do_gettimeofday	= do_memset_gettimeofday,
+		.do_cycles		= do_memset_cycles,
+	};
+
+	return bench_mem_common(argc, argv, &info);
+}
diff --git a/bench/mem-memcpy-arch.h b/bench/mem-memcpy-arch.h
new file mode 100644
index 0000000..5bcaec5
--- /dev/null
+++ b/bench/mem-memcpy-arch.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+
+/*
+ * Expand every MEMCPY_FN() entry of the asm-def list into a prototype with
+ * the memcpy() signature, so the routines can be referenced from C.
+ */
+#define MEMCPY_FN(fn, name, desc)		\
+	void *fn(void *, const void *, size_t);
+
+#include "mem-memcpy-x86-64-asm-def.h"
+
+#undef MEMCPY_FN
+
+#endif
+
diff --git a/bench/mem-memcpy-x86-64-asm-def.h b/bench/mem-memcpy-x86-64-asm-def.h
new file mode 100644
index 0000000..50ae8bd
--- /dev/null
+++ b/bench/mem-memcpy-x86-64-asm-def.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * List of x86-64 memcpy() variants as MEMCPY_FN(symbol, name, description).
+ * The including file defines MEMCPY_FN to turn each entry into whatever it
+ * needs (a prototype, a table initializer, ...).
+ */
+MEMCPY_FN(memcpy_orig,
+	"x86-64-unrolled",
+	"unrolled memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(__memcpy,
+	"x86-64-movsq",
+	"movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(memcpy_erms,
+	"x86-64-movsb",
+	"movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/bench/mem-memcpy-x86-64-asm.S b/bench/mem-memcpy-x86-64-asm.S
new file mode 100644
index 0000000..b43f8d2
--- /dev/null
+++ b/bench/mem-memcpy-x86-64-asm.S
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Various wrappers to make the kernel .S file build in user-space: */
+
+#define memcpy MEMCPY /* don't hide glibc's memcpy() */
+/* NOTE(review): presumably redirects the kernel's alternatives section to plain text - confirm */
+#define altinstr_replacement text
+/* NOTE(review): presumably adds 16-byte alignment in front of every .globl - confirm */
+#define globl p2align 4; .globl
+/* expands to nothing, so no exception table entries are emitted here */
+#define _ASM_EXTABLE_FAULT(x, y)
+
+#include "../../arch/x86/lib/memcpy_64.S"
+/*
+ * We need to provide note.GNU-stack section, saying that we want
+ * NOT executable stack. Otherwise the final linking will assume that
+ * the ELF stack should not be restricted at all and set it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/bench/mem-memset-arch.h b/bench/mem-memset-arch.h
new file mode 100644
index 0000000..53f4548
--- /dev/null
+++ b/bench/mem-memset-arch.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+
+/*
+ * Expand every MEMSET_FN() entry of the asm-def list into a prototype with
+ * the memset() signature, so the routines can be referenced from C.
+ */
+#define MEMSET_FN(fn, name, desc)		\
+	void *fn(void *, int, size_t);
+
+#include "mem-memset-x86-64-asm-def.h"
+
+#undef MEMSET_FN
+
+#endif
+
diff --git a/bench/mem-memset-x86-64-asm-def.h b/bench/mem-memset-x86-64-asm-def.h
new file mode 100644
index 0000000..dac6d2b
--- /dev/null
+++ b/bench/mem-memset-x86-64-asm-def.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+MEMSET_FN(memset_orig,
+	"x86-64-unrolled",
+	"unrolled memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(__memset,
+	"x86-64-stosq",
+	"stosq-based memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(memset_erms,
+	"x86-64-stosb",
+	"stosb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/bench/mem-memset-x86-64-asm.S b/bench/mem-memset-x86-64-asm.S
new file mode 100644
index 0000000..d550bd5
--- /dev/null
+++ b/bench/mem-memset-x86-64-asm.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Wrappers to make the kernel .S file build in user-space: */
+#define memset MEMSET /* don't hide glibc's memset() */
+/* NOTE(review): presumably redirects the kernel's alternatives section to plain text - confirm */
+#define altinstr_replacement text
+/* NOTE(review): presumably adds 16-byte alignment in front of every .globl - confirm */
+#define globl p2align 4; .globl
+#include "../../arch/x86/lib/memset_64.S"
+
+/*
+ * We need to provide note.GNU-stack section, saying that we want
+ * NOT executable stack. Otherwise the final linking will assume that
+ * the ELF stack should not be restricted at all and set it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/bench/numa.c b/bench/numa.c
new file mode 100644
index 0000000..63eb490
--- /dev/null
+++ b/bench/numa.c
@@ -0,0 +1,1837 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * numa.c
+ *
+ * numa: Simulate NUMA-sensitive workload and measure their NUMA performance
+ */
+
+#include <inttypes.h>
+/* For the CPU_*() macros (CPU_ZERO, CPU_SET) */
+#include <pthread.h>
+
+#include "../perf.h"
+#include "../builtin.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../util/cloexec.h"
+
+#include "bench.h"
+
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <assert.h>
+#include <malloc.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+
+#include <numa.h>
+#include <numaif.h>
+
+/*
+ * Regular printout to the terminal, suppressed if -q is specified.
+ * Prints only once g is set up and g->p.show_details is >= 0.
+ */
+#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
+
+/*
+ * Debug printf: prints only once g is set up and g->p.show_details is >= 1.
+ * Any earlier definition of dprintf is dropped first.
+ */
+#undef dprintf
+#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)
+
+/*
+ * Per-task state, one entry per benchmark task (see global_info.threads).
+ * NOTE(review): the code filling in and reading these fields is outside this
+ * chunk; the field comments below are inferred from names only - confirm.
+ */
+struct thread_data {
+	int			curr_cpu;
+	cpu_set_t		bind_cpumask;	/* presumably the CPUs this task is bound to */
+	int			bind_node;	/* presumably the memory node this task is bound to */
+	u8			*process_data;
+	int			process_nr;
+	int			thread_nr;
+	int			task_nr;
+	unsigned int		loops_done;
+	u64			val;
+	u64			runtime_ns;
+	u64			system_time_ns;
+	u64			user_time_ns;
+	double			speed_gbs;
+	pthread_mutex_t		*process_lock;
+};
+
+/* Parameters set by options: */
+
+struct params {
+	/* Startup synchronization: */
+	bool			serialize_startup;
+
+	/* Task hierarchy: */
+	int			nr_proc;
+	int			nr_threads;
+
+	/* Working set sizes: */
+	const char		*mb_global_str;
+	const char		*mb_proc_str;
+	const char		*mb_proc_locked_str;
+	const char		*mb_thread_str;
+
+	double			mb_global;
+	double			mb_proc;
+	double			mb_proc_locked;
+	double			mb_thread;
+
+	/* Access patterns to the working set: */
+	bool			data_reads;
+	bool			data_writes;
+	bool			data_backwards;
+	bool			data_zero_memset;
+	bool			data_rand_walk;
+	u32			nr_loops;
+	u32			nr_secs;
+	u32			sleep_usecs;
+
+	/* Working set initialization: */
+	bool			init_zero;
+	bool			init_random;
+	/* allocate and initialize while bound to node 0, see alloc_data() */
+	bool			init_cpu0;
+
+	/* Misc options: */
+	/* verbosity: tprintf() needs >= 0, dprintf() needs >= 1 */
+	int			show_details;
+	int			run_all;
+	/* > 0 requests MADV_HUGEPAGE, < 0 MADV_NOHUGEPAGE, see alloc_data() */
+	int			thp;
+
+	long			bytes_global;
+	long			bytes_process;
+	long			bytes_process_locked;
+	long			bytes_thread;
+
+	int			nr_tasks;
+	bool			show_quiet;
+
+	bool			show_convergence;
+	bool			measure_convergence;
+
+	int			perturb_secs;
+	int			nr_cpus;
+	int			nr_nodes;
+
+	/* Affinity options -C and -N: */
+	char			*cpu_list_str;
+	char			*node_list_str;
+};
+
+
+/* Global, read-writable area, accessible to all processes and threads: */
+
+struct global_info {
+	u8			*data;
+
+	pthread_mutex_t		startup_mutex;
+	int			nr_tasks_started;
+
+	pthread_mutex_t		startup_done_mutex;
+
+	pthread_mutex_t		start_work_mutex;
+	int			nr_tasks_working;
+
+	pthread_mutex_t		stop_work_mutex;
+	u64			bytes_done;
+
+	struct thread_data	*threads;
+
+	/* Convergence latency measurement: */
+	bool			all_converged;
+	bool			stop_work;
+
+	/* set once alloc_data() has printed its THP warning, to print it only once */
+	int			print_once;
+
+	/* the benchmark parameters consulted through g->p */
+	struct params		p;
+};
+
+/* The global area; NULL until it has been set up (tprintf/dprintf check for that). */
+static struct global_info	*g = NULL;
+
+/* Option callbacks for -C and -M, defined further down in this file. */
+static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
+static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);
+
+/* Parameters as filled in directly by option parsing (see options[] below). */
+struct params p0;
+
+/* Command line options of the NUMA benchmark; values land in p0. */
+static const struct option options[] = {
+	OPT_INTEGER('p', "nr_proc"	, &p0.nr_proc,		"number of processes"),
+	OPT_INTEGER('t', "nr_threads"	, &p0.nr_threads,	"number of threads per process"),
+
+	OPT_STRING('G', "mb_global"	, &p0.mb_global_str,	"MB", "global  memory (MBs)"),
+	OPT_STRING('P', "mb_proc"	, &p0.mb_proc_str,	"MB", "process memory (MBs)"),
+	OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
+	OPT_STRING('T', "mb_thread"	, &p0.mb_thread_str,	"MB", "thread  memory (MBs)"),
+
+	OPT_UINTEGER('l', "nr_loops"	, &p0.nr_loops,		"max number of loops to run (default: unlimited)"),
+	OPT_UINTEGER('s', "nr_secs"	, &p0.nr_secs,		"max number of seconds to run (default: 5 secs)"),
+	OPT_UINTEGER('u', "usleep"	, &p0.sleep_usecs,	"usecs to sleep per loop iteration"),
+
+	OPT_BOOLEAN('R', "data_reads"	, &p0.data_reads,	"access the data via reads (can be mixed with -W)"),
+	OPT_BOOLEAN('W', "data_writes"	, &p0.data_writes,	"access the data via writes (can be mixed with -R)"),
+	OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards,	"access the data backwards as well"),
+	OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
+	OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk,	"access the data with random (32bit LFSR) walk"),
+
+
+	OPT_BOOLEAN('z', "init_zero"	, &p0.init_zero,	"bzero the initial allocations"),
+	OPT_BOOLEAN('I', "init_random"	, &p0.init_random,	"randomize the contents of the initial allocations"),
+	OPT_BOOLEAN('0', "init_cpu0"	, &p0.init_cpu0,	"do the initial allocations on CPU#0"),
+	OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs,	"perturb thread 0/0 every X secs, to test convergence stability"),
+
+	OPT_INCR   ('d', "show_details"	, &p0.show_details,	"Show details"),
+	OPT_INCR   ('a', "all"		, &p0.run_all,		"Run all tests in the suite"),
+	OPT_INTEGER('H', "thp"		, &p0.thp,		"MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
+	OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, "
+		    "convergence is reached when each process (all its threads) is running on a single NUMA node."),
+	OPT_BOOLEAN('m', "measure_convergence",	&p0.measure_convergence, "measure convergence latency"),
+	OPT_BOOLEAN('q', "quiet"	, &p0.show_quiet,	"quiet mode"),
+	OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
+
+	/* Special option string parsing callbacks: */
+        OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
+			"bind the first N tasks to these specific cpus (the rest is unbound)",
+			parse_cpus_opt),
+        OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
+			"bind the first N tasks to these specific memory nodes (the rest is unbound)",
+			parse_nodes_opt),
+	OPT_END()
+};
+
+/* Usage lines for the NUMA benchmark suite; NULL terminated. */
+static const char * const bench_numa_usage[] = {
+	"perf bench numa <options>",
+	NULL
+};
+
+/* Usage lines for the "mem" sub-benchmark; NULL terminated. */
+static const char * const numa_usage[] = {
+	"perf bench numa mem [<options>]",
+	NULL
+};
+
+/*
+ * Count the NUMA nodes that are present: every node id below g->p.nr_nodes
+ * whose bit is set in numa_nodes_ptr.
+ */
+static int nr_numa_nodes(void)
+{
+	int node;
+	int present = 0;
+
+	for (node = 0; node < g->p.nr_nodes; node++)
+		present += numa_bitmask_isbitset(numa_nodes_ptr, node) ? 1 : 0;
+
+	return present;
+}
+
+/*
+ * To check if given numa node is present.
+ * Returns non-zero when the bit for @node is set in numa_nodes_ptr.
+ */
+static int is_node_present(int node)
+{
+	return numa_bitmask_isbitset(numa_nodes_ptr, node);
+}
+
+/*
+ * To check given numa node has cpus.
+ *
+ * Returns true when numa_node_to_cpus() succeeds for @node and reports at
+ * least one CPU. If the CPU mask cannot be allocated or queried, the node is
+ * treated as having no CPUs.
+ */
+static bool node_has_cpus(int node)
+{
+	struct bitmask *cpu = numa_allocate_cpumask();
+	bool found = false;
+	unsigned int i;
+
+	if (cpu && !numa_node_to_cpus(node, cpu)) {
+		for (i = 0; i < cpu->size; i++) {
+			if (numa_bitmask_isbitset(cpu, i)) {
+				found = true;
+				break;
+			}
+		}
+	}
+
+	/* the mask was allocated above; release it on every path */
+	if (cpu)
+		numa_free_cpumask(cpu);
+
+	return found; /* false: lets fall back to nocpus safely */
+}
+
+/*
+ * Restrict the calling task to @target_cpu, or to all CPUs below
+ * g->p.nr_cpus when @target_cpu is -1.
+ *
+ * Returns the affinity mask that was in effect before the call so that it
+ * can be restored later. An out-of-range CPU or a failing affinity call
+ * trips BUG_ON().
+ */
+static cpu_set_t bind_to_cpu(int target_cpu)
+{
+	cpu_set_t prev_mask, new_mask;
+	int cpu;
+	int ret;
+
+	ret = sched_getaffinity(0, sizeof(prev_mask), &prev_mask);
+	BUG_ON(ret);
+
+	CPU_ZERO(&new_mask);
+
+	if (target_cpu != -1) {
+		BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
+		CPU_SET(target_cpu, &new_mask);
+	} else {
+		/* -1 means "no binding": allow every CPU */
+		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+			CPU_SET(cpu, &new_mask);
+	}
+
+	ret = sched_setaffinity(0, sizeof(new_mask), &new_mask);
+	BUG_ON(ret);
+
+	return prev_mask;
+}
+
+/*
+ * Restrict the calling task to the CPUs of @target_node, or to all CPUs
+ * below g->p.nr_cpus when @target_node is -1.
+ *
+ * The CPUs of a node are taken to be the contiguous range
+ * [node * cpus_per_node, (node + 1) * cpus_per_node), which requires the CPU
+ * count to be an exact multiple of the number of present nodes.
+ *
+ * Returns the affinity mask that was in effect before the call. Violated
+ * assumptions and failing affinity calls trip BUG_ON().
+ */
+static cpu_set_t bind_to_node(int target_node)
+{
+	int nr_nodes = nr_numa_nodes();
+	int cpus_per_node;
+	cpu_set_t orig_mask, mask;
+	int cpu;
+	int ret;
+
+	/* check before dividing: no present node must not become a division by zero */
+	BUG_ON(!nr_nodes);
+	cpus_per_node = g->p.nr_cpus / nr_nodes;
+
+	BUG_ON(cpus_per_node * nr_nodes != g->p.nr_cpus);
+	BUG_ON(!cpus_per_node);
+
+	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
+	BUG_ON(ret);
+
+	CPU_ZERO(&mask);
+
+	if (target_node == -1) {
+		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+			CPU_SET(cpu, &mask);
+	} else {
+		int cpu_start = (target_node + 0) * cpus_per_node;
+		int cpu_stop  = (target_node + 1) * cpus_per_node;
+
+		BUG_ON(cpu_stop > g->p.nr_cpus);
+
+		for (cpu = cpu_start; cpu < cpu_stop; cpu++)
+			CPU_SET(cpu, &mask);
+	}
+
+	ret = sched_setaffinity(0, sizeof(mask), &mask);
+	BUG_ON(ret);
+
+	return orig_mask;
+}
+
+/*
+ * Set the calling task's CPU affinity to @mask (typically a mask returned
+ * earlier by bind_to_cpu()/bind_to_node()). Failure trips BUG_ON().
+ */
+static void bind_to_cpumask(cpu_set_t mask)
+{
+	int ret;
+
+	ret = sched_setaffinity(0, sizeof(mask), &mask);
+	BUG_ON(ret);
+}
+
+/*
+ * Switch the calling task back to the default memory policy (MPOL_DEFAULT
+ * with no node mask). Failure trips BUG_ON().
+ */
+static void mempol_restore(void)
+{
+	int ret;
+
+	ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);
+
+	BUG_ON(ret);
+}
+
+/*
+ * Bind the calling task's memory allocations to @node with MPOL_BIND.
+ * A @node of -1 means "no binding" and leaves the policy untouched.
+ *
+ * The node mask is a single unsigned long, so the configured node count must
+ * fit into it; that and a failing set_mempolicy() trip BUG_ON().
+ */
+static void bind_to_memnode(int node)
+{
+	unsigned long nodemask;
+	int ret;
+
+	if (node == -1)
+		return;
+
+	BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
+	/* unsigned shift: the top node id must not shift into a sign bit */
+	nodemask = 1UL << node;
+
+	ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
+	dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
+
+	BUG_ON(ret);
+}
+
+/* Alignment and slack used by alloc_data(): 2MB. */
+#define HPSIZE (2*1024*1024)
+
+/*
+ * Format a name printf-style into a 20 byte buffer (truncating if needed)
+ * and set it as the calling task's name via prctl(PR_SET_NAME).
+ */
+#define set_taskname(fmt...)				\
+do {							\
+	char name[20];					\
+							\
+	snprintf(name, 20, fmt);			\
+	prctl(PR_SET_NAME, name);			\
+} while (0)
+
+/*
+ * Map an anonymous buffer of at least @bytes0 bytes and return a pointer to
+ * it that is aligned to HPSIZE.
+ *
+ * @bytes0:      requested size; 0 returns NULL without mapping anything
+ * @map_flags:   extra mmap() flags (MAP_SHARED or MAP_PRIVATE in the callers
+ *               visible here), combined with MAP_ANON
+ * @init_zero:   bzero() the mapping
+ * @init_cpu0:   do the mapping and initialization while bound to node 0
+ *               (CPUs and memory), restoring affinity and policy afterwards
+ * @thp:         for MAP_PRIVATE mappings only: > 0 asks for MADV_HUGEPAGE,
+ *               < 0 for MADV_NOHUGEPAGE; failure only prints a one-time warning
+ * @init_random: when @init_zero is not set, fill the mapping with 64-bit
+ *               words that differ from each other
+ *
+ * The mapping is HPSIZE bytes larger than requested so the returned pointer
+ * can be rounded up; the returned pointer is therefore not necessarily the
+ * start of the mapping. A failing mmap() trips BUG_ON().
+ */
+static u8 *alloc_data(ssize_t bytes0, int map_flags,
+		      int init_zero, int init_cpu0, int thp, int init_random)
+{
+	cpu_set_t orig_mask;
+	ssize_t bytes;
+	u8 *buf;
+	int ret;
+
+	if (!bytes0)
+		return NULL;
+
+	/* Allocate and initialize all memory on CPU#0: */
+	if (init_cpu0) {
+		orig_mask = bind_to_node(0);
+		bind_to_memnode(0);
+	}
+
+	/* extra HPSIZE of slack so the result can be aligned below */
+	bytes = bytes0 + HPSIZE;
+
+	buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0);
+	BUG_ON(buf == (void *)-1);
+
+	if (map_flags == MAP_PRIVATE) {
+		if (thp > 0) {
+			ret = madvise(buf, bytes, MADV_HUGEPAGE);
+			if (ret && !g->print_once) {
+				g->print_once = 1;
+				printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n");
+			}
+		}
+		if (thp < 0) {
+			ret = madvise(buf, bytes, MADV_NOHUGEPAGE);
+			if (ret && !g->print_once) {
+				g->print_once = 1;
+				printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n");
+			}
+		}
+	}
+
+	if (init_zero) {
+		bzero(buf, bytes);
+	} else {
+		/* Initialize random contents, different in each word: */
+		if (init_random) {
+			u64 *wbuf = (void *)buf;
+			long off = rand();
+			long i;
+
+			for (i = 0; i < bytes/8; i++)
+				wbuf[i] = i + off;
+		}
+	}
+
+	/* Align to 2MB boundary: */
+	buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1));
+
+	/* Restore affinity: */
+	if (init_cpu0) {
+		bind_to_cpumask(orig_mask);
+		mempol_restore();
+	}
+
+	return buf;
+}
+
+/*
+ * munmap() @bytes bytes starting at @data. A NULL @data is ignored;
+ * a failing munmap() trips BUG_ON().
+ */
+static void free_data(void *data, ssize_t bytes)
+{
+	if (data) {
+		int err = munmap(data, bytes);
+
+		BUG_ON(err);
+	}
+}
+
+/*
+ * Create a shared memory buffer that can be shared between processes, zeroed.
+ *
+ * Calls alloc_data() with MAP_SHARED and init_zero set; init_cpu0, thp and
+ * init_random are taken from the global parameters (g->p).
+ */
+static void * zalloc_shared_data(ssize_t bytes)
+{
+	return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0,  g->p.thp, g->p.init_random);
+}
+
+/*
+ * Create a shared memory buffer that can be shared between processes.
+ *
+ * Calls alloc_data() with MAP_SHARED and without zeroing; init_cpu0, thp
+ * and init_random are taken from the global parameters (g->p).
+ */
+static void * setup_shared_data(ssize_t bytes)
+{
+	return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0,  g->p.thp, g->p.init_random);
+}
+
+/*
+ * Allocate process-local memory - this will either be shared between
+ * threads of this process, or only be accessed by this thread.
+ *
+ * Calls alloc_data() with MAP_PRIVATE and without zeroing; init_cpu0, thp
+ * and init_random are taken from the global parameters (g->p).
+ */
+static void * setup_private_data(ssize_t bytes)
+{
+	return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0,  g->p.thp, g->p.init_random);
+}
+
+/*
+ * Initialize @mutex as a process-shared (global) mutex, i.e. with the
+ * PTHREAD_PROCESS_SHARED attribute set.
+ */
+static void init_global_mutex(pthread_mutex_t *mutex)
+{
+	pthread_mutexattr_t attr;
+
+	pthread_mutexattr_init(&attr);
+	pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+	pthread_mutex_init(mutex, &attr);
+
+	/* The attribute object is not needed once the mutex is initialized: */
+	pthread_mutexattr_destroy(&attr);
+}
+
+/*
+ * Remember a private copy of the CPU list string @arg in p0.cpu_list_str.
+ * The string is only parsed later, by parse_setup_cpu_list().
+ *
+ * Returns 0 on success, -1 if the string could not be duplicated.
+ */
+static int parse_cpu_list(const char *arg)
+{
+	char *copy = strdup(arg);
+
+	/* Report the failure instead of silently storing a NULL list: */
+	if (!copy)
+		return -1;
+
+	p0.cpu_list_str = copy;
+
+	dprintf("got CPU list: {%s}\n", p0.cpu_list_str);
+
+	return 0;
+}
+
+/*
+ * Parse g->p.cpu_list_str and fill in the bind_cpumask of the tasks in
+ * g->threads[], in task order.
+ *
+ * The list is comma separated; each token is a single CPU ("5") or a
+ * range ("5-11") and may carry these suffixes:
+ *   #<step>  stride used when walking a range
+ *   _<len>   number of consecutive CPUs allowed per task
+ *   x<mul>   repeat every entry <mul> times
+ *
+ * Entries beyond g->p.nr_tasks are ignored with a note; tasks left over
+ * keep their existing mask.
+ *
+ * Returns 0 on success (also when no list was given), -1 when the list
+ * names a CPU the system does not have. Malformed values trip BUG_ON().
+ */
+static int parse_setup_cpu_list(void)
+{
+	struct thread_data *td;
+	char *str0, *str;
+	int t;
+
+	if (!g->p.cpu_list_str)
+		return 0;
+
+	dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
+
+	/* strsep() modifies its input, so work on a copy: */
+	str0 = str = strdup(g->p.cpu_list_str);
+	t = 0;
+
+	BUG_ON(!str);
+
+	tprintf("# binding tasks to CPUs:\n");
+	tprintf("#  ");
+
+	while (true) {
+		int bind_cpu, bind_cpu_0, bind_cpu_1;
+		char *tok, *tok_end, *tok_step, *tok_len, *tok_mul;
+		int bind_len;
+		int step;
+		int mul;
+
+		tok = strsep(&str, ",");
+		if (!tok)
+			break;
+
+		tok_end = strstr(tok, "-");
+
+		dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
+		if (!tok_end) {
+			/* Single CPU specified: */
+			bind_cpu_0 = bind_cpu_1 = atol(tok);
+		} else {
+			/* CPU range specified (for example: "5-11"): */
+			bind_cpu_0 = atol(tok);
+			bind_cpu_1 = atol(tok_end + 1);
+		}
+
+		step = 1;
+		tok_step = strstr(tok, "#");
+		if (tok_step) {
+			step = atol(tok_step + 1);
+			BUG_ON(step <= 0 || step >= g->p.nr_cpus);
+		}
+
+		/*
+		 * Mask length.
+		 * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4',
+		 * where the _4 means the next 4 CPUs are allowed.
+		 */
+		bind_len = 1;
+		tok_len = strstr(tok, "_");
+		if (tok_len) {
+			bind_len = atol(tok_len + 1);
+			BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus);
+		}
+
+		/* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
+		mul = 1;
+		tok_mul = strstr(tok, "x");
+		if (tok_mul) {
+			mul = atol(tok_mul + 1);
+			BUG_ON(mul <= 0);
+		}
+
+		dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
+
+		if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) {
+			printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus);
+			/* Do not leak the working copy on this early exit: */
+			free(str0);
+			return -1;
+		}
+
+		BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
+		BUG_ON(bind_cpu_0 > bind_cpu_1);
+
+		for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
+			int i;
+
+			for (i = 0; i < mul; i++) {
+				int cpu;
+
+				if (t >= g->p.nr_tasks) {
+					printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu);
+					goto out;
+				}
+				td = g->threads + t;
+
+				if (t)
+					tprintf(",");
+				if (bind_len > 1) {
+					tprintf("%2d/%d", bind_cpu, bind_len);
+				} else {
+					tprintf("%2d", bind_cpu);
+				}
+
+				/* Replace the task's mask with CPUs [bind_cpu, bind_cpu+bind_len): */
+				CPU_ZERO(&td->bind_cpumask);
+				for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
+					BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
+					CPU_SET(cpu, &td->bind_cpumask);
+				}
+				t++;
+			}
+		}
+	}
+out:
+
+	tprintf("\n");
+
+	if (t < g->p.nr_tasks)
+		printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
+
+	free(str0);
+	return 0;
+}
+
+/*
+ * Option callback for the CPU list: hands @arg to parse_cpu_list().
+ * Returns -1 when no argument was supplied, otherwise the result of
+ * parse_cpu_list().
+ */
+static int parse_cpus_opt(const struct option *opt __maybe_unused,
+			  const char *arg, int unset __maybe_unused)
+{
+	return arg ? parse_cpu_list(arg) : -1;
+}
+
+/*
+ * Remember a private copy of the node list string @arg in
+ * p0.node_list_str. The string is only parsed later, by
+ * parse_setup_node_list().
+ *
+ * Returns 0 on success, -1 if the string could not be duplicated.
+ */
+static int parse_node_list(const char *arg)
+{
+	char *copy = strdup(arg);
+
+	/* Report the failure instead of silently storing a NULL list: */
+	if (!copy)
+		return -1;
+
+	p0.node_list_str = copy;
+
+	dprintf("got NODE list: {%s}\n", p0.node_list_str);
+
+	return 0;
+}
+
+/*
+ * Parse g->p.node_list_str and fill in the bind_node of the tasks in
+ * g->threads[], in task order.
+ *
+ * The list is comma separated; each token is a single node ("1") or a
+ * range ("0-3") and may carry these suffixes:
+ *   #<step>  stride used when walking a range
+ *   x<mul>   repeat every entry <mul> times
+ *
+ * Parsing stops with a note at the first entry that exceeds
+ * g->p.nr_tasks or names a node for which node_has_cpus() is false.
+ *
+ * Returns 0 on success (also when no list was given), -1 when the list
+ * names a node the system does not have. Malformed values trip BUG_ON().
+ */
+static int parse_setup_node_list(void)
+{
+	struct thread_data *td;
+	char *str0, *str;
+	int t;
+
+	if (!g->p.node_list_str)
+		return 0;
+
+	dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
+
+	/* strsep() modifies its input, so work on a copy: */
+	str0 = str = strdup(g->p.node_list_str);
+	t = 0;
+
+	BUG_ON(!str);
+
+	tprintf("# binding tasks to NODEs:\n");
+	tprintf("# ");
+
+	while (true) {
+		int bind_node, bind_node_0, bind_node_1;
+		char *tok, *tok_end, *tok_step, *tok_mul;
+		int step;
+		int mul;
+
+		tok = strsep(&str, ",");
+		if (!tok)
+			break;
+
+		tok_end = strstr(tok, "-");
+
+		dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
+		if (!tok_end) {
+			/* Single NODE specified: */
+			bind_node_0 = bind_node_1 = atol(tok);
+		} else {
+			/* NODE range specified (for example: "5-11"): */
+			bind_node_0 = atol(tok);
+			bind_node_1 = atol(tok_end + 1);
+		}
+
+		step = 1;
+		tok_step = strstr(tok, "#");
+		if (tok_step) {
+			step = atol(tok_step + 1);
+			BUG_ON(step <= 0 || step >= g->p.nr_nodes);
+		}
+
+		/* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
+		mul = 1;
+		tok_mul = strstr(tok, "x");
+		if (tok_mul) {
+			mul = atol(tok_mul + 1);
+			BUG_ON(mul <= 0);
+		}
+
+		dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
+
+		if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) {
+			printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes);
+			/* Do not leak the working copy on this early exit: */
+			free(str0);
+			return -1;
+		}
+
+		BUG_ON(bind_node_0 < 0 || bind_node_1 < 0);
+		BUG_ON(bind_node_0 > bind_node_1);
+
+		for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
+			int i;
+
+			for (i = 0; i < mul; i++) {
+				if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) {
+					printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
+					goto out;
+				}
+				td = g->threads + t;
+
+				if (!t)
+					tprintf(" %2d", bind_node);
+				else
+					tprintf(",%2d", bind_node);
+
+				td->bind_node = bind_node;
+				t++;
+			}
+		}
+	}
+out:
+
+	tprintf("\n");
+
+	if (t < g->p.nr_tasks)
+		printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
+
+	free(str0);
+	return 0;
+}
+
+/*
+ * Option callback for the node list: hands @arg to parse_node_list().
+ * Returns -1 when no argument was supplied, otherwise the result of
+ * parse_node_list().
+ */
+static int parse_nodes_opt(const struct option *opt __maybe_unused,
+			  const char *arg, int unset __maybe_unused)
+{
+	if (!arg)
+		return -1;
+
+	return parse_node_list(arg);
+}
+
+/* Single-bit mask; the argument is parenthesized so any expression is safe. */
+#define BIT(x) (1ul << (x))
+
+/*
+ * Advance a 32-bit linear feedback shift register by one step:
+ * shift right by one and XOR in the tap mask (bits 1, 5, 6 and 31)
+ * when the bit shifted out was set. Returns the new register value.
+ */
+static inline uint32_t lfsr_32(uint32_t lfsr)
+{
+	const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
+	/* (0 - lsb) is all-ones when the low bit is set, zero otherwise: */
+	return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps);
+}
+
+/*
+ * Touch one word of benchmark memory.
+ *
+ * With g->p.data_reads set, the word's content is added to @val; with
+ * g->p.data_writes set, the (possibly updated) value plus one is stored
+ * back. Chaining the result through @val creates a real data dependency
+ * on RAM, so the compiler, the CPU and the kernel (KSM, zero page, etc.)
+ * cannot optimize the accesses away.
+ *
+ * Returns the updated value.
+ */
+static inline u64 access_data(u64 *data, u64 val)
+{
+	u64 acc = val;
+
+	if (g->p.data_reads)
+		acc += *data;
+
+	if (g->p.data_writes)
+		*data = acc + 1;
+
+	return acc;
+}
+
+/*
+ * The worker process does two types of work, a forwards going
+ * loop and a backwards going loop.
+ *
+ * We do this so that on multiprocessor systems we do not create
+ * a 'train' of processing, with highly synchronized processes,
+ * skewing the whole benchmark.
+ *
+ * @__data: buffer to work on (may be NULL if @bytes is 0)
+ * @bytes:  size of the buffer in bytes
+ * @nr:     index of the caller among @nr_max peers
+ * @nr_max: number of peers sharing the buffer
+ * @loop:   current loop iteration
+ * @val:    running value threaded through access_data()
+ *
+ * Depending on the global parameters the buffer is either cleared with
+ * bzero(), visited in pseudo-random 1024-word windows, or walked once
+ * forwards/backwards starting at an offset derived from @nr and @loop.
+ *
+ * Returns the updated running value.
+ */
+static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val)
+{
+	long words = bytes/sizeof(u64);
+	u64 *data = (void *)__data;
+	long chunk_0, chunk_1;
+	u64 *d0, *d, *d1;
+	long off;
+	long i;
+
+	BUG_ON(!data && words);
+	BUG_ON(data && !words);
+
+	if (!data)
+		return val;
+
+	/* Very simple memset() work variant: */
+	if (g->p.data_zero_memset && !g->p.data_rand_walk) {
+		bzero(data, bytes);
+		return val;
+	}
+
+	/* Spread out by PID/TID nr and by loop nr: */
+	chunk_0 = words/nr_max;
+	chunk_1 = words/g->p.nr_loops;
+	off = nr*chunk_0 + loop*chunk_1;
+
+	/* Wrap the start offset back into the buffer: */
+	while (off >= words)
+		off -= words;
+
+	if (g->p.data_rand_walk) {
+		u32 lfsr = nr + loop + val;
+		/* 'long' like start/end: an int index would truncate on huge buffers */
+		long j;
+
+		for (i = 0; i < words/1024; i++) {
+			long start, end;
+
+			lfsr = lfsr_32(lfsr);
+
+			/* Pick a pseudo-random window of up to 1024 words: */
+			start = lfsr % words;
+			end = min(start + 1024, words-1);
+
+			if (g->p.data_zero_memset) {
+				bzero(data + start, (end-start) * sizeof(u64));
+			} else {
+				for (j = start; j < end; j++)
+					val = access_data(data + j, val);
+			}
+		}
+	} else if (!g->p.data_backwards || (nr + loop) & 1) {
+
+		d0 = data + off;
+		d  = data + off + 1;
+		d1 = data + words;
+
+		/* Process data forwards: */
+		for (;;) {
+			/* Wrap around at the end; stop once we are back at the start word: */
+			if (unlikely(d >= d1))
+				d = data;
+			if (unlikely(d == d0))
+				break;
+
+			val = access_data(d, val);
+
+			d++;
+		}
+	} else {
+		/* Process data backwards: */
+
+		d0 = data + off;
+		d  = data + off - 1;
+		d1 = data + words;
+
+		for (;;) {
+			/* Wrap around at the beginning; stop once we are back at the start word: */
+			if (unlikely(d < data))
+				d = data + words-1;
+			if (unlikely(d == d0))
+				break;
+
+			val = access_data(d, val);
+
+			d--;
+		}
+	}
+
+	return val;
+}
+
+/*
+ * Record the CPU reported by sched_getcpu() in the curr_cpu field of
+ * task @task_nr, then issue prctl(0, @bytes_worked).
+ *
+ * NOTE(review): prctl() option 0 presumably only serves as an externally
+ * observable marker carrying the byte count - confirm.
+ */
+static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
+{
+	struct thread_data *td = &g->threads[task_nr];
+	unsigned int this_cpu = sched_getcpu();
+
+	td->curr_cpu = this_cpu;
+	prctl(0, bytes_worked);
+}
+
+#define MAX_NR_NODES	64
+
+/*
+ * Count on how many distinct nodes the threads of process
+ * @process_nr are currently running.
+ *
+ * 1 means the whole process is compressed onto a single node,
+ * g->p.nr_nodes means it is spread over the entire system.
+ * 0 is returned when numa_node_of_cpu() yields a negative node
+ * for one of the threads (its curr_cpu was likely still -1).
+ */
+static int count_process_nodes(int process_nr)
+{
+	char node_present[MAX_NR_NODES] = { 0, };
+	int first_task = process_nr*g->p.nr_threads;
+	int count = 0;
+	int i;
+
+	/* Mark every node that hosts one of this process's threads: */
+	for (i = 0; i < g->p.nr_threads; i++) {
+		struct thread_data *td = g->threads + first_task + i;
+		int node = numa_node_of_cpu(td->curr_cpu);
+
+		if (node < 0)
+			return 0;
+
+		node_present[node] = 1;
+	}
+
+	/* Sum up the marks: */
+	for (i = 0; i < MAX_NR_NODES; i++)
+		count += node_present[i];
+
+	return count;
+}
+
+/*
+ * Count how many distinct processes have at least one thread
+ * currently running on @node.
+ *
+ * A result of 1 means the node hosts a single process only. If
+ * no node on the system hosts more than one process we are
+ * well-converged.
+ */
+static int count_node_processes(int node)
+{
+	int found = 0;
+	int p;
+
+	for (p = 0; p < g->p.nr_proc; p++) {
+		struct thread_data *td = g->threads + p*g->p.nr_threads;
+		int t;
+
+		/* One matching thread is enough to count the process: */
+		for (t = 0; t < g->p.nr_threads; t++, td++) {
+			if (numa_node_of_cpu(td->curr_cpu) == node) {
+				found++;
+				break;
+			}
+		}
+	}
+
+	return found;
+}
+
+/*
+ * Determine whether every process is compressed onto exactly one node.
+ *
+ * @strong: out: 1 if count_process_nodes() is 1 for all processes,
+ *          0 otherwise. When a process reports 0 nodes the function
+ *          gives up right away; in the other non-converged case the
+ *          observed {min-max} node counts are printed via tprintf().
+ */
+static void calc_convergence_compression(int *strong)
+{
+	unsigned int lo = -1;
+	unsigned int hi = 0;
+	int p;
+
+	for (p = 0; p < g->p.nr_proc; p++) {
+		unsigned int n = count_process_nodes(p);
+
+		if (n == 0) {
+			*strong = 0;
+			return;
+		}
+
+		if (n < lo)
+			lo = n;
+		if (n > hi)
+			hi = n;
+	}
+
+	/* Strong convergence: all threads compress on a single node: */
+	*strong = (lo == 1 && hi == 1);
+
+	if (!*strong)
+		tprintf(" {%d-%d}", lo, hi);
+}
+
+/*
+ * Print (via tprintf()) how the tasks are currently distributed over the
+ * NUMA nodes and track whether the run has converged.
+ *
+ * Does nothing unless show_convergence or measure_convergence is set.
+ *
+ * @runtime_ns_max: runtime so far in nanoseconds, used for the
+ *                  "converged"/"de-converged" timestamps
+ * @convergence:    in/out: 0 while not converged; set to @runtime_ns_max
+ *                  when convergence is first observed and reset to 0
+ *                  when it is lost again
+ *
+ * "Converged" here means: calc_convergence_compression() reports strong
+ * convergence and the per-node process counts sum up to g->p.nr_proc.
+ * When that is first observed and measure_convergence is set,
+ * g->all_converged and g->stop_work are set.
+ */
+static void calc_convergence(double runtime_ns_max, double *convergence)
+{
+	unsigned int loops_done_min, loops_done_max;
+	int process_groups;
+	int nodes[MAX_NR_NODES];
+	int distance;
+	int nr_min;
+	int nr_max;
+	int strong;
+	int sum;
+	int nr;
+	int node;
+	int cpu;
+	int t;
+
+	if (!g->p.show_convergence && !g->p.measure_convergence)
+		return;
+
+	for (node = 0; node < g->p.nr_nodes; node++)
+		nodes[node] = 0;
+
+	/* -1 wraps to the largest unsigned value, so any real count is smaller: */
+	loops_done_min = -1;
+	loops_done_max = 0;
+
+	/* Count tasks per node and collect the min/max loop progress: */
+	for (t = 0; t < g->p.nr_tasks; t++) {
+		struct thread_data *td = g->threads + t;
+		unsigned int loops_done;
+
+		cpu = td->curr_cpu;
+
+		/* Not all threads have written it yet: */
+		if (cpu < 0)
+			continue;
+
+		/* NOTE(review): the result is used as an array index without a range check */
+		node = numa_node_of_cpu(cpu);
+
+		nodes[node]++;
+
+		loops_done = td->loops_done;
+		loops_done_min = min(loops_done, loops_done_min);
+		loops_done_max = max(loops_done, loops_done_max);
+	}
+
+	nr_max = 0;
+	nr_min = g->p.nr_tasks;
+	sum = 0;
+
+	/* Min/max/sum of the task counts over the nodes that are present: */
+	for (node = 0; node < g->p.nr_nodes; node++) {
+		if (!is_node_present(node))
+			continue;
+		nr = nodes[node];
+		nr_min = min(nr, nr_min);
+		nr_max = max(nr, nr_max);
+		sum += nr;
+	}
+	BUG_ON(nr_min > nr_max);
+
+	BUG_ON(sum > g->p.nr_tasks);
+
+	/* Disabled check (constant 0): would bail out until all tasks have reported */
+	if (0 && (sum < g->p.nr_tasks))
+		return;
+
+	/*
+	 * Count the number of distinct process groups present
+	 * on nodes - when we are converged this will decrease
+	 * to g->p.nr_proc:
+	 */
+	process_groups = 0;
+
+	for (node = 0; node < g->p.nr_nodes; node++) {
+		int processes;
+
+		if (!is_node_present(node))
+			continue;
+		processes = count_node_processes(node);
+		nr = nodes[node];
+		/* Per node: "<tasks>/<processes>" */
+		tprintf(" %2d/%-2d", nr, processes);
+
+		process_groups += processes;
+	}
+
+	distance = nr_max - nr_min;
+
+	tprintf(" [%2d/%-2d]", distance, process_groups);
+
+	tprintf(" l:%3d-%-3d (%3d)",
+		loops_done_min, loops_done_max, loops_done_max-loops_done_min);
+
+	/* Relative skew between the slowest and the fastest task: */
+	if (loops_done_min && loops_done_max) {
+		double skew = 1.0 - (double)loops_done_min/loops_done_max;
+
+		tprintf(" [%4.1f%%]", skew * 100.0);
+	}
+
+	calc_convergence_compression(&strong);
+
+	if (strong && process_groups == g->p.nr_proc) {
+		/* Only report the transition into the converged state: */
+		if (!*convergence) {
+			*convergence = runtime_ns_max;
+			tprintf(" (%6.1fs converged)\n", *convergence / NSEC_PER_SEC);
+			if (g->p.measure_convergence) {
+				g->all_converged = true;
+				g->stop_work = true;
+			}
+		}
+	} else {
+		if (*convergence) {
+			tprintf(" (%6.1fs de-converged)", runtime_ns_max / NSEC_PER_SEC);
+			*convergence = 0;
+		}
+		tprintf("\n");
+	}
+}
+
+/*
+ * Print the one-line progress summary for loop @l: percentage of
+ * g->p.nr_loops done and minutes of runtime, followed by the output
+ * of calc_convergence(). stdout is flushed unless show_details is
+ * negative.
+ */
+static void show_summary(double runtime_ns_max, int l, double *convergence)
+{
+	double percent = (double)(l+1)/g->p.nr_loops*100.0;
+	double minutes = runtime_ns_max / NSEC_PER_SEC / 60.0;
+
+	tprintf("\r #  %5.1f%%  [%.1f mins]", percent, minutes);
+
+	calc_convergence(runtime_ns_max, convergence);
+
+	if (g->p.show_details < 0)
+		return;
+
+	fflush(stdout);
+}
+
+/*
+ * Body of every benchmark thread (started via pthread_create() from
+ * worker_process()); @__tdata is the thread's struct thread_data.
+ *
+ * Binds itself according to its thread_data, allocates its thread-private
+ * buffer, optionally takes part in the startup handshake, then runs up to
+ * g->p.nr_loops iterations of do_work() on the global, per-process and
+ * per-thread buffers. The loop ends early when g->stop_work is set or the
+ * g->p.nr_secs time limit expires. On exit the per-thread runtime, speed
+ * and rusage figures are stored in the thread_data and the bytes processed
+ * are added to g->bytes_done under g->stop_work_mutex.
+ *
+ * Always returns NULL.
+ */
+static void *worker_thread(void *__tdata)
+{
+	struct thread_data *td = __tdata;
+	struct timeval start0, start, stop, diff;
+	int process_nr = td->process_nr;
+	int thread_nr = td->thread_nr;
+	unsigned long last_perturbance;
+	int task_nr = td->task_nr;
+	int details = g->p.show_details;
+	int first_task, last_task;
+	double convergence = 0;
+	u64 val = td->val;
+	double runtime_ns_max;
+	u8 *global_data;
+	u8 *process_data;
+	u8 *thread_data;
+	u64 bytes_done;
+	long work_done;
+	u32 l;
+	struct rusage rusage;
+
+	bind_to_cpumask(td->bind_cpumask);
+	bind_to_memnode(td->bind_node);
+
+	set_taskname("thread %d/%d", process_nr, thread_nr);
+
+	global_data = g->data;
+	process_data = td->process_data;
+	thread_data = setup_private_data(g->p.bytes_thread);
+
+	bytes_done = 0;
+
+	/* The very last task is the one that prints the periodic summary: */
+	last_task = 0;
+	if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1)
+		last_task = 1;
+
+	/* The very first task is the one that gets perturbed: */
+	first_task = 0;
+	if (process_nr == 0 && thread_nr == 0)
+		first_task = 1;
+
+	if (details >= 2) {
+		printf("#  thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n",
+			process_nr, thread_nr, global_data, process_data, thread_data);
+	}
+
+	if (g->p.serialize_startup) {
+		/* Announce that this thread is up: */
+		pthread_mutex_lock(&g->startup_mutex);
+		g->nr_tasks_started++;
+		pthread_mutex_unlock(&g->startup_mutex);
+
+		/* Here we will wait for the main process to start us all at once: */
+		pthread_mutex_lock(&g->start_work_mutex);
+		g->nr_tasks_working++;
+
+		/* Last one wake the main process: */
+		if (g->nr_tasks_working == g->p.nr_tasks)
+			pthread_mutex_unlock(&g->startup_done_mutex);
+
+		pthread_mutex_unlock(&g->start_work_mutex);
+	}
+
+	gettimeofday(&start0, NULL);
+
+	start = stop = start0;
+	last_perturbance = start.tv_sec;
+
+	for (l = 0; l < g->p.nr_loops; l++) {
+		start = stop;
+
+		if (g->stop_work)
+			break;
+
+		val += do_work(global_data,  g->p.bytes_global,  process_nr, g->p.nr_proc,	l, val);
+		val += do_work(process_data, g->p.bytes_process, thread_nr,  g->p.nr_threads,	l, val);
+		val += do_work(thread_data,  g->p.bytes_thread,  0,          1,		l, val);
+
+		/* Optionally sleep while holding the per-process lock: */
+		if (g->p.sleep_usecs) {
+			pthread_mutex_lock(td->process_lock);
+			usleep(g->p.sleep_usecs);
+			pthread_mutex_unlock(td->process_lock);
+		}
+		/*
+		 * Amount of work to be done under a process-global lock:
+		 */
+		if (g->p.bytes_process_locked) {
+			pthread_mutex_lock(td->process_lock);
+			val += do_work(process_data, g->p.bytes_process_locked, thread_nr,  g->p.nr_threads,	l, val);
+			pthread_mutex_unlock(td->process_lock);
+		}
+
+		work_done = g->p.bytes_global + g->p.bytes_process +
+			    g->p.bytes_process_locked + g->p.bytes_thread;
+
+		update_curr_cpu(task_nr, work_done);
+		bytes_done += work_done;
+
+		/* Skip all time keeping when nothing below would use it: */
+		if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs)
+			continue;
+
+		td->loops_done = l;
+
+		gettimeofday(&stop, NULL);
+
+		/* Check whether our max runtime timed out: */
+		if (g->p.nr_secs) {
+			timersub(&stop, &start0, &diff);
+			if ((u32)diff.tv_sec >= g->p.nr_secs) {
+				g->stop_work = true;
+				break;
+			}
+		}
+
+		/* Update the summary at most once per second: */
+		if (start.tv_sec == stop.tv_sec)
+			continue;
+
+		/*
+		 * Perturb the first task's equilibrium every g->p.perturb_secs seconds,
+		 * by migrating to CPU#0:
+		 */
+		if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
+			cpu_set_t orig_mask;
+			int target_cpu;
+			int this_cpu;
+
+			last_perturbance = stop.tv_sec;
+
+			/*
+			 * Depending on where we are running, move into
+			 * the other half of the system, to create some
+			 * real disturbance:
+			 */
+			this_cpu = g->threads[task_nr].curr_cpu;
+			if (this_cpu < g->p.nr_cpus/2)
+				target_cpu = g->p.nr_cpus-1;
+			else
+				target_cpu = 0;
+
+			orig_mask = bind_to_cpu(target_cpu);
+
+			/* Here we are running on the target CPU already */
+			if (details >= 1)
+				printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
+
+			/* Go back to the original affinity right away: */
+			bind_to_cpumask(orig_mask);
+		}
+
+		if (details >= 3) {
+			/* Time of this iteration, in nanoseconds: */
+			timersub(&stop, &start, &diff);
+			runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
+			runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
+
+			/* Always true here, as details >= 3 was checked above: */
+			if (details >= 0) {
+				printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n",
+					process_nr, thread_nr, runtime_ns_max / bytes_done, val);
+			}
+			fflush(stdout);
+		}
+		if (!last_task)
+			continue;
+
+		/* Total time since this thread started working, in nanoseconds: */
+		timersub(&stop, &start0, &diff);
+		runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
+		runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
+
+		show_summary(runtime_ns_max, l, &convergence);
+	}
+
+	/* Publish this thread's total runtime and throughput: */
+	gettimeofday(&stop, NULL);
+	timersub(&stop, &start0, &diff);
+	td->runtime_ns = diff.tv_sec * NSEC_PER_SEC;
+	td->runtime_ns += diff.tv_usec * NSEC_PER_USEC;
+	td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9;
+
+	/* Publish this thread's system and user CPU time: */
+	getrusage(RUSAGE_THREAD, &rusage);
+	td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC;
+	td->system_time_ns += rusage.ru_stime.tv_usec * NSEC_PER_USEC;
+	td->user_time_ns = rusage.ru_utime.tv_sec * NSEC_PER_SEC;
+	td->user_time_ns += rusage.ru_utime.tv_usec * NSEC_PER_USEC;
+
+	free_data(thread_data, g->p.bytes_thread);
+
+	pthread_mutex_lock(&g->stop_work_mutex);
+	g->bytes_done += bytes_done;
+	pthread_mutex_unlock(&g->stop_work_mutex);
+
+	return NULL;
+}
+
+/*
+ * A worker process starts a couple of threads:
+ *
+ * It adopts the memory policy and CPU binding of its first thread,
+ * allocates the per-process buffer, fills in the thread_data of its
+ * g->p.nr_threads threads, starts them with worker_thread() and joins
+ * them all before releasing the buffer again.
+ *
+ * @process_nr: index of this process, used to locate its slice of
+ *              g->threads[]
+ */
+static void worker_process(int process_nr)
+{
+	pthread_mutex_t process_lock;
+	struct thread_data *td;
+	pthread_t *pthreads;
+	u8 *process_data;
+	int task_nr;
+	int ret;
+	int t;
+
+	/* This lock is handed to every thread of this process via td->process_lock: */
+	pthread_mutex_init(&process_lock, NULL);
+	set_taskname("process %d", process_nr);
+
+	/*
+	 * Pick up the memory policy and the CPU binding of our first thread,
+	 * so that we initialize memory accordingly:
+	 */
+	task_nr = process_nr*g->p.nr_threads;
+	td = g->threads + task_nr;
+
+	bind_to_memnode(td->bind_node);
+	bind_to_cpumask(td->bind_cpumask);
+
+	pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t));
+	process_data = setup_private_data(g->p.bytes_process);
+
+	if (g->p.show_details >= 3) {
+		printf(" # process %2d global mem: %p, process mem: %p\n",
+			process_nr, g->data, process_data);
+	}
+
+	for (t = 0; t < g->p.nr_threads; t++) {
+		task_nr = process_nr*g->p.nr_threads + t;
+		td = g->threads + task_nr;
+
+		td->process_data = process_data;
+		td->process_nr   = process_nr;
+		td->thread_nr    = t;
+		td->task_nr	 = task_nr;
+		td->val          = rand();
+		/* -1: the thread has not reported a CPU yet */
+		td->curr_cpu	 = -1;
+		td->process_lock = &process_lock;
+
+		ret = pthread_create(pthreads + t, NULL, worker_thread, td);
+		BUG_ON(ret);
+	}
+
+	/* Wait for all threads of this process to finish: */
+	for (t = 0; t < g->p.nr_threads; t++) {
+                ret = pthread_join(pthreads[t], NULL);
+		BUG_ON(ret);
+	}
+
+	free_data(process_data, g->p.bytes_process);
+	free(pthreads);
+}
+
+/*
+ * Print the banner describing the upcoming run: number of tasks,
+ * nodes and CPUs, and the size of the global, per-process and
+ * per-thread working sets. Prints nothing when show_details is
+ * negative.
+ */
+static void print_summary(void)
+{
+	if (g->p.show_details >= 0) {
+		const char *task_word = g->p.nr_tasks == 1 ? "task" : "tasks";
+
+		printf("\n ###\n");
+		printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
+			g->p.nr_tasks, task_word, nr_numa_nodes(), g->p.nr_cpus);
+		printf(" #      %5dx %5ldMB global  shared mem operations\n",
+				g->p.nr_loops, g->p.bytes_global/1024/1024);
+		printf(" #      %5dx %5ldMB process shared mem operations\n",
+				g->p.nr_loops, g->p.bytes_process/1024/1024);
+		printf(" #      %5dx %5ldMB thread  local  mem operations\n",
+				g->p.nr_loops, g->p.bytes_thread/1024/1024);
+
+		printf(" ###\n");
+
+		printf("\n ###\n");
+		fflush(stdout);
+	}
+}
+
+/*
+ * Allocate the zeroed, shared g->threads[] array with one entry per
+ * task and give every task its defaults: not bound to a node (-1) and
+ * allowed on all g->p.nr_cpus CPUs.
+ */
+static void init_thread_data(void)
+{
+	struct thread_data *td;
+
+	g->threads = zalloc_shared_data(sizeof(*g->threads)*g->p.nr_tasks);
+
+	for (td = g->threads; td < g->threads + g->p.nr_tasks; td++) {
+		int cpu = 0;
+
+		/* Allow all nodes by default: */
+		td->bind_node = -1;
+
+		/* Allow all CPUs by default: */
+		CPU_ZERO(&td->bind_cpumask);
+		while (cpu < g->p.nr_cpus) {
+			CPU_SET(cpu, &td->bind_cpumask);
+			cpu++;
+		}
+	}
+}
+
+/* Release the g->threads[] array (one entry per task) again. */
+static void deinit_thread_data(void)
+{
+	free_data(g->threads, sizeof(*g->threads)*g->p.nr_tasks);
+}
+
+/*
+ * Set up the global benchmark state 'g' for one run:
+ *
+ *  - allocate 'g' itself as zeroed shared memory and copy the parsed
+ *    options (p0) into it
+ *  - query the number of CPUs and nodes
+ *  - convert the memory size option strings into byte counts
+ *  - allocate the global data buffer and the per-task thread_data
+ *  - initialize the process-shared mutexes
+ *  - apply the CPU and node binding lists, then print the summary
+ *
+ * Returns 0 on success, -1 when no memory size was specified at all or
+ * when one of the binding lists does not fit this system. Invalid
+ * values trip BUG_ON().
+ */
+static int init(void)
+{
+	g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0);
+
+	/* Copy over options: */
+	g->p = p0;
+
+	g->p.nr_cpus = numa_num_configured_cpus();
+
+	g->p.nr_nodes = numa_max_node() + 1;
+
+	/* char array in count_process_nodes(): */
+	BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
+
+	/* Quiet mode without explicit details maps to the negative detail level: */
+	if (g->p.show_quiet && !g->p.show_details)
+		g->p.show_details = -1;
+
+	/* Some memory should be specified: */
+	if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str)
+		return -1;
+
+	if (g->p.mb_global_str) {
+		g->p.mb_global = atof(g->p.mb_global_str);
+		BUG_ON(g->p.mb_global < 0);
+	}
+
+	if (g->p.mb_proc_str) {
+		g->p.mb_proc = atof(g->p.mb_proc_str);
+		BUG_ON(g->p.mb_proc < 0);
+	}
+
+	if (g->p.mb_proc_locked_str) {
+		g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str);
+		BUG_ON(g->p.mb_proc_locked < 0);
+		/* The locked part may not exceed the per-process size: */
+		BUG_ON(g->p.mb_proc_locked > g->p.mb_proc);
+	}
+
+	if (g->p.mb_thread_str) {
+		g->p.mb_thread = atof(g->p.mb_thread_str);
+		BUG_ON(g->p.mb_thread < 0);
+	}
+
+	BUG_ON(g->p.nr_threads <= 0);
+	BUG_ON(g->p.nr_proc <= 0);
+
+	g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads;
+
+	/* Megabytes to bytes: */
+	g->p.bytes_global		= g->p.mb_global	*1024L*1024L;
+	g->p.bytes_process		= g->p.mb_proc		*1024L*1024L;
+	g->p.bytes_process_locked	= g->p.mb_proc_locked	*1024L*1024L;
+	g->p.bytes_thread		= g->p.mb_thread	*1024L*1024L;
+
+	g->data = setup_shared_data(g->p.bytes_global);
+
+	/* Startup serialization: */
+	init_global_mutex(&g->start_work_mutex);
+	init_global_mutex(&g->startup_mutex);
+	init_global_mutex(&g->startup_done_mutex);
+	init_global_mutex(&g->stop_work_mutex);
+
+	init_thread_data();
+
+	tprintf("#\n");
+	if (parse_setup_cpu_list() || parse_setup_node_list())
+		return -1;
+	tprintf("#\n");
+
+	print_summary();
+
+	return 0;
+}
+
+/*
+ * Undo init(): release the global data buffer, the per-task thread_data
+ * and finally the global state 'g' itself. 'g' must go last because the
+ * earlier steps still read sizes and pointers from it.
+ */
+static void deinit(void)
+{
+	free_data(g->data, g->p.bytes_global);
+	g->data = NULL;
+
+	deinit_thread_data();
+
+	free_data(g, sizeof(*g));
+	g = NULL;
+}
+
+/*
+ * Print one result line, in short or long form depending on the
+ * verbosity setting:
+ *
+ *  - quiet mode (g->p.show_quiet): the value followed by @txt_long
+ *  - otherwise: @name (or "main," if NULL), the value, @txt_unit
+ *    and @txt_short
+ */
+static void print_res(const char *name, double val,
+		      const char *txt_unit, const char *txt_short, const char *txt_long)
+{
+	const char *label = name ? name : "main,";
+
+	if (g->p.show_quiet) {
+		printf(" %14.3f %s\n", val, txt_long);
+		return;
+	}
+
+	printf(" %-30s %15.3f, %-15s %s\n", label, val, txt_unit, txt_short);
+}
+
+/*
+ * Run one complete benchmark with the options currently stored in p0:
+ * set up the global state, fork g->p.nr_proc worker processes, optionally
+ * synchronize their startup, wait for them to exit and print the results.
+ *
+ * @name: label printed in front of every result line (print_res()
+ *        substitutes "main," for NULL)
+ *
+ * Returns 0 on success, -1 if init() failed.
+ */
+static int __bench_numa(const char *name)
+{
+	struct timeval start, stop, diff;
+	u64 runtime_ns_min, runtime_ns_sum;
+	pid_t *pids, pid, wpid;
+	double delta_runtime;
+	double runtime_avg;
+	double runtime_sec_max;
+	double runtime_sec_min;
+	int wait_stat;
+	double bytes;
+	int i, t, p;
+
+	if (init())
+		return -1;
+
+	pids = zalloc(g->p.nr_proc * sizeof(*pids));
+	pid = -1;
+
+	/* All threads try to acquire it, this way we can wait for them to start up: */
+	pthread_mutex_lock(&g->start_work_mutex);
+
+	if (g->p.serialize_startup) {
+		tprintf(" #\n");
+		tprintf(" # Startup synchronization: ..."); fflush(stdout);
+	}
+
+	gettimeofday(&start, NULL);
+
+	for (i = 0; i < g->p.nr_proc; i++) {
+		pid = fork();
+		dprintf(" # process %2d: PID %d\n", i, pid);
+
+		BUG_ON(pid < 0);
+		if (!pid) {
+			/* Child process: */
+			worker_process(i);
+
+			exit(0);
+		}
+		pids[i] = pid;
+
+	}
+
+	if (g->p.serialize_startup) {
+		double startup_sec;
+
+		/*
+		 * Wait for all the threads to start up. The workers only
+		 * count themselves in g->nr_tasks_started when startup is
+		 * serialized, so this wait must not run otherwise - it
+		 * would never terminate.
+		 */
+		while (g->nr_tasks_started != g->p.nr_tasks)
+			usleep(USEC_PER_MSEC);
+
+		BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
+
+		pthread_mutex_lock(&g->startup_done_mutex);
+
+		/* This will start all threads: */
+		pthread_mutex_unlock(&g->start_work_mutex);
+
+		/* This mutex is locked - the last started thread will wake us: */
+		pthread_mutex_lock(&g->startup_done_mutex);
+
+		gettimeofday(&stop, NULL);
+
+		timersub(&stop, &start, &diff);
+
+		startup_sec = diff.tv_sec * NSEC_PER_SEC;
+		startup_sec += diff.tv_usec * NSEC_PER_USEC;
+		startup_sec /= NSEC_PER_SEC;
+
+		tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
+		tprintf(" #\n");
+
+		/* Measure the run itself from the end of the startup phase: */
+		start = stop;
+		pthread_mutex_unlock(&g->startup_done_mutex);
+	} else {
+		gettimeofday(&start, NULL);
+	}
+
+	/* Parent process: */
+
+
+	for (i = 0; i < g->p.nr_proc; i++) {
+		wpid = waitpid(pids[i], &wait_stat, 0);
+		BUG_ON(wpid < 0);
+		BUG_ON(!WIFEXITED(wait_stat));
+
+	}
+
+	runtime_ns_sum = 0;
+	/* All bits set: any real thread runtime is smaller */
+	runtime_ns_min = -1LL;
+
+	for (t = 0; t < g->p.nr_tasks; t++) {
+		u64 thread_runtime_ns = g->threads[t].runtime_ns;
+
+		runtime_ns_sum += thread_runtime_ns;
+		runtime_ns_min = min(thread_runtime_ns, runtime_ns_min);
+	}
+
+	gettimeofday(&stop, NULL);
+	timersub(&stop, &start, &diff);
+
+	BUG_ON(bench_format != BENCH_FORMAT_DEFAULT);
+
+	tprintf("\n ###\n");
+	tprintf("\n");
+
+	/* The wall-clock time of the parent serves as the maximum runtime: */
+	runtime_sec_max = diff.tv_sec * NSEC_PER_SEC;
+	runtime_sec_max += diff.tv_usec * NSEC_PER_USEC;
+	runtime_sec_max /= NSEC_PER_SEC;
+
+	runtime_sec_min = runtime_ns_min / NSEC_PER_SEC;
+
+	bytes = g->bytes_done;
+	runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC;
+
+	if (g->p.measure_convergence) {
+		print_res(name, runtime_sec_max,
+			"secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge");
+	}
+
+	print_res(name, runtime_sec_max,
+		"secs,", "runtime-max/thread",	"secs slowest (max) thread-runtime");
+
+	print_res(name, runtime_sec_min,
+		"secs,", "runtime-min/thread",	"secs fastest (min) thread-runtime");
+
+	print_res(name, runtime_avg,
+		"secs,", "runtime-avg/thread",	"secs average thread-runtime");
+
+	delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0;
+	print_res(name, delta_runtime / runtime_sec_max * 100.0,
+		"%,", "spread-runtime/thread",	"% difference between max/avg runtime");
+
+	print_res(name, bytes / g->p.nr_tasks / 1e9,
+		"GB,", "data/thread",		"GB data processed, per thread");
+
+	print_res(name, bytes / 1e9,
+		"GB,", "data-total",		"GB data processed, total");
+
+	print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks),
+		"nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
+
+	print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
+		"GB/sec,", "thread-speed",	"GB/sec/thread speed");
+
+	print_res(name, bytes / runtime_sec_max / 1e9,
+		"GB/sec,", "total-speed",	"GB/sec total speed");
+
+	/* Per-thread breakdown at higher detail levels: */
+	if (g->p.show_details >= 2) {
+		char tname[14 + 2 * 10 + 1];
+		struct thread_data *td;
+		for (p = 0; p < g->p.nr_proc; p++) {
+			for (t = 0; t < g->p.nr_threads; t++) {
+				memset(tname, 0, sizeof(tname));
+				td = g->threads + p*g->p.nr_threads + t;
+				snprintf(tname, sizeof(tname), "process%d:thread%d", p, t);
+				print_res(tname, td->speed_gbs,
+					"GB/sec",	"thread-speed", "GB/sec/thread speed");
+				print_res(tname, td->system_time_ns / NSEC_PER_SEC,
+					"secs",	"thread-system-time", "system CPU time/thread");
+				print_res(tname, td->user_time_ns / NSEC_PER_SEC,
+					"secs",	"thread-user-time", "user CPU time/thread");
+			}
+		}
+	}
+
+	free(pids);
+
+	deinit();
+
+	return 0;
+}
+
+#define MAX_ARGS 50
+
+/*
+ * Count the entries of the NULL-terminated vector @argv.
+ * A count of MAX_ARGS or more trips BUG_ON().
+ */
+static int command_size(const char **argv)
+{
+	int size;
+
+	for (size = 0; argv[size]; size++)
+		;
+
+	BUG_ON(size >= MAX_ARGS);
+
+	return size;
+}
+
+/*
+ * Announce the command line that is about to run and reset @p to the
+ * benchmark defaults.
+ *
+ * @p:    parameter block to initialize (zeroed first)
+ * @name: test name, only used in the announcement
+ * @argc: number of entries in @argv
+ * @argv: arguments, echoed in the announcement
+ */
+static void init_params(struct params *p, const char *name, int argc, const char **argv)
+{
+	const char **arg;
+
+	printf("\n # Running %s \"perf bench numa", name);
+
+	for (arg = argv; arg < argv + argc; arg++)
+		printf(" %s", *arg);
+
+	printf("\"\n");
+
+	memset(p, 0, sizeof(*p));
+
+	/* Everything that should not start out as zero: */
+
+	/* Layout and duration: */
+	p->nr_proc			= 1;
+	p->nr_threads			= 1;
+	p->nr_loops			= -1;
+	p->nr_secs			= 5;
+	p->mb_global_str		= "1";
+
+	/* Access pattern: */
+	p->data_reads			= true;
+	p->data_writes			= true;
+	p->data_backwards		= true;
+	p->data_rand_walk		= true;
+	p->init_random			= true;
+
+	/* Run control: */
+	p->serialize_startup		= 1;
+	p->run_all			= argc == 1;
+}
+
+/*
+ * Run a single benchmark described by the NULL-terminated argument
+ * vector @argv under the label @name: reset the parameters, parse the
+ * options and execute the benchmark.
+ *
+ * Returns 0 on success, -1 if parse_options() returned a nonzero
+ * count or the benchmark itself failed.
+ */
+static int run_bench_numa(const char *name, const char **argv)
+{
+	int argc = command_size(argv);
+
+	init_params(&p0, name, argc, argv);
+
+	if (parse_options(argc, argv, options, bench_numa_usage, 0))
+		return -1;
+
+	return __bench_numa(name) ? -1 : 0;
+}
+
+/*
+ * Canned option lists for the built-in tests[] table below. Each macro
+ * expands to a comma-separated run of string literals. The *_NOTHP
+ * variants append a second "--thp", "-1" pair after the base list's
+ * "--thp", " 1" (presumably the later option wins - depends on the
+ * option parser, confirm there).
+ */
+#define OPT_BW_RAM		"-s",  "20", "-zZq",    "--thp", " 1", "--no-data_rand_walk"
+#define OPT_BW_RAM_NOTHP	OPT_BW_RAM,		"--thp", "-1"
+
+#define OPT_CONV		"-s", "100", "-zZ0qcm", "--thp", " 1"
+#define OPT_CONV_NOTHP		OPT_CONV,		"--thp", "-1"
+
+#define OPT_BW			"-s",  "20", "-zZ0q",   "--thp", " 1"
+#define OPT_BW_NOTHP		OPT_BW,			"--thp", "-1"
+
+/*
+ * The built-in test-suite executed by "perf bench numa -a".
+ *
+ * (A minimum of 4 nodes and 16 GB of RAM is recommended.)
+ *
+ * Each row is a label followed by the argument vector of one run.  The NxM
+ * prefix of the convergence tests mirrors their -p and -t values.
+ */
+static const char *tests[][MAX_ARGS] = {
+   /* Basic single-stream NUMA bandwidth measurements: */
+   { "RAM-bw-local,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
+			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM },
+   { "RAM-bw-local-NOTHP,",
+			  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
+			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM_NOTHP },
+   { "RAM-bw-remote,",	  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
+			  "-C" ,   "0", "-M",   "1", OPT_BW_RAM },
+
+   /* 2-stream NUMA bandwidth measurements: */
+   { "RAM-bw-local-2x,",  "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
+			   "-C", "0,2", "-M", "0x2", OPT_BW_RAM },
+   { "RAM-bw-remote-2x,", "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
+			   "-C", "0,2", "-M", "1x2", OPT_BW_RAM },
+
+   /* Cross-stream NUMA bandwidth measurement: */
+   { "RAM-bw-cross,",     "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
+			   "-C", "0,8", "-M", "1,0", OPT_BW_RAM },
+
+   /* Convergence latency measurements: */
+   { " 1x3-convergence,", "mem",  "-p",  "1", "-t",  "3", "-P",  "512", OPT_CONV },
+   { " 1x4-convergence,", "mem",  "-p",  "1", "-t",  "4", "-P",  "512", OPT_CONV },
+   { " 1x6-convergence,", "mem",  "-p",  "1", "-t",  "6", "-P", "1020", OPT_CONV },
+   /* 2 processes x 3 threads; "-p 3" here would just repeat the 3x3 run. */
+   { " 2x3-convergence,", "mem",  "-p",  "2", "-t",  "3", "-P", "1020", OPT_CONV },
+   { " 3x3-convergence,", "mem",  "-p",  "3", "-t",  "3", "-P", "1020", OPT_CONV },
+   { " 4x4-convergence,", "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_CONV },
+   { " 4x4-convergence-NOTHP,",
+			  "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_CONV_NOTHP },
+   { " 4x6-convergence,", "mem",  "-p",  "4", "-t",  "6", "-P", "1020", OPT_CONV },
+   { " 4x8-convergence,", "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_CONV },
+   { " 8x4-convergence,", "mem",  "-p",  "8", "-t",  "4", "-P",  "512", OPT_CONV },
+   { " 8x4-convergence-NOTHP,",
+			  "mem",  "-p",  "8", "-t",  "4", "-P",  "512", OPT_CONV_NOTHP },
+   { " 3x1-convergence,", "mem",  "-p",  "3", "-t",  "1", "-P",  "512", OPT_CONV },
+   { " 4x1-convergence,", "mem",  "-p",  "4", "-t",  "1", "-P",  "512", OPT_CONV },
+   { " 8x1-convergence,", "mem",  "-p",  "8", "-t",  "1", "-P",  "512", OPT_CONV },
+   { "16x1-convergence,", "mem",  "-p", "16", "-t",  "1", "-P",  "256", OPT_CONV },
+   { "32x1-convergence,", "mem",  "-p", "32", "-t",  "1", "-P",  "128", OPT_CONV },
+
+   /* Various NUMA process/thread layout bandwidth measurements: */
+   { " 2x1-bw-process,",  "mem",  "-p",  "2", "-t",  "1", "-P", "1024", OPT_BW },
+   { " 3x1-bw-process,",  "mem",  "-p",  "3", "-t",  "1", "-P", "1024", OPT_BW },
+   { " 4x1-bw-process,",  "mem",  "-p",  "4", "-t",  "1", "-P", "1024", OPT_BW },
+   { " 8x1-bw-process,",  "mem",  "-p",  "8", "-t",  "1", "-P", " 512", OPT_BW },
+   { " 8x1-bw-process-NOTHP,",
+			  "mem",  "-p",  "8", "-t",  "1", "-P", " 512", OPT_BW_NOTHP },
+   { "16x1-bw-process,",  "mem",  "-p", "16", "-t",  "1", "-P",  "256", OPT_BW },
+
+   { " 4x1-bw-thread,",	  "mem",  "-p",  "1", "-t",  "4", "-T",  "256", OPT_BW },
+   { " 8x1-bw-thread,",	  "mem",  "-p",  "1", "-t",  "8", "-T",  "256", OPT_BW },
+   { "16x1-bw-thread,",   "mem",  "-p",  "1", "-t", "16", "-T",  "128", OPT_BW },
+   { "32x1-bw-thread,",   "mem",  "-p",  "1", "-t", "32", "-T",   "64", OPT_BW },
+
+   { " 2x3-bw-thread,",	  "mem",  "-p",  "2", "-t",  "3", "-P",  "512", OPT_BW },
+   { " 4x4-bw-thread,",	  "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_BW },
+   { " 4x6-bw-thread,",	  "mem",  "-p",  "4", "-t",  "6", "-P",  "512", OPT_BW },
+   { " 4x8-bw-thread,",	  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW },
+   { " 4x8-bw-thread-NOTHP,",
+			  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW_NOTHP },
+   { " 3x3-bw-thread,",	  "mem",  "-p",  "3", "-t",  "3", "-P",  "512", OPT_BW },
+   { " 5x5-bw-thread,",	  "mem",  "-p",  "5", "-t",  "5", "-P",  "512", OPT_BW },
+
+   { "2x16-bw-thread,",   "mem",  "-p",  "2", "-t", "16", "-P",  "512", OPT_BW },
+   { "1x32-bw-thread,",   "mem",  "-p",  "1", "-t", "32", "-P", "2048", OPT_BW },
+
+   { "numa02-bw,",	  "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW },
+   { "numa02-bw-NOTHP,",  "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW_NOTHP },
+   { "numa01-bw-thread,", "mem",  "-p",  "2", "-t", "16", "-T",  "192", OPT_BW },
+   { "numa01-bw-thread-NOTHP,",
+			  "mem",  "-p",  "2", "-t", "16", "-T",  "192", OPT_BW_NOTHP },
+};
+
+/*
+ * Print a banner naming the host, then run every row of tests[] in order.
+ * Individual test results are not inspected; the function always returns 0.
+ */
+static int bench_all(void)
+{
+	const int nr_tests = ARRAY_SIZE(tests);
+	int idx, rc;
+
+	rc = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'");
+	BUG_ON(rc < 0);
+
+	/* Column 0 of a row is the label; the remainder is the argv. */
+	for (idx = 0; idx < nr_tests; idx++)
+		run_bench_numa(tests[idx][0], tests[idx] + 1);
+
+	printf("\n");
+
+	return 0;
+}
+
+/*
+ * Entry point of "perf bench numa mem".
+ *
+ * Runs the whole built-in suite when init_params() flagged run_all,
+ * otherwise a single benchmark with the parsed parameters.  Leftover
+ * arguments or a failing benchmark end in the usage message and -1.
+ */
+int bench_numa(int argc, const char **argv)
+{
+	int failed;
+
+	init_params(&p0, "main,", argc, argv);
+
+	failed = parse_options(argc, argv, options, bench_numa_usage, 0);
+	if (!failed) {
+		if (p0.run_all)
+			return bench_all();
+
+		failed = __bench_numa(NULL);
+	}
+
+	if (!failed)
+		return 0;
+
+	usage_with_options(numa_usage, options);
+	return -1;
+}
diff --git a/bench/sched-messaging.c b/bench/sched-messaging.c
new file mode 100644
index 0000000..f9d7641
--- /dev/null
+++ b/bench/sched-messaging.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * sched-messaging.c
+ *
+ * messaging: Benchmark for scheduler and IPC mechanisms
+ *
+ * Based on hackbench by Rusty Russell <rusty@rustcorp.com.au>
+ * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ *
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../builtin.h"
+#include "bench.h"
+
+/* Test groups of 20 processes spraying to 20 receivers */
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <poll.h>
+#include <limits.h>
+#include <err.h>
+#include <linux/time64.h>
+
+/* Size in bytes of every message passed from a sender to a receiver. */
+#define DATASIZE 100
+
+/* Run-time settings; the initializers are the defaults used unless the option table overrides them. */
+static bool use_pipes = false;
+static unsigned int nr_loops = 100;
+static bool thread_mode = false;
+static unsigned int num_groups = 10;
+
+/*
+ * State shared by all sender workers of one group.  It is allocated with
+ * room for num_fds trailing descriptors, hence the flexible array member
+ * (the standard C99 spelling of the former zero-length array).
+ */
+struct sender_context {
+	unsigned int num_fds;	/* number of entries in out_fds */
+	int ready_out;		/* written to once the worker is ready */
+	int wakefd;		/* polled for the start signal */
+	int out_fds[];		/* one descriptor per receiver of the group */
+};
+
+/* Per-receiver state; one instance is allocated for each receiver worker. */
+struct receiver_context {
+	unsigned int num_packets;	/* messages of DATASIZE bytes to read */
+	int in_fds[2];			/* [0] is read from; [1] is closed by the receiver in process mode */
+	int ready_out;			/* written to once the worker is ready */
+	int wakefd;			/* polled for the start signal */
+};
+
+/*
+ * Fill @fds with a connected descriptor pair: a pipe when use_pipes is
+ * set, an AF_UNIX stream socket pair otherwise.  Exits on failure.
+ */
+static void fdpair(int fds[2])
+{
+	int rc;
+
+	if (use_pipes)
+		rc = pipe(fds);
+	else
+		rc = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
+
+	if (rc != 0)
+		err(EXIT_FAILURE, use_pipes ? "pipe()" : "socketpair()");
+}
+
+/*
+ * Block until we're ready to go.
+ *
+ * Writes one byte to @ready_out to announce readiness, then sleeps in
+ * poll() until @wakefd becomes readable.  Exits on any failure.
+ */
+static void ready(int ready_out, int wakefd)
+{
+	/* Only the byte count matters, but do not hand write() an indeterminate value. */
+	char dummy = 0;
+	struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };
+
+	/* Tell them we're ready. */
+	if (write(ready_out, &dummy, 1) != 1)
+		err(EXIT_FAILURE, "CLIENT: ready write");
+
+	/* Wait for "GO" signal */
+	if (poll(&pollfd, 1, -1) != 1)
+		err(EXIT_FAILURE, "poll");
+}
+
+/*
+ * Sender sprays nr_loops messages down each file descriptor.
+ *
+ * @ctx points to the group's struct sender_context.  The parameter is a
+ * plain void * so that the function really has the worker type
+ * void *(*)(void *) it is invoked through.  Always returns NULL.
+ */
+static void *sender(void *ctx)
+{
+	struct sender_context *snd = ctx;
+	/* Zeroed so that no indeterminate stack bytes are sent. */
+	char data[DATASIZE] = { 0 };
+	unsigned int i, j;
+
+	ready(snd->ready_out, snd->wakefd);
+
+	/* Now pump to every receiver. */
+	for (i = 0; i < nr_loops; i++) {
+		for (j = 0; j < snd->num_fds; j++) {
+			size_t done = 0;
+
+			/* write() may be short: continue until the message is out. */
+			while (done < sizeof(data)) {
+				ssize_t ret = write(snd->out_fds[j], data + done,
+						    sizeof(data) - done);
+
+				if (ret < 0)
+					err(EXIT_FAILURE, "SENDER: write");
+				done += ret;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+
+/*
+ * One receiver per fd.
+ *
+ * @ctx points to a struct receiver_context; it is taken as void * so the
+ * function matches the worker type void *(*)(void *).  Reads num_packets
+ * messages of DATASIZE bytes each and returns NULL.  A read error or a
+ * premature end of stream terminates the program.
+ */
+static void *receiver(void *ctx)
+{
+	struct receiver_context *rcv = ctx;
+	unsigned int i;
+
+	if (!thread_mode)
+		close(rcv->in_fds[1]);
+
+	/* Wait for start... */
+	ready(rcv->ready_out, rcv->wakefd);
+
+	/* Receive them all */
+	for (i = 0; i < rcv->num_packets; i++) {
+		char data[DATASIZE];
+		size_t done = 0;
+
+		while (done < sizeof(data)) {
+			ssize_t ret = read(rcv->in_fds[0], data + done,
+					   sizeof(data) - done);
+
+			if (ret < 0)
+				err(EXIT_FAILURE, "SERVER: read");
+			/*
+			 * 0 means every writer is gone; retrying would spin
+			 * forever without making progress.
+			 */
+			if (ret == 0)
+				errx(EXIT_FAILURE, "SERVER: read: unexpected end of stream");
+			done += ret;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Start one worker running @func(@ctx).
+ *
+ * In process mode the worker is a forked child that exits once @func
+ * returns, and (pthread_t)0 is returned to the parent.  In thread mode a
+ * thread with the minimum stack size is created and its id returned.
+ * Any failure terminates the program.
+ */
+static pthread_t create_worker(void *ctx, void *(*func)(void *))
+{
+	pthread_attr_t attr;
+	pthread_t childid;
+	int ret;
+
+	if (!thread_mode) {
+		/* process mode */
+		/* Fork the receiver. */
+		switch (fork()) {
+		case -1:
+			err(EXIT_FAILURE, "fork()");
+			break;
+		case 0:
+			(*func) (ctx);
+			exit(0);
+			break;
+		default:
+			break;
+		}
+
+		return (pthread_t)0;
+	}
+
+	/*
+	 * pthread functions return the error number instead of setting
+	 * errno, so store it there before err() formats the message.
+	 */
+	ret = pthread_attr_init(&attr);
+	if (ret != 0) {
+		errno = ret;
+		err(EXIT_FAILURE, "pthread_attr_init:");
+	}
+
+#ifndef __ia64__
+	ret = pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN);
+	if (ret != 0) {
+		errno = ret;
+		err(EXIT_FAILURE, "pthread_attr_setstacksize");
+	}
+#endif
+
+	ret = pthread_create(&childid, &attr, func, ctx);
+	if (ret != 0) {
+		errno = ret;
+		err(EXIT_FAILURE, "pthread_create failed");
+	}
+
+	/* The attribute object is not needed once the thread exists. */
+	pthread_attr_destroy(&attr);
+
+	return childid;
+}
+
+/*
+ * Collect one finished worker.
+ *
+ * Process mode waits for any child and exits with status 1 unless that
+ * child terminated through exit; @id is ignored.  Thread mode joins @id.
+ */
+static void reap_worker(pthread_t id)
+{
+	if (!thread_mode) {
+		int proc_status;
+
+		/* process mode */
+		/* The status is only meaningful when wait() succeeded. */
+		if (wait(&proc_status) < 0)
+			err(EXIT_FAILURE, "wait()");
+		if (!WIFEXITED(proc_status))
+			exit(1);
+	} else {
+		pthread_join(id, NULL);
+	}
+}
+
+/*
+ * Set up one group: @num_fds receivers and @num_fds senders.
+ *
+ * Worker ids are stored in @pth, receivers first and senders after them.
+ * @ready_out and @wakefd are handed to every worker.  Returns the number
+ * of workers created, i.e. how many the caller has to reap.
+ */
+static unsigned int group(pthread_t *pth,
+		unsigned int num_fds,
+		int ready_out,
+		int wakefd)
+{
+	struct sender_context *snd_ctx;
+	unsigned int n;
+
+	snd_ctx = malloc(sizeof(*snd_ctx) + num_fds * sizeof(int));
+	if (snd_ctx == NULL)
+		err(EXIT_FAILURE, "malloc()");
+
+	/* All senders of the group share this single context. */
+	snd_ctx->ready_out = ready_out;
+	snd_ctx->wakefd = wakefd;
+	snd_ctx->num_fds = num_fds;
+
+	for (n = 0; n < num_fds; n++) {
+		struct receiver_context *rcv_ctx = malloc(sizeof(*rcv_ctx));
+		int pair[2];
+
+		if (rcv_ctx == NULL)
+			err(EXIT_FAILURE, "malloc()");
+
+		/* Create the pipe between client and server */
+		fdpair(pair);
+
+		rcv_ctx->num_packets = num_fds * nr_loops;
+		rcv_ctx->in_fds[0] = pair[0];
+		rcv_ctx->in_fds[1] = pair[1];
+		rcv_ctx->ready_out = ready_out;
+		rcv_ctx->wakefd = wakefd;
+
+		pth[n] = create_worker(rcv_ctx, (void *)receiver);
+
+		snd_ctx->out_fds[n] = pair[1];
+		/* In process mode the read end now lives in the child. */
+		if (!thread_mode)
+			close(pair[0]);
+	}
+
+	/* Now we have all the fds, fork the senders */
+	for (n = 0; n < num_fds; n++)
+		pth[num_fds + n] = create_worker(snd_ctx, (void *)sender);
+
+	/* Close the fds we have left */
+	if (!thread_mode) {
+		for (n = 0; n < num_fds; n++)
+			close(snd_ctx->out_fds[n]);
+	}
+
+	/* Return number of children to reap */
+	return 2 * num_fds;
+}
+
+/* Command-line options; each entry stores into one of the file-scope settings above. */
+static const struct option options[] = {
+	OPT_BOOLEAN('p', "pipe", &use_pipes,
+		    "Use pipe() instead of socketpair()"),
+	OPT_BOOLEAN('t', "thread", &thread_mode,
+		    "Be multi thread instead of multi process"),
+	OPT_UINTEGER('g', "group", &num_groups, "Specify number of groups"),
+	OPT_UINTEGER('l', "nr_loops", &nr_loops, "Specify the number of loops to run (default: 100)"),
+	OPT_END()
+};
+
+/* NULL-terminated usage lines handed to parse_options(). */
+static const char * const bench_sched_message_usage[] = {
+	"perf bench sched messaging <options>",
+	NULL
+};
+
+/*
+ * Entry point of "perf bench sched messaging".
+ *
+ * Creates num_groups groups of 20 senders and 20 receivers, waits until
+ * every worker has reported ready, releases them together and measures
+ * the wall-clock time until all of them have been reaped.  Returns 0;
+ * setup failures terminate the program.
+ */
+int bench_sched_messaging(int argc, const char **argv)
+{
+	unsigned int i, total_children;
+	struct timeval start, stop, diff;
+	unsigned int num_fds = 20;
+	int readyfds[2], wakefds[2];
+	/* Initialized because it is written out even when no worker reported in. */
+	char dummy = 0;
+	pthread_t *pth_tab;
+
+	argc = parse_options(argc, argv, options,
+			     bench_sched_message_usage, 0);
+
+	/* calloc() checks the count * size product for overflow. */
+	pth_tab = calloc(num_groups, 2 * num_fds * sizeof(pthread_t));
+	if (!pth_tab)
+		err(EXIT_FAILURE, "main:malloc()");
+
+	fdpair(readyfds);
+	fdpair(wakefds);
+
+	total_children = 0;
+	for (i = 0; i < num_groups; i++)
+		total_children += group(pth_tab+total_children, num_fds,
+					readyfds[1], wakefds[0]);
+
+	/* Wait for everyone to be ready */
+	for (i = 0; i < total_children; i++)
+		if (read(readyfds[0], &dummy, 1) != 1)
+			err(EXIT_FAILURE, "Reading for readyfds");
+
+	gettimeofday(&start, NULL);
+
+	/* Kick them off */
+	if (write(wakefds[1], &dummy, 1) != 1)
+		err(EXIT_FAILURE, "Writing to start them");
+
+	/* Reap them all */
+	for (i = 0; i < total_children; i++)
+		reap_worker(pth_tab[i]);
+
+	gettimeofday(&stop, NULL);
+
+	timersub(&stop, &start, &diff);
+
+	/*
+	 * The counters are unsigned and tv_sec is a time_t, so use %u and
+	 * cast explicitly to the type each conversion expects.
+	 */
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		printf("# %u sender and receiver %s per group\n",
+		       num_fds, thread_mode ? "threads" : "processes");
+		printf("# %u groups == %u %s run\n\n",
+		       num_groups, num_groups * 2 * num_fds,
+		       thread_mode ? "threads" : "processes");
+		printf(" %14s: %lu.%03lu [sec]\n", "Total time",
+		       (unsigned long) diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+		break;
+	case BENCH_FORMAT_SIMPLE:
+		printf("%lu.%03lu\n", (unsigned long) diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+		break;
+	default:
+		/* An unknown output format here means something is badly wrong. */
+		fprintf(stderr, "Unknown format:%d\n", bench_format);
+		exit(1);
+		break;
+	}
+
+	free(pth_tab);
+
+	return 0;
+}
diff --git a/bench/sched-pipe.c b/bench/sched-pipe.c
new file mode 100644
index 0000000..0591be0
--- /dev/null
+++ b/bench/sched-pipe.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * sched-pipe.c
+ *
+ * pipe: Benchmark for pipe()
+ *
+ * Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
+ *  http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
+ * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../builtin.h"
+#include "bench.h"
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <linux/time64.h>
+
+#include <pthread.h>
+
+/* Per-worker state of the pipe ping-pong benchmark. */
+struct thread_data {
+	int			nr;		/* worker index; worker 0 reads before it writes */
+	int			pipe_read;	/* descriptor this worker reads from */
+	int			pipe_write;	/* descriptor this worker writes to */
+	pthread_t		pthread;	/* thread id, set only in threaded mode */
+};
+
+/* Number of read/write round trips each worker performs unless -l overrides it. */
+#define LOOPS_DEFAULT 1000000
+static	int			loops = LOOPS_DEFAULT;
+
+/* Use processes by default: */
+static bool			threaded;
+
+/* Command-line options; each entry stores into one of the settings above. */
+static const struct option options[] = {
+	OPT_INTEGER('l', "loop",	&loops,		"Specify number of loops"),
+	OPT_BOOLEAN('T', "threaded",	&threaded,	"Specify threads/process based task setup"),
+	OPT_END()
+};
+
+/* NULL-terminated usage lines handed to parse_options(). */
+static const char * const bench_sched_pipe_usage[] = {
+	"perf bench sched pipe <options>",
+	NULL
+};
+
+/*
+ * One side of the ping-pong: performs `loops` round trips of a single int
+ * over the worker's pipe pair.  Worker 0 reads and then writes; any other
+ * worker writes and then reads.  @__tdata is the worker's struct
+ * thread_data.  Returns NULL.
+ */
+static void *worker_thread(void *__tdata)
+{
+	struct thread_data *td = __tdata;
+	const bool reads_first = (td->nr == 0);
+	int token = 0;
+	int iter, ret;
+
+	for (iter = 0; iter < loops; iter++) {
+		if (reads_first) {
+			ret = read(td->pipe_read, &token, sizeof(int));
+			BUG_ON(ret != sizeof(int));
+		}
+
+		ret = write(td->pipe_write, &token, sizeof(int));
+		BUG_ON(ret != sizeof(int));
+
+		if (!reads_first) {
+			ret = read(td->pipe_read, &token, sizeof(int));
+			BUG_ON(ret != sizeof(int));
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Entry point of "perf bench sched pipe".
+ *
+ * Two workers, threads with -T and otherwise parent and forked child,
+ * bounce an int back and forth over a pair of pipes `loops` times; the
+ * elapsed wall-clock time is then reported in the selected bench format.
+ * Returns 0.
+ */
+int bench_sched_pipe(int argc, const char **argv)
+{
+	struct thread_data threads[2], *td;
+	int pipe_1[2], pipe_2[2];
+	struct timeval start, stop, diff;
+	unsigned long long result_usec = 0;
+	int nr_threads = 2;
+	int t;
+
+	/*
+	 * why does "ret" exist?
+	 * discarding returned value of read(), write()
+	 * causes error in building environment for perf
+	 */
+	int __maybe_unused ret, wait_stat;
+	pid_t pid, retpid __maybe_unused;
+
+	argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0);
+
+	BUG_ON(pipe(pipe_1));
+	BUG_ON(pipe(pipe_2));
+
+	gettimeofday(&start, NULL);
+
+	/* Cross-wire the pipes: each worker reads what the other one writes. */
+	for (t = 0; t < nr_threads; t++) {
+		td = threads + t;
+
+		td->nr = t;
+
+		if (t == 0) {
+			td->pipe_read = pipe_1[0];
+			td->pipe_write = pipe_2[1];
+		} else {
+			td->pipe_write = pipe_1[1];
+			td->pipe_read = pipe_2[0];
+		}
+	}
+
+
+	if (threaded) {
+
+		for (t = 0; t < nr_threads; t++) {
+			td = threads + t;
+
+			ret = pthread_create(&td->pthread, NULL, worker_thread, td);
+			BUG_ON(ret);
+		}
+
+		for (t = 0; t < nr_threads; t++) {
+			td = threads + t;
+
+			ret = pthread_join(td->pthread, NULL);
+			BUG_ON(ret);
+		}
+
+	} else {
+		pid = fork();
+		assert(pid >= 0);
+
+		/* The child plays worker 0, the parent worker 1. */
+		if (!pid) {
+			worker_thread(threads + 0);
+			exit(0);
+		} else {
+			worker_thread(threads + 1);
+		}
+
+		retpid = waitpid(pid, &wait_stat, 0);
+		assert((retpid == pid) && WIFEXITED(wait_stat));
+	}
+
+	gettimeofday(&stop, NULL);
+	timersub(&stop, &start, &diff);
+
+	/*
+	 * tv_sec is a time_t, which need not be unsigned long; cast it so
+	 * the argument matches the %lu conversion.
+	 */
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		printf("# Executed %d pipe operations between two %s\n\n",
+			loops, threaded ? "threads" : "processes");
+
+		result_usec = diff.tv_sec * USEC_PER_SEC;
+		result_usec += diff.tv_usec;
+
+		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+		       (unsigned long) diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+
+		printf(" %14lf usecs/op\n",
+		       (double)result_usec / (double)loops);
+		printf(" %14d ops/sec\n",
+		       (int)((double)loops /
+			     ((double)result_usec / (double)USEC_PER_SEC)));
+		break;
+
+	case BENCH_FORMAT_SIMPLE:
+		printf("%lu.%03lu\n",
+		       (unsigned long) diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+		break;
+
+	default:
+		/* An unknown output format here means something is badly wrong. */
+		fprintf(stderr, "Unknown format:%d\n", bench_format);
+		exit(1);
+		break;
+	}
+
+	return 0;
+}
diff --git a/builtin-annotate.c b/builtin-annotate.c
new file mode 100644
index 0000000..51709a9
--- /dev/null
+++ b/builtin-annotate.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-annotate.c
+ *
+ * Builtin annotate command: Analyze the perf.data input file,
+ * look up and read DSOs and symbol information and display
+ * a histogram of results, along various sorting keys.
+ */
+#include "builtin.h"
+
+#include "util/util.h"
+#include "util/color.h"
+#include <linux/list.h>
+#include "util/cache.h"
+#include <linux/rbtree.h>
+#include "util/symbol.h"
+
+#include "perf.h"
+#include "util/debug.h"
+
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/annotate.h"
+#include "util/event.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-events.h"
+#include "util/thread.h"
+#include "util/sort.h"
+#include "util/hist.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/data.h"
+#include "arch/common.h"
+#include "util/block-range.h"
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+
+/* State of one "perf annotate" invocation; most fields are filled from the command line. */
+struct perf_annotate {
+	struct perf_tool tool;		/* event callbacks; container_of() maps it back to this struct */
+	struct perf_session *session;
+	bool	   use_tui, use_stdio, use_stdio2, use_gtk;
+	bool	   full_paths;
+	bool	   print_line;
+	bool	   skip_missing;
+	bool	   has_br_stack;	/* set when the file header has HEADER_BRANCH_STACK */
+	const char *sym_hist_filter;	/* when set, only the symbol with this name is annotated */
+	const char *cpu_list;
+	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);	/* built from cpu_list; samples from other CPUs are skipped */
+};
+
+/*
+ * Given one basic block:
+ *
+ *	from	to		branch_i
+ *	* ----> *
+ *		|
+ *		| block
+ *		v
+ *		* ----> *
+ *		from	to	branch_i+1
+ *
+ * where the horizontal are the branches and the vertical is the executed
+ * block of instructions.
+ *
+ * We count, for each 'instruction', the number of blocks that covered it as
+ * well as count the ratio each branch is taken.
+ *
+ * We can do this without knowing the actual instruction stream by keeping
+ * track of the address ranges. We break down ranges such that there is no
+ * overlap and iterate from the start until the end.
+ *
+ * @acme: once we parse the objdump output _before_ processing the samples,
+ * we can easily fold the branch.cycles IPC bits in.
+ */
+static void process_basic_block(struct addr_map_symbol *start,
+				struct addr_map_symbol *end,
+				struct branch_flags *flags)
+{
+	struct symbol *sym = start->sym;
+	struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
+	struct block_range_iter iter;
+	struct block_range *entry;
+
+	/*
+	 * Sanity; NULL isn't executable and the CPU cannot execute backwards
+	 */
+	if (!start->addr || start->addr > end->addr)
+		return;
+
+	iter = block_range__create(start->addr, end->addr);
+	if (!block_range_iter__valid(&iter))
+		return;
+
+	/*
+	 * First block in range is a branch target.
+	 */
+	entry = block_range_iter(&iter);
+	assert(entry->is_target);
+	entry->entry++;
+
+	/* Bump the coverage of every sub-range and track the symbol's maximum. */
+	do {
+		entry = block_range_iter(&iter);
+
+		entry->coverage++;
+		entry->sym = sym;
+
+		if (notes)
+			notes->max_coverage = max(notes->max_coverage, entry->coverage);
+
+	} while (block_range_iter__next(&iter));
+
+	/*
+	 * Last block in range is a branch.
+	 */
+	entry = block_range_iter(&iter);
+	assert(entry->is_branch);
+	entry->taken++;
+	if (flags->predicted)
+		entry->pred++;
+}
+
+/*
+ * Walk the sample's branch stack from its last entry to its first and
+ * feed every stretch between one entry's target and the following
+ * entry's source to process_basic_block().
+ */
+static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
+				 struct perf_sample *sample)
+{
+	struct addr_map_symbol *last_to = NULL;
+	struct branch_info *entries;
+	int idx;
+
+	if (bs == NULL || bs->nr == 0)
+		return;
+
+	entries = sample__resolve_bstack(sample, al);
+	if (entries == NULL)
+		return;
+
+	idx = bs->nr - 1;
+	while (idx >= 0) {
+		struct branch_info *cur = &entries[idx];
+
+		/*
+		 * XXX filter against symbol
+		 */
+		if (last_to != NULL)
+			process_basic_block(last_to, &cur->from, &cur->flags);
+
+		last_to = &cur->to;
+		idx--;
+	}
+
+	free(entries);
+}
+
+/*
+ * add_entry_cb of the branch iterator: accounts the cycles of the
+ * sample's branch stack, then counts the sample against both ends of the
+ * entry's branch.  Returns the first non-zero error, otherwise 0.
+ */
+static int hist_iter__branch_callback(struct hist_entry_iter *iter,
+				      struct addr_location *al __maybe_unused,
+				      bool single __maybe_unused,
+				      void *arg __maybe_unused)
+{
+	struct hist_entry *he = iter->he;
+	struct perf_sample *sample = iter->sample;
+	struct perf_evsel *evsel = iter->evsel;
+	struct branch_info *bi;
+	int err;
+
+	hist__account_cycles(sample->branch_stack, al, sample, false);
+
+	bi = he->branch_info;
+
+	err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx);
+	if (!err)
+		err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx);
+
+	return err;
+}
+
+/*
+ * Add @sample to the histograms through the branch iterator.
+ *
+ * The sample is resolved into a private addr_location, which is released
+ * again on every path that follows a successful resolve.  Returns -1 when
+ * the sample cannot be resolved, 0 when it has no symbol, otherwise the
+ * result of hist_entry_iter__add().
+ */
+static int process_branch_callback(struct perf_evsel *evsel,
+				   struct perf_sample *sample,
+				   struct addr_location *al __maybe_unused,
+				   struct perf_annotate *ann,
+				   struct machine *machine)
+{
+	struct hist_entry_iter iter = {
+		.evsel		= evsel,
+		.sample		= sample,
+		.add_entry_cb	= hist_iter__branch_callback,
+		.hide_unresolved	= symbol_conf.hide_unresolved,
+		.ops		= &hist_iter_branch,
+	};
+
+	struct addr_location a;
+	int ret = 0;
+
+	if (machine__resolve(machine, &a, sample) < 0)
+		return -1;
+
+	if (a.sym == NULL)
+		goto out_put;
+
+	if (a.map != NULL)
+		a.map->dso->hit = 1;
+
+	ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
+
+out_put:
+	/* Release the location as process_sample_event() does for its own. */
+	addr_location__put(&a);
+	return ret;
+}
+
+/* True when ui__has_annotation() says so or --stdio2 was requested. */
+static bool has_annotation(struct perf_annotate *ann)
+{
+	if (ui__has_annotation())
+		return true;
+
+	return ann->use_stdio2;
+}
+
+/*
+ * Account one resolved sample.
+ *
+ * Returns 0 when the sample is dropped by the symbol filter, the result
+ * of process_branch_callback() when the branch-stack path is taken,
+ * -ENOMEM when no hist entry could be added, otherwise the result of
+ * hist_entry__inc_addr_samples().
+ */
+static int perf_evsel__add_sample(struct perf_evsel *evsel,
+				  struct perf_sample *sample,
+				  struct addr_location *al,
+				  struct perf_annotate *ann,
+				  struct machine *machine)
+{
+	struct hists *hists = evsel__hists(evsel);
+	struct hist_entry *he;
+	int ret;
+
+	/* The name filter is applied here only when the branch-stack path below is not taken. */
+	if ((!ann->has_br_stack || !has_annotation(ann)) &&
+	    ann->sym_hist_filter != NULL &&
+	    (al->sym == NULL ||
+	     strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
+		/* We're only interested in a symbol named sym_hist_filter */
+		/*
+		 * FIXME: why isn't this done in the symbol_filter when loading
+		 * the DSO?
+		 */
+		if (al->sym != NULL) {
+			/* Unlink the symbol from its DSO's tree before deleting it. */
+			rb_erase(&al->sym->rb_node,
+				 &al->map->dso->symbols[al->map->type]);
+			symbol__delete(al->sym);
+			dso__reset_find_symbol_cache(al->map->dso);
+		}
+		return 0;
+	}
+
+	/*
+	 * XXX filtered samples can still have branch entries pointing into our
+	 * symbol and are missed.
+	 */
+	process_branch_stack(sample->branch_stack, al, sample);
+
+	if (ann->has_br_stack && has_annotation(ann))
+		return process_branch_callback(evsel, sample, al, ann, machine);
+
+	he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
+	if (he == NULL)
+		return -ENOMEM;
+
+	ret = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr);
+	hists__inc_nr_samples(hists, true);
+	return ret;
+}
+
+/*
+ * perf_tool sample callback.
+ *
+ * Resolves the sample and, unless it comes from a CPU outside the
+ * requested list or is flagged as filtered, accounts it.  Returns 0 on
+ * success and -1 when resolving or accounting fails.
+ */
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine)
+{
+	struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
+	struct addr_location al;
+	bool cpu_selected;
+	int ret = 0;
+
+	if (machine__resolve(machine, &al, sample) < 0) {
+		pr_warning("problem processing %d event, skipping it.\n",
+			   event->header.type);
+		return -1;
+	}
+
+	/* Without a CPU list every CPU is selected. */
+	cpu_selected = !ann->cpu_list || test_bit(sample->cpu, ann->cpu_bitmap);
+
+	if (cpu_selected && !al.filtered &&
+	    perf_evsel__add_sample(evsel, sample, &al, ann, machine)) {
+		pr_warning("problem incrementing symbol count, "
+			   "skipping event\n");
+		ret = -1;
+	}
+
+	addr_location__put(&al);
+	return ret;
+}
+
+/*
+ * Print the annotation of @he's symbol through the stdio2 printer when
+ * --stdio2 was given, through the classic tty printer otherwise.
+ */
+static int hist_entry__tty_annotate(struct hist_entry *he,
+				    struct perf_evsel *evsel,
+				    struct perf_annotate *ann)
+{
+	struct symbol *sym = he->ms.sym;
+	struct map *map = he->ms.map;
+
+	if (ann->use_stdio2)
+		return symbol__tty_annotate2(sym, map, evsel,
+					     ann->print_line, ann->full_paths);
+
+	return symbol__tty_annotate(sym, map, evsel,
+				    ann->print_line, ann->full_paths, 0, 0);
+}
+
+/*
+ * Walk the sorted hist entries of @hists and annotate each eligible one
+ * through the active front end (GTK, TUI or plain tty).
+ *
+ * Entries without a symbol, whose DSO already has annotate_warned set,
+ * that do not match the symbol filter, or that have no annotation source
+ * are skipped, moving in the direction of the last key seen.
+ */
+static void hists__find_annotations(struct hists *hists,
+				    struct perf_evsel *evsel,
+				    struct perf_annotate *ann)
+{
+	struct rb_node *nd = rb_first(&hists->entries), *next;
+	int key = K_RIGHT;
+
+	while (nd) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+		struct annotation *notes;
+
+		if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
+			goto find_next;
+
+		if (ann->sym_hist_filter &&
+		    (strcmp(he->ms.sym->name, ann->sym_hist_filter) != 0))
+			goto find_next;
+
+		notes = symbol__annotation(he->ms.sym);
+		if (notes->src == NULL) {
+			/* The label sits inside this block so all skip cases share the stepping code. */
+find_next:
+			if (key == K_LEFT)
+				nd = rb_prev(nd);
+			else
+				nd = rb_next(nd);
+			continue;
+		}
+
+		if (use_browser == 2) {
+			int ret;
+			int (*annotate)(struct hist_entry *he,
+					struct perf_evsel *evsel,
+					struct hist_browser_timer *hbt);
+
+			/* The GTK annotate routine is looked up by name at run time. */
+			annotate = dlsym(perf_gtk_handle,
+					 "hist_entry__gtk_annotate");
+			if (annotate == NULL) {
+				ui__error("GTK browser not found!\n");
+				return;
+			}
+
+			ret = annotate(he, evsel, NULL);
+			if (!ret || !ann->skip_missing)
+				return;
+
+			/* skip missing symbols */
+			nd = rb_next(nd);
+		} else if (use_browser == 1) {
+			key = hist_entry__tui_annotate(he, evsel, NULL);
+
+			switch (key) {
+			case -1:
+				if (!ann->skip_missing)
+					return;
+				/* fall through */
+			case K_RIGHT:
+				next = rb_next(nd);
+				break;
+			case K_LEFT:
+				next = rb_prev(nd);
+				break;
+			default:
+				return;
+			}
+
+			/* Stay on the current entry when there is nothing in that direction. */
+			if (next != NULL)
+				nd = next;
+		} else {
+			hist_entry__tty_annotate(he, evsel, ann);
+			nd = rb_next(nd);
+			/*
+			 * Since we have a hist_entry per IP for the same
+			 * symbol, free he->ms.sym->src to signal we already
+			 * processed this symbol.
+			 */
+			zfree(&notes->src->cycles_hist);
+			zfree(&notes->src);
+		}
+	}
+}
+
+/*
+ * Process the session's events and annotate the resulting histograms.
+ *
+ * Returns the first non-zero result of the setup and event-processing
+ * steps; everything after them, including the "no samples" and "GTK
+ * browser not found" reports, leaves the return value at 0.
+ */
+static int __cmd_annotate(struct perf_annotate *ann)
+{
+	struct perf_session *session = ann->session;
+	struct perf_evsel *pos;
+	u64 total_nr_samples = 0;
+	int ret;
+
+	if (ann->cpu_list) {
+		ret = perf_session__cpu_bitmap(session, ann->cpu_list,
+					       ann->cpu_bitmap);
+		if (ret)
+			return ret;
+	}
+
+	if (!objdump_path) {
+		ret = perf_env__lookup_objdump(&session->header.env);
+		if (ret)
+			return ret;
+	}
+
+	ret = perf_session__process_events(session);
+	if (ret)
+		return ret;
+
+	/* With -D only the event counts are printed. */
+	if (dump_trace) {
+		perf_session__fprintf_nr_events(session, stdout);
+		perf_evlist__fprintf_nr_events(session->evlist, stdout);
+		return ret;
+	}
+
+	if (verbose > 3)
+		perf_session__fprintf(session, stdout);
+
+	if (verbose > 2)
+		perf_session__fprintf_dsos(session, stdout);
+
+	evlist__for_each_entry(session->evlist, pos) {
+		struct hists *hists = evsel__hists(pos);
+		u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+
+		if (nr_samples == 0)
+			continue;
+
+		total_nr_samples += nr_samples;
+		hists__collapse_resort(hists, NULL);
+		/* Don't sort callchain */
+		perf_evsel__reset_sample_bit(pos, CALLCHAIN);
+		perf_evsel__output_resort(pos, NULL);
+
+		/* With --group only group leaders are annotated. */
+		if (symbol_conf.event_group &&
+		    !perf_evsel__is_group_leader(pos))
+			continue;
+
+		hists__find_annotations(hists, pos, ann);
+	}
+
+	if (total_nr_samples == 0) {
+		ui__error("The %s file has no samples!\n", session->data->file.path);
+		return ret;
+	}
+
+	if (use_browser == 2) {
+		void (*show_annotations)(void);
+
+		show_annotations = dlsym(perf_gtk_handle,
+					 "perf_gtk__show_annotations");
+		if (show_annotations != NULL)
+			show_annotations();
+		else
+			ui__error("GTK browser not found!\n");
+	}
+
+	return ret;
+}
+
+/* NULL-terminated usage lines shown by parse_options() and usage_with_options(). */
+static const char * const annotate_usage[] = {
+	"perf annotate [<options>]",
+	NULL
+};
+
+/*
+ * Entry point of "perf annotate".
+ *
+ * Parses the command line (a single leftover argument is taken as the
+ * symbol filter), opens the input session, selects the front end and the
+ * sort mode, then hands over to __cmd_annotate().  Returns its result, or
+ * a negative value when setup fails.
+ */
+int cmd_annotate(int argc, const char **argv)
+{
+	struct perf_annotate annotate = {
+		.tool = {
+			.sample	= process_sample_event,
+			.mmap	= perf_event__process_mmap,
+			.mmap2	= perf_event__process_mmap2,
+			.comm	= perf_event__process_comm,
+			.exit	= perf_event__process_exit,
+			.fork	= perf_event__process_fork,
+			.namespaces = perf_event__process_namespaces,
+			.attr	= perf_event__process_attr,
+			.build_id = perf_event__process_build_id,
+			.tracing_data   = perf_event__process_tracing_data,
+			.feature	= perf_event__process_feature,
+			.ordered_events = true,
+			.ordering_requires_timestamps = true,
+		},
+	};
+	struct perf_data data = {
+		.mode  = PERF_DATA_MODE_READ,
+	};
+	struct option options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
+	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
+		   "only consider symbols in these dsos"),
+	OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol",
+		    "symbol to annotate"),
+	OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "do not show any message"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
+	OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
+	OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
+	OPT_BOOLEAN(0, "stdio2", &annotate.use_stdio2, "Use the stdio interface"),
+	OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
+		    "don't load vmlinux even if found"),
+	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+		   "file", "vmlinux pathname"),
+	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
+		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
+	OPT_BOOLEAN('l', "print-line", &annotate.print_line,
+		    "print matching source lines (may be slow)"),
+	OPT_BOOLEAN('P', "full-paths", &annotate.full_paths,
+		    "Don't shorten the displayed pathnames"),
+	OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
+		    "Skip symbols that cannot be annotated"),
+	OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
+	OPT_CALLBACK(0, "symfs", NULL, "directory",
+		     "Look for files with symbols relative to this directory",
+		     symbol__config_symfs),
+	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
+		    "Interleave source code with assembly code (default)"),
+	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
+		    "Display raw encoding of assembly instructions (default)"),
+	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
+		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
+	OPT_STRING(0, "objdump", &objdump_path, "path",
+		   "objdump binary to use for disassembly and annotations"),
+	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
+		    "Show event group information together"),
+	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+		    "Show a column with the sum of periods"),
+	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
+		    "Show a column with the number of samples"),
+	OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
+			     "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
+			     stdio__config_color, "always"),
+	OPT_END()
+	};
+	int ret;
+
+	set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE);
+	set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE);
+
+
+	ret = hists__init();
+	if (ret < 0)
+		return ret;
+
+	argc = parse_options(argc, argv, options, annotate_usage, 0);
+	if (argc) {
+		/*
+		 * Special case: if there's an argument left then assume that
+		 * it's a symbol filter:
+		 */
+		if (argc > 1)
+			usage_with_options(annotate_usage, options);
+
+		annotate.sym_hist_filter = argv[0];
+	}
+
+	if (symbol_conf.show_nr_samples && annotate.use_gtk) {
+		pr_err("--show-nr-samples is not available in --gtk mode at this time\n");
+		/* ret is not negative at this point; report the rejection as a failure. */
+		return -EINVAL;
+	}
+
+	if (quiet)
+		perf_quiet_option();
+
+	data.file.path = input_name;
+
+	annotate.session = perf_session__new(&data, false, &annotate.tool);
+	if (annotate.session == NULL)
+		return -1;
+
+	annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
+						      HEADER_BRANCH_STACK);
+
+	ret = symbol__annotation_init();
+	if (ret < 0)
+		goto out_delete;
+
+	annotation_config__init();
+
+	symbol_conf.try_vmlinux_path = true;
+
+	ret = symbol__init(&annotate.session->header.env);
+	if (ret < 0)
+		goto out_delete;
+
+	/* An explicit front-end option overrides the current use_browser value. */
+	if (annotate.use_stdio || annotate.use_stdio2)
+		use_browser = 0;
+	else if (annotate.use_tui)
+		use_browser = 1;
+	else if (annotate.use_gtk)
+		use_browser = 2;
+
+	setup_browser(true);
+
+	if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack) {
+		sort__mode = SORT_MODE__BRANCH;
+		if (setup_sorting(annotate.session->evlist) < 0)
+			usage_with_options(annotate_usage, options);
+	} else {
+		if (setup_sorting(NULL) < 0)
+			usage_with_options(annotate_usage, options);
+	}
+
+	ret = __cmd_annotate(&annotate);
+
+out_delete:
+	/*
+	 * Speed up the exit process, for large files this can
+	 * take quite a while.
+	 *
+	 * XXX Enable this when using valgrind or if we ever
+	 * librarize this command.
+	 *
+	 * Also experiment with obstacks to see how much speed
+	 * up we'll get here.
+	 *
+	 * perf_session__delete(session);
+	 */
+	return ret;
+}
diff --git a/builtin-bench.c b/builtin-bench.c
new file mode 100644
index 0000000..17a6bcd
--- /dev/null
+++ b/builtin-bench.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-bench.c
+ *
+ * General benchmarking collections provided by perf
+ *
+ * Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+
+/*
+ * Available benchmark collection list:
+ *
+ *  sched ... scheduler and IPC performance
+ *  mem   ... memory access performance
+ *  numa  ... NUMA scheduling and MM performance
+ *  futex ... Futex performance
+ */
+#include "perf.h"
+#include "util/util.h"
+#include <subcmd/parse-options.h>
+#include "builtin.h"
+#include "bench/bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+/* Entry point of a single benchmark; it receives its own argc/argv. */
+typedef int (*bench_fn_t)(int argc, const char **argv);
+
+/* One benchmark inside a collection. */
+struct bench {
+	const char	*name;		/* compared against the benchmark name on the command line */
+	const char	*summary;	/* one-line description printed by dump_benchmarks() */
+	bench_fn_t	fn;		/* entry point; NULL for the "all" and terminator entries */
+};
+
+/*
+ * Per-collection benchmark tables.  Every table ends with an all-NULL
+ * terminator entry; the "all" entry carries no function of its own.
+ */
+#ifdef HAVE_LIBNUMA_SUPPORT
+/* NUMA benchmarks, only built when libnuma support is available. */
+static struct bench numa_benchmarks[] = {
+	{ "mem",	"Benchmark for NUMA workloads",			bench_numa		},
+	{ "all",	"Run all NUMA benchmarks",			NULL			},
+	{ NULL,		NULL,						NULL			}
+};
+#endif
+
+/* Scheduler and IPC benchmarks. */
+static struct bench sched_benchmarks[] = {
+	{ "messaging",	"Benchmark for scheduling and IPC",		bench_sched_messaging	},
+	{ "pipe",	"Benchmark for pipe() between two processes",	bench_sched_pipe	},
+	{ "all",	"Run all scheduler benchmarks",		NULL			},
+	{ NULL,		NULL,						NULL			}
+};
+
+/* Memory access benchmarks. */
+static struct bench mem_benchmarks[] = {
+	{ "memcpy",	"Benchmark for memcpy() functions",		bench_mem_memcpy	},
+	{ "memset",	"Benchmark for memset() functions",		bench_mem_memset	},
+	{ "all",	"Run all memory access benchmarks",		NULL			},
+	{ NULL,		NULL,						NULL			}
+};
+
+/* Futex benchmarks. */
+static struct bench futex_benchmarks[] = {
+	{ "hash",	"Benchmark for futex hash table",               bench_futex_hash	},
+	{ "wake",	"Benchmark for futex wake calls",               bench_futex_wake	},
+	{ "wake-parallel", "Benchmark for parallel futex wake calls",   bench_futex_wake_parallel },
+	{ "requeue",	"Benchmark for futex requeue calls",            bench_futex_requeue	},
+	/* pi-futexes */
+	{ "lock-pi",	"Benchmark for futex lock_pi calls",            bench_futex_lock_pi	},
+	{ "all",	"Run all futex benchmarks",			NULL			},
+	{ NULL,		NULL,						NULL			}
+};
+
+/* A named group of benchmarks. */
+struct collection {
+	const char	*name;		/* compared against the collection name on the command line */
+	const char	*summary;	/* one-line description printed by print_usage() */
+	struct bench	*benchmarks;	/* NULL-terminated table; NULL for the "all" pseudo collection */
+};
+
+/*
+ * All known collections, terminated by an all-NULL entry.  The "all" entry
+ * has no benchmark table; cmd_bench() handles that name before it walks
+ * this array.
+ */
+static struct collection collections[] = {
+	{ "sched",	"Scheduler and IPC benchmarks",			sched_benchmarks	},
+	{ "mem",	"Memory access benchmarks",			mem_benchmarks		},
+#ifdef HAVE_LIBNUMA_SUPPORT
+	{ "numa",	"NUMA scheduling and MM benchmarks",		numa_benchmarks		},
+#endif
+	{"futex",       "Futex stressing benchmarks",                   futex_benchmarks        },
+	{ "all",	"All benchmarks",				NULL			},
+	{ NULL,		NULL,						NULL			}
+};
+
+/*
+ * Iterate over all benchmark collections; stops at the entry whose name
+ * is NULL.  Arguments are parenthesized so any lvalue expression works.
+ */
+#define for_each_collection(coll) \
+	for ((coll) = collections; (coll)->name; (coll)++)
+
+/*
+ * Iterate over all benchmarks within a collection.  A collection without
+ * a benchmark table (benchmarks == NULL) yields no iterations.
+ */
+#define for_each_bench(coll, bench) \
+	for ((bench) = (coll)->benchmarks; (bench) && (bench)->name; (bench)++)
+
+/* Print the name and summary of every benchmark that belongs to @coll. */
+static void dump_benchmarks(struct collection *coll)
+{
+	struct bench *entry;
+
+	printf("\n        # List of available benchmarks for collection '%s':\n\n", coll->name);
+
+	for_each_bench(coll, entry) {
+		printf("%14s: %s\n", entry->name, entry->summary);
+	}
+
+	putchar('\n');
+}
+
+/* Raw argument of -f/--format; cmd_bench() translates it into bench_format. */
+static const char *bench_format_str;
+
+/* Output/formatting style, exported to benchmark modules: */
+int bench_format = BENCH_FORMAT_DEFAULT;
+unsigned int bench_repeat = 10; /* default number of times to repeat the run */
+
+/* Common options parsed by cmd_bench() itself. */
+static const struct option bench_options[] = {
+	OPT_STRING('f', "format", &bench_format_str, "default|simple", "Specify the output formatting style"),
+	OPT_UINTEGER('r', "repeat",  &bench_repeat,   "Specify amount of times to repeat the run"),
+	OPT_END()
+};
+
+/* NULL-terminated usage lines, printed by print_usage() and passed to parse_options(). */
+static const char * const bench_usage[] = {
+	"perf bench [<common options>] <collection> <benchmark> [<options>]",
+	NULL
+};
+
+/* Print the usage lines followed by the list of all benchmark collections. */
+static void print_usage(void)
+{
+	const char * const *line;
+	struct collection *entry;
+
+	printf("Usage: \n");
+	for (line = bench_usage; *line; line++)
+		printf("\t%s\n", *line);
+	putchar('\n');
+
+	printf("        # List of all available benchmark collections:\n\n");
+
+	for_each_collection(entry) {
+		printf("%14s: %s\n", entry->name, entry->summary);
+	}
+	putchar('\n');
+}
+
+/*
+ * Translate a --format argument into a BENCH_FORMAT_* value.
+ *
+ * A NULL string (option not given) selects the default format; a string
+ * that matches neither known name yields BENCH_FORMAT_UNKNOWN.
+ */
+static int bench_str2int(const char *str)
+{
+	if (str == NULL || strcmp(str, BENCH_FORMAT_DEFAULT_STR) == 0)
+		return BENCH_FORMAT_DEFAULT;
+
+	if (strcmp(str, BENCH_FORMAT_SIMPLE_STR) == 0)
+		return BENCH_FORMAT_SIMPLE;
+
+	return BENCH_FORMAT_UNKNOWN;
+}
+
+/*
+ * Run one benchmark.  Before calling @fn the running task's ->comm[] is
+ * set to "<collection>-<benchmark>" and the same string is stored in
+ * argv[0].  The string is freed again before returning, so argv[0] must
+ * not be used by the caller afterwards.
+ *
+ * Returns whatever @fn returned.
+ */
+static int run_bench(const char *coll_name, const char *bench_name, bench_fn_t fn,
+		     int argc, const char **argv)
+{
+	/* "<coll>" + '-' + "<bench>" + terminating NUL */
+	int len = strlen(coll_name) + 1 + strlen(bench_name) + 1;
+	char *comm = zalloc(len);
+	int err;
+
+	BUG_ON(!comm);
+
+	scnprintf(comm, len, "%s-%s", coll_name, bench_name);
+
+	prctl(PR_SET_NAME, comm);
+	argv[0] = comm;
+
+	err = fn(argc, argv);
+
+	free(comm);
+
+	return err;
+}
+
+/*
+ * Run every benchmark of @coll in table order, stopping at the first
+ * entry without a function (the "all" entry).  Each benchmark is started
+ * with argc == 1; its return value is ignored.
+ */
+static void run_collection(struct collection *coll)
+{
+	const char *args[2] = { NULL, NULL };
+	struct bench *entry;
+
+	/*
+	 * TODO:
+	 *
+	 * Preparing preset parameters for
+	 * embedded, ordinary PC, HPC, etc...
+	 * would be helpful.
+	 */
+	for_each_bench(coll, entry) {
+		if (entry->fn == NULL)
+			break;
+
+		printf("# Running %s/%s benchmark...\n", coll->name, entry->name);
+		fflush(stdout);
+
+		/* args[0] is filled in by run_bench() */
+		args[1] = entry->name;
+		run_bench(coll->name, entry->name, entry->fn, 1, args);
+		printf("\n");
+	}
+}
+
+/* Run every benchmark of every collection, in table order. */
+static void run_all_collections(void)
+{
+	struct collection *entry;
+
+	for_each_collection(entry) {
+		run_collection(entry);
+	}
+}
+
+/*
+ * Entry point of 'perf bench'.
+ *
+ * Parses the common options, then dispatches on "<collection> <benchmark>":
+ *   - collection "all" runs every collection,
+ *   - a collection without a benchmark name lists its benchmarks,
+ *   - benchmark "all" runs the whole collection,
+ *   - otherwise the named benchmark is run with the remaining arguments.
+ *
+ * Returns 0 when only usage or a listing was printed, the benchmark's
+ * return value when a single benchmark was run, and 1 for an invalid
+ * option value or an unknown collection/benchmark name.
+ */
+int cmd_bench(int argc, const char **argv)
+{
+	struct collection *coll;
+	int ret = 0;
+
+	if (argc < 2) {
+		/* No collection specified. */
+		print_usage();
+		goto end;
+	}
+
+	argc = parse_options(argc, argv, bench_options, bench_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+
+	bench_format = bench_str2int(bench_format_str);
+	if (bench_format == BENCH_FORMAT_UNKNOWN) {
+		printf("Unknown format descriptor: '%s'\n", bench_format_str);
+		/* Bad option value is an error, like an unknown collection. */
+		ret = 1;
+		goto end;
+	}
+
+	if (bench_repeat == 0) {
+		printf("Invalid repeat option: Must specify a positive value\n");
+		ret = 1;
+		goto end;
+	}
+
+	if (argc < 1) {
+		print_usage();
+		goto end;
+	}
+
+	if (!strcmp(argv[0], "all")) {
+		run_all_collections();
+		goto end;
+	}
+
+	for_each_collection(coll) {
+		struct bench *bench;
+
+		if (strcmp(coll->name, argv[0]))
+			continue;
+
+		if (argc < 2) {
+			/* No bench specified. */
+			dump_benchmarks(coll);
+			goto end;
+		}
+
+		if (!strcmp(argv[1], "all")) {
+			run_collection(coll);
+			goto end;
+		}
+
+		for_each_bench(coll, bench) {
+			if (strcmp(bench->name, argv[1]))
+				continue;
+
+			if (bench_format == BENCH_FORMAT_DEFAULT)
+				printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name);
+			fflush(stdout);
+			/* The benchmark sees its own name as argv[0]. */
+			ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1);
+			goto end;
+		}
+
+		/* Not a benchmark name: allow asking for help on the collection. */
+		if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
+			dump_benchmarks(coll);
+			goto end;
+		}
+
+		printf("Unknown benchmark: '%s' for collection '%s'\n", argv[1], argv[0]);
+		ret = 1;
+		goto end;
+	}
+
+	printf("Unknown collection: '%s'\n", argv[0]);
+	ret = 1;
+
+end:
+	return ret;
+}
diff --git a/builtin-buildid-cache.c b/builtin-buildid-cache.c
new file mode 100644
index 0000000..41db2cb
--- /dev/null
+++ b/builtin-buildid-cache.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-buildid-cache.c
+ *
+ * Builtin buildid-cache command: Manages build-id cache
+ *
+ * Copyright (C) 2010, Red Hat Inc.
+ * Copyright (C) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+#include <dirent.h>
+#include <errno.h>
+#include <unistd.h>
+#include "builtin.h"
+#include "perf.h"
+#include "namespaces.h"
+#include "util/cache.h"
+#include "util/debug.h"
+#include "util/header.h"
+#include <subcmd/parse-options.h>
+#include "util/strlist.h"
+#include "util/build-id.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/time-utils.h"
+
+/*
+ * Strip the last path component from @proc_dir and ask
+ * sysfs__sprintf_build_id() for the build-id below the remaining
+ * directory, writing it to @sbuildid.
+ *
+ * Returns -1 if @proc_dir contains no '/', otherwise the helper's result.
+ */
+static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
+{
+	char root_dir[PATH_MAX];
+	char *slash;
+
+	strlcpy(root_dir, proc_dir, sizeof(root_dir));
+
+	slash = strrchr(root_dir, '/');
+	if (slash == NULL)
+		return -1;
+
+	*slash = '\0';
+
+	return sysfs__sprintf_build_id(root_dir, sbuildid);
+}
+
+/* Fill @dir (of size @sz) with the name produced by fetch_current_timestamp(). */
+static int build_id_cache__kcore_dir(char *dir, size_t sz)
+{
+	int err = fetch_current_timestamp(dir, sz);
+
+	return err;
+}
+
+/*
+ * Check whether "<from_dir>/kallsyms" and "<to_dir>/kallsyms" agree on the
+ * start address of a reference relocation symbol.
+ *
+ * The first name from ref_reloc_sym_names[] that can be looked up in the
+ * source file is used; the same name must then be found in the target
+ * file at the same address.
+ */
+static bool same_kallsyms_reloc(const char *from_dir, char *to_dir)
+{
+	char from[PATH_MAX];
+	char to[PATH_MAX];
+	const char *name;
+	u64 from_addr = 0, to_addr = 0;
+	int idx = 0, err = -1;
+
+	scnprintf(from, sizeof(from), "%s/kallsyms", from_dir);
+	scnprintf(to, sizeof(to), "%s/kallsyms", to_dir);
+
+	while ((name = ref_reloc_sym_names[idx++]) != NULL) {
+		err = kallsyms__get_function_start(from, name, &from_addr);
+		if (err == 0)
+			break;
+	}
+
+	/* No reference symbol could be resolved in the source file. */
+	if (err != 0)
+		return false;
+
+	if (kallsyms__get_function_start(to, name, &to_addr) != 0)
+		return false;
+
+	return from_addr == to_addr;
+}
+
+/*
+ * Look through the sub-directories of @to_dir for one whose "modules"
+ * file compares equal to "<from_dir>/modules" and whose kallsyms agree
+ * with @from_dir on the reference relocation symbol.
+ *
+ * On a match, @to_dir (of size @to_dir_sz) is overwritten with the path
+ * of that sub-directory and 0 is returned.  Returns -1 if @to_dir cannot
+ * be opened or nothing matches.
+ */
+static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir,
+					  size_t to_dir_sz)
+{
+	char from[PATH_MAX];
+	char to[PATH_MAX];
+	char to_subdir[PATH_MAX];
+	struct dirent *dent;
+	int ret = -1;
+	DIR *d = opendir(to_dir);
+
+	if (d == NULL)
+		return -1;
+
+	scnprintf(from, sizeof(from), "%s/modules", from_dir);
+
+	while ((dent = readdir(d)) != NULL) {
+		if (dent->d_type != DT_DIR)
+			continue;
+
+		scnprintf(to_subdir, sizeof(to_subdir), "%s/%s",
+			  to_dir, dent->d_name);
+		scnprintf(to, sizeof(to), "%s/%s/modules", to_dir,
+			  dent->d_name);
+
+		if (compare_proc_modules(from, to))
+			continue;
+		if (!same_kallsyms_reloc(from_dir, to_subdir))
+			continue;
+
+		strlcpy(to_dir, to_subdir, to_dir_sz);
+		ret = 0;
+		break;
+	}
+
+	closedir(d);
+
+	return ret;
+}
+
+/*
+ * Copy the kcore found at @filename into the build-id cache.
+ *
+ * @filename must end in "/kcore"; its directory is the copy source.  The
+ * destination is "<buildid_dir>/<DSO__NAME_KCORE>/<build-id>/<dir>", where
+ * <dir> is the name produced by build_id_cache__kcore_dir().  Unless
+ * @force is set, nothing is copied when a matching copy already exists.
+ *
+ * Returns 0 on success or when a matching copy exists, -1 on failure.
+ */
+static int build_id_cache__add_kcore(const char *filename, bool force)
+{
+	char dir[32], sbuildid[SBUILD_ID_SIZE];
+	char from_dir[PATH_MAX], to_dir[PATH_MAX];
+	char *p;
+
+	strlcpy(from_dir, filename, sizeof(from_dir));
+
+	/* Only accept paths whose last component is exactly "kcore". */
+	p = strrchr(from_dir, '/');
+	if (!p || strcmp(p + 1, "kcore"))
+		return -1;
+	*p = '\0';
+
+	if (build_id_cache__kcore_buildid(from_dir, sbuildid) < 0)
+		return -1;
+
+	scnprintf(to_dir, sizeof(to_dir), "%s/%s/%s",
+		  buildid_dir, DSO__NAME_KCORE, sbuildid);
+
+	/* On a match, to_dir is rewritten to the existing sub-directory. */
+	if (!force &&
+	    !build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) {
+		pr_debug("same kcore found in %s\n", to_dir);
+		return 0;
+	}
+
+	if (build_id_cache__kcore_dir(dir, sizeof(dir)))
+		return -1;
+
+	scnprintf(to_dir, sizeof(to_dir), "%s/%s/%s/%s",
+		  buildid_dir, DSO__NAME_KCORE, sbuildid, dir);
+
+	if (mkdir_p(to_dir, 0755))
+		return -1;
+
+	if (kcore_copy(from_dir, to_dir)) {
+		/*
+		 * Copy failed: remove the directories created above, from the
+		 * innermost outwards.  to_dir is truncated in place one path
+		 * component at a time, and each level is only attempted if
+		 * the rmdir() of the level below succeeded.
+		 */
+		/* Remove YYYYmmddHHMMSShh directory */
+		if (!rmdir(to_dir)) {
+			p = strrchr(to_dir, '/');
+			if (p)
+				*p = '\0';
+			/* Try to remove buildid directory */
+			if (!rmdir(to_dir)) {
+				p = strrchr(to_dir, '/');
+				if (p)
+					*p = '\0';
+				/* Try to remove [kernel.kcore] directory */
+				rmdir(to_dir);
+			}
+		}
+		return -1;
+	}
+
+	pr_debug("kcore added to build-id cache directory %s\n", to_dir);
+
+	return 0;
+}
+
+/*
+ * Read the build-id of @filename while inside the mount namespace
+ * described by @nsi and add the file to the build-id cache.
+ *
+ * Returns -1 if no build-id could be read, otherwise the result of
+ * build_id_cache__add_s().
+ */
+static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi)
+{
+	char sbuild_id[SBUILD_ID_SIZE];
+	u8 build_id[BUILD_ID_SIZE];
+	struct nscookie nsc;
+	int ret;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	ret = filename__read_build_id(filename, &build_id, sizeof(build_id));
+	nsinfo__mountns_exit(&nsc);
+
+	if (ret < 0) {
+		pr_debug("Couldn't read a build-id in %s\n", filename);
+		return -1;
+	}
+
+	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+
+	ret = build_id_cache__add_s(sbuild_id, filename, nsi, false, false);
+	pr_debug("Adding %s %s: %s\n", sbuild_id, filename,
+		 ret ? "FAIL" : "Ok");
+
+	return ret;
+}
+
+/*
+ * Read the build-id of @filename while inside the mount namespace
+ * described by @nsi and remove that build-id from the cache.
+ *
+ * Returns -1 if no build-id could be read, otherwise the result of
+ * build_id_cache__remove_s().
+ */
+static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi)
+{
+	char sbuild_id[SBUILD_ID_SIZE];
+	u8 build_id[BUILD_ID_SIZE];
+	struct nscookie nsc;
+	int ret;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	ret = filename__read_build_id(filename, &build_id, sizeof(build_id));
+	nsinfo__mountns_exit(&nsc);
+
+	if (ret < 0) {
+		pr_debug("Couldn't read a build-id in %s\n", filename);
+		return -1;
+	}
+
+	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+
+	ret = build_id_cache__remove_s(sbuild_id);
+	pr_debug("Removing %s %s: %s\n", sbuild_id, filename,
+		 ret ? "FAIL" : "Ok");
+
+	return ret;
+}
+
+/*
+ * Remove every build-id that build_id_cache__list_build_ids() reports for
+ * @pathname, stopping at the first removal that fails.
+ *
+ * Returns 0 on success, otherwise the first error encountered.
+ */
+static int build_id_cache__purge_path(const char *pathname, struct nsinfo *nsi)
+{
+	struct strlist *ids;
+	struct str_node *node;
+	int err = build_id_cache__list_build_ids(pathname, nsi, &ids);
+
+	if (!err) {
+		strlist__for_each_entry(node, ids) {
+			err = build_id_cache__remove_s(node->s);
+			pr_debug("Removing %s %s: %s\n", node->s, pathname,
+				 err ? "FAIL" : "Ok");
+			if (err)
+				break;
+		}
+		strlist__delete(ids);
+	}
+
+	pr_debug("Purging %s: %s\n", pathname, err ? "FAIL" : "Ok");
+
+	return err;
+}
+
+/*
+ * Skip callback for perf_session__fprintf_dsos_buildid(): returns false
+ * only for a DSO whose build-id cache file does not exist (ENOENT), so
+ * that only those DSOs are listed.
+ *
+ * A cache file that exists but cannot be read, or whose build-id differs
+ * from the DSO's, triggers a warning and the DSO is skipped.
+ */
+static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
+{
+	char filename[PATH_MAX];
+	u8 build_id[BUILD_ID_SIZE];
+
+	/*
+	 * Without a cache file name there is nothing to read or compare.
+	 * Skip the DSO instead of falling through to code that would use
+	 * the uninitialized filename and build_id buffers.
+	 */
+	if (!dso__build_id_filename(dso, filename, sizeof(filename), false))
+		return true;
+
+	if (filename__read_build_id(filename, build_id,
+				    sizeof(build_id)) != sizeof(build_id)) {
+		if (errno == ENOENT)
+			return false;
+
+		pr_warning("Problems with %s file, consider removing it from the cache\n",
+			   filename);
+	} else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) {
+		pr_warning("Problems with %s file, consider removing it from the cache\n",
+			   filename);
+	}
+
+	return true;
+}
+
+/*
+ * Print to @fp the build-ids of the session's DSOs, filtered through
+ * dso__missing_buildid_cache().  Always returns 0.
+ */
+static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *fp)
+{
+	perf_session__fprintf_dsos_buildid(session, fp,
+					   dso__missing_buildid_cache, 0);
+
+	return 0;
+}
+
+/*
+ * Refresh the cache entry of @filename: read its build-id inside the
+ * mount namespace described by @nsi, remove the entry if it is already
+ * cached, then add the file again.
+ *
+ * Returns -1 if no build-id could be read, otherwise the first error
+ * from the remove/add steps, or 0.
+ */
+static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi)
+{
+	char sbuild_id[SBUILD_ID_SIZE];
+	u8 build_id[BUILD_ID_SIZE];
+	struct nscookie nsc;
+	int ret;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	ret = filename__read_build_id(filename, &build_id, sizeof(build_id));
+	nsinfo__mountns_exit(&nsc);
+
+	if (ret < 0) {
+		pr_debug("Couldn't read a build-id in %s\n", filename);
+		return -1;
+	}
+
+	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+
+	/* The read result is not an error code from here on. */
+	ret = 0;
+	if (build_id_cache__cached(sbuild_id))
+		ret = build_id_cache__remove_s(sbuild_id);
+
+	if (ret == 0)
+		ret = build_id_cache__add_s(sbuild_id, filename, nsi, false,
+					    false);
+
+	pr_debug("Updating %s %s: %s\n", sbuild_id, filename,
+		 ret ? "FAIL" : "Ok");
+
+	return ret;
+}
+
+/*
+ * Entry point of 'perf buildid-cache'.
+ *
+ * At least one of the add/kcore/remove/purge/missing/update options must
+ * be given and no positional arguments are accepted; otherwise usage is
+ * printed.  The requested operations are performed in a fixed order:
+ * add, remove, purge, missing, update, kcore.
+ *
+ * Failures of individual files are reported as warnings and do not
+ * change the return value.  Returns -1 if the perf.data session or the
+ * symbol subsystem cannot be set up, otherwise the result of the
+ * --missing listing (0 when that option is not used).
+ */
+int cmd_buildid_cache(int argc, const char **argv)
+{
+	struct strlist *list;
+	struct str_node *pos;
+	int ret = 0;
+	int ns_id = -1;
+	bool force = false;
+	char const *add_name_list_str = NULL,
+		   *remove_name_list_str = NULL,
+		   *purge_name_list_str = NULL,
+		   *missing_filename = NULL,
+		   *update_name_list_str = NULL,
+		   *kcore_filename = NULL;
+	char sbuf[STRERR_BUFSIZE];
+
+	struct perf_data data = {
+		.mode  = PERF_DATA_MODE_READ,
+	};
+	struct perf_session *session = NULL;
+	struct nsinfo *nsi = NULL;
+
+	const struct option buildid_cache_options[] = {
+	OPT_STRING('a', "add", &add_name_list_str,
+		   "file list", "file(s) to add"),
+	OPT_STRING('k', "kcore", &kcore_filename,
+		   "file", "kcore file to add"),
+	OPT_STRING('r', "remove", &remove_name_list_str, "file list",
+		    "file(s) to remove"),
+	OPT_STRING('p', "purge", &purge_name_list_str, "file list",
+		    "file(s) to remove (remove old caches too)"),
+	OPT_STRING('M', "missing", &missing_filename, "file",
+		   "to find missing build ids in the cache"),
+	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+	OPT_STRING('u', "update", &update_name_list_str, "file list",
+		    "file(s) to update"),
+	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+	OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"),
+	OPT_END()
+	};
+	const char * const buildid_cache_usage[] = {
+		"perf buildid-cache [<options>]",
+		NULL
+	};
+
+	argc = parse_options(argc, argv, buildid_cache_options,
+			     buildid_cache_usage, 0);
+
+	if (argc || (!add_name_list_str && !kcore_filename &&
+		     !remove_name_list_str && !purge_name_list_str &&
+		     !missing_filename && !update_name_list_str))
+		usage_with_options(buildid_cache_usage, buildid_cache_options);
+
+	if (ns_id > 0)
+		nsi = nsinfo__new(ns_id);
+
+	if (missing_filename) {
+		data.file.path = missing_filename;
+		data.force     = force;
+
+		session = perf_session__new(&data, false, NULL);
+		if (session == NULL) {
+			/* Drop the namespace reference taken above. */
+			nsinfo__zput(nsi);
+			return -1;
+		}
+	}
+
+	if (symbol__init(session ? &session->header.env : NULL) < 0) {
+		/* Do not report success when nothing could be done. */
+		ret = -1;
+		goto out;
+	}
+
+	setup_pager();
+
+	if (add_name_list_str) {
+		list = strlist__new(add_name_list_str, NULL);
+		if (list) {
+			strlist__for_each_entry(pos, list)
+				if (build_id_cache__add_file(pos->s, nsi)) {
+					if (errno == EEXIST) {
+						pr_debug("%s already in the cache\n",
+							 pos->s);
+						continue;
+					}
+					pr_warning("Couldn't add %s: %s\n",
+						   pos->s, str_error_r(errno, sbuf, sizeof(sbuf)));
+				}
+
+			strlist__delete(list);
+		}
+	}
+
+	if (remove_name_list_str) {
+		list = strlist__new(remove_name_list_str, NULL);
+		if (list) {
+			strlist__for_each_entry(pos, list)
+				if (build_id_cache__remove_file(pos->s, nsi)) {
+					if (errno == ENOENT) {
+						pr_debug("%s wasn't in the cache\n",
+							 pos->s);
+						continue;
+					}
+					pr_warning("Couldn't remove %s: %s\n",
+						   pos->s, str_error_r(errno, sbuf, sizeof(sbuf)));
+				}
+
+			strlist__delete(list);
+		}
+	}
+
+	if (purge_name_list_str) {
+		list = strlist__new(purge_name_list_str, NULL);
+		if (list) {
+			strlist__for_each_entry(pos, list)
+				if (build_id_cache__purge_path(pos->s, nsi)) {
+					if (errno == ENOENT) {
+						pr_debug("%s wasn't in the cache\n",
+							 pos->s);
+						continue;
+					}
+					pr_warning("Couldn't remove %s: %s\n",
+						   pos->s, str_error_r(errno, sbuf, sizeof(sbuf)));
+				}
+
+			strlist__delete(list);
+		}
+	}
+
+	if (missing_filename)
+		ret = build_id_cache__fprintf_missing(session, stdout);
+
+	if (update_name_list_str) {
+		list = strlist__new(update_name_list_str, NULL);
+		if (list) {
+			strlist__for_each_entry(pos, list)
+				if (build_id_cache__update_file(pos->s, nsi)) {
+					if (errno == ENOENT) {
+						pr_debug("%s wasn't in the cache\n",
+							 pos->s);
+						continue;
+					}
+					pr_warning("Couldn't update %s: %s\n",
+						   pos->s, str_error_r(errno, sbuf, sizeof(sbuf)));
+				}
+
+			strlist__delete(list);
+		}
+	}
+
+	if (kcore_filename && build_id_cache__add_kcore(kcore_filename, force))
+		pr_warning("Couldn't add %s\n", kcore_filename);
+
+out:
+	perf_session__delete(session);
+	nsinfo__zput(nsi);
+
+	return ret;
+}
diff --git a/builtin-buildid-list.c b/builtin-buildid-list.c
new file mode 100644
index 0000000..78abbe8
--- /dev/null
+++ b/builtin-buildid-list.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-buildid-list.c
+ *
+ * Builtin buildid-list command: list buildids in perf.data, in the running
+ * kernel and in ELF files.
+ *
+ * Copyright (C) 2009, Red Hat Inc.
+ * Copyright (C) 2009, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "builtin.h"
+#include "perf.h"
+#include "util/build-id.h"
+#include "util/cache.h"
+#include "util/debug.h"
+#include <subcmd/parse-options.h>
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/data.h"
+#include <errno.h>
+
+/*
+ * Print the build-id obtained from sysfs__sprintf_build_id("/") to @fp,
+ * followed by a newline.
+ *
+ * Returns the fprintf() result, the helper's negative error, or -EINVAL
+ * when the helper returned an unexpected length.
+ */
+static int sysfs__fprintf_build_id(FILE *fp)
+{
+	char sbuild_id[SBUILD_ID_SIZE];
+	int len = sysfs__sprintf_build_id("/", sbuild_id);
+
+	if (len == sizeof(sbuild_id))
+		return fprintf(fp, "%s\n", sbuild_id);
+
+	return len < 0 ? len : -EINVAL;
+}
+
+/*
+ * Print the build-id of the file @name to @fp, followed by a newline.
+ *
+ * Returns the fprintf() result, the negative error from
+ * filename__sprintf_build_id(), or -EINVAL when that helper returned an
+ * unexpected length.
+ */
+static int filename__fprintf_build_id(const char *name, FILE *fp)
+{
+	char sbuild_id[SBUILD_ID_SIZE];
+	int len = filename__sprintf_build_id(name, sbuild_id);
+
+	if (len == sizeof(sbuild_id))
+		return fprintf(fp, "%s\n", sbuild_id);
+
+	return len < 0 ? len : -EINVAL;
+}
+
+/* Skip callback: when @with_hits is set, skip DSOs that were not hit. */
+static bool dso__skip_buildid(struct dso *dso, int with_hits)
+{
+	if (!with_hits)
+		return false;
+
+	return !dso->hit;
+}
+
+/*
+ * List build-ids for the input file.
+ *
+ * The input is first tried as an ELF file; if a build-id can be printed
+ * that way, nothing else is done.  Otherwise it is opened as a perf
+ * session and the build-ids of its DSOs are printed, restricted to DSOs
+ * with hits when @with_hits is set.
+ *
+ * Returns -1 if the session cannot be created, 0 otherwise.
+ */
+static int perf_session__list_build_ids(bool force, bool with_hits)
+{
+	struct perf_session *session;
+	struct perf_data data = {
+		.file.path = input_name,
+		.mode      = PERF_DATA_MODE_READ,
+		.force     = force,
+	};
+	bool is_pipe;
+
+	symbol__elf_init();
+
+	/*
+	 * See if this is an ELF file first:
+	 */
+	if (filename__fprintf_build_id(input_name, stdout) > 0)
+		return 0;
+
+	session = perf_session__new(&data, false, &build_id__mark_dso_hit_ops);
+	if (session == NULL)
+		return -1;
+
+	is_pipe = perf_data__is_pipe(&data);
+
+	/*
+	 * We take all buildids when the file contains AUX area tracing data
+	 * because we do not decode the trace because it would take too long.
+	 */
+	if (!is_pipe &&
+	    perf_header__has_feat(&session->header, HEADER_AUXTRACE))
+		with_hits = false;
+
+	/*
+	 * in pipe-mode, the only way to get the buildids is to parse
+	 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
+	 */
+	if (with_hits || is_pipe)
+		perf_session__process_events(session);
+
+	perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits);
+	perf_session__delete(session);
+
+	return 0;
+}
+
+/*
+ * Entry point of 'perf buildid-list'.
+ *
+ * With -k/--kernel only the build-id of the running kernel is printed
+ * and the return value is 0 on success, 1 on failure.  Otherwise the
+ * build-ids of the input file are listed.
+ */
+int cmd_buildid_list(int argc, const char **argv)
+{
+	bool show_kernel = false;
+	bool with_hits = false;
+	bool force = false;
+	const struct option options[] = {
+	OPT_BOOLEAN('H', "with-hits", &with_hits, "Show only DSOs with hits"),
+	OPT_STRING('i', "input", &input_name, "file", "input file name"),
+	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+	OPT_BOOLEAN('k', "kernel", &show_kernel, "Show current kernel build id"),
+	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+	OPT_END()
+	};
+	const char * const buildid_list_usage[] = {
+		"perf buildid-list [<options>]",
+		NULL
+	};
+
+	argc = parse_options(argc, argv, options, buildid_list_usage, 0);
+	setup_pager();
+
+	if (!show_kernel)
+		return perf_session__list_build_ids(force, with_hits);
+
+	return sysfs__fprintf_build_id(stdout) <= 0;
+}
diff --git a/builtin-c2c.c b/builtin-c2c.c
new file mode 100644
index 0000000..2126bfb
--- /dev/null
+++ b/builtin-c2c.c
@@ -0,0 +1,2972 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This is a rewrite of the original c2c tool introduced here:
+ *   http://lwn.net/Articles/588866/
+ *
+ * The original tool was changed to fit in current perf state.
+ *
+ * Original authors:
+ *   Don Zickus <dzickus@redhat.com>
+ *   Dick Fowles <fowles@inreach.com>
+ *   Joe Mario <jmario@redhat.com>
+ */
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/stringify.h>
+#include <asm/bug.h>
+#include <sys/param.h>
+#include "util.h"
+#include "debug.h"
+#include "builtin.h"
+#include <subcmd/parse-options.h>
+#include "mem-events.h"
+#include "session.h"
+#include "hist.h"
+#include "sort.h"
+#include "tool.h"
+#include "data.h"
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "ui/browsers/hists.h"
+#include "thread.h"
+#include "mem2node.h"
+
+/* A hists instance together with its column list and accumulated c2c stats. */
+struct c2c_hists {
+	struct hists		hists;
+	struct perf_hpp_list	list;
+	struct c2c_stats	stats;	/* sum of the stats of all samples added */
+};
+
+/* Running statistics of sample weights, updated by compute_stats(). */
+struct compute_stats {
+	struct stats		 lcl_hitm;
+	struct stats		 rmt_hitm;
+	struct stats		 load;
+};
+
+/*
+ * Per-entry c2c data wrapped around a struct hist_entry.
+ *
+ * Instances are allocated by c2c_he_zalloc() with extra room behind the
+ * structure and are handed around as a pointer to the embedded @he;
+ * container_of() recovers the wrapper.
+ */
+struct c2c_hist_entry {
+	struct c2c_hists	*hists;		/* nested hists, created on demand */
+	struct c2c_stats	 stats;
+	unsigned long		*cpuset;	/* bitmap of CPUs seen in samples */
+	unsigned long		*nodeset;	/* bitmap of nodes seen in samples */
+	struct c2c_stats	*node_stats;	/* array with one element per node */
+	unsigned int		 cacheline_idx;
+
+	struct compute_stats	 cstats;
+
+	unsigned long		 paddr;		/* last physical address recorded */
+	unsigned long		 paddr_cnt;	/* number of times paddr changed */
+	bool			 paddr_zero;	/* a sample without physical address was seen */
+	char			*nodestr;
+
+	/*
+	 * must be at the end,
+	 * because of its callchain dynamic entry
+	 */
+	struct hist_entry	he;
+};
+
+/* Default value for the coalesce setting. */
+static char const *coalesce_default = "pid,iaddr";
+
+/* Global state of the c2c command. */
+struct perf_c2c {
+	struct perf_tool	tool;		/* event callbacks */
+	struct c2c_hists	hists;		/* top-level hists filled by process_sample_event() */
+	struct mem2node		mem2node;	/* physical address to node lookup */
+
+	unsigned long		**nodes;
+	int			 nodes_cnt;	/* sizes the per-entry nodeset and node_stats */
+	int			 cpus_cnt;	/* sizes the per-entry cpuset */
+	int			*cpu2node;	/* indexed by CPU number, yields the node */
+	int			 node_info;
+
+	bool			 show_src;
+	bool			 show_all;
+	bool			 use_stdio;
+	bool			 stats_only;
+	bool			 symbol_full;	/* when set, symbol columns are not clamped to SYMBOL_WIDTH */
+
+	/* HITM shared clines stats */
+	struct c2c_stats	hitm_stats;
+	int			shared_clines;
+
+	int			 display;	/* NOTE(review): presumably one of the DISPLAY_* values - confirm */
+
+	const char		*coalesce;
+	char			*cl_sort;	/* sort string for the nested per-entry hists */
+	char			*cl_resort;
+	char			*cl_output;
+};
+
+/* Display modes; DISPLAY_MAX is the number of modes, not a mode itself. */
+enum {
+	DISPLAY_LCL,
+	DISPLAY_RMT,
+	DISPLAY_TOT,
+	DISPLAY_MAX,
+};
+
+/* Human-readable name of each display mode, indexed by DISPLAY_*. */
+static const char *display_str[DISPLAY_MAX] = {
+	[DISPLAY_LCL] = "Local",
+	[DISPLAY_RMT] = "Remote",
+	[DISPLAY_TOT] = "Total",
+};
+
+/* Options common to the c2c command. */
+static const struct option c2c_options[] = {
+	OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"),
+	OPT_END()
+};
+
+/* Tentative definition; the initialized definition follows process_sample_event(). */
+static struct perf_c2c c2c;
+
+/*
+ * hist_entry_ops.new callback: allocate a zeroed struct c2c_hist_entry
+ * with @size additional bytes, plus its cpu/node bitmaps and per-node
+ * stats array.
+ *
+ * Returns a pointer to the embedded struct hist_entry, or NULL if any
+ * allocation fails; in that case everything allocated so far is
+ * released again.
+ */
+static void *c2c_he_zalloc(size_t size)
+{
+	struct c2c_hist_entry *c2c_he;
+
+	c2c_he = zalloc(size + sizeof(*c2c_he));
+	if (!c2c_he)
+		return NULL;
+
+	c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt);
+	if (!c2c_he->cpuset)
+		goto out_free;
+
+	c2c_he->nodeset = bitmap_alloc(c2c.nodes_cnt);
+	if (!c2c_he->nodeset)
+		goto out_free;
+
+	c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats));
+	if (!c2c_he->node_stats)
+		goto out_free;
+
+	init_stats(&c2c_he->cstats.lcl_hitm);
+	init_stats(&c2c_he->cstats.rmt_hitm);
+	init_stats(&c2c_he->cstats.load);
+
+	return &c2c_he->he;
+
+out_free:
+	/* The entry was zeroed, so members not allocated yet are NULL. */
+	free(c2c_he->nodeset);
+	free(c2c_he->cpuset);
+	free(c2c_he);
+	return NULL;
+}
+
+/*
+ * hist_entry_ops.free callback: release the c2c_hist_entry that embeds
+ * @he, including its nested hists (if any) and the arrays it owns.
+ */
+static void c2c_he_free(void *he)
+{
+	struct c2c_hist_entry *entry = container_of(he, struct c2c_hist_entry, he);
+	struct c2c_hists *nested = entry->hists;
+
+	if (nested) {
+		hists__delete_entries(&nested->hists);
+		free(nested);
+	}
+
+	free(entry->cpuset);
+	free(entry->nodeset);
+	free(entry->nodestr);
+	free(entry->node_stats);
+	free(entry);
+}
+
+/* Allocation callbacks passed to hists__add_entry_ops() for c2c entries. */
+static struct hist_entry_ops c2c_entry_ops = {
+	.new	= c2c_he_zalloc,
+	.free	= c2c_he_free,
+};
+
+/* Forward declaration; used by he__get_c2c_hists(). */
+static int c2c_hists__init(struct c2c_hists *hists,
+			   const char *sort,
+			   int nr_header_lines);
+
+/*
+ * Return the nested c2c_hists of the entry embedding @he, creating and
+ * initializing it with @sort and @nr_header_lines on first use.
+ *
+ * Returns NULL if allocation or initialization fails; the entry is then
+ * left without nested hists, so a later call retries from scratch and
+ * c2c_he_free() does not touch freed memory.
+ */
+static struct c2c_hists*
+he__get_c2c_hists(struct hist_entry *he,
+		  const char *sort,
+		  int nr_header_lines)
+{
+	struct c2c_hist_entry *c2c_he;
+	struct c2c_hists *hists;
+	int ret;
+
+	c2c_he = container_of(he, struct c2c_hist_entry, he);
+	if (c2c_he->hists)
+		return c2c_he->hists;
+
+	hists = c2c_he->hists = zalloc(sizeof(*hists));
+	if (!hists)
+		return NULL;
+
+	ret = c2c_hists__init(hists, sort, nr_header_lines);
+	if (ret) {
+		/* Do not leave a dangling pointer behind in the entry. */
+		c2c_he->hists = NULL;
+		free(hists);
+		return NULL;
+	}
+
+	return hists;
+}
+
+/*
+ * Mark the sample's CPU in the entry's cpuset.  Samples without a CPU
+ * value (cpu == -1) trigger a one-time warning and are ignored.
+ */
+static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
+			    struct perf_sample *sample)
+{
+	bool no_cpu = WARN_ONCE(sample->cpu == (unsigned int) -1,
+				"WARNING: no sample cpu value");
+
+	if (!no_cpu)
+		set_bit(sample->cpu, c2c_he->cpuset);
+}
+
+/*
+ * Record the memory node of the sample's physical address in the entry.
+ *
+ * A sample without physical address only sets paddr_zero.  Otherwise the
+ * node is looked up through c2c.mem2node and marked in the nodeset, and
+ * paddr/paddr_cnt are updated whenever the address differs from the one
+ * recorded last.
+ */
+static void c2c_he__set_node(struct c2c_hist_entry *c2c_he,
+			     struct perf_sample *sample)
+{
+	int nid;
+
+	if (sample->phys_addr == 0) {
+		c2c_he->paddr_zero = true;
+		return;
+	}
+
+	nid = mem2node__node(&c2c.mem2node, sample->phys_addr);
+	if (WARN_ONCE(nid < 0, "WARNING: failed to find node\n"))
+		return;
+
+	set_bit(nid, c2c_he->nodeset);
+
+	if (c2c_he->paddr == sample->phys_addr)
+		return;
+
+	c2c_he->paddr_cnt++;
+	c2c_he->paddr = sample->phys_addr;
+}
+
+/*
+ * Feed @weight into exactly one of the entry's running statistics,
+ * chosen from the decoded sample stats in this order of precedence:
+ * remote HITM, local HITM, load.  Nothing is updated if none is set.
+ */
+static void compute_stats(struct c2c_hist_entry *c2c_he,
+			  struct c2c_stats *stats,
+			  u64 weight)
+{
+	struct compute_stats *cstats = &c2c_he->cstats;
+	struct stats *target;
+
+	if (stats->rmt_hitm)
+		target = &cstats->rmt_hitm;
+	else if (stats->lcl_hitm)
+		target = &cstats->lcl_hitm;
+	else if (stats->load)
+		target = &cstats->load;
+	else
+		return;
+
+	update_stats(target, weight);
+}
+
+/*
+ * perf_tool sample callback.
+ *
+ * Each sample is added to the top-level c2c hists and, if appending the
+ * callchain to that entry succeeded, also to the entry's nested hists
+ * (sorted by c2c.cl_sort), where per-node stats and weight statistics
+ * are accumulated as well.
+ *
+ * Returns 0 on success and a negative value on failure.
+ */
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine)
+{
+	struct c2c_hists *c2c_hists = &c2c.hists;
+	struct c2c_hist_entry *c2c_he;
+	struct c2c_stats stats = { .nr_entries = 0, };
+	struct hist_entry *he;
+	struct addr_location al;
+	struct mem_info *mi, *mi_dup;
+	int ret;
+
+	if (machine__resolve(machine, &al, sample) < 0) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			 event->header.type);
+		return -1;
+	}
+
+	ret = sample__resolve_callchain(sample, &callchain_cursor, NULL,
+					evsel, &al, sysctl_perf_event_max_stack);
+	if (ret)
+		goto out;
+
+	mi = sample__resolve_mem(sample, &al);
+	if (mi == NULL) {
+		/* Leave through 'out' so that the resolved location is released. */
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * The mi object is released in hists__add_entry_ops,
+	 * if it gets sorted out into existing data, so we need
+	 * to take the copy now.
+	 */
+	mi_dup = mem_info__get(mi);
+
+	c2c_decode_stats(&stats, mi);
+
+	he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
+				  &al, NULL, NULL, mi,
+				  sample, true);
+	if (he == NULL)
+		goto free_mi;
+
+	c2c_he = container_of(he, struct c2c_hist_entry, he);
+	c2c_add_stats(&c2c_he->stats, &stats);
+	c2c_add_stats(&c2c_hists->stats, &stats);
+
+	c2c_he__set_cpu(c2c_he, sample);
+	c2c_he__set_node(c2c_he, sample);
+
+	hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
+	ret = hist_entry__append_callchain(he, sample);
+
+	if (!ret) {
+		/*
+		 * There's already been warning about missing
+		 * sample's cpu value. Let's account all to
+		 * node 0 in this case, without any further
+		 * warning.
+		 *
+		 * Doing node stats only for single callchain data.
+		 */
+		int cpu = sample->cpu == (unsigned int) -1 ? 0 : sample->cpu;
+		int node = c2c.cpu2node[cpu];
+
+		/* From here on work with the reference taken above. */
+		mi = mi_dup;
+
+		c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2);
+		if (!c2c_hists)
+			goto free_mi;
+
+		he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
+					  &al, NULL, NULL, mi,
+					  sample, true);
+		if (he == NULL)
+			goto free_mi;
+
+		c2c_he = container_of(he, struct c2c_hist_entry, he);
+		c2c_add_stats(&c2c_he->stats, &stats);
+		c2c_add_stats(&c2c_hists->stats, &stats);
+		c2c_add_stats(&c2c_he->node_stats[node], &stats);
+
+		compute_stats(c2c_he, &stats, sample->weight);
+
+		c2c_he__set_cpu(c2c_he, sample);
+		c2c_he__set_node(c2c_he, sample);
+
+		hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
+		ret = hist_entry__append_callchain(he, sample);
+	}
+
+out:
+	addr_location__put(&al);
+	return ret;
+
+free_mi:
+	mem_info__put(mi_dup);
+	mem_info__put(mi);
+	ret = -ENOMEM;
+	goto out;
+}
+
+/*
+ * The single c2c instance.  Samples go to process_sample_event(); the
+ * other listed event types use the generic perf_event__process_*
+ * handlers.
+ */
+static struct perf_c2c c2c = {
+	.tool = {
+		.sample		= process_sample_event,
+		.mmap		= perf_event__process_mmap,
+		.mmap2		= perf_event__process_mmap2,
+		.comm		= perf_event__process_comm,
+		.exit		= perf_event__process_exit,
+		.fork		= perf_event__process_fork,
+		.lost		= perf_event__process_lost,
+		.ordered_events	= true,
+		.ordering_requires_timestamps = true,
+	},
+};
+
+/* NULL-terminated usage lines of the top-level 'perf c2c' command. */
+static const char * const c2c_usage[] = {
+	"perf c2c {record|report}",
+	NULL
+};
+
+/* NULL-terminated usage lines of 'perf c2c report'. */
+static const char * const __usage_report[] = {
+	"perf c2c report",
+	NULL
+};
+
+static const char * const *report_c2c_usage = __usage_report;
+
+/* Number of header lines a dimension can describe. */
+#define C2C_HEADER_MAX 2
+
+/* Header text of one column, one element per header line. */
+struct c2c_header {
+	struct {
+		const char *text;	/* may be NULL; see c2c_header() for the fallback */
+		int	    span;	/* loaded into the caller's span counter by c2c_header() */
+	} line[C2C_HEADER_MAX];
+};
+
+/* Description of one output column. */
+struct c2c_dimension {
+	struct c2c_header	 header;
+	const char		*name;
+	int			 width;	/* column width used when @se is NULL */
+	struct sort_entry	*se;	/* optional sort entry supplying width and header */
+
+	int64_t (*cmp)(struct perf_hpp_fmt *fmt,
+		       struct hist_entry *, struct hist_entry *);
+	int   (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		       struct hist_entry *he);
+	int   (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		       struct hist_entry *he);
+};
+
+/* A perf_hpp_fmt bound to its dimension; recovered from the fmt with container_of(). */
+struct c2c_fmt {
+	struct perf_hpp_fmt	 fmt;
+	struct c2c_dimension	*dim;
+};
+
+/* Maximum width of the symbol and srcline columns unless c2c.symbol_full is set. */
+#define SYMBOL_WIDTH 30
+
+/* Tentative definitions; c2c_width() compares dimension pointers against these. */
+static struct c2c_dimension dim_symbol;
+static struct c2c_dimension dim_srcline;
+
+/*
+ * Width of a symbol-like column: the hists column length for @se,
+ * limited to SYMBOL_WIDTH unless full symbols were requested.
+ */
+static int symbol_width(struct hists *hists, struct sort_entry *se)
+{
+	int col_len = hists__col_len(hists, se->se_width_idx);
+
+	if (c2c.symbol_full)
+		return col_len;
+
+	return MIN(col_len, SYMBOL_WIDTH);
+}
+
+/*
+ * Width callback shared by all c2c columns.
+ *
+ * The symbol and srcline dimensions use symbol_width(), other sort entry
+ * backed dimensions use the hists column length and the remaining ones
+ * use the fixed width stored in the dimension.
+ */
+static int c2c_width(struct perf_hpp_fmt *fmt,
+		     struct perf_hpp *hpp __maybe_unused,
+		     struct hists *hists)
+{
+	struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+	struct c2c_dimension *dim = c2c_fmt->dim;
+
+	if (dim == &dim_symbol || dim == &dim_srcline)
+		return symbol_width(hists, dim->se);
+
+	if (dim->se)
+		return hists__col_len(hists, dim->se->se_width_idx);
+
+	return dim->width;
+}
+
+/*
+ * Header callback: print header line 'line' of the column into hpp->buf.
+ *
+ * Sort entry backed columns fall back to the sort entry header on the last
+ * header line when the dimension defines no text for it.  For the other
+ * columns a non-zero *span means this column is covered by a spanning
+ * header: the counter is decremented and nothing is printed.  Otherwise
+ * *span is loaded with the span of this column's header line.
+ *
+ * Returns the number of characters written (0 for a spanned column).
+ */
+static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		      struct hists *hists, int line, int *span)
+{
+	struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+	struct c2c_dimension *dim = c2c_fmt->dim;
+	int width = c2c_width(fmt, hpp, hists);
+	const char *text = dim->header.line[line].text;
+
+	if (!dim->se) {
+		if (*span) {
+			(*span)--;
+			return 0;
+		}
+
+		*span = dim->header.line[line].span;
+	} else if (!text && (line == hists->hpp_list->nr_header_lines - 1)) {
+		/* Use the last line from sort_entry if not defined. */
+		text = dim->se->se_header;
+	}
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, text ? text : "");
+}
+
+/*
+ * Format __v as "0x<hex>" into the char array __s and evaluate to __s.
+ * __s has to be an array, its size is taken with sizeof().
+ */
+#define HEX_STR(__s, __v)				\
+({							\
+	scnprintf(__s, sizeof(__s), "0x%" PRIx64, __v);	\
+	__s;						\
+})
+
+/* Compare two entries via the generic sort__dcacheline_cmp(). */
+static int64_t
+dcacheline_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+	       struct hist_entry *left, struct hist_entry *right)
+{
+	int64_t res = sort__dcacheline_cmp(left, right);
+
+	return res;
+}
+
+/*
+ * Print cl_address() of the entry's data address as a hex string, or 0x0
+ * when the entry carries no mem_info.
+ */
+static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			    struct hist_entry *he)
+{
+	char hex[20];
+	int width = c2c_width(fmt, hpp, he->hists);
+	uint64_t cl_addr = 0;
+
+	if (he->mem_info != NULL)
+		cl_addr = cl_address(he->mem_info->daddr.addr);
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(hex, cl_addr));
+}
+
+/*
+ * Print the entry's nodestr; warns once and prints nothing when it is
+ * not set.
+ */
+static int
+dcacheline_node_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		      struct hist_entry *he)
+{
+	int width = c2c_width(fmt, hpp, he->hists);
+	struct c2c_hist_entry *entry = container_of(he, struct c2c_hist_entry, he);
+
+	if (WARN_ON_ONCE(!entry->nodestr))
+		return 0;
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, entry->nodestr);
+}
+
+/* Print the entry's paddr_cnt counter. */
+static int
+dcacheline_node_count(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		      struct hist_entry *he)
+{
+	int width = c2c_width(fmt, hpp, he->hists);
+	struct c2c_hist_entry *entry = container_of(he, struct c2c_hist_entry, he);
+
+	return scnprintf(hpp->buf, hpp->size, "%*lu", width, entry->paddr_cnt);
+}
+
+/*
+ * Print cl_offset() of the entry's data address (daddr.al_addr) as a hex
+ * string, or 0x0 when the entry carries no mem_info.
+ */
+static int offset_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			struct hist_entry *he)
+{
+	char hex[20];
+	int width = c2c_width(fmt, hpp, he->hists);
+	uint64_t off = 0;
+
+	if (he->mem_info != NULL)
+		off = cl_offset(he->mem_info->daddr.al_addr);
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(hex, off));
+}
+
+/*
+ * Compare two entries by the cl_offset() of their data address; entries
+ * without mem_info count as offset 0.
+ *
+ * NOTE(review): this reads daddr.addr while offset_entry() prints
+ * daddr.al_addr - confirm the difference is intended.
+ */
+static int64_t
+offset_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+	   struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t off_left = 0;
+	uint64_t off_right = 0;
+
+	if (left->mem_info != NULL)
+		off_left = cl_offset(left->mem_info->daddr.addr);
+
+	if (right->mem_info != NULL)
+		off_right = cl_offset(right->mem_info->daddr.addr);
+
+	return (int64_t)(off_right - off_left);
+}
+
+/*
+ * Print the entry's instruction address (mem_info->iaddr.addr) as a hex
+ * string, or 0x0 when the entry carries no mem_info.
+ */
+static int
+iaddr_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	    struct hist_entry *he)
+{
+	char hex[20];
+	int width = c2c_width(fmt, hpp, he->hists);
+	uint64_t code_addr = 0;
+
+	if (he->mem_info != NULL)
+		code_addr = he->mem_info->iaddr.addr;
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(hex, code_addr));
+}
+
+/* Compare two entries via the generic sort__iaddr_cmp(). */
+static int64_t
+iaddr_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+	  struct hist_entry *left, struct hist_entry *right)
+{
+	int64_t res = sort__iaddr_cmp(left, right);
+
+	return res;
+}
+
+/* Print the sum of the entry's local and remote HITM counters. */
+static int
+tot_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	       struct hist_entry *he)
+{
+	int width = c2c_width(fmt, hpp, he->hists);
+	struct c2c_hist_entry *entry = container_of(he, struct c2c_hist_entry, he);
+	unsigned int hitm_sum = entry->stats.lcl_hitm + entry->stats.rmt_hitm;
+
+	return scnprintf(hpp->buf, hpp->size, "%*u", width, hitm_sum);
+}
+
+/*
+ * Compare two entries by their total (local + remote) HITM count.
+ *
+ * Returns a negative, zero or positive value when left has fewer, equal
+ * or more HITMs than right.
+ */
+static int64_t
+tot_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+	     struct hist_entry *left, struct hist_entry *right)
+{
+	struct c2c_hist_entry *c2c_left;
+	struct c2c_hist_entry *c2c_right;
+	unsigned int tot_hitm_left;
+	unsigned int tot_hitm_right;
+
+	c2c_left  = container_of(left, struct c2c_hist_entry, he);
+	c2c_right = container_of(right, struct c2c_hist_entry, he);
+
+	tot_hitm_left  = c2c_left->stats.lcl_hitm + c2c_left->stats.rmt_hitm;
+	tot_hitm_right = c2c_right->stats.lcl_hitm + c2c_right->stats.rmt_hitm;
+
+	/*
+	 * Widen before subtracting: an unsigned int difference wraps around
+	 * and would always be seen as non-negative once converted to int64_t.
+	 */
+	return (int64_t)tot_hitm_left - (int64_t)tot_hitm_right;
+}
+
+/*
+ * Generate <__f>_entry(): an entry callback printing the c2c_stats
+ * counter __f of the hist entry with "%*u", padded to the column width.
+ */
+#define STAT_FN_ENTRY(__f)					\
+static int							\
+__f ## _entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,	\
+	      struct hist_entry *he)				\
+{								\
+	struct c2c_hist_entry *c2c_he;				\
+	int width = c2c_width(fmt, hpp, he->hists);		\
+								\
+	c2c_he = container_of(he, struct c2c_hist_entry, he);	\
+	return scnprintf(hpp->buf, hpp->size, "%*u", width,	\
+			 c2c_he->stats.__f);			\
+}
+
+/*
+ * Generate <__f>_cmp(): a compare callback ordering two hist entries by
+ * their c2c_stats counter __f.
+ *
+ * Both counters are widened to int64_t before the subtraction, so that a
+ * smaller left counter yields a negative result instead of an unsigned
+ * wrap-around that would be seen as a positive value.
+ */
+#define STAT_FN_CMP(__f)						\
+static int64_t								\
+__f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused,			\
+	    struct hist_entry *left, struct hist_entry *right)		\
+{									\
+	struct c2c_hist_entry *c2c_left, *c2c_right;			\
+									\
+	c2c_left  = container_of(left, struct c2c_hist_entry, he);	\
+	c2c_right = container_of(right, struct c2c_hist_entry, he);	\
+	return (int64_t)c2c_left->stats.__f -				\
+	       (int64_t)c2c_right->stats.__f;				\
+}
+
+/* Generate both the <__f>_entry() and <__f>_cmp() callbacks for counter __f. */
+#define STAT_FN(__f)		\
+	STAT_FN_ENTRY(__f)	\
+	STAT_FN_CMP(__f)
+
+/* Entry/cmp callbacks for the plain counters used by the dimensions below. */
+STAT_FN(rmt_hitm)
+STAT_FN(lcl_hitm)
+STAT_FN(store)
+STAT_FN(st_l1hit)
+STAT_FN(st_l1miss)
+STAT_FN(ld_fbhit)
+STAT_FN(ld_l1hit)
+STAT_FN(ld_l2hit)
+STAT_FN(ld_llchit)
+STAT_FN(rmt_hit)
+
+/*
+ * LLC miss count: sum of the lcl_dram, rmt_dram, rmt_hitm and rmt_hit
+ * counters.
+ */
+static uint64_t llc_miss(struct c2c_stats *stats)
+{
+	uint64_t misses = stats->lcl_dram + stats->rmt_dram +
+			  stats->rmt_hitm + stats->rmt_hit;
+
+	return misses;
+}
+
+/*
+ * Print the LLC miss count (see llc_miss()) of the entry.
+ *
+ * llc_miss() returns uint64_t, so the value is printed with PRIu64 like
+ * the other 64-bit counters; "%lu" does not match on 32-bit builds.
+ */
+static int
+ld_llcmiss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		 struct hist_entry *he)
+{
+	struct c2c_hist_entry *c2c_he;
+	int width = c2c_width(fmt, hpp, he->hists);
+
+	c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+	return scnprintf(hpp->buf, hpp->size, "%*" PRIu64, width,
+			 llc_miss(&c2c_he->stats));
+}
+
+/* Compare two entries by their LLC miss count (see llc_miss()). */
+static int64_t
+ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+	       struct hist_entry *left, struct hist_entry *right)
+{
+	struct c2c_hist_entry *lhs = container_of(left, struct c2c_hist_entry, he);
+	struct c2c_hist_entry *rhs = container_of(right, struct c2c_hist_entry, he);
+	uint64_t miss_left = llc_miss(&lhs->stats);
+	uint64_t miss_right = llc_miss(&rhs->stats);
+
+	return miss_left - miss_right;
+}
+
+/*
+ * Total number of records: all load counters (dram, remote hit/hitm,
+ * fill buffer, L1, L2, LLC hits and local hitm) plus the store L1
+ * hit/miss counters.
+ */
+static uint64_t total_records(struct c2c_stats *stats)
+{
+	uint64_t misses, loads;
+
+	misses = stats->lcl_dram + stats->rmt_dram +
+		 stats->rmt_hitm + stats->rmt_hit;
+
+	loads = misses + stats->ld_fbhit + stats->ld_l1hit +
+		stats->ld_l2hit + stats->ld_llchit + stats->lcl_hitm;
+
+	return loads + stats->st_l1hit + stats->st_l1miss;
+}
+
+/* Print the total number of records (see total_records()) of the entry. */
+static int
+tot_recs_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		struct hist_entry *he)
+{
+	int width = c2c_width(fmt, hpp, he->hists);
+	struct c2c_hist_entry *entry = container_of(he, struct c2c_hist_entry, he);
+	uint64_t records = total_records(&entry->stats);
+
+	return scnprintf(hpp->buf, hpp->size, "%*" PRIu64, width, records);
+}
+
+/* Compare two entries by their total number of records. */
+static int64_t
+tot_recs_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+	     struct hist_entry *left, struct hist_entry *right)
+{
+	struct c2c_hist_entry *lhs = container_of(left, struct c2c_hist_entry, he);
+	struct c2c_hist_entry *rhs = container_of(right, struct c2c_hist_entry, he);
+	uint64_t recs_left = total_records(&lhs->stats);
+	uint64_t recs_right = total_records(&rhs->stats);
+
+	return recs_left - recs_right;
+}
+
+/*
+ * Total number of loads: dram and remote hit/hitm counters plus the fill
+ * buffer, L1, L2, LLC hit and local hitm counters.
+ */
+static uint64_t total_loads(struct c2c_stats *stats)
+{
+	uint64_t misses;
+
+	misses = stats->lcl_dram + stats->rmt_dram +
+		 stats->rmt_hitm + stats->rmt_hit;
+
+	return misses + stats->ld_fbhit + stats->ld_l1hit +
+	       stats->ld_l2hit + stats->ld_llchit + stats->lcl_hitm;
+}
+
+/* Print the total number of loads (see total_loads()) of the entry. */
+static int
+tot_loads_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		struct hist_entry *he)
+{
+	int width = c2c_width(fmt, hpp, he->hists);
+	struct c2c_hist_entry *entry = container_of(he, struct c2c_hist_entry, he);
+	uint64_t loads = total_loads(&entry->stats);
+
+	return scnprintf(hpp->buf, hpp->size, "%*" PRIu64, width, loads);
+}
+
+/* Compare two entries by their total number of loads. */
+static int64_t
+tot_loads_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+	      struct hist_entry *left, struct hist_entry *right)
+{
+	struct c2c_hist_entry *lhs = container_of(left, struct c2c_hist_entry, he);
+	struct c2c_hist_entry *rhs = container_of(right, struct c2c_hist_entry, he);
+	uint64_t loads_left = total_loads(&lhs->stats);
+	uint64_t loads_right = total_loads(&rhs->stats);
+
+	return loads_left - loads_right;
+}
+
+/* Callback computing a percentage value for a c2c hist entry. */
+typedef double (get_percent_cb)(struct c2c_hist_entry *);
+
+/*
+ * Common helper of the color callbacks: print the percentage returned by
+ * get_percent() with two decimals and a trailing '%', through the slang
+ * color printf when the browser is in use (HAVE_SLANG_SUPPORT builds) or
+ * hpp_color_scnprintf() otherwise.
+ */
+static int
+percent_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	      struct hist_entry *he, get_percent_cb get_percent)
+{
+	struct c2c_hist_entry *entry = container_of(he, struct c2c_hist_entry, he);
+	int width = c2c_width(fmt, hpp, he->hists);
+	double value = get_percent(entry);
+
+#ifdef HAVE_SLANG_SUPPORT
+	if (use_browser)
+		return __hpp__slsmg_color_printf(hpp, "%*.2f%%", width - 1, value);
+#endif
+	return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, value);
+}
+
+/*
+ * Percentage of the entry's HITM count relative to the total of its
+ * hists.  Which counter is used (rmt_hitm, lcl_hitm or tot_hitm) follows
+ * c2c.display; any other display value or a zero total yields 0.
+ */
+static double percent_hitm(struct c2c_hist_entry *c2c_he)
+{
+	struct c2c_hists *c2c_hists;
+	struct c2c_stats *entry_stats = &c2c_he->stats;
+	struct c2c_stats *hists_stats;
+	int part = 0;
+	int whole = 0;
+
+	c2c_hists = container_of(c2c_he->he.hists, struct c2c_hists, hists);
+	hists_stats = &c2c_hists->stats;
+
+	if (c2c.display == DISPLAY_RMT) {
+		part  = entry_stats->rmt_hitm;
+		whole = hists_stats->rmt_hitm;
+	} else if (c2c.display == DISPLAY_LCL) {
+		part  = entry_stats->lcl_hitm;
+		whole = hists_stats->lcl_hitm;
+	} else if (c2c.display == DISPLAY_TOT) {
+		part  = entry_stats->tot_hitm;
+		whole = hists_stats->tot_hitm;
+	}
+
+	if (!whole)
+		return 0;
+
+	return 100 * ((double) part / whole);
+}
+
+/*
+ * Format __v as a percentage with two decimals ("12.34%") into the char
+ * array __s and evaluate to __s.
+ */
+#define PERC_STR(__s, __v)				\
+({							\
+	scnprintf(__s, sizeof(__s), "%.2F%%", __v);	\
+	__s;						\
+})
+
+/* Print the HITM percentage (see percent_hitm()) of the entry. */
+static int
+percent_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		   struct hist_entry *he)
+{
+	char str[10];
+	int width = c2c_width(fmt, hpp, he->hists);
+	struct c2c_hist_entry *entry = container_of(he, struct c2c_hist_entry, he);
+	double value = percent_hitm(entry);
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(str, value));
+}
+
+/* Colored variant of percent_hitm_entry(). */
+static int
+percent_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		   struct hist_entry *he)
+{
+	int printed = percent_color(fmt, hpp, he, percent_hitm);
+
+	return printed;
+}
+
+/*
+ * Compare two entries by their HITM percentage (see percent_hitm()).
+ * The double difference is converted to the int64_t return type.
+ */
+static int64_t
+percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+		 struct hist_entry *left, struct hist_entry *right)
+{
+	struct c2c_hist_entry *lhs = container_of(left, struct c2c_hist_entry, he);
+	struct c2c_hist_entry *rhs = container_of(right, struct c2c_hist_entry, he);
+	double lhs_percent = percent_hitm(lhs);
+	double rhs_percent = percent_hitm(rhs);
+
+	return lhs_percent - rhs_percent;
+}
+
+/* Stats of the c2c hist entry embedding 'he'. */
+static struct c2c_stats *he_stats(struct hist_entry *he)
+{
+	struct c2c_hist_entry *entry = container_of(he, struct c2c_hist_entry, he);
+
+	return &entry->stats;
+}
+
+/* Stats of the c2c hists object that 'he' belongs to. */
+static struct c2c_stats *total_stats(struct hist_entry *he)
+{
+	struct c2c_hists *c2c_hists = container_of(he->hists, struct c2c_hists, hists);
+
+	return &c2c_hists->stats;
+}
+
+/* Percentage of 'st' relative to 'tot'; 0 when 'tot' is zero. */
+static double percent(int st, int tot)
+{
+	if (!tot)
+		return 0;
+
+	return 100. * (double) st / (double) tot;
+}
+
+/*
+ * Percentage of counter __f of hist entry __h relative to the same
+ * counter in the stats of its hists.
+ */
+#define PERCENT(__h, __f) percent(he_stats(__h)->__f, total_stats(__h)->__f)
+
+/*
+ * Generate percent_<__f>(): the same percentage computed from a
+ * c2c_hist_entry, in the get_percent_cb form taken by percent_color().
+ */
+#define PERCENT_FN(__f)								\
+static double percent_ ## __f(struct c2c_hist_entry *c2c_he)			\
+{										\
+	struct c2c_hists *hists;						\
+										\
+	hists = container_of(c2c_he->he.hists, struct c2c_hists, hists);	\
+	return percent(c2c_he->stats.__f, hists->stats.__f);			\
+}
+
+PERCENT_FN(rmt_hitm)
+PERCENT_FN(lcl_hitm)
+PERCENT_FN(st_l1hit)
+PERCENT_FN(st_l1miss)
+
+/* Print the entry's share of the remote HITMs of its hists. */
+static int
+percent_rmt_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		       struct hist_entry *he)
+{
+	char str[10];
+	int width = c2c_width(fmt, hpp, he->hists);
+	double value = PERCENT(he, rmt_hitm);
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(str, value));
+}
+
+/* Colored variant of percent_rmt_hitm_entry(). */
+static int
+percent_rmt_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		       struct hist_entry *he)
+{
+	int printed = percent_color(fmt, hpp, he, percent_rmt_hitm);
+
+	return printed;
+}
+
+/*
+ * Compare two entries by their share of the remote HITMs, i.e. by the
+ * same value percent_rmt_hitm_entry() prints (not the local HITMs).
+ * The double difference is converted to the int64_t return type.
+ */
+static int64_t
+percent_rmt_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+		     struct hist_entry *left, struct hist_entry *right)
+{
+	double per_left;
+	double per_right;
+
+	per_left  = PERCENT(left, rmt_hitm);
+	per_right = PERCENT(right, rmt_hitm);
+
+	return per_left - per_right;
+}
+
+/* Print the entry's share of the local HITMs of its hists. */
+static int
+percent_lcl_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		       struct hist_entry *he)
+{
+	char str[10];
+	int width = c2c_width(fmt, hpp, he->hists);
+	double value = PERCENT(he, lcl_hitm);
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(str, value));
+}
+
+/* Colored variant of percent_lcl_hitm_entry(). */
+static int
+percent_lcl_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		       struct hist_entry *he)
+{
+	int printed = percent_color(fmt, hpp, he, percent_lcl_hitm);
+
+	return printed;
+}
+
+/* Compare two entries by their share of the local HITMs. */
+static int64_t
+percent_lcl_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+		     struct hist_entry *left, struct hist_entry *right)
+{
+	double lhs_percent = PERCENT(left, lcl_hitm);
+	double rhs_percent = PERCENT(right, lcl_hitm);
+
+	return lhs_percent - rhs_percent;
+}
+
+/* Print the entry's share of the store L1 hits of its hists. */
+static int
+percent_stores_l1hit_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			   struct hist_entry *he)
+{
+	char str[10];
+	int width = c2c_width(fmt, hpp, he->hists);
+	double value = PERCENT(he, st_l1hit);
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(str, value));
+}
+
+/* Colored variant of percent_stores_l1hit_entry(). */
+static int
+percent_stores_l1hit_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			   struct hist_entry *he)
+{
+	int printed = percent_color(fmt, hpp, he, percent_st_l1hit);
+
+	return printed;
+}
+
+/* Compare two entries by their share of the store L1 hits. */
+static int64_t
+percent_stores_l1hit_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+			struct hist_entry *left, struct hist_entry *right)
+{
+	double lhs_percent = PERCENT(left, st_l1hit);
+	double rhs_percent = PERCENT(right, st_l1hit);
+
+	return lhs_percent - rhs_percent;
+}
+
+/* Print the entry's share of the store L1 misses of its hists. */
+static int
+percent_stores_l1miss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			   struct hist_entry *he)
+{
+	char str[10];
+	int width = c2c_width(fmt, hpp, he->hists);
+	double value = PERCENT(he, st_l1miss);
+
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(str, value));
+}
+
+/* Colored variant of percent_stores_l1miss_entry(). */
+static int
+percent_stores_l1miss_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			    struct hist_entry *he)
+{
+	int printed = percent_color(fmt, hpp, he, percent_st_l1miss);
+
+	return printed;
+}
+
+/* Compare two entries by their share of the store L1 misses. */
+static int64_t
+percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+			  struct hist_entry *left, struct hist_entry *right)
+{
+	double lhs_percent = PERCENT(left, st_l1miss);
+	double rhs_percent = PERCENT(right, st_l1miss);
+
+	return lhs_percent - rhs_percent;
+}
+
+/* Entry/cmp callbacks for the local and remote dram load counters. */
+STAT_FN(lcl_dram)
+STAT_FN(rmt_dram)
+
+/* Print the pid_ of the thread attached to the entry. */
+static int
+pid_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	  struct hist_entry *he)
+{
+	return scnprintf(hpp->buf, hpp->size, "%*d",
+			 c2c_width(fmt, hpp, he->hists), he->thread->pid_);
+}
+
+/* Compare two entries by the pid_ of their threads. */
+static int64_t
+pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+	struct hist_entry *left, struct hist_entry *right)
+{
+	struct thread *lhs = left->thread;
+	struct thread *rhs = right->thread;
+
+	return lhs->pid_ - rhs->pid_;
+}
+
+/* Compare callback for columns without an ordering: all entries are equal. */
+static int64_t
+empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+	  struct hist_entry *left __maybe_unused,
+	  struct hist_entry *right __maybe_unused)
+{
+	const int64_t equal = 0;
+
+	return equal;
+}
+
+/*
+ * Print the node column of an entry.
+ *
+ * For every node the entry's cpuset is intersected with the CPUs of that
+ * node.  Nodes the entry did not touch are skipped (padded with 21 spaces
+ * when c2c.node_info == 1).  The rest is printed, separated by a space,
+ * in the form selected by c2c.node_info:
+ *   0 - node number
+ *   1 - node{cpus %hitms %stores}, where cpus is the number of the
+ *       entry's CPUs on that node and the percentages are the node's
+ *       share of the entry's hitm (per c2c.display) and store counters,
+ *       or "n/a" when the entry's counter is zero
+ *   2 - node{cpu list}
+ *
+ * The output is written by advancing hpp, the function returns 0.
+ */
+static int
+node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
+	   struct hist_entry *he)
+{
+	struct c2c_hist_entry *c2c_he;
+	bool first = true;
+	int node;
+	int ret = 0;
+
+	c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+	for (node = 0; node < c2c.nodes_cnt; node++) {
+		DECLARE_BITMAP(set, c2c.cpus_cnt);
+
+		/* CPUs of this node that the entry was seen on. */
+		bitmap_zero(set, c2c.cpus_cnt);
+		bitmap_and(set, c2c_he->cpuset, c2c.nodes[node], c2c.cpus_cnt);
+
+		if (!bitmap_weight(set, c2c.cpus_cnt)) {
+			if (c2c.node_info == 1) {
+				ret = scnprintf(hpp->buf, hpp->size, "%21s", " ");
+				advance_hpp(hpp, ret);
+			}
+			continue;
+		}
+
+		if (!first) {
+			ret = scnprintf(hpp->buf, hpp->size, " ");
+			advance_hpp(hpp, ret);
+		}
+
+		switch (c2c.node_info) {
+		case 0:
+			ret = scnprintf(hpp->buf, hpp->size, "%2d", node);
+			advance_hpp(hpp, ret);
+			break;
+		case 1:
+		{
+			/*
+			 * Count the CPUs of this node only ('set'), not the
+			 * whole cpuset of the entry, which would show the
+			 * same number for every node.
+			 */
+			int num = bitmap_weight(set, c2c.cpus_cnt);
+			struct c2c_stats *stats = &c2c_he->node_stats[node];
+
+			ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num);
+			advance_hpp(hpp, ret);
+
+		#define DISPLAY_HITM(__h)						\
+			if (c2c_he->stats.__h > 0) {					\
+				ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ",	\
+						percent(stats->__h, c2c_he->stats.__h));\
+			} else {							\
+				ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a");	\
+			}
+
+			switch (c2c.display) {
+			case DISPLAY_RMT:
+				DISPLAY_HITM(rmt_hitm);
+				break;
+			case DISPLAY_LCL:
+				DISPLAY_HITM(lcl_hitm);
+				break;
+			case DISPLAY_TOT:
+				DISPLAY_HITM(tot_hitm);
+			default:
+				break;
+			}
+
+		#undef DISPLAY_HITM
+
+			advance_hpp(hpp, ret);
+
+			if (c2c_he->stats.store > 0) {
+				ret = scnprintf(hpp->buf, hpp->size, "%5.1f%%}",
+						percent(stats->store, c2c_he->stats.store));
+			} else {
+				ret = scnprintf(hpp->buf, hpp->size, "%6s}", "n/a");
+			}
+
+			advance_hpp(hpp, ret);
+			break;
+		}
+		case 2:
+			ret = scnprintf(hpp->buf, hpp->size, "%2d{", node);
+			advance_hpp(hpp, ret);
+
+			ret = bitmap_scnprintf(set, c2c.cpus_cnt, hpp->buf, hpp->size);
+			advance_hpp(hpp, ret);
+
+			ret = scnprintf(hpp->buf, hpp->size, "}");
+			advance_hpp(hpp, ret);
+			break;
+		default:
+			break;
+		}
+
+		first = false;
+	}
+
+	return 0;
+}
+
+/* Print 'mean' rounded to no decimals ("%6.0f"), padded to the column width. */
+static int
+mean_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	   struct hist_entry *he, double mean)
+{
+	char str[10];
+	int width = c2c_width(fmt, hpp, he->hists);
+
+	scnprintf(str, sizeof(str), "%6.0f", mean);
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, str);
+}
+
+/*
+ * Generate entry callback __func printing avg_stats() of the entry's
+ * cstats.__val member via mean_entry().
+ */
+#define MEAN_ENTRY(__func, __val)						\
+static int									\
+__func(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he)	\
+{										\
+	struct c2c_hist_entry *c2c_he;						\
+	c2c_he = container_of(he, struct c2c_hist_entry, he);			\
+	return mean_entry(fmt, hpp, he, avg_stats(&c2c_he->cstats.__val));	\
+}
+
+MEAN_ENTRY(mean_rmt_entry,  rmt_hitm);
+MEAN_ENTRY(mean_lcl_entry,  lcl_hitm);
+MEAN_ENTRY(mean_load_entry, load);
+
+/* Print the number of CPUs set in the entry's cpuset. */
+static int
+cpucnt_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	     struct hist_entry *he)
+{
+	char str[10];
+	int width = c2c_width(fmt, hpp, he->hists);
+	struct c2c_hist_entry *entry = container_of(he, struct c2c_hist_entry, he);
+
+	scnprintf(str, sizeof(str), "%d", bitmap_weight(entry->cpuset, c2c.cpus_cnt));
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, str);
+}
+
+/* Print the entry's cacheline_idx. */
+static int
+cl_idx_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	     struct hist_entry *he)
+{
+	char str[10];
+	int width = c2c_width(fmt, hpp, he->hists);
+	struct c2c_hist_entry *entry = container_of(he, struct c2c_hist_entry, he);
+
+	scnprintf(str, sizeof(str), "%u", entry->cacheline_idx);
+	return scnprintf(hpp->buf, hpp->size, "%*s", width, str);
+}
+
+/* Print an empty cell of the column width. */
+static int
+cl_idx_empty_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		   struct hist_entry *he)
+{
+	return scnprintf(hpp->buf, hpp->size, "%*s",
+			 c2c_width(fmt, hpp, he->hists), "");
+}
+
+/* Header with text on the second (low) line only. */
+#define HEADER_LOW(__h)			\
+	{				\
+		.line[1] = {		\
+			.text = __h,	\
+		},			\
+	}
+
+/* Header with text on both lines and no span. */
+#define HEADER_BOTH(__h0, __h1)		\
+	{				\
+		.line[0] = {		\
+			.text = __h0,	\
+		},			\
+		.line[1] = {		\
+			.text = __h1,	\
+		},			\
+	}
+
+/*
+ * Header with text on both lines, the first line carrying span __s
+ * (see c2c_header() for how the span is consumed).
+ */
+#define HEADER_SPAN(__h0, __h1, __s)	\
+	{				\
+		.line[0] = {		\
+			.text = __h0,	\
+			.span = __s,	\
+		},			\
+		.line[1] = {		\
+			.text = __h1,	\
+		},			\
+	}
+
+/*
+ * Low line text only; expands exactly like HEADER_LOW and is used by the
+ * dimensions that follow a HEADER_SPAN column.
+ */
+#define HEADER_SPAN_LOW(__h)		\
+	{				\
+		.line[1] = {		\
+			.text = __h,	\
+		},			\
+	}
+
+/*
+ * Cacheline / data address / code address dimensions.  All of them use
+ * their own entry and cmp callbacks and a fixed width; the *_node and
+ * *_count columns have no ordering (empty_cmp).
+ */
+static struct c2c_dimension dim_dcacheline = {
+	.header		= HEADER_SPAN("--- Cacheline ----", "Address", 2),
+	.name		= "dcacheline",
+	.cmp		= dcacheline_cmp,
+	.entry		= dcacheline_entry,
+	.width		= 18,
+};
+
+static struct c2c_dimension dim_dcacheline_node = {
+	.header		= HEADER_LOW("Node"),
+	.name		= "dcacheline_node",
+	.cmp		= empty_cmp,
+	.entry		= dcacheline_node_entry,
+	.width		= 4,
+};
+
+static struct c2c_dimension dim_dcacheline_count = {
+	.header		= HEADER_LOW("PA cnt"),
+	.name		= "dcacheline_count",
+	.cmp		= empty_cmp,
+	.entry		= dcacheline_node_count,
+	.width		= 6,
+};
+
+/* Alternative, shorter offset header; not referenced by a dimension here. */
+static struct c2c_header header_offset_tui = HEADER_SPAN("-----", "Off", 2);
+
+static struct c2c_dimension dim_offset = {
+	.header		= HEADER_SPAN("--- Data address -", "Offset", 2),
+	.name		= "offset",
+	.cmp		= offset_cmp,
+	.entry		= offset_entry,
+	.width		= 18,
+};
+
+/* Shares its entry callback with dim_dcacheline_node. */
+static struct c2c_dimension dim_offset_node = {
+	.header		= HEADER_LOW("Node"),
+	.name		= "offset_node",
+	.cmp		= empty_cmp,
+	.entry		= dcacheline_node_entry,
+	.width		= 4,
+};
+
+static struct c2c_dimension dim_iaddr = {
+	.header		= HEADER_LOW("Code address"),
+	.name		= "iaddr",
+	.cmp		= iaddr_cmp,
+	.entry		= iaddr_entry,
+	.width		= 18,
+};
+
+/*
+ * HITM and store counter dimensions.  The cl_* variants reuse the same
+ * entry/cmp callbacks as the plain ones and differ only in name and
+ * header.
+ */
+static struct c2c_dimension dim_tot_hitm = {
+	.header		= HEADER_SPAN("----- LLC Load Hitm -----", "Total", 2),
+	.name		= "tot_hitm",
+	.cmp		= tot_hitm_cmp,
+	.entry		= tot_hitm_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_lcl_hitm = {
+	.header		= HEADER_SPAN_LOW("Lcl"),
+	.name		= "lcl_hitm",
+	.cmp		= lcl_hitm_cmp,
+	.entry		= lcl_hitm_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_rmt_hitm = {
+	.header		= HEADER_SPAN_LOW("Rmt"),
+	.name		= "rmt_hitm",
+	.cmp		= rmt_hitm_cmp,
+	.entry		= rmt_hitm_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_cl_rmt_hitm = {
+	.header		= HEADER_SPAN("----- HITM -----", "Rmt", 1),
+	.name		= "cl_rmt_hitm",
+	.cmp		= rmt_hitm_cmp,
+	.entry		= rmt_hitm_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_cl_lcl_hitm = {
+	.header		= HEADER_SPAN_LOW("Lcl"),
+	.name		= "cl_lcl_hitm",
+	.cmp		= lcl_hitm_cmp,
+	.entry		= lcl_hitm_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_stores = {
+	.header		= HEADER_SPAN("---- Store Reference ----", "Total", 2),
+	.name		= "stores",
+	.cmp		= store_cmp,
+	.entry		= store_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_stores_l1hit = {
+	.header		= HEADER_SPAN_LOW("L1Hit"),
+	.name		= "stores_l1hit",
+	.cmp		= st_l1hit_cmp,
+	.entry		= st_l1hit_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_stores_l1miss = {
+	.header		= HEADER_SPAN_LOW("L1Miss"),
+	.name		= "stores_l1miss",
+	.cmp		= st_l1miss_cmp,
+	.entry		= st_l1miss_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_cl_stores_l1hit = {
+	.header		= HEADER_SPAN("-- Store Refs --", "L1 Hit", 1),
+	.name		= "cl_stores_l1hit",
+	.cmp		= st_l1hit_cmp,
+	.entry		= st_l1hit_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_cl_stores_l1miss = {
+	.header		= HEADER_SPAN_LOW("L1 Miss"),
+	.name		= "cl_stores_l1miss",
+	.cmp		= st_l1miss_cmp,
+	.entry		= st_l1miss_entry,
+	.width		= 7,
+};
+
+/* Load hit/miss counter dimensions and the record/load totals. */
+static struct c2c_dimension dim_ld_fbhit = {
+	.header		= HEADER_SPAN("----- Core Load Hit -----", "FB", 2),
+	.name		= "ld_fbhit",
+	.cmp		= ld_fbhit_cmp,
+	.entry		= ld_fbhit_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_ld_l1hit = {
+	.header		= HEADER_SPAN_LOW("L1"),
+	.name		= "ld_l1hit",
+	.cmp		= ld_l1hit_cmp,
+	.entry		= ld_l1hit_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_ld_l2hit = {
+	.header		= HEADER_SPAN_LOW("L2"),
+	.name		= "ld_l2hit",
+	.cmp		= ld_l2hit_cmp,
+	.entry		= ld_l2hit_entry,
+	.width		= 7,
+};
+
+/* Note: looked up as "ld_lclhit" although it shows the ld_llchit counter. */
+static struct c2c_dimension dim_ld_llchit = {
+	.header		= HEADER_SPAN("-- LLC Load Hit --", "Llc", 1),
+	.name		= "ld_lclhit",
+	.cmp		= ld_llchit_cmp,
+	.entry		= ld_llchit_entry,
+	.width		= 8,
+};
+
+static struct c2c_dimension dim_ld_rmthit = {
+	.header		= HEADER_SPAN_LOW("Rmt"),
+	.name		= "ld_rmthit",
+	.cmp		= rmt_hit_cmp,
+	.entry		= rmt_hit_entry,
+	.width		= 8,
+};
+
+static struct c2c_dimension dim_ld_llcmiss = {
+	.header		= HEADER_BOTH("LLC", "Ld Miss"),
+	.name		= "ld_llcmiss",
+	.cmp		= ld_llcmiss_cmp,
+	.entry		= ld_llcmiss_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_tot_recs = {
+	.header		= HEADER_BOTH("Total", "records"),
+	.name		= "tot_recs",
+	.cmp		= tot_recs_cmp,
+	.entry		= tot_recs_entry,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_tot_loads = {
+	.header		= HEADER_BOTH("Total", "Loads"),
+	.name		= "tot_loads",
+	.cmp		= tot_loads_cmp,
+	.entry		= tot_loads_entry,
+	.width		= 7,
+};
+
+/*
+ * Headers for the percent_hitm column, indexed by display mode.
+ * dim_percent_hitm itself is defined without a header.
+ */
+static struct c2c_header percent_hitm_header[] = {
+	[DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"),
+	[DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"),
+	[DISPLAY_TOT] = HEADER_BOTH("Tot", "Hitm"),
+};
+
+/* Percentage dimensions; these also provide a color callback. */
+static struct c2c_dimension dim_percent_hitm = {
+	.name		= "percent_hitm",
+	.cmp		= percent_hitm_cmp,
+	.entry		= percent_hitm_entry,
+	.color		= percent_hitm_color,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_percent_rmt_hitm = {
+	.header		= HEADER_SPAN("----- HITM -----", "Rmt", 1),
+	.name		= "percent_rmt_hitm",
+	.cmp		= percent_rmt_hitm_cmp,
+	.entry		= percent_rmt_hitm_entry,
+	.color		= percent_rmt_hitm_color,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_percent_lcl_hitm = {
+	.header		= HEADER_SPAN_LOW("Lcl"),
+	.name		= "percent_lcl_hitm",
+	.cmp		= percent_lcl_hitm_cmp,
+	.entry		= percent_lcl_hitm_entry,
+	.color		= percent_lcl_hitm_color,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_percent_stores_l1hit = {
+	.header		= HEADER_SPAN("-- Store Refs --", "L1 Hit", 1),
+	.name		= "percent_stores_l1hit",
+	.cmp		= percent_stores_l1hit_cmp,
+	.entry		= percent_stores_l1hit_entry,
+	.color		= percent_stores_l1hit_color,
+	.width		= 7,
+};
+
+static struct c2c_dimension dim_percent_stores_l1miss = {
+	.header		= HEADER_SPAN_LOW("L1 Miss"),
+	.name		= "percent_stores_l1miss",
+	.cmp		= percent_stores_l1miss_cmp,
+	.entry		= percent_stores_l1miss_entry,
+	.color		= percent_stores_l1miss_color,
+	.width		= 7,
+};
+
+/* Local and remote dram load counters. */
+static struct c2c_dimension dim_dram_lcl = {
+	.header		= HEADER_SPAN("--- Load Dram ----", "Lcl", 1),
+	.name		= "dram_lcl",
+	.cmp		= lcl_dram_cmp,
+	.entry		= lcl_dram_entry,
+	.width		= 8,
+};
+
+static struct c2c_dimension dim_dram_rmt = {
+	.header		= HEADER_SPAN_LOW("Rmt"),
+	.name		= "dram_rmt",
+	.cmp		= rmt_dram_cmp,
+	.entry		= rmt_dram_entry,
+	.width		= 8,
+};
+
+static struct c2c_dimension dim_pid = {
+	.header		= HEADER_LOW("Pid"),
+	.name		= "pid",
+	.cmp		= pid_cmp,
+	.entry		= pid_entry,
+	.width		= 7,
+};
+
+/*
+ * Dimensions backed by a generic sort entry ('se'): they define no
+ * callbacks or width of their own, get_format() wires the c2c_se_*
+ * helpers for them.
+ */
+static struct c2c_dimension dim_tid = {
+	.header		= HEADER_LOW("Tid"),
+	.name		= "tid",
+	.se		= &sort_thread,
+};
+
+static struct c2c_dimension dim_symbol = {
+	.name		= "symbol",
+	.se		= &sort_sym,
+};
+
+static struct c2c_dimension dim_dso = {
+	.header		= HEADER_BOTH("Shared", "Object"),
+	.name		= "dso",
+	.se		= &sort_dso,
+};
+
+/*
+ * Headers for the node column; the texts match the three output forms
+ * that node_entry() selects by c2c.node_info (0, 1, 2).  dim_node itself
+ * is defined without a header.
+ */
+static struct c2c_header header_node[3] = {
+	HEADER_LOW("Node"),
+	HEADER_LOW("Node{cpus %hitms %stores}"),
+	HEADER_LOW("Node{cpu list}"),
+};
+
+static struct c2c_dimension dim_node = {
+	.name		= "node",
+	.cmp		= empty_cmp,
+	.entry		= node_entry,
+	.width		= 4,
+};
+
+/* Mean value columns generated by MEAN_ENTRY; no ordering (empty_cmp). */
+static struct c2c_dimension dim_mean_rmt = {
+	.header		= HEADER_SPAN("---------- cycles ----------", "rmt hitm", 2),
+	.name		= "mean_rmt",
+	.cmp		= empty_cmp,
+	.entry		= mean_rmt_entry,
+	.width		= 8,
+};
+
+static struct c2c_dimension dim_mean_lcl = {
+	.header		= HEADER_SPAN_LOW("lcl hitm"),
+	.name		= "mean_lcl",
+	.cmp		= empty_cmp,
+	.entry		= mean_lcl_entry,
+	.width		= 8,
+};
+
+static struct c2c_dimension dim_mean_load = {
+	.header		= HEADER_SPAN_LOW("load"),
+	.name		= "mean_load",
+	.cmp		= empty_cmp,
+	.entry		= mean_load_entry,
+	.width		= 8,
+};
+
+static struct c2c_dimension dim_cpucnt = {
+	.header		= HEADER_BOTH("cpu", "cnt"),
+	.name		= "cpucnt",
+	.cmp		= empty_cmp,
+	.entry		= cpucnt_entry,
+	.width		= 8,
+};
+
+/* Sort entry backed; shares the symbol width handling in c2c_width(). */
+static struct c2c_dimension dim_srcline = {
+	.name		= "cl_srcline",
+	.se		= &sort_srcline,
+};
+
+/*
+ * Cacheline index columns: "cl_idx" and "cl_num" print cacheline_idx under
+ * different headers, "cl_num_empty" prints an empty cell.
+ */
+static struct c2c_dimension dim_dcacheline_idx = {
+	.header		= HEADER_LOW("Index"),
+	.name		= "cl_idx",
+	.cmp		= empty_cmp,
+	.entry		= cl_idx_entry,
+	.width		= 5,
+};
+
+static struct c2c_dimension dim_dcacheline_num = {
+	.header		= HEADER_LOW("Num"),
+	.name		= "cl_num",
+	.cmp		= empty_cmp,
+	.entry		= cl_idx_entry,
+	.width		= 5,
+};
+
+static struct c2c_dimension dim_dcacheline_num_empty = {
+	.header		= HEADER_LOW("Num"),
+	.name		= "cl_num_empty",
+	.cmp		= empty_cmp,
+	.entry		= cl_idx_empty_entry,
+	.width		= 5,
+};
+
+/*
+ * NULL terminated table of all c2c dimensions, searched by name in
+ * get_dimension().
+ */
+static struct c2c_dimension *dimensions[] = {
+	&dim_dcacheline,
+	&dim_dcacheline_node,
+	&dim_dcacheline_count,
+	&dim_offset,
+	&dim_offset_node,
+	&dim_iaddr,
+	&dim_tot_hitm,
+	&dim_lcl_hitm,
+	&dim_rmt_hitm,
+	&dim_cl_lcl_hitm,
+	&dim_cl_rmt_hitm,
+	&dim_stores,
+	&dim_stores_l1hit,
+	&dim_stores_l1miss,
+	&dim_cl_stores_l1hit,
+	&dim_cl_stores_l1miss,
+	&dim_ld_fbhit,
+	&dim_ld_l1hit,
+	&dim_ld_l2hit,
+	&dim_ld_llchit,
+	&dim_ld_rmthit,
+	&dim_ld_llcmiss,
+	&dim_tot_recs,
+	&dim_tot_loads,
+	&dim_percent_hitm,
+	&dim_percent_rmt_hitm,
+	&dim_percent_lcl_hitm,
+	&dim_percent_stores_l1hit,
+	&dim_percent_stores_l1miss,
+	&dim_dram_lcl,
+	&dim_dram_rmt,
+	&dim_pid,
+	&dim_tid,
+	&dim_symbol,
+	&dim_dso,
+	&dim_node,
+	&dim_mean_rmt,
+	&dim_mean_lcl,
+	&dim_mean_load,
+	&dim_cpucnt,
+	&dim_srcline,
+	&dim_dcacheline_idx,
+	&dim_dcacheline_num,
+	&dim_dcacheline_num_empty,
+	NULL,
+};
+
+/* Free callback: release the c2c_fmt that embeds 'fmt'. */
+static void fmt_free(struct perf_hpp_fmt *fmt)
+{
+	free(container_of(fmt, struct c2c_fmt, fmt));
+}
+
+/* Two c2c formats are equal when they refer to the same dimension. */
+static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	struct c2c_dimension *dim_a = container_of(a, struct c2c_fmt, fmt)->dim;
+	struct c2c_dimension *dim_b = container_of(b, struct c2c_fmt, fmt)->dim;
+
+	return dim_a == dim_b;
+}
+
+/*
+ * Look up a dimension by name in the dimensions[] table.
+ * Returns NULL when no dimension has that name.
+ */
+static struct c2c_dimension *get_dimension(const char *name)
+{
+	struct c2c_dimension **iter;
+
+	for (iter = dimensions; *iter; iter++) {
+		if (strcmp((*iter)->name, name) == 0)
+			return *iter;
+	}
+
+	return NULL;
+}
+
+static int c2c_se_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			struct hist_entry *he)
+{
+	struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+	struct c2c_dimension *dim = c2c_fmt->dim;
+	size_t len = fmt->user_len;
+
+	if (!len) {
+		len = hists__col_len(he->hists, dim->se->se_width_idx);
+
+		if (dim == &dim_symbol || dim == &dim_srcline)
+			len = symbol_width(he->hists, dim->se);
+	}
+
+	return dim->se->se_snprintf(he, hpp->buf, hpp->size, len);
+}
+
+/* cmp/sort callback for sort-entry backed dimensions: defer to se_cmp. */
+static int64_t c2c_se_cmp(struct perf_hpp_fmt *fmt,
+			  struct hist_entry *a, struct hist_entry *b)
+{
+	struct c2c_dimension *dim = container_of(fmt, struct c2c_fmt, fmt)->dim;
+
+	return dim->se->se_cmp(a, b);
+}
+
+/*
+ * collapse callback for sort-entry backed dimensions: use the sort
+ * entry's se_collapse when it has one, se_cmp otherwise.
+ */
+static int64_t c2c_se_collapse(struct perf_hpp_fmt *fmt,
+			       struct hist_entry *a, struct hist_entry *b)
+{
+	struct c2c_dimension *dim = container_of(fmt, struct c2c_fmt, fmt)->dim;
+
+	if (dim->se->se_collapse)
+		return dim->se->se_collapse(a, b);
+
+	return dim->se->se_cmp(a, b);
+}
+
+/*
+ * Allocate a c2c_fmt for the dimension called @name and wire up its
+ * perf_hpp_fmt callbacks.
+ *
+ * Dimensions that carry a sort entry (dim->se) get the c2c_se_*
+ * wrappers; the others use the callbacks stored in the dimension itself.
+ *
+ * Returns NULL when @name is not a c2c dimension or the allocation fails.
+ */
+static struct c2c_fmt *get_format(const char *name)
+{
+	struct c2c_dimension *dim = get_dimension(name);
+	struct c2c_fmt *c2c_fmt;
+	struct perf_hpp_fmt *fmt;
+
+	if (dim == NULL)
+		return NULL;
+
+	c2c_fmt = zalloc(sizeof(*c2c_fmt));
+	if (c2c_fmt == NULL)
+		return NULL;
+
+	c2c_fmt->dim = dim;
+	fmt = &c2c_fmt->fmt;
+
+	INIT_LIST_HEAD(&fmt->list);
+	INIT_LIST_HEAD(&fmt->sort_list);
+
+	/* Callbacks shared by both kinds of dimension. */
+	fmt->header = c2c_header;
+	fmt->width  = c2c_width;
+	fmt->equal  = fmt_equal;
+	fmt->free   = fmt_free;
+
+	if (dim->se) {
+		fmt->cmp      = c2c_se_cmp;
+		fmt->sort     = c2c_se_cmp;
+		fmt->color    = NULL;
+		fmt->entry    = c2c_se_entry;
+		fmt->collapse = c2c_se_collapse;
+	} else {
+		fmt->cmp      = dim->cmp;
+		fmt->sort     = dim->cmp;
+		fmt->color    = dim->color;
+		fmt->entry    = dim->entry;
+		fmt->collapse = dim->cmp;
+	}
+
+	return c2c_fmt;
+}
+
+/*
+ * Register output column @name on @hpp_list. c2c dimensions are tried
+ * first; when get_format() yields nothing the name is handed to
+ * output_field_add().
+ */
+static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name)
+{
+	struct c2c_fmt *c2c_fmt = get_format(name);
+
+	if (c2c_fmt != NULL) {
+		perf_hpp_list__column_register(hpp_list, &c2c_fmt->fmt);
+		return 0;
+	}
+
+	reset_dimensions();
+	return output_field_add(hpp_list, name);
+}
+
+/*
+ * Register sort key @name on @hpp_list. c2c dimensions are tried first;
+ * when get_format() yields nothing the name is handed to
+ * sort_dimension__add().
+ */
+static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name)
+{
+	struct c2c_fmt *c2c_fmt = get_format(name);
+
+	if (c2c_fmt == NULL) {
+		reset_dimensions();
+		return sort_dimension__add(hpp_list, name, NULL, 0);
+	}
+
+	/* Record on the list that it sorts by dso. */
+	if (c2c_fmt->dim == &dim_dso)
+		hpp_list->dso = 1;
+
+	perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt);
+	return 0;
+}
+
+/*
+ * Split @_list on ", " and call @_fn(hpp_list, token) for each token.
+ *
+ * The macro relies on 'hpp_list' and an int 'ret' being in scope at the
+ * expansion site. 'ret' is reset to 0 on every expansion and ends up
+ * holding the result of the last @_fn call; parsing stops at the first
+ * token for which @_fn returns -EINVAL or -ESRCH. A NULL @_list is
+ * skipped. @_list is modified in place by strtok_r().
+ */
+#define PARSE_LIST(_list, _fn)							\
+	do {									\
+		char *tmp, *tok;						\
+		ret = 0;							\
+										\
+		if (!_list)							\
+			break;							\
+										\
+		for (tok = strtok_r((char *)_list, ", ", &tmp);			\
+				tok; tok = strtok_r(NULL, ", ", &tmp)) {	\
+			ret = _fn(hpp_list, tok);				\
+			if (ret == -EINVAL) {					\
+				pr_err("Invalid --fields key: `%s'", tok);	\
+				break;						\
+			} else if (ret == -ESRCH) {				\
+				pr_err("Unknown --fields key: `%s'", tok);	\
+				break;						\
+			}							\
+		}								\
+	} while (0)
+
+/*
+ * Parse the output (@output_) and sort (@sort_) key lists into
+ * @hpp_list. Either list may be NULL, in which case it is skipped.
+ *
+ * Returns 0 on success, -ENOMEM when a list cannot be duplicated, or the
+ * error of the key that failed to parse. On error the sort keys are not
+ * copied to the output fields.
+ */
+static int hpp_list__parse(struct perf_hpp_list *hpp_list,
+			   const char *output_,
+			   const char *sort_)
+{
+	/* strtok_r() writes into its input, so parse private copies. */
+	char *output = output_ ? strdup(output_) : NULL;
+	char *sort   = sort_   ? strdup(sort_) : NULL;
+	int ret;
+
+	if ((output_ && !output) || (sort_ && !sort)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	PARSE_LIST(output, c2c_hists__init_output);
+	/*
+	 * PARSE_LIST resets ret on entry, so check it here or the next
+	 * expansion would hide a failure of the output list.
+	 */
+	if (ret)
+		goto out;
+
+	PARSE_LIST(sort,   c2c_hists__init_sort);
+	if (ret)
+		goto out;
+
+	/* copy sort keys to output fields */
+	perf_hpp__setup_output_field(hpp_list);
+
+	/*
+	 * We dont need other sorting keys other than those
+	 * we already specified. It also really slows down
+	 * the processing a lot with big number of output
+	 * fields, so switching this off for c2c.
+	 */
+
+#if 0
+	/* and then copy output fields to sort keys */
+	perf_hpp__append_sort_keys(&hists->list);
+#endif
+
+out:
+	free(output);
+	free(sort);
+	return ret;
+}
+
+/*
+ * Initialize @hists with its own perf_hpp_list and parse @sort into it.
+ *
+ * Only the sort fields are set up here; the output fields are added
+ * later, when the hists get resorted.
+ */
+static int c2c_hists__init(struct c2c_hists *hists,
+			   const char *sort,
+			   int nr_header_lines)
+{
+	struct perf_hpp_list *list = &hists->list;
+
+	__hists__init(&hists->hists, list);
+	perf_hpp_list__init(list);
+
+	/* Overload number of header lines. */
+	list->nr_header_lines = nr_header_lines;
+
+	return hpp_list__parse(list, NULL, sort);
+}
+
+/* Drop the current output fields of @c2c_hists and parse new lists. */
+static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
+			     const char *output,
+			     const char *sort)
+{
+	struct perf_hpp_list *list = &c2c_hists->list;
+
+	perf_hpp__reset_output_field(list);
+	return hpp_list__parse(list, output, sort);
+}
+
+#define DISPLAY_LINE_LIMIT  0.0005
+
+/*
+ * Decide whether entry @he is displayed.
+ *
+ * With c2c.show_all everything is shown. Otherwise the entry is marked
+ * HIST_FILTER__C2C when its share of the HITM counter selected by
+ * c2c.display is below DISPLAY_LINE_LIMIT of the total in @stats, or
+ * when that total is zero.
+ *
+ * Returns true when the entry is not filtered.
+ */
+static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
+{
+	struct c2c_hist_entry *c2c_he;
+	double ld_dist;
+
+	if (c2c.show_all)
+		return true;
+
+	c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+/* Wrapped in do/while so it behaves as one statement at the call site. */
+#define FILTER_HITM(__h)							\
+	do {									\
+		if (stats->__h) {						\
+			ld_dist = ((double)c2c_he->stats.__h / stats->__h);	\
+			if (ld_dist < DISPLAY_LINE_LIMIT)			\
+				he->filtered = HIST_FILTER__C2C;		\
+		} else {							\
+			he->filtered = HIST_FILTER__C2C;			\
+		}								\
+	} while (0)
+
+	switch (c2c.display) {
+	case DISPLAY_LCL:
+		FILTER_HITM(lcl_hitm);
+		break;
+	case DISPLAY_RMT:
+		FILTER_HITM(rmt_hitm);
+		break;
+	case DISPLAY_TOT:
+		FILTER_HITM(tot_hitm);
+		break;
+	default:
+		break;
+	}
+
+#undef FILTER_HITM
+
+	return he->filtered == 0;
+}
+
+/*
+ * Non-zero when @he has a store, or a HITM of the kind selected by
+ * c2c.display (total, local, or remote for any other setting).
+ */
+static inline int valid_hitm_or_store(struct hist_entry *he)
+{
+	struct c2c_hist_entry *c2c_he = container_of(he, struct c2c_hist_entry, he);
+	bool has_hitm;
+
+	if (c2c.display == DISPLAY_TOT)
+		has_hitm = c2c_he->stats.tot_hitm;
+	else if (c2c.display == DISPLAY_LCL)
+		has_hitm = c2c_he->stats.lcl_hitm;
+	else
+		has_hitm = c2c_he->stats.rmt_hitm;
+
+	return has_hitm || c2c_he->stats.store;
+}
+
+/*
+ * Grow the node column width to at least @len. Entries whose hists is
+ * the top-level c2c.hists use dim_dcacheline_node, all others
+ * dim_offset_node.
+ */
+static void set_node_width(struct c2c_hist_entry *c2c_he, int len)
+{
+	struct c2c_dimension *dim = &dim_offset_node;
+
+	if (c2c_he->hists == &c2c.hists)
+		dim = &dim_dcacheline_node;
+
+	if (dim->width < len)
+		dim->width = len;
+}
+
+/*
+ * Format the node set of @c2c_he into c2c_he->nodestr ("N/A" when the
+ * set is empty) and update the node column width. Does nothing when the
+ * string already exists.
+ *
+ * Returns 0 on success, -ENOMEM when the string cannot be duplicated.
+ */
+static int set_nodestr(struct c2c_hist_entry *c2c_he)
+{
+	char str[30];
+	int width;
+
+	if (c2c_he->nodestr != NULL)
+		return 0;
+
+	if (!bitmap_weight(c2c_he->nodeset, c2c.nodes_cnt))
+		width = scnprintf(str, sizeof(str), "N/A");
+	else
+		width = bitmap_scnprintf(c2c_he->nodeset, c2c.nodes_cnt,
+					 str, sizeof(str));
+
+	set_node_width(c2c_he, width);
+
+	c2c_he->nodestr = strdup(str);
+	if (c2c_he->nodestr == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Update the column lengths of the hists that owns @c2c_he and make
+ * sure the entry has its node string. The result of set_nodestr() is
+ * not checked here.
+ */
+static void calc_width(struct c2c_hist_entry *c2c_he)
+{
+	struct hist_entry *he = &c2c_he->he;
+
+	hists__calc_col_len(he->hists, he);
+	set_nodestr(c2c_he);
+}
+
+/*
+ * Resort callback: resolve the source line when c2c.show_src asks for
+ * it, refresh the column widths and filter out entries that have
+ * neither a relevant HITM nor a store. Always returns 0.
+ */
+static int filter_cb(struct hist_entry *he)
+{
+	if (c2c.show_src && he->srcline == NULL)
+		he->srcline = hist_entry__get_srcline(he);
+
+	calc_width(container_of(he, struct c2c_hist_entry, he));
+
+	if (!valid_hitm_or_store(he))
+		he->filtered = HIST_FILTER__C2C;
+
+	return 0;
+}
+
+/*
+ * Per-entry callback for the top-level hists: for every displayed entry
+ * that has its own hists, assign the next cacheline index and resort
+ * that hists with the cl_output/cl_resort fields. Always returns 0.
+ */
+static int resort_cl_cb(struct hist_entry *he)
+{
+	/* Running index handed out to displayed entries, kept across calls. */
+	static unsigned int idx;
+	struct c2c_hist_entry *c2c_he = container_of(he, struct c2c_hist_entry, he);
+	struct c2c_hists *c2c_hists = c2c_he->hists;
+
+	/* he__display() runs for every entry since it may mark it filtered. */
+	if (!he__display(he, &c2c.hitm_stats) || !c2c_hists)
+		return 0;
+
+	c2c_he->cacheline_idx = idx++;
+	calc_width(c2c_he);
+
+	c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort);
+
+	hists__collapse_resort(&c2c_hists->hists, NULL);
+	hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb);
+
+	return 0;
+}
+
+/* Pick the header of the node column according to c2c.node_info. */
+static void setup_nodes_header(void)
+{
+	dim_node.header = header_node[c2c.node_info];
+}
+
+/*
+ * Build the node/cpu lookup tables from the NUMA topology stored in the
+ * session header:
+ *
+ *   c2c.nodes[node]   - bitmap of the cpus listed for @node
+ *   c2c.cpu2node[cpu] - node that lists @cpu, -1 when none does
+ *
+ * Returns 0 on success, -EINVAL when the topology is missing or
+ * inconsistent, -ENOMEM on allocation failure. The tables are owned by
+ * the global c2c state, also on failure.
+ */
+static int setup_nodes(struct perf_session *session)
+{
+	struct numa_node *n;
+	unsigned long **nodes;
+	int node, cpu;
+	int *cpu2node;
+
+	if (c2c.node_info > 2)
+		c2c.node_info = 2;
+
+	c2c.nodes_cnt = session->header.env.nr_numa_nodes;
+	/*
+	 * NOTE(review): cpu ids on a machine with offline cpus may exceed
+	 * nr_cpus_online - confirm whether the tables should be sized by
+	 * the number of available cpus instead.
+	 */
+	c2c.cpus_cnt  = session->header.env.nr_cpus_online;
+
+	n = session->header.env.numa_nodes;
+	if (!n)
+		return -EINVAL;
+
+	nodes = zalloc(sizeof(unsigned long *) * c2c.nodes_cnt);
+	if (!nodes)
+		return -ENOMEM;
+
+	c2c.nodes = nodes;
+
+	cpu2node = zalloc(sizeof(int) * c2c.cpus_cnt);
+	if (!cpu2node)
+		return -ENOMEM;
+
+	for (cpu = 0; cpu < c2c.cpus_cnt; cpu++)
+		cpu2node[cpu] = -1;
+
+	c2c.cpu2node = cpu2node;
+
+	for (node = 0; node < c2c.nodes_cnt; node++) {
+		struct cpu_map *map = n[node].map;
+		unsigned long *set;
+
+		set = bitmap_alloc(c2c.cpus_cnt);
+		if (!set)
+			return -ENOMEM;
+
+		/*
+		 * Store the bitmap right away so that none of the error
+		 * paths below loses track of it.
+		 */
+		nodes[node] = set;
+
+		/* No cpu map for this node, leave its bitmap empty. */
+		if (!map)
+			continue;
+
+		for (cpu = 0; cpu < map->nr; cpu++) {
+			int id = map->map[cpu];
+
+			/* Never index past the bitmap or cpu2node[]. */
+			if (id < 0 || id >= c2c.cpus_cnt) {
+				pr_err("node %d: cpu %d out of range\n",
+				       node, id);
+				return -EINVAL;
+			}
+
+			set_bit(id, set);
+
+			if (WARN_ONCE(cpu2node[id] != -1, "node/cpu topology bug"))
+				return -EINVAL;
+
+			cpu2node[id] = node;
+		}
+	}
+
+	setup_nodes_header();
+	return 0;
+}
+
+#define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm)
+
+/*
+ * Resort callback: count entries with a local or remote HITM as shared
+ * cachelines and fold their stats into c2c.hitm_stats. Always returns 0.
+ */
+static int resort_hitm_cb(struct hist_entry *he)
+{
+	struct c2c_hist_entry *c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+	if (!HAS_HITMS(c2c_he))
+		return 0;
+
+	c2c.shared_clines++;
+	c2c_add_stats(&c2c.hitm_stats, &c2c_he->stats);
+	return 0;
+}
+
+/*
+ * Call @cb on every entry of hists->entries in tree order, stopping at
+ * the first non-zero return, which is then returned. Returns 0 when all
+ * callbacks succeed or the tree is empty.
+ */
+static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb)
+{
+	struct rb_node *nd;
+	int ret = 0;
+
+	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+		ret = cb(rb_entry(nd, struct hist_entry, rb_node));
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Print the "Trace Event Information" table from the stats of the
+ * top-level c2c hists to @out.
+ */
+static void print_c2c__display_stats(FILE *out)
+{
+	int llc_misses;
+	double llc_total;
+	struct c2c_stats *stats = &c2c.hists.stats;
+
+	llc_misses = stats->lcl_dram +
+		     stats->rmt_dram +
+		     stats->rmt_hit +
+		     stats->rmt_hitm;
+
+	/*
+	 * Avoid dividing by zero when no LLC miss was recorded: the
+	 * percentages below then come out as 0.0 instead of nan.
+	 */
+	llc_total = llc_misses ? (double)llc_misses : 1.0;
+
+	fprintf(out, "=================================================\n");
+	fprintf(out, "            Trace Event Information              \n");
+	fprintf(out, "=================================================\n");
+	fprintf(out, "  Total records                     : %10d\n", stats->nr_entries);
+	fprintf(out, "  Locked Load/Store Operations      : %10d\n", stats->locks);
+	fprintf(out, "  Load Operations                   : %10d\n", stats->load);
+	fprintf(out, "  Loads - uncacheable               : %10d\n", stats->ld_uncache);
+	fprintf(out, "  Loads - IO                        : %10d\n", stats->ld_io);
+	fprintf(out, "  Loads - Miss                      : %10d\n", stats->ld_miss);
+	fprintf(out, "  Loads - no mapping                : %10d\n", stats->ld_noadrs);
+	fprintf(out, "  Load Fill Buffer Hit              : %10d\n", stats->ld_fbhit);
+	fprintf(out, "  Load L1D hit                      : %10d\n", stats->ld_l1hit);
+	fprintf(out, "  Load L2D hit                      : %10d\n", stats->ld_l2hit);
+	fprintf(out, "  Load LLC hit                      : %10d\n", stats->ld_llchit + stats->lcl_hitm);
+	fprintf(out, "  Load Local HITM                   : %10d\n", stats->lcl_hitm);
+	fprintf(out, "  Load Remote HITM                  : %10d\n", stats->rmt_hitm);
+	fprintf(out, "  Load Remote HIT                   : %10d\n", stats->rmt_hit);
+	fprintf(out, "  Load Local DRAM                   : %10d\n", stats->lcl_dram);
+	fprintf(out, "  Load Remote DRAM                  : %10d\n", stats->rmt_dram);
+	fprintf(out, "  Load MESI State Exclusive         : %10d\n", stats->ld_excl);
+	fprintf(out, "  Load MESI State Shared            : %10d\n", stats->ld_shared);
+	fprintf(out, "  Load LLC Misses                   : %10d\n", llc_misses);
+	fprintf(out, "  LLC Misses to Local DRAM          : %10.1f%%\n", ((double)stats->lcl_dram/llc_total) * 100.);
+	fprintf(out, "  LLC Misses to Remote DRAM         : %10.1f%%\n", ((double)stats->rmt_dram/llc_total) * 100.);
+	fprintf(out, "  LLC Misses to Remote cache (HIT)  : %10.1f%%\n", ((double)stats->rmt_hit /llc_total) * 100.);
+	fprintf(out, "  LLC Misses to Remote cache (HITM) : %10.1f%%\n", ((double)stats->rmt_hitm/llc_total) * 100.);
+	fprintf(out, "  Store Operations                  : %10d\n", stats->store);
+	fprintf(out, "  Store - uncacheable               : %10d\n", stats->st_uncache);
+	fprintf(out, "  Store - no mapping                : %10d\n", stats->st_noadrs);
+	fprintf(out, "  Store L1D Hit                     : %10d\n", stats->st_l1hit);
+	fprintf(out, "  Store L1D Miss                    : %10d\n", stats->st_l1miss);
+	fprintf(out, "  No Page Map Rejects               : %10d\n", stats->nomap);
+	fprintf(out, "  Unable to parse data source       : %10d\n", stats->noparse);
+}
+
+/*
+ * Print the "Global Shared Cache Line Event Information" table from
+ * c2c.hitm_stats to @out.
+ */
+static void print_shared_cacheline_info(FILE *out)
+{
+	struct c2c_stats *st = &c2c.hitm_stats;
+	int hitms = st->lcl_hitm + st->rmt_hitm;
+
+	fprintf(out, "=================================================\n");
+	fprintf(out, "    Global Shared Cache Line Event Information   \n");
+	fprintf(out, "=================================================\n");
+	fprintf(out, "  Total Shared Cache Lines          : %10d\n", c2c.shared_clines);
+	fprintf(out, "  Load HITs on shared lines         : %10d\n", st->load);
+	fprintf(out, "  Fill Buffer Hits on shared lines  : %10d\n", st->ld_fbhit);
+	fprintf(out, "  L1D hits on shared lines          : %10d\n", st->ld_l1hit);
+	fprintf(out, "  L2D hits on shared lines          : %10d\n", st->ld_l2hit);
+	fprintf(out, "  LLC hits on shared lines          : %10d\n", st->ld_llchit + st->lcl_hitm);
+	fprintf(out, "  Locked Access on shared lines     : %10d\n", st->locks);
+	fprintf(out, "  Store HITs on shared lines        : %10d\n", st->store);
+	fprintf(out, "  Store L1D hits on shared lines    : %10d\n", st->st_l1hit);
+	fprintf(out, "  Total Merged records              : %10d\n", hitms + st->store);
+}
+
+/*
+ * Print one cacheline: the summary line of @he_cl formatted with
+ * @hpp_list, framed by dashed rules, followed by the entries of
+ * @c2c_hists. The column headers are printed on the first call only;
+ * later calls emit a blank separator line instead.
+ */
+static void print_cacheline(struct c2c_hists *c2c_hists,
+			    struct hist_entry *he_cl,
+			    struct perf_hpp_list *hpp_list,
+			    FILE *out)
+{
+	static bool headers_done;
+	char bf[1000];
+	struct perf_hpp hpp = {
+		.buf  = bf,
+		.size = sizeof(bf),
+	};
+
+	if (headers_done) {
+		fprintf(out, "\n");
+	} else {
+		hists__fprintf_headers(&c2c_hists->hists, out);
+		headers_done = true;
+	}
+
+	fprintf(out, "  -------------------------------------------------------------\n");
+	__hist_entry__snprintf(he_cl, &hpp, hpp_list);
+	fprintf(out, "%s\n", bf);
+	fprintf(out, "  -------------------------------------------------------------\n");
+
+	hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, true);
+}
+
+/*
+ * Print the per-cacheline breakdown: every non-filtered entry of the
+ * top-level hists is printed together with its own hists.
+ */
+static void print_pareto(FILE *out)
+{
+	struct perf_hpp_list hpp_list;
+	struct rb_node *nd;
+	int ret;
+
+	/* Columns of the summary line printed for each cacheline. */
+	perf_hpp_list__init(&hpp_list);
+	ret = hpp_list__parse(&hpp_list,
+				"cl_num,"
+				"cl_rmt_hitm,"
+				"cl_lcl_hitm,"
+				"cl_stores_l1hit,"
+				"cl_stores_l1miss,"
+				"dcacheline",
+				NULL);
+
+	if (WARN_ONCE(ret, "failed to setup sort entries\n"))
+		return;
+
+	for (nd = rb_first(&c2c.hists.hists.entries); nd; nd = rb_next(nd)) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+		struct c2c_hist_entry *c2c_he;
+
+		if (!he->filtered) {
+			c2c_he = container_of(he, struct c2c_hist_entry, he);
+			print_cacheline(c2c_he->hists, he, &hpp_list, out);
+		}
+	}
+}
+
+/*
+ * Print the "c2c details" table: the events of the session, the HITM
+ * type the cachelines are sorted on and the cacheline sort fields.
+ */
+static void print_c2c_info(FILE *out, struct perf_session *session)
+{
+	struct perf_evlist *evlist = session->evlist;
+	struct perf_evsel *evsel;
+	/* Only the first event line carries the label. */
+	const char *label = "  Events";
+
+	fprintf(out, "=================================================\n");
+	fprintf(out, "                 c2c details                     \n");
+	fprintf(out, "=================================================\n");
+
+	evlist__for_each_entry(evlist, evsel) {
+		fprintf(out, "%-36s: %s\n", label, perf_evsel__name(evsel));
+		label = "";
+	}
+	fprintf(out, "  Cachelines sort on                : %s HITMs\n",
+		display_str[c2c.display]);
+	fprintf(out, "  Cacheline data grouping           : %s\n", c2c.cl_sort);
+}
+
+/*
+ * Write the whole stdio report to @out: the statistics tables, the c2c
+ * details and, unless c2c.stats_only is set, the shared cacheline table
+ * followed by the per-cacheline breakdown.
+ */
+static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
+{
+	setup_pager();
+
+	print_c2c__display_stats(out);
+	fprintf(out, "\n");
+	print_shared_cacheline_info(out);
+	fprintf(out, "\n");
+	print_c2c_info(out, session);
+
+	if (c2c.stats_only)
+		return;
+
+	fprintf(out, "\n");
+	fprintf(out, "=================================================\n");
+	fprintf(out, "           Shared Data Cache Line Table          \n");
+	fprintf(out, "=================================================\n");
+	fprintf(out, "#\n");
+
+	/* The table goes to @out like everything around it. */
+	hists__fprintf(&c2c.hists.hists, true, 0, 0, 0, out, false);
+
+	fprintf(out, "\n");
+	fprintf(out, "=================================================\n");
+	fprintf(out, "      Shared Cache Line Distribution Pareto      \n");
+	fprintf(out, "=================================================\n");
+	fprintf(out, "#\n");
+
+	print_pareto(out);
+}
+
+#ifdef HAVE_SLANG_SUPPORT
+/* Store the number of non-filtered entries of the browser's hists in @hb. */
+static void c2c_browser__update_nr_entries(struct hist_browser *hb)
+{
+	struct rb_node *nd;
+	u64 visible = 0;
+
+	for (nd = rb_first(&hb->hists->entries); nd; nd = rb_next(nd)) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+
+		if (he->filtered == 0)
+			visible++;
+	}
+
+	hb->nr_non_filtered_entries = visible;
+}
+
+/* Browser for the entries of one cacheline. */
+struct c2c_cacheline_browser {
+	struct hist_browser	 hb;	/* embedded browser, see container_of() users */
+	struct hist_entry	*he;	/* cacheline entry being browsed */
+};
+
+/*
+ * Title callback of the cacheline browser: "Cacheline 0x<addr>", where
+ * the address is 0 when the entry has no mem_info. Always returns 0.
+ */
+static int
+perf_c2c_cacheline_browser__title(struct hist_browser *browser,
+				  char *bf, size_t size)
+{
+	struct c2c_cacheline_browser *cl_browser;
+	struct hist_entry *he;
+	uint64_t addr = 0;
+
+	cl_browser = container_of(browser, struct c2c_cacheline_browser, hb);
+	he = cl_browser->he;
+
+	if (he->mem_info)
+		addr = cl_address(he->mem_info->daddr.addr);
+
+	/* uint64_t is not unsigned long everywhere, so print it as long long. */
+	scnprintf(bf, size, "Cacheline 0x%llx", (unsigned long long)addr);
+	return 0;
+}
+
+/*
+ * Allocate a cacheline browser over @hists for cacheline entry @he.
+ * Returns NULL on allocation failure; the caller frees the result.
+ */
+static struct c2c_cacheline_browser*
+c2c_cacheline_browser__new(struct hists *hists, struct hist_entry *he)
+{
+	struct c2c_cacheline_browser *cl_browser = zalloc(sizeof(*cl_browser));
+
+	if (cl_browser == NULL)
+		return NULL;
+
+	hist_browser__init(&cl_browser->hb, hists);
+	cl_browser->hb.c2c_filter = true;
+	cl_browser->hb.title      = perf_c2c_cacheline_browser__title;
+	cl_browser->he            = he;
+
+	return cl_browser;
+}
+
+/*
+ * Run the cacheline detail browser for entry @he until 'q' is pressed.
+ *
+ * Returns 0 on normal exit or when @he is NULL (nothing selected),
+ * -1 when the browser cannot be allocated.
+ */
+static int perf_c2c__browse_cacheline(struct hist_entry *he)
+{
+	struct c2c_hist_entry *c2c_he;
+	struct c2c_hists *c2c_hists;
+	struct c2c_cacheline_browser *cl_browser;
+	struct hist_browser *browser;
+	int key = -1;
+	const char help[] =
+	" ENTER         Toggle callchains (if present) \n"
+	" n             Toggle Node details info \n"
+	" s             Toggle full length of symbol and source line columns \n"
+	" q             Return back to cacheline list \n";
+
+	/* Nothing is selected, e.g. the cacheline list is empty. */
+	if (!he)
+		return 0;
+
+	/* Display compact version first. */
+	c2c.symbol_full = false;
+
+	c2c_he = container_of(he, struct c2c_hist_entry, he);
+	c2c_hists = c2c_he->hists;
+
+	cl_browser = c2c_cacheline_browser__new(&c2c_hists->hists, he);
+	if (cl_browser == NULL)
+		return -1;
+
+	browser = &cl_browser->hb;
+
+	/* reset abort key so that it can get Ctrl-C as a key */
+	SLang_reset_tty();
+	SLang_init_tty(0, 0, 0);
+
+	c2c_browser__update_nr_entries(browser);
+
+	while (1) {
+		key = hist_browser__run(browser, "? - help", true);
+
+		switch (key) {
+		case 's':
+			c2c.symbol_full = !c2c.symbol_full;
+			break;
+		case 'n':
+			/* Cycle through the three node info levels. */
+			c2c.node_info = (c2c.node_info + 1) % 3;
+			setup_nodes_header();
+			break;
+		case 'q':
+			goto out;
+		case '?':
+			ui_browser__help_window(&browser->b, help);
+			break;
+		default:
+			break;
+		}
+	}
+
+out:
+	free(cl_browser);
+	return 0;
+}
+
+/*
+ * Title callback of the cacheline list browser: number of non-filtered
+ * entries and the HITM type they are sorted on. Always returns 0.
+ */
+static int perf_c2c_browser__title(struct hist_browser *browser,
+				   char *bf, size_t size)
+{
+	/* The counter is 64 bit wide, print it as long long on every arch. */
+	scnprintf(bf, size,
+		  "Shared Data Cache Line Table     "
+		  "(%llu entries, sorted on %s HITMs)",
+		  (unsigned long long)browser->nr_non_filtered_entries,
+		  display_str[c2c.display]);
+	return 0;
+}
+
+/*
+ * Create a hist browser over @hists set up for c2c (own title callback,
+ * c2c filter enabled). Returns NULL when hist_browser__new() fails.
+ */
+static struct hist_browser*
+perf_c2c_browser__new(struct hists *hists)
+{
+	struct hist_browser *hb = hist_browser__new(hists);
+
+	if (hb == NULL)
+		return NULL;
+
+	hb->title      = perf_c2c_browser__title;
+	hb->c2c_filter = true;
+	return hb;
+}
+
+/*
+ * Run the cacheline list browser over @hists until 'q' is pressed.
+ * 'd' opens the detail browser for the selected entry, '?' shows help.
+ *
+ * Returns 0 on normal exit, -1 when the browser cannot be created.
+ */
+static int perf_c2c__hists_browse(struct hists *hists)
+{
+	const char help[] =
+	" d             Display cacheline details \n"
+	" ENTER         Toggle callchains (if present) \n"
+	" q             Quit \n";
+	struct hist_browser *browser = perf_c2c_browser__new(hists);
+	bool quit = false;
+
+	if (browser == NULL)
+		return -1;
+
+	/* reset abort key so that it can get Ctrl-C as a key */
+	SLang_reset_tty();
+	SLang_init_tty(0, 0, 0);
+
+	c2c_browser__update_nr_entries(browser);
+
+	while (!quit) {
+		int key = hist_browser__run(browser, "? - help", true);
+
+		switch (key) {
+		case 'd':
+			perf_c2c__browse_cacheline(browser->he_selection);
+			break;
+		case '?':
+			ui_browser__help_window(&browser->b, help);
+			break;
+		case 'q':
+			quit = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	hist_browser__delete(browser);
+	return 0;
+}
+
+/* Show the report in the TUI browser, or on stdout when use_browser is 0. */
+static void perf_c2c_display(struct perf_session *session)
+{
+	if (use_browser != 0) {
+		perf_c2c__hists_browse(&c2c.hists.hists);
+		return;
+	}
+
+	perf_c2c__hists_fprintf(stdout, session);
+}
+#else
+/* Built without slang support: always print the report to stdout. */
+static void perf_c2c_display(struct perf_session *session)
+{
+	use_browser = 0;
+	perf_c2c__hists_fprintf(stdout, session);
+}
+#endif /* HAVE_SLANG_SUPPORT */
+
+/*
+ * Build a line of @len characters with @orig centered between runs of
+ * '-' and one blank on each side, e.g. fill_line("CL", 8) gives
+ * "-- CL --".
+ *
+ * When @len is too small to hold @orig and its two blanks, the line is
+ * widened to exactly fit them instead of writing past the buffer.
+ *
+ * Returns a newly allocated string owned by the caller, or NULL when
+ * the allocation fails.
+ */
+static char *fill_line(const char *orig, int len)
+{
+	int i, j, olen = strlen(orig);
+	char *buf;
+
+	if (len < olen + 2)
+		len = olen + 2;
+
+	/* zalloc() provides the terminating NUL at buf[len]. */
+	buf = zalloc(len + 1);
+	if (!buf)
+		return NULL;
+
+	j = len / 2 - olen / 2;
+
+	for (i = 0; i < j - 1; i++)
+		buf[i] = '-';
+
+	buf[i++] = ' ';
+
+	memcpy(buf + i, orig, olen);
+
+	i += olen;
+
+	buf[i++] = ' ';
+
+	for (; i < len; i++)
+		buf[i] = '-';
+
+	return buf;
+}
+
+/*
+ * Adjust headers that depend on the chosen UI and display type: the
+ * offset column for the TUI, the percent_hitm header, and the first
+ * header line of the dcacheline and offset columns.
+ *
+ * Returns 0 on success, -ENOMEM when a header line cannot be allocated.
+ */
+static int ui_quirks(void)
+{
+	const char *nodestr = "Data address";
+	char *line;
+	int width;
+
+	if (!c2c.use_stdio) {
+		dim_offset.width  = 5;
+		dim_offset.header = header_offset_tui;
+		nodestr = "CL";
+	}
+
+	dim_percent_hitm.header = percent_hitm_header[c2c.display];
+
+	/* Fix the zero line for dcacheline column. */
+	width = dim_dcacheline.width +
+		dim_dcacheline_node.width +
+		dim_dcacheline_count.width + 4;
+	line = fill_line("Cacheline", width);
+	if (line == NULL)
+		return -ENOMEM;
+
+	dim_dcacheline.header.line[0].text = line;
+
+	/* Fix the zero line for offset column. */
+	width = dim_offset.width +
+		dim_offset_node.width +
+		dim_dcacheline_count.width + 4;
+	line = fill_line(nodestr, width);
+	if (line == NULL)
+		return -ENOMEM;
+
+	dim_offset.header.line[0].text = line;
+
+	return 0;
+}
+
+/* Default argument of the report's -g/--call-graph option. */
+#define CALLCHAIN_DEFAULT_OPT  "graph,0.5,caller,function,percent"
+
+/* Help text of -g/--call-graph, ending with the default shown above. */
+const char callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
+				CALLCHAIN_REPORT_HELP
+				"\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
+
+/*
+ * Option callback for -g/--call-graph. opt->value points to the
+ * callchain_param to update. --no-call-graph switches callchains off;
+ * otherwise @arg is handed to parse_callchain_report_opt().
+ */
+static int
+parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+	struct callchain_param *callchain = opt->value;
+
+	if (unset) {
+		/* --no-call-graph */
+		callchain->enabled = false;
+		symbol_conf.use_callchain = false;
+		callchain->mode = CHAIN_NONE;
+		return 0;
+	}
+
+	callchain->enabled = true;
+	return parse_callchain_report_opt(arg);
+}
+
+/*
+ * Derive the callchain record mode from the sample types present in
+ * @evlist and register the callchain parameters when needed.
+ *
+ * Returns 0 on success, -EINVAL when the parameters cannot be
+ * registered.
+ */
+static int setup_callchain(struct perf_evlist *evlist)
+{
+	u64 sample_type = perf_evlist__combined_sample_type(evlist);
+	enum perf_call_graph_mode mode = CALLCHAIN_NONE;
+	bool user_regs  = sample_type & PERF_SAMPLE_REGS_USER;
+	bool user_stack = sample_type & PERF_SAMPLE_STACK_USER;
+
+	if (user_regs && user_stack) {
+		mode = CALLCHAIN_DWARF;
+		dwarf_callchain_users = true;
+	} else if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		mode = CALLCHAIN_LBR;
+	} else if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+		mode = CALLCHAIN_FP;
+	}
+
+	if (!callchain_param.enabled &&
+	    callchain_param.mode != CHAIN_NONE &&
+	    mode != CALLCHAIN_NONE) {
+		symbol_conf.use_callchain = true;
+
+		if (callchain_register_param(&callchain_param) < 0) {
+			ui__error("Can't register callchain params.\n");
+			return -EINVAL;
+		}
+	}
+
+	callchain_param.record_mode = mode;
+	callchain_param.min_percent = 0;
+	return 0;
+}
+
+/*
+ * Set c2c.display from @str: "tot", "rmt" or "lcl". NULL selects "tot".
+ * Returns 0 on success, -1 for an unknown type.
+ */
+static int setup_display(const char *str)
+{
+	const char *display = str ? str : "tot";
+
+	if (strcmp(display, "tot") == 0) {
+		c2c.display = DISPLAY_TOT;
+		return 0;
+	}
+
+	if (strcmp(display, "rmt") == 0) {
+		c2c.display = DISPLAY_RMT;
+		return 0;
+	}
+
+	if (strcmp(display, "lcl") == 0) {
+		c2c.display = DISPLAY_LCL;
+		return 0;
+	}
+
+	pr_err("failed: unknown display type: %s\n", str);
+	return -1;
+}
+
+/*
+ * Iterate @__tok over the tokens of @__buf separated by any character
+ * of @__sep. @__buf is modified by strtok_r(); @__tmp is the char *
+ * used to keep the strtok_r() state.
+ */
+#define for_each_token(__tok, __buf, __sep, __tmp)		\
+	for (__tok = strtok_r(__buf, __sep, &__tmp); __tok;	\
+	     __tok = strtok_r(NULL,  __sep, &__tmp))
+
+/*
+ * Build c2c.cl_output, the output field list of the per-cacheline
+ * hists, from the comma separated sort list @cl_sort. The tokens "pid",
+ * "tid", "iaddr" and "dso" add their columns ("iaddr" also adds symbol,
+ * dso and, unless @no_source, the source line); "offset" is accepted
+ * without adding anything. c2c.show_src is set to whether the source
+ * line column was added.
+ *
+ * Returns 0 on success, -EINVAL for an unknown token, -ENOMEM on
+ * allocation failure.
+ */
+static int build_cl_output(char *cl_sort, bool no_source)
+{
+	/* for_each_token() modifies its buffer, so tokenize a copy. */
+	char *tok, *tmp, *buf = strdup(cl_sort);
+	bool add_pid   = false;
+	bool add_tid   = false;
+	bool add_iaddr = false;
+	bool add_sym   = false;
+	bool add_dso   = false;
+	bool add_src   = false;
+	int ret = 0;
+
+	if (!buf)
+		return -ENOMEM;
+
+	for_each_token(tok, buf, ",", tmp) {
+		if (!strcmp(tok, "tid")) {
+			add_tid = true;
+		} else if (!strcmp(tok, "pid")) {
+			add_pid = true;
+		} else if (!strcmp(tok, "iaddr")) {
+			add_iaddr = true;
+			add_sym   = true;
+			add_dso   = true;
+			add_src   = !no_source;
+		} else if (!strcmp(tok, "dso")) {
+			add_dso = true;
+		} else if (strcmp(tok, "offset")) {
+			pr_err("unrecognized sort token: %s\n", tok);
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	if (asprintf(&c2c.cl_output,
+		"%s%s%s%s%s%s%s%s%s%s",
+		c2c.use_stdio ? "cl_num_empty," : "",
+		"percent_rmt_hitm,"
+		"percent_lcl_hitm,"
+		"percent_stores_l1hit,"
+		"percent_stores_l1miss,"
+		"offset,offset_node,dcacheline_count,",
+		add_pid   ? "pid," : "",
+		add_tid   ? "tid," : "",
+		add_iaddr ? "iaddr," : "",
+		"mean_rmt,"
+		"mean_lcl,"
+		"mean_load,"
+		"tot_recs,"
+		"cpucnt,",
+		add_sym ? "symbol," : "",
+		add_dso ? "dso," : "",
+		add_src ? "cl_srcline," : "",
+		"node") < 0) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	c2c.show_src = add_src;
+
+out:
+	/* The copy is released on every path, including the failures. */
+	free(buf);
+	return ret;
+}
+
+/*
+ * Set up the three field lists of the per-cacheline hists:
+ * c2c.cl_sort ("offset," plus @coalesce or the default), c2c.cl_output
+ * (via build_cl_output()) and c2c.cl_resort (depends on c2c.display).
+ *
+ * Returns 0 on success, -ENOMEM when a list cannot be allocated, -1
+ * when build_cl_output() fails.
+ */
+static int setup_coalesce(const char *coalesce, bool no_source)
+{
+	const char *c = coalesce ? coalesce : coalesce_default;
+	const char *resort_keys;
+
+	if (asprintf(&c2c.cl_sort, "offset,%s", c) < 0)
+		return -ENOMEM;
+
+	if (build_cl_output(c2c.cl_sort, no_source) != 0)
+		return -1;
+
+	if (c2c.display == DISPLAY_TOT)
+		resort_keys = "tot_hitm";
+	else if (c2c.display == DISPLAY_RMT)
+		resort_keys = "rmt_hitm,lcl_hitm";
+	else
+		resort_keys = "lcl_hitm,rmt_hitm";
+
+	if (asprintf(&c2c.cl_resort, "offset,%s", resort_keys) < 0)
+		return -ENOMEM;
+
+	pr_debug("coalesce sort   fields: %s\n", c2c.cl_sort);
+	pr_debug("coalesce resort fields: %s\n", c2c.cl_resort);
+	pr_debug("coalesce output fields: %s\n", c2c.cl_output);
+	return 0;
+}
+
+/*
+ * 'perf c2c report': parse the options, read the events of the input
+ * file into the c2c hists, resort them and display the result.
+ *
+ * Returns 0 on success and a non-zero value on failure; every failure
+ * after the session was created releases it again.
+ */
+static int perf_c2c__report(int argc, const char **argv)
+{
+	struct perf_session *session;
+	struct ui_progress prog;
+	struct perf_data data = {
+		.mode = PERF_DATA_MODE_READ,
+	};
+	char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
+	const char *display = NULL;
+	const char *coalesce = NULL;
+	bool no_source = false;
+	const struct option options[] = {
+	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+		   "file", "vmlinux pathname"),
+	OPT_STRING('i', "input", &input_name, "file",
+		   "the input file to process"),
+	OPT_INCR('N', "node-info", &c2c.node_info,
+		 "show extra node info in report (repeat for more info)"),
+#ifdef HAVE_SLANG_SUPPORT
+	OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"),
+#endif
+	OPT_BOOLEAN(0, "stats", &c2c.stats_only,
+		    "Display only statistic tables (implies --stdio)"),
+	OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full,
+		    "Display full length of symbols"),
+	OPT_BOOLEAN(0, "no-source", &no_source,
+		    "Do not display Source Line column"),
+	OPT_BOOLEAN(0, "show-all", &c2c.show_all,
+		    "Show all captured HITM lines."),
+	OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
+			     "print_type,threshold[,print_limit],order,sort_key[,branch],value",
+			     callchain_help, &parse_callchain_opt,
+			     callchain_default_opt),
+	OPT_STRING('d', "display", &display, "Switch HITM output type", "lcl,rmt"),
+	OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
+		   "coalesce fields: pid,tid,iaddr,dso"),
+	OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
+	OPT_PARENT(c2c_options),
+	OPT_END()
+	};
+	int err = 0;
+
+	argc = parse_options(argc, argv, options, report_c2c_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+	if (argc)
+		usage_with_options(report_c2c_usage, options);
+
+	if (c2c.stats_only)
+		c2c.use_stdio = true;
+
+	if (!input_name || !strlen(input_name))
+		input_name = "perf.data";
+
+	data.file.path = input_name;
+	data.force     = symbol_conf.force;
+
+	err = setup_display(display);
+	if (err)
+		goto out;
+
+	err = setup_coalesce(coalesce, no_source);
+	if (err) {
+		pr_debug("Failed to initialize hists\n");
+		goto out;
+	}
+
+	err = c2c_hists__init(&c2c.hists, "dcacheline", 2);
+	if (err) {
+		pr_debug("Failed to initialize hists\n");
+		goto out;
+	}
+
+	session = perf_session__new(&data, 0, &c2c.tool);
+	if (session == NULL) {
+		pr_debug("No memory for session\n");
+		/* err is still 0 here, do not report success. */
+		err = -1;
+		goto out;
+	}
+
+	err = setup_nodes(session);
+	if (err) {
+		pr_err("Failed setup nodes\n");
+		/* The session exists by now and has to be released. */
+		goto out_session;
+	}
+
+	err = mem2node__init(&c2c.mem2node, &session->header.env);
+	if (err)
+		goto out_session;
+
+	err = setup_callchain(session->evlist);
+	if (err)
+		goto out_mem2node;
+
+	if (symbol__init(&session->header.env) < 0) {
+		err = -1;
+		goto out_mem2node;
+	}
+
+	/* No pipe support at the moment. */
+	if (perf_data__is_pipe(session->data)) {
+		pr_debug("No pipe support at the moment.\n");
+		err = -1;
+		goto out_mem2node;
+	}
+
+	if (c2c.use_stdio)
+		use_browser = 0;
+	else
+		use_browser = 1;
+
+	setup_browser(false);
+
+	err = perf_session__process_events(session);
+	if (err) {
+		pr_err("failed to process sample\n");
+		goto out_mem2node;
+	}
+
+	c2c_hists__reinit(&c2c.hists,
+			"cl_idx,"
+			"dcacheline,"
+			"dcacheline_node,"
+			"dcacheline_count,"
+			"tot_recs,"
+			"percent_hitm,"
+			"tot_hitm,lcl_hitm,rmt_hitm,"
+			"stores,stores_l1hit,stores_l1miss,"
+			"dram_lcl,dram_rmt,"
+			"ld_llcmiss,"
+			"tot_loads,"
+			"ld_fbhit,ld_l1hit,ld_l2hit,"
+			"ld_lclhit,ld_rmthit",
+			c2c.display == DISPLAY_TOT ? "tot_hitm" :
+			c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm"
+			);
+
+	ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");
+
+	hists__collapse_resort(&c2c.hists.hists, NULL);
+	hists__output_resort_cb(&c2c.hists.hists, &prog, resort_hitm_cb);
+	hists__iterate_cb(&c2c.hists.hists, resort_cl_cb);
+
+	ui_progress__finish();
+
+	err = ui_quirks();
+	if (err) {
+		pr_err("failed to setup UI\n");
+		goto out_mem2node;
+	}
+
+	perf_c2c_display(session);
+
+out_mem2node:
+	mem2node__exit(&c2c.mem2node);
+out_session:
+	perf_session__delete(session);
+out:
+	return err;
+}
+
+/*
+ * Option callback for -e/--event of 'perf c2c record': flag that the
+ * user picked events (opt->value points to that bool) and hand @str to
+ * perf_mem_events__parse().
+ */
+static int parse_record_events(const struct option *opt,
+			       const char *str, int unset __maybe_unused)
+{
+	*(bool *)opt->value = true;
+
+	return perf_mem_events__parse(str);
+}
+
+
+/* Usage strings of 'perf c2c record', NULL terminated. */
+static const char * const __usage_record[] = {
+	"perf c2c record [<options>] [<command>]",
+	"perf c2c record [<options>] -- <command> [<options>]",
+	NULL
+};
+
+/* Usage array handed to parse_options() by perf_c2c__record(). */
+static const char * const *record_mem_usage = __usage_record;
+
+/*
+ * 'perf c2c record': translate the c2c options into an argument vector
+ * for cmd_record() and run it. Unknown options and the command are
+ * passed through unchanged.
+ *
+ * Returns the result of cmd_record(), or -1 when memory events are not
+ * supported, an event is unsupported or the vector cannot be allocated.
+ */
+static int perf_c2c__record(int argc, const char **argv)
+{
+	int rec_argc, i = 0, j;
+	const char **rec_argv;
+	int ret;
+	bool all_user = false, all_kernel = false;
+	bool event_set = false;
+	struct option options[] = {
+	OPT_CALLBACK('e', "event", &event_set, "event",
+		     "event selector. Use 'perf mem record -e list' to list available events",
+		     parse_record_events),
+	OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"),
+	OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"),
+	OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"),
+	OPT_PARENT(c2c_options),
+	OPT_END()
+	};
+
+	if (perf_mem_events__init()) {
+		pr_err("failed: memory events not supported\n");
+		return -1;
+	}
+
+	argc = parse_options(argc, argv, options, record_mem_usage,
+			     PARSE_OPT_KEEP_UNKNOWN);
+
+	/* max number of arguments, plus one slot for the NULL terminator */
+	rec_argc = argc + 11;
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+	if (rec_argv == NULL)
+		return -1;
+
+	rec_argv[i++] = "record";
+
+	/* Without -e, record both loads and stores. */
+	if (!event_set) {
+		perf_mem_events[PERF_MEM_EVENTS__LOAD].record  = true;
+		perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+	}
+
+	if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
+		rec_argv[i++] = "-W";
+
+	rec_argv[i++] = "-d";
+	rec_argv[i++] = "--phys-data";
+	rec_argv[i++] = "--sample-cpu";
+
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		if (!perf_mem_events[j].record)
+			continue;
+
+		if (!perf_mem_events[j].supported) {
+			pr_err("failed: event '%s' not supported\n",
+			       perf_mem_events[j].name);
+			free(rec_argv);
+			return -1;
+		}
+
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = perf_mem_events__name(j);
+	}
+
+	if (all_user)
+		rec_argv[i++] = "--all-user";
+
+	if (all_kernel)
+		rec_argv[i++] = "--all-kernel";
+
+	/* Append whatever parse_options() left over. */
+	for (j = 0; j < argc; j++)
+		rec_argv[i++] = argv[j];
+
+	if (verbose > 0) {
+		pr_debug("calling: ");
+
+		for (j = 0; rec_argv[j]; j++)
+			pr_debug("%s ", rec_argv[j]);
+
+		pr_debug("\n");
+	}
+
+	ret = cmd_record(i, rec_argv);
+	free(rec_argv);
+	return ret;
+}
+
+/*
+ * Entry point of 'perf c2c': dispatch to the record or report
+ * sub-command, matched on the first three characters of argv[0] after
+ * option parsing. Anything else prints the usage.
+ */
+int cmd_c2c(int argc, const char **argv)
+{
+	argc = parse_options(argc, argv, c2c_options, c2c_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+
+	if (argc == 0)
+		usage_with_options(c2c_usage, c2c_options);
+
+	if (strncmp(argv[0], "rec", 3) == 0)
+		return perf_c2c__record(argc, argv);
+
+	if (strncmp(argv[0], "rep", 3) == 0)
+		return perf_c2c__report(argc, argv);
+
+	usage_with_options(c2c_usage, c2c_options);
+	return 0;
+}
diff --git a/builtin-config.c b/builtin-config.c
new file mode 100644
index 0000000..514f70f
--- /dev/null
+++ b/builtin-config.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-config.c
+ *
+ * Copyright (C) 2015, Taeung Song <treeze.taeung@gmail.com>
+ *
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/cache.h"
+#include <subcmd/parse-options.h>
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/config.h"
+#include <linux/string.h>
+
+static bool use_system_config, use_user_config;	/* targets of --system / --user */
+
+static const char * const config_usage[] = {
+	"perf config [<file-option>] [options] [section.name[=value] ...]",
+	NULL
+};
+
+enum actions {
+	ACTION_LIST = 1
+} actions;	/* target of -l/--list; cmd_config() switches on it */
+
+static struct option config_options[] = {
+	OPT_SET_UINT('l', "list", &actions,
+		     "show current config variables", ACTION_LIST),
+	OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"),
+	OPT_BOOLEAN(0, "user", &use_user_config, "use user config file"),
+	OPT_END()
+};
+
+/* Rewrite @file_name from @set (system-config entries only with --system).
+ * Returns 0 on success, -1 on a NULL set or any I/O failure. */
+static int set_config(struct perf_config_set *set, const char *file_name)
+{
+	struct perf_config_section *section = NULL;
+	struct perf_config_item *item = NULL;
+	FILE *fp;
+	int err;
+
+	if (set == NULL)
+		return -1;
+
+	fp = fopen(file_name, "w");
+	if (!fp)
+		return -1;
+
+	fprintf(fp, "%s\n", "# this file is auto-generated.");
+	perf_config_items__for_each_entry(&set->sections, section) {
+		if (!use_system_config && section->from_system_config)
+			continue;
+		fprintf(fp, "[%s]\n", section->name);
+
+		perf_config_items__for_each_entry(&section->items, item) {
+			if (!use_system_config && item->from_system_config)
+				continue;
+			if (item->value)
+				fprintf(fp, "\t%s = %s\n",
+					item->name, item->value);
+		}
+	}
+	/* A write error may only surface at flush time: check both. */
+	err = ferror(fp);
+	return (fclose(fp) || err) ? -1 : 0;
+}
+
+/* Print "var=value" for the first valued item of @set named by "section.name".
+ * Returns -1 only for a NULL set; an unknown variable prints nothing. */
+static int show_spec_config(struct perf_config_set *set, const char *var)
+{
+	struct perf_config_section *section;
+	struct perf_config_item *item;
+
+	if (set == NULL)
+		return -1;
+
+	perf_config_items__for_each_entry(&set->sections, section) {
+		size_t len = strlen(section->name);
+		const char *name;
+
+		/* The section must be followed by '.', else var + len + 1 is bogus. */
+		if (!strstarts(var, section->name) || var[len] != '.')
+			continue;
+
+		name = var + len + 1;
+		perf_config_items__for_each_entry(&section->items, item) {
+			if (strcmp(name, item->name) == 0 && item->value) {
+				printf("%s=%s\n", var, item->value);
+				return 0;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* List every item of @set that has a value as "section.name=value". */
+static int show_config(struct perf_config_set *set)
+{
+	struct perf_config_section *section;
+	struct perf_config_item *item;
+
+	if (!set)
+		return -1;
+
+	perf_config_set__for_each_entry(set, section, item) {
+		/* Items without a value are not listed. */
+		if (item->value != NULL)
+			printf("%s.%s=%s\n",
+			       section->name, item->name, item->value);
+	}
+
+	return 0;
+}
+
+/*
+ * Split "section.name[=value]" in place: *var gets the name part and *value the
+ * text after '=' (NULL when absent).  Returns 0, or -1 after reporting an error.
+ */
+static int parse_config_arg(char *arg, char **var, char **value)
+{
+	const char *dot = strchr(arg, '.');	/* first dot ends the section name */
+
+	if (dot == NULL || dot == arg) {
+		pr_err("The config variable does not contain a section name: %s\n", arg);
+		return -1;
+	}
+	if (!dot[1]) {
+		pr_err("The config variable does not contain a variable name: %s\n", arg);
+		return -1;
+	}
+
+	*value = strchr(arg, '=');
+	if (*value == NULL)
+		*var = arg;
+	else if (!strcmp(*value, "=")) {
+		pr_err("The config variable does not contain a value: %s\n", arg);
+		return -1;
+	} else if (*value == arg) {
+		/* Checked before strsep() so the message shows the whole argument. */
+		pr_err("invalid config variable: %s\n", arg);
+		return -1;
+	} else {
+		*value = *value + 1; /* excluding a first character '=' */
+		*var = strsep(&arg, "=");
+	}
+
+	return 0;
+}
+
+/* Entry point of 'perf config': list, show or set configuration variables. */
+int cmd_config(int argc, const char **argv)
+{
+	int i, ret = -1;
+	struct perf_config_set *set;
+	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
+	const char *config_filename;
+	bool changed = false;
+
+	argc = parse_options(argc, argv, config_options, config_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+
+	if (use_system_config && use_user_config) {
+		pr_err("Error: only one config file at a time\n");
+		parse_options_usage(config_usage, config_options, "user", 0);
+		parse_options_usage(NULL, config_options, "system", 0);
+		return -1;
+	}
+
+	if (use_system_config)
+		config_exclusive_filename = perf_etc_perfconfig();
+	else if (use_user_config)
+		config_exclusive_filename = user_config;
+
+	/* This is the file that gets rewritten and named in error messages. */
+	config_filename = config_exclusive_filename ? config_exclusive_filename :
+						       user_config;
+
+	/*
+	 * The 'config' sub-command builds its own config set here, because
+	 * the options above may have changed the config file location.
+	 */
+	set = perf_config_set__new();
+	if (!set)
+		goto out_err;
+
+	switch (actions) {
+	case ACTION_LIST:
+		if (argc) {
+			pr_err("Error: takes no arguments\n");
+			parse_options_usage(config_usage, config_options, "l", 1);
+			/* A usage error must not be reported as success. */
+			goto out_err;
+		} else if (show_config(set) < 0) {
+			pr_err("Nothing configured, "
+			       "please check your %s \n", config_filename);
+			goto out_err;
+		}
+		break;
+	default:
+		if (!argc) {
+			usage_with_options(config_usage, config_options);
+			break;
+		}
+
+		for (i = 0; argv[i]; i++) {
+			char *var, *value;
+			char *arg = strdup(argv[i]);
+
+			if (!arg) {
+				pr_err("%s: strdup failed\n", __func__);
+				goto out_err;
+			}
+
+			if (parse_config_arg(arg, &var, &value) < 0) {
+				free(arg);
+				goto out_err;
+			}
+
+			if (value == NULL) {
+				if (show_spec_config(set, var) < 0) {
+					pr_err("%s is not configured: %s\n",
+					       var, config_filename);
+					free(arg);
+					goto out_err;
+				}
+			} else {
+				if (perf_config_set__collect(set, config_filename,
+							     var, value) < 0) {
+					pr_err("Failed to add '%s=%s'\n",
+					       var, value);
+					free(arg);
+					goto out_err;
+				}
+				changed = true;
+			}
+			free(arg);
+		}
+
+		if (!changed)
+			break;
+
+		if (set_config(set, config_filename) < 0) {
+			pr_err("Failed to set the configs on %s\n",
+			       config_filename);
+			goto out_err;
+		}
+	}
+
+	ret = 0;
+out_err:
+	perf_config_set__delete(set);
+	return ret;
+}
diff --git a/builtin-data.c b/builtin-data.c
new file mode 100644
index 0000000..dde25d4
--- /dev/null
+++ b/builtin-data.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include "builtin.h"
+#include "perf.h"
+#include "debug.h"
+#include <subcmd/parse-options.h>
+#include "data-convert.h"
+#include "data-convert-bt.h"
+
+typedef int (*data_cmd_fn_t)(int argc, const char **argv);
+
+struct data_cmd {
+	const char	*name;
+	const char	*summary;
+	data_cmd_fn_t	fn;
+};
+
+static struct data_cmd data_cmds[];
+
+#define for_each_cmd(cmd) \
+	for (cmd = data_cmds; cmd && cmd->name; cmd++)
+
+static const struct option data_options[] = {
+	OPT_END()
+};
+
+static const char * const data_subcommands[] = { "convert", NULL };
+
+static const char *data_usage[] = {
+	"perf data [<common options>] <command> [<options>]",
+	NULL
+};
+
+/* Print the 'perf data' usage line followed by the known sub-commands. */
+static void print_usage(void)
+{
+	const struct data_cmd *entry = data_cmds;
+
+	printf("Usage:\n");
+	printf("\t%s\n\n", data_usage[0]);
+	printf("\tAvailable commands:\n");
+
+	/* data_cmds[] ends with an entry whose name is NULL. */
+	for (; entry && entry->name; entry++)
+		printf("\t %s\t- %s\n", entry->name, entry->summary);
+	printf("\n");
+}
+
+static const char * const data_convert_usage[] = {
+	"perf data convert [<options>]",
+	NULL
+};
+
+static int cmd_data_convert(int argc, const char **argv)
+{
+	const char *to_ctf     = NULL;
+	struct perf_data_convert_opts opts = {
+		.force = false,
+		.all = false,
+	};
+	const struct option options[] = {
+		OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+		OPT_STRING('i', "input", &input_name, "file", "input file name"),
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+		OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
+#endif
+		OPT_BOOLEAN('f', "force", &opts.force, "don't complain, do it"),
+		OPT_BOOLEAN(0, "all", &opts.all, "Convert all events"),
+		OPT_END()
+	};
+	/* Without libbabeltrace the command bails out before parsing anything. */
+#ifndef HAVE_LIBBABELTRACE_SUPPORT
+	pr_err("No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
+	return -1;
+#endif
+
+	argc = parse_options(argc, argv, options,
+			     data_convert_usage, 0);
+	if (argc) {
+		usage_with_options(data_convert_usage, options);
+		return -1;
+	}
+	/* Conversion only happens when --to-ctf supplied an output name. */
+	if (to_ctf) {
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+		return bt_convert__perf2ctf(input_name, to_ctf, &opts);
+#else
+		pr_err("The libbabeltrace support is not compiled in.\n");
+		return -1;
+#endif
+	}
+	/* No --to-ctf: nothing is converted and success is returned. */
+	return 0;
+}
+
+static struct data_cmd data_cmds[] = {
+	{ "convert", "converts data file between formats", cmd_data_convert },
+	{ .name = NULL, },
+};
+
+/* Entry point of 'perf data': look up argv[0] in data_cmds[] and run it. */
+int cmd_data(int argc, const char **argv)
+{
+	struct data_cmd *cmd;
+	const char *cmdstr;
+
+	if (argc >= 2)
+		argc = parse_options_subcommand(argc, argv, data_options,
+						data_subcommands, data_usage,
+						PARSE_OPT_STOP_AT_NON_OPTION);
+	else
+		argc = 0;	/* no command specified */
+
+	if (argc < 1) {
+		print_usage();
+		return -1;
+	}
+
+	cmdstr = argv[0];
+	for_each_cmd(cmd) {
+		if (strcmp(cmd->name, cmdstr) == 0)
+			return cmd->fn(argc, argv);
+	}
+
+	pr_err("Unknown command: %s\n", cmdstr);
+	print_usage();
+	return -1;
+}
diff --git a/builtin-diff.c b/builtin-diff.c
new file mode 100644
index 0000000..d660cb7
--- /dev/null
+++ b/builtin-diff.c
@@ -0,0 +1,1363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-diff.c
+ *
+ * Builtin diff command: Analyze two perf.data input files, look up and read
+ * DSOs and symbol information, sort them and produce a diff.
+ */
+#include "builtin.h"
+
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/hist.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/sort.h"
+#include "util/symbol.h"
+#include "util/util.h"
+#include "util/data.h"
+#include "util/config.h"
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* Diff command specific HPP columns. */
+enum {
+	PERF_HPP_DIFF__BASELINE,
+	PERF_HPP_DIFF__PERIOD,
+	PERF_HPP_DIFF__PERIOD_BASELINE,
+	PERF_HPP_DIFF__DELTA,
+	PERF_HPP_DIFF__RATIO,
+	PERF_HPP_DIFF__WEIGHTED_DIFF,
+	PERF_HPP_DIFF__FORMULA,
+	PERF_HPP_DIFF__DELTA_ABS,
+
+	PERF_HPP_DIFF__MAX_INDEX
+};
+
+struct diff_hpp_fmt {
+	struct perf_hpp_fmt	 fmt;
+	int			 idx;
+	char			*header;
+	int			 header_width;
+};
+
+struct data__file {
+	struct perf_session	*session;
+	struct perf_data	 data;
+	int			 idx;
+	struct hists		*hists;
+	struct diff_hpp_fmt	 fmt[PERF_HPP_DIFF__MAX_INDEX];
+};
+
+static struct data__file *data__files;
+static int data__files_cnt;
+
+#define data__for_each_file_start(i, d, s)	\
+	for (i = s, d = &data__files[s];	\
+	     i < data__files_cnt;		\
+	     i++, d = &data__files[i])
+
+#define data__for_each_file(i, d) data__for_each_file_start(i, d, 0)
+#define data__for_each_file_new(i, d) data__for_each_file_start(i, d, 1)
+
+static bool force;
+static bool show_period;
+static bool show_formula;
+static bool show_baseline_only;
+static unsigned int sort_compute = 1;
+
+static s64 compute_wdiff_w1;
+static s64 compute_wdiff_w2;
+
+enum {
+	COMPUTE_DELTA,
+	COMPUTE_RATIO,
+	COMPUTE_WEIGHTED_DIFF,
+	COMPUTE_DELTA_ABS,
+	COMPUTE_MAX,
+};
+
+const char *compute_names[COMPUTE_MAX] = {
+	[COMPUTE_DELTA] = "delta",
+	[COMPUTE_DELTA_ABS] = "delta-abs",
+	[COMPUTE_RATIO] = "ratio",
+	[COMPUTE_WEIGHTED_DIFF] = "wdiff",
+};
+
+static int compute = COMPUTE_DELTA_ABS;
+
+static int compute_2_hpp[COMPUTE_MAX] = {
+	[COMPUTE_DELTA]		= PERF_HPP_DIFF__DELTA,
+	[COMPUTE_DELTA_ABS]	= PERF_HPP_DIFF__DELTA_ABS,
+	[COMPUTE_RATIO]		= PERF_HPP_DIFF__RATIO,
+	[COMPUTE_WEIGHTED_DIFF]	= PERF_HPP_DIFF__WEIGHTED_DIFF,
+};
+
+#define MAX_COL_WIDTH 70
+
+static struct header_column {
+	const char *name;
+	int width;
+} columns[PERF_HPP_DIFF__MAX_INDEX] = {
+	[PERF_HPP_DIFF__BASELINE] = {
+		.name  = "Baseline",
+	},
+	[PERF_HPP_DIFF__PERIOD] = {
+		.name  = "Period",
+		.width = 14,
+	},
+	[PERF_HPP_DIFF__PERIOD_BASELINE] = {
+		.name  = "Base period",
+		.width = 14,
+	},
+	[PERF_HPP_DIFF__DELTA] = {
+		.name  = "Delta",
+		.width = 7,
+	},
+	[PERF_HPP_DIFF__DELTA_ABS] = {
+		.name  = "Delta Abs",
+		.width = 7,
+	},
+	[PERF_HPP_DIFF__RATIO] = {
+		.name  = "Ratio",
+		.width = 14,
+	},
+	[PERF_HPP_DIFF__WEIGHTED_DIFF] = {
+		.name  = "Weighted diff",
+		.width = 14,
+	},
+	[PERF_HPP_DIFF__FORMULA] = {
+		.name  = "Formula",
+		.width = MAX_COL_WIDTH,
+	}
+};
+
+/*
+ * Parse the "w1,w2" weights of the wdiff method into compute_wdiff_w1/w2.
+ * @opt is split in place at the comma.  Both weights must be non-zero;
+ * otherwise an error is printed and -EINVAL returned.
+ */
+static int setup_compute_opt_wdiff(char *opt)
+{
+	char *comma, *w2_str;
+
+	if (opt == NULL)
+		goto bad_weights;
+
+	comma = strchr(opt, ',');
+	if (comma == NULL)
+		goto bad_weights;
+
+	/* Terminate the first weight; the second starts after the comma. */
+	*comma = '\0';
+	w2_str = comma + 1;
+	if (*w2_str == '\0')
+		goto bad_weights;
+
+	compute_wdiff_w1 = strtol(opt, NULL, 10);
+	compute_wdiff_w2 = strtol(w2_str, NULL, 10);
+	if (compute_wdiff_w1 == 0 || compute_wdiff_w2 == 0)
+		goto bad_weights;
+
+	pr_debug("compute wdiff w1(%" PRId64 ") w2(%" PRId64 ")\n",
+		  compute_wdiff_w1, compute_wdiff_w2);
+	return 0;
+
+bad_weights:
+	pr_err("Failed: wrong weight data, use 'wdiff:w1,w2'\n");
+	return -EINVAL;
+}
+
+/* Handle the text after ':' in --compute; only wdiff accepts (and needs) it. */
+static int setup_compute_opt(char *opt)
+{
+	if (compute == COMPUTE_WEIGHTED_DIFF)
+		return setup_compute_opt_wdiff(opt);
+
+	if (opt) {
+		pr_err("Failed: extra option specified '%s'\n", opt);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * --compute callback: parse "<method>[:<args>]" and store the method index in
+ * opt->value.  A missing string selects COMPUTE_DELTA.  Returns 0 or -EINVAL.
+ */
+static int setup_compute(const struct option *opt, const char *str,
+			 int unset __maybe_unused)
+{
+	int *cp = (int *) opt->value;
+	const char *cstr = str;
+	char buf[50];
+	unsigned i;
+	char *option;
+
+	if (!str) {
+		*cp = COMPUTE_DELTA;
+		return 0;
+	}
+
+	option = strchr(str, ':');
+	if (option) {
+		unsigned len = option++ - str;
+
+		/* Copy the method name out: it is not NUL terminated in str. */
+		/* No method name is as long as the buffer. */
+		if (len >= sizeof(buf))
+			return -EINVAL;
+
+		memcpy(buf, str, len);
+		buf[len] = 0x0;
+		cstr = buf;
+	}
+
+	for (i = 0; i < COMPUTE_MAX; i++)
+		if (!strcmp(cstr, compute_names[i])) {
+			*cp = i;
+			return setup_compute_opt(option);
+		}
+
+	pr_err("Failed: '%s' is not computation method "
+	       "(use 'delta','delta-abs','ratio' or 'wdiff')\n", str);
+	return -EINVAL;
+}
+
+/* @period as a percentage of the total period of the hists owning @he. */
+static double period_percent(struct hist_entry *he, u64 period)
+{
+	u64 total = hists__total_period(he->hists);
+	return (100.0 * period) / total;
+}
+
+/* Cache in @pair, and return, the percentage-point change from @he to @pair. */
+static double compute_delta(struct hist_entry *he, struct hist_entry *pair)
+{
+	double base = period_percent(he, he->stat.period);
+	double delta = period_percent(pair, pair->stat.period) - base;
+
+	pair->diff.computed = true;
+	pair->diff.period_ratio_delta = delta;
+	return pair->diff.period_ratio_delta;
+}
+
+/* Cache in @pair, and return, pair period / @he period (a 0 base counts as 1). */
+static double compute_ratio(struct hist_entry *he, struct hist_entry *pair)
+{
+	double base = he->stat.period ? he->stat.period : 1;
+	double current = pair->stat.period;
+
+	pair->diff.period_ratio = current / base;
+	pair->diff.computed = true;
+	return pair->diff.period_ratio;
+}
+
+/* Cache in @pair, and return, pair period * w2 - @he period * w1. */
+static s64 compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
+{
+	/* Both products are formed with a u64 period operand. */
+	u64 weighted_new = (u64)pair->stat.period * compute_wdiff_w2;
+	u64 weighted_old = (u64)he->stat.period * compute_wdiff_w1;
+
+	pair->diff.wdiff = weighted_new - weighted_old;
+	pair->diff.computed = true;
+	return pair->diff.wdiff;
+}
+
+/* Write the delta formula text for @he/@pair into @buf via scnprintf(). */
+static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
+			 char *buf, size_t size)
+{
+	bool relative = symbol_conf.filter_relative;
+	/* With relative percentages only the non-filtered totals count. */
+	u64 he_total = relative ? he->hists->stats.total_non_filtered_period :
+				  he->hists->stats.total_period;
+	u64 pair_total = relative ? pair->hists->stats.total_non_filtered_period :
+				    pair->hists->stats.total_period;
+
+	return scnprintf(buf, size,
+			 "(%" PRIu64 " * 100 / %" PRIu64 ") - "
+			 "(%" PRIu64 " * 100 / %" PRIu64 ")",
+			 pair->stat.period, pair_total, he->stat.period, he_total);
+}
+
+/* Write "<pair period> / <base period>" into @buf. */
+static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
+			 char *buf, size_t size)
+{
+	/* The periods are printed through %.0F, hence the double conversion. */
+	return scnprintf(buf, size, "%.0F / %.0F",
+			 (double)pair->stat.period, (double)he->stat.period);
+}
+
+/* Write "(<pair period> * w2) - (<base period> * w1)" into @buf. */
+static int formula_wdiff(struct hist_entry *he, struct hist_entry *pair,
+			 char *buf, size_t size)
+{
+	const u64 base_period = he->stat.period;
+	const u64 pair_period = pair->stat.period;
+	return scnprintf(buf, size,
+		  "(%" PRIu64 " * " "%" PRId64 ") - (%" PRIu64 " * " "%" PRId64 ")",
+		  pair_period, compute_wdiff_w2, base_period, compute_wdiff_w1);
+}
+
+/* Format the formula of the method currently selected in 'compute'. */
+static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair,
+			   char *buf, size_t size)
+{
+	if (compute == COMPUTE_DELTA || compute == COMPUTE_DELTA_ABS)
+		return formula_delta(he, pair, buf, size);
+
+	if (compute == COMPUTE_RATIO)
+		return formula_ratio(he, pair, buf, size);
+
+	if (compute == COMPUTE_WEIGHTED_DIFF)
+		return formula_wdiff(he, pair, buf, size);
+
+	/* Unknown method: trip BUG_ON() and, should it return, yield -1. */
+	BUG_ON(1);
+	return -1;
+}
+
+static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
+				      union perf_event *event,
+				      struct perf_sample *sample,
+				      struct perf_evsel *evsel,
+				      struct machine *machine)
+{
+	struct addr_location al;
+	struct hists *hists = evsel__hists(evsel);
+	int ret = -1;
+	/* NOTE(review): the warnings say "skipping", yet -1 is returned. */
+	if (machine__resolve(machine, &al, sample) < 0) {
+		pr_warning("problem processing %d event, skipping it.\n",
+			   event->header.type);
+		return -1;
+	}
+	/* From here on @al is released through addr_location__put() at out_put. */
+	if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, true)) {
+		pr_warning("problem incrementing symbol period, skipping event\n");
+		goto out_put;
+	}
+
+	/*
+	 * The total_period is updated here before going to the output
+	 * tree since normally only the baseline hists will call
+	 * hists__output_resort() and precompute needs the total
+	 * period in order to sort entries by percentage delta.
+	 */
+	hists->stats.total_period += sample->period;
+	if (!al.filtered)
+		hists->stats.total_non_filtered_period += sample->period;
+	ret = 0;
+out_put:
+	addr_location__put(&al);
+	return ret;
+}
+
+static struct perf_tool tool = {
+	.sample	= diff__process_sample_event,
+	.mmap	= perf_event__process_mmap,
+	.mmap2	= perf_event__process_mmap2,
+	.comm	= perf_event__process_comm,
+	.exit	= perf_event__process_exit,
+	.fork	= perf_event__process_fork,
+	.lost	= perf_event__process_lost,
+	.namespaces = perf_event__process_namespaces,
+	.ordered_events = true,
+	.ordering_requires_timestamps = true,
+};
+
+/* First evsel of @evlist that perf_evsel__match2() accepts for @evsel, or NULL. */
+static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
+				      struct perf_evlist *evlist)
+{
+	struct perf_evsel *candidate;
+
+	evlist__for_each_entry(evlist, candidate)
+		if (perf_evsel__match2(evsel, candidate))
+			return candidate;
+
+	return NULL;
+}
+
+/*
+ * Run hists__collapse_resort() on the hists of every evsel in @evlist.
+ */
+static void perf_evlist__collapse_resort(struct perf_evlist *evlist)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(evlist, pos)
+		hists__collapse_resort(evsel__hists(pos), NULL);
+}
+
+static struct data__file *fmt_to_data_file(struct perf_hpp_fmt *fmt)
+{
+	struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt);
+	void *ptr = dfmt - dfmt->idx;	/* steps back idx elements */
+	struct data__file *d = container_of(ptr, struct data__file, fmt);
+	/* NOTE(review): assumes dfmt->idx is dfmt's slot in data__file.fmt[]. */
+	return d;
+}
+
+/* Return the pair of @he whose hists are those of data file @d, or NULL. */
+static struct hist_entry*
+get_pair_data(struct hist_entry *he, struct data__file *d)
+{
+	struct hist_entry *pair;
+
+	if (!hist_entry__has_pairs(he))
+		return NULL;
+	list_for_each_entry(pair, &he->pairs.head, pairs.node)
+		if (pair->hists == d->hists)
+			return pair;
+	return NULL;
+}
+
+/* Like get_pair_data(), for the data file that owns column format @dfmt. */
+static struct hist_entry*
+get_pair_fmt(struct hist_entry *he, struct diff_hpp_fmt *dfmt)
+{
+	/* fmt_to_data_file() maps the embedded perf_hpp_fmt to its data__file. */
+	return get_pair_data(he, fmt_to_data_file(&dfmt->fmt));
+}
+
+static void hists__baseline_only(struct hists *hists)
+{
+	struct rb_root *root;
+	struct rb_node *next;
+	/* Remove from the input tree every entry that has no pair. */
+	if (hists__has(hists, need_collapse))
+		root = &hists->entries_collapsed;
+	else
+		root = hists->entries_in;
+
+	next = rb_first(root);
+	while (next != NULL) {
+		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in);
+		/* Advance first: @he may be erased and deleted just below. */
+		next = rb_next(&he->rb_node_in);
+		if (!hist_entry__next_pair(he)) {
+			rb_erase(&he->rb_node_in, root);
+			hist_entry__delete(he);
+		}
+	}
+}
+
+static void hists__precompute(struct hists *hists)
+{
+	struct rb_root *root;
+	struct rb_node *next;
+	/* Compute the selected diff value for every pair of every entry. */
+	if (hists__has(hists, need_collapse))
+		root = &hists->entries_collapsed;
+	else
+		root = hists->entries_in;
+
+	next = rb_first(root);
+	while (next != NULL) {
+		struct hist_entry *he, *pair;
+		struct data__file *d;
+		int i;
+
+		he   = rb_entry(next, struct hist_entry, rb_node_in);
+		next = rb_next(&he->rb_node_in);
+		/* The walk starts at data__files[1], skipping index 0. */
+		data__for_each_file_new(i, d) {
+			pair = get_pair_data(he, d);
+			if (!pair)
+				continue;
+			/* Each compute_*() helper caches its result in pair->diff. */
+			switch (compute) {
+			case COMPUTE_DELTA:
+			case COMPUTE_DELTA_ABS:
+				compute_delta(he, pair);
+				break;
+			case COMPUTE_RATIO:
+				compute_ratio(he, pair);
+				break;
+			case COMPUTE_WEIGHTED_DIFF:
+				compute_wdiff(he, pair);
+				break;
+			default:
+				BUG_ON(1);
+			}
+		}
+	}
+}
+
+/*
+ * Descending-order comparison: -1 when @l > @r, 1 when @l < @r, otherwise 0
+ * (which includes any comparison involving NaN).
+ */
+static int64_t cmp_doubles(double l, double r)
+{
+	/* Each relation yields 0 or 1, so the difference is -1, 0 or 1. */
+	return (int64_t)(l < r) - (int64_t)(l > r);
+}
+
+static int64_t
+__hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
+			int c)	/* c: one of the COMPUTE_* values */
+{
+	switch (c) {
+	case COMPUTE_DELTA:
+	{
+		double l = left->diff.period_ratio_delta;
+		double r = right->diff.period_ratio_delta;
+		/* cmp_doubles() is negative when l > r. */
+		return cmp_doubles(l, r);
+	}
+	case COMPUTE_DELTA_ABS:
+	{
+		double l = fabs(left->diff.period_ratio_delta);
+		double r = fabs(right->diff.period_ratio_delta);
+		/* Same comparison, on magnitudes only. */
+		return cmp_doubles(l, r);
+	}
+	case COMPUTE_RATIO:
+	{
+		double l = left->diff.period_ratio;
+		double r = right->diff.period_ratio;
+
+		return cmp_doubles(l, r);
+	}
+	case COMPUTE_WEIGHTED_DIFF:
+	{
+		s64 l = left->diff.wdiff;
+		s64 r = right->diff.wdiff;
+		/* Like cmp_doubles(): negative when l > r. */
+		return r - l;
+	}
+	default:
+		BUG_ON(1);
+	}
+	/* Only reached if BUG_ON() returns. */
+	return 0;
+}
+
+static int64_t
+hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
+			int c, int sort_idx)
+{
+	bool pairs_left  = hist_entry__has_pairs(left);
+	bool pairs_right = hist_entry__has_pairs(right);
+	struct hist_entry *p_right, *p_left;
+	/* Compare by the value computed against data__files[sort_idx]. */
+	if (!pairs_left && !pairs_right)
+		return 0;
+	/* Only one side has pairs: -1 if it is @left, 1 if it is @right. */
+	if (!pairs_left || !pairs_right)
+		return pairs_left ? -1 : 1;
+
+	p_left  = get_pair_data(left,  &data__files[sort_idx]);
+	p_right = get_pair_data(right, &data__files[sort_idx]);
+
+	if (!p_left && !p_right)
+		return 0;
+	/* Same rule for a pair in this particular file. */
+	if (!p_left || !p_right)
+		return p_left ? -1 : 1;
+
+	/*
+	 * We have 2 entries of same kind, let's
+	 * make the data comparison.
+	 */
+	return __hist_entry__cmp_compute(p_left, p_right, c);
+}
+
+static int64_t
+hist_entry__cmp_compute_idx(struct hist_entry *left, struct hist_entry *right,
+			    int c, int sort_idx)
+{
+	struct hist_entry *p_right, *p_left;
+	/* Unlike hist_entry__cmp_compute(), no has_pairs pre-check is done. */
+	p_left  = get_pair_data(left,  &data__files[sort_idx]);
+	p_right = get_pair_data(right, &data__files[sort_idx]);
+
+	if (!p_left && !p_right)
+		return 0;
+	/* Only one side has a pair in this file: -1 for @left, 1 for @right. */
+	if (!p_left || !p_right)
+		return p_left ? -1 : 1;
+
+	if (c != COMPUTE_DELTA && c != COMPUTE_DELTA_ABS) {
+		/*
+		 * The delta can be computed without the baseline, but
+		 * others are not.  Put those entries which have no
+		 * values below.
+		 */
+		if (left->dummy && right->dummy)
+			return 0;
+
+		if (left->dummy || right->dummy)
+			return left->dummy ? 1 : -1;
+	}
+
+	return __hist_entry__cmp_compute(p_left, p_right, c);
+}
+
+static int64_t
+hist_entry__cmp_nop(struct perf_hpp_fmt *fmt __maybe_unused,
+		    struct hist_entry *left __maybe_unused,
+		    struct hist_entry *right __maybe_unused)
+{
+	return 0;	/* every pair of entries compares equal */
+}
+
+static int64_t
+hist_entry__cmp_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
+			 struct hist_entry *left, struct hist_entry *right)
+{
+	/* Three-way compare of the periods: 1, 0 or -1 for >, == or <. */
+	const u64 l = left->stat.period, r = right->stat.period;
+	return (l > r) - (l < r);
+}
+
+static int64_t
+hist_entry__cmp_delta(struct perf_hpp_fmt *fmt,
+		      struct hist_entry *left, struct hist_entry *right)
+{
+	/* Operands are passed swapped; the column's data file selects the pair. */
+	const int file_idx = fmt_to_data_file(fmt)->idx;
+	return hist_entry__cmp_compute(right, left, COMPUTE_DELTA, file_idx);
+}
+
+static int64_t
+hist_entry__cmp_delta_abs(struct perf_hpp_fmt *fmt,
+		      struct hist_entry *left, struct hist_entry *right)
+{
+	/* Operands are passed swapped; the column's data file selects the pair. */
+	const int file_idx = fmt_to_data_file(fmt)->idx;
+	return hist_entry__cmp_compute(right, left, COMPUTE_DELTA_ABS, file_idx);
+}
+
+static int64_t
+hist_entry__cmp_ratio(struct perf_hpp_fmt *fmt,
+		      struct hist_entry *left, struct hist_entry *right)
+{
+	/* Operands are passed swapped; the column's data file selects the pair. */
+	const int file_idx = fmt_to_data_file(fmt)->idx;
+	return hist_entry__cmp_compute(right, left, COMPUTE_RATIO, file_idx);
+}
+
+static int64_t
+hist_entry__cmp_wdiff(struct perf_hpp_fmt *fmt,
+		      struct hist_entry *left, struct hist_entry *right)
+{
+	/* Operands are passed swapped; the column's data file selects the pair. */
+	const int file_idx = fmt_to_data_file(fmt)->idx;
+	return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF, file_idx);
+}
+
+static int64_t
+hist_entry__cmp_delta_idx(struct perf_hpp_fmt *fmt __maybe_unused,
+			  struct hist_entry *left, struct hist_entry *right)
+{
+	return hist_entry__cmp_compute_idx(right, left, COMPUTE_DELTA,
+					   sort_compute);	/* index set by -o/--order */
+}
+
+static int64_t
+hist_entry__cmp_delta_abs_idx(struct perf_hpp_fmt *fmt __maybe_unused,
+			      struct hist_entry *left, struct hist_entry *right)
+{
+	return hist_entry__cmp_compute_idx(right, left, COMPUTE_DELTA_ABS,
+					   sort_compute);	/* index set by -o/--order */
+}
+
+static int64_t
+hist_entry__cmp_ratio_idx(struct perf_hpp_fmt *fmt __maybe_unused,
+			  struct hist_entry *left, struct hist_entry *right)
+{
+	return hist_entry__cmp_compute_idx(right, left, COMPUTE_RATIO,
+					   sort_compute);	/* index set by -o/--order */
+}
+
+static int64_t
+hist_entry__cmp_wdiff_idx(struct perf_hpp_fmt *fmt __maybe_unused,
+			  struct hist_entry *left, struct hist_entry *right)
+{
+	return hist_entry__cmp_compute_idx(right, left, COMPUTE_WEIGHTED_DIFF,
+					   sort_compute);	/* index set by -o/--order */
+}
+
+/* Prune (with -b), compute the diff values, resort and print @hists. */
+static void hists__process(struct hists *hists)
+{
+	/* -b/--baseline-only: drop entries without a pair first. */
+	if (show_baseline_only)
+		hists__baseline_only(hists);
+	hists__precompute(hists);
+	hists__output_resort(hists, NULL);
+	hists__fprintf(hists, !quiet, 0, 0, 0, stdout,
+		       symbol_conf.use_callchain);
+}
+
+/* Print the numbered list of input files, tagging index 0 as the baseline. */
+static void data__fprintf(void)
+{
+	struct data__file *d;
+	int i;
+
+	fprintf(stdout, "# Data files:\n");
+	data__for_each_file(i, d) {
+		const char *tag = d->idx == 0 ? "(Baseline)" : "";
+
+		fprintf(stdout, "#  [%d] %s %s\n", d->idx, d->data.file.path, tag);
+	}
+	fprintf(stdout, "#\n");
+}
+
+static void data_process(void)
+{
+	struct perf_evlist *evlist_base = data__files[0].session->evlist;
+	struct perf_evsel *evsel_base;
+	bool first = true;
+	/* For each event of file 0: pair it with the other files, then print. */
+	evlist__for_each_entry(evlist_base, evsel_base) {
+		struct hists *hists_base = evsel__hists(evsel_base);
+		struct data__file *d;
+		int i;
+
+		data__for_each_file_new(i, d) {
+			struct perf_evlist *evlist = d->session->evlist;
+			struct perf_evsel *evsel;
+			struct hists *hists;
+			/* NOTE(review): on no match d->hists keeps its previous value. */
+			evsel = evsel_match(evsel_base, evlist);
+			if (!evsel)
+				continue;
+			/* Remember which hists of this file belong to the current event. */
+			hists = evsel__hists(evsel);
+			d->hists = hists;
+
+			hists__match(hists_base, hists);
+
+			if (!show_baseline_only)
+				hists__link(hists_base, hists);
+		}
+
+		if (!quiet) {
+			fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
+				perf_evsel__name(evsel_base));
+		}
+		/* Later event headers are preceded by an empty line. */
+		first = false;
+
+		if (verbose > 0 || ((data__files_cnt > 2) && !quiet))
+			data__fprintf();
+
+		/* Don't sort callchain for perf diff */
+		perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN);
+
+		hists__process(hists_base);
+	}
+}
+
+/* Release the header string of every diff column format owned by @d. */
+static void data__free(struct data__file *d)
+{
+	struct diff_hpp_fmt *fmt = d->fmt;
+	struct diff_hpp_fmt *end = fmt + PERF_HPP_DIFF__MAX_INDEX;
+
+	/* zfree() is handed the address of the pointer, not the pointer. */
+	for (; fmt < end; fmt++)
+		zfree(&fmt->header);
+}
+
+static int __cmd_diff(void)
+{
+	struct data__file *d;
+	int ret = -EINVAL, i;
+	/* Open and process every input file; the first failure aborts the run. */
+	data__for_each_file(i, d) {
+		d->session = perf_session__new(&d->data, false, &tool);
+		if (!d->session) {
+			pr_err("Failed to open %s\n", d->data.file.path);
+			ret = -1;
+			goto out_delete;
+		}
+
+		ret = perf_session__process_events(d->session);
+		if (ret) {
+			pr_err("Failed to process %s\n", d->data.file.path);
+			goto out_delete;
+		}
+
+		perf_evlist__collapse_resort(d->session->evlist);
+	}
+	/* All files are loaded: pair them with file 0 and print the result. */
+	data_process();
+	/* NOTE(review): after a failure some d->session were never created here. */
+ out_delete:
+	data__for_each_file(i, d) {
+		perf_session__delete(d->session);
+		data__free(d);
+	}
+
+	free(data__files);
+	return ret;
+}
+
+static const char * const diff_usage[] = {
+	"perf diff [<options>] [old_file] [new_file]",
+	NULL,
+};
+
+static const struct option options[] = {
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
+	OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
+		    "Show only items with match in baseline"),
+	OPT_CALLBACK('c', "compute", &compute,
+		     "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs)",
+		     "Entries differential computation selection",
+		     setup_compute),
+	OPT_BOOLEAN('p', "period", &show_period,
+		    "Show period values."),
+	OPT_BOOLEAN('F', "formula", &show_formula,
+		    "Show formula."),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+		   "file", "kallsyms pathname"),
+	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
+		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
+	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
+		   "only consider symbols in these dsos"),
+	OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
+		   "only consider symbols in these comms"),
+	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
+		   "only consider these symbols"),
+	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
+		   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+		   " Please refer the man page for the complete list."),
+	OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator",
+		   "separator for columns, no spaces will be added between "
+		   "columns '.' is reserved."),
+	OPT_CALLBACK(0, "symfs", NULL, "directory",
+		     "Look for files with symbols relative to this directory",
+		     symbol__config_symfs),
+	OPT_UINTEGER('o', "order", &sort_compute, "Specify compute sorting."),
+	OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+		     "How to display percentage of filtered entries", parse_filter_percentage),
+	OPT_END()
+};
+
+/* Period of @he as a percentage of its hists' total period. */
+static double baseline_percent(struct hist_entry *he)
+{
+	u64 total = hists__total_period(he->hists);
+	return (100.0 * he->stat.period) / total;
+}
+
+/* Coloured baseline percentage of @he; dummy entries get a blank column. */
+static int hpp__color_baseline(struct perf_hpp_fmt *fmt,
+			       struct perf_hpp *hpp, struct hist_entry *he)
+{
+	struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt);
+	double percent = baseline_percent(he);
+	char pfmt[20] = " ";
+
+	if (he->dummy)
+		return scnprintf(hpp->buf, hpp->size, "%*s",
+				 dfmt->header_width, pfmt);
+
+	/* Build e.g. "%6.2f%%": one column of the width goes to the '%' sign. */
+	scnprintf(pfmt, 20, "%%%d.2f%%%%", dfmt->header_width - 1);
+	return percent_color_snprintf(hpp->buf, hpp->size, pfmt, percent);
+}
+
+/* Plain-text baseline percentage of @he; dummy entries print nothing. */
+static int hpp__entry_baseline(struct hist_entry *he, char *buf, size_t size)
+{
+	double percent = baseline_percent(he);
+
+	if (he->dummy)
+		return 0;
+	/* With a field separator the value is neither padded nor '%'-suffixed. */
+	if (symbol_conf.field_sep)
+		return scnprintf(buf, size, "%.2f", percent);
+	return scnprintf(buf, size, "%6.2f%%", percent);
+}
+
+/* Format the @comparison_method column of @he's pair, coloured, into @hpp. */
+static int __hpp__color_compare(struct perf_hpp_fmt *fmt,
+				struct perf_hpp *hpp, struct hist_entry *he,
+				int comparison_method)
+{
+	struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt);
+	struct hist_entry *pair = get_pair_fmt(he, dfmt);
+	double diff;
+	s64 wdiff;
+	char pfmt[20] = " ";
+
+	if (!pair)
+		goto no_print;
+
+	switch (comparison_method) {
+	case COMPUTE_DELTA:
+		if (pair->diff.computed)
+			diff = pair->diff.period_ratio_delta;
+		else
+			diff = compute_delta(he, pair);
+
+		scnprintf(pfmt, 20, "%%%+d.2f%%%%", dfmt->header_width - 1);
+		return percent_color_snprintf(hpp->buf, hpp->size,
+					pfmt, diff);
+	case COMPUTE_RATIO:
+		if (he->dummy)
+			goto dummy_print;
+		if (pair->diff.computed)
+			diff = pair->diff.period_ratio;
+		else
+			diff = compute_ratio(he, pair);
+
+		scnprintf(pfmt, 20, "%%%d.6f", dfmt->header_width);
+		return value_color_snprintf(hpp->buf, hpp->size,
+					pfmt, diff);
+	case COMPUTE_WEIGHTED_DIFF:
+		if (he->dummy)
+			goto dummy_print;
+		if (pair->diff.computed)
+			wdiff = pair->diff.wdiff;
+		else
+			wdiff = compute_wdiff(he, pair);
+		/* Honour the column width and print the s64 with PRId64. */
+		scnprintf(pfmt, 20, "%%%d" PRId64, dfmt->header_width);
+		return color_snprintf(hpp->buf, hpp->size,
+				get_percent_color(wdiff),
+				pfmt, wdiff);
+	default:
+		BUG_ON(1);
+	}
+dummy_print:
+	return scnprintf(hpp->buf, hpp->size, "%*s",
+			dfmt->header_width, "N/A");
+no_print:
+	return scnprintf(hpp->buf, hpp->size, "%*s",
+			dfmt->header_width, pfmt);
+}
+
+static int hpp__color_delta(struct perf_hpp_fmt *fmt,
+			struct perf_hpp *hpp, struct hist_entry *he)
+{
+	return __hpp__color_compare(fmt, hpp, he, COMPUTE_DELTA);
+}
+
+static int hpp__color_ratio(struct perf_hpp_fmt *fmt,
+			struct perf_hpp *hpp, struct hist_entry *he)
+{
+	return __hpp__color_compare(fmt, hpp, he, COMPUTE_RATIO);
+}
+
+static int hpp__color_wdiff(struct perf_hpp_fmt *fmt,
+			struct perf_hpp *hpp, struct hist_entry *he)
+{
+	return __hpp__color_compare(fmt, hpp, he, COMPUTE_WEIGHTED_DIFF);
+}
+
+static void
+hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size)
+{
+	switch (idx) {
+	case PERF_HPP_DIFF__PERIOD_BASELINE:
+		scnprintf(buf, size, "%" PRIu64, he->stat.period);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/* Text for paired column @idx of @he/@pair; @buf is left untouched when there is nothing to show. */
+static void hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
+			    int idx, char *buf, size_t size)
+{
+	double diff;
+	double ratio;
+	s64 wdiff;
+
+	switch (idx) {
+	case PERF_HPP_DIFF__DELTA:
+	case PERF_HPP_DIFF__DELTA_ABS:
+		if (pair->diff.computed)
+			diff = pair->diff.period_ratio_delta;
+		else
+			diff = compute_delta(he, pair);
+
+		scnprintf(buf, size, "%+4.2F%%", diff);
+		break;
+
+	case PERF_HPP_DIFF__RATIO:
+		/* No point for ratio number if we are dummy.. */
+		if (he->dummy) {
+			scnprintf(buf, size, "N/A");
+			break;
+		}
+
+		if (pair->diff.computed)
+			ratio = pair->diff.period_ratio;
+		else
+			ratio = compute_ratio(he, pair);
+
+		if (ratio > 0.0)
+			scnprintf(buf, size, "%14.6F", ratio);
+		break;
+
+	case PERF_HPP_DIFF__WEIGHTED_DIFF:
+		/* No point for wdiff number if we are dummy.. */
+		if (he->dummy) {
+			scnprintf(buf, size, "N/A");
+			break;
+		}
+
+		if (pair->diff.computed)
+			wdiff = pair->diff.wdiff;
+		else
+			wdiff = compute_wdiff(he, pair);
+
+		if (wdiff != 0)	/* PRId64 rather than "%ld": wdiff is a 64-bit value */
+			scnprintf(buf, size, "%14" PRId64, wdiff);
+		break;
+
+	case PERF_HPP_DIFF__FORMULA:
+		formula_fprintf(he, pair, buf, size);
+		break;
+
+	case PERF_HPP_DIFF__PERIOD:
+		scnprintf(buf, size, "%" PRIu64, pair->stat.period);
+		break;
+
+	default:
+		BUG_ON(1);
+	}
+}
+
+/* Dispatch one column of @he to the baseline, paired or unpaired text formatter. */
+static void
+__hpp__entry_global(struct hist_entry *he, struct diff_hpp_fmt *dfmt,
+		    char *buf, size_t size)
+{
+	struct hist_entry *other = get_pair_fmt(he, dfmt);
+	int column = dfmt->idx;
+
+	if (column == PERF_HPP_DIFF__BASELINE) {
+		hpp__entry_baseline(he, buf, size);
+		return;
+	}
+	if (other)
+		hpp__entry_pair(he, other, column, buf, size);
+	else
+		hpp__entry_unpair(he, column, buf, size);
+}
+
+/* fmt->entry hook: render the cell text, padded to the header width unless a field separator is set. */
+static int hpp__entry_global(struct perf_hpp_fmt *_fmt, struct perf_hpp *hpp,
+			     struct hist_entry *he)
+{
+	struct diff_hpp_fmt *col = container_of(_fmt, struct diff_hpp_fmt, fmt);
+	char cell[MAX_COL_WIDTH] = " ";
+
+	__hpp__entry_global(he, col, cell, sizeof(cell));
+
+	if (!symbol_conf.field_sep)
+		return scnprintf(hpp->buf, hpp->size, "%*s",
+				 col->header_width, cell);
+
+	return scnprintf(hpp->buf, hpp->size, "%s", cell);
+}
+
+/* fmt->header hook: copy the column title stored in dfmt->header into hpp->buf. */
+static int hpp__header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		       struct hists *hists __maybe_unused,
+		       int line __maybe_unused,
+		       int *span __maybe_unused)
+{
+	struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt);
+	BUG_ON(!dfmt->header);
+	/* Print through "%s": the title is data, not a format string. */
+	return scnprintf(hpp->buf, hpp->size, "%s", dfmt->header);
+}
+
+/* fmt->width hook: report the header_width stored for this column (must be positive). */
+static int hpp__width(struct perf_hpp_fmt *fmt,
+		      struct perf_hpp *hpp __maybe_unused,
+		      struct hists *hists __maybe_unused)
+{
+	int width = container_of(fmt, struct diff_hpp_fmt, fmt)->header_width;
+
+	BUG_ON(width <= 0);
+	return width;
+}
+
+/* Compute the column title and width of @dfmt for data file @d. */
+static void init_header(struct data__file *d, struct diff_hpp_fmt *dfmt)
+{
+#define MAX_HEADER_NAME 100
+	char buf_indent[MAX_HEADER_NAME];
+	char buf[MAX_HEADER_NAME];
+	const char *header = NULL;
+	int width = 0;
+
+	BUG_ON(dfmt->idx >= PERF_HPP_DIFF__MAX_INDEX);
+	header = columns[dfmt->idx].name;
+	width  = columns[dfmt->idx].width;
+
+	/* Only our defined HPP fmts should appear here. */
+	BUG_ON(!header);
+	/* With more than two files, tag the title with the file index ("name/idx"). */
+	if (data__files_cnt > 2)
+		scnprintf(buf, MAX_HEADER_NAME, "%s/%d", header, d->idx);
+#define NAME (data__files_cnt > 2 ? buf : header)
+	/* The column is as wide as the longer of the table width and the title. */
+	dfmt->header_width = width;
+	width = (int) strlen(NAME);
+	if (dfmt->header_width < width)
+		dfmt->header_width = width;
+	/* Store the title already padded to header_width. */
+	scnprintf(buf_indent, MAX_HEADER_NAME, "%*s",
+		  dfmt->header_width, NAME);
+
+	dfmt->header = strdup(buf_indent);	/* result is not checked here */
+#undef MAX_HEADER_NAME
+#undef NAME
+}
+
+/* Set up column @idx of data file @d (hooks, color/sort callbacks, header) and register it. */
+static void data__hpp_register(struct data__file *d, int idx)
+{
+	struct diff_hpp_fmt *dfmt = &d->fmt[idx];
+	struct perf_hpp_fmt *fmt = &dfmt->fmt;
+
+	dfmt->idx = idx;
+	fmt->header = hpp__header;
+	fmt->width  = hpp__width;
+	fmt->entry  = hpp__entry_global;
+	fmt->cmp    = hist_entry__cmp_nop;
+	fmt->collapse = hist_entry__cmp_nop;
+
+	/* TODO more colors */
+	switch (idx) {
+	case PERF_HPP_DIFF__BASELINE:
+		fmt->color = hpp__color_baseline;
+		fmt->sort  = hist_entry__cmp_baseline;
+		break;
+	case PERF_HPP_DIFF__DELTA:
+		fmt->color = hpp__color_delta;
+		fmt->sort  = hist_entry__cmp_delta;
+		break;
+	case PERF_HPP_DIFF__RATIO:
+		fmt->color = hpp__color_ratio;
+		fmt->sort  = hist_entry__cmp_ratio;
+		break;
+	case PERF_HPP_DIFF__WEIGHTED_DIFF:
+		fmt->color = hpp__color_wdiff;
+		fmt->sort  = hist_entry__cmp_wdiff;
+		break;
+	case PERF_HPP_DIFF__DELTA_ABS:
+		fmt->color = hpp__color_delta;
+		fmt->sort  = hist_entry__cmp_delta_abs;
+		break;
+	default:
+		fmt->sort  = hist_entry__cmp_nop;
+		break;
+	}
+
+	init_header(d, dfmt);
+	perf_hpp__column_register(fmt);
+	perf_hpp__register_sort_field(fmt);
+}
+
+/* Register the output columns of every data file and, if sort_compute is set, an extra leading sort key. */
+static int ui_init(void)
+{
+	struct data__file *d;
+	struct perf_hpp_fmt *fmt;
+	int i;
+
+	data__for_each_file(i, d) {
+		/*
+		 * Baseline or compute related columns:
+		 *
+		 *   PERF_HPP_DIFF__BASELINE
+		 *   PERF_HPP_DIFF__DELTA
+		 *   PERF_HPP_DIFF__RATIO
+		 *   PERF_HPP_DIFF__WEIGHTED_DIFF
+		 */
+		data__hpp_register(d, i ? compute_2_hpp[compute] :
+					  PERF_HPP_DIFF__BASELINE);
+
+		/*
+		 * And the rest:
+		 *
+		 * PERF_HPP_DIFF__FORMULA
+		 * PERF_HPP_DIFF__PERIOD
+		 * PERF_HPP_DIFF__PERIOD_BASELINE
+		 */
+		if (show_formula && i)
+			data__hpp_register(d, PERF_HPP_DIFF__FORMULA);
+
+		if (show_period)
+			data__hpp_register(d, i ? PERF_HPP_DIFF__PERIOD :
+						  PERF_HPP_DIFF__PERIOD_BASELINE);
+	}
+
+	if (!sort_compute)
+		return 0;
+
+	/*
+	 * Prepend an fmt to sort on columns at 'sort_compute' first.
+	 * This fmt is added only to the sort list but not to the
+	 * output fields list.
+	 *
+	 * Note that this column (data) can be compared twice - one
+	 * for this 'sort_compute' fmt and another for the normal
+	 * diff_hpp_fmt.  But it shouldn't be a problem as most entries
+	 * will be sorted out by first try or baseline and comparing
+	 * is not a costly operation.
+	 */
+	fmt = zalloc(sizeof(*fmt));
+	if (fmt == NULL) {
+		pr_err("Memory allocation failed\n");
+		return -1;
+	}
+
+	fmt->cmp      = hist_entry__cmp_nop;
+	fmt->collapse = hist_entry__cmp_nop;
+
+	switch (compute) {
+	case COMPUTE_DELTA:
+		fmt->sort = hist_entry__cmp_delta_idx;
+		break;
+	case COMPUTE_RATIO:
+		fmt->sort = hist_entry__cmp_ratio_idx;
+		break;
+	case COMPUTE_WEIGHTED_DIFF:
+		fmt->sort = hist_entry__cmp_wdiff_idx;
+		break;
+	case COMPUTE_DELTA_ABS:
+		fmt->sort = hist_entry__cmp_delta_abs_idx;
+		break;
+	default:
+		BUG_ON(1);
+	}
+
+	perf_hpp__prepend_sort_field(fmt);
+	return 0;
+}
+
+/*
+ * Fill data__files[] from argv: 0 args use the defaults (host/guest files
+ * under perf_guest), 1 arg replaces the second default, more are used as is.
+ */
+static int data_init(int argc, const char **argv)
+{
+	struct data__file *d;
+	static const char *defaults[] = {
+		"perf.data.old",
+		"perf.data",
+	};
+	bool use_default = true;
+	int i;
+
+	data__files_cnt = 2;
+	if (argc) {
+		if (argc == 1)
+			defaults[1] = argv[0];
+		else {
+			data__files_cnt = argc;
+			use_default = false;
+		}
+	} else if (perf_guest) {
+		defaults[0] = "perf.data.host";
+		defaults[1] = "perf.data.guest";
+	}
+
+	if (sort_compute >= (unsigned int) data__files_cnt) {
+		pr_err("Order option out of limit.\n");
+		return -EINVAL;
+	}
+	data__files = zalloc(sizeof(*data__files) * data__files_cnt);
+	if (!data__files)
+		return -ENOMEM;
+
+	data__for_each_file(i, d) {
+		struct perf_data *data = &d->data;
+
+		data->file.path = use_default ? defaults[i] : argv[i];
+		data->mode      = PERF_DATA_MODE_READ;
+		data->force     = force;
+		d->idx  = i;
+	}
+	return 0;
+}
+
+/* perf_config() callback: picks up the diff.order and diff.compute keys. */
+static int diff__config(const char *var, const char *value, void *cb __maybe_unused)
+{
+	if (!strcmp(var, "diff.order")) {
+		int ret;
+		if (perf_config_int(&ret, var, value) < 0)
+			return -1;
+		sort_compute = ret;
+		return 0;
+	}
+	if (!strcmp(var, "diff.compute")) {
+		value = value ? value : "";	/* valueless key: rejected as invalid below */
+		if (!strcmp(value, "delta")) {
+			compute = COMPUTE_DELTA;
+		} else if (!strcmp(value, "delta-abs")) {
+			compute = COMPUTE_DELTA_ABS;
+		} else if (!strcmp(value, "ratio")) {
+			compute = COMPUTE_RATIO;
+		} else if (!strcmp(value, "wdiff")) {
+			compute = COMPUTE_WEIGHTED_DIFF;
+		} else {
+			pr_err("Invalid compute method: %s\n", value);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* Entry point of 'perf diff': read config and options, set up inputs and columns, then run the diff. */
+int cmd_diff(int argc, const char **argv)
+{
+	int ret = hists__init();
+
+	if (ret < 0)
+		return ret;
+
+	/* The config callback runs before option parsing; both can set sort_compute. */
+	perf_config(diff__config, NULL);
+	argc = parse_options(argc, argv, options, diff_usage, 0);
+
+	if (quiet)
+		perf_quiet_option();
+
+	if (symbol__init(NULL) < 0)
+		return -1;
+
+	if (data_init(argc, argv) < 0)
+		return -1;
+
+	if (ui_init() < 0)
+		return -1;
+
+	sort__mode = SORT_MODE__DIFF;
+
+	if (setup_sorting(NULL) < 0)
+		usage_with_options(diff_usage, options);
+
+	setup_pager();
+	sort__setup_elide(NULL);
+
+	return __cmd_diff();
+}
diff --git a/builtin-evlist.c b/builtin-evlist.c
new file mode 100644
index 0000000..e06e822
--- /dev/null
+++ b/builtin-evlist.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Builtin evlist command: Show the list of event selectors present
+ * in a perf.data file.
+ */
+#include "builtin.h"
+
+#include "util/util.h"
+
+#include <linux/list.h>
+
+#include "perf.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/parse-events.h"
+#include <subcmd/parse-options.h>
+#include "util/session.h"
+#include "util/data.h"
+#include "util/debug.h"
+
+/* Print every event selector of the session opened from @file_name, formatted per @details. */
+static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
+{
+	struct perf_data data = {
+		.file      = {
+			.path = file_name,
+		},
+		.mode      = PERF_DATA_MODE_READ,
+		.force     = details->force,
+	};
+	struct perf_session *session = perf_session__new(&data, 0, NULL);
+	bool saw_tracepoint = false;
+	struct perf_evsel *evsel;
+
+	if (session == NULL)
+		return -1;
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		perf_evsel__fprintf(evsel, details, stdout);
+		if (evsel->attr.type == PERF_TYPE_TRACEPOINT)
+			saw_tracepoint = true;
+	}
+
+	/* Hint at --trace-fields only when a tracepoint was listed without it. */
+	if (saw_tracepoint && !details->trace_fields)
+		printf("# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events\n");
+
+	perf_session__delete(session);
+	return 0;
+}
+
+/* Entry point of 'perf evlist': parse the options and list the events of input_name. */
+int cmd_evlist(int argc, const char **argv)
+{
+	struct perf_attr_details details = { .verbose = false, };
+	const struct option options[] = {
+	OPT_STRING('i', "input", &input_name, "file", "Input file name"),
+	OPT_BOOLEAN('F', "freq", &details.freq, "Show the sample frequency"),
+	OPT_BOOLEAN('v', "verbose", &details.verbose,
+		    "Show all event attr details"),
+	OPT_BOOLEAN('g', "group", &details.event_group,
+		    "Show event group information"),
+	OPT_BOOLEAN('f', "force", &details.force, "don't complain, do it"),
+	OPT_BOOLEAN(0, "trace-fields", &details.trace_fields, "Show tracepoint fields"),
+	OPT_END()
+	};
+	const char * const evlist_usage[] = {
+		"perf evlist [<options>]",
+		NULL
+	};
+
+	argc = parse_options(argc, argv, options, evlist_usage, 0);
+	if (argc)	/* anything left over after option parsing is a usage error */
+		usage_with_options(evlist_usage, options);
+
+	if (details.event_group && (details.verbose || details.freq)) {
+		usage_with_options_msg(evlist_usage, options,
+			"--group option is not compatible with other options\n");
+	}
+	return __cmd_evlist(input_name, &details);
+}
diff --git a/builtin-ftrace.c b/builtin-ftrace.c
new file mode 100644
index 0000000..f42f228
--- /dev/null
+++ b/builtin-ftrace.c
@@ -0,0 +1,521 @@
+/*
+ * builtin-ftrace.c
+ *
+ * Copyright (c) 2013  LG Electronics,  Namhyung Kim <namhyung@kernel.org>
+ *
+ * Released under the GPL v2.
+ */
+
+#include "builtin.h"
+#include "perf.h"
+
+#include <errno.h>
+#include <unistd.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <poll.h>
+
+#include "debug.h"
+#include <subcmd/parse-options.h>
+#include <api/fs/tracing_path.h>
+#include "evlist.h"
+#include "target.h"
+#include "cpumap.h"
+#include "thread_map.h"
+#include "util/config.h"
+
+
+#define DEFAULT_TRACER  "function_graph"
+
+struct perf_ftrace {
+	struct perf_evlist	*evlist;
+	struct target		target;
+	const char		*tracer;
+	struct list_head	filters;
+	struct list_head	notrace;
+	struct list_head	graph_funcs;
+	struct list_head	nograph_funcs;
+	int			graph_depth;
+};
+
+struct filter_entry {
+	struct list_head	list;
+	char			name[];
+};
+
+static volatile sig_atomic_t done;	/* set from the signal handlers, polled by __cmd_ftrace() */
+
+static void sig_handler(int sig __maybe_unused)
+{
+	done = true;
+}
+
+/*
+ * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
+ * we asked by setting its exec_error to the function below,
+ * ftrace__workload_exec_failed_signal.
+ *
+ * XXX We need to handle this more appropriately, emitting an error, etc.
+ */
+static void ftrace__workload_exec_failed_signal(int signo __maybe_unused,
+						siginfo_t *info __maybe_unused,
+						void *ucontext __maybe_unused)
+{
+	/* workload_exec_errno = info->si_value.sival_int; */
+	done = true;
+}
+
+/* Write @val plus a newline to tracing file @name, appending or truncating; returns 0 or -1. */
+static int __write_tracing_file(const char *name, const char *val, bool append)
+{
+	char *file;
+	int fd, ret = -1;
+	ssize_t size = strlen(val);
+	int flags = O_WRONLY;
+	char errbuf[512];
+	char *val_copy;
+
+	file = get_tracing_file(name);
+	if (!file) {
+		pr_debug("cannot get tracing file: %s\n", name);
+		return -1;
+	}
+
+	if (append)
+		flags |= O_APPEND;
+	else
+		flags |= O_TRUNC;
+
+	fd = open(file, flags);
+	if (fd < 0) {
+		pr_debug("cannot open tracing file: %s: %s\n",
+			 name, str_error_r(errno, errbuf, sizeof(errbuf)));
+		goto out;
+	}
+
+	/*
+	 * Copy the original value and append a '\n'. Without this,
+	 * the kernel can hide possible errors.
+	 */
+	val_copy = strdup(val);
+	if (!val_copy)
+		goto out_close;
+	val_copy[size] = '\n';	/* replaces the NUL; only size + 1 bytes are written */
+
+	if (write(fd, val_copy, size + 1) == size + 1)
+		ret = 0;
+	else
+		pr_debug("write '%s' to tracing/%s failed: %s\n",
+			 val, name, str_error_r(errno, errbuf, sizeof(errbuf)));
+	free(val_copy);
+out_close:
+	close(fd);
+out:
+	put_tracing_file(file);
+	return ret;
+}
+
+static int write_tracing_file(const char *name, const char *val)
+{
+	return __write_tracing_file(name, val, false);
+}
+
+static int append_tracing_file(const char *name, const char *val)
+{
+	return __write_tracing_file(name, val, true);
+}
+
+static int reset_tracing_cpu(void);
+static void reset_tracing_filters(void);
+
+static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
+{
+	if (write_tracing_file("tracing_on", "0") < 0)
+		return -1;
+
+	if (write_tracing_file("current_tracer", "nop") < 0)
+		return -1;
+
+	if (write_tracing_file("set_ftrace_pid", " ") < 0)
+		return -1;
+
+	if (reset_tracing_cpu() < 0)
+		return -1;
+
+	if (write_tracing_file("max_graph_depth", "0") < 0)
+		return -1;
+
+	reset_tracing_filters();
+	return 0;
+}
+
+/* Append each entry of the evlist's thread map to set_ftrace_pid; skipped for CPU targets. */
+static int set_tracing_pid(struct perf_ftrace *ftrace)
+{
+	char pid_str[16];
+	int idx;
+
+	if (target__has_cpu(&ftrace->target))
+		return 0;
+	for (idx = 0; idx < thread_map__nr(ftrace->evlist->threads); idx++) {
+		scnprintf(pid_str, sizeof(pid_str), "%d",
+			  ftrace->evlist->threads->map[idx]);
+		if (append_tracing_file("set_ftrace_pid", pid_str) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+/* Write @cpumap to tracing_cpumask in the textual form produced by cpu_map__snprint_mask(). */
+static int set_tracing_cpumask(struct cpu_map *cpumap)
+{
+	char *cpumask;
+	size_t mask_size;
+	int ret;
+	int last_cpu;
+
+	last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1);
+	/* One digit per 4 CPUs, counting from cpu 0 (so /4 + 1), plus the NUL. */
+	mask_size = last_cpu / 4 + 2;
+	mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */
+
+	cpumask = malloc(mask_size);
+	if (cpumask == NULL) {
+		pr_debug("failed to allocate cpu mask\n");
+		return -1;
+	}
+
+	cpu_map__snprint_mask(cpumap, cpumask, mask_size);
+	ret = write_tracing_file("tracing_cpumask", cpumask);
+	free(cpumask);
+	return ret;
+}
+
+static int set_tracing_cpu(struct perf_ftrace *ftrace)
+{
+	struct cpu_map *cpumap = ftrace->evlist->cpus;
+
+	if (!target__has_cpu(&ftrace->target))
+		return 0;
+
+	return set_tracing_cpumask(cpumap);
+}
+
+/* Set tracing_cpumask from cpu_map__new(NULL); fails with -1 if no map could be built. */
+static int reset_tracing_cpu(void)
+{
+	struct cpu_map *cpumap = cpu_map__new(NULL);
+	int ret = cpumap ? set_tracing_cpumask(cpumap) : -1;
+	if (cpumap)	/* NOTE(review): assumes cpu_map__new() reports failure as NULL */
+		cpu_map__put(cpumap);
+	return ret;
+}
+
+static int __set_tracing_filter(const char *filter_file, struct list_head *funcs)
+{
+	struct filter_entry *pos;
+
+	list_for_each_entry(pos, funcs, list) {
+		if (append_tracing_file(filter_file, pos->name) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int set_tracing_filters(struct perf_ftrace *ftrace)
+{
+	int ret;
+
+	ret = __set_tracing_filter("set_ftrace_filter", &ftrace->filters);
+	if (ret < 0)
+		return ret;
+
+	ret = __set_tracing_filter("set_ftrace_notrace", &ftrace->notrace);
+	if (ret < 0)
+		return ret;
+
+	ret = __set_tracing_filter("set_graph_function", &ftrace->graph_funcs);
+	if (ret < 0)
+		return ret;
+
+	/* old kernels do not have this filter */
+	__set_tracing_filter("set_graph_notrace", &ftrace->nograph_funcs);
+
+	return ret;
+}
+
+static void reset_tracing_filters(void)
+{
+	write_tracing_file("set_ftrace_filter", " ");
+	write_tracing_file("set_ftrace_notrace", " ");
+	write_tracing_file("set_graph_function", " ");
+	write_tracing_file("set_graph_notrace", " ");
+}
+
+/* Write graph_depth to max_graph_depth; 0 leaves the file alone, negative is rejected. */
+static int set_tracing_depth(struct perf_ftrace *ftrace)
+{
+	int depth = ftrace->graph_depth;
+	char text[16];
+
+	if (depth == 0)
+		return 0;
+
+	if (depth < 0) {
+		pr_err("invalid graph depth: %d\n", depth);
+		return -1;
+	}
+
+	snprintf(text, sizeof(text), "%d", depth);
+
+	/* Any write failure is reported as plain -1. */
+	return write_tracing_file("max_graph_depth", text) < 0 ? -1 : 0;
+}
+
+static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
+{
+	char *trace_file;
+	int trace_fd;
+	char buf[4096];
+	struct pollfd pollfd = {
+		.events = POLLIN,
+	};
+
+	if (geteuid() != 0) {
+		pr_err("ftrace only works for root!\n");
+		return -1;
+	}
+
+	signal(SIGINT, sig_handler);
+	signal(SIGUSR1, sig_handler);
+	signal(SIGCHLD, sig_handler);
+	signal(SIGPIPE, sig_handler);
+
+	if (reset_tracing_files(ftrace) < 0) {
+		pr_err("failed to reset ftrace\n");
+		goto out;
+	}
+
+	/* reset ftrace buffer */
+	if (write_tracing_file("trace", "0") < 0)
+		goto out;
+
+	if (argc && perf_evlist__prepare_workload(ftrace->evlist,
+				&ftrace->target, argv, false,
+				ftrace__workload_exec_failed_signal) < 0) {
+		goto out;
+	}
+
+	if (set_tracing_pid(ftrace) < 0) {
+		pr_err("failed to set ftrace pid\n");
+		goto out_reset;
+	}
+
+	if (set_tracing_cpu(ftrace) < 0) {
+		pr_err("failed to set tracing cpumask\n");
+		goto out_reset;
+	}
+
+	if (set_tracing_filters(ftrace) < 0) {
+		pr_err("failed to set tracing filters\n");
+		goto out_reset;
+	}
+
+	if (set_tracing_depth(ftrace) < 0) {
+		pr_err("failed to set graph depth\n");
+		goto out_reset;
+	}
+
+	if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
+		pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
+		goto out_reset;
+	}
+
+	setup_pager();
+
+	trace_file = get_tracing_file("trace_pipe");
+	if (!trace_file) {
+		pr_err("failed to open trace_pipe\n");
+		goto out_reset;
+	}
+
+	trace_fd = open(trace_file, O_RDONLY);
+
+	put_tracing_file(trace_file);
+
+	if (trace_fd < 0) {
+		pr_err("failed to open trace_pipe\n");
+		goto out_reset;
+	}
+
+	fcntl(trace_fd, F_SETFL, O_NONBLOCK);
+	pollfd.fd = trace_fd;
+
+	if (write_tracing_file("tracing_on", "1") < 0) {
+		pr_err("can't enable tracing\n");
+		goto out_close_fd;
+	}
+
+	perf_evlist__start_workload(ftrace->evlist);
+
+	while (!done) {
+		if (poll(&pollfd, 1, -1) < 0)
+			break;
+
+		if (pollfd.revents & POLLIN) {
+			int n = read(trace_fd, buf, sizeof(buf));
+			if (n < 0)
+				break;
+			if (fwrite(buf, n, 1, stdout) != 1)
+				break;
+		}
+	}
+
+	write_tracing_file("tracing_on", "0");
+
+	/* read remaining buffer contents */
+	while (true) {
+		int n = read(trace_fd, buf, sizeof(buf));
+		if (n <= 0)
+			break;
+		if (fwrite(buf, n, 1, stdout) != 1)
+			break;
+	}
+
+out_close_fd:
+	close(trace_fd);
+out_reset:
+	reset_tracing_files(ftrace);
+out:
+	return done ? 0 : -1;
+}
+
+/* perf_config() callback: accepts only ftrace.tracer = function_graph | function. */
+static int perf_ftrace_config(const char *var, const char *value, void *cb)
+{
+	struct perf_ftrace *ftrace = cb;
+
+	if (!strstarts(var, "ftrace."))
+		return 0;
+	if (strcmp(var, "ftrace.tracer"))
+		return -1;
+
+	/* A key without a value may arrive as NULL; reject it like any other bad choice. */
+	if (value && (!strcmp(value, "function_graph") ||
+		      !strcmp(value, "function"))) {
+		ftrace->tracer = value;
+		return 0;
+	}
+	pr_err("Please select \"function_graph\" (default) or \"function\"\n");
+	return -1;
+}
+
+/* Option callback: queue a copy of @str at the tail of the list stored in opt->value. */
+static int parse_filter_func(const struct option *opt, const char *str,
+			     int unset __maybe_unused)
+{
+	struct list_head *filters = opt->value;
+	size_t name_sz = strlen(str) + 1;	/* includes the NUL */
+	struct filter_entry *node = malloc(sizeof(*node) + name_sz);
+
+	if (!node)
+		return -ENOMEM;
+
+	memcpy(node->name, str, name_sz);
+	list_add_tail(&node->list, filters);
+	return 0;
+}
+
+static void delete_filter_func(struct list_head *head)
+{
+	struct filter_entry *pos, *tmp;
+
+	list_for_each_entry_safe(pos, tmp, head, list) {
+		list_del(&pos->list);
+		free(pos);
+	}
+}
+
+int cmd_ftrace(int argc, const char **argv)
+{
+	int ret;
+	struct perf_ftrace ftrace = {
+		.tracer = DEFAULT_TRACER,
+		.target = { .uid = UINT_MAX, },
+	};
+	const char * const ftrace_usage[] = {
+		"perf ftrace [<options>] [<command>]",
+		"perf ftrace [<options>] -- <command> [<options>]",
+		NULL
+	};
+	const struct option ftrace_options[] = {
+	OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
+		   "tracer to use: function_graph(default) or function"),
+	OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
+		   "trace on existing process id"),
+	OPT_INCR('v', "verbose", &verbose,
+		 "be more verbose"),
+	OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide,
+		    "system-wide collection from all CPUs"),
+	OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
+		    "list of cpus to monitor"),
+	OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
+		     "trace given functions only", parse_filter_func),
+	OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
+		     "do not trace given functions", parse_filter_func),
+	OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
+		     "Set graph filter on given functions", parse_filter_func),
+	OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
+		     "Set nograph filter on given functions", parse_filter_func),
+	OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth,
+		    "Max depth for function graph tracer"),
+	OPT_END()
+	};
+
+	INIT_LIST_HEAD(&ftrace.filters);
+	INIT_LIST_HEAD(&ftrace.notrace);
+	INIT_LIST_HEAD(&ftrace.graph_funcs);
+	INIT_LIST_HEAD(&ftrace.nograph_funcs);
+
+	ret = perf_config(perf_ftrace_config, &ftrace);
+	if (ret < 0)
+		return -1;
+
+	argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
+			    PARSE_OPT_STOP_AT_NON_OPTION);
+	if (!argc && target__none(&ftrace.target))
+		usage_with_options(ftrace_usage, ftrace_options);
+
+	ret = target__validate(&ftrace.target);
+	if (ret) {
+		char errbuf[512];
+
+		target__strerror(&ftrace.target, ret, errbuf, 512);
+		pr_err("%s\n", errbuf);
+		goto out_delete_filters;
+	}
+
+	ftrace.evlist = perf_evlist__new();
+	if (ftrace.evlist == NULL) {
+		ret = -ENOMEM;
+		goto out_delete_filters;
+	}
+
+	ret = perf_evlist__create_maps(ftrace.evlist, &ftrace.target);
+	if (ret < 0)
+		goto out_delete_evlist;
+
+	ret = __cmd_ftrace(&ftrace, argc, argv);
+
+out_delete_evlist:
+	perf_evlist__delete(ftrace.evlist);
+
+out_delete_filters:
+	delete_filter_func(&ftrace.filters);
+	delete_filter_func(&ftrace.notrace);
+	delete_filter_func(&ftrace.graph_funcs);
+	delete_filter_func(&ftrace.nograph_funcs);
+
+	return ret;
+}
diff --git a/builtin-help.c b/builtin-help.c
new file mode 100644
index 0000000..1c41b4e
--- /dev/null
+++ b/builtin-help.c
@@ -0,0 +1,493 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-help.c
+ *
+ * Builtin help command
+ */
+#include "perf.h"
+#include "util/config.h"
+#include "builtin.h"
+#include <subcmd/exec-cmd.h>
+#include "common-cmds.h"
+#include <subcmd/parse-options.h>
+#include <subcmd/run-command.h>
+#include <subcmd/help.h>
+#include "util/debug.h"
+#include <linux/kernel.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+static struct man_viewer_list {
+	struct man_viewer_list *next;
+	char name[0];
+} *man_viewer_list;
+
+static struct man_viewer_info_list {
+	struct man_viewer_info_list *next;
+	const char *info;
+	char name[0];
+} *man_viewer_info_list;
+
+enum help_format {
+	HELP_FORMAT_NONE,
+	HELP_FORMAT_MAN,
+	HELP_FORMAT_INFO,
+	HELP_FORMAT_WEB,
+};
+
+/* Map a help format name to its enum; unknown names are reported and yield HELP_FORMAT_NONE. */
+static enum help_format parse_help_format(const char *format)
+{
+	enum help_format parsed =
+		!strcmp(format, "man")  ? HELP_FORMAT_MAN :
+		!strcmp(format, "info") ? HELP_FORMAT_INFO :
+		!strcmp(format, "web")  ? HELP_FORMAT_WEB :
+		!strcmp(format, "html") ? HELP_FORMAT_WEB : HELP_FORMAT_NONE;
+	if (parsed == HELP_FORMAT_NONE)
+		pr_err("unrecognized help format '%s'", format);
+	return parsed;
+}
+
+/* Info string registered for viewer @name (compared case-insensitively), or NULL if none. */
+static const char *get_man_viewer_info(const char *name)
+{
+	struct man_viewer_info_list *node = man_viewer_info_list;
+
+	while (node && strcasecmp(name, node->name))
+		node = node->next;
+
+	return node ? node->info : NULL;
+}
+
+/* Return 0 if "emacsclient --version" reports a version >= 22, -1 otherwise. */
+static int check_emacsclient_version(void)
+{
+	struct strbuf buffer = STRBUF_INIT;
+	struct child_process ec_process;
+	const char *argv_ec[] = { "emacsclient", "--version", NULL };
+	ssize_t nread;
+	int version;
+	int ret = -1;
+
+	/* emacsclient prints its version number on stderr */
+	memset(&ec_process, 0, sizeof(ec_process));
+	ec_process.argv = argv_ec;
+	ec_process.err = -1;
+	ec_process.stdout_to_stderr = 1;
+	if (start_command(&ec_process)) {
+		fprintf(stderr, "Failed to start emacsclient.\n");
+		return -1;
+	}
+	nread = strbuf_read(&buffer, ec_process.err, 20);
+	/* Close the pipe and finish the command on every path, even after a failed read. */
+	close(ec_process.err);
+	/*
+	 * Don't bother checking the return value, because "emacsclient
+	 * --version" seems to always exit with code 1.
+	 */
+	finish_command(&ec_process);
+	if (nread < 0) {
+		fprintf(stderr, "Failed to read emacsclient version\n");
+		goto out;
+	}
+	if (!strstarts(buffer.buf, "emacsclient")) {
+		fprintf(stderr, "Failed to parse emacsclient version.\n");
+		goto out;
+	}
+	version = atoi(buffer.buf + strlen("emacsclient"));
+	if (version < 22) {
+		fprintf(stderr,
+			"emacsclient version '%d' too old (< 22).\n",
+			version);
+	} else
+		ret = 0;
+out:
+	strbuf_release(&buffer);
+	return ret;
+}
+
+static void exec_failed(const char *cmd)
+{
+	char sbuf[STRERR_BUFSIZE];
+	pr_warning("failed to exec '%s': %s", cmd, str_error_r(errno, sbuf, sizeof(sbuf)));
+}
+
+static void exec_woman_emacs(const char *path, const char *page)
+{
+	if (!check_emacsclient_version()) {
+		/* This works only with emacsclient version >= 22. */
+		char *man_page;
+
+		if (!path)
+			path = "emacsclient";
+		if (asprintf(&man_page, "(woman \"%s\")", page) > 0) {
+			execlp(path, "emacsclient", "-e", man_page, NULL);
+			free(man_page);
+		}
+		exec_failed(path);
+	}
+}
+
+/* Open "man:<page>(1)" in a konqueror tab via kfmclient; does nothing without $DISPLAY. */
+static void exec_man_konqueror(const char *path, const char *page)
+{
+	const char *display = getenv("DISPLAY");
+
+	if (display && *display) {
+		char *man_page;
+		const char *filename = "kfmclient";
+		/* It's simpler to launch konqueror using kfmclient. */
+		if (path) {
+			const char *file = strrchr(path, '/');
+			if (file && !strcmp(file + 1, "konqueror")) {
+				char *new = strdup(path);
+				if (!new)
+					return;	/* out of memory: give up on this viewer */
+				/* strlen("konqueror") == strlen("kfmclient") */
+				strcpy(new + (file - path) + 1, "kfmclient");
+				path = new;
+			}
+			if (file)
+				filename = file;
+		} else
+			path = "kfmclient";
+		if (asprintf(&man_page, "man:%s(1)", page) > 0) {
+			execlp(path, filename, "newTab", man_page, NULL);
+			free(man_page);
+		}
+		exec_failed(path);
+	}
+}
+
+static void exec_man_man(const char *path, const char *page)
+{
+	if (!path)
+		path = "man";
+	execlp(path, "man", page, NULL);
+	exec_failed(path);
+}
+
+static void exec_man_cmd(const char *cmd, const char *page)
+{
+	char *shell_cmd;
+
+	if (asprintf(&shell_cmd, "%s %s", cmd, page) > 0) {
+		execl("/bin/sh", "sh", "-c", shell_cmd, NULL);
+		free(shell_cmd);
+	}
+	exec_failed(cmd);
+}
+
+/* Append @name to man_viewer_list; the viewer is silently dropped on allocation failure. */
+static void add_man_viewer(const char *name)
+{
+	struct man_viewer_list **p = &man_viewer_list;
+	while (*p)
+		p = &((*p)->next);
+	*p = zalloc(sizeof(**p) + strlen(name) + 1);
+	if (*p)	/* NULL here means the allocation failed */
+		strcpy((*p)->name, name);
+}
+
+/* True only if name[0..len) is exactly "man", "woman" or "konqueror" (case-insensitive). */
+static int supported_man_viewer(const char *name, size_t len)
+{
+	const char *v = len == 3 ? "man" : len == 5 ? "woman" : len == 9 ? "konqueror" : NULL;
+	return v && !strncasecmp(v, name, len);
+}
+
+/* Push {name[0..len), value} onto man_viewer_info_list; dropped on allocation failure. */
+static void do_add_man_viewer_info(const char *name, size_t len, const char *value)
+{
+	struct man_viewer_info_list *new = zalloc(sizeof(*new) + len + 1);
+	if (!new)
+		return;
+	memcpy(new->name, name, len);	/* terminator relies on zalloc() zero-filling, as before */
+	new->info = strdup(value);
+	new->next = man_viewer_info_list;
+	man_viewer_info_list = new;
+}
+
+static void unsupported_man_viewer(const char *name, const char *var)
+{
+	pr_warning("'%s': path for unsupported man viewer.\n"
+		   "Please consider using 'man.<tool>.%s' instead.", name, var);
+}
+
+static int add_man_viewer_path(const char *name,
+			       size_t len,
+			       const char *value)
+{
+	if (supported_man_viewer(name, len))
+		do_add_man_viewer_info(name, len, value);
+	else
+		unsupported_man_viewer(name, "cmd");
+
+	return 0;
+}
+
+static int add_man_viewer_cmd(const char *name,
+			      size_t len,
+			      const char *value)
+{
+	if (supported_man_viewer(name, len))
+		unsupported_man_viewer(name, "path");
+	else
+		do_add_man_viewer_info(name, len, value);
+
+	return 0;
+}
+
+/* Handle a "man.<tool>.path" or "man.<tool>.cmd" config key; other sub keys only warn. */
+static int add_man_viewer_info(const char *var, const char *value)
+{
+	const char *tool = var + 4;	/* the part after the first four characters of @var */
+	const char *dot = strrchr(tool, '.');
+	int is_path, is_cmd;
+
+	if (dot == NULL) {
+		pr_err("Config with no key for man viewer: %s", tool);
+		return -1;
+	}
+
+	is_path = strcmp(dot, ".path") == 0;
+	is_cmd = !is_path && strcmp(dot, ".cmd") == 0;
+	if (!is_path && !is_cmd) {
+		pr_warning("'%s': unsupported man viewer sub key.", dot);
+		return 0;
+	}
+	if (!value)
+		return config_error_nonbool(var);
+	if (is_path)
+		return add_man_viewer_path(tool, dot - tool, value);
+	return add_man_viewer_cmd(tool, dot - tool, value);
+}
+
+/* perf_config() callback for the help.format, man.viewer and man.<tool>.* keys. */
+static int perf_help_config(const char *var, const char *value, void *cb)
+{
+	enum help_format *out = cb;
+	int is_format = !strcmp(var, "help.format");
+	int is_viewer = !is_format && !strcmp(var, "man.viewer");
+
+	/* Everything else is either a man.<tool>.* key or not ours at all. */
+	if (!is_format && !is_viewer)
+		return strstarts(var, "man.") ? add_man_viewer_info(var, value) : 0;
+
+	if (!value)
+		return config_error_nonbool(var);
+
+	if (is_viewer) {
+		add_man_viewer(value);
+		return 0;
+	}
+
+	/* help.format: store the parsed choice, failing on unknown names. */
+	*out = parse_help_format(value);
+	return *out == HELP_FORMAT_NONE ? -1 : 0;
+}
+
+/*
+ * Command name lists: filled by load_command_list() in cmd_help() and
+ * handed to list_commands() for 'perf help --all'.
+ */
+static struct cmdnames main_cmds, other_cmds;
+
+/*
+ * Print the common_cmds table to stdout as "name   help" rows, with the
+ * name column padded to the width of the longest command name.
+ */
+void list_common_cmds_help(void)
+{
+	unsigned int idx, width = 0;
+
+	/* First pass: find the widest command name. */
+	for (idx = 0; idx < ARRAY_SIZE(common_cmds); idx++) {
+		size_t len = strlen(common_cmds[idx].name);
+
+		if (width < len)
+			width = len;
+	}
+
+	puts(" The most commonly used perf commands are:");
+
+	/* Second pass: one left-aligned row per command. */
+	for (idx = 0; idx < ARRAY_SIZE(common_cmds); idx++) {
+		printf("   %-*s   ", width, common_cmds[idx].name);
+		puts(common_cmds[idx].help);
+	}
+}
+
+/*
+ * Map a perf command name to its documentation page name.
+ *
+ * NULL maps to "perf", names already starting with "perf" are returned
+ * unchanged, anything else becomes a newly allocated "perf-<cmd>" string.
+ * Returns NULL when that allocation fails.
+ */
+static const char *cmd_to_page(const char *perf_cmd)
+{
+	char *page;
+
+	if (perf_cmd == NULL)
+		return "perf";
+
+	if (strstarts(perf_cmd, "perf"))
+		return perf_cmd;
+
+	if (asprintf(&page, "perf-%s", perf_cmd) < 0)
+		return NULL;
+
+	return page;
+}
+
+/*
+ * Prepend perf's own man directory to the MANPATH environment variable.
+ */
+static void setup_man_path(void)
+{
+	const char *old_path = getenv("MANPATH");
+	char *new_path;
+
+	if (!old_path)
+		old_path = "";
+
+	/*
+	 * Always put ':' after our path.  Without an old MANPATH the
+	 * trailing ':' lets 'man' try the system-wide paths after ours;
+	 * with one, the ':' is the delimiter between the two.
+	 */
+	if (asprintf(&new_path, "%s:%s", system_path(PERF_MAN_PATH), old_path) <= 0) {
+		pr_err("Unable to setup man path");
+		return;
+	}
+
+	setenv("MANPATH", new_path, 1);
+	free(new_path);
+}
+
+/*
+ * Try to display @page with the viewer called @name.
+ *
+ * The built-in viewers ("man", "woman", "konqueror", compared without
+ * regard to case) use their dedicated launchers; any other name is run
+ * through exec_man_cmd() when configuration info exists for it, and
+ * otherwise only triggers a warning.
+ */
+static void exec_viewer(const char *name, const char *page)
+{
+	const char *info = get_man_viewer_info(name);
+
+	if (strcasecmp(name, "man") == 0) {
+		exec_man_man(info, page);
+		return;
+	}
+
+	if (strcasecmp(name, "woman") == 0) {
+		exec_woman_emacs(info, page);
+		return;
+	}
+
+	if (strcasecmp(name, "konqueror") == 0) {
+		exec_man_konqueror(info, page);
+		return;
+	}
+
+	if (info == NULL) {
+		pr_warning("'%s': unknown man viewer.", name);
+		return;
+	}
+
+	exec_man_cmd(info, page);
+}
+
+/*
+ * Show the man page for @perf_cmd.
+ *
+ * Tries, in order, every viewer in man_viewer_list, the viewer named by
+ * the PERF_MAN_VIEWER environment variable (if set) and finally plain
+ * "man".  Each attempt comes back only when it could not take over, so
+ * reaching the end means nothing worked and -1 is returned.
+ */
+static int show_man_page(const char *perf_cmd)
+{
+	struct man_viewer_list *viewer;
+	const char *page = cmd_to_page(perf_cmd);
+	const char *fallback = getenv("PERF_MAN_VIEWER");
+
+	/* cmd_to_page() returns NULL when it cannot allocate the name. */
+	if (!page) {
+		pr_err("Unable to build the man page name");
+		return -1;
+	}
+
+	setup_man_path();
+	for (viewer = man_viewer_list; viewer; viewer = viewer->next)
+		exec_viewer(viewer->name, page); /* will return when unable */
+
+	if (fallback)
+		exec_viewer(fallback, page);
+	exec_viewer("man", page);
+
+	pr_err("no man viewer handled the request");
+	return -1;
+}
+
+/*
+ * Show the info page for @perf_cmd by exec'ing "info" with INFOPATH
+ * pointing at perf's info directory.
+ */
+static int show_info_page(const char *perf_cmd)
+{
+	const char *page = cmd_to_page(perf_cmd);
+	const char *info_path = system_path(PERF_INFO_PATH);
+
+	setenv("INFOPATH", info_path, 1);
+	execlp("info", "info", "perfman", page, NULL);
+
+	/* execlp() only comes back when it failed. */
+	return -1;
+}
+
+/*
+ * Build the path of the HTML page called @page.
+ *
+ * On success *@page_path receives a newly allocated
+ * "<html dir>/<page>.html" string and the asprintf() result is returned.
+ * Returns -1 when the HTML directory has no regular perf.html file.
+ */
+static int get_html_page_path(char **page_path, const char *page)
+{
+	const char *html_path = system_path(PERF_HTML_PATH);
+	const char *probe = mkpath("%s/perf.html", html_path);
+	struct stat st;
+
+	/* Check that we have a perf documentation directory. */
+	if (stat(probe, &st) == 0 && S_ISREG(st.st_mode))
+		return asprintf(page_path, "%s/%s.html", html_path, page);
+
+	pr_err("'%s': not a documentation directory.", html_path);
+	return -1;
+}
+
+/*
+ * If open_html is not defined in a platform-specific way (see for
+ * example compat/mingw.h), we use the script web--browse to display
+ * HTML.
+ */
+#ifndef open_html
+/* Default HTML opener: hand @path to the "web--browse" helper command. */
+static void open_html(const char *path)
+{
+	execl_cmd("web--browse", "-c", "help.browser", path, NULL);
+}
+#endif
+
+/*
+ * Show the HTML page for @perf_cmd.  Returns -1 when the page path cannot
+ * be built, 0 once open_html() has been called.
+ */
+static int show_html_page(const char *perf_cmd)
+{
+	char *page_path; /* it leaks but we exec below */
+	const char *page = cmd_to_page(perf_cmd);
+	int err = get_html_page_path(&page_path, page);
+
+	if (err < 0)
+		return -1;
+
+	open_html(page_path);
+	return 0;
+}
+
+/*
+ * Entry point of 'perf help'.
+ *
+ * Loads the command lists, applies the help-related configuration, parses
+ * the command line and then either lists commands (--all, or no command
+ * given) or shows the documentation of argv[0] in the selected format.
+ *
+ * Returns 0 after printing a command list, a non-zero perf_config() result
+ * when configuration handling fails, and otherwise the result of the
+ * chosen show_*_page() helper (-1 for an unknown format).
+ */
+int cmd_help(int argc, const char **argv)
+{
+	bool show_all = false;
+	/* Default format; "help.format" and -m/-w/-i can override it. */
+	enum help_format help_format = HELP_FORMAT_MAN;
+	struct option builtin_help_options[] = {
+	OPT_BOOLEAN('a', "all", &show_all, "print all available commands"),
+	OPT_SET_UINT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN),
+	OPT_SET_UINT('w', "web", &help_format, "show manual in web browser",
+			HELP_FORMAT_WEB),
+	OPT_SET_UINT('i', "info", &help_format, "show info page",
+			HELP_FORMAT_INFO),
+	OPT_END(),
+	};
+	/* Subcommand names passed to parse_options_subcommand(). */
+	const char * const builtin_help_subcommands[] = {
+		"buildid-cache", "buildid-list", "diff", "evlist", "help", "list",
+		"record", "report", "bench", "stat", "timechart", "top", "annotate",
+		"script", "sched", "kallsyms", "kmem", "lock", "kvm", "test", "inject", "mem", "data",
+#ifdef HAVE_LIBELF_SUPPORT
+		"probe",
+#endif
+#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
+		"trace",
+#endif
+	NULL };
+	const char *builtin_help_usage[] = {
+		"perf help [--all] [--man|--web|--info] [command]",
+		NULL
+	};
+	int rc;
+
+	load_command_list("perf-", &main_cmds, &other_cmds);
+
+	/* Configuration runs first, so command line options override it. */
+	rc = perf_config(perf_help_config, &help_format);
+	if (rc)
+		return rc;
+
+	argc = parse_options_subcommand(argc, argv, builtin_help_options,
+			builtin_help_subcommands, builtin_help_usage, 0);
+
+	if (show_all) {
+		printf("\n Usage: %s\n\n", perf_usage_string);
+		list_commands("perf commands", &main_cmds, &other_cmds);
+		printf(" %s\n\n", perf_more_info_string);
+		return 0;
+	}
+
+	/* No command left after option parsing: print the short overview. */
+	if (!argv[0]) {
+		printf("\n usage: %s\n\n", perf_usage_string);
+		list_common_cmds_help();
+		printf("\n %s\n\n", perf_more_info_string);
+		return 0;
+	}
+
+	switch (help_format) {
+	case HELP_FORMAT_MAN:
+		rc = show_man_page(argv[0]);
+		break;
+	case HELP_FORMAT_INFO:
+		rc = show_info_page(argv[0]);
+		break;
+	case HELP_FORMAT_WEB:
+		rc = show_html_page(argv[0]);
+		break;
+	case HELP_FORMAT_NONE:
+		/* fall-through */
+	default:
+		rc = -1;
+		break;
+	}
+
+	return rc;
+}
diff --git a/builtin-inject.c b/builtin-inject.c
new file mode 100644
index 0000000..40fe919
--- /dev/null
+++ b/builtin-inject.c
@@ -0,0 +1,878 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-inject.c
+ *
+ * Builtin inject command: Examine the live mode (stdin) event stream
+ * and repipe it to stdout while optionally injecting additional
+ * events into it.
+ */
+#include "builtin.h"
+
+#include "perf.h"
+#include "util/color.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/debug.h"
+#include "util/build-id.h"
+#include "util/data.h"
+#include "util/auxtrace.h"
+#include "util/jit.h"
+#include "util/thread.h"
+
+#include <subcmd/parse-options.h>
+
+#include <linux/list.h>
+#include <errno.h>
+#include <signal.h>
+
+/*
+ * State of one 'perf inject' run.  The tool callbacks receive a pointer to
+ * the embedded 'tool' member and recover this structure with container_of().
+ */
+struct perf_inject {
+	struct perf_tool	tool;		/* event callbacks */
+	struct perf_session	*session;	/* input session, set in cmd_inject() */
+	bool			build_ids;	/* -b/--build-ids */
+	bool			sched_stat;	/* -s/--sched-stat */
+	bool			have_auxtrace;	/* set once an auxtrace event was repiped */
+	bool			strip;		/* --strip */
+	bool			jit_mode;	/* -j/--jit */
+	const char		*input_name;	/* -i/--input, defaults to "-" */
+	struct perf_data	output;		/* where events are written */
+	u64			bytes_written;	/* running total, becomes header.data_size */
+	u64			aux_id;		/* first sample id seen by perf_event__drop_aux() */
+	struct list_head	samples;	/* list of struct event_entry */
+	struct itrace_synth_opts itrace_synth_opts; /* --itrace */
+};
+
+/*
+ * A saved copy of an event, queued on perf_inject.samples.  The event
+ * bytes are stored inline after the header, so entries are allocated with
+ * sizeof(struct event_entry) plus the event size.
+ */
+struct event_entry {
+	struct list_head node;	/* link in perf_inject.samples */
+	u32		 tid;	/* sample tid, used as the lookup key */
+	union perf_event event[0];	/* copied event, variable length */
+};
+
+/*
+ * Write @sz bytes from @buf to the inject output and add the amount that
+ * was written to inject->bytes_written.
+ *
+ * Returns 0 on success, -errno when perf_data__write() fails.
+ */
+static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
+{
+	ssize_t written = perf_data__write(&inject->output, buf, sz);
+
+	if (written < 0)
+		return -errno;
+
+	inject->bytes_written += written;
+	return 0;
+}
+
+/*
+ * Copy @event unchanged to the output; its length is taken from
+ * event->header.size.  Returns the output_bytes() result.
+ */
+static int perf_event__repipe_synth(struct perf_tool *tool,
+				    union perf_event *event)
+{
+	struct perf_inject *inject;
+
+	inject = container_of(tool, struct perf_inject, tool);
+
+	return output_bytes(inject, event, event->header.size);
+}
+
+/*
+ * Adapter with the ordered-events callback signature: ignores @oe and
+ * copies @event to the output.
+ */
+static int perf_event__repipe_oe_synth(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct ordered_events *oe __maybe_unused)
+{
+	return perf_event__repipe_synth(tool, event);
+}
+
+#ifdef HAVE_JITDUMP
+/*
+ * Ordered-events callback that discards the event: nothing is written and
+ * 0 is returned.  Installed as finished_round handler in JIT mode.
+ */
+static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
+			       union perf_event *event __maybe_unused,
+			       struct ordered_events *oe __maybe_unused)
+{
+	return 0;
+}
+#endif
+
+/*
+ * Adapter for callbacks that take a session argument: ignores @session
+ * and copies @event to the output.
+ */
+static int perf_event__repipe_op2_synth(struct perf_tool *tool,
+					union perf_event *event,
+					struct perf_session *session
+					__maybe_unused)
+{
+	return perf_event__repipe_synth(tool, event);
+}
+
+/*
+ * Process an attr event and, only when the output is a pipe, copy it to
+ * the output as well.
+ *
+ * Returns the perf_event__process_attr() error if there is one, otherwise
+ * 0 for file output or the repipe result for pipe output.
+ */
+static int perf_event__repipe_attr(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_evlist **pevlist)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject,
+						  tool);
+	int err = perf_event__process_attr(tool, event, pevlist);
+
+	if (err)
+		return err;
+
+	return inject->output.is_pipe ? perf_event__repipe_synth(tool, event) : 0;
+}
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+/*
+ * Copy @size bytes from file descriptor @fd to the inject output, in
+ * chunks of at most 4096 bytes.
+ *
+ * Returns 0 on success, -errno when read() fails, -EIO when the input
+ * ends before @size bytes were read, or the output_bytes() error.
+ */
+static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
+{
+	char buf[4096];
+	ssize_t ssz;
+	int ret;
+
+	while (size > 0) {
+		ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
+		if (ssz < 0)
+			return -errno;
+		/*
+		 * A zero-length read means end of input.  Without this check
+		 * 'size' would never shrink and the loop would spin forever
+		 * on truncated input.
+		 */
+		if (ssz == 0)
+			return -EIO;
+		ret = output_bytes(inject, buf, ssz);
+		if (ret)
+			return ret;
+		size -= ssz;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy an auxtrace event together with its trailing AUX data to the
+ * output.  For file output, the current output offset is first recorded
+ * in the session's auxtrace index.
+ *
+ * Returns the AUX data size (event->auxtrace.size) on success or a
+ * negative error code.
+ */
+static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct perf_session *session)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject,
+						  tool);
+	int err;
+
+	inject->have_auxtrace = true;
+
+	if (!inject->output.is_pipe) {
+		off_t pos = lseek(inject->output.file.fd, 0, SEEK_CUR);
+
+		if (pos == -1)
+			return -errno;
+
+		err = auxtrace_index__auxtrace_event(&session->auxtrace_index,
+						     event, pos);
+		if (err < 0)
+			return err;
+	}
+
+	if (!perf_data__is_pipe(session->data) && session->one_mmap) {
+		/*
+		 * Single write covering header and AUX data -- presumably
+		 * the data follows the event in the mapped input here.
+		 */
+		err = output_bytes(inject, event,
+				   event->header.size + event->auxtrace.size);
+	} else {
+		/* Write the event, then pull the AUX data from the input fd. */
+		err = output_bytes(inject, event, event->header.size);
+		if (err < 0)
+			return err;
+
+		err = copy_bytes(inject, perf_data__fd(session->data),
+				 event->auxtrace.size);
+	}
+
+	if (err < 0)
+		return err;
+
+	return event->auxtrace.size;
+}
+
+#else
+
+/*
+ * Variant used when HAVE_AUXTRACE_SUPPORT is not defined: reports the
+ * missing support and fails with -EINVAL.
+ */
+static s64
+perf_event__repipe_auxtrace(struct perf_tool *tool __maybe_unused,
+			    union perf_event *event __maybe_unused,
+			    struct perf_session *session __maybe_unused)
+{
+	pr_err("AUX area tracing not supported\n");
+	return -EINVAL;
+}
+
+#endif
+
+/*
+ * Adapter with the per-event callback signature: ignores @sample and
+ * @machine and copies @event to the output.
+ */
+static int perf_event__repipe(struct perf_tool *tool,
+			      union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	return perf_event__repipe_synth(tool, event);
+}
+
+/* Per-event callback that discards the event: writes nothing, returns 0. */
+static int perf_event__drop(struct perf_tool *tool __maybe_unused,
+			    union perf_event *event __maybe_unused,
+			    struct perf_sample *sample __maybe_unused,
+			    struct machine *machine __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Discard the event, but remember the sample id of the first one seen in
+ * inject->aux_id (only while aux_id is still zero).  Always returns 0.
+ */
+static int perf_event__drop_aux(struct perf_tool *tool,
+				union perf_event *event __maybe_unused,
+				struct perf_sample *sample,
+				struct machine *machine __maybe_unused)
+{
+	struct perf_inject *inject;
+
+	inject = container_of(tool, struct perf_inject, tool);
+	if (inject->aux_id == 0)
+		inject->aux_id = sample->id;
+
+	return 0;
+}
+
+/*
+ * Signature of the per-evsel sample handlers stored in evsel->handler and
+ * invoked by perf_event__repipe_sample().
+ */
+typedef int (*inject_handler)(struct perf_tool *tool,
+			      union perf_event *event,
+			      struct perf_sample *sample,
+			      struct perf_evsel *evsel,
+			      struct machine *machine);
+
+/*
+ * Default sample callback.  When the evsel has its own handler the sample
+ * is delegated to it; otherwise the DSO hit is marked and the event is
+ * copied to the output.
+ */
+static int perf_event__repipe_sample(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct perf_evsel *evsel,
+				     struct machine *machine)
+{
+	inject_handler handler = evsel->handler;
+
+	if (handler != NULL)
+		return handler(tool, event, sample, evsel, machine);
+
+	build_id__mark_dso_hit(tool, event, sample, evsel, machine);
+	return perf_event__repipe_synth(tool, event);
+}
+
+/*
+ * Process an mmap event and copy it to the output.  The event is copied
+ * even when processing failed; the processing result is what gets
+ * returned, the repipe result is ignored.
+ */
+static int perf_event__repipe_mmap(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	int err = perf_event__process_mmap(tool, event, sample, machine);
+
+	perf_event__repipe(tool, event, sample, machine);
+	return err;
+}
+
+#ifdef HAVE_JITDUMP
+/*
+ * JIT-mode mmap callback.  The mapped file name is handed to
+ * jit_process(): if it is a jit marker, jit mmaps are injected and ELF
+ * images generated, and the mmap event itself is not repiped.  Otherwise
+ * the event takes the regular perf_event__repipe_mmap() path.
+ *
+ * Returns a negative jit_process() error, 0 after a jit injection, or the
+ * perf_event__repipe_mmap() result.
+ */
+static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct perf_sample *sample,
+				       struct machine *machine)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	u64 bytes = 0;
+	int ret = jit_process(inject->session, &inject->output, machine,
+			      event->mmap.filename, sample->pid, &bytes);
+
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0)
+		return perf_event__repipe_mmap(tool, event, sample, machine);
+
+	/* jit_process() reported how much it wrote; account for it. */
+	inject->bytes_written += bytes;
+	return 0;
+}
+#endif
+
+/*
+ * Process an mmap2 event and copy it to the output.  The event is copied
+ * even when processing failed; the processing result is what gets
+ * returned, the repipe result is ignored.
+ */
+static int perf_event__repipe_mmap2(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	int err = perf_event__process_mmap2(tool, event, sample, machine);
+
+	perf_event__repipe(tool, event, sample, machine);
+	return err;
+}
+
+#ifdef HAVE_JITDUMP
+/*
+ * JIT-mode mmap2 callback.  The mapped file name is handed to
+ * jit_process(): if it is a jit marker, jit mmaps are injected and ELF
+ * images generated, and the mmap2 event itself is not repiped.  Otherwise
+ * the event takes the regular perf_event__repipe_mmap2() path.
+ *
+ * Returns a negative jit_process() error, 0 after a jit injection, or the
+ * perf_event__repipe_mmap2() result.
+ */
+static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
+					union perf_event *event,
+					struct perf_sample *sample,
+					struct machine *machine)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	u64 bytes = 0;
+	int ret = jit_process(inject->session, &inject->output, machine,
+			      event->mmap2.filename, sample->pid, &bytes);
+
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0)
+		return perf_event__repipe_mmap2(tool, event, sample, machine);
+
+	/* jit_process() reported how much it wrote; account for it. */
+	inject->bytes_written += bytes;
+	return 0;
+}
+#endif
+
+/*
+ * Process a fork event and copy it to the output; returns the processing
+ * result (the repipe result is ignored).
+ */
+static int perf_event__repipe_fork(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	int err = perf_event__process_fork(tool, event, sample, machine);
+
+	perf_event__repipe(tool, event, sample, machine);
+	return err;
+}
+
+/*
+ * Process a comm event and copy it to the output; returns the processing
+ * result (the repipe result is ignored).
+ */
+static int perf_event__repipe_comm(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	int err = perf_event__process_comm(tool, event, sample, machine);
+
+	perf_event__repipe(tool, event, sample, machine);
+	return err;
+}
+
+/*
+ * Process a namespaces event and copy it to the output; returns the
+ * processing result (the repipe result is ignored).
+ */
+static int perf_event__repipe_namespaces(struct perf_tool *tool,
+					 union perf_event *event,
+					 struct perf_sample *sample,
+					 struct machine *machine)
+{
+	int err;
+
+	err = perf_event__process_namespaces(tool, event, sample, machine);
+	perf_event__repipe(tool, event, sample, machine);
+
+	return err;
+}
+
+/*
+ * Process an exit event and copy it to the output; returns the processing
+ * result (the repipe result is ignored).
+ */
+static int perf_event__repipe_exit(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	int err = perf_event__process_exit(tool, event, sample, machine);
+
+	perf_event__repipe(tool, event, sample, machine);
+	return err;
+}
+
+/*
+ * Copy a tracing data event to the output first, then process it.
+ * Returns the processing result (the repipe result is ignored).
+ */
+static int perf_event__repipe_tracing_data(struct perf_tool *tool,
+					   union perf_event *event,
+					   struct perf_session *session)
+{
+	perf_event__repipe_synth(tool, event);
+
+	return perf_event__process_tracing_data(tool, event, session);
+}
+
+/*
+ * Copy an id index event to the output first, then process it.
+ * Returns the processing result (the repipe result is ignored).
+ */
+static int perf_event__repipe_id_index(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct perf_session *session)
+{
+	perf_event__repipe_synth(tool, event);
+
+	return perf_event__process_id_index(tool, event, session);
+}
+
+/*
+ * Make sure @dso has a build id, reading it from dso->long_name when it
+ * is not already set.  Returns 0 when a build id is available, -1 if not.
+ */
+static int dso__read_build_id(struct dso *dso)
+{
+	if (dso->has_build_id)
+		return 0;
+
+	if (filename__read_build_id(dso->long_name, dso->build_id,
+				    sizeof(dso->build_id)) <= 0)
+		return -1;
+
+	dso->has_build_id = true;
+	return 0;
+}
+
+/*
+ * Synthesize a build id event for @dso and write it to the output through
+ * perf_event__repipe().
+ *
+ * Returns 0 on success, -1 when no build id could be read or the event
+ * could not be synthesized.
+ */
+static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
+				struct machine *machine)
+{
+	u16 misc;
+
+	if (dso__read_build_id(dso) < 0) {
+		pr_debug("no build_id found for %s\n", dso->long_name);
+		return -1;
+	}
+
+	misc = dso->kernel ? PERF_RECORD_MISC_KERNEL : PERF_RECORD_MISC_USER;
+
+	if (perf_event__synthesize_build_id(tool, dso, misc, perf_event__repipe,
+					    machine)) {
+		pr_err("Can't synthesize build_id event for %s\n", dso->long_name);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Sample callback for --build-ids.  The first time a sample hits a DSO,
+ * the DSO is marked as hit and a build id event for it is injected ahead
+ * of the sample.  The sample itself is always copied to the output and 0
+ * is always returned.
+ */
+static int perf_event__inject_buildid(struct perf_tool *tool,
+				      union perf_event *event,
+				      struct perf_sample *sample,
+				      struct perf_evsel *evsel __maybe_unused,
+				      struct machine *machine)
+{
+	struct addr_location al;
+	struct thread *thread;
+
+	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+	if (thread == NULL) {
+		pr_err("problem processing %d event, skipping it.\n",
+		       event->header.type);
+		goto repipe;
+	}
+
+	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
+
+	/* Nothing to inject without a map or when this DSO was already seen. */
+	if (al.map == NULL || al.map->dso->hit)
+		goto put_thread;
+
+	al.map->dso->hit = 1;
+	if (map__load(al.map) >= 0) {
+		/*
+		 * If this fails, too bad, let the other side
+		 * account this as unresolved.
+		 */
+		dso__inject_build_id(al.map->dso, tool, machine);
+	} else {
+#ifdef HAVE_LIBELF_SUPPORT
+		pr_warning("no symbols found in %s, maybe "
+			   "install a debug package?\n",
+			   al.map->dso->long_name);
+#endif
+	}
+
+put_thread:
+	thread__put(thread);
+repipe:
+	perf_event__repipe(tool, event, sample, machine);
+	return 0;
+}
+
+/*
+ * Drop the saved event for the sample's tid, if there is one: the first
+ * matching entry on inject->samples is unlinked and freed.
+ * Always returns 0.
+ */
+static int perf_inject__sched_process_exit(struct perf_tool *tool,
+					   union perf_event *event __maybe_unused,
+					   struct perf_sample *sample,
+					   struct perf_evsel *evsel __maybe_unused,
+					   struct machine *machine __maybe_unused)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	struct event_entry *ent;
+
+	list_for_each_entry(ent, &inject->samples, node) {
+		if (sample->tid != ent->tid)
+			continue;
+
+		/* Stop right away: ent must not be touched after free(). */
+		list_del_init(&ent->node);
+		free(ent);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Save a copy of a sched_switch event, keyed by the sample's tid, so that
+ * perf_inject__sched_stat() can find it later.  Any entry previously saved
+ * for the same tid is dropped first.
+ *
+ * Returns 0 on success, -1 when the copy cannot be allocated.
+ */
+static int perf_inject__sched_switch(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct perf_evsel *evsel,
+				     struct machine *machine)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	struct event_entry *entry;
+
+	/* Replace, rather than accumulate, entries for this tid. */
+	perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
+
+	entry = malloc(sizeof(*entry) + event->header.size);
+	if (!entry) {
+		color_fprintf(stderr, PERF_COLOR_RED,
+			     "Not enough memory to process sched switch event!");
+		return -1;
+	}
+
+	entry->tid = sample->tid;
+	memcpy(entry->event, event, event->header.size);
+	list_add(&entry->node, &inject->samples);
+
+	return 0;
+}
+
+/*
+ * Handle a sched_stat sample: look up the saved sched_switch event whose
+ * tid equals the sample's "pid" field, rewrite that saved event with the
+ * period and time of the sched_stat sample and write it to the output.
+ *
+ * Returns 0 when no saved event matches, otherwise the repipe result.
+ */
+static int perf_inject__sched_stat(struct perf_tool *tool,
+				   union perf_event *event __maybe_unused,
+				   struct perf_sample *sample,
+				   struct perf_evsel *evsel,
+				   struct machine *machine)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	u32 pid = perf_evsel__intval(evsel, sample, "pid");
+	struct perf_sample sample_sw;
+	union perf_event *event_sw;
+	struct event_entry *ent;
+	bool found = false;
+
+	list_for_each_entry(ent, &inject->samples, node) {
+		if (pid == ent->tid) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		return 0;
+
+	event_sw = &ent->event[0];
+	perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+
+	/* Carry the sched_stat timing over to the saved switch event. */
+	sample_sw.period = sample->period;
+	sample_sw.time	 = sample->time;
+	perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
+				      evsel->attr.read_format, &sample_sw);
+	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
+
+	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+}
+
+/*
+ * Signal handler installed for SIGINT by __cmd_inject(): sets the
+ * session_done flag (presumably polled by the session event loop --
+ * defined outside this file).
+ */
+static void sig_handler(int sig __maybe_unused)
+{
+	session_done = 1;
+}
+
+/*
+ * Check that @evsel samples carry at least one of the bits in
+ * @sample_type.
+ *
+ * @sample_msg: human readable name of the attribute, for the error text
+ *
+ * Returns 0 when the attribute is set, -EINVAL (after logging) if not.
+ */
+static int perf_evsel__check_stype(struct perf_evsel *evsel,
+				   u64 sample_type, const char *sample_msg)
+{
+	const char *name = perf_evsel__name(evsel);
+
+	if (evsel->attr.sample_type & sample_type)
+		return 0;
+
+	pr_err("Samples for %s event do not have %s attribute set.",
+		name, sample_msg);
+	return -EINVAL;
+}
+
+/*
+ * Sample handler that discards the sample.  Its address also serves as a
+ * marker: strip_init() installs it on every evsel, and ok_to_remove() /
+ * strip_fini() compare evsel->handler against it.
+ */
+static int drop_sample(struct perf_tool *tool __maybe_unused,
+		       union perf_event *event __maybe_unused,
+		       struct perf_sample *sample __maybe_unused,
+		       struct perf_evsel *evsel __maybe_unused,
+		       struct machine *machine __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Set up --strip: context switch events are dropped and every evsel
+ * currently in the session's evlist gets drop_sample() as its handler.
+ */
+static void strip_init(struct perf_inject *inject)
+{
+	struct perf_evsel *evsel;
+
+	inject->tool.context_switch = perf_event__drop;
+
+	evlist__for_each_entry(inject->session->evlist, evsel)
+		evsel->handler = drop_sample;
+}
+
+/* True when @evsel has any of the mmap, mmap2, comm or task attr bits set. */
+static bool has_tracking(struct perf_evsel *evsel)
+{
+	const struct perf_event_attr *attr = &evsel->attr;
+
+	return attr->mmap || attr->mmap2 || attr->comm || attr->task;
+}
+
+/* Sample type bits that ok_to_remove() requires to be identical. */
+#define COMPAT_MASK (PERF_SAMPLE_ID | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
+		     PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER)
+
+/*
+ * For the perf.data file to stay parsable, tracking events like MMAP need
+ * the event that selected them to exist.  The exception is when exactly
+ * one selected event is left and its sample type is compatible: then the
+ * tracking evsel may go.
+ *
+ * Returns true when @evsel_to_remove can be removed from @evlist.
+ */
+static bool ok_to_remove(struct perf_evlist *evlist,
+			 struct perf_evsel *evsel_to_remove)
+{
+	struct perf_evsel *evsel;
+	bool compatible = false;
+	int remaining = 0;
+	u64 wanted;
+
+	if (!has_tracking(evsel_to_remove))
+		return true;
+
+	wanted = evsel_to_remove->attr.sample_type & COMPAT_MASK;
+
+	/* Look only at the evsels that are not being dropped. */
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->handler == drop_sample)
+			continue;
+
+		remaining++;
+		if ((evsel->attr.sample_type & COMPAT_MASK) == wanted)
+			compatible = true;
+	}
+
+	return compatible && remaining == 1;
+}
+
+/*
+ * Finish --strip: delete every evsel that still has the drop_sample()
+ * handler, provided ok_to_remove() allows it.
+ */
+static void strip_fini(struct perf_inject *inject)
+{
+	struct perf_evlist *evlist = inject->session->evlist;
+	struct perf_evsel *evsel, *tmp;
+
+	/* Remove non-synthesized evsels if possible */
+	evlist__for_each_entry_safe(evlist, tmp, evsel) {
+		if (evsel->handler != drop_sample)
+			continue;
+		if (!ok_to_remove(evlist, evsel))
+			continue;
+
+		pr_debug("Deleting %s\n", perf_evsel__name(evsel));
+		perf_evlist__remove(evlist, evsel);
+		perf_evsel__delete(evsel);
+	}
+}
+
+/*
+ * Run the injection: install the tool callbacks required by the selected
+ * mode (--build-ids, --sched-stat or --itrace), process all events of the
+ * input session and, for file output, write the perf.data header at the
+ * end.
+ *
+ * Returns 0 on success, -EINVAL when a sched:sched_switch evsel lacks the
+ * TID sample attribute, or the perf_session__process_events() error.
+ */
+static int __cmd_inject(struct perf_inject *inject)
+{
+	int ret = -EINVAL;
+	struct perf_session *session = inject->session;
+	struct perf_data *data_out = &inject->output;
+	int fd = perf_data__fd(data_out);
+	u64 output_data_offset;
+
+	signal(SIGINT, sig_handler);
+
+	/* These modes need the events processed, not just copied. */
+	if (inject->build_ids || inject->sched_stat ||
+	    inject->itrace_synth_opts.set) {
+		inject->tool.mmap	  = perf_event__repipe_mmap;
+		inject->tool.mmap2	  = perf_event__repipe_mmap2;
+		inject->tool.fork	  = perf_event__repipe_fork;
+		inject->tool.tracing_data = perf_event__repipe_tracing_data;
+	}
+
+	output_data_offset = session->header.data_offset;
+
+	if (inject->build_ids) {
+		inject->tool.sample = perf_event__inject_buildid;
+	} else if (inject->sched_stat) {
+		struct perf_evsel *evsel;
+
+		/* Attach the sched handlers to the matching tracepoints. */
+		evlist__for_each_entry(session->evlist, evsel) {
+			const char *name = perf_evsel__name(evsel);
+
+			if (!strcmp(name, "sched:sched_switch")) {
+				if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+					return -EINVAL;
+
+				evsel->handler = perf_inject__sched_switch;
+			} else if (!strcmp(name, "sched:sched_process_exit"))
+				evsel->handler = perf_inject__sched_process_exit;
+			else if (!strncmp(name, "sched:sched_stat_", 17))
+				evsel->handler = perf_inject__sched_stat;
+		}
+	} else if (inject->itrace_synth_opts.set) {
+		session->itrace_synth_opts = &inject->itrace_synth_opts;
+		inject->itrace_synth_opts.inject = true;
+		inject->tool.comm	    = perf_event__repipe_comm;
+		inject->tool.namespaces	    = perf_event__repipe_namespaces;
+		inject->tool.exit	    = perf_event__repipe_exit;
+		inject->tool.id_index	    = perf_event__repipe_id_index;
+		inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
+		inject->tool.auxtrace	    = perf_event__process_auxtrace;
+		inject->tool.aux	    = perf_event__drop_aux;
+		inject->tool.itrace_start   = perf_event__drop_aux;
+		inject->tool.ordered_events = true;
+		inject->tool.ordering_requires_timestamps = true;
+		/* Allow space in the header for new attributes */
+		output_data_offset = 4096;
+		if (inject->strip)
+			strip_init(inject);
+	}
+
+	if (!inject->itrace_synth_opts.set)
+		auxtrace_index__free(&session->auxtrace_index);
+
+	/* File output: leave room for the header, which is written last. */
+	if (!data_out->is_pipe)
+		lseek(fd, output_data_offset, SEEK_SET);
+
+	ret = perf_session__process_events(session);
+	if (ret)
+		return ret;
+
+	if (!data_out->is_pipe) {
+		if (inject->build_ids)
+			perf_header__set_feat(&session->header,
+					      HEADER_BUILD_ID);
+		/*
+		 * Keep all buildids when there is unprocessed AUX data because
+		 * it is not known which ones the AUX trace hits.
+		 */
+		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
+		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
+			dsos__hit_all(session);
+		/*
+		 * The AUX areas have been removed and replaced with
+		 * synthesized hardware events, so clear the feature flag and
+		 * remove the evsel.
+		 */
+		if (inject->itrace_synth_opts.set) {
+			struct perf_evsel *evsel;
+
+			perf_header__clear_feat(&session->header,
+						HEADER_AUXTRACE);
+			if (inject->itrace_synth_opts.last_branch)
+				perf_header__set_feat(&session->header,
+						      HEADER_BRANCH_STACK);
+			evsel = perf_evlist__id2evsel_strict(session->evlist,
+							     inject->aux_id);
+			if (evsel) {
+				pr_debug("Deleting %s\n",
+					 perf_evsel__name(evsel));
+				perf_evlist__remove(session->evlist, evsel);
+				perf_evsel__delete(evsel);
+			}
+			if (inject->strip)
+				strip_fini(inject);
+		}
+		/* The header describes what was actually written. */
+		session->header.data_offset = output_data_offset;
+		session->header.data_size = inject->bytes_written;
+		perf_session__write_header(session, session->evlist, fd, true);
+	}
+
+	return ret;
+}
+
+/*
+ * Entry point of 'perf inject'.
+ *
+ * Parses the options, opens the output, creates the input session and
+ * runs __cmd_inject().  By default every event is copied unchanged from
+ * input to output; the options select what is injected on the way.
+ *
+ * Returns -1 on option or setup errors, a negative symbol__init() result,
+ * or the result of __cmd_inject().
+ */
+int cmd_inject(int argc, const char **argv)
+{
+	struct perf_inject inject = {
+		/* Default callbacks: repipe everything. */
+		.tool = {
+			.sample		= perf_event__repipe_sample,
+			.mmap		= perf_event__repipe,
+			.mmap2		= perf_event__repipe,
+			.comm		= perf_event__repipe,
+			.fork		= perf_event__repipe,
+			.exit		= perf_event__repipe,
+			.lost		= perf_event__repipe,
+			.lost_samples	= perf_event__repipe,
+			.aux		= perf_event__repipe,
+			.itrace_start	= perf_event__repipe,
+			.context_switch	= perf_event__repipe,
+			.read		= perf_event__repipe_sample,
+			.throttle	= perf_event__repipe,
+			.unthrottle	= perf_event__repipe,
+			.attr		= perf_event__repipe_attr,
+			.tracing_data	= perf_event__repipe_op2_synth,
+			.auxtrace_info	= perf_event__repipe_op2_synth,
+			.auxtrace	= perf_event__repipe_auxtrace,
+			.auxtrace_error	= perf_event__repipe_op2_synth,
+			.time_conv	= perf_event__repipe_op2_synth,
+			.finished_round	= perf_event__repipe_oe_synth,
+			.build_id	= perf_event__repipe_op2_synth,
+			.id_index	= perf_event__repipe_op2_synth,
+			.feature	= perf_event__repipe_op2_synth,
+		},
+		/* Input and output paths both default to "-". */
+		.input_name  = "-",
+		.samples = LIST_HEAD_INIT(inject.samples),
+		.output = {
+			.file      = {
+				.path = "-",
+			},
+			.mode      = PERF_DATA_MODE_WRITE,
+		},
+	};
+	struct perf_data data = {
+		.mode = PERF_DATA_MODE_READ,
+	};
+	int ret;
+
+	struct option options[] = {
+		OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
+			    "Inject build-ids into the output stream"),
+		OPT_STRING('i', "input", &inject.input_name, "file",
+			   "input file name"),
+		OPT_STRING('o', "output", &inject.output.file.path, "file",
+			   "output file name"),
+		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
+			    "Merge sched-stat and sched-switch for getting events "
+			    "where and how long tasks slept"),
+#ifdef HAVE_JITDUMP
+		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
+#endif
+		OPT_INCR('v', "verbose", &verbose,
+			 "be more verbose (show build ids, etc)"),
+		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
+			   "kallsyms pathname"),
+		OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
+		OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
+				    NULL, "opts", "Instruction Tracing options",
+				    itrace_parse_synth_opts),
+		OPT_BOOLEAN(0, "strip", &inject.strip,
+			    "strip non-synthesized events (use with --itrace)"),
+		OPT_END()
+	};
+	const char * const inject_usage[] = {
+		"perf inject [<options>]",
+		NULL
+	};
+#ifndef HAVE_JITDUMP
+	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
+#endif
+	argc = parse_options(argc, argv, options, inject_usage, 0);
+
+	/*
+	 * Any (unrecognized) arguments left?
+	 */
+	if (argc)
+		usage_with_options(inject_usage, options);
+
+	if (inject.strip && !inject.itrace_synth_opts.set) {
+		pr_err("--strip option requires --itrace option\n");
+		return -1;
+	}
+
+	if (perf_data__open(&inject.output)) {
+		perror("failed to create output file");
+		return -1;
+	}
+
+	/* Set before the session is created, which is handed &inject.tool. */
+	inject.tool.ordered_events = inject.sched_stat;
+
+	data.file.path = inject.input_name;
+	inject.session = perf_session__new(&data, true, &inject.tool);
+	if (inject.session == NULL)
+		return -1;
+
+	if (inject.build_ids) {
+		/*
+		 * Process events in timestamp order so that the mmap
+		 * records are applied in the correct order, which matters
+		 * especially for jitted code mmaps. We cannot generate the
+		 * buildid hit list and inject the jit mmaps at the same
+		 * time for now.
+		 */
+		inject.tool.ordered_events = true;
+		inject.tool.ordering_requires_timestamps = true;
+	}
+#ifdef HAVE_JITDUMP
+	if (inject.jit_mode) {
+		inject.tool.mmap2	   = perf_event__jit_repipe_mmap2;
+		inject.tool.mmap	   = perf_event__jit_repipe_mmap;
+		inject.tool.ordered_events = true;
+		inject.tool.ordering_requires_timestamps = true;
+		/*
+		 * JIT MMAP injection injects all MMAP events in one go, so it
+		 * does not obey finished_round semantics.
+		 */
+		inject.tool.finished_round = perf_event__drop_oe;
+	}
+#endif
+	ret = symbol__init(&inject.session->header.env);
+	if (ret < 0)
+		goto out_delete;
+
+	ret = __cmd_inject(&inject);
+
+out_delete:
+	perf_session__delete(inject.session);
+	return ret;
+}
diff --git a/builtin-kallsyms.c b/builtin-kallsyms.c
new file mode 100644
index 0000000..bcfb363
--- /dev/null
+++ b/builtin-kallsyms.c
@@ -0,0 +1,68 @@
+/*
+ * builtin-kallsyms.c
+ *
+ * Builtin command: Look for a symbol in the running kernel and its modules
+ *
+ * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include <inttypes.h>
+#include "builtin.h"
+#include <linux/compiler.h>
+#include <subcmd/parse-options.h>
+#include "debug.h"
+#include "machine.h"
+#include "symbol.h"
+
+/*
+ * Look up each of the @argc names in @argv as a kernel function and print
+ * one line per name: either "<name>: not found" or the symbol name, the
+ * short and long names of its DSO and its start/end addresses, both as
+ * translated by map->unmap_ip() and as stored in the symbol.
+ *
+ * Returns -1 when the kallsyms machine cannot be created, 0 otherwise.
+ */
+static int __cmd_kallsyms(int argc, const char **argv)
+{
+	struct machine *machine = machine__new_kallsyms();
+	int idx;
+
+	if (!machine) {
+		pr_err("Couldn't read /proc/kallsyms\n");
+		return -1;
+	}
+
+	for (idx = 0; idx < argc; ++idx) {
+		const char *name = argv[idx];
+		struct map *map;
+		struct symbol *sym;
+
+		sym = machine__find_kernel_function_by_name(machine, name, &map);
+		if (!sym) {
+			printf("%s: not found\n", name);
+			continue;
+		}
+
+		printf("%s: %s %s %#" PRIx64 "-%#" PRIx64 " (%#" PRIx64 "-%#" PRIx64")\n",
+			sym->name, map->dso->short_name, map->dso->long_name,
+			map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end),
+			sym->start, sym->end);
+	}
+
+	machine__delete(machine);
+	return 0;
+}
+
+/*
+ * Entry point of 'perf kallsyms': parses the options, initializes the
+ * symbol subsystem and looks up every remaining argument as a symbol name.
+ *
+ * Returns -1 when symbol__init() fails, otherwise the __cmd_kallsyms()
+ * result.
+ */
+int cmd_kallsyms(int argc, const char **argv)
+{
+	const struct option options[] = {
+	OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"),
+	OPT_END()
+	};
+	const char * const kallsyms_usage[] = {
+		"perf kallsyms [<options>] symbol_name",
+		NULL
+	};
+
+	argc = parse_options(argc, argv, options, kallsyms_usage, 0);
+	if (argc < 1)
+		usage_with_options(kallsyms_usage, options);
+
+	symbol_conf.sort_by_name = true;
+	/* Only search the vmlinux path when no vmlinux was named. */
+	symbol_conf.try_vmlinux_path = !symbol_conf.vmlinux_name;
+
+	return symbol__init(NULL) < 0 ? -1 : __cmd_kallsyms(argc, argv);
+}
diff --git a/builtin-kmem.c b/builtin-kmem.c
new file mode 100644
index 0000000..ae11e4c
--- /dev/null
+++ b/builtin-kmem.c
@@ -0,0 +1,2019 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/util.h"
+#include "util/config.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/callchain.h"
+#include "util/time-utils.h"
+
+#include <subcmd/parse-options.h>
+#include "util/trace-event.h"
+#include "util/data.h"
+#include "util/cpumap.h"
+
+#include "util/debug.h"
+
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <locale.h>
+#include <regex.h>
+
+#include "sane_ctype.h"
+
+static int	kmem_slab;
+static int	kmem_page;
+
+static long	kmem_page_size;
+static enum {
+	KMEM_SLAB,
+	KMEM_PAGE,
+} kmem_default = KMEM_SLAB;  /* for backward compatibility */
+
+struct alloc_stat;
+typedef int (*sort_fn_t)(void *, void *);
+
+static int			alloc_flag;
+static int			caller_flag;
+
+static int			alloc_lines = -1;
+static int			caller_lines = -1;
+
+static bool			raw_ip;
+
+struct alloc_stat {
+	u64	call_site;	/* key in root_caller_stat; latest call site for per-pointer entries */
+	u64	ptr;		/* key in root_alloc_stat */
+	u64	bytes_req;	/* running sum of requested bytes */
+	u64	bytes_alloc;	/* running sum of allocated bytes */
+	u64	last_alloc;	/* bytes_alloc of the latest allocation; added to total_freed on free */
+	u32	hit;		/* number of allocation events folded into this entry */
+	u32	pingpong;	/* frees seen on a CPU other than alloc_cpu */
+
+	short	alloc_cpu;	/* CPU of the latest allocation; set to -1 by the free handler */
+
+	struct rb_node node;
+};
+
+static struct rb_root root_alloc_stat;
+static struct rb_root root_alloc_sorted;
+static struct rb_root root_caller_stat;
+static struct rb_root root_caller_sorted;
+
+static unsigned long total_requested, total_allocated, total_freed;
+static unsigned long nr_allocs, nr_cross_allocs;
+
+/* filters for controlling start and stop of time of analysis */
+static struct perf_time_interval ptime;
+const char *time_str;
+
+/*
+ * Account one slab allocation in the per-pointer tree (root_alloc_stat, keyed
+ * by ptr).  Returns 0, or -1 if a new entry cannot be allocated.
+ */
+static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
+			     int bytes_req, int bytes_alloc, int cpu)
+{
+	struct rb_node **link = &root_alloc_stat.rb_node;
+	struct rb_node *parent = NULL;
+	struct alloc_stat *stat = NULL;
+
+	while (*link != NULL) {
+		parent = *link;
+		stat = rb_entry(parent, struct alloc_stat, node);
+		if (ptr == stat->ptr)
+			break;
+		link = ptr > stat->ptr ? &parent->rb_right : &parent->rb_left;
+	}
+
+	if (stat != NULL && stat->ptr == ptr) {
+		/* pointer seen before: fold this allocation into its entry */
+		stat->hit += 1;
+		stat->bytes_req += bytes_req;
+		stat->bytes_alloc += bytes_alloc;
+	} else {
+		stat = malloc(sizeof(*stat));
+		if (stat == NULL) {
+			pr_err("%s: malloc failed\n", __func__);
+			return -1;
+		}
+		stat->ptr = ptr;
+		stat->hit = 1;
+		stat->pingpong = 0;
+		stat->bytes_req = bytes_req;
+		stat->bytes_alloc = bytes_alloc;
+		rb_link_node(&stat->node, parent, link);
+		rb_insert_color(&stat->node, &root_alloc_stat);
+	}
+	/* both paths record the details of this most recent allocation */
+	stat->call_site = call_site;
+	stat->alloc_cpu = cpu;
+	stat->last_alloc = bytes_alloc;
+	return 0;
+}
+
+/*
+ * Account one slab allocation in the per-callsite tree (root_caller_stat,
+ * keyed by call_site).  Returns 0, or -1 if a new entry cannot be allocated.
+ */
+static int insert_caller_stat(unsigned long call_site,
+			      int bytes_req, int bytes_alloc)
+{
+	struct rb_node **link = &root_caller_stat.rb_node;
+	struct rb_node *parent = NULL;
+	struct alloc_stat *stat = NULL;
+
+	while (*link != NULL) {
+		parent = *link;
+		stat = rb_entry(parent, struct alloc_stat, node);
+		if (call_site == stat->call_site)
+			break;
+		link = call_site > stat->call_site ? &parent->rb_right : &parent->rb_left;
+	}
+
+	if (stat != NULL && stat->call_site == call_site) {
+		/* known call site: just accumulate */
+		stat->hit += 1;
+		stat->bytes_req += bytes_req;
+		stat->bytes_alloc += bytes_alloc;
+		return 0;
+	}
+
+	stat = malloc(sizeof(*stat));
+	if (stat == NULL) {
+		pr_err("%s: malloc failed\n", __func__);
+		return -1;
+	}
+	stat->call_site = call_site;
+	stat->hit = 1;
+	stat->pingpong = 0;
+	stat->bytes_req = bytes_req;
+	stat->bytes_alloc = bytes_alloc;
+	rb_link_node(&stat->node, parent, link);
+	rb_insert_color(&stat->node, &root_caller_stat);
+	return 0;
+}
+
+/* Slab allocation handler: update both slab trees, then the global totals. */
+static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
+					   struct perf_sample *sample)
+{
+	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
+	unsigned long call_site = perf_evsel__intval(evsel, sample, "call_site");
+	int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req");
+	int bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");
+
+	if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu))
+		return -1;
+	if (insert_caller_stat(call_site, bytes_req, bytes_alloc))
+		return -1;
+	total_requested += bytes_req;
+	total_allocated += bytes_alloc;
+	nr_allocs++;
+	return 0;
+}
+
+/* As the plain alloc handler, plus a count of events whose "node" differs from the CPU's node. */
+static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
+						struct perf_sample *sample)
+{
+	int cpu_node, alloc_node;
+	int ret = perf_evsel__process_alloc_event(evsel, sample);
+
+	if (ret)
+		return ret;
+	cpu_node = cpu__get_node(sample->cpu);
+	alloc_node = perf_evsel__intval(evsel, sample, "node");
+	if (cpu_node != alloc_node)
+		nr_cross_allocs++;
+	return 0;
+}
+
+static int ptr_cmp(void *, void *);
+static int slab_callsite_cmp(void *, void *);
+
+/*
+ * Walk @root looking for an entry that @sort_fn considers equal to a key
+ * built from @ptr and @call_site.  Returns the entry or NULL.
+ */
+static struct alloc_stat *search_alloc_stat(unsigned long ptr,
+					    unsigned long call_site,
+					    struct rb_root *root,
+					    sort_fn_t sort_fn)
+{
+	struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
+	struct rb_node *cur;
+
+	/* descent is steered by the caller-supplied comparator */
+	for (cur = root->rb_node; cur != NULL; ) {
+		struct alloc_stat *entry = rb_entry(cur, struct alloc_stat, node);
+		int order = sort_fn(&key, entry);
+
+		if (order == 0)
+			return entry;
+		cur = order < 0 ? cur->rb_left : cur->rb_right;
+	}
+
+	return NULL;
+}
+
+/*
+ * Slab free handler.  Frees of pointers never seen allocated are ignored;
+ * a free on a CPU other than the recorded alloc_cpu counts as a ping-pong.
+ */
+static int perf_evsel__process_free_event(struct perf_evsel *evsel,
+					  struct perf_sample *sample)
+{
+	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
+	struct alloc_stat *s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
+	struct alloc_stat *s_caller;
+
+	if (s_alloc == NULL)
+		return 0;
+
+	total_freed += s_alloc->last_alloc;
+	if (s_alloc->alloc_cpu != (short)sample->cpu) {
+		s_alloc->pingpong++;
+		s_caller = search_alloc_stat(0, s_alloc->call_site,
+					     &root_caller_stat, slab_callsite_cmp);
+		if (s_caller == NULL)
+			return -1;
+		s_caller->pingpong++;
+	}
+	s_alloc->alloc_cpu = -1;	/* no allocating CPU on record until the next alloc */
+	return 0;
+}
+
+static u64 total_page_alloc_bytes;
+static u64 total_page_free_bytes;
+static u64 total_page_nomatch_bytes;
+static u64 total_page_fail_bytes;
+static unsigned long nr_page_allocs;
+static unsigned long nr_page_frees;
+static unsigned long nr_page_fails;
+static unsigned long nr_page_nomatch;
+
+static bool use_pfn;
+static bool live_page;
+static struct perf_session *kmem_session;
+
+#define MAX_MIGRATE_TYPES  6
+#define MAX_PAGE_ORDER     11
+
+static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
+
+struct page_stat {
+	struct rb_node 	node;
+	u64 		page;		/* "pfn" or "page" field of the event, depending on use_pfn */
+	u64 		callsite;	/* value returned by find_callsite() */
+	int 		order;
+	unsigned 	gfp_flags;
+	unsigned 	migrate_type;
+	u64		alloc_bytes;	/* kmem_page_size << order, summed over allocations */
+	u64 		free_bytes;	/* same quantity, summed over matched frees */
+	int 		nr_alloc;
+	int 		nr_free;
+};
+
+static struct rb_root page_live_tree;
+static struct rb_root page_alloc_tree;
+static struct rb_root page_alloc_sorted;
+static struct rb_root page_caller_tree;
+static struct rb_root page_caller_sorted;
+
+struct alloc_func {
+	u64 start;	/* sym->start of a matching kernel symbol */
+	u64 end;	/* sym->end of that symbol */
+	char *name;	/* points at sym->name (not copied) */
+};
+
+static int nr_alloc_funcs;
+static struct alloc_func *alloc_func_list;
+
+/* qsort() comparator: order alloc_func entries by ascending start address. */
+static int funcmp(const void *a, const void *b)
+{
+	const struct alloc_func *fa = a;
+	const struct alloc_func *fb = b;
+	/* equal keys must compare as 0; "never 0" claimed a < b and b < a at once */
+	if (fa->start == fb->start)
+		return 0;
+	return fa->start > fb->start ? 1 : -1;
+}
+
+/*
+ * bsearch() comparator: 0 when the key's start/end fall inside the entry's
+ * [start, end) range, otherwise ordered by start address.
+ */
+static int callcmp(const void *a, const void *b)
+{
+	const struct alloc_func *key = a, *entry = b;
+
+	if (key->start >= entry->start && key->end < entry->end)
+		return 0;
+
+	return key->start > entry->start ? 1 : -1;
+}
+
+/*
+ * Fill alloc_func_list with the kernel symbols matching the page-allocator
+ * name pattern, sorted by start address.  Returns 0 or a negative errno.
+ */
+static int build_alloc_func_list(void)
+{
+	int ret;
+	struct map *kernel_map;
+	struct symbol *sym;
+	struct rb_node *node;
+	struct alloc_func *func;
+	struct machine *machine = &kmem_session->machines.host;
+	regex_t alloc_func_regex;
+	const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";
+
+	ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
+	if (ret) {
+		char err[BUFSIZ];
+
+		regerror(ret, &alloc_func_regex, err, sizeof(err));
+		pr_err("Invalid regex: %s\n%s", pattern, err);
+		return -EINVAL;
+	}
+	kernel_map = machine__kernel_map(machine);
+	if (map__load(kernel_map) < 0) {
+		pr_err("cannot load kernel map\n");
+		ret = -ENOENT;
+		goto out;
+	}
+	map__for_each_symbol(kernel_map, sym, node) {
+		if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
+			continue;
+		func = realloc(alloc_func_list, (nr_alloc_funcs + 1) * sizeof(*func));
+		if (func == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		pr_debug("alloc func: %s\n", sym->name);
+		func[nr_alloc_funcs].start = sym->start;
+		func[nr_alloc_funcs].end   = sym->end;
+		func[nr_alloc_funcs].name  = sym->name;
+		alloc_func_list = func;
+		nr_alloc_funcs++;
+	}
+	qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);
+out:
+	regfree(&alloc_func_regex);	/* previously leaked on both error paths */
+	return ret;
+}
+
+/*
+ * Walk the sample's resolved callchain and return the address of the first
+ * entry that is not inside one of the functions in 'alloc_func_list'.
+ * Falls back to sample->ip when no such entry exists or the list cannot
+ * be built.
+ */
+static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
+{
+	struct addr_location al;
+	struct machine *machine = &kmem_session->machines.host;
+	struct callchain_cursor_node *node;
+
+	if (alloc_func_list == NULL && build_alloc_func_list() < 0)
+		goto out;
+
+	/*
+	 * NOTE(review): the reference taken by machine__findnew_thread() is not
+	 * dropped here, unlike process_sample_event() - confirm this is intended.
+	 */
+	al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+	sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);
+
+	callchain_cursor_commit(&callchain_cursor);
+	for (;; callchain_cursor_advance(&callchain_cursor)) {
+		struct alloc_func key, *caller;
+
+		node = callchain_cursor_current(&callchain_cursor);
+		if (node == NULL)
+			break;
+
+		key.start = key.end = node->ip;
+		caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
+				 sizeof(key), callcmp);
+		if (caller != NULL) {
+			pr_debug3("skipping alloc function: %s\n", caller->name);
+			continue;
+		}
+
+		/* found: report the unmapped address when the entry has a map */
+		if (node->map)
+			return map__unmap_ip(node->map, node->ip);
+		return node->ip;
+	}
+
+out:
+	pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
+	return sample->ip;
+}
+
+struct sort_dimension {
+	const char		name[20];
+	sort_fn_t		cmp;	/* users walk a list of these and stop at the first non-zero result */
+	struct list_head	list;
+};
+
+static LIST_HEAD(page_alloc_sort_input);
+static LIST_HEAD(page_caller_sort_input);
+
+/*
+ * Look up pstat->page in page_live_tree.  When it is absent and @create is
+ * set, insert a zeroed entry carrying pstat's page/order/gfp/migrate type.
+ * Returns the entry, or NULL (not found, or allocation failure).
+ */
+static struct page_stat *
+__page_stat__findnew_page(struct page_stat *pstat, bool create)
+{
+	struct rb_node **link = &page_live_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct page_stat *entry;
+
+	while (*link != NULL) {
+		s64 diff;
+
+		parent = *link;
+		entry = rb_entry(parent, struct page_stat, node);
+		diff = entry->page - pstat->page;
+		if (diff == 0)
+			return entry;
+		link = diff < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	if (!create)
+		return NULL;
+
+	entry = zalloc(sizeof(*entry));
+	if (entry == NULL)
+		return NULL;
+
+	entry->page = pstat->page;
+	entry->order = pstat->order;
+	entry->gfp_flags = pstat->gfp_flags;
+	entry->migrate_type = pstat->migrate_type;
+	rb_link_node(&entry->node, parent, link);
+	rb_insert_color(&entry->node, &page_live_tree);
+	return entry;
+}
+
+static struct page_stat *page_stat__find_page(struct page_stat *pstat)
+{
+	return __page_stat__findnew_page(pstat, false);	/* lookup only, never inserts */
+}
+
+static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
+{
+	return __page_stat__findnew_page(pstat, true);	/* inserts when missing; NULL if that allocation fails */
+}
+
+/*
+ * Look up @pstat in page_alloc_tree, comparing with every dimension on
+ * page_alloc_sort_input in turn.  When absent and @create is set, insert a
+ * zeroed entry copied from pstat's page/order/gfp/migrate type.
+ */
+static struct page_stat *
+__page_stat__findnew_alloc(struct page_stat *pstat, bool create)
+{
+	struct rb_node **link = &page_alloc_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct page_stat *entry;
+	struct sort_dimension *dim;
+
+	while (*link != NULL) {
+		int cmp = 0;
+
+		parent = *link;
+		entry = rb_entry(parent, struct page_stat, node);
+		/* the first dimension that tells the two apart decides */
+		list_for_each_entry(dim, &page_alloc_sort_input, list) {
+			cmp = dim->cmp(pstat, entry);
+			if (cmp != 0)
+				break;
+		}
+
+		if (cmp == 0)
+			return entry;
+		link = cmp < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	if (!create)
+		return NULL;
+
+	entry = zalloc(sizeof(*entry));
+	if (entry == NULL)
+		return NULL;
+	entry->page = pstat->page;
+	entry->order = pstat->order;
+	entry->gfp_flags = pstat->gfp_flags;
+	entry->migrate_type = pstat->migrate_type;
+	rb_link_node(&entry->node, parent, link);
+	rb_insert_color(&entry->node, &page_alloc_tree);
+	return entry;
+}
+
+static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
+{
+	return __page_stat__findnew_alloc(pstat, false);	/* lookup only, never inserts */
+}
+
+static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
+{
+	return __page_stat__findnew_alloc(pstat, true);	/* inserts when missing; NULL if that allocation fails */
+}
+
+/*
+ * Look up @pstat in page_caller_tree, comparing with every dimension on
+ * page_caller_sort_input in turn.  When absent and @create is set, insert a
+ * zeroed entry copied from pstat's callsite/order/gfp/migrate type.
+ */
+static struct page_stat *
+__page_stat__findnew_caller(struct page_stat *pstat, bool create)
+{
+	struct rb_node **link = &page_caller_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct page_stat *entry;
+	struct sort_dimension *dim;
+
+	while (*link != NULL) {
+		int cmp = 0;
+
+		parent = *link;
+		entry = rb_entry(parent, struct page_stat, node);
+		/* the first dimension that tells the two apart decides */
+		list_for_each_entry(dim, &page_caller_sort_input, list) {
+			cmp = dim->cmp(pstat, entry);
+			if (cmp != 0)
+				break;
+		}
+
+		if (cmp == 0)
+			return entry;
+		link = cmp < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	if (!create)
+		return NULL;
+
+	entry = zalloc(sizeof(*entry));
+	if (entry == NULL)
+		return NULL;
+	entry->callsite = pstat->callsite;
+	entry->order = pstat->order;
+	entry->gfp_flags = pstat->gfp_flags;
+	entry->migrate_type = pstat->migrate_type;
+	rb_link_node(&entry->node, parent, link);
+	rb_insert_color(&entry->node, &page_caller_tree);
+	return entry;
+}
+
+static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
+{
+	return __page_stat__findnew_caller(pstat, false);	/* lookup only, never inserts */
+}
+
+static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
+{
+	return __page_stat__findnew_caller(pstat, true);	/* inserts when missing; NULL if that allocation fails */
+}
+
+/* The "no page" marker is pfn -1UL when use_pfn is set and page 0 otherwise. */
+static bool valid_page(u64 pfn_or_page)
+{
+	if (use_pfn)
+		return pfn_or_page != -1UL;
+
+	return pfn_or_page != 0;
+}
+
+struct gfp_flag {
+	unsigned int flags;	/* raw gfp_flags value; sort/search key for gfps[] */
+	char *compact_str;	/* result of compact_gfp_flags() */
+	char *human_readable;	/* strdup() of the text following "gfp_flags=" */
+};
+
+static struct gfp_flag *gfps;
+static int nr_gfps;
+
+/* qsort()/bsearch() comparator on .flags; compares rather than subtracting, which mis-signs gaps > INT_MAX. */
+static int gfpcmp(const void *a, const void *b)
+{
+	const struct gfp_flag *fa = a, *fb = b;
+
+	return (fa->flags > fb->flags) - (fa->flags < fb->flags);
+}
+
+/* see include/trace/events/mmflags.h */
+static const struct {
+	const char *original;
+	const char *compact;
+} gfp_compact_table[] = {
+	{ "GFP_TRANSHUGE",		"THP" },
+	{ "GFP_TRANSHUGE_LIGHT",	"THL" },
+	{ "GFP_HIGHUSER_MOVABLE",	"HUM" },
+	{ "GFP_HIGHUSER",		"HU" },
+	{ "GFP_USER",			"U" },
+	{ "GFP_KERNEL_ACCOUNT",		"KAC" },
+	{ "GFP_KERNEL",			"K" },
+	{ "GFP_NOFS",			"NF" },	/* NOTE(review): "NF" is also used for __GFP_NOFAIL below */
+	{ "GFP_ATOMIC",			"A" },
+	{ "GFP_NOIO",			"NI" },
+	{ "GFP_NOWAIT",			"NW" },
+	{ "GFP_DMA",			"D" },
+	{ "__GFP_HIGHMEM",		"HM" },
+	{ "GFP_DMA32",			"D32" },
+	{ "__GFP_HIGH",			"H" },
+	{ "__GFP_ATOMIC",		"_A" },
+	{ "__GFP_IO",			"I" },
+	{ "__GFP_FS",			"F" },
+	{ "__GFP_NOWARN",		"NWR" },
+	{ "__GFP_RETRY_MAYFAIL",	"R" },	/* NOTE(review): "R" is also used for __GFP_RECLAIM below */
+	{ "__GFP_NOFAIL",		"NF" },	/* NOTE(review): same compact form as GFP_NOFS */
+	{ "__GFP_NORETRY",		"NR" },
+	{ "__GFP_COMP",			"C" },
+	{ "__GFP_ZERO",			"Z" },
+	{ "__GFP_NOMEMALLOC",		"NMA" },
+	{ "__GFP_MEMALLOC",		"MA" },
+	{ "__GFP_HARDWALL",		"HW" },
+	{ "__GFP_THISNODE",		"TN" },
+	{ "__GFP_RECLAIMABLE",		"RC" },
+	{ "__GFP_MOVABLE",		"M" },
+	{ "__GFP_ACCOUNT",		"AC" },
+	{ "__GFP_WRITE",		"WR" },
+	{ "__GFP_RECLAIM",		"R" },	/* NOTE(review): same compact form as __GFP_RETRY_MAYFAIL */
+	{ "__GFP_DIRECT_RECLAIM",	"DR" },
+	{ "__GFP_KSWAPD_RECLAIM",	"KR" },
+};
+
+static size_t max_gfp_len;
+
+/*
+ * Translate a '|'-separated list of GFP flag names into the short forms from
+ * gfp_compact_table, dropping names the table does not know.  Tracks the
+ * longest result in max_gfp_len.  Returns a malloc'ed string, or NULL on
+ * allocation failure or when no name matched.
+ */
+static char *compact_gfp_flags(char *gfp_flags)
+{
+	char *orig_flags = strdup(gfp_flags);
+	char *new_flags = NULL;
+	char *str, *pos = NULL;
+	size_t len = 0;
+
+	if (orig_flags == NULL)
+		return NULL;
+
+	for (str = strtok_r(orig_flags, "|", &pos); str;
+	     str = strtok_r(NULL, "|", &pos)) {
+		size_t i;
+		char *new;
+		const char *cpt;
+
+		for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
+			if (strcmp(gfp_compact_table[i].original, str))
+				continue;
+			cpt = gfp_compact_table[i].compact;
+			/* room for an optional '|', the short name and the NUL */
+			new = realloc(new_flags, len + strlen(cpt) + 2);
+			if (new == NULL) {
+				free(new_flags);
+				free(orig_flags);	/* was leaked on this path */
+				return NULL;
+			}
+			new_flags = new;
+
+			if (len)
+				new_flags[len++] = '|';
+			strcpy(new_flags + len, cpt);
+			len += strlen(cpt);
+			break;	/* 'original' names in the table are unique */
+		}
+	}
+
+	if (max_gfp_len < len)
+		max_gfp_len = len;
+
+	free(orig_flags);
+	return new_flags;
+}
+
+/*
+ * Return the compact string stored in gfps[] for @gfp_flags, or NULL when
+ * the value has no entry there.
+ */
+static char *compact_gfp_string(unsigned long gfp_flags)
+{
+	struct gfp_flag key = {
+		.flags = gfp_flags,
+	};
+	struct gfp_flag *hit = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
+
+	return hit ? hit->compact_str : NULL;
+}
+
+/*
+ * Ensure gfps[] has an entry for @gfp_flags, built from the "gfp_flags=" text
+ * of the pretty-printed event.  Returns 0, or -ENOMEM.
+ */
+static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample,
+			   unsigned int gfp_flags)
+{
+	struct pevent_record record = {
+		.cpu = sample->cpu,
+		.data = sample->raw_data,
+		.size = sample->raw_size,
+	};
+	struct gfp_flag key = { .flags = gfp_flags, };
+	struct trace_seq seq;
+	char *str, *pos = NULL;
+	int err = 0;
+
+	if (nr_gfps && bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
+		return 0;
+	trace_seq_init(&seq);
+	pevent_event_info(&seq, evsel->tp_format, &record);
+	for (str = strtok_r(seq.buffer, " ", &pos); str;
+	     str = strtok_r(NULL, " ", &pos)) {
+		struct gfp_flag *new;
+
+		if (strncmp(str, "gfp_flags=", 10))
+			continue;
+		new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
+		if (new == NULL) {
+			err = -ENOMEM;
+			break;
+		}
+		gfps = new;
+		new += nr_gfps;
+		new->flags = gfp_flags;
+		new->human_readable = strdup(str + 10);
+		new->compact_str = compact_gfp_flags(str + 10);
+		if (!new->human_readable || !new->compact_str) {
+			/* do not publish a half-built entry: nr_gfps stays untouched */
+			free(new->human_readable);
+			free(new->compact_str);
+			err = -ENOMEM;
+			break;
+		}
+		nr_gfps++;
+		qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
+	}
+	trace_seq_destroy(&seq);	/* previously skipped on the error returns */
+	return err;
+}
+
+/* Page allocation handler: feeds the live-page, per-page (unless live_page) and per-callsite trees. */
+static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
+						struct perf_sample *sample)
+{
+	u64 page;
+	unsigned int order = perf_evsel__intval(evsel, sample, "order");
+	unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags");
+	unsigned int migrate_type = perf_evsel__intval(evsel, sample,
+						       "migratetype");
+	u64 bytes = kmem_page_size << order;
+	u64 callsite;
+	struct page_stat *pstat;
+	struct page_stat this = {
+		.order = order,
+		.gfp_flags = gfp_flags,
+		.migrate_type = migrate_type,
+	};
+
+	if (use_pfn)
+		page = perf_evsel__intval(evsel, sample, "pfn");
+	else
+		page = perf_evsel__intval(evsel, sample, "page");
+
+	nr_page_allocs++;
+	total_page_alloc_bytes += bytes;
+
+	if (!valid_page(page)) {
+		nr_page_fails++;
+		total_page_fail_bytes += bytes;
+		return 0;
+	}
+
+	if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
+		return -1;
+
+	callsite = find_callsite(evsel, sample);
+
+	/*
+	 * This is to find the current page (with correct gfp flags and
+	 * migrate type) at free event.
+	 */
+	this.page = page;
+	pstat = page_stat__findnew_page(&this);
+	if (pstat == NULL)
+		return -ENOMEM;
+
+	pstat->nr_alloc++;
+	pstat->alloc_bytes += bytes;
+	pstat->callsite = callsite;
+
+	if (!live_page) {
+		pstat = page_stat__findnew_alloc(&this);
+		if (pstat == NULL)
+			return -ENOMEM;
+		pstat->nr_alloc++;
+		pstat->alloc_bytes += bytes;
+		pstat->callsite = callsite;
+	}
+
+	this.callsite = callsite;
+	pstat = page_stat__findnew_caller(&this);
+	if (pstat == NULL)
+		return -ENOMEM;
+
+	pstat->nr_alloc++;
+	pstat->alloc_bytes += bytes;
+
+	/* order/migratetype come from trace data: stay inside order_stats[][] */
+	if (order < MAX_PAGE_ORDER && migrate_type < MAX_MIGRATE_TYPES)
+		order_stats[order][migrate_type]++;
+	return 0;
+}
+
+/* Page free handler: retire the page from the live tree and credit the free to its alloc/callsite entries. */
+static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
+						struct perf_sample *sample)
+{
+	u64 page;
+	unsigned int order = perf_evsel__intval(evsel, sample, "order");
+	u64 bytes = kmem_page_size << order;
+	struct page_stat *pstat;
+	struct page_stat this = {
+		.order = order,
+	};
+
+	if (use_pfn)
+		page = perf_evsel__intval(evsel, sample, "pfn");
+	else
+		page = perf_evsel__intval(evsel, sample, "page");
+
+	nr_page_frees++;
+	total_page_free_bytes += bytes;
+
+	this.page = page;
+	pstat = page_stat__find_page(&this);
+	if (pstat == NULL) {
+		pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
+			  page, order);
+		nr_page_nomatch++;
+		total_page_nomatch_bytes += bytes;
+		return 0;
+	}
+
+	this.gfp_flags = pstat->gfp_flags;
+	this.migrate_type = pstat->migrate_type;
+	this.callsite = pstat->callsite;
+
+	rb_erase(&pstat->node, &page_live_tree);
+	free(pstat);
+
+	if (live_page) {
+		/* both indexes originate from trace data: stay inside order_stats[][] */
+		if (order < MAX_PAGE_ORDER && this.migrate_type < MAX_MIGRATE_TYPES)
+			order_stats[order][this.migrate_type]--;
+	} else {
+		pstat = page_stat__find_alloc(&this);
+		if (pstat == NULL)
+			return -ENOENT;	/* a failed lookup, not an allocation failure */
+		pstat->nr_free++;
+		pstat->free_bytes += bytes;
+	}
+
+	pstat = page_stat__find_caller(&this);
+	if (pstat == NULL)
+		return -ENOENT;
+
+	pstat->nr_free++;
+	pstat->free_bytes += bytes;
+
+	if (live_page) {
+		pstat->nr_alloc--;
+		pstat->alloc_bytes -= bytes;
+
+		if (pstat->nr_alloc == 0) {
+			rb_erase(&pstat->node, &page_caller_tree);
+			free(pstat);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Sample filter: currently only the time interval held in ptime is
+ * consulted, via perf_time__skip_sample().
+ */
+static bool perf_kmem__skip_sample(struct perf_sample *sample)
+{
+	return perf_time__skip_sample(&ptime, sample->time);
+}
+
+typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
+				  struct perf_sample *sample);
+
+/* perf_tool.sample callback: hand the sample to its evsel's handler unless it is filtered out. */
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine)
+{
+	int err = 0;
+	struct thread *thread = machine__findnew_thread(machine, sample->pid,
+							sample->tid);
+
+	if (thread == NULL) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			 event->header.type);
+		return -1;
+	}
+
+	/* a filtered sample must still drop the thread reference taken above */
+	if (perf_kmem__skip_sample(sample))
+		goto out_put;
+
+	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
+	if (evsel->handler != NULL) {
+		tracepoint_handler f = evsel->handler;
+		err = f(evsel, sample);
+	}
+out_put:
+	thread__put(thread);
+	return err;
+}
+
+static struct perf_tool perf_kmem = {
+	.sample		 = process_sample_event,	/* the only callback defined in this file */
+	.comm		 = perf_event__process_comm,
+	.mmap		 = perf_event__process_mmap,
+	.mmap2		 = perf_event__process_mmap2,
+	.namespaces	 = perf_event__process_namespaces,
+	.ordered_events	 = true,
+};
+
+/* Percentage of @n_alloc not covered by @n_req; 0.0 when nothing was allocated. */
+static double fragmentation(unsigned long n_req, unsigned long n_alloc)
+{
+	if (!n_alloc)
+		return 0.0;
+	return 100.0 - (100.0 * n_req / n_alloc);
+}
+
+/* Print a sorted slab tree, at most @n_lines rows (negative: no limit), plus "..." if rows remain. */
+static void __print_slab_result(struct rb_root *root,
+				struct perf_session *session,
+				int n_lines, int is_caller)
+{
+	struct rb_node *next = rb_first(root);
+	struct machine *machine = &session->machines.host;
+
+	printf("%.105s\n", graph_dotted_line);
+	printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
+	printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
+	printf("%.105s\n", graph_dotted_line);
+
+	while (next && n_lines--) {
+		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
+						   node);
+		struct symbol *sym = NULL;
+		struct map *map;
+		char buf[BUFSIZ];
+		u64 addr;
+
+		if (is_caller) {
+			addr = data->call_site;
+			if (!raw_ip)
+				sym = machine__find_kernel_function(machine, addr, &map);
+		} else
+			addr = data->ptr;
+
+		if (sym != NULL)
+			snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
+				 addr - map->unmap_ip(map, sym->start));
+		else
+			snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
+		printf(" %-34s |", buf);
+
+		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
+		       (unsigned long long)data->bytes_alloc,
+		       (unsigned long)data->bytes_alloc / data->hit,
+		       (unsigned long long)data->bytes_req,
+		       (unsigned long)data->bytes_req / data->hit,
+		       (unsigned long)data->hit,
+		       (unsigned long)data->pingpong,
+		       fragmentation(data->bytes_req, data->bytes_alloc));
+
+		next = rb_next(next);
+	}
+
+	/* rows remain only if the line limit stopped the loop; "n_lines == -1" also fired for an empty tree */
+	if (next)
+		printf(" ...                                | ...             | ...             | ...      | ...       | ...   \n");
+
+	printf("%.105s\n", graph_dotted_line);
+}
+
+static const char * const migrate_type_str[] = {	/* indexed by page_stat.migrate_type when printing */
+	"UNMOVABL",
+	"RECLAIM",
+	"MOVABLE",
+	"RESERVED",
+	"CMA/ISLT",
+	"UNKNOWN",
+};
+
+/* Print the sorted per-page table, at most @n_lines rows (negative: no limit), plus "..." if rows remain. */
+static void __print_page_alloc_result(struct perf_session *session, int n_lines)
+{
+	struct rb_node *next = rb_first(&page_alloc_sorted);
+	struct machine *machine = &session->machines.host;
+	const char *format;
+	int gfp_len = max(strlen("GFP flags"), max_gfp_len);
+
+	printf("\n%.105s\n", graph_dotted_line);
+	printf(" %-16s | %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
+	       use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
+	       gfp_len, "GFP flags");
+	printf("%.105s\n", graph_dotted_line);
+
+	if (use_pfn)
+		format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
+	else
+		format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
+
+	while (next && n_lines--) {
+		struct page_stat *data = rb_entry(next, struct page_stat, node);
+		const char *gfp = compact_gfp_string(data->gfp_flags);
+		const char *mig = "UNKNOWN";
+		struct symbol *sym;
+		struct map *map;
+		char buf[32];
+		char *caller = buf;
+
+		sym = machine__find_kernel_function(machine, data->callsite, &map);
+		if (sym)
+			caller = sym->name;
+		else
+			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
+		/* trace data may carry a migrate type beyond the name table */
+		if (data->migrate_type < ARRAY_SIZE(migrate_type_str))
+			mig = migrate_type_str[data->migrate_type];
+		/* gfp is NULL when gfps[] has no entry; never pass NULL to %s */
+		printf(format, (unsigned long long)data->page,
+		       (unsigned long long)data->alloc_bytes / 1024,
+		       data->nr_alloc, data->order, mig,
+		       gfp_len, gfp ? gfp : "(null)", caller);
+		next = rb_next(next);
+	}
+	if (next)	/* rows remain only when the line limit stopped the loop */
+		printf(" ...              | ...              | ...       | ...   | ...      | %-*s | ...\n",
+		       gfp_len, "...");
+	printf("%.105s\n", graph_dotted_line);
+}
+
+/* Print the sorted per-callsite table, at most @n_lines rows (negative: no limit), plus "..." if rows remain. */
+static void __print_page_caller_result(struct perf_session *session, int n_lines)
+{
+	struct rb_node *next = rb_first(&page_caller_sorted);
+	struct machine *machine = &session->machines.host;
+	int gfp_len = max(strlen("GFP flags"), max_gfp_len);
+
+	printf("\n%.105s\n", graph_dotted_line);
+	printf(" %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
+	       live_page ? "Live" : "Total", gfp_len, "GFP flags");
+	printf("%.105s\n", graph_dotted_line);
+
+	while (next && n_lines--) {
+		struct page_stat *data = rb_entry(next, struct page_stat, node);
+		const char *gfp = compact_gfp_string(data->gfp_flags);
+		const char *mig = "UNKNOWN";
+		struct symbol *sym;
+		struct map *map;
+		char buf[32];
+		char *caller = buf;
+
+		sym = machine__find_kernel_function(machine, data->callsite, &map);
+		if (sym)
+			caller = sym->name;
+		else
+			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
+		/* trace data may carry a migrate type beyond the name table */
+		if (data->migrate_type < ARRAY_SIZE(migrate_type_str))
+			mig = migrate_type_str[data->migrate_type];
+		printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
+		       (unsigned long long)data->alloc_bytes / 1024,
+		       data->nr_alloc, data->order, mig,
+		       gfp_len, gfp ? gfp : "(null)", caller);	/* never pass NULL to %s */
+		next = rb_next(next);
+	}
+	if (next)	/* rows remain only when the line limit stopped the loop */
+		printf(" ...              | ...       | ...   | ...      | %-*s | ...\n",
+		       gfp_len, "...");
+
+	printf("%.105s\n", graph_dotted_line);
+}
+
+/* Print a legend: each recorded gfp_flags value with its compact and readable names. */
+static void print_gfp_flags(void)
+{
+	int idx;
+
+	printf("#\n");
+	printf("# GFP flags\n");
+	printf("# ---------\n");
+
+	for (idx = 0; idx < nr_gfps; idx++)
+		printf("# %08x: %*s: %s\n", gfps[idx].flags, (int) max_gfp_len,
+		       gfps[idx].compact_str, gfps[idx].human_readable);
+}
+
+/* Print the slab allocator totals accumulated by the alloc/free handlers. */
+static void print_slab_summary(void)
+{
+	printf("\nSUMMARY (SLAB allocator)");
+	printf("\n========================\n");
+	printf("Total bytes requested: %'lu\n", total_requested);
+	printf("Total bytes allocated: %'lu\n", total_allocated);
+	printf("Total bytes freed:     %'lu\n", total_freed);
+	if (total_freed < total_allocated)
+		printf("Net total bytes allocated: %'lu\n",
+		       total_allocated - total_freed);
+	printf("Total bytes wasted on internal fragmentation: %'lu\n",
+	       total_allocated - total_requested);
+	printf("Internal fragmentation: %f%%\n",
+	       fragmentation(total_requested, total_allocated));
+	printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
+}
+
+/* Print the page-allocator totals and the per-order / per-migrate-type table. */
+static void print_page_summary(void)
+{
+	int order, mig;
+	u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch;
+	u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes;
+
+	printf("\nSUMMARY (page allocator)");
+	printf("\n========================\n");
+	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation requests",
+	       nr_page_allocs, total_page_alloc_bytes / 1024);
+	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free requests",
+	       nr_page_frees, total_page_free_bytes / 1024);
+	printf("\n");
+	printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
+	       nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
+	printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
+	       nr_page_allocs - nr_alloc_freed,
+	       (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
+	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free-only requests",
+	       nr_page_nomatch, total_page_nomatch_bytes / 1024);
+	printf("\n");
+
+	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation failures",
+	       nr_page_fails, total_page_fail_bytes / 1024);
+	printf("\n");
+
+	printf("%5s  %12s  %12s  %12s  %12s  %12s\n", "Order",  "Unmovable",
+	       "Reclaimable", "Movable", "Reserved", "CMA/Isolated");
+	printf("%.5s  %.12s  %.12s  %.12s  %.12s  %.12s\n", graph_dotted_line,
+	       graph_dotted_line, graph_dotted_line, graph_dotted_line,
+	       graph_dotted_line, graph_dotted_line);
+
+	for (order = 0; order < MAX_PAGE_ORDER; order++) {
+		printf("%5d", order);
+		for (mig = 0; mig < MAX_MIGRATE_TYPES - 1; mig++) {	/* last type has no column */
+			if (order_stats[order][mig] != 0)
+				printf("  %'12d", order_stats[order][mig]);
+			else
+				printf("  %12c", '.');
+		}
+		printf("\n");
+	}
+}
+
+static void print_slab_result(struct perf_session *session)
+{
+	if (caller_flag)	/* per-callsite table */
+		__print_slab_result(&root_caller_sorted, session, caller_lines, 1);
+	if (alloc_flag)		/* per-pointer table */
+		__print_slab_result(&root_alloc_sorted, session, alloc_lines, 0);
+	print_slab_summary();	/* printed regardless of the two flags */
+}
+
+static void print_page_result(struct perf_session *session)
+{
+	if (caller_flag || alloc_flag)	/* legend for the compact GFP names used by both tables */
+		print_gfp_flags();
+	if (caller_flag)
+		__print_page_caller_result(session, caller_lines);
+	if (alloc_flag)
+		__print_page_alloc_result(session, alloc_lines);
+	print_page_summary();	/* printed regardless of the two flags */
+}
+
+static void print_result(struct perf_session *session)
+{
+	if (kmem_slab)	/* slab report first ... */
+		print_slab_result(session);
+	if (kmem_page)	/* ... then the page report; both may be enabled */
+		print_page_result(session);
+}
+
+static LIST_HEAD(slab_caller_sort);
+static LIST_HEAD(slab_alloc_sort);
+static LIST_HEAD(page_caller_sort);
+static LIST_HEAD(page_alloc_sort);
+
+/*
+ * Insert @data into @root ordered by the dimensions on @sort_list: entries
+ * that compare greater go left, everything else (ties included) goes right.
+ */
+static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
+			     struct list_head *sort_list)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct sort_dimension *dim;
+
+	while (*link != NULL) {
+		struct alloc_stat *other;
+		int cmp = 0;
+
+		parent = *link;
+		other = rb_entry(parent, struct alloc_stat, node);
+
+		list_for_each_entry(dim, sort_list, list) {
+			cmp = dim->cmp(data, other);
+			if (cmp != 0)
+				break;
+		}
+		link = cmp > 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&data->node, parent, link);
+	rb_insert_color(&data->node, root);
+}
+
+/*
+ * Move every entry of @root into @root_sorted, ordered by @sort_list.
+ * @root is empty afterwards.
+ */
+static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted,
+			       struct list_head *sort_list)
+{
+	struct rb_node *first;
+
+	while ((first = rb_first(root)) != NULL) {
+		struct alloc_stat *entry = rb_entry(first, struct alloc_stat, node);
+
+		rb_erase(first, root);
+		sort_slab_insert(root_sorted, entry, sort_list);
+	}
+}
+
+/*
+ * Insert @data into @root ordered by the dimensions on @sort_list: entries
+ * that compare greater go left, everything else (ties included) goes right.
+ */
+static void sort_page_insert(struct rb_root *root, struct page_stat *data,
+			     struct list_head *sort_list)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct sort_dimension *dim;
+
+	while (*link != NULL) {
+		struct page_stat *other;
+		int cmp = 0;
+
+		parent = *link;
+		other = rb_entry(parent, struct page_stat, node);
+
+		list_for_each_entry(dim, sort_list, list) {
+			cmp = dim->cmp(data, other);
+			if (cmp != 0)
+				break;
+		}
+		link = cmp > 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&data->node, parent, link);
+	rb_insert_color(&data->node, root);
+}
+
+/*
+ * Drain @root: repeatedly detach its first node and re-insert the owning
+ * page_stat into @root_sorted using the keys on @sort_list.
+ */
+static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
+			       struct list_head *sort_list)
+{
+	struct rb_node *first;
+
+	while ((first = rb_first(root)) != NULL) {
+		struct page_stat *stat = rb_entry(first, struct page_stat, node);
+
+		rb_erase(first, root);
+		sort_page_insert(root_sorted, stat, sort_list);
+	}
+}
+
+/*
+ * Move the collected statistics into their sorted trees for every selected
+ * allocator. For pages, the allocation table is fed from page_live_tree
+ * when live_page is set and from page_alloc_tree otherwise.
+ */
+static void sort_result(void)
+{
+	if (kmem_slab) {
+		__sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
+				   &slab_alloc_sort);
+		__sort_slab_result(&root_caller_stat, &root_caller_sorted,
+				   &slab_caller_sort);
+	}
+
+	if (kmem_page) {
+		struct rb_root *alloc_src;
+
+		alloc_src = live_page ? &page_live_tree : &page_alloc_tree;
+
+		__sort_page_result(alloc_src, &page_alloc_sorted,
+				   &page_alloc_sort);
+		__sort_page_result(&page_caller_tree, &page_caller_sorted,
+				   &page_caller_sort);
+	}
+}
+
+/*
+ * Run the 'stat' analysis on @session: install the kmem tracepoint
+ * handlers, note whether kmem:mm_page_alloc carries a "pfn" field, process
+ * all events, then sort and print the results.
+ *
+ * Returns 0 on success, -EINVAL when the session has no traces or the
+ * handlers cannot be installed, or the error from event processing.
+ */
+static int __cmd_kmem(struct perf_session *session)
+{
+	const struct perf_evsel_str_handler kmem_tracepoints[] = {
+		/* slab allocator */
+		{ "kmem:kmalloc",		perf_evsel__process_alloc_event, },
+		{ "kmem:kmem_cache_alloc",	perf_evsel__process_alloc_event, },
+		{ "kmem:kmalloc_node",		perf_evsel__process_alloc_node_event, },
+		{ "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
+		{ "kmem:kfree",			perf_evsel__process_free_event, },
+		{ "kmem:kmem_cache_free",	perf_evsel__process_free_event, },
+		/* page allocator */
+		{ "kmem:mm_page_alloc",		perf_evsel__process_page_alloc_event, },
+		{ "kmem:mm_page_free",		perf_evsel__process_page_free_event, },
+	};
+	struct perf_evsel *evsel;
+	int err;
+
+	if (!perf_session__has_traces(session, "kmem record"))
+		return -EINVAL;
+
+	if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
+		pr_err("Initializing perf session tracepoint handlers failed\n");
+		return -EINVAL;
+	}
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") != 0)
+			continue;
+
+		if (perf_evsel__field(evsel, "pfn")) {
+			use_pfn = true;
+			break;
+		}
+	}
+
+	setup_pager();
+
+	err = perf_session__process_events(session);
+	if (err != 0) {
+		pr_err("error during process events: %d\n", err);
+		return err;
+	}
+
+	sort_result();
+	print_result(session);
+
+	return 0;
+}
+
+/* slab sort keys */
+/* Three-way compare of two struct alloc_stat records by their ptr field. */
+static int ptr_cmp(void *a, void *b)
+{
+	struct alloc_stat *lhs = a;
+	struct alloc_stat *rhs = b;
+
+	return (lhs->ptr > rhs->ptr) - (lhs->ptr < rhs->ptr);
+}
+
+/* Makes ptr_cmp selectable under the slab sort key "ptr". */
+static struct sort_dimension ptr_sort_dimension = {
+	.cmp	= ptr_cmp,
+	.name	= "ptr",
+};
+
+/* Three-way compare of two struct alloc_stat records by call_site. */
+static int slab_callsite_cmp(void *a, void *b)
+{
+	struct alloc_stat *lhs = a;
+	struct alloc_stat *rhs = b;
+
+	return (lhs->call_site > rhs->call_site) -
+	       (lhs->call_site < rhs->call_site);
+}
+
+/* Makes slab_callsite_cmp selectable under the slab sort key "callsite". */
+static struct sort_dimension callsite_sort_dimension = {
+	.cmp	= slab_callsite_cmp,
+	.name	= "callsite",
+};
+
+/* Three-way compare of two struct alloc_stat records by hit count. */
+static int hit_cmp(void *a, void *b)
+{
+	struct alloc_stat *lhs = a;
+	struct alloc_stat *rhs = b;
+
+	return (lhs->hit > rhs->hit) - (lhs->hit < rhs->hit);
+}
+
+/* Makes hit_cmp selectable under the slab sort key "hit". */
+static struct sort_dimension hit_sort_dimension = {
+	.cmp	= hit_cmp,
+	.name	= "hit",
+};
+
+/* Three-way compare of two struct alloc_stat records by bytes_alloc. */
+static int bytes_cmp(void *a, void *b)
+{
+	struct alloc_stat *lhs = a;
+	struct alloc_stat *rhs = b;
+
+	return (lhs->bytes_alloc > rhs->bytes_alloc) -
+	       (lhs->bytes_alloc < rhs->bytes_alloc);
+}
+
+/* Makes bytes_cmp selectable under the slab sort key "bytes". */
+static struct sort_dimension bytes_sort_dimension = {
+	.cmp	= bytes_cmp,
+	.name	= "bytes",
+};
+
+/*
+ * Three-way compare of two struct alloc_stat records by the value
+ * fragmentation() computes from their bytes_req / bytes_alloc pair.
+ */
+static int frag_cmp(void *a, void *b)
+{
+	struct alloc_stat *lhs = a;
+	struct alloc_stat *rhs = b;
+	double lfrag = fragmentation(lhs->bytes_req, lhs->bytes_alloc);
+	double rfrag = fragmentation(rhs->bytes_req, rhs->bytes_alloc);
+
+	return (lfrag > rfrag) - (lfrag < rfrag);
+}
+
+/* Makes frag_cmp selectable under the slab sort key "frag". */
+static struct sort_dimension frag_sort_dimension = {
+	.cmp	= frag_cmp,
+	.name	= "frag",
+};
+
+/* Three-way compare of two struct alloc_stat records by pingpong count. */
+static int pingpong_cmp(void *a, void *b)
+{
+	struct alloc_stat *lhs = a;
+	struct alloc_stat *rhs = b;
+
+	return (lhs->pingpong > rhs->pingpong) -
+	       (lhs->pingpong < rhs->pingpong);
+}
+
+/* Makes pingpong_cmp selectable under the slab sort key "pingpong". */
+static struct sort_dimension pingpong_sort_dimension = {
+	.cmp	= pingpong_cmp,
+	.name	= "pingpong",
+};
+
+/* page sort keys */
+/* Three-way compare of two struct page_stat records by their page field. */
+static int page_cmp(void *a, void *b)
+{
+	struct page_stat *lhs = a;
+	struct page_stat *rhs = b;
+
+	return (lhs->page > rhs->page) - (lhs->page < rhs->page);
+}
+
+/* Makes page_cmp selectable under the page sort key "page". */
+static struct sort_dimension page_sort_dimension = {
+	.cmp	= page_cmp,
+	.name	= "page",
+};
+
+/* Three-way compare of two struct page_stat records by callsite. */
+static int page_callsite_cmp(void *a, void *b)
+{
+	struct page_stat *lhs = a;
+	struct page_stat *rhs = b;
+
+	return (lhs->callsite > rhs->callsite) -
+	       (lhs->callsite < rhs->callsite);
+}
+
+/* Makes page_callsite_cmp selectable under the page sort key "callsite". */
+static struct sort_dimension page_callsite_sort_dimension = {
+	.cmp	= page_callsite_cmp,
+	.name	= "callsite",
+};
+
+/* Three-way compare of two struct page_stat records by nr_alloc. */
+static int page_hit_cmp(void *a, void *b)
+{
+	struct page_stat *lhs = a;
+	struct page_stat *rhs = b;
+
+	return (lhs->nr_alloc > rhs->nr_alloc) -
+	       (lhs->nr_alloc < rhs->nr_alloc);
+}
+
+/* Makes page_hit_cmp selectable under the page sort key "hit". */
+static struct sort_dimension page_hit_sort_dimension = {
+	.cmp	= page_hit_cmp,
+	.name	= "hit",
+};
+
+/* Three-way compare of two struct page_stat records by alloc_bytes. */
+static int page_bytes_cmp(void *a, void *b)
+{
+	struct page_stat *lhs = a;
+	struct page_stat *rhs = b;
+
+	return (lhs->alloc_bytes > rhs->alloc_bytes) -
+	       (lhs->alloc_bytes < rhs->alloc_bytes);
+}
+
+/* Makes page_bytes_cmp selectable under the page sort key "bytes". */
+static struct sort_dimension page_bytes_sort_dimension = {
+	.cmp	= page_bytes_cmp,
+	.name	= "bytes",
+};
+
+/* Three-way compare of two struct page_stat records by order. */
+static int page_order_cmp(void *a, void *b)
+{
+	struct page_stat *lhs = a;
+	struct page_stat *rhs = b;
+
+	return (lhs->order > rhs->order) - (lhs->order < rhs->order);
+}
+
+/* Makes page_order_cmp selectable under the page sort key "order". */
+static struct sort_dimension page_order_sort_dimension = {
+	.cmp	= page_order_cmp,
+	.name	= "order",
+};
+
+/*
+ * Three-way compare of two struct page_stat records by migrate_type.
+ * A left-hand migrate_type of -1U acts as a wildcard and compares equal to
+ * anything.
+ */
+static int migrate_type_cmp(void *a, void *b)
+{
+	struct page_stat *lhs = a;
+	struct page_stat *rhs = b;
+
+	/* for internal use to find free'd page */
+	if (lhs->migrate_type == -1U)
+		return 0;
+
+	return (lhs->migrate_type > rhs->migrate_type) -
+	       (lhs->migrate_type < rhs->migrate_type);
+}
+
+/* Makes migrate_type_cmp selectable under the page sort key "migtype". */
+static struct sort_dimension migrate_type_sort_dimension = {
+	.cmp	= migrate_type_cmp,
+	.name	= "migtype",
+};
+
+/*
+ * Three-way compare of two struct page_stat records by gfp_flags.
+ * A left-hand gfp_flags of -1U acts as a wildcard and compares equal to
+ * anything.
+ */
+static int gfp_flags_cmp(void *a, void *b)
+{
+	struct page_stat *lhs = a;
+	struct page_stat *rhs = b;
+
+	/* for internal use to find free'd page */
+	if (lhs->gfp_flags == -1U)
+		return 0;
+
+	return (lhs->gfp_flags > rhs->gfp_flags) -
+	       (lhs->gfp_flags < rhs->gfp_flags);
+}
+
+/* Makes gfp_flags_cmp selectable under the page sort key "gfp". */
+static struct sort_dimension gfp_flags_sort_dimension = {
+	.cmp	= gfp_flags_cmp,
+	.name	= "gfp",
+};
+
+/* Sort keys accepted for slab reports; searched by name in slab_sort_dimension__add(). */
+static struct sort_dimension *slab_sorts[] = {
+	&ptr_sort_dimension,
+	&callsite_sort_dimension,
+	&hit_sort_dimension,
+	&bytes_sort_dimension,
+	&frag_sort_dimension,
+	&pingpong_sort_dimension,
+};
+
+/* Sort keys accepted for page reports; searched by name in page_sort_dimension__add(). */
+static struct sort_dimension *page_sorts[] = {
+	&page_sort_dimension,
+	&page_callsite_sort_dimension,
+	&page_hit_sort_dimension,
+	&page_bytes_sort_dimension,
+	&page_order_sort_dimension,
+	&migrate_type_sort_dimension,
+	&gfp_flags_sort_dimension,
+};
+
+/*
+ * Find the slab sort key named @tok in slab_sorts[] and append a private
+ * copy of it to @list.
+ *
+ * Returns 0 on success, -1 if the name is unknown or the copy cannot be
+ * allocated.
+ */
+static int slab_sort_dimension__add(const char *tok, struct list_head *list)
+{
+	int idx;
+
+	for (idx = 0; idx < (int)ARRAY_SIZE(slab_sorts); idx++) {
+		struct sort_dimension *copy;
+
+		if (strcmp(slab_sorts[idx]->name, tok) != 0)
+			continue;
+
+		copy = memdup(slab_sorts[idx], sizeof(*slab_sorts[idx]));
+		if (copy == NULL) {
+			pr_err("%s: memdup failed\n", __func__);
+			return -1;
+		}
+
+		list_add_tail(&copy->list, list);
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Find the page sort key named @tok in page_sorts[] and append a private
+ * copy of it to @list.
+ *
+ * Returns 0 on success, -1 if the name is unknown or the copy cannot be
+ * allocated.
+ */
+static int page_sort_dimension__add(const char *tok, struct list_head *list)
+{
+	int idx;
+
+	for (idx = 0; idx < (int)ARRAY_SIZE(page_sorts); idx++) {
+		struct sort_dimension *copy;
+
+		if (strcmp(page_sorts[idx]->name, tok) != 0)
+			continue;
+
+		copy = memdup(page_sorts[idx], sizeof(*page_sorts[idx]));
+		if (copy == NULL) {
+			pr_err("%s: memdup failed\n", __func__);
+			return -1;
+		}
+
+		list_add_tail(&copy->list, list);
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Parse @arg, a comma separated list of slab sort key names, and append
+ * one entry per key to @sort_list in the order given.
+ *
+ * Returns 0 on success and -1 if @arg cannot be duplicated or contains an
+ * unknown key. Keys added before the failing one stay on @sort_list.
+ */
+static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
+{
+	char *str = strdup(arg);
+	char *pos = str;
+	char *tok;
+	int ret = 0;
+
+	if (!str) {
+		pr_err("%s: strdup failed\n", __func__);
+		return -1;
+	}
+
+	while ((tok = strsep(&pos, ",")) != NULL) {
+		if (slab_sort_dimension__add(tok, sort_list) < 0) {
+			/* terminate the line so later output starts on its own line */
+			pr_err("Unknown slab --sort key: '%s'\n", tok);
+			ret = -1;
+			break;
+		}
+	}
+
+	free(str);
+	return ret;
+}
+
+/*
+ * Parse @arg, a comma separated list of page sort key names, and append
+ * one entry per key to @sort_list in the order given.
+ *
+ * Returns 0 on success and -1 if @arg cannot be duplicated or contains an
+ * unknown key. Keys added before the failing one stay on @sort_list.
+ */
+static int setup_page_sorting(struct list_head *sort_list, const char *arg)
+{
+	char *str = strdup(arg);
+	char *pos = str;
+	char *tok;
+	int ret = 0;
+
+	if (!str) {
+		pr_err("%s: strdup failed\n", __func__);
+		return -1;
+	}
+
+	while ((tok = strsep(&pos, ",")) != NULL) {
+		if (page_sort_dimension__add(tok, sort_list) < 0) {
+			/* terminate the line so later output starts on its own line */
+			pr_err("Unknown page --sort key: '%s'\n", tok);
+			ret = -1;
+			break;
+		}
+	}
+
+	free(str);
+	return ret;
+}
+
+/*
+ * --sort callback. The key list in @arg is applied to one of the four sort
+ * lists, chosen from the options seen so far: page lists when --page was
+ * given after --slab (or neither was given and the configured default is
+ * KMEM_PAGE), slab lists otherwise; the caller list when --caller was given
+ * after --alloc, the alloc list otherwise.
+ *
+ * Returns -1 when @arg is NULL, else the result of the setup helper.
+ */
+static int parse_sort_opt(const struct option *opt __maybe_unused,
+			  const char *arg, int unset __maybe_unused)
+{
+	bool page_mode;
+	bool caller_mode;
+
+	if (!arg)
+		return -1;
+
+	page_mode = kmem_page > kmem_slab ||
+		    (kmem_page == 0 && kmem_slab == 0 &&
+		     kmem_default == KMEM_PAGE);
+	caller_mode = caller_flag > alloc_flag;
+
+	if (page_mode) {
+		if (caller_mode)
+			return setup_page_sorting(&page_caller_sort, arg);
+		return setup_page_sorting(&page_alloc_sort, arg);
+	}
+
+	if (caller_mode)
+		return setup_slab_sorting(&slab_caller_sort, arg);
+	return setup_slab_sorting(&slab_alloc_sort, arg);
+}
+
+/*
+ * --caller callback: raise caller_flag above alloc_flag so that later
+ * options can tell --caller was the more recent of the two.
+ */
+static int parse_caller_opt(const struct option *opt __maybe_unused,
+			    const char *arg __maybe_unused,
+			    int unset __maybe_unused)
+{
+	caller_flag = alloc_flag + 1;
+
+	return 0;
+}
+
+/*
+ * --alloc callback: raise alloc_flag above caller_flag so that later
+ * options can tell --alloc was the more recent of the two.
+ */
+static int parse_alloc_opt(const struct option *opt __maybe_unused,
+			   const char *arg __maybe_unused,
+			   int unset __maybe_unused)
+{
+	alloc_flag = caller_flag + 1;
+
+	return 0;
+}
+
+/*
+ * --slab callback: raise kmem_slab above kmem_page so that later options
+ * can tell --slab was the more recent of the two.
+ */
+static int parse_slab_opt(const struct option *opt __maybe_unused,
+			  const char *arg __maybe_unused,
+			  int unset __maybe_unused)
+{
+	kmem_slab = kmem_page + 1;
+
+	return 0;
+}
+
+/*
+ * --page callback: raise kmem_page above kmem_slab so that later options
+ * can tell --page was the more recent of the two.
+ */
+static int parse_page_opt(const struct option *opt __maybe_unused,
+			  const char *arg __maybe_unused,
+			  int unset __maybe_unused)
+{
+	kmem_page = kmem_slab + 1;
+
+	return 0;
+}
+
+/*
+ * --line callback: parse @arg as a base-10 line limit and store it in
+ * caller_lines when --caller was given after --alloc, in alloc_lines
+ * otherwise.
+ *
+ * Returns 0 on success and -1 when @arg is NULL, empty, or not entirely a
+ * number (previously such input was silently treated as 0).
+ */
+static int parse_line_opt(const struct option *opt __maybe_unused,
+			  const char *arg, int unset __maybe_unused)
+{
+	char *end;
+	int lines;
+
+	if (!arg)
+		return -1;
+
+	lines = strtoul(arg, &end, 10);
+	/* no digits consumed, or trailing garbage after the number */
+	if (end == arg || *end != '\0') {
+		pr_err("Invalid --line value: '%s'\n", arg);
+		return -1;
+	}
+
+	if (caller_flag > alloc_flag)
+		caller_lines = lines;
+	else
+		alloc_lines = lines;
+
+	return 0;
+}
+
+/*
+ * Build the argument vector for 'perf record' and hand it to cmd_record():
+ * the fixed record_args, the slab tracepoints when kmem_slab is set, "-g"
+ * plus the page tracepoints when kmem_page is set, and finally the caller's
+ * own arguments argv[1..argc-1].
+ *
+ * Returns -ENOMEM if the vector cannot be allocated, else cmd_record()'s
+ * result.
+ */
+static int __cmd_record(int argc, const char **argv)
+{
+	const char * const record_args[] = {
+	"record", "-a", "-R", "-c", "1",
+	};
+	const char * const slab_events[] = {
+	"-e", "kmem:kmalloc",
+	"-e", "kmem:kmalloc_node",
+	"-e", "kmem:kfree",
+	"-e", "kmem:kmem_cache_alloc",
+	"-e", "kmem:kmem_cache_alloc_node",
+	"-e", "kmem:kmem_cache_free",
+	};
+	const char * const page_events[] = {
+	"-e", "kmem:mm_page_alloc",
+	"-e", "kmem:mm_page_free",
+	};
+	unsigned int rec_argc, filled = 0, k;
+	const char **rec_argv;
+
+	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+	if (kmem_slab)
+		rec_argc += ARRAY_SIZE(slab_events);
+	if (kmem_page)
+		rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */
+
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
+	for (k = 0; k < ARRAY_SIZE(record_args); k++)
+		rec_argv[filled++] = strdup(record_args[k]);
+
+	if (kmem_slab) {
+		for (k = 0; k < ARRAY_SIZE(slab_events); k++)
+			rec_argv[filled++] = strdup(slab_events[k]);
+	}
+
+	if (kmem_page) {
+		rec_argv[filled++] = strdup("-g");
+
+		for (k = 0; k < ARRAY_SIZE(page_events); k++)
+			rec_argv[filled++] = strdup(page_events[k]);
+	}
+
+	/* argv[0] is the subcommand name and is not forwarded */
+	for (k = 1; k < (unsigned int)argc; k++)
+		rec_argv[filled++] = argv[k];
+
+	return cmd_record(filled, rec_argv);
+}
+
+/*
+ * Config callback: "kmem.default" set to "slab" or "page" selects
+ * kmem_default; any other value for that key is reported with pr_err().
+ * All other variables are ignored. Always returns 0.
+ */
+static int kmem_config(const char *var, const char *value, void *cb __maybe_unused)
+{
+	if (strcmp(var, "kmem.default") != 0)
+		return 0;
+
+	if (strcmp(value, "slab") == 0) {
+		kmem_default = KMEM_SLAB;
+	} else if (strcmp(value, "page") == 0) {
+		kmem_default = KMEM_PAGE;
+	} else {
+		pr_err("invalid default value ('slab' or 'page' required): %s\n",
+		       value);
+	}
+
+	return 0;
+}
+
+/*
+ * Entry point of 'perf kmem'. Reads the config, parses the options and the
+ * subcommand, then either forwards to __cmd_record() (subcommand starting
+ * with "rec") or opens the input as a session and runs the "stat" analysis
+ * through __cmd_kmem().
+ *
+ * Returns the result of the chosen subcommand, the perf_config() error, or
+ * a negative value when the session cannot be set up.
+ */
+int cmd_kmem(int argc, const char **argv)
+{
+	const char * const default_slab_sort = "frag,hit,bytes";
+	const char * const default_page_sort = "bytes,hit";
+	struct perf_data data = {
+		.mode = PERF_DATA_MODE_READ,
+	};
+	const struct option kmem_options[] = {
+	OPT_STRING('i', "input", &input_name, "file", "input file name"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
+			   "show per-callsite statistics", parse_caller_opt),
+	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
+			   "show per-allocation statistics", parse_alloc_opt),
+	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
+		     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
+		     "page, order, migtype, gfp", parse_sort_opt),
+	OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
+	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
+	OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
+	OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
+			   parse_slab_opt),
+	OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
+			   parse_page_opt),
+	OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
+	OPT_STRING(0, "time", &time_str, "str",
+		   "Time span of interest (start,stop)"),
+	OPT_END()
+	};
+	const char *const kmem_subcommands[] = { "record", "stat", NULL };
+	const char *kmem_usage[] = {
+		NULL,
+		NULL
+	};
+	struct perf_session *session;
+	const char errmsg[] = "No %s allocation events found.  Have you run 'perf kmem record --%s'?\n";
+	int ret = perf_config(kmem_config, NULL);
+
+	if (ret)
+		return ret;
+
+	argc = parse_options_subcommand(argc, argv, kmem_options,
+					kmem_subcommands, kmem_usage, 0);
+
+	if (!argc)
+		usage_with_options(kmem_usage, kmem_options);
+
+	/* neither --slab nor --page given: fall back to the configured default */
+	if (kmem_slab == 0 && kmem_page == 0) {
+		if (kmem_default == KMEM_SLAB)
+			kmem_slab = 1;
+		else
+			kmem_page = 1;
+	}
+
+	/* any subcommand starting with "rec" is treated as "record" */
+	if (!strncmp(argv[0], "rec", 3)) {
+		symbol__init(NULL);
+		return __cmd_record(argc, argv);
+	}
+
+	data.file.path = input_name;
+
+	kmem_session = session = perf_session__new(&data, false, &perf_kmem);
+	if (session == NULL)
+		return -1;
+
+	ret = -1;
+
+	/* refuse to analyze an allocator whose tracepoint was not recorded */
+	if (kmem_slab) {
+		if (!perf_evlist__find_tracepoint_by_name(session->evlist,
+							  "kmem:kmalloc")) {
+			pr_err(errmsg, "slab", "slab");
+			goto out_delete;
+		}
+	}
+
+	if (kmem_page) {
+		struct perf_evsel *evsel;
+
+		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+							     "kmem:mm_page_alloc");
+		if (evsel == NULL) {
+			pr_err(errmsg, "page", "page");
+			goto out_delete;
+		}
+
+		kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
+		symbol_conf.use_callchain = true;
+	}
+
+	symbol__init(&session->header.env);
+
+	if (perf_time__parse_str(&ptime, time_str) != 0) {
+		pr_err("Invalid time string\n");
+		ret = -EINVAL;
+		goto out_delete;
+	}
+
+	if (!strcmp(argv[0], "stat")) {
+		setlocale(LC_ALL, "");
+
+		if (cpu__setup_cpunode_map())
+			goto out_delete;
+
+		/* sort lists left empty by --sort get the default keys */
+		if (list_empty(&slab_caller_sort))
+			setup_slab_sorting(&slab_caller_sort, default_slab_sort);
+		if (list_empty(&slab_alloc_sort))
+			setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
+		if (list_empty(&page_caller_sort))
+			setup_page_sorting(&page_caller_sort, default_page_sort);
+		if (list_empty(&page_alloc_sort))
+			setup_page_sorting(&page_alloc_sort, default_page_sort);
+
+		/* fixed key lists for the *_sort_input lists, independent of --sort */
+		if (kmem_page) {
+			setup_page_sorting(&page_alloc_sort_input,
+					   "page,order,migtype,gfp");
+			setup_page_sorting(&page_caller_sort_input,
+					   "callsite,order,migtype,gfp");
+		}
+		ret = __cmd_kmem(session);
+	} else
+		usage_with_options(kmem_usage, kmem_options);
+
+out_delete:
+	perf_session__delete(session);
+
+	return ret;
+}
+
diff --git a/builtin-kvm.c b/builtin-kvm.c
new file mode 100644
index 0000000..72e2ca0
--- /dev/null
+++ b/builtin-kvm.c
@@ -0,0 +1,1642 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/term.h"
+#include "util/util.h"
+#include "util/cache.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+#include "util/session.h"
+#include "util/intlist.h"
+#include <subcmd/parse-options.h>
+#include "util/trace-event.h"
+#include "util/debug.h"
+#include "util/tool.h"
+#include "util/stat.h"
+#include "util/top.h"
+#include "util/data.h"
+#include "util/ordered-events.h"
+
+#include <sys/prctl.h>
+#ifdef HAVE_TIMERFD_SUPPORT
+#include <sys/timerfd.h>
+#endif
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <poll.h>
+#include <termios.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <math.h>
+
+/*
+ * Return a strdup()'d default data file name: "perf.data.host" when only
+ * perf_host is set, "perf.data.guest" when only perf_guest is set, and
+ * "perf.data.kvm" otherwise. The result is NULL if strdup() fails.
+ */
+static const char *get_filename_for_perf_kvm(void)
+{
+	if (perf_host && !perf_guest)
+		return strdup("perf.data.host");
+
+	if (!perf_host && perf_guest)
+		return strdup("perf.data.guest");
+
+	return strdup("perf.data.kvm");
+}
+
+#ifdef HAVE_KVM_STAT_SUPPORT
+#include "util/kvm-stat.h"
+
+/*
+ * Fill @key for an exit event: key->key is the sample's kvm_exit_reason
+ * field value and key->info is cleared.
+ */
+void exit_event_get_key(struct perf_evsel *evsel,
+			struct perf_sample *sample,
+			struct event_key *key)
+{
+	key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
+	key->info = 0;
+}
+
+/* True when @evsel's name equals kvm_exit_trace. */
+bool kvm_exit_event(struct perf_evsel *evsel)
+{
+	return strcmp(evsel->name, kvm_exit_trace) == 0;
+}
+
+/*
+ * Begin-event predicate for exit events: when @evsel is the kvm exit
+ * tracepoint, fill @key from @sample and return true; otherwise return
+ * false and leave @key untouched.
+ */
+bool exit_event_begin(struct perf_evsel *evsel,
+		      struct perf_sample *sample, struct event_key *key)
+{
+	if (!kvm_exit_event(evsel))
+		return false;
+
+	exit_event_get_key(evsel, sample, key);
+	return true;
+}
+
+/* True when @evsel's name equals kvm_entry_trace. */
+bool kvm_entry_event(struct perf_evsel *evsel)
+{
+	return strcmp(evsel->name, kvm_entry_trace) == 0;
+}
+
+/*
+ * End-event predicate for exit events: an exit ends at the kvm entry
+ * tracepoint. @sample and @key are not used.
+ */
+bool exit_event_end(struct perf_evsel *evsel,
+		    struct perf_sample *sample __maybe_unused,
+		    struct event_key *key __maybe_unused)
+{
+	bool is_entry = kvm_entry_event(evsel);
+
+	return is_entry;
+}
+
+/*
+ * Look up the name for @exit_code in @tbl, a table terminated by an entry
+ * whose reason is NULL. Unknown codes are reported via pr_err() together
+ * with kvm->exit_reasons_isa and mapped to the string "UNKNOWN".
+ */
+static const char *get_exit_reason(struct perf_kvm_stat *kvm,
+				   struct exit_reasons_table *tbl,
+				   u64 exit_code)
+{
+	for (; tbl->reason != NULL; tbl++) {
+		if (tbl->exit_code == exit_code)
+			return tbl->reason;
+	}
+
+	/* the argument is unsigned long long, so the conversion must be %llu */
+	pr_err("unknown kvm exit code:%llu on %s\n",
+		(unsigned long long)exit_code, kvm->exit_reasons_isa);
+	return "UNKNOWN";
+}
+
+/*
+ * Write the exit reason name for @key into @decode, a buffer of at least
+ * decode_str_len bytes.
+ */
+void exit_event_decode_key(struct perf_kvm_stat *kvm,
+			   struct event_key *key,
+			   char *decode)
+{
+	scnprintf(decode, decode_str_len, "%s",
+		  get_exit_reason(kvm, key->exit_reasons, key->key));
+}
+
+/*
+ * Select kvm->events_ops by looking up kvm->report_event in the
+ * name-terminated kvm_reg_events_ops table. Returns false when no entry
+ * matches.
+ */
+static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
+{
+	struct kvm_reg_events_ops *reg;
+
+	for (reg = kvm_reg_events_ops; reg->name; reg++) {
+		if (strcmp(reg->name, kvm->report_event) != 0)
+			continue;
+
+		kvm->events_ops = reg->ops;
+		return true;
+	}
+
+	return false;
+}
+
+/* Per-thread tracking state, stored as the thread's priv pointer. */
+struct vcpu_event_record {
+	int vcpu_id;			/* read from the kvm entry sample that created the record */
+	u64 start_time;			/* timestamp of the pending begin event, 0 when none */
+	struct kvm_event *last_event;	/* event matched by the last begin/child event, may be NULL */
+};
+
+
+/* Initialize every bucket of kvm->kvm_events_cache as an empty list. */
+static void init_kvm_event_record(struct perf_kvm_stat *kvm)
+{
+	unsigned int bucket = 0;
+
+	while (bucket < EVENTS_CACHE_SIZE) {
+		INIT_LIST_HEAD(&kvm->kvm_events_cache[bucket]);
+		bucket++;
+	}
+}
+
+#ifdef HAVE_TIMERFD_SUPPORT
+/*
+ * Reset the accumulated time and statistics (total and per-vcpu) of every
+ * event in the EVENTS_CACHE_SIZE buckets of @kvm_events_cache. The events
+ * themselves stay in the cache.
+ */
+static void clear_events_cache_stats(struct list_head *kvm_events_cache)
+{
+	unsigned int bucket;
+
+	for (bucket = 0; bucket < EVENTS_CACHE_SIZE; bucket++) {
+		struct kvm_event *event;
+
+		list_for_each_entry(event, &kvm_events_cache[bucket], hash_entry) {
+			int cpu;
+
+			/* reset stats for event */
+			event->total.time = 0;
+			init_stats(&event->total.stats);
+
+			for (cpu = 0; cpu < event->max_vcpu; cpu++) {
+				event->vcpu[cpu].time = 0;
+				init_stats(&event->vcpu[cpu].stats);
+			}
+		}
+	}
+}
+#endif
+
+/*
+ * Map @key to a bucket index by masking it with EVENTS_CACHE_SIZE - 1.
+ * NOTE(review): presumably EVENTS_CACHE_SIZE is a power of two - confirm.
+ */
+static int kvm_events_hash_fn(u64 key)
+{
+	const u64 mask = EVENTS_CACHE_SIZE - 1;
+
+	return key & mask;
+}
+
+/*
+ * Make sure event->vcpu[] has a slot for @vcpu_id, growing the array in
+ * DEFAULT_VCPU_NUM steps and zero-filling the newly added tail.
+ *
+ * Returns true when the slot exists. On allocation failure false is
+ * returned and both event->vcpu and event->max_vcpu keep their previous
+ * values, so code bounded by max_vcpu never sees a NULL array.
+ */
+static bool kvm_event_expand(struct kvm_event *event, int vcpu_id)
+{
+	int old_max_vcpu = event->max_vcpu;
+	int new_max_vcpu = old_max_vcpu;
+	void *grown;
+
+	if (vcpu_id < old_max_vcpu)
+		return true;
+
+	while (new_max_vcpu <= vcpu_id)
+		new_max_vcpu += DEFAULT_VCPU_NUM;
+
+	/* commit the new size only after realloc() succeeded */
+	grown = realloc(event->vcpu, new_max_vcpu * sizeof(*event->vcpu));
+	if (!grown) {
+		pr_err("Not enough memory\n");
+		return false;
+	}
+
+	event->vcpu = grown;
+	event->max_vcpu = new_max_vcpu;
+
+	memset(event->vcpu + old_max_vcpu, 0,
+	       (new_max_vcpu - old_max_vcpu) * sizeof(*event->vcpu));
+	return true;
+}
+
+/*
+ * Allocate a zeroed kvm_event, copy @key into it and initialize its total
+ * statistics. Returns NULL (after logging) when the allocation fails.
+ */
+static struct kvm_event *kvm_alloc_init_event(struct event_key *key)
+{
+	struct kvm_event *event = zalloc(sizeof(*event));
+
+	if (event == NULL) {
+		pr_err("Not enough memory\n");
+		return NULL;
+	}
+
+	init_stats(&event->total.stats);
+	event->key = *key;
+
+	return event;
+}
+
+/*
+ * Return the cached event whose key and info both match @key, creating and
+ * caching a new one when none exists. @key->key must not be INVALID_KEY.
+ * Returns NULL when a new event cannot be allocated.
+ */
+static struct kvm_event *find_create_kvm_event(struct perf_kvm_stat *kvm,
+					       struct event_key *key)
+{
+	struct list_head *bucket;
+	struct kvm_event *event;
+
+	BUG_ON(key->key == INVALID_KEY);
+
+	bucket = &kvm->kvm_events_cache[kvm_events_hash_fn(key->key)];
+
+	list_for_each_entry(event, bucket, hash_entry) {
+		if (event->key.key != key->key)
+			continue;
+		if (event->key.info != key->info)
+			continue;
+
+		return event;
+	}
+
+	event = kvm_alloc_init_event(key);
+	if (event != NULL)
+		list_add(&event->hash_entry, bucket);
+
+	return event;
+}
+
+/*
+ * Record a begin event on @vcpu_record: remember @timestamp as the start
+ * time and the event matching @key (NULL when the key is INVALID_KEY or
+ * the event could not be created). Always returns true.
+ */
+static bool handle_begin_event(struct perf_kvm_stat *kvm,
+			       struct vcpu_event_record *vcpu_record,
+			       struct event_key *key, u64 timestamp)
+{
+	struct kvm_event *event;
+
+	if (key->key == INVALID_KEY)
+		event = NULL;
+	else
+		event = find_create_kvm_event(kvm, key);
+
+	vcpu_record->last_event = event;
+	vcpu_record->start_time = timestamp;
+
+	return true;
+}
+
+/* Account one sample of duration @time_diff in @kvm_stats. */
+static void
+kvm_update_event_stats(struct kvm_event_stats *kvm_stats, u64 time_diff)
+{
+	update_stats(&kvm_stats->stats, time_diff);
+	kvm_stats->time += time_diff;
+}
+
+/*
+ * Relative standard deviation of @event's samples, taken from the total
+ * statistics when @vcpu_id is -1 and from that vcpu's slot otherwise.
+ */
+static double kvm_event_rel_stddev(int vcpu_id, struct kvm_event *event)
+{
+	struct kvm_event_stats *picked;
+
+	if (vcpu_id == -1)
+		picked = &event->total;
+	else
+		picked = &event->vcpu[vcpu_id];
+
+	return rel_stddev_stats(stddev_stats(&picked->stats),
+				avg_stats(&picked->stats));
+}
+
+/*
+ * Account @time_diff on @event: in the total statistics when @vcpu_id is
+ * -1, otherwise in that vcpu's slot (growing the per-vcpu array first).
+ * Returns false only when the array cannot be grown.
+ */
+static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
+			     u64 time_diff)
+{
+	struct kvm_event_stats *target;
+
+	if (vcpu_id == -1)
+		target = &event->total;
+	else if (kvm_event_expand(event, vcpu_id))
+		target = &event->vcpu[vcpu_id];
+	else
+		return false;
+
+	kvm_update_event_stats(target, time_diff);
+	return true;
+}
+
+/*
+ * Check whether @evsel is one of the child events of the active events_ops
+ * (a name-terminated table, possibly absent). On a match the child's
+ * get_key() fills @key and true is returned.
+ */
+static bool is_child_event(struct perf_kvm_stat *kvm,
+			   struct perf_evsel *evsel,
+			   struct perf_sample *sample,
+			   struct event_key *key)
+{
+	struct child_event_ops *child = kvm->events_ops->child_ops;
+
+	if (child == NULL)
+		return false;
+
+	while (child->name) {
+		if (strcmp(evsel->name, child->name) == 0) {
+			child->get_key(evsel, sample, key);
+			return true;
+		}
+		child++;
+	}
+
+	return false;
+}
+
+/*
+ * Record a child event on @vcpu_record: last_event becomes the event
+ * matching @key (NULL when the key is INVALID_KEY or the event could not
+ * be created). The start time is left alone. Always returns true.
+ */
+static bool handle_child_event(struct perf_kvm_stat *kvm,
+			       struct vcpu_event_record *vcpu_record,
+			       struct event_key *key,
+			       struct perf_sample *sample __maybe_unused)
+{
+	struct kvm_event *event;
+
+	if (key->key == INVALID_KEY)
+		event = NULL;
+	else
+		event = find_create_kvm_event(kvm, key);
+
+	vcpu_record->last_event = event;
+	return true;
+}
+
+/* True when @event is listed in the NULL-terminated kvm_skip_events table. */
+static bool skip_event(const char *event)
+{
+	const char * const *entry = kvm_skip_events;
+
+	while (*entry) {
+		if (strcmp(event, *entry) == 0)
+			return true;
+		entry++;
+	}
+
+	return false;
+}
+
+/*
+ * Close the interval opened by the last begin event of @vcpu_record and
+ * account its duration on the matching event.
+ *
+ * Returns false only when an event cannot be created or its per-vcpu
+ * statistics cannot be grown; unmatched or out-of-order end events are
+ * skipped and return true.
+ */
+static bool handle_end_event(struct perf_kvm_stat *kvm,
+			     struct vcpu_event_record *vcpu_record,
+			     struct event_key *key,
+			     struct perf_sample *sample)
+{
+	struct kvm_event *event = vcpu_record->last_event;
+	u64 time_begin = vcpu_record->start_time;
+	u64 time_diff;
+	int vcpu;
+
+	/* statistics go to the total slot unless a single vcpu is traced */
+	vcpu = (kvm->trace_vcpu == -1) ? -1 : vcpu_record->vcpu_id;
+
+	/* The begin event is not caught. */
+	if (!time_begin)
+		return true;
+
+	/*
+	 * In some case, the 'begin event' only records the start timestamp,
+	 * the actual event is recognized in the 'end event' (e.g. mmio-event).
+	 */
+
+	/* Both begin and end events did not get the key. */
+	if (!event && key->key == INVALID_KEY)
+		return true;
+
+	if (!event) {
+		event = find_create_kvm_event(kvm, key);
+		if (!event)
+			return false;
+	}
+
+	vcpu_record->last_event = NULL;
+	vcpu_record->start_time = 0;
+
+	/* seems to happen once in a while during live mode */
+	if (sample->time < time_begin) {
+		pr_debug("End time before begin time; skipping event.\n");
+		return true;
+	}
+
+	time_diff = sample->time - time_begin;
+
+	/* report intervals longer than the configured duration threshold */
+	if (kvm->duration && time_diff > kvm->duration) {
+		char decode[decode_str_len];
+
+		kvm->events_ops->decode_key(kvm, &event->key, decode);
+		if (!skip_event(decode)) {
+			pr_info("%" PRIu64 " VM %d, vcpu %d: %s event took %" PRIu64 "usec\n",
+				 sample->time, sample->pid, vcpu_record->vcpu_id,
+				 decode, time_diff / NSEC_PER_USEC);
+		}
+	}
+
+	return update_kvm_event(event, vcpu, time_diff);
+}
+
+/*
+ * Return the vcpu_event_record attached to @thread. A record is created
+ * (and its vcpu_id read from @sample) the first time a kvm entry event is
+ * seen for a thread that has none. Returns NULL when the thread has no
+ * record yet or the allocation fails.
+ */
+static
+struct vcpu_event_record *per_vcpu_record(struct thread *thread,
+					  struct perf_evsel *evsel,
+					  struct perf_sample *sample)
+{
+	struct vcpu_event_record *rec;
+
+	/* Only kvm_entry records vcpu id. */
+	if (thread__priv(thread) || !kvm_entry_event(evsel))
+		return thread__priv(thread);
+
+	rec = zalloc(sizeof(*rec));
+	if (rec == NULL) {
+		pr_err("%s: Not enough memory\n", __func__);
+		return NULL;
+	}
+
+	rec->vcpu_id = perf_evsel__intval(evsel, sample, vcpu_id_str);
+	thread__set_priv(thread, rec);
+
+	return thread__priv(thread);
+}
+
+/*
+ * Dispatch one sample to the begin / child / end handler of the active
+ * events_ops, in that order of precedence. Samples from threads without a
+ * vcpu record, from vcpus other than the traced one, or matching no
+ * predicate are ignored and return true.
+ */
+static bool handle_kvm_event(struct perf_kvm_stat *kvm,
+			     struct thread *thread,
+			     struct perf_evsel *evsel,
+			     struct perf_sample *sample)
+{
+	struct event_key key = { .key = INVALID_KEY,
+				 .exit_reasons = kvm->exit_reasons };
+	struct vcpu_event_record *rec;
+
+	rec = per_vcpu_record(thread, evsel, sample);
+	if (rec == NULL)
+		return true;
+
+	/* only process events for vcpus user cares about */
+	if (kvm->trace_vcpu != -1 && kvm->trace_vcpu != rec->vcpu_id)
+		return true;
+
+	if (kvm->events_ops->is_begin_event(evsel, sample, &key))
+		return handle_begin_event(kvm, rec, &key, sample->time);
+
+	if (is_child_event(kvm, evsel, sample, &key))
+		return handle_child_event(kvm, rec, &key, sample);
+
+	if (kvm->events_ops->is_end_event(evsel, sample, &key))
+		return handle_end_event(kvm, rec, &key, sample);
+
+	return true;
+}
+
+/*
+ * Define get_event_<func>(event, vcpu): returns <field> from the event's
+ * total statistics when vcpu is -1, from event->vcpu[vcpu] when that slot
+ * exists, and 0 when vcpu is beyond event->max_vcpu.
+ */
+#define GET_EVENT_KEY(func, field)					\
+static u64 get_event_ ##func(struct kvm_event *event, int vcpu)		\
+{									\
+	if (vcpu == -1)							\
+		return event->total.field;				\
+									\
+	if (vcpu >= event->max_vcpu)					\
+		return 0;						\
+									\
+	return event->vcpu[vcpu].field;					\
+}
+
+/*
+ * Define get_event_<func>() plus compare_kvm_event_<func>(one, two, vcpu),
+ * a "bigger than" predicate: non-zero when one's value is strictly greater
+ * than two's.
+ */
+#define COMPARE_EVENT_KEY(func, field)					\
+GET_EVENT_KEY(func, field)						\
+static int compare_kvm_event_ ## func(struct kvm_event *one,		\
+					struct kvm_event *two, int vcpu)\
+{									\
+	return get_event_ ##func(one, vcpu) >				\
+				get_event_ ##func(two, vcpu);		\
+}
+
+/*
+ * Instantiate the accessors get_event_time/count/mean/max/min; count and
+ * mean additionally get a compare_kvm_event_*() predicate for sorting.
+ */
+GET_EVENT_KEY(time, time);
+COMPARE_EVENT_KEY(count, stats.n);
+COMPARE_EVENT_KEY(mean, stats.mean);
+GET_EVENT_KEY(max, stats.max);
+GET_EVENT_KEY(min, stats.min);
+
+/* Build one keys[] entry: the sort key name and its compare predicate. */
+#define DEF_SORT_NAME_KEY(name, compare_key)				\
+	{ #name, compare_kvm_event_ ## compare_key }
+
+/*
+ * Sort keys understood by select_key(): "sample" orders by sample count,
+ * "time" by mean time. The table ends with a NULL name.
+ */
+static struct kvm_event_key keys[] = {
+	DEF_SORT_NAME_KEY(sample, count),
+	DEF_SORT_NAME_KEY(time, mean),
+	{ NULL, NULL }
+};
+
+/*
+ * Set kvm->compare to the predicate registered in keys[] under
+ * kvm->sort_key. Logs an error and returns false for an unknown key.
+ */
+static bool select_key(struct perf_kvm_stat *kvm)
+{
+	struct kvm_event_key *entry;
+
+	for (entry = keys; entry->name; entry++) {
+		if (strcmp(entry->name, kvm->sort_key) != 0)
+			continue;
+
+		kvm->compare = entry->key;
+		return true;
+	}
+
+	pr_err("Unknown compare key:%s\n", kvm->sort_key);
+	return false;
+}
+
+/*
+ * Link @event into the rbtree @result. Whenever @bigger reports that
+ * @event ranks above the visited node the walk goes left, otherwise right,
+ * so the leftmost node is the top-ranked event.
+ */
+static void insert_to_result(struct rb_root *result, struct kvm_event *event,
+			     key_cmp_fun bigger, int vcpu)
+{
+	struct rb_node **link = &result->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		struct kvm_event *cur = container_of(*link, struct kvm_event, rb);
+
+		parent = *link;
+		link = bigger(event, cur, vcpu) ? &parent->rb_left
+						: &parent->rb_right;
+	}
+
+	rb_link_node(&event->rb, parent, link);
+	rb_insert_color(&event->rb, result);
+}
+
+/*
+ * Add @event's sample count and time, as seen by the traced vcpu, to the
+ * running totals kept in @kvm.
+ */
+static void
+update_total_count(struct perf_kvm_stat *kvm, struct kvm_event *event)
+{
+	const int vcpu = kvm->trace_vcpu;
+	u64 samples = get_event_count(event, vcpu);
+	u64 spent = get_event_time(event, vcpu);
+
+	kvm->total_count += samples;
+	kvm->total_time += spent;
+}
+
+/* An event is reported only if it has at least one sample for @vcpu. */
+static bool event_is_valid(struct kvm_event *event, int vcpu)
+{
+	return get_event_count(event, vcpu) != 0;
+}
+
+/*
+ * Walk every cached event; those with samples for the traced vcpu are
+ * added to the running totals and inserted into kvm->result in
+ * kvm->compare order.
+ */
+static void sort_result(struct perf_kvm_stat *kvm)
+{
+	const int vcpu = kvm->trace_vcpu;
+	unsigned int bucket;
+
+	for (bucket = 0; bucket < EVENTS_CACHE_SIZE; bucket++) {
+		struct kvm_event *event;
+
+		list_for_each_entry(event, &kvm->kvm_events_cache[bucket], hash_entry) {
+			if (!event_is_valid(event, vcpu))
+				continue;
+
+			update_total_count(kvm, event);
+			insert_to_result(&kvm->result, event,
+					 kvm->compare, vcpu);
+		}
+	}
+}
+
+/*
+ * Detach the leftmost node of @result and return the event that owns it,
+ * or NULL when the tree is empty.
+ */
+static struct kvm_event *pop_from_result(struct rb_root *result)
+{
+	struct rb_node *leftmost = rb_first(result);
+
+	if (leftmost == NULL)
+		return NULL;
+
+	rb_erase(leftmost, result);
+
+	return container_of(leftmost, struct kvm_event, rb);
+}
+
+/*
+ * Print the "Analyze events for ..." banner: first what is monitored (all
+ * VMs or the target pid list), then which vcpu the report covers.
+ */
+static void print_vcpu_info(struct perf_kvm_stat *kvm)
+{
+	const int vcpu = kvm->trace_vcpu;
+
+	pr_info("Analyze events for ");
+
+	if (kvm->opts.target.system_wide) {
+		pr_info("all VMs, ");
+	} else if (kvm->opts.target.pid) {
+		pr_info("pid(s) %s, ", kvm->opts.target.pid);
+	} else {
+		pr_info("dazed and confused on what is monitored, ");
+	}
+
+	if (vcpu != -1)
+		pr_info("VCPU %d:\n\n", vcpu);
+	else
+		pr_info("all VCPUs:\n\n");
+}
+
+/*
+ * Print the current local time as HH:MM:SS.microseconds, or an all-zero
+ * placeholder when the time cannot be converted to local time.
+ */
+static void show_timeofday(void)
+{
+	struct timeval tv;
+	struct tm ltime;
+	char date[64];
+
+	gettimeofday(&tv, NULL);
+
+	if (!localtime_r(&tv.tv_sec, &ltime)) {
+		pr_info("00:00:00.000000");
+		return;
+	}
+
+	strftime(date, sizeof(date), "%H:%M:%S", &ltime);
+	pr_info("%s.%06ld", date, tv.tv_usec);
+}
+
+/*
+ * Print the report table. In live mode the console is cleared and the time
+ * of day shown first. Events are popped from kvm->result, which empties
+ * the tree, and printed one per row, followed by the totals and the number
+ * of lost events when there are any.
+ */
+static void print_result(struct perf_kvm_stat *kvm)
+{
+	const int vcpu = kvm->trace_vcpu;
+	char decode[decode_str_len];
+	struct kvm_event *event;
+
+	if (kvm->live) {
+		puts(CONSOLE_CLEAR);
+		show_timeofday();
+	}
+
+	pr_info("\n\n");
+	print_vcpu_info(kvm);
+
+	/* column headers */
+	pr_info("%*s ", decode_str_len, kvm->events_ops->name);
+	pr_info("%10s ", "Samples");
+	pr_info("%9s ", "Samples%");
+
+	pr_info("%9s ", "Time%");
+	pr_info("%11s ", "Min Time");
+	pr_info("%11s ", "Max Time");
+	pr_info("%16s ", "Avg time");
+	pr_info("\n\n");
+
+	for (event = pop_from_result(&kvm->result); event != NULL;
+	     event = pop_from_result(&kvm->result)) {
+		u64 n_samples = get_event_count(event, vcpu);
+		u64 spent = get_event_time(event, vcpu);
+		u64 longest = get_event_max(event, vcpu);
+		u64 shortest = get_event_min(event, vcpu);
+
+		kvm->events_ops->decode_key(kvm, &event->key, decode);
+		pr_info("%*s ", decode_str_len, decode);
+		pr_info("%10llu ", (unsigned long long)n_samples);
+		pr_info("%8.2f%% ", (double)n_samples / kvm->total_count * 100);
+		pr_info("%8.2f%% ", (double)spent / kvm->total_time * 100);
+		pr_info("%9.2fus ", (double)shortest / NSEC_PER_USEC);
+		pr_info("%9.2fus ", (double)longest / NSEC_PER_USEC);
+		pr_info("%9.2fus ( +-%7.2f%% )", (double)spent / n_samples / NSEC_PER_USEC,
+			kvm_event_rel_stddev(vcpu, event));
+		pr_info("\n");
+	}
+
+	pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
+		kvm->total_count, kvm->total_time / (double)NSEC_PER_USEC);
+
+	if (kvm->lost_events)
+		pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
+}
+
+#ifdef HAVE_TIMERFD_SUPPORT
+static int process_lost_event(struct perf_tool *tool,
+			      union perf_event *event __maybe_unused,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	/* @tool is embedded in a perf_kvm_stat: step back to it and count the loss */
+	container_of(tool, struct perf_kvm_stat, tool)->lost_events++;
+
+	return 0;
+}
+#endif
+
+/* True when a pid filter exists and the sample's pid is not on it. */
+static bool skip_sample(struct perf_kvm_stat *kvm,
+			struct perf_sample *sample)
+{
+	if (!kvm->pid_list)
+		return false;
+	return intlist__find(kvm->pid_list, sample->pid) == NULL;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine)
+{
+	struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
+						 tool);
+	struct thread *thread;
+	int ret;
+
+	if (skip_sample(kvm, sample))
+		return 0;
+
+	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+	if (!thread) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	ret = handle_kvm_event(kvm, thread, evsel, sample) ? 0 : -1;
+
+	/* release the thread obtained above, whatever the outcome */
+	thread__put(thread);
+	return ret;
+}
+
+static int cpu_isa_config(struct perf_kvm_stat *kvm)
+{
+	char live_cpuid[64], *cpuid;
+	int ret;
+
+	if (!kvm->live) {
+		cpuid = kvm->session->header.env.cpuid;	/* taken from the session header */
+	} else {
+		ret = get_cpuid(live_cpuid, sizeof(live_cpuid));
+		if (ret != 0) {
+			pr_err("Failed to look up CPU type\n");
+			return ret;
+		}
+		cpuid = live_cpuid;
+	}
+
+	if (cpuid == NULL) {
+		pr_err("Failed to look up CPU type\n");
+		return -EINVAL;
+	}
+
+	ret = cpu_isa_init(kvm, cpuid);
+	if (ret == -ENOTSUP)
+		pr_err("CPU %s is not supported.\n", cpuid);
+	return ret;
+}
+
+/* Accept -1 (all VCPUs) or any non-negative VCPU id; complain otherwise. */
+static bool verify_vcpu(int vcpu)
+{
+	if (vcpu >= -1)
+		return true;
+
+	pr_err("Invalid vcpu:%d.\n", vcpu);
+	return false;
+}
+
+#ifdef HAVE_TIMERFD_SUPPORT
+/* Cap on the events queued from one mmap in a single pass; kept modest
+ * so that the processing of samples per mmap stays smooth.
+ */
+#define PERF_KVM__MAX_EVENTS_PER_MMAP  25
+
+static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
+				   u64 *mmap_time)
+{	/* Queue events from mmap @idx; returns how many were queued, or -1 on error. */
+	struct perf_evlist *evlist = kvm->evlist;
+	union perf_event *event;
+	struct perf_mmap *md;
+	u64 timestamp;
+	s64 n = 0;
+	int err;
+
+	*mmap_time = ULLONG_MAX;	/* stays at this value when no event is read */
+	md = &evlist->mmap[idx];
+	err = perf_mmap__read_init(md);
+	if (err < 0)
+		return (err == -EAGAIN) ? 0 : -1;	/* -EAGAIN is reported as "zero events", not as a failure */
+
+	while ((event = perf_mmap__read_event(md)) != NULL) {
+		err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
+		if (err) {
+			perf_mmap__consume(md);
+			pr_err("Failed to parse sample\n");
+			return -1;
+		}
+
+		err = perf_session__queue_event(kvm->session, event, timestamp, 0);
+		/*
+		 * FIXME: Here we can't consume the event, as perf_session__queue_event will
+		 *        point to it, and it'll get possibly overwritten by the kernel.
+		 */
+		perf_mmap__consume(md);
+
+		if (err) {
+			pr_err("Failed to enqueue sample: %d\n", err);
+			return -1;
+		}
+
+		/* save time stamp of our first sample for this mmap */
+		if (n == 0)
+			*mmap_time = timestamp;
+
+		/* limit events per mmap handled all at once */
+		n++;
+		if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
+			break;
+	}
+
+	perf_mmap__read_done(md);	/* NOTE(review): not reached on the two "return -1" paths above */
+	return n;
+}
+
+static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm)
+{	/* One pass over all mmaps; returns 1 if any mmap hit its cap, 0 if none, else an error. */
+	int i, err, throttled = 0;
+	s64 n, ntotal = 0;
+	u64 flush_time = ULLONG_MAX, mmap_time;
+
+	for (i = 0; i < kvm->evlist->nr_mmaps; i++) {
+		n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time);
+		if (n < 0)
+			return -1;
+
+		/* flush time is going to be the minimum of all the individual
+		 * mmap times. Essentially, we flush all the samples queued up
+		 * from the last pass under our minimal start time -- that leaves
+		 * a very small race for samples to come in with a lower timestamp.
+		 * The ioctl to return the perf_clock timestamp should close the
+		 * race entirely.
+		 */
+		if (mmap_time < flush_time)
+			flush_time = mmap_time;
+
+		ntotal += n;
+		if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
+			throttled = 1;	/* this mmap stopped at the per-pass cap */
+	}
+
+	/* flush queue after each round in which we processed events */
+	if (ntotal) {
+		struct ordered_events *oe = &kvm->session->ordered_events;
+
+		oe->next_flush = flush_time;
+		err = ordered_events__flush(oe, OE_FLUSH__ROUND);
+		if (err) {
+			if (kvm->lost_events)
+				pr_info("\nLost events: %" PRIu64 "\n\n",
+					kvm->lost_events);
+			return err;	/* the flush error is passed to the caller unchanged */
+		}
+	}
+
+	return throttled;
+}
+
+static volatile int done;	/* non-zero ends the live loop; set on a signal or the 'q' key */
+
+static void sig_handler(int sig __maybe_unused)
+{
+	done = 1;	/* only raises the flag; the live loop tests it each iteration */
+}
+
+/* Arm a periodic non-blocking timerfd; returns 0, or -1 with kvm->timerfd < 0. */
+static int perf_kvm__timerfd_create(struct perf_kvm_stat *kvm)
+{
+	struct itimerspec new_value;
+
+	kvm->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+	if (kvm->timerfd < 0) {
+		pr_err("timerfd_create failed\n");
+		return -1;
+	}
+
+	new_value.it_value.tv_sec = kvm->display_time;
+	new_value.it_value.tv_nsec = 0;
+	new_value.it_interval.tv_sec = kvm->display_time;
+	new_value.it_interval.tv_nsec = 0;
+
+	if (timerfd_settime(kvm->timerfd, 0, &new_value, NULL) != 0) {
+		pr_err("timerfd_settime failed: %d\n", errno);
+		close(kvm->timerfd);
+		/* the caller closes any timerfd >= 0: mark it so it is not closed twice */
+		kvm->timerfd = -1;
+		return -1;
+	}
+
+	return 0;
+}
+
+static int perf_kvm__handle_timerfd(struct perf_kvm_stat *kvm)
+{	/* If the timer fired: print the current table and reset the counters. */
+	uint64_t c;	/* value read from the timerfd; anything above 1 is logged as missed beats */
+	int rc;
+
+	rc = read(kvm->timerfd, &c, sizeof(uint64_t));
+	if (rc < 0) {
+		if (errno == EAGAIN)
+			return 0;	/* nothing to read on the non-blocking fd: no refresh due */
+
+		pr_err("Failed to read timer fd: %d\n", errno);
+		return -1;
+	}
+
+	if (rc != sizeof(uint64_t)) {
+		pr_err("Error reading timer fd - invalid size returned\n");
+		return -1;
+	}
+
+	if (c != 1)
+		pr_debug("Missed timer beats: %" PRIu64 "\n", c-1);
+
+	/* update display */
+	sort_result(kvm);
+	print_result(kvm);
+
+	/* reset counts */
+	clear_events_cache_stats(kvm->kvm_events_cache);
+	kvm->total_count = 0;
+	kvm->total_time = 0;
+	kvm->lost_events = 0;
+
+	return 0;
+}
+
+/* Add O_NONBLOCK to the file status flags of @fd; returns 0, or -1 on error. */
+static int fd_set_nonblock(int fd)
+{
+	long flags;
+
+	flags = fcntl(fd, F_GETFL);
+	if (flags < 0) {
+		pr_err("Failed to get current flags for fd %d\n", fd);
+		return -1;
+	}
+
+	if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) >= 0)
+		return 0;
+
+	pr_err("Failed to set non-block option on fd %d\n", fd);
+	return -1;
+}
+
+/*
+ * Consume one character from stdin.
+ *
+ * Returns 1 if it was 'q' (the quit request) and 0 for anything else,
+ * including EOF.
+ */
+static int perf_kvm__handle_stdin(void)
+{
+	return getc(stdin) == 'q' ? 1 : 0;
+}
+
+/* Live mode loop: read events, refresh on each timer tick, stop on 'q' or a signal. */
+static int kvm_events_live_report(struct perf_kvm_stat *kvm)
+{
+	int nr_stdin, ret, err = -EINVAL;
+	struct termios save;
+
+	/* live flag must be set first: cpu_isa_config() branches on it */
+	kvm->live = true;
+	ret = cpu_isa_config(kvm);
+	if (ret < 0)
+		return ret;
+
+	/* nothing to undo yet: 'save' is unset and no timerfd exists, so skip "out" */
+	if (!verify_vcpu(kvm->trace_vcpu) ||
+	    !select_key(kvm) ||
+	    !register_kvm_events_ops(kvm))
+		return err;
+
+	set_term_quiet_input(&save);
+	init_kvm_event_record(kvm);
+
+	signal(SIGINT, sig_handler);
+	signal(SIGTERM, sig_handler);
+
+	/* add timer fd */
+	if (perf_kvm__timerfd_create(kvm) < 0) {
+		err = -1;
+		goto out;
+	}
+
+	if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0)
+		goto out;
+
+	nr_stdin = perf_evlist__add_pollfd(kvm->evlist, fileno(stdin));
+	if (nr_stdin < 0)
+		goto out;
+
+	if (fd_set_nonblock(fileno(stdin)) != 0)
+		goto out;
+
+	/* everything is good - enable the events and process */
+	perf_evlist__enable(kvm->evlist);
+
+	while (!done) {
+		struct fdarray *fda = &kvm->evlist->pollfd;
+		int rc;
+
+		rc = perf_kvm__mmap_read(kvm);
+		if (rc < 0)
+			break;
+
+		err = perf_kvm__handle_timerfd(kvm);
+		if (err)
+			goto out;
+
+		if (fda->entries[nr_stdin].revents & POLLIN)
+			done = perf_kvm__handle_stdin();
+
+		/* only wait when the last pass was not throttled (rc == 0) */
+		if (!rc && !done)
+			err = fdarray__poll(fda, 100);
+	}
+
+	perf_evlist__disable(kvm->evlist);
+	if (err == 0) {
+		sort_result(kvm);
+		print_result(kvm);
+	}
+
+out:
+	if (kvm->timerfd >= 0)
+		close(kvm->timerfd);
+
+	tcsetattr(0, TCSAFLUSH, &save);
+	return err;
+}
+
+static int kvm_live_open_events(struct perf_kvm_stat *kvm)
+{	/* Configure every evsel for live use, then open and mmap the evlist; 0 or -1. */
+	int err, rc = -1;
+	struct perf_evsel *pos;
+	struct perf_evlist *evlist = kvm->evlist;
+	char sbuf[STRERR_BUFSIZE];
+
+	perf_evlist__config(evlist, &kvm->opts, NULL);
+
+	/*
+	 * Note: exclude_{guest,host} do not apply here.
+	 *       This command processes KVM tracepoints from host only
+	 */
+	evlist__for_each_entry(evlist, pos) {
+		struct perf_event_attr *attr = &pos->attr;
+
+		/* make sure these *are* set */
+		perf_evsel__set_sample_bit(pos, TID);
+		perf_evsel__set_sample_bit(pos, TIME);
+		perf_evsel__set_sample_bit(pos, CPU);
+		perf_evsel__set_sample_bit(pos, RAW);
+		/* make sure these are *not*; want as small a sample as possible */
+		perf_evsel__reset_sample_bit(pos, PERIOD);
+		perf_evsel__reset_sample_bit(pos, IP);
+		perf_evsel__reset_sample_bit(pos, CALLCHAIN);
+		perf_evsel__reset_sample_bit(pos, ADDR);
+		perf_evsel__reset_sample_bit(pos, READ);
+		attr->mmap = 0;
+		attr->comm = 0;
+		attr->task = 0;
+
+		attr->sample_period = 1;
+
+		attr->watermark = 0;
+		attr->wakeup_events = 1000;
+
+		/* will enable all once we are ready */
+		attr->disabled = 1;
+	}
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		printf("Couldn't create the events: %s\n",
+		       str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out;
+	}
+
+	if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) {
+		ui__error("Failed to mmap the events: %s\n",
+			  str_error_r(errno, sbuf, sizeof(sbuf)));
+		perf_evlist__close(evlist);	/* undo the successful open above */
+		goto out;
+	}
+
+	rc = 0;
+
+out:
+	return rc;
+}
+#endif
+
+static int read_events(struct perf_kvm_stat *kvm)
+{	/* Open kvm->file_name as a read-mode session and process all of its events. */
+	int ret;
+
+	struct perf_tool eops = {
+		.sample			= process_sample_event,
+		.comm			= perf_event__process_comm,
+		.namespaces		= perf_event__process_namespaces,
+		.ordered_events		= true,
+	};
+	struct perf_data file = {
+		.file      = {
+			.path = kvm->file_name,
+		},
+		.mode      = PERF_DATA_MODE_READ,
+		.force     = kvm->force,
+	};
+
+	kvm->tool = eops;	/* copied so that the session can be given &kvm->tool */
+	kvm->session = perf_session__new(&file, false, &kvm->tool);
+	if (!kvm->session) {
+		pr_err("Initializing perf session failed\n");
+		return -1;
+	}
+
+	symbol__init(&kvm->session->header.env);
+
+	if (!perf_session__has_traces(kvm->session, "kvm record")) {
+		ret = -EINVAL;
+		goto out_delete;
+	}
+
+	/*
+	 * Do not use 'isa' recorded in kvm_exit tracepoint since it is not
+	 * traced in the old kernel.
+	 */
+	ret = cpu_isa_config(kvm);
+	if (ret < 0)
+		goto out_delete;
+
+	ret = perf_session__process_events(kvm->session);
+
+out_delete:
+	perf_session__delete(kvm->session);	/* NOTE(review): kvm->session is left pointing at the deleted session */
+	return ret;
+}
+
+/* Turn the pid string (if any) into kvm->pid_list; -EINVAL if that fails. */
+static int parse_target_str(struct perf_kvm_stat *kvm)
+{
+	if (!kvm->opts.target.pid)
+		return 0;
+
+	kvm->pid_list = intlist__new(kvm->opts.target.pid);
+	if (kvm->pid_list)
+		return 0;
+	pr_err("Error parsing process id string\n");
+	return -EINVAL;
+}
+
+/*
+ * Offline ("report") path.
+ *
+ * Runs parse_target_str(), verify_vcpu(), select_key() and
+ * register_kvm_events_ops(), replays the recorded events through
+ * read_events(), then sorts and prints the statistics.  Returns 0 on
+ * success, -EINVAL when a check fails, or the error from read_events().
+ */
+static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm)
+{
+	int ret;
+
+	/* the checks run in this order and stop at the first failure */
+	if (parse_target_str(kvm) != 0 ||
+	    !verify_vcpu(kvm->trace_vcpu) ||
+	    !select_key(kvm) ||
+	    !register_kvm_events_ops(kvm))
+		return -EINVAL;
+
+	init_kvm_event_record(kvm);
+	setup_pager();
+
+	ret = read_events(kvm);
+	if (ret)
+		return ret;
+
+	sort_result(kvm);
+	print_result(kvm);
+	return 0;
+}
+
+#define STRDUP_FAIL_EXIT(s)	/* strdup(s); on failure, returns -ENOMEM from the ENCLOSING function */	\
+	({	char *_p;		\
+	_p = strdup(s);		\
+		if (!_p)		\
+			return -ENOMEM;	\
+		_p;			\
+	})
+
+int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused)
+{
+	return 0;	/* __weak default does nothing; presumably overridden per architecture — TODO confirm */
+}
+
+static int
+kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
+{	/* Build a "perf record" argv (fixed options, -e per tracepoint, -o file, user args) and run it. */
+	unsigned int rec_argc, i, j, events_tp_size;
+	const char **rec_argv;
+	const char * const record_args[] = {
+		"record",
+		"-R",
+		"-m", "1024",
+		"-c", "1",
+	};
+	const char * const kvm_stat_record_usage[] = {
+		"perf kvm stat record [<options>]",
+		NULL
+	};
+	const char * const *events_tp;
+	int ret;
+
+	events_tp_size = 0;
+	ret = setup_kvm_events_tp(kvm);
+	if (ret < 0) {
+		pr_err("Unable to setup the kvm tracepoints\n");
+		return ret;
+	}
+
+	for (events_tp = kvm_events_tp; *events_tp; events_tp++)	/* count the NULL-terminated list */
+		events_tp_size++;
+
+	rec_argc = ARRAY_SIZE(record_args) + argc + 2 +
+		   2 * events_tp_size;
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(record_args); i++)
+		rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);	/* NOTE(review): returns -ENOMEM without freeing rec_argv */
+
+	for (j = 0; j < events_tp_size; j++) {
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]);
+	}
+
+	rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
+	rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name);
+
+	for (j = 1; j < (unsigned int)argc; j++, i++)	/* argv[0] is skipped */
+		rec_argv[i] = argv[j];
+
+	set_option_flag(record_options, 'e', "event", PARSE_OPT_HIDDEN);
+	set_option_flag(record_options, 0, "filter", PARSE_OPT_HIDDEN);
+	set_option_flag(record_options, 'R', "raw-samples", PARSE_OPT_HIDDEN);
+
+	set_option_flag(record_options, 'F', "freq", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 0, "group", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 'g', NULL, PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 0, "call-graph", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 'd', "data", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 'T', "timestamp", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 'P', "period", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 'n', "no-samples", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 'N', "no-buildid-cache", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 'B', "no-buildid", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 'G', "cgroup", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 'b', "branch-any", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 'j', "branch-filter", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 'W', "weight", PARSE_OPT_DISABLED);
+	set_option_flag(record_options, 0, "transaction", PARSE_OPT_DISABLED);
+
+	record_usage = kvm_stat_record_usage;	/* replaces the usage text used by cmd_record() */
+	return cmd_record(i, rec_argv);
+}
+
+static int
+kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
+{	/* Parse the "perf kvm stat report" options, then run the offline report. */
+	const struct option kvm_events_report_options[] = {
+		OPT_STRING(0, "event", &kvm->report_event, "report event",
+			   "event for reporting: vmexit, "
+			   "mmio (x86 only), ioport (x86 only)"),
+		OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
+			    "vcpu id to report"),
+		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
+			    "key for sorting: sample(sort by samples number)"
+			    " time (sort by avg time)"),
+		OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
+			   "analyze events only for given process id(s)"),
+		OPT_BOOLEAN('f', "force", &kvm->force, "don't complain, do it"),
+		OPT_END()
+	};
+
+	const char * const kvm_events_report_usage[] = {
+		"perf kvm stat report [<options>]",
+		NULL
+	};
+
+	if (argc) {
+		argc = parse_options(argc, argv,
+				     kvm_events_report_options,
+				     kvm_events_report_usage, 0);
+		if (argc)	/* anything left over after option parsing is a usage error */
+			usage_with_options(kvm_events_report_usage,
+					   kvm_events_report_options);
+	}
+
+	if (!kvm->opts.target.pid)	/* no pid filter given: report on everything */
+		kvm->opts.target.system_wide = true;
+
+	return kvm_events_report_vcpu(kvm);
+}
+
+#ifdef HAVE_TIMERFD_SUPPORT
+static struct perf_evlist *kvm_live_event_list(void)
+{	/* Build an evlist with one tracepoint per kvm_events_tp entry; NULL on failure. */
+	struct perf_evlist *evlist;
+	char *tp, *name, *sys;
+	int err = -1;
+	const char * const *events_tp;
+
+	evlist = perf_evlist__new();
+	if (evlist == NULL)
+		return NULL;
+
+	for (events_tp = kvm_events_tp; *events_tp; events_tp++) {
+
+		tp = strdup(*events_tp);	/* private copy: it is split in place below */
+		if (tp == NULL)
+			goto out;
+
+		/* split tracepoint into subsystem and name */
+		sys = tp;
+		name = strchr(tp, ':');
+		if (name == NULL) {
+			pr_err("Error parsing %s tracepoint: subsystem delimiter not found\n",
+			       *events_tp);
+			free(tp);
+			goto out;
+		}
+		*name = '\0';
+		name++;
+
+		if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
+			pr_err("Failed to add %s tracepoint to the list\n", *events_tp);
+			free(tp);
+			goto out;
+		}
+
+		free(tp);
+	}
+
+	err = 0;
+
+out:
+	if (err) {	/* any failure discards the partially built list */
+		perf_evlist__delete(evlist);
+		evlist = NULL;
+	}
+
+	return evlist;
+}
+
+static int kvm_events_live(struct perf_kvm_stat *kvm,
+			   int argc, const char **argv)
+{	/* "perf kvm stat live": parse options, build evlist and session, run the live report. */
+	char errbuf[BUFSIZ];
+	int err;
+
+	const struct option live_options[] = {
+		OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
+			"record events on existing process id"),
+		OPT_CALLBACK('m', "mmap-pages", &kvm->opts.mmap_pages, "pages",
+			"number of mmap data pages",
+			perf_evlist__parse_mmap_pages),
+		OPT_INCR('v', "verbose", &verbose,
+			"be more verbose (show counter open errors, etc)"),
+		OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide,
+			"system-wide collection from all CPUs"),
+		OPT_UINTEGER('d', "display", &kvm->display_time,
+			"time in seconds between display updates"),
+		OPT_STRING(0, "event", &kvm->report_event, "report event",
+			"event for reporting: "
+			"vmexit, mmio (x86 only), ioport (x86 only)"),
+		OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
+			"vcpu id to report"),
+		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
+			"key for sorting: sample(sort by samples number)"
+			" time (sort by avg time)"),
+		OPT_U64(0, "duration", &kvm->duration,
+			"show events other than"
+			" HLT (x86 only) or Wait state (s390 only)"
+			" that take longer than duration usecs"),
+		OPT_UINTEGER(0, "proc-map-timeout", &kvm->opts.proc_map_timeout,
+				"per thread proc mmap processing timeout in ms"),
+		OPT_END()
+	};
+	const char * const live_usage[] = {
+		"perf kvm stat live [<options>]",
+		NULL
+	};
+	struct perf_data data = {
+		.mode = PERF_DATA_MODE_WRITE,
+	};
+
+
+	/* event handling */
+	kvm->tool.sample = process_sample_event;
+	kvm->tool.comm   = perf_event__process_comm;
+	kvm->tool.exit   = perf_event__process_exit;
+	kvm->tool.fork   = perf_event__process_fork;
+	kvm->tool.lost   = process_lost_event;
+	kvm->tool.namespaces  = perf_event__process_namespaces;
+	kvm->tool.ordered_events = true;
+	perf_tool__fill_defaults(&kvm->tool);
+
+	/* set defaults */
+	kvm->display_time = 1;
+	kvm->opts.user_interval = 1;
+	kvm->opts.mmap_pages = 512;
+	kvm->opts.target.uses_mmap = false;
+	kvm->opts.target.uid_str = NULL;
+	kvm->opts.target.uid = UINT_MAX;
+	kvm->opts.proc_map_timeout = 500;
+
+	symbol__init(NULL);
+	disable_buildid_cache();
+
+	use_browser = 0;
+
+	if (argc) {
+		argc = parse_options(argc, argv, live_options,
+				     live_usage, 0);
+		if (argc)	/* leftover arguments are a usage error */
+			usage_with_options(live_usage, live_options);
+	}
+
+	kvm->duration *= NSEC_PER_USEC;   /* convert usec to nsec */
+
+	/*
+	 * target related setups
+	 */
+	err = target__validate(&kvm->opts.target);
+	if (err) {	/* a validation problem is only warned about; execution continues */
+		target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ);
+		ui__warning("%s", errbuf);
+	}
+
+	if (target__none(&kvm->opts.target))
+		kvm->opts.target.system_wide = true;
+
+
+	/*
+	 * generate the event list
+	 */
+	err = setup_kvm_events_tp(kvm);
+	if (err < 0) {
+		pr_err("Unable to setup the kvm tracepoints\n");
+		return err;
+	}
+
+	kvm->evlist = kvm_live_event_list();
+	if (kvm->evlist == NULL) {
+		err = -1;
+		goto out;
+	}
+
+	symbol_conf.nr_events = kvm->evlist->nr_entries;
+
+	if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
+		usage_with_options(live_usage, live_options);
+
+	/*
+	 * perf session
+	 */
+	kvm->session = perf_session__new(&data, false, &kvm->tool);
+	if (kvm->session == NULL) {
+		err = -1;
+		goto out;
+	}
+	kvm->session->evlist = kvm->evlist;
+	perf_session__set_id_hdr_size(kvm->session);
+	ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
+	machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
+				    kvm->evlist->threads, false,
+				    kvm->opts.proc_map_timeout, 1);
+	err = kvm_live_open_events(kvm);
+	if (err)
+		goto out;
+
+	err = kvm_events_live_report(kvm);
+
+out:
+	perf_session__delete(kvm->session);	/* NOTE(review): reached with a NULL session/evlist on early failures — assumes both deletes accept NULL */
+	kvm->session = NULL;
+	perf_evlist__delete(kvm->evlist);
+
+	return err;
+}
+#endif
+
+/* List the "perf kvm stat" sub-commands and note the fallback to "perf stat". */
+static void print_kvm_stat_usage(void)
+{
+	printf("Usage: perf kvm stat <command>\n\n"
+	       "# Available commands:\n"
+	       "\trecord: record kvm events\n"
+	       "\treport: report statistical data of kvm events\n"
+	       "\tlive:   live reporting of statistical data of kvm events\n");
+
+	printf("\nOtherwise, it is the alias of 'perf stat':\n");
+}
+
+/* "perf kvm stat": dispatch record/report/live, otherwise behave like "perf stat". */
+static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
+{
+	struct perf_kvm_stat kvm = {
+		.file_name = file_name,
+
+		.trace_vcpu	= -1,
+		.report_event	= "vmexit",
+		.sort_key	= "sample",
+
+	};
+
+	if (argc == 1) {
+		print_kvm_stat_usage();
+		return cmd_stat(argc, argv);
+	}
+
+	if (strncmp(argv[1], "rec", 3) == 0)
+		return kvm_events_record(&kvm, argc - 1, argv + 1);
+
+	if (strncmp(argv[1], "rep", 3) == 0)
+		return kvm_events_report(&kvm, argc - 1, argv + 1);
+
+#ifdef HAVE_TIMERFD_SUPPORT
+	if (strncmp(argv[1], "live", 4) == 0)
+		return kvm_events_live(&kvm, argc - 1, argv + 1);
+#endif
+
+	return cmd_stat(argc, argv);
+}
+#endif /* HAVE_KVM_STAT_SUPPORT */
+
+/* Run "perf record -o <file_name>" followed by the caller's extra arguments. */
+static int __cmd_record(const char *file_name, int argc, const char **argv)
+{
+	int rec_argc = argc + 2, i = 0, j;
+	const char **rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+	if (rec_argv == NULL)	/* don't write through a failed allocation */
+		return -ENOMEM;
+
+	rec_argv[i++] = strdup("record");
+	rec_argv[i++] = strdup("-o");
+	rec_argv[i++] = strdup(file_name);
+	for (j = 1; j < argc; j++, i++)
+		rec_argv[i] = argv[j];	/* argv[0] is the sub-command name: skip it */
+	BUG_ON(i != rec_argc);
+	return cmd_record(i, rec_argv);
+}
+
+/* Run "perf report -i <file_name>" followed by the caller's extra arguments. */
+static int __cmd_report(const char *file_name, int argc, const char **argv)
+{
+	int rec_argc = argc + 2, i = 0, j;
+	const char **rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+	if (rec_argv == NULL)	/* don't write through a failed allocation */
+		return -ENOMEM;
+
+	rec_argv[i++] = strdup("report");
+	rec_argv[i++] = strdup("-i");
+	rec_argv[i++] = strdup(file_name);
+	for (j = 1; j < argc; j++, i++)
+		rec_argv[i] = argv[j];	/* argv[0] is the sub-command name: skip it */
+	BUG_ON(i != rec_argc);
+	return cmd_report(i, rec_argv);
+}
+
+/* Run "perf buildid-list -i <file_name>" followed by the caller's extra arguments. */
+static int
+__cmd_buildid_list(const char *file_name, int argc, const char **argv)
+{
+	int rec_argc = argc + 2, i = 0, j;
+	const char **rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+	if (rec_argv == NULL)	/* don't write through a failed allocation */
+		return -ENOMEM;
+
+	rec_argv[i++] = strdup("buildid-list");
+	rec_argv[i++] = strdup("-i");
+	rec_argv[i++] = strdup(file_name);
+	for (j = 1; j < argc; j++, i++)
+		rec_argv[i] = argv[j];	/* argv[0] is the sub-command name: skip it */
+	BUG_ON(i != rec_argc);
+	return cmd_buildid_list(i, rec_argv);
+}
+
+int cmd_kvm(int argc, const char **argv)
+{	/* "perf kvm" entry point: parse the common options, then dispatch on the sub-command. */
+	const char *file_name = NULL;
+	const struct option kvm_options[] = {
+		OPT_STRING('i', "input", &file_name, "file",
+			   "Input file name"),
+		OPT_STRING('o', "output", &file_name, "file",
+			   "Output file name"),
+		OPT_BOOLEAN(0, "guest", &perf_guest,
+			    "Collect guest os data"),
+		OPT_BOOLEAN(0, "host", &perf_host,
+			    "Collect host os data"),
+		OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
+			   "guest mount directory under which every guest os"
+			   " instance has a subdir"),
+		OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name,
+			   "file", "file saving guest os vmlinux"),
+		OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms,
+			   "file", "file saving guest os /proc/kallsyms"),
+		OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
+			   "file", "file saving guest os /proc/modules"),
+		OPT_INCR('v', "verbose", &verbose,
+			    "be more verbose (show counter open errors, etc)"),
+		OPT_END()
+	};
+
+	const char *const kvm_subcommands[] = { "top", "record", "report", "diff",
+						"buildid-list", "stat", NULL };
+	const char *kvm_usage[] = { NULL, NULL };
+
+	perf_host  = 0;	/* defaults before option parsing: guest data only */
+	perf_guest = 1;
+
+	argc = parse_options_subcommand(argc, argv, kvm_options, kvm_subcommands, kvm_usage,
+					PARSE_OPT_STOP_AT_NON_OPTION);
+	if (!argc)
+		usage_with_options(kvm_usage, kvm_options);
+
+	if (!perf_host)	/* without --host, guest collection is forced back on */
+		perf_guest = 1;
+
+	if (!file_name) {	/* neither -i nor -o given: use the generated default name */
+		file_name = get_filename_for_perf_kvm();
+
+		if (!file_name) {
+			pr_err("Failed to allocate memory for filename\n");
+			return -ENOMEM;
+		}
+	}
+
+	if (!strncmp(argv[0], "rec", 3))	/* only a prefix of the sub-command is compared */
+		return __cmd_record(file_name, argc, argv);
+	else if (!strncmp(argv[0], "rep", 3))
+		return __cmd_report(file_name, argc, argv);
+	else if (!strncmp(argv[0], "diff", 4))
+		return cmd_diff(argc, argv);
+	else if (!strncmp(argv[0], "top", 3))
+		return cmd_top(argc, argv);
+	else if (!strncmp(argv[0], "buildid-list", 12))
+		return __cmd_buildid_list(file_name, argc, argv);
+#ifdef HAVE_KVM_STAT_SUPPORT
+	else if (!strncmp(argv[0], "stat", 4))
+		return kvm_cmd_stat(file_name, argc, argv);
+#endif
+	else
+		usage_with_options(kvm_usage, kvm_options);
+
+	return 0;
+}
diff --git a/builtin-list.c b/builtin-list.c
new file mode 100644
index 0000000..ead221e
--- /dev/null
+++ b/builtin-list.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-list.c
+ *
+ * Builtin list command: list all event types
+ *
+ * Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/parse-events.h"
+#include "util/cache.h"
+#include "util/pmu.h"
+#include "util/debug.h"
+#include "util/metricgroup.h"
+#include <subcmd/parse-options.h>
+
+static bool desc_flag = true;	/* -d/--desc; on by default, cleared by --no-desc */
+static bool details_flag;	/* --details; off by default */
+
+/*
+ * "perf list": print the known events, optionally filtered by the positional
+ * arguments (a category keyword, a "sys:name" pair, or a "*text*" glob).
+ * Returns 0, or -1 if duplicating a "sys:name" argument fails.
+ */
+int cmd_list(int argc, const char **argv)
+{
+	int i;
+	bool raw_dump = false;
+	bool long_desc_flag = false;
+	struct option list_options[] = {
+		OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"),
+		OPT_BOOLEAN('d', "desc", &desc_flag,
+			    "Print extra event descriptions. --no-desc to not print."),
+		OPT_BOOLEAN('v', "long-desc", &long_desc_flag,
+			    "Print longer event descriptions."),
+		OPT_BOOLEAN(0, "details", &details_flag,
+			    "Print information on the perf event names and expressions used internally by events."),
+		OPT_INCR(0, "debug", &verbose,
+			     "Enable debugging output"),
+		OPT_END()
+	};
+	const char * const list_usage[] = {
+		"perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]",
+		NULL
+	};
+
+	set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN);
+
+	argc = parse_options(argc, argv, list_options, list_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+
+	setup_pager();
+
+	if (!raw_dump && pager_in_use())
+		printf("\nList of pre-defined events (to be used in -e):\n\n");
+
+	if (argc == 0) {
+		print_events(NULL, raw_dump, !desc_flag, long_desc_flag,
+				details_flag);
+		return 0;
+	}
+
+	for (i = 0; i < argc; ++i) {
+		char *sep, *s;
+
+		if (strcmp(argv[i], "tracepoint") == 0)
+			print_tracepoint_events(NULL, NULL, raw_dump);
+		else if (strcmp(argv[i], "hw") == 0 ||
+			 strcmp(argv[i], "hardware") == 0)
+			print_symbol_events(NULL, PERF_TYPE_HARDWARE,
+					event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
+		else if (strcmp(argv[i], "sw") == 0 ||
+			 strcmp(argv[i], "software") == 0)
+			print_symbol_events(NULL, PERF_TYPE_SOFTWARE,
+					event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
+		else if (strcmp(argv[i], "cache") == 0 ||
+			 strcmp(argv[i], "hwcache") == 0)
+			print_hwcache_events(NULL, raw_dump);
+		else if (strcmp(argv[i], "pmu") == 0)
+			print_pmu_events(NULL, raw_dump, !desc_flag,
+						long_desc_flag, details_flag);
+		else if (strcmp(argv[i], "sdt") == 0)
+			print_sdt_events(NULL, NULL, raw_dump);
+		else if (strcmp(argv[i], "metric") == 0)
+			metricgroup__print(true, false, NULL, raw_dump);
+		else if (strcmp(argv[i], "metricgroup") == 0)
+			metricgroup__print(false, true, NULL, raw_dump);
+		else if ((sep = strchr(argv[i], ':')) != NULL) {
+			int sep_idx;
+
+			/* sep is non-NULL in this branch: split a copy of "sys:name" at the colon */
+			sep_idx = sep - argv[i];
+			s = strdup(argv[i]);
+			if (s == NULL)
+				return -1;
+
+			s[sep_idx] = '\0';
+			print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
+			print_sdt_events(s, s + sep_idx + 1, raw_dump);
+			metricgroup__print(true, true, s, raw_dump);
+			free(s);
+		} else {
+			if (asprintf(&s, "*%s*", argv[i]) < 0) {
+				printf("Critical: Not enough memory! Trying to continue...\n");
+				continue;
+			}
+			print_symbol_events(s, PERF_TYPE_HARDWARE,
+					    event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
+			print_symbol_events(s, PERF_TYPE_SOFTWARE,
+					    event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
+			print_hwcache_events(s, raw_dump);
+			print_pmu_events(s, raw_dump, !desc_flag,
+						long_desc_flag,
+						details_flag);
+			print_tracepoint_events(NULL, s, raw_dump);
+			print_sdt_events(NULL, s, raw_dump);
+			metricgroup__print(true, true, NULL, raw_dump);
+			free(s);
+		}
+	}
+	return 0;
+}
diff --git a/builtin-lock.c b/builtin-lock.c
new file mode 100644
index 0000000..6e0189d
--- /dev/null
+++ b/builtin-lock.c
@@ -0,0 +1,1034 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/util.h"
+#include "util/cache.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+
+#include <subcmd/parse-options.h>
+#include "util/trace-event.h"
+
+#include "util/debug.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/data.h"
+
+#include <sys/types.h>
+#include <sys/prctl.h>
+#include <semaphore.h>
+#include <pthread.h>
+#include <math.h>
+#include <limits.h>
+
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/kernel.h>
+
+static struct perf_session *session;
+
+/* based on kernel/lockdep.c */
+#define LOCKHASH_BITS		12
+#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)
+
+static struct list_head lockhash_table[LOCKHASH_SIZE];
+
+#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
+#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))
+
+struct lock_stat {
+	struct list_head	hash_entry;
+	struct rb_node		rb;		/* used for sorting */
+
+	/*
+	 * FIXME: perf_evsel__intval() returns u64,
+	 * so the address of lockdep_map should be handled as 64-bit.
+	 * Is there a better solution?
+	 */
+	void			*addr;		/* address of lockdep_map, used as ID */
+	char			*name;		/* for strcpy(), we cannot use const */
+
+	unsigned int		nr_acquire;
+	unsigned int		nr_acquired;
+	unsigned int		nr_contended;
+	unsigned int		nr_release;
+
+	unsigned int		nr_readlock;
+	unsigned int		nr_trylock;
+
+	/* these times are in nanoseconds */
+	u64                     avg_wait_time;
+	u64			wait_time_total;
+	u64			wait_time_min;
+	u64			wait_time_max;
+
+	int			discard; /* broken sequence seen, ignore */
+};
+
+/*
+ * States of lock_seq_stat
+ *
+ * UNINITIALIZED is required to detect the first acquire event.
+ * Due to the nature of lock events, there is no guarantee
+ * that the first event seen for a lock is acquire;
+ * it can also be acquired, contended or release.
+ */
+#define SEQ_STATE_UNINITIALIZED      0	       /* initial state */
+#define SEQ_STATE_RELEASED	1
+#define SEQ_STATE_ACQUIRING	2
+#define SEQ_STATE_ACQUIRED	3
+#define SEQ_STATE_READ_ACQUIRED	4
+#define SEQ_STATE_CONTENDED	5
+
+/*
+ * MAX_LOCK_DEPTH
+ * Imported from include/linux/sched.h.
+ * Should this be synchronized?
+ */
+#define MAX_LOCK_DEPTH 48
+
+/*
+ * struct lock_seq_stat:
+ * Place to put on state of one lock sequence
+ * 1) acquire -> acquired -> release
+ * 2) acquire -> contended -> acquired -> release
+ * 3) acquire (with read or try) -> release
+ * 4) Are there other patterns?
+ */
+struct lock_seq_stat {
+	struct list_head        list;
+	int			state;
+	u64			prev_event_time;
+	void                    *addr;
+
+	int                     read_count;
+};
+
+struct thread_stat {
+	struct rb_node		rb;
+
+	u32                     tid;
+	struct list_head        seq_list;
+};
+
+static struct rb_root		thread_stats;
+
+static struct thread_stat *thread_stat_find(u32 tid)
+{
+	struct rb_node *node;
+	struct thread_stat *st;
+
+	node = thread_stats.rb_node;
+	while (node) {
+		st = container_of(node, struct thread_stat, rb);
+		if (st->tid == tid)
+			return st;
+		else if (tid < st->tid)
+			node = node->rb_left;
+		else
+			node = node->rb_right;
+	}
+
+	return NULL;
+}
+
+static void thread_stat_insert(struct thread_stat *new)
+{
+	struct rb_node **rb = &thread_stats.rb_node;
+	struct rb_node *parent = NULL;
+	struct thread_stat *p;
+
+	while (*rb) {
+		p = container_of(*rb, struct thread_stat, rb);
+		parent = *rb;
+
+		if (new->tid < p->tid)
+			rb = &(*rb)->rb_left;
+		else if (new->tid > p->tid)
+			rb = &(*rb)->rb_right;
+		else
+			BUG_ON("inserting invalid thread_stat\n");
+	}
+
+	rb_link_node(&new->rb, parent, rb);
+	rb_insert_color(&new->rb, &thread_stats);
+}
+
+static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
+{
+	struct thread_stat *st;
+
+	st = thread_stat_find(tid);
+	if (st)
+		return st;
+
+	st = zalloc(sizeof(struct thread_stat));
+	if (!st) {
+		pr_err("memory allocation failed\n");
+		return NULL;
+	}
+
+	st->tid = tid;
+	INIT_LIST_HEAD(&st->seq_list);
+
+	thread_stat_insert(st);
+
+	return st;
+}
+
+static struct thread_stat *thread_stat_findnew_first(u32 tid);
+static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
+	thread_stat_findnew_first;
+
+static struct thread_stat *thread_stat_findnew_first(u32 tid)
+{
+	struct thread_stat *st;
+
+	st = zalloc(sizeof(struct thread_stat));
+	if (!st) {
+		pr_err("memory allocation failed\n");
+		return NULL;
+	}
+	st->tid = tid;
+	INIT_LIST_HEAD(&st->seq_list);
+
+	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
+	rb_insert_color(&st->rb, &thread_stats);
+
+	thread_stat_findnew = thread_stat_findnew_after_first;
+	return st;
+}
+
+/* build a simple key function: true if one is bigger than two */
+#define SINGLE_KEY(member)						\
+	static int lock_stat_key_ ## member(struct lock_stat *one,	\
+					 struct lock_stat *two)		\
+	{								\
+		return one->member > two->member;			\
+	}
+
+SINGLE_KEY(nr_acquired)
+SINGLE_KEY(nr_contended)
+SINGLE_KEY(avg_wait_time)
+SINGLE_KEY(wait_time_total)
+SINGLE_KEY(wait_time_max)
+
+static int lock_stat_key_wait_time_min(struct lock_stat *one,
+					struct lock_stat *two)
+{
+	u64 s1 = one->wait_time_min;
+	u64 s2 = two->wait_time_min;
+	if (s1 == ULLONG_MAX)
+		s1 = 0;
+	if (s2 == ULLONG_MAX)
+		s2 = 0;
+	return s1 > s2;
+}
+
+struct lock_key {
+	/*
+	 * name: the value for specify by user
+	 * this should be simpler than raw name of member
+	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
+	 */
+	const char		*name;
+	int			(*key)(struct lock_stat*, struct lock_stat*);
+};
+
+static const char		*sort_key = "acquired";
+
+static int			(*compare)(struct lock_stat *, struct lock_stat *);
+
+static struct rb_root		result;	/* place to store sorted data */
+
+#define DEF_KEY_LOCK(name, fn_suffix)	\
+	{ #name, lock_stat_key_ ## fn_suffix }
+struct lock_key keys[] = {
+	DEF_KEY_LOCK(acquired, nr_acquired),
+	DEF_KEY_LOCK(contended, nr_contended),
+	DEF_KEY_LOCK(avg_wait, avg_wait_time),
+	DEF_KEY_LOCK(wait_total, wait_time_total),
+	DEF_KEY_LOCK(wait_min, wait_time_min),
+	DEF_KEY_LOCK(wait_max, wait_time_max),
+
+	/* extra comparisons much complicated should be here */
+
+	{ NULL, NULL }
+};
+
+static int select_key(void)
+{
+	int i;
+
+	for (i = 0; keys[i].name; i++) {
+		if (!strcmp(keys[i].name, sort_key)) {
+			compare = keys[i].key;
+			return 0;
+		}
+	}
+
+	pr_err("Unknown compare key: %s\n", sort_key);
+
+	return -1;
+}
+
+static void insert_to_result(struct lock_stat *st,
+			     int (*bigger)(struct lock_stat *, struct lock_stat *))
+{
+	struct rb_node **rb = &result.rb_node;
+	struct rb_node *parent = NULL;
+	struct lock_stat *p;
+
+	while (*rb) {
+		p = container_of(*rb, struct lock_stat, rb);
+		parent = *rb;
+
+		if (bigger(st, p))
+			rb = &(*rb)->rb_left;
+		else
+			rb = &(*rb)->rb_right;
+	}
+
+	rb_link_node(&st->rb, parent, rb);
+	rb_insert_color(&st->rb, &result);
+}
+
+/* returns the leftmost element of result, and erases it */
+static struct lock_stat *pop_from_result(void)
+{
+	struct rb_node *node = result.rb_node;
+
+	if (!node)
+		return NULL;
+
+	while (node->rb_left)
+		node = node->rb_left;
+
+	rb_erase(node, &result);
+	return container_of(node, struct lock_stat, rb);
+}
+
+static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
+{
+	struct list_head *entry = lockhashentry(addr);
+	struct lock_stat *ret, *new;
+
+	list_for_each_entry(ret, entry, hash_entry) {
+		if (ret->addr == addr)
+			return ret;
+	}
+
+	new = zalloc(sizeof(struct lock_stat));
+	if (!new)
+		goto alloc_failed;
+
+	new->addr = addr;
+	new->name = zalloc(sizeof(char) * strlen(name) + 1);
+	if (!new->name) {
+		free(new);
+		goto alloc_failed;
+	}
+
+	strcpy(new->name, name);
+	new->wait_time_min = ULLONG_MAX;
+
+	list_add(&new->hash_entry, entry);
+	return new;
+
+alloc_failed:
+	pr_err("memory allocation failed\n");
+	return NULL;
+}
+
+struct trace_lock_handler {
+	int (*acquire_event)(struct perf_evsel *evsel,
+			     struct perf_sample *sample);
+
+	int (*acquired_event)(struct perf_evsel *evsel,
+			      struct perf_sample *sample);
+
+	int (*contended_event)(struct perf_evsel *evsel,
+			       struct perf_sample *sample);
+
+	int (*release_event)(struct perf_evsel *evsel,
+			     struct perf_sample *sample);
+};
+
+static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr)
+{
+	struct lock_seq_stat *seq;
+
+	list_for_each_entry(seq, &ts->seq_list, list) {
+		if (seq->addr == addr)
+			return seq;
+	}
+
+	seq = zalloc(sizeof(struct lock_seq_stat));
+	if (!seq) {
+		pr_err("memory allocation failed\n");
+		return NULL;
+	}
+	seq->state = SEQ_STATE_UNINITIALIZED;
+	seq->addr = addr;
+
+	list_add(&seq->list, &ts->seq_list);
+	return seq;
+}
+
+enum broken_state {
+	BROKEN_ACQUIRE,
+	BROKEN_ACQUIRED,
+	BROKEN_CONTENDED,
+	BROKEN_RELEASE,
+	BROKEN_MAX,
+};
+
+static int bad_hist[BROKEN_MAX];
+
+enum acquire_flags {
+	TRY_LOCK = 1,
+	READ_LOCK = 2,
+};
+
+/* lock_acquire handler: opens (or nests) this thread's sequence on the lock */
+static int report_lock_acquire_event(struct perf_evsel *evsel,
+				     struct perf_sample *sample)
+{
+	void *addr;
+	struct lock_stat *ls;
+	struct thread_stat *ts;
+	struct lock_seq_stat *seq;
+	const char *name = perf_evsel__strval(evsel, sample, "name");
+	u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+	/* the lock:lock_acquire tracepoint field is named "flags" */
+	int flag = perf_evsel__intval(evsel, sample, "flags");
+
+	memcpy(&addr, &tmp, sizeof(void *));
+
+	ls = lock_stat_findnew(addr, name);
+	if (!ls)
+		return -ENOMEM;
+	if (ls->discard)
+		return 0;
+
+	ts = thread_stat_findnew(sample->tid);
+	if (!ts)
+		return -ENOMEM;
+
+	seq = get_seq(ts, addr);
+	if (!seq)
+		return -ENOMEM;
+
+	switch (seq->state) {
+	case SEQ_STATE_UNINITIALIZED:
+	case SEQ_STATE_RELEASED:
+		if (!flag) {
+			seq->state = SEQ_STATE_ACQUIRING;
+		} else {
+			if (flag & TRY_LOCK)
+				ls->nr_trylock++;
+			if (flag & READ_LOCK)
+				ls->nr_readlock++;
+			seq->state = SEQ_STATE_READ_ACQUIRED;
+			seq->read_count = 1;
+			ls->nr_acquired++;
+		}
+		break;
+	case SEQ_STATE_READ_ACQUIRED:
+		/* only another read acquire may nest; anything else is broken */
+		if (!(flag & READ_LOCK))
+			goto broken;
+		seq->read_count++;
+		ls->nr_acquired++;
+		goto end;
+	case SEQ_STATE_ACQUIRED:
+	case SEQ_STATE_ACQUIRING:
+	case SEQ_STATE_CONTENDED:
+broken:
+		/* broken lock sequence, discard it */
+		ls->discard = 1;
+		bad_hist[BROKEN_ACQUIRE]++;
+		list_del(&seq->list);
+		free(seq);
+		goto end;
+	default:
+		BUG_ON("Unknown state of lock sequence found!\n");
+		break;
+	}
+
+	ls->nr_acquire++;
+	seq->prev_event_time = sample->time;
+end:
+	return 0;
+}
+
+static int report_lock_acquired_event(struct perf_evsel *evsel,
+				      struct perf_sample *sample)
+{
+	void *addr;
+	struct lock_stat *ls;
+	struct thread_stat *ts;
+	struct lock_seq_stat *seq;
+	u64 contended_term;
+	const char *name = perf_evsel__strval(evsel, sample, "name");
+	u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+
+	memcpy(&addr, &tmp, sizeof(void *));
+
+	ls = lock_stat_findnew(addr, name);
+	if (!ls)
+		return -ENOMEM;
+	if (ls->discard)
+		return 0;
+
+	ts = thread_stat_findnew(sample->tid);
+	if (!ts)
+		return -ENOMEM;
+
+	seq = get_seq(ts, addr);
+	if (!seq)
+		return -ENOMEM;
+
+	switch (seq->state) {
+	case SEQ_STATE_UNINITIALIZED:
+		/* orphan event, do nothing */
+		return 0;
+	case SEQ_STATE_ACQUIRING:
+		break;
+	case SEQ_STATE_CONTENDED:
+		contended_term = sample->time - seq->prev_event_time;
+		ls->wait_time_total += contended_term;
+		if (contended_term < ls->wait_time_min)
+			ls->wait_time_min = contended_term;
+		if (ls->wait_time_max < contended_term)
+			ls->wait_time_max = contended_term;
+		break;
+	case SEQ_STATE_RELEASED:
+	case SEQ_STATE_ACQUIRED:
+	case SEQ_STATE_READ_ACQUIRED:
+		/* broken lock sequence, discard it */
+		ls->discard = 1;
+		bad_hist[BROKEN_ACQUIRED]++;
+		list_del(&seq->list);
+		free(seq);
+		goto end;
+	default:
+		BUG_ON("Unknown state of lock sequence found!\n");
+		break;
+	}
+
+	seq->state = SEQ_STATE_ACQUIRED;
+	ls->nr_acquired++;
+	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
+	seq->prev_event_time = sample->time;
+end:
+	return 0;
+}
+
+static int report_lock_contended_event(struct perf_evsel *evsel,
+				       struct perf_sample *sample)
+{
+	void *addr;
+	struct lock_stat *ls;
+	struct thread_stat *ts;
+	struct lock_seq_stat *seq;
+	const char *name = perf_evsel__strval(evsel, sample, "name");
+	u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+
+	memcpy(&addr, &tmp, sizeof(void *));
+
+	ls = lock_stat_findnew(addr, name);
+	if (!ls)
+		return -ENOMEM;
+	if (ls->discard)
+		return 0;
+
+	ts = thread_stat_findnew(sample->tid);
+	if (!ts)
+		return -ENOMEM;
+
+	seq = get_seq(ts, addr);
+	if (!seq)
+		return -ENOMEM;
+
+	switch (seq->state) {
+	case SEQ_STATE_UNINITIALIZED:
+		/* orphan event, do nothing */
+		return 0;
+	case SEQ_STATE_ACQUIRING:
+		break;
+	case SEQ_STATE_RELEASED:
+	case SEQ_STATE_ACQUIRED:
+	case SEQ_STATE_READ_ACQUIRED:
+	case SEQ_STATE_CONTENDED:
+		/* broken lock sequence, discard it */
+		ls->discard = 1;
+		bad_hist[BROKEN_CONTENDED]++;
+		list_del(&seq->list);
+		free(seq);
+		goto end;
+	default:
+		BUG_ON("Unknown state of lock sequence found!\n");
+		break;
+	}
+
+	seq->state = SEQ_STATE_CONTENDED;
+	ls->nr_contended++;
+	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
+	seq->prev_event_time = sample->time;
+end:
+	return 0;
+}
+
+static int report_lock_release_event(struct perf_evsel *evsel,
+				     struct perf_sample *sample)
+{
+	void *addr;
+	struct lock_stat *ls;
+	struct thread_stat *ts;
+	struct lock_seq_stat *seq;
+	const char *name = perf_evsel__strval(evsel, sample, "name");
+	u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+
+	memcpy(&addr, &tmp, sizeof(void *));
+
+	ls = lock_stat_findnew(addr, name);
+	if (!ls)
+		return -ENOMEM;
+	if (ls->discard)
+		return 0;
+
+	ts = thread_stat_findnew(sample->tid);
+	if (!ts)
+		return -ENOMEM;
+
+	seq = get_seq(ts, addr);
+	if (!seq)
+		return -ENOMEM;
+
+	switch (seq->state) {
+	case SEQ_STATE_UNINITIALIZED:
+		goto end;
+	case SEQ_STATE_ACQUIRED:
+		break;
+	case SEQ_STATE_READ_ACQUIRED:
+		seq->read_count--;
+		BUG_ON(seq->read_count < 0);
+		if (seq->read_count > 0) {
+			ls->nr_release++;
+			goto end;	/* nested reads remain: keep seq */
+		}
+		break;
+	case SEQ_STATE_ACQUIRING:
+	case SEQ_STATE_CONTENDED:
+	case SEQ_STATE_RELEASED:
+		/* broken lock sequence, discard it */
+		ls->discard = 1;
+		bad_hist[BROKEN_RELEASE]++;
+		goto free_seq;
+	default:
+		BUG_ON("Unknown state of lock sequence found!\n");
+		break;
+	}
+
+	ls->nr_release++;
+free_seq:
+	list_del(&seq->list);
+	free(seq);
+end:
+	return 0;
+}
+
+/* lock oriented handlers */
+/* TODO: handlers for CPU oriented, thread oriented */
+static struct trace_lock_handler report_lock_ops  = {
+	.acquire_event		= report_lock_acquire_event,
+	.acquired_event		= report_lock_acquired_event,
+	.contended_event	= report_lock_contended_event,
+	.release_event		= report_lock_release_event,
+};
+
+static struct trace_lock_handler *trace_handler;
+
+static int perf_evsel__process_lock_acquire(struct perf_evsel *evsel,
+					     struct perf_sample *sample)
+{
+	if (trace_handler->acquire_event)
+		return trace_handler->acquire_event(evsel, sample);
+	return 0;
+}
+
+static int perf_evsel__process_lock_acquired(struct perf_evsel *evsel,
+					      struct perf_sample *sample)
+{
+	if (trace_handler->acquired_event)
+		return trace_handler->acquired_event(evsel, sample);
+	return 0;
+}
+
+static int perf_evsel__process_lock_contended(struct perf_evsel *evsel,
+					      struct perf_sample *sample)
+{
+	if (trace_handler->contended_event)
+		return trace_handler->contended_event(evsel, sample);
+	return 0;
+}
+
+static int perf_evsel__process_lock_release(struct perf_evsel *evsel,
+					    struct perf_sample *sample)
+{
+	if (trace_handler->release_event)
+		return trace_handler->release_event(evsel, sample);
+	return 0;
+}
+
+static void print_bad_events(int bad, int total)
+{
+	/* Debug output (meant to be removed); a zero total reports a 0% rate */
+	int i;
+	const char *name[BROKEN_MAX] =
+		{ "acquire", "acquired", "contended", "release" };
+
+	pr_info("\n=== output for debug===\n\n");
+	pr_info("bad: %d, total: %d\n", bad, total);
+	pr_info("bad rate: %.2f %%\n", total ? (double)bad / total * 100 : 0.0);
+	pr_info("histogram of events caused bad sequence\n");
+	for (i = 0; i < BROKEN_MAX; i++)
+		pr_info(" %10s: %d\n", name[i], bad_hist[i]);
+}
+
+/* TODO: various way to print, coloring, nano or milli sec */
+static void print_result(void)
+{
+	struct lock_stat *st;
+	char cut_name[20];
+	int bad, total;
+
+	pr_info("%20s ", "Name");
+	pr_info("%10s ", "acquired");
+	pr_info("%10s ", "contended");
+
+	pr_info("%15s ", "avg wait (ns)");
+	pr_info("%15s ", "total wait (ns)");
+	pr_info("%15s ", "max wait (ns)");
+	pr_info("%15s ", "min wait (ns)");
+
+	pr_info("\n\n");
+
+	bad = total = 0;
+	while ((st = pop_from_result())) {
+		total++;
+		if (st->discard) {
+			bad++;
+			continue;
+		}
+		bzero(cut_name, 20);
+
+		if (strlen(st->name) < 16) {
+			/* output raw name */
+			pr_info("%20s ", st->name);
+		} else {
+			strncpy(cut_name, st->name, 16);
+			cut_name[16] = '.';
+			cut_name[17] = '.';
+			cut_name[18] = '.';
+			cut_name[19] = '\0';
+			/* cut off name for saving output style */
+			pr_info("%20s ", cut_name);
+		}
+
+		pr_info("%10u ", st->nr_acquired);
+		pr_info("%10u ", st->nr_contended);
+
+		pr_info("%15" PRIu64 " ", st->avg_wait_time);
+		pr_info("%15" PRIu64 " ", st->wait_time_total);
+		pr_info("%15" PRIu64 " ", st->wait_time_max);
+		pr_info("%15" PRIu64 " ", st->wait_time_min == ULLONG_MAX ?
+		       0 : st->wait_time_min);
+		pr_info("\n");
+	}
+
+	print_bad_events(bad, total);
+}
+
+static bool info_threads, info_map;
+
+static void dump_threads(void)
+{
+	struct thread_stat *st;
+	struct rb_node *node;
+	struct thread *t;
+
+	pr_info("%10s: comm\n", "Thread ID");
+
+	for (node = rb_first(&thread_stats); node; node = rb_next(node)) {
+		st = container_of(node, struct thread_stat, rb);
+		t = perf_session__findnew(session, st->tid);
+		/* guard against a failed lookup instead of dereferencing NULL */
+		pr_info("%10d: %s\n", st->tid, t ? thread__comm_str(t) : "???");
+		if (t)
+			thread__put(t);
+	}
+}
+
+static void dump_map(void)
+{
+	unsigned int i;
+	struct lock_stat *st;
+
+	pr_info("Address of instance: name of class\n");
+	for (i = 0; i < LOCKHASH_SIZE; i++) {
+		list_for_each_entry(st, &lockhash_table[i], hash_entry) {
+			pr_info(" %p: %s\n", st->addr, st->name);
+		}
+	}
+}
+
+static int dump_info(void)
+{
+	int rc = 0;
+
+	if (info_threads)
+		dump_threads();
+	else if (info_map)
+		dump_map();
+	else {
+		rc = -1;
+		pr_err("Unknown type of information\n");
+	}
+
+	return rc;
+}
+
+typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
+				  struct perf_sample *sample);
+
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine)
+{
+	int err = 0;
+	struct thread *thread = machine__findnew_thread(machine, sample->pid,
+							sample->tid);
+
+	if (thread == NULL) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	if (evsel->handler != NULL) {
+		tracepoint_handler f = evsel->handler;
+		err = f(evsel, sample);
+	}
+
+	thread__put(thread);
+
+	return err;
+}
+
+static void sort_result(void)
+{
+	unsigned int i;
+	struct lock_stat *st;
+
+	for (i = 0; i < LOCKHASH_SIZE; i++) {
+		list_for_each_entry(st, &lockhash_table[i], hash_entry) {
+			insert_to_result(st, compare);
+		}
+	}
+}
+
+static const struct perf_evsel_str_handler lock_tracepoints[] = {
+	{ "lock:lock_acquire",	 perf_evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
+	{ "lock:lock_acquired",	 perf_evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+	{ "lock:lock_contended", perf_evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+	{ "lock:lock_release",	 perf_evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
+};
+
+static bool force;
+
+static int __cmd_report(bool display_info)
+{
+	int err = -EINVAL;
+	struct perf_tool eops = {
+		.sample		 = process_sample_event,
+		.comm		 = perf_event__process_comm,
+		.namespaces	 = perf_event__process_namespaces,
+		.ordered_events	 = true,
+	};
+	struct perf_data data = {
+		.file      = {
+			.path = input_name,
+		},
+		.mode      = PERF_DATA_MODE_READ,
+		.force     = force,
+	};
+
+	session = perf_session__new(&data, false, &eops);
+	if (!session) {
+		pr_err("Initializing perf session failed\n");
+		return -1;
+	}
+
+	symbol__init(&session->header.env);
+
+	if (!perf_session__has_traces(session, "lock record"))
+		goto out_delete;
+
+	if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
+		pr_err("Initializing perf session tracepoint handlers failed\n");
+		goto out_delete;
+	}
+
+	if (select_key())
+		goto out_delete;
+
+	err = perf_session__process_events(session);
+	if (err)
+		goto out_delete;
+
+	setup_pager();
+	if (display_info) /* used for info subcommand */
+		err = dump_info();
+	else {
+		sort_result();
+		print_result();
+	}
+
+out_delete:
+	perf_session__delete(session);
+	return err;
+}
+
+/* Run "perf record" on all lock tracepoints, appending the user's arguments */
+static int __cmd_record(int argc, const char **argv)
+{
+	const char *record_args[] = { "record", "-R", "-m", "1024", "-c", "1" };
+	unsigned int rec_argc, i, j;
+	const char **rec_argv;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
+		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
+			pr_err("tracepoint %s is not enabled. Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
+			       lock_tracepoints[i].name);
+			return 1;
+		}
+	}
+
+	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+	/* factor of 2 is for -e in front of each tracepoint */
+	rec_argc += 2 * ARRAY_SIZE(lock_tracepoints);
+
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+	if (!rec_argv)
+		return -ENOMEM;
+
+	/* every entry is borrowed (static or caller-owned), so nothing leaks */
+	for (i = 0; i < ARRAY_SIZE(record_args); i++)
+		rec_argv[i] = record_args[i];
+
+	for (j = 0; j < ARRAY_SIZE(lock_tracepoints); j++) {
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = lock_tracepoints[j].name;
+	}
+
+	for (j = 1; j < (unsigned int)argc; j++, i++)
+		rec_argv[i] = argv[j];
+
+	BUG_ON(i != rec_argc);
+
+	ret = cmd_record(i, rec_argv);
+	free(rec_argv);
+	return ret;
+}
+
+int cmd_lock(int argc, const char **argv)
+{
+	const struct option lock_options[] = {
+	OPT_STRING('i', "input", &input_name, "file", "input file name"),
+	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
+	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+	OPT_END()
+	};
+
+	const struct option info_options[] = {
+	OPT_BOOLEAN('t', "threads", &info_threads,
+		    "dump thread list in perf.data"),
+	OPT_BOOLEAN('m', "map", &info_map,
+		    "map of lock instances (address:name table)"),
+	OPT_PARENT(lock_options)
+	};
+
+	const struct option report_options[] = {
+	OPT_STRING('k', "key", &sort_key, "acquired",
+		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
+	/* TODO: type */
+	OPT_PARENT(lock_options)
+	};
+
+	const char * const info_usage[] = {
+		"perf lock info [<options>]",
+		NULL
+	};
+	const char *const lock_subcommands[] = { "record", "report", "script",
+						 "info", NULL };
+	const char *lock_usage[] = {
+		NULL,
+		NULL
+	};
+	const char * const report_usage[] = {
+		"perf lock report [<options>]",
+		NULL
+	};
+	unsigned int i;
+	int rc = 0;
+
+	for (i = 0; i < LOCKHASH_SIZE; i++)
+		INIT_LIST_HEAD(lockhash_table + i);
+
+	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
+					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+	if (!argc)
+		usage_with_options(lock_usage, lock_options);
+
+	if (!strncmp(argv[0], "rec", 3)) {
+		return __cmd_record(argc, argv);
+	} else if (!strncmp(argv[0], "report", 6)) {
+		trace_handler = &report_lock_ops;
+		if (argc) {
+			argc = parse_options(argc, argv,
+					     report_options, report_usage, 0);
+			if (argc)
+				usage_with_options(report_usage, report_options);
+		}
+		rc = __cmd_report(false);
+	} else if (!strcmp(argv[0], "script")) {
+		/* Aliased to 'perf script' */
+		return cmd_script(argc, argv);
+	} else if (!strcmp(argv[0], "info")) {
+		if (argc) {
+			argc = parse_options(argc, argv,
+					     info_options, info_usage, 0);
+			if (argc)
+				usage_with_options(info_usage, info_options);
+		}
+		/* recycling report_lock_ops */
+		trace_handler = &report_lock_ops;
+		rc = __cmd_report(true);
+	} else {
+		usage_with_options(lock_usage, lock_options);
+	}
+
+	return rc;
+}
diff --git a/builtin-mem.c b/builtin-mem.c
new file mode 100644
index 0000000..57393e9
--- /dev/null
+++ b/builtin-mem.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "builtin.h"
+#include "perf.h"
+
+#include <subcmd/parse-options.h>
+#include "util/trace-event.h"
+#include "util/tool.h"
+#include "util/session.h"
+#include "util/data.h"
+#include "util/mem-events.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+
+#define MEM_OPERATION_LOAD	0x1
+#define MEM_OPERATION_STORE	0x2
+
+struct perf_mem {
+	struct perf_tool	tool;
+	char const		*input_name;
+	bool			hide_unresolved;
+	bool			dump_raw;
+	bool			force;
+	bool			phys_addr;
+	int			operation;
+	const char		*cpu_list;
+	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+};
+
+static int parse_record_events(const struct option *opt,
+			       const char *str, int unset __maybe_unused)
+{
+	struct perf_mem *mem = *(struct perf_mem **)opt->value;
+	int j;
+
+	if (strcmp(str, "list")) {
+		if (!perf_mem_events__parse(str)) {
+			mem->operation = 0;
+			return 0;
+		}
+		exit(-1);
+	}
+
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		struct perf_mem_event *e = &perf_mem_events[j];
+
+		fprintf(stderr, "%-13s%-*s%s\n",
+			e->tag,
+			verbose > 0 ? 25 : 0,
+			verbose > 0 ? perf_mem_events__name(j) : "",
+			e->supported ? ": available" : "");
+	}
+	exit(0);
+}
+
+static const char * const __usage[] = {
+	"perf mem record [<options>] [<command>]",
+	"perf mem record [<options>] -- <command> [<options>]",
+	NULL
+};
+
+static const char * const *record_mem_usage = __usage;
+
+/* Translate "perf mem record" options into a "perf record" command line */
+static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
+{
+	int rec_argc, i = 0, j, ret;
+	const char **rec_argv;
+	bool all_user = false, all_kernel = false;
+	struct option options[] = {
+	OPT_CALLBACK('e', "event", &mem, "event",
+		     "event selector. use 'perf mem record -e list' to list available events",
+		     parse_record_events),
+	OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"),
+	OPT_INCR('v', "verbose", &verbose,
+		 "be more verbose (show counter open errors, etc)"),
+	OPT_BOOLEAN('U', "all-user", &all_user, "collect only user level data"),
+	OPT_BOOLEAN('K', "all-kernel", &all_kernel, "collect only kernel level data"),
+	OPT_END()
+	};
+
+	argc = parse_options(argc, argv, options, record_mem_usage,
+			     PARSE_OPT_KEEP_UNKNOWN);
+
+	/* worst case: 6 fixed arguments plus "-e <event>" for every mem event */
+	rec_argc = argc + 6 + 2 * PERF_MEM_EVENTS__MAX;
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+	if (!rec_argv)
+		return -1;
+
+	rec_argv[i++] = "record";
+
+	if (mem->operation & MEM_OPERATION_LOAD)
+		perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
+
+	if (mem->operation & MEM_OPERATION_STORE)
+		perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+
+	if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
+		rec_argv[i++] = "-W";
+
+	rec_argv[i++] = "-d";
+
+	if (mem->phys_addr)
+		rec_argv[i++] = "--phys-data";
+
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		if (!perf_mem_events[j].record)
+			continue;
+
+		if (!perf_mem_events[j].supported) {
+			pr_err("failed: event '%s' not supported\n",
+			       perf_mem_events__name(j));
+			free(rec_argv);
+			return -1;
+		}
+
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = perf_mem_events__name(j);
+	}
+
+	if (all_user)
+		rec_argv[i++] = "--all-user";
+
+	if (all_kernel)
+		rec_argv[i++] = "--all-kernel";
+
+	for (j = 0; j < argc; j++, i++)
+		rec_argv[i] = argv[j];
+
+	if (verbose > 0) {
+		pr_debug("calling: record ");
+
+		/* slot 0 ("record") is already part of the prefix above */
+		for (j = 1; rec_argv[j]; j++)
+			pr_debug("%s ", rec_argv[j]);
+		pr_debug("\n");
+	}
+
+	ret = cmd_record(i, rec_argv);
+	free(rec_argv);
+	return ret;
+}
+
+/* Print one sample as a text row; mem->phys_addr adds the physical address */
+static int
+dump_raw_samples(struct perf_tool *tool, union perf_event *event,
+		 struct perf_sample *sample, struct machine *machine)
+{
+	struct perf_mem *mem = container_of(tool, struct perf_mem, tool);
+	struct addr_location al;
+	/* local separator: the global setting must not change between samples */
+	const char *fmt, *sep = symbol_conf.field_sep;
+
+	if (machine__resolve(machine, &al, sample) < 0) {
+		fprintf(stderr, "problem processing %d event, skipping it.\n",
+				event->header.type);
+		return -1;
+	}
+
+	if (al.filtered || (mem->hide_unresolved && al.sym == NULL))
+		goto out_put;
+
+	if (al.map != NULL)
+		al.map->dso->hit = 1;
+
+	if (mem->phys_addr) {
+		if (sep) {
+			fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64
+			      "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n";
+		} else {
+			fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x%016"PRIx64
+			      "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64
+			      "%s%s:%s\n";
+			sep = " ";
+		}
+
+		printf(fmt,
+			sample->pid,
+			sep,
+			sample->tid,
+			sep,
+			sample->ip,
+			sep,
+			sample->addr,
+			sep,
+			sample->phys_addr,
+			sep,
+			sample->weight,
+			sep,
+			sample->data_src,
+			sep,
+			al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+			al.sym ? al.sym->name : "???");
+	} else {
+		if (sep) {
+			fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
+			      "%s0x%"PRIx64"%s%s:%s\n";
+		} else {
+			fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x%016"PRIx64
+			      "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
+			sep = " ";
+		}
+
+		printf(fmt,
+			sample->pid,
+			sep,
+			sample->tid,
+			sep,
+			sample->ip,
+			sep,
+			sample->addr,
+			sep,
+			sample->weight,
+			sep,
+			sample->data_src,
+			sep,
+			al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+			al.sym ? al.sym->name : "???");
+	}
+out_put:
+	addr_location__put(&al);
+	return 0;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel __maybe_unused,
+				struct machine *machine)
+{
+	return dump_raw_samples(tool, event, sample, machine);
+}
+
+static int report_raw_events(struct perf_mem *mem)
+{
+	struct perf_data data = {
+		.file      = {
+			.path = input_name,
+		},
+		.mode      = PERF_DATA_MODE_READ,
+		.force     = mem->force,
+	};
+	int ret;
+	struct perf_session *session = perf_session__new(&data, false,
+							 &mem->tool);
+
+	if (session == NULL)
+		return -1;
+
+	if (mem->cpu_list) {
+		ret = perf_session__cpu_bitmap(session, mem->cpu_list,
+					       mem->cpu_bitmap);
+		if (ret < 0)
+			goto out_delete;
+	}
+
+	ret = symbol__init(&session->header.env);
+	if (ret < 0)
+		goto out_delete;
+
+	if (mem->phys_addr)
+		printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+	else
+		printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+
+	ret = perf_session__process_events(session);
+
+out_delete:
+	perf_session__delete(session);
+	return ret;
+}
+
+/*
+ * Implement "perf mem report".
+ *
+ * With mem->dump_raw set the samples are dumped directly via
+ * report_raw_events().  Otherwise an argument vector for cmd_report()
+ * is assembled ("report --mem-mode -n", an optional --sort key list and
+ * the caller's remaining arguments) and cmd_report() is invoked.
+ *
+ * Returns -1 if the argument vector cannot be allocated, otherwise the
+ * result of the function that did the work.
+ */
+static int report_events(int argc, const char **argv, struct perf_mem *mem)
+{
+	const char *sort_order = NULL;
+	const char **rep_argv;
+	int ret, n = 0, k, rep_argc;
+
+	if (mem->dump_raw)
+		return report_raw_events(mem);
+
+	/* three fixed arguments plus (at most) one sort option */
+	rep_argc = argc + 3;
+	rep_argv = calloc(rep_argc + 1, sizeof(char *));
+	if (!rep_argv)
+		return -1;
+
+	rep_argv[n++] = "report";
+	rep_argv[n++] = "--mem-mode";
+	rep_argv[n++] = "-n"; /* display number of samples */
+
+	if (!(mem->operation & MEM_OPERATION_LOAD)) {
+		/*
+		 * there is no weight (cost) associated with stores, so don't
+		 * print the column
+		 */
+		sort_order = mem->phys_addr ?
+			"--sort=mem,sym,dso,symbol_daddr,"
+			"dso_daddr,tlb,locked,phys_daddr" :
+			"--sort=mem,sym,dso,symbol_daddr,"
+			"dso_daddr,tlb,locked";
+	} else if (mem->phys_addr) {
+		sort_order = "--sort=local_weight,mem,sym,dso,symbol_daddr,"
+			     "dso_daddr,snoop,tlb,locked,phys_daddr";
+	}
+
+	if (sort_order)
+		rep_argv[n++] = sort_order;
+
+	/* argv[0] is the subcommand name; forward everything after it */
+	for (k = 1; k < argc; k++)
+		rep_argv[n++] = argv[k];
+
+	ret = cmd_report(n, rep_argv);
+	free(rep_argv);
+	return ret;
+}
+
+/* Maps an operation name accepted by -t/--type to its MEM_OPERATION_* bit. */
+struct mem_mode {
+	const char *name;
+	int mode;
+};
+
+/* Build one table entry from a name and a mode bit */
+#define MEM_OPT(n, m) \
+	{ .name = n, .mode = (m) }
+
+/* Table terminator: parse_mem_ops() stops at the first NULL name */
+#define MEM_END { .name = NULL }
+
+/* Names recognised by parse_mem_ops(), matched case-insensitively there */
+static const struct mem_mode mem_modes[]={
+	MEM_OPT("load", MEM_OPERATION_LOAD),
+	MEM_OPT("store", MEM_OPERATION_STORE),
+	MEM_END
+};
+
+/*
+ * Option callback for -t/--type: parse a comma separated list of
+ * operation names (see mem_modes[]) into the int bitmask at opt->value.
+ *
+ * A given list replaces the previous mask.  If the resulting mask is
+ * empty it falls back to MEM_OPERATION_LOAD.
+ *
+ * Returns 0 on success (and when the option is negated), -1 on an
+ * unknown name or allocation failure.
+ */
+static int
+parse_mem_ops(const struct option *opt, const char *str, int unset)
+{
+	int *mode = (int *)opt->value;
+	const struct mem_mode *m;
+	char *s, *os = NULL, *p;
+	int ret = -1;
+
+	if (unset)
+		return 0;
+
+	/* str may be NULL in case no arg is passed to -t */
+	if (str) {
+		/* work on a private copy because str is read-only */
+		os = strdup(str);
+		if (!os)
+			return -1;
+
+		/* reset mode */
+		*mode = 0;
+
+		/* walk the tokens, cutting the copy at every comma */
+		for (s = os; s; s = p ? p + 1 : NULL) {
+			p = strchr(s, ',');
+			if (p)
+				*p = '\0';
+
+			m = mem_modes;
+			while (m->name && strcasecmp(s, m->name))
+				m++;
+
+			if (!m->name) {
+				fprintf(stderr, "unknown sampling op %s,"
+					    " check man page\n", s);
+				goto error;
+			}
+
+			*mode |= m->mode;
+		}
+	}
+	ret = 0;
+
+	if (*mode == 0)
+		*mode = MEM_OPERATION_LOAD;
+error:
+	free(os);
+	return ret;
+}
+
+/*
+ * Entry point of "perf mem": parse the common options, then dispatch to
+ * the "record" or "report" subcommand.  Subcommands are matched on their
+ * first three characters ("rec" / "rep").
+ *
+ * Returns -1 if perf_mem_events__init() fails, otherwise the result of
+ * the selected subcommand.
+ */
+int cmd_mem(int argc, const char **argv)
+{
+	struct stat st;
+	struct perf_mem mem = {
+		.tool = {
+			.sample		= process_sample_event,
+			.mmap		= perf_event__process_mmap,
+			.mmap2		= perf_event__process_mmap2,
+			.comm		= perf_event__process_comm,
+			.lost		= perf_event__process_lost,
+			.fork		= perf_event__process_fork,
+			.build_id	= perf_event__process_build_id,
+			.namespaces	= perf_event__process_namespaces,
+			.ordered_events	= true,
+		},
+		.input_name		 = "perf.data",
+		/*
+		 * default to both load and store sampling
+		 */
+		.operation		 = MEM_OPERATION_LOAD | MEM_OPERATION_STORE,
+	};
+	/*
+	 * NOTE(review): -i/--input stores into the file-scope input_name
+	 * (which report_raw_events() reads), while the defaulting further
+	 * down operates on mem.input_name - confirm both are intended.
+	 */
+	const struct option mem_options[] = {
+	OPT_CALLBACK('t', "type", &mem.operation,
+		   "type", "memory operations(load,store) Default load,store",
+		    parse_mem_ops),
+	OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
+		    "dump raw samples in ASCII"),
+	OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
+		    "Only display entries resolved to a symbol"),
+	OPT_STRING('i', "input", &input_name, "file",
+		   "input file name"),
+	OPT_STRING('C', "cpu", &mem.cpu_list, "cpu",
+		   "list of cpus to profile"),
+	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep,
+		   "separator",
+		   "separator for columns, no spaces will be added"
+		   " between columns '.' is reserved."),
+	OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
+	OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"),
+	OPT_END()
+	};
+	const char *const mem_subcommands[] = { "record", "report", NULL };
+	const char *mem_usage[] = {
+		NULL,
+		NULL
+	};
+
+	if (perf_mem_events__init()) {
+		pr_err("failed: memory events not supported\n");
+		return -1;
+	}
+
+	argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
+					mem_usage, PARSE_OPT_KEEP_UNKNOWN);
+
+	/*
+	 * Show usage when no subcommand is left, or when "record" was
+	 * requested with an empty operation mask.
+	 */
+	if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation))
+		usage_with_options(mem_usage, mem_options);
+
+	/* Without an input name, read from stdin if it is a FIFO */
+	if (!mem.input_name || !strlen(mem.input_name)) {
+		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
+			mem.input_name = "-";
+		else
+			mem.input_name = "perf.data";
+	}
+
+	if (!strncmp(argv[0], "rec", 3))
+		return __cmd_record(argc, argv, &mem);
+	else if (!strncmp(argv[0], "rep", 3))
+		return report_events(argc, argv, &mem);
+	else
+		usage_with_options(mem_usage, mem_options);
+
+	return 0;
+}
diff --git a/builtin-probe.c b/builtin-probe.c
new file mode 100644
index 0000000..c006592
--- /dev/null
+++ b/builtin-probe.c
@@ -0,0 +1,736 @@
+/*
+ * builtin-probe.c
+ *
+ * Builtin probe command: Set up probe events by C expression
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "perf.h"
+#include "builtin.h"
+#include "util/util.h"
+#include "util/strlist.h"
+#include "util/strfilter.h"
+#include "util/symbol.h"
+#include "util/debug.h"
+#include <subcmd/parse-options.h>
+#include "util/probe-finder.h"
+#include "util/probe-event.h"
+#include "util/probe-file.h"
+
+#define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
+#define DEFAULT_FUNC_FILTER "!_*"
+#define DEFAULT_LIST_FILTER "*"
+
+/* Session management structure */
+/*
+ * State collected by the option callbacks and consumed by __cmd_probe();
+ * released and zeroed again by cleanup_params().
+ */
+static struct {
+	int command;	/* Command short_name */
+	bool list_events;
+	bool uprobes;		/* target is a user-space binary, not kernel/module */
+	bool quiet;
+	bool target_used;	/* current target was copied into at least one event */
+	int nevents;		/* number of slots of events[] handed out so far */
+	struct perf_probe_event events[MAX_PROBES];
+	struct line_range line_range;	/* filled by --line */
+	char *target;		/* heap copy of the exec/module target, may be NULL */
+	struct strfilter *filter;	/* accumulated filter rules, may be NULL */
+	struct nsinfo *nsi;	/* namespace reference from --target-ns, may be NULL */
+} params;
+
+/* Parse an event definition. Note that any error must die. */
+/*
+ * Parse one probe definition into the next free slot of params.events[].
+ *
+ * The slot is claimed (params.nevents is incremented) before parsing, so
+ * it is counted even when parsing fails; cleanup_params() clears it later.
+ * The event inherits the current uprobes flag, a copy of the current
+ * target and a reference on the current namespace info.
+ *
+ * Returns 0 on success, -1 when the probe limit is hit, -ENOMEM when the
+ * target cannot be duplicated, or the error from
+ * parse_perf_probe_command().
+ *
+ * NOTE(review): the limit check fires when the count *reaches*
+ * MAX_PROBES, so the last array slot is never usable although the
+ * message says "> MAX_PROBES" - confirm whether that is intended.
+ */
+static int parse_probe_event(const char *str)
+{
+	struct perf_probe_event *pev = &params.events[params.nevents];
+	int ret;
+
+	pr_debug("probe-definition(%d): %s\n", params.nevents, str);
+	if (++params.nevents == MAX_PROBES) {
+		/* terminate the line like every other message in this file */
+		pr_err("Too many probes (> %d) were specified.\n", MAX_PROBES);
+		return -1;
+	}
+
+	pev->uprobes = params.uprobes;
+	if (params.target) {
+		pev->target = strdup(params.target);
+		if (!pev->target)
+			return -ENOMEM;
+		/* lets __cmd_probe() detect a trailing, unused -x/-m */
+		params.target_used = true;
+	}
+
+	if (params.nsi)
+		pev->nsi = nsinfo__get(params.nsi);
+
+	/* Parse a perf-probe command into event */
+	ret = parse_perf_probe_command(str, pev);
+	pr_debug("%d arguments\n", pev->nargs);
+
+	return ret;
+}
+
+/*
+ * Add one filter rule to params.filter: the first rule creates the
+ * filter, later rules are OR-ed into it.
+ *
+ * Returns 0 on success, -ENOMEM when creation fails without a parse
+ * position, or -EINVAL on a parse error (the offending position is then
+ * reported with a caret under the source string).
+ */
+static int params_add_filter(const char *str)
+{
+	const char *err = NULL;
+	int ret = 0;
+
+	pr_debug2("Add filter: %s\n", str);
+	if (params.filter) {
+		ret = strfilter__or(params.filter, str, &err);
+	} else {
+		params.filter = strfilter__new(str, &err);
+		if (!params.filter)
+			ret = err ? -EINVAL : -ENOMEM;
+	}
+
+	if (ret != -EINVAL)
+		return ret;
+
+	/* err points into str at the place where parsing stopped */
+	pr_err("Filter parse error at %td.\n", err - str + 1);
+	pr_err("Source: \"%s\"\n", str);
+	pr_err("         %*c\n", (int)(err - str + 1), '^');
+
+	return ret;
+}
+
+/*
+ * Treat the first non-option argument as the probe target when it is an
+ * absolute path and no target has been set yet.
+ *
+ * Anything that does not end in ".ko" is taken to be a user-space
+ * binary and switches params.uprobes on.
+ *
+ * Returns 1 if @ptr was consumed as the target, 0 if not, or -ENOMEM if
+ * the path could not be duplicated.
+ */
+static int set_target(const char *ptr)
+{
+	int found = 0;
+	size_t len;
+
+	/*
+	 * The first argument after options can be an absolute path
+	 * to an executable / library or kernel module.
+	 *
+	 * TODO: Support relative path, and $PATH, $LD_LIBRARY_PATH,
+	 * short module name.
+	 */
+	if (!params.target && ptr && *ptr == '/') {
+		params.target = strdup(ptr);
+		if (!params.target)
+			return -ENOMEM;
+		params.target_used = false;
+
+		found = 1;
+
+		/*
+		 * Only look at the suffix when the path is long enough to
+		 * have one; stepping back 3 bytes from a shorter string
+		 * (e.g. "/a") would read before its start.
+		 */
+		len = strlen(ptr);
+		if (len < 3 || strcmp(ptr + len - 3, ".ko"))
+			params.uprobes = true;
+
+	}
+
+	return found;
+}
+
+/*
+ * Handle the non-option arguments: an optional leading target path
+ * (see set_target()) followed by the words of one probe definition.
+ * The words are joined with single spaces and parsed as one event.
+ *
+ * Returns 0 when only a target was given, a negative error from
+ * set_target(), -ENOMEM on allocation failure, or the result of
+ * parse_probe_event().
+ */
+static int parse_probe_event_argv(int argc, const char **argv)
+{
+	int i, first, pos, size, ret, found_target;
+	char *buf;
+
+	found_target = set_target(argv[0]);
+	if (found_target < 0)
+		return found_target;
+
+	if (found_target && argc == 1)
+		return 0;
+
+	/* Skip argv[0] when it was consumed as the target */
+	first = found_target ? 1 : 0;
+
+	/* Bind up rest arguments: each word plus one separator */
+	size = 0;
+	for (i = first; i < argc; i++)
+		size += strlen(argv[i]) + 1;
+
+	buf = zalloc(size + 1);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	pos = 0;
+	for (i = first; i < argc; i++)
+		pos += sprintf(buf + pos, "%s ", argv[i]);
+
+	ret = parse_probe_event(buf);
+	free(buf);
+	return ret;
+}
+
+/*
+ * Option callback shared by -x/--exec and -m/--module: record the probe
+ * target in params.target, replacing any earlier one.
+ *
+ * --exec selects uprobes, --module selects kernel probing.  Executables,
+ * and module arguments containing a '/', are expanded to an absolute
+ * path; a bare module name is stored as given.
+ *
+ * Returns 0 on success, -ENOENT for a missing argument, an unexpected
+ * option name or a path that cannot be resolved, -ENOMEM on allocation
+ * failure.
+ */
+static int opt_set_target(const struct option *opt, const char *str,
+			int unset __maybe_unused)
+{
+	char *tmp;
+
+	if (!str)
+		return -ENOENT;
+
+	if (!strcmp(opt->long_name, "exec"))
+		params.uprobes = true;
+	else if (!strcmp(opt->long_name, "module"))
+		params.uprobes = false;
+	else
+		return -ENOENT;
+
+	/* Expand given path to absolute path, except for modulename */
+	if (params.uprobes || strchr(str, '/')) {
+		tmp = nsinfo__realpath(str, params.nsi);
+		if (!tmp) {
+			pr_warning("Failed to get the absolute path of %s: %m\n", str);
+			return -ENOENT;
+		}
+	} else {
+		tmp = strdup(str);
+		if (!tmp)
+			return -ENOMEM;
+	}
+
+	free(params.target);
+	params.target = tmp;
+	/* a new target has to be followed by a probe definition */
+	params.target_used = false;
+
+	return 0;
+}
+
+/*
+ * Option callback for --target-ns: parse a pid and, if the namespace
+ * info created for it reports need_setns, keep a reference to it in
+ * params.nsi.
+ *
+ * Returns 0 on success, -ENOENT when no argument was given, -EINVAL
+ * when the argument is not a plain decimal number, or the negated errno
+ * reported by strtol().
+ */
+static int opt_set_target_ns(const struct option *opt __maybe_unused,
+			     const char *str, int unset __maybe_unused)
+{
+	int ret = -ENOENT;
+	pid_t ns_pid;
+	struct nsinfo *nsip;
+	char *end;
+	long val;
+
+	if (str) {
+		errno = 0;
+		val = strtol(str, &end, 10);
+		if (errno != 0) {
+			ret = -errno;
+			pr_warning("Failed to parse %s as a pid: %s\n", str,
+				   strerror(errno));
+			return ret;
+		}
+		/*
+		 * strtol() does not flag "no digits" or trailing garbage
+		 * through errno, so "abc" would silently become pid 0.
+		 */
+		if (end == str || *end != '\0') {
+			pr_warning("Failed to parse %s as a pid: %s\n", str,
+				   strerror(EINVAL));
+			return -EINVAL;
+		}
+		ns_pid = (pid_t)val;
+
+		nsip = nsinfo__new(ns_pid);
+		if (nsip && nsip->need_setns) {
+			/* drop the reference of an earlier --target-ns */
+			nsinfo__put(params.nsi);
+			params.nsi = nsinfo__get(nsip);
+		}
+		nsinfo__put(nsip);
+
+		ret = 0;
+	}
+
+	return ret;
+}
+
+
+/* Command option callbacks */
+
+#ifdef HAVE_DWARF_SUPPORT
+/*
+ * Option callback for -L/--line: select the "show lines" command and
+ * parse the line range description into params.line_range.
+ *
+ * Only the first --line is honoured; later ones produce a warning and
+ * are ignored.  Returns 0 in that case and when no argument is given,
+ * otherwise the result of parse_line_range_desc().
+ */
+static int opt_show_lines(const struct option *opt,
+			  const char *str, int unset __maybe_unused)
+{
+	if (!str)
+		return 0;
+
+	if (params.command == 'L') {
+		pr_warning("Warning: more than one --line options are"
+			   " detected. Only the first one is valid.\n");
+		return 0;
+	}
+
+	params.command = opt->short_name;
+
+	return parse_line_range_desc(str, &params.line_range);
+}
+
+/*
+ * Option callback for -V/--vars: parse the probe point and select the
+ * "show variables" command.  A probe point carrying arguments is
+ * rejected with -EINVAL.
+ *
+ * Returns 0 when no argument is given, otherwise the result of
+ * parse_probe_event() (or -EINVAL as described above).
+ */
+static int opt_show_vars(const struct option *opt,
+			 const char *str, int unset __maybe_unused)
+{
+	struct perf_probe_event *pev;
+	int ret;
+
+	if (!str)
+		return 0;
+
+	/* parse_probe_event() fills the next free slot: remember it first */
+	pev = params.events + params.nevents;
+
+	ret = parse_probe_event(str);
+	if (ret == 0 && pev->nargs) {
+		pr_err("  Error: '--vars' doesn't accept arguments.\n");
+		return -EINVAL;
+	}
+	params.command = opt->short_name;
+
+	return ret;
+}
+#else
+# define opt_show_lines NULL
+# define opt_show_vars NULL
+#endif
+/*
+ * Option callback used by -a/--add and -D/--definition: remember which
+ * of the two was given and parse the probe definition.
+ *
+ * Returns 0 when no argument is given, otherwise the result of
+ * parse_probe_event().
+ */
+static int opt_add_probe_event(const struct option *opt,
+			      const char *str, int unset __maybe_unused)
+{
+	if (!str)
+		return 0;
+
+	params.command = opt->short_name;
+
+	return parse_probe_event(str);
+}
+
+/*
+ * Option callback for the commands that take a filter (--list, --del,
+ * --funcs): select the command unless the option is negated, then add
+ * the filter rule if one was given.
+ */
+static int opt_set_filter_with_command(const struct option *opt,
+				       const char *str, int unset)
+{
+	if (!unset)
+		params.command = opt->short_name;
+
+	return str ? params_add_filter(str) : 0;
+}
+
+/*
+ * Option callback for --filter: add the rule to params.filter without
+ * touching the selected command.  A missing argument is not an error.
+ */
+static int opt_set_filter(const struct option *opt __maybe_unused,
+			  const char *str, int unset __maybe_unused)
+{
+	return str ? params_add_filter(str) : 0;
+}
+
+/* Prepare params for use; only the line range needs explicit set-up. */
+static int init_params(void)
+{
+	int err = line_range__init(&params.line_range);
+
+	return err;
+}
+
+/*
+ * Release everything the option callbacks stored in params and reset the
+ * structure to all zeroes.
+ */
+static void cleanup_params(void)
+{
+	int n;
+
+	/* every slot handed out by parse_probe_event() */
+	for (n = 0; n < params.nevents; n++)
+		clear_perf_probe_event(&params.events[n]);
+
+	line_range__clear(&params.line_range);
+	free(params.target);
+	strfilter__delete(params.filter);
+	nsinfo__put(params.nsi);
+
+	memset(&params, 0, sizeof(params));
+}
+
+/*
+ * Print @msg as an error line.  At debug verbosity the textual reason
+ * for the (negative) error code @err and the code itself are appended
+ * to the same line.
+ */
+static void pr_err_with_code(const char *msg, int err)
+{
+	char sbuf[STRERR_BUFSIZE];
+	const char *reason;
+
+	pr_err("%s", msg);
+
+	/* err is a negative errno value, str_error_r() wants it positive */
+	reason = str_error_r(-err, sbuf, sizeof(sbuf));
+	pr_debug(" Reason: %s (Code: %d)", reason, err);
+
+	pr_err("\n");
+}
+
+/*
+ * Convert @npevs probe events starting at @pevs and either show their
+ * trace event definitions (command 'D') or apply them and print what
+ * was added.
+ *
+ * Returns the negative error of the first failing step, otherwise the
+ * result of the last step executed.  The probe events are cleaned up and
+ * the symbol maps released on every path after the maps were set up.
+ */
+static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+	int ret;
+	int i, k;
+	const char *event = NULL, *group = NULL;
+
+	/* The uprobes flag of the first event selects the kind of maps */
+	ret = init_probe_symbol_maps(pevs->uprobes);
+	if (ret < 0)
+		return ret;
+
+	ret = convert_perf_probe_events(pevs, npevs);
+	if (ret < 0)
+		goto out_cleanup;
+
+	if (params.command == 'D') {	/* it shows definition */
+		ret = show_probe_trace_events(pevs, npevs);
+		goto out_cleanup;
+	}
+
+	ret = apply_perf_probe_events(pevs, npevs);
+	if (ret < 0)
+		goto out_cleanup;
+
+	/* Count the trace events to pick singular or plural wording */
+	for (i = k = 0; i < npevs; i++)
+		k += pevs[i].ntevs;
+
+	pr_info("Added new event%s\n", (k > 1) ? "s:" : ":");
+	for (i = 0; i < npevs; i++) {
+		struct perf_probe_event *pev = &pevs[i];
+
+		for (k = 0; k < pev->ntevs; k++) {
+			struct probe_trace_event *tev = &pev->tevs[k];
+
+			/* We use tev's name for showing new events */
+			show_perf_probe_event(tev->group, tev->event, pev,
+					      tev->point.module, false);
+
+			/* Save the last valid name */
+			event = tev->event;
+			group = tev->group;
+		}
+	}
+
+	/* Note that it is possible to skip all events because of blacklist */
+	if (event) {
+		/* Show how to use the event. */
+		pr_info("\nYou can now use it in all perf tools, such as:\n\n");
+		pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event);
+	}
+
+out_cleanup:
+	cleanup_perf_probe_events(pevs, npevs);
+	exit_probe_symbol_maps();
+	return ret;
+}
+
+/*
+ * Purge the entries matching @filter from the probe cache of every
+ * build-id returned by build_id_cache__list_all().
+ *
+ * A cache that cannot be opened is skipped; a cache that cannot be
+ * purged or committed only produces a warning.
+ *
+ * Returns 0 once the list has been walked, or a negative error (-errno,
+ * falling back to -ENOMEM) when the build-id list cannot be obtained.
+ */
+static int del_perf_probe_caches(struct strfilter *filter)
+{
+	struct probe_cache *cache;
+	struct strlist *bidlist;
+	struct str_node *nd;
+	int ret;
+
+	bidlist = build_id_cache__list_all(false);
+	if (!bidlist) {
+		ret = -errno;
+		pr_debug("Failed to get buildids: %d\n", ret);
+		return ret ?: -ENOMEM;
+	}
+
+	strlist__for_each_entry(nd, bidlist) {
+		cache = probe_cache__new(nd->s, NULL);
+		if (!cache)
+			continue;
+		if (probe_cache__filter_purge(cache, filter) < 0 ||
+		    probe_cache__commit(cache) < 0)
+			pr_warning("Failed to remove entries for %s\n", nd->s);
+		probe_cache__delete(cache);
+	}
+
+	/* the list is ours: release it instead of leaking it */
+	strlist__delete(bidlist);
+	return 0;
+}
+
+/*
+ * Delete the probe events matching @filter.
+ *
+ * With probe_conf.cache set only the probe caches are purged.  Otherwise
+ * the matching events are collected from both the kprobe and the uprobe
+ * event file and removed from each.
+ *
+ * Returns 0 on success, -EINVAL if the filter cannot be rendered as a
+ * string, -ENOMEM on allocation failure, -ENOENT when the filter matches
+ * no event in either file, or the error of the failing open/delete
+ * operation.
+ */
+static int perf_del_probe_events(struct strfilter *filter)
+{
+	int ret, ret2, ufd = -1, kfd = -1;
+	char *str = strfilter__string(filter);
+	struct strlist *klist = NULL, *ulist = NULL;
+	struct str_node *ent;
+
+	if (!str)
+		return -EINVAL;
+
+	pr_debug("Delete filter: \'%s\'\n", str);
+
+	if (probe_conf.cache) {
+		/* leave through the common exit so that str is released */
+		ret = del_perf_probe_caches(filter);
+		goto out;
+	}
+
+	/* Get current event names */
+	ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW);
+	if (ret < 0)
+		goto out;
+
+	klist = strlist__new(NULL, NULL);
+	ulist = strlist__new(NULL, NULL);
+	if (!klist || !ulist) {
+		ret = -ENOMEM;
+		/* the event files are open by now: close them on the way out */
+		goto error;
+	}
+
+	ret = probe_file__get_events(kfd, filter, klist);
+	if (ret == 0) {
+		strlist__for_each_entry(ent, klist)
+			pr_info("Removed event: %s\n", ent->s);
+
+		ret = probe_file__del_strlist(kfd, klist);
+		if (ret < 0)
+			goto error;
+	}
+
+	ret2 = probe_file__get_events(ufd, filter, ulist);
+	if (ret2 == 0) {
+		strlist__for_each_entry(ent, ulist)
+			pr_info("Removed event: %s\n", ent->s);
+
+		ret2 = probe_file__del_strlist(ufd, ulist);
+		if (ret2 < 0) {
+			/* report the uprobe failure instead of the kprobe result */
+			ret = ret2;
+			goto error;
+		}
+	}
+
+	if (ret == -ENOENT && ret2 == -ENOENT)
+		pr_warning("\"%s\" does not hit any event.\n", str);
+	else
+		ret = 0;
+
+error:
+	if (kfd >= 0)
+		close(kfd);
+	if (ufd >= 0)
+		close(ufd);
+out:
+	strlist__delete(klist);
+	strlist__delete(ulist);
+	free(str);
+
+	return ret;
+}
+
+#ifdef HAVE_DWARF_SUPPORT
+#define PROBEDEF_STR	\
+	"[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT [[NAME=]ARG ...]"
+#else
+#define PROBEDEF_STR	"[EVENT=]FUNC[+OFF|%return] [[NAME=]ARG ...]"
+#endif
+
+
+/*
+ * Body of "perf probe": declare and parse the options (their callbacks
+ * fill in params), treat leftover arguments as one probe definition for
+ * --add, then run the command selected in params.command.
+ *
+ * Returns 0 or a negative error code; the list/funcs/line/vars cases
+ * return the result of their show_* call unchanged.
+ */
+static int
+__cmd_probe(int argc, const char **argv)
+{
+	const char * const probe_usage[] = {
+		"perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
+		"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
+		"perf probe [<options>] --del '[GROUP:]EVENT' ...",
+		"perf probe --list [GROUP:]EVENT ...",
+#ifdef HAVE_DWARF_SUPPORT
+		"perf probe [<options>] --line 'LINEDESC'",
+		"perf probe [<options>] --vars 'PROBEPOINT'",
+#endif
+		"perf probe [<options>] --funcs",
+		NULL
+	};
+	struct option options[] = {
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show parsed arguments, etc)"),
+	OPT_BOOLEAN('q', "quiet", &params.quiet,
+		    "be quiet (do not show any messages)"),
+	OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT",
+			     "list up probe events",
+			     opt_set_filter_with_command, DEFAULT_LIST_FILTER),
+	OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
+		     opt_set_filter_with_command),
+	OPT_CALLBACK('a', "add", NULL, PROBEDEF_STR,
+		"probe point definition, where\n"
+		"\t\tGROUP:\tGroup name (optional)\n"
+		"\t\tEVENT:\tEvent name\n"
+		"\t\tFUNC:\tFunction name\n"
+		"\t\tOFF:\tOffset from function entry (in byte)\n"
+		"\t\t%return:\tPut the probe at function return\n"
+#ifdef HAVE_DWARF_SUPPORT
+		"\t\tSRC:\tSource code path\n"
+		"\t\tRL:\tRelative line number from function entry.\n"
+		"\t\tAL:\tAbsolute line number in file.\n"
+		"\t\tPT:\tLazy expression of line code.\n"
+		"\t\tARG:\tProbe argument (local variable name or\n"
+		"\t\t\tkprobe-tracer argument format.)\n",
+#else
+		"\t\tARG:\tProbe argument (kprobe-tracer argument format.)\n",
+#endif
+		opt_add_probe_event),
+	OPT_CALLBACK('D', "definition", NULL, PROBEDEF_STR,
+		"Show trace event definition of given traceevent for k/uprobe_events.",
+		opt_add_probe_event),
+	OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events"
+		    " with existing name"),
+	OPT_CALLBACK('L', "line", NULL,
+		     "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
+		     "Show source code lines.", opt_show_lines),
+	OPT_CALLBACK('V', "vars", NULL,
+		     "FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT",
+		     "Show accessible variables on PROBEDEF", opt_show_vars),
+	OPT_BOOLEAN('\0', "externs", &probe_conf.show_ext_vars,
+		    "Show external variables too (with --vars only)"),
+	OPT_BOOLEAN('\0', "range", &probe_conf.show_location_range,
+		"Show variables location range in scope (with --vars only)"),
+	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+		   "file", "vmlinux pathname"),
+	OPT_STRING('s', "source", &symbol_conf.source_prefix,
+		   "directory", "path to kernel source"),
+	OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
+		"Don't search inlined functions"),
+	OPT__DRY_RUN(&probe_event_dry_run),
+	OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes,
+		 "Set how many probe points can be found for a probe."),
+	OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
+			     "Show potential probe-able functions.",
+			     opt_set_filter_with_command, DEFAULT_FUNC_FILTER),
+	OPT_CALLBACK('\0', "filter", NULL,
+		     "[!]FILTER", "Set a filter (with --vars/funcs only)\n"
+		     "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n"
+		     "\t\t\t \"" DEFAULT_FUNC_FILTER "\" for --funcs)",
+		     opt_set_filter),
+	OPT_CALLBACK('x', "exec", NULL, "executable|path",
+			"target executable name or path", opt_set_target),
+	OPT_CALLBACK('m', "module", NULL, "modname|path",
+		"target module name (for online) or path (for offline)",
+		opt_set_target),
+	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
+		    "Enable symbol demangling"),
+	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
+		    "Enable kernel symbol demangling"),
+	OPT_BOOLEAN(0, "cache", &probe_conf.cache, "Manipulate probe cache"),
+	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+		   "Look for files with symbols relative to this directory"),
+	OPT_CALLBACK(0, "target-ns", NULL, "pid",
+		     "target pid for namespace contexts", opt_set_target_ns),
+	OPT_END()
+	};
+	int ret;
+
+	/* The command options are flagged as exclusive */
+	set_option_flag(options, 'a', "add", PARSE_OPT_EXCLUSIVE);
+	set_option_flag(options, 'd', "del", PARSE_OPT_EXCLUSIVE);
+	set_option_flag(options, 'D', "definition", PARSE_OPT_EXCLUSIVE);
+	set_option_flag(options, 'l', "list", PARSE_OPT_EXCLUSIVE);
+#ifdef HAVE_DWARF_SUPPORT
+	set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE);
+	set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE);
+#else
+/* Without DWARF support the source-level options are marked as not built */
+# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_DWARF=1", c)
+	set_nobuild('L', "line", false);
+	set_nobuild('V', "vars", false);
+	set_nobuild('\0', "externs", false);
+	set_nobuild('\0', "range", false);
+	set_nobuild('k', "vmlinux", true);
+	set_nobuild('s', "source", true);
+	set_nobuild('\0', "no-inlines", true);
+# undef set_nobuild
+#endif
+	set_option_flag(options, 'F', "funcs", PARSE_OPT_EXCLUSIVE);
+
+	argc = parse_options(argc, argv, options, probe_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+	/* Leftover arguments form a probe definition, which implies --add */
+	if (argc > 0) {
+		if (strcmp(argv[0], "-") == 0) {
+			usage_with_options_msg(probe_usage, options,
+				"'-' is not supported.\n");
+		}
+		if (params.command && params.command != 'a') {
+			usage_with_options_msg(probe_usage, options,
+				"another command except --add is set.\n");
+		}
+		ret = parse_probe_event_argv(argc, argv);
+		if (ret < 0) {
+			pr_err_with_code("  Error: Command Parse Error.", ret);
+			return ret;
+		}
+		params.command = 'a';
+	}
+
+	if (params.quiet) {
+		if (verbose != 0) {
+			pr_err("  Error: -v and -q are exclusive.\n");
+			return -EINVAL;
+		}
+		verbose = -1;
+	}
+
+	/* 0 means --max-probes was not given (or given as 0): use the default */
+	if (probe_conf.max_probes == 0)
+		probe_conf.max_probes = MAX_PROBES;
+
+	/*
+	 * Only consider the user's kernel image path if given.
+	 */
+	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
+
+	/*
+	 * Except for --list, --del and --add, other command doesn't depend
+	 * nor change running kernel. So if user gives offline vmlinux,
+	 * ignore its buildid.
+	 */
+	if (!strchr("lda", params.command) && symbol_conf.vmlinux_name)
+		symbol_conf.ignore_vmlinux_buildid = true;
+
+	switch (params.command) {
+	case 'l':
+		if (params.uprobes) {
+			pr_err("  Error: Don't use --list with --exec.\n");
+			parse_options_usage(probe_usage, options, "l", true);
+			parse_options_usage(NULL, options, "x", true);
+			return -EINVAL;
+		}
+		ret = show_perf_probe_events(params.filter);
+		if (ret < 0)
+			pr_err_with_code("  Error: Failed to show event list.", ret);
+		return ret;
+	case 'F':
+		ret = show_available_funcs(params.target, params.nsi,
+					   params.filter, params.uprobes);
+		if (ret < 0)
+			pr_err_with_code("  Error: Failed to show functions.", ret);
+		return ret;
+#ifdef HAVE_DWARF_SUPPORT
+	case 'L':
+		ret = show_line_range(&params.line_range, params.target,
+				      params.nsi, params.uprobes);
+		if (ret < 0)
+			pr_err_with_code("  Error: Failed to show lines.", ret);
+		return ret;
+	case 'V':
+		/* No --filter given: fall back to the default variable filter */
+		if (!params.filter)
+			params.filter = strfilter__new(DEFAULT_VAR_FILTER,
+						       NULL);
+
+		ret = show_available_vars(params.events, params.nevents,
+					  params.filter);
+		if (ret < 0)
+			pr_err_with_code("  Error: Failed to show vars.", ret);
+		return ret;
+#endif
+	case 'd':
+		ret = perf_del_probe_events(params.filter);
+		if (ret < 0) {
+			pr_err_with_code("  Error: Failed to delete events.", ret);
+			return ret;
+		}
+		break;
+	case 'D':
+	case 'a':
+
+		/* Ensure the last given target is used */
+		if (params.target && !params.target_used) {
+			pr_err("  Error: -x/-m must follow the probe definitions.\n");
+			parse_options_usage(probe_usage, options, "m", true);
+			parse_options_usage(NULL, options, "x", true);
+			return -EINVAL;
+		}
+
+		ret = perf_add_probe_events(params.events, params.nevents);
+		if (ret < 0) {
+			pr_err_with_code("  Error: Failed to add events.", ret);
+			return ret;
+		}
+		break;
+	default:
+		/* No command was selected */
+		usage_with_options(probe_usage, options);
+	}
+	return 0;
+}
+
+/*
+ * Entry point of "perf probe": set up params, run the command and tear
+ * params down again.  Positive results are folded into 0, so the caller
+ * sees either 0 or a negative error code.
+ */
+int cmd_probe(int argc, const char **argv)
+{
+	int ret = init_params();
+
+	if (ret == 0) {
+		ret = __cmd_probe(argc, argv);
+		cleanup_params();
+	}
+
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
diff --git a/builtin-record.c b/builtin-record.c
new file mode 100644
index 0000000..22ebeb9
--- /dev/null
+++ b/builtin-record.c
@@ -0,0 +1,1902 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-record.c
+ *
+ * Builtin record command: Record the profile of a workload
+ * (or a CPU, or a PID) into the perf.data output file - for
+ * later analysis via perf report.
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/build-id.h"
+#include "util/util.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-events.h"
+#include "util/config.h"
+
+#include "util/callchain.h"
+#include "util/cgroup.h"
+#include "util/header.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/debug.h"
+#include "util/drv_configs.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/symbol.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/data.h"
+#include "util/perf_regs.h"
+#include "util/auxtrace.h"
+#include "util/tsc.h"
+#include "util/parse-branch-options.h"
+#include "util/parse-regs-options.h"
+#include "util/llvm-utils.h"
+#include "util/bpf-loader.h"
+#include "util/trigger.h"
+#include "util/perf-hooks.h"
+#include "util/time-utils.h"
+#include "util/units.h"
+#include "asm/bug.h"
+
+#include <errno.h>
+#include <inttypes.h>
+#include <locale.h>
+#include <poll.h>
+#include <unistd.h>
+#include <sched.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <linux/time64.h>
+
+/*
+ * Settings for switching the output file.  The signal, size and time
+ * members each gate one of the switch_output_*() predicates.
+ */
+struct switch_output {
+	bool		 enabled;
+	bool		 signal;	/* tested by switch_output_signal() */
+	unsigned long	 size;		/* byte threshold compared with bytes_written; 0 disables */
+	unsigned long	 time;		/* tested by switch_output_time(); 0 disables */
+	const char	*str;
+	bool		 set;
+};
+
+/*
+ * State of one "perf record" run.  The perf_tool is embedded so that
+ * tool callbacks can get back to the record via container_of().
+ */
+struct record {
+	struct perf_tool	tool;
+	struct record_opts	opts;
+	u64			bytes_written;	/* running total, updated by record__write() */
+	struct perf_data	data;
+	struct auxtrace_record	*itr;		/* set up by record__auxtrace_init() */
+	struct perf_evlist	*evlist;
+	struct perf_session	*session;	/* record__write() writes to session->data */
+	int			realtime_prio;
+	bool			no_buildid;
+	bool			no_buildid_set;
+	bool			no_buildid_cache;
+	bool			no_buildid_cache_set;
+	bool			buildid_all;
+	bool			timestamp_filename;
+	bool			timestamp_boundary;
+	struct switch_output	switch_output;
+	unsigned long long	samples;	/* bumped by record__pushfn() and the auxtrace readers */
+};
+
+/* File-scope state for AUX area snapshots and output switching */
+static volatile int auxtrace_record__snapshot_started;
+static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
+static DEFINE_TRIGGER(switch_output_trigger);
+
+/* True when signal driven output switching is on and the trigger is ready. */
+static bool switch_output_signal(struct record *rec)
+{
+	if (!rec->switch_output.signal)
+		return false;
+
+	return trigger_is_ready(&switch_output_trigger);
+}
+
+/*
+ * True when a size threshold is configured, the trigger is ready and the
+ * bytes written so far have reached that threshold.
+ */
+static bool switch_output_size(struct record *rec)
+{
+	if (!rec->switch_output.size)
+		return false;
+
+	if (!trigger_is_ready(&switch_output_trigger))
+		return false;
+
+	return rec->bytes_written >= rec->switch_output.size;
+}
+
+/* True when time based output switching is on and the trigger is ready. */
+static bool switch_output_time(struct record *rec)
+{
+	if (!rec->switch_output.time)
+		return false;
+
+	return trigger_is_ready(&switch_output_trigger);
+}
+
+/*
+ * Write @size bytes from @bf to the session's output and account for
+ * them.  Once the configured size threshold is reached the switch-output
+ * trigger is hit.
+ *
+ * Returns 0 on success, -1 (after logging) when the write fails.
+ */
+static int record__write(struct record *rec, void *bf, size_t size)
+{
+	if (perf_data__write(rec->session->data, bf, size) >= 0) {
+		rec->bytes_written += size;
+
+		if (switch_output_size(rec))
+			trigger_hit(&switch_output_trigger);
+
+		return 0;
+	}
+
+	pr_err("failed to write perf data, error: %m\n");
+	return -1;
+}
+
+/*
+ * Tool callback for synthesized events: write the event, whose length
+ * is taken from its header, straight to the output.
+ */
+static int process_synthesized_event(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	struct record *rec = container_of(tool, struct record, tool);
+	size_t size = event->header.size;
+
+	return record__write(rec, event, size);
+}
+
+/*
+ * Push callback: @to is the struct record.  Counts one more sample and
+ * writes the @size bytes at @bf to the output.
+ */
+static int record__pushfn(void *to, void *bf, size_t size)
+{
+	struct record *rec = to;
+
+	rec->samples += 1;
+
+	return record__write(rec, bf, size);
+}
+
+/* Flags written by sig_handler(); signr stays -1 until a non-SIGCHLD signal arrives */
+static volatile int done;
+static volatile int signr = -1;
+static volatile int child_finished;
+
+/*
+ * Signal handler: SIGCHLD marks the child as finished, any other signal
+ * is remembered in signr.  Either way the done flag is raised.
+ */
+static void sig_handler(int sig)
+{
+	switch (sig) {
+	case SIGCHLD:
+		child_finished = 1;
+		break;
+	default:
+		signr = sig;
+		break;
+	}
+
+	done = 1;
+}
+
+/*
+ * Handler for fatal signals: calls perf_hooks__recover() before handing
+ * the signal to sighandler_dump_stack().
+ */
+static void sigsegv_handler(int sig)
+{
+	perf_hooks__recover();
+	sighandler_dump_stack(sig);
+}
+
+/*
+ * If a signal was recorded by sig_handler(), restore its default
+ * disposition and raise it again.
+ */
+static void record__sig_exit(void)
+{
+	if (signr != -1) {
+		signal(signr, SIG_DFL);
+		raise(signr);
+	}
+}
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+/*
+ * Callback writing one AUX area trace event to the output: the event
+ * header, up to two data fragments and zero padding up to a multiple of
+ * 8 bytes.
+ *
+ * When the output is not a pipe, the file offset of the event is added
+ * to the session's auxtrace index first.
+ *
+ * Returns 0 on success, -1 if the file offset cannot be determined or a
+ * write fails, or the error from auxtrace_index__auxtrace_event().
+ */
+static int record__process_auxtrace(struct perf_tool *tool,
+				    union perf_event *event, void *data1,
+				    size_t len1, void *data2, size_t len2)
+{
+	struct record *rec = container_of(tool, struct record, tool);
+	struct perf_data *data = &rec->data;
+	size_t padding;
+	u8 pad[8] = {0};
+
+	if (!perf_data__is_pipe(data)) {
+		off_t file_offset;
+		int fd = perf_data__fd(data);
+		int err;
+
+		file_offset = lseek(fd, 0, SEEK_CUR);
+		if (file_offset == -1)
+			return -1;
+		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
+						     event, file_offset);
+		if (err)
+			return err;
+	}
+
+	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
+	padding = (len1 + len2) & 7;
+	if (padding)
+		padding = 8 - padding;
+
+	/*
+	 * Stop at the first failed write instead of reporting success for
+	 * a truncated event; record__write() already logged the error.
+	 */
+	if (record__write(rec, event, event->header.size) < 0)
+		return -1;
+	if (record__write(rec, data1, len1) < 0)
+		return -1;
+	if (len2 && record__write(rec, data2, len2) < 0)
+		return -1;
+	if (record__write(rec, &pad, padding) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Read pending AUX area data from @mm and write it out through
+ * record__process_auxtrace().  A positive result from the reader counts
+ * as one more sample.
+ *
+ * Returns 0 on success or the negative error from auxtrace_mmap__read().
+ */
+static int record__auxtrace_mmap_read(struct record *rec,
+				      struct auxtrace_mmap *mm)
+{
+	int ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
+				      record__process_auxtrace);
+
+	if (ret < 0)
+		return ret;
+
+	if (ret > 0)
+		rec->samples++;
+
+	return 0;
+}
+
+/*
+ * Snapshot variant of record__auxtrace_mmap_read(): reads from @mm via
+ * auxtrace_mmap__read_snapshot(), passing the configured snapshot size.
+ *
+ * Returns 0 on success or the negative error from the reader.
+ */
+static int record__auxtrace_mmap_read_snapshot(struct record *rec,
+					       struct auxtrace_mmap *mm)
+{
+	int ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
+					       record__process_auxtrace,
+					       rec->opts.auxtrace_snapshot_size);
+
+	if (ret < 0)
+		return ret;
+
+	if (ret > 0)
+		rec->samples++;
+
+	return 0;
+}
+
+/*
+ * Take a snapshot from every mmap of the evlist that has an AUX area
+ * mapped (non-NULL base).
+ *
+ * Returns 0 when all snapshots were read, -1 at the first failure.
+ */
+static int record__auxtrace_read_snapshot_all(struct record *rec)
+{
+	int idx;
+
+	for (idx = 0; idx < rec->evlist->nr_mmaps; idx++) {
+		struct auxtrace_mmap *mm =
+				&rec->evlist->mmap[idx].auxtrace_mmap;
+
+		if (mm->base &&
+		    record__auxtrace_mmap_read_snapshot(rec, mm) != 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Record one AUX area snapshot and update the snapshot trigger: it goes
+ * to the error state if reading or finishing the snapshot fails, and
+ * back to ready otherwise.
+ */
+static void record__read_auxtrace_snapshot(struct record *rec)
+{
+	pr_debug("Recording AUX area tracing snapshot\n");
+
+	/* snapshot_finish is only attempted after a successful read */
+	if (record__auxtrace_read_snapshot_all(rec) < 0 ||
+	    auxtrace_record__snapshot_finish(rec->itr))
+		trigger_error(&auxtrace_snapshot_trigger);
+	else
+		trigger_ready(&auxtrace_snapshot_trigger);
+}
+
+/*
+ * Set up AUX area tracing: create the auxtrace record unless one exists
+ * already, parse the snapshot options and then the address filters.
+ *
+ * Returns 0 on success or the error of the first failing step.
+ */
+static int record__auxtrace_init(struct record *rec)
+{
+	int err;
+
+	if (rec->itr == NULL) {
+		rec->itr = auxtrace_record__init(rec->evlist, &err);
+		if (err)
+			return err;
+	}
+
+	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
+					      rec->opts.auxtrace_snapshot_opts);
+
+	return err ? err : auxtrace_parse_filters(rec->evlist);
+}
+
+#else
+
+/*
+ * Stubs used when HAVE_AUXTRACE_SUPPORT is not defined: they do nothing
+ * and report success, so callers need no conditional code.
+ */
+static inline
+int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
+			       struct auxtrace_mmap *mm __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
+{
+	return 0;
+}
+
+static int record__auxtrace_init(struct record *rec __maybe_unused)
+{
+	return 0;
+}
+
+#endif
+
+/*
+ * Map the ring buffers (and AUX area buffers) of @evlist using the sizes
+ * and snapshot mode from rec->opts.
+ *
+ * Returns 0 on success.  On failure a message is printed and the negated
+ * errno of the mmap attempt is returned, or -EINVAL if errno was 0.
+ */
+static int record__mmap_evlist(struct record *rec,
+			       struct perf_evlist *evlist)
+{
+	struct record_opts *opts = &rec->opts;
+	char msg[512];
+	int err;
+
+	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
+				 opts->auxtrace_mmap_pages,
+				 opts->auxtrace_snapshot_mode) >= 0)
+		return 0;
+
+	/*
+	 * Latch errno right away: the printing below may change it, and
+	 * the value returned must be the one from the failed mmap.
+	 */
+	err = errno;
+
+	if (err == EPERM) {
+		pr_err("Permission error mapping pages.\n"
+		       "Consider increasing "
+		       "/proc/sys/kernel/perf_event_mlock_kb,\n"
+		       "or try again with a smaller value of -m/--mmap_pages.\n"
+		       "(current value: %u,%u)\n",
+		       opts->mmap_pages, opts->auxtrace_mmap_pages);
+		return -err;
+	}
+
+	pr_err("failed to mmap with %d (%s)\n", err,
+		str_error_r(err, msg, sizeof(msg)));
+
+	return err ? -err : -EINVAL;
+}
+
+/* Map the ring buffers of the record's own evlist; see record__mmap_evlist(). */
+static int record__mmap(struct record *rec)
+{
+	return record__mmap_evlist(rec, rec->evlist);
+}
+
+/*
+ * Configure and open every event of rec->evlist, apply the event filters
+ * and driver configs, mmap the ring buffers and finally attach the evlist
+ * to the session.
+ *
+ * Returns 0 on success and a negative value on failure (a negated errno
+ * for open/mmap failures, -1 for filter and driver config failures,
+ * -ENOMEM when the dummy event cannot be added).
+ */
+static int record__open(struct record *rec)
+{
+	char msg[BUFSIZ];
+	struct perf_evsel *pos;
+	struct perf_evlist *evlist = rec->evlist;
+	struct perf_session *session = rec->session;
+	struct record_opts *opts = &rec->opts;
+	struct perf_evsel_config_term *err_term;
+	int rc = 0;
+
+	/*
+	 * For initial_delay we need to add a dummy event so that we can track
+	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
+	 * real events, the ones asked by the user.
+	 */
+	if (opts->initial_delay) {
+		if (perf_evlist__add_dummy(evlist))
+			return -ENOMEM;
+
+		/* Move the tracking role from the first event to the dummy. */
+		pos = perf_evlist__first(evlist);
+		pos->tracking = 0;
+		pos = perf_evlist__last(evlist);
+		pos->tracking = 1;
+		pos->attr.enable_on_exec = 1;
+	}
+
+	perf_evlist__config(evlist, opts, &callchain_param);
+
+	evlist__for_each_entry(evlist, pos) {
+try_again:
+		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
+			/*
+			 * A non-zero return from perf_evsel__fallback() means
+			 * the same event should be opened again.
+			 */
+			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
+				if (verbose > 0)
+					ui__warning("%s\n", msg);
+				goto try_again;
+			}
+
+			rc = -errno;
+			perf_evsel__open_strerror(pos, &opts->target,
+						  errno, msg, sizeof(msg));
+			ui__error("%s\n", msg);
+			goto out;
+		}
+
+		pos->supported = true;
+	}
+
+	/* On failure 'pos' is set to the event whose filter was rejected. */
+	if (perf_evlist__apply_filters(evlist, &pos)) {
+		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
+			pos->filter, perf_evsel__name(pos), errno,
+			str_error_r(errno, msg, sizeof(msg)));
+		rc = -1;
+		goto out;
+	}
+
+	/* On failure 'pos' and 'err_term' identify the offending config. */
+	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
+		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
+		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
+		      str_error_r(errno, msg, sizeof(msg)));
+		rc = -1;
+		goto out;
+	}
+
+	rc = record__mmap(rec);
+	if (rc)
+		goto out;
+
+	session->evlist = evlist;
+	perf_session__set_id_hdr_size(session);
+out:
+	return rc;
+}
+
+/*
+ * Sample callback of the record tool: tracks the timestamps of the first
+ * and last sample seen and, unless all build-ids are recorded anyway,
+ * counts the sample and marks the DSO it hit.
+ */
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine)
+{
+	struct record *rec = container_of(tool, struct record, tool);
+	struct perf_evlist *evlist = rec->evlist;
+
+	/* A zero first_sample_time means no sample was seen so far. */
+	if (!evlist->first_sample_time)
+		evlist->first_sample_time = sample->time;
+
+	evlist->last_sample_time = sample->time;
+
+	if (!rec->buildid_all) {
+		rec->samples++;
+		return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the events recorded in the session so the sample callback can run
+ * on them.  Returns 0 when the output is empty, otherwise the result of
+ * perf_session__process_events().
+ */
+static int process_buildids(struct record *rec)
+{
+	/* Nothing was written, so there is nothing to process. */
+	if (!rec->data.size)
+		return 0;
+
+	/*
+	 * Processing loads the kernel map and replaces dso->long_name with
+	 * the real pathname that was found.  Prefer the vmlinux path, e.g.
+	 *   /lib/modules/3.16.4/build/vmlinux
+	 *
+	 * over the build-id path in the debug directory, e.g.
+	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
+	 */
+	symbol_conf.ignore_vmlinux_buildid = true;
+
+	/*
+	 * With --buildid-all every DSO is marked regardless of hits, so the
+	 * samples need not be processed.  The exception is
+	 * timestamp_boundary, which still has to walk all samples to learn
+	 * the timestamps of the first and last one.
+	 */
+	if (rec->buildid_all && !rec->timestamp_boundary)
+		rec->tool.sample = NULL;
+
+	return perf_session__process_events(rec->session);
+}
+
+/*
+ * Synthesize the module and kernel mmap events for one guest machine.
+ *
+ * @machine: guest machine to synthesize events for
+ * @data:    the struct perf_tool the synthesized events are delivered to
+ *
+ * Failures are reported with pr_err() only; the function returns nothing.
+ */
+static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
+{
+	int err;
+	struct perf_tool *tool = data;
+	/*
+	 * As for guest kernel when processing subcommand record&report,
+	 * we arrange module mmap prior to guest kernel mmap and trigger
+	 * a preload dso because default guest module symbols are loaded
+	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
+	 * method is used to avoid symbol missing when the first addr is
+	 * in module instead of in guest kernel.
+	 */
+	err = perf_event__synthesize_modules(tool, process_synthesized_event,
+					     machine);
+	if (err < 0)
+		pr_err("Couldn't record guest kernel [%d]'s module"
+		       " information.\n", machine->pid);
+
+	/*
+	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
+	 * have no _text sometimes.
+	 */
+	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+						 machine);
+	if (err < 0)
+		pr_err("Couldn't record guest kernel [%d]'s reference"
+		       " relocation symbol.\n", machine->pid);
+}
+
+/*
+ * Header-only PERF_RECORD_FINISHED_ROUND event, written after a pass over
+ * the mmaps that produced data (see record__mmap_read_evlist()).
+ */
+static struct perf_event_header finished_round_event = {
+	.size = sizeof(struct perf_event_header),
+	.type = PERF_RECORD_FINISHED_ROUND,
+};
+
+/*
+ * Push the data of every mapped ring buffer of @evlist to the output and,
+ * outside of AUX snapshot mode, read the AUX area of each mmap as well.
+ *
+ * @overwrite selects evlist->overwrite_mmap instead of evlist->mmap; the
+ * overwrite buffers are only read while their state is
+ * BKW_MMAP_DATA_PENDING and are marked BKW_MMAP_EMPTY afterwards.
+ *
+ * Returns 0 on success (including when there is nothing to read), -1 when
+ * pushing or AUX reading fails, or the result of writing the
+ * finished-round event.
+ */
+static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
+				    bool overwrite)
+{
+	/* Remembered to detect whether this pass wrote anything. */
+	u64 bytes_written = rec->bytes_written;
+	int i;
+	int rc = 0;
+	struct perf_mmap *maps;
+
+	if (!evlist)
+		return 0;
+
+	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
+	if (!maps)
+		return 0;
+
+	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
+		return 0;
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
+
+		if (maps[i].base) {
+			if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) {
+				rc = -1;
+				goto out;
+			}
+		}
+
+		/* In snapshot mode the AUX area is read on demand instead. */
+		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
+		    record__auxtrace_mmap_read(rec, mm) != 0) {
+			rc = -1;
+			goto out;
+		}
+	}
+
+	/*
+	 * Mark the round finished in case we wrote
+	 * at least one event.
+	 */
+	if (bytes_written != rec->bytes_written)
+		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
+
+	/* Not reached on the error paths above, which jump straight to out. */
+	if (overwrite)
+		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+out:
+	return rc;
+}
+
+/*
+ * Read the regular ring buffers first and, if that succeeded, the
+ * overwritable ones.  Returns the first non-zero result, else 0.
+ */
+static int record__mmap_read_all(struct record *rec)
+{
+	int err = record__mmap_read_evlist(rec, rec->evlist, false);
+
+	if (!err)
+		err = record__mmap_read_evlist(rec, rec->evlist, true);
+
+	return err;
+}
+
+/*
+ * Set every header feature bit of the session, then clear the ones that
+ * do not apply to this record run.
+ */
+static void record__init_features(struct record *rec)
+{
+	struct perf_session *sess = rec->session;
+	int bit = HEADER_FIRST_FEATURE;
+
+	while (bit < HEADER_LAST_FEATURE) {
+		perf_header__set_feat(&sess->header, bit);
+		bit++;
+	}
+
+	/* Build-ids were explicitly disabled. */
+	if (rec->no_buildid)
+		perf_header__clear_feat(&sess->header, HEADER_BUILD_ID);
+
+	/* No tracepoint events in the evlist. */
+	if (!have_tracepoints(&rec->evlist->entries))
+		perf_header__clear_feat(&sess->header, HEADER_TRACING_DATA);
+
+	if (!rec->opts.branch_stack)
+		perf_header__clear_feat(&sess->header, HEADER_BRANCH_STACK);
+
+	if (!rec->opts.full_auxtrace)
+		perf_header__clear_feat(&sess->header, HEADER_AUXTRACE);
+
+	/* Always cleared here. */
+	perf_header__clear_feat(&sess->header, HEADER_STAT);
+}
+
+/*
+ * Finalize a file-backed output: account the written bytes in the header,
+ * record the current file size, process build-ids unless disabled, and
+ * rewrite the header.  Does nothing when the output is a pipe.
+ */
+static void
+record__finish_output(struct record *rec)
+{
+	struct perf_data *out = &rec->data;
+	int out_fd = perf_data__fd(out);
+
+	if (out->is_pipe)
+		return;
+
+	rec->session->header.data_size += rec->bytes_written;
+	/* The current file offset is taken as the size of the data file. */
+	out->size = lseek(perf_data__fd(out), 0, SEEK_CUR);
+
+	if (!rec->no_buildid) {
+		process_buildids(rec);
+
+		if (rec->buildid_all)
+			dsos__hit_all(rec->session);
+	}
+
+	perf_session__write_header(rec->session, rec->evlist, out_fd, true);
+}
+
+/*
+ * Synthesize the thread map events for the forked workload, using a
+ * temporary thread map built from the workload pid.
+ *
+ * Only acts when @tail matches rec->opts.tail_synthesize; otherwise it
+ * returns 0 without doing anything.  Returns -1 when the thread map
+ * cannot be created, else the synthesis result.
+ */
+static int record__synthesize_workload(struct record *rec, bool tail)
+{
+	struct thread_map *workload_threads;
+	int ret;
+
+	if (rec->opts.tail_synthesize != tail)
+		return 0;
+
+	workload_threads = thread_map__new_by_tid(rec->evlist->workload.pid);
+	if (!workload_threads)
+		return -1;
+
+	ret = perf_event__synthesize_thread_map(&rec->tool, workload_threads,
+						process_synthesized_event,
+						&rec->session->machines.host,
+						rec->opts.sample_address,
+						rec->opts.proc_map_timeout);
+
+	/* Drop the reference taken by thread_map__new_by_tid(). */
+	thread_map__put(workload_threads);
+	return ret;
+}
+
+static int record__synthesize(struct record *rec, bool tail);
+
+/*
+ * Finish the current output file and switch to a new, timestamp-suffixed
+ * one.
+ *
+ * @at_exit: true when called while perf record is finishing; in that case
+ *           the byte counters are not reset and no tracking events are
+ *           synthesized for the new file.
+ *
+ * Returns the value of perf_data__switch() (callers treat a negative
+ * value as failure), or -EINVAL when the timestamp cannot be obtained.
+ */
+static int
+record__switch_output(struct record *rec, bool at_exit)
+{
+	struct perf_data *data = &rec->data;
+	int fd, err;
+
+	/* Same Size:      "2015122520103046"*/
+	char timestamp[] = "InvalidTimestamp";
+
+	/* Tail synthesis for the file being closed (no-op unless enabled). */
+	record__synthesize(rec, true);
+	if (target__none(&rec->opts.target))
+		record__synthesize_workload(rec, true);
+
+	rec->samples = 0;
+	record__finish_output(rec);
+	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
+	if (err) {
+		pr_err("Failed to get current timestamp\n");
+		return -EINVAL;
+	}
+
+	fd = perf_data__switch(data, timestamp,
+				    rec->session->header.data_offset,
+				    at_exit);
+	/* Start accounting from zero for the freshly opened file. */
+	if (fd >= 0 && !at_exit) {
+		rec->bytes_written = 0;
+		rec->session->header.data_size = 0;
+	}
+
+	if (!quiet)
+		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
+			data->file.path, timestamp);
+
+	/* Output tracking events */
+	if (!at_exit) {
+		record__synthesize(rec, false);
+
+		/*
+		 * In 'perf record --switch-output' without -a,
+		 * record__synthesize() in record__switch_output() won't
+		 * generate tracking events because there's no thread_map
+		 * in evlist. Which causes newly created perf.data doesn't
+		 * contain map and comm information.
+		 * Create a fake thread_map and directly call
+		 * perf_event__synthesize_thread_map() for those events.
+		 */
+		if (target__none(&rec->opts.target))
+			record__synthesize_workload(rec, false);
+	}
+	return fd;
+}
+
+/* Error value delivered with the workload failure signal; 0 when unset. */
+static volatile int workload_exec_errno;
+
+/*
+ * perf_evlist__prepare_workload will send a SIGUSR1
+ * if the fork fails, since we asked by setting its
+ * want_signal to true.
+ *
+ * The handler stores the value carried by the signal in
+ * workload_exec_errno and flags both 'done' and 'child_finished'.
+ */
+static void workload_exec_failed_signal(int signo __maybe_unused,
+					siginfo_t *info,
+					void *ucontext __maybe_unused)
+{
+	workload_exec_errno = info->si_value.sival_int;
+	done = 1;
+	child_finished = 1;
+}
+
+static void snapshot_sig_handler(int sig);
+static void alarm_sig_handler(int sig);
+
+/*
+ * Weak default that synthesizes nothing and reports success; being
+ * __weak, it can be replaced by another definition at link time.
+ */
+int __weak
+perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
+			    struct perf_tool *tool __maybe_unused,
+			    perf_event__handler_t process __maybe_unused,
+			    struct machine *machine __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Return the base of the first regular mmap of @evlist if it is mapped,
+ * else the base of the first overwrite mmap, else NULL (also for a NULL
+ * @evlist).
+ */
+static const struct perf_event_mmap_page *
+perf_evlist__pick_pc(struct perf_evlist *evlist)
+{
+	if (!evlist)
+		return NULL;
+
+	if (evlist->mmap && evlist->mmap[0].base)
+		return evlist->mmap[0].base;
+
+	if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
+		return evlist->overwrite_mmap[0].base;
+
+	return NULL;
+}
+
+/* Pick the mmap control page of the record's evlist, or NULL if none. */
+static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
+{
+	return perf_evlist__pick_pc(rec->evlist);
+}
+
+/*
+ * Synthesize the non-sample events that describe the recording
+ * environment: attrs/features/tracing data for pipe output, time
+ * conversion, auxtrace info, kernel and module mmaps, guest machines,
+ * extra attrs, thread and cpu maps, and the existing threads.
+ *
+ * Only acts when @tail matches rec->opts.tail_synthesize; otherwise it
+ * returns 0 without doing anything.
+ *
+ * Returns the result of the last step that ran; steps that fail abort
+ * the sequence, except for the kernel/module mmaps which only warn.
+ */
+static int record__synthesize(struct record *rec, bool tail)
+{
+	struct perf_session *session = rec->session;
+	struct machine *machine = &session->machines.host;
+	struct perf_data *data = &rec->data;
+	struct record_opts *opts = &rec->opts;
+	struct perf_tool *tool = &rec->tool;
+	int fd = perf_data__fd(data);
+	int err = 0;
+
+	if (rec->opts.tail_synthesize != tail)
+		return 0;
+
+	if (data->is_pipe) {
+		/*
+		 * We need to synthesize events first, because some
+		 * features works on top of them (on report side).
+		 */
+		err = perf_event__synthesize_attrs(tool, session,
+						   process_synthesized_event);
+		if (err < 0) {
+			pr_err("Couldn't synthesize attrs.\n");
+			goto out;
+		}
+
+		err = perf_event__synthesize_features(tool, session, rec->evlist,
+						      process_synthesized_event);
+		if (err < 0) {
+			pr_err("Couldn't synthesize features.\n");
+			return err;
+		}
+
+		if (have_tracepoints(&rec->evlist->entries)) {
+			/*
+			 * FIXME err <= 0 here actually means that
+			 * there were no tracepoints so its not really
+			 * an error, just that we don't need to
+			 * synthesize anything.  We really have to
+			 * return this more properly and also
+			 * propagate errors that now are calling die()
+			 */
+			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
+								  process_synthesized_event);
+			if (err <= 0) {
+				pr_err("Couldn't record tracing data.\n");
+				goto out;
+			}
+			/* A positive result is added to the written byte count. */
+			rec->bytes_written += err;
+		}
+	}
+
+	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
+					  process_synthesized_event, machine);
+	if (err)
+		goto out;
+
+	if (rec->opts.full_auxtrace) {
+		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
+					session, process_synthesized_event);
+		if (err)
+			goto out;
+	}
+
+	/* Failures here only warn; err is overwritten by the next step. */
+	if (!perf_evlist__exclude_kernel(rec->evlist)) {
+		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+							 machine);
+		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
+				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+				   "Check /proc/kallsyms permission or run as root.\n");
+
+		err = perf_event__synthesize_modules(tool, process_synthesized_event,
+						     machine);
+		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
+				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+				   "Check /proc/modules permission or run as root.\n");
+	}
+
+	if (perf_guest) {
+		machines__process_guests(&session->machines,
+					 perf_event__synthesize_guest_os, tool);
+	}
+
+	err = perf_event__synthesize_extra_attr(&rec->tool,
+						rec->evlist,
+						process_synthesized_event,
+						data->is_pipe);
+	if (err)
+		goto out;
+
+	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
+						 process_synthesized_event,
+						NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize thread map.\n");
+		return err;
+	}
+
+	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
+					     process_synthesized_event, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize cpu map.\n");
+		return err;
+	}
+
+	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
+					    process_synthesized_event, opts->sample_address,
+					    opts->proc_map_timeout, 1);
+out:
+	return err;
+}
+
+/*
+ * Main body of 'perf record'.
+ *
+ * Installs the signal handlers, creates the session, optionally prepares
+ * the workload given in @argv, opens the events, writes the header,
+ * synthesizes the initial events, starts the workload and then loops
+ * reading the mmaps until recording is done.  Afterwards the output is
+ * finalized (or switched one last time when timestamped file names are
+ * in use) and the session is deleted.
+ *
+ * @argc/@argv: the workload command line; a workload is forked only when
+ *              @argc is positive.
+ *
+ * Returns a negative value on error; otherwise the exit status of the
+ * workload when one was forked and exited normally, else 0.
+ */
+static int __cmd_record(struct record *rec, int argc, const char **argv)
+{
+	int err;
+	int status = 0;
+	unsigned long waking = 0;
+	const bool forks = argc > 0;
+	struct perf_tool *tool = &rec->tool;
+	struct record_opts *opts = &rec->opts;
+	struct perf_data *data = &rec->data;
+	struct perf_session *session;
+	bool disabled = false, draining = false;
+	int fd;
+
+	atexit(record__sig_exit);
+	signal(SIGCHLD, sig_handler);
+	signal(SIGINT, sig_handler);
+	signal(SIGTERM, sig_handler);
+	signal(SIGSEGV, sigsegv_handler);
+
+	if (rec->opts.record_namespaces)
+		tool->namespace_events = true;
+
+	/* SIGUSR2 drives AUX snapshots and output switching when enabled. */
+	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
+		signal(SIGUSR2, snapshot_sig_handler);
+		if (rec->opts.auxtrace_snapshot_mode)
+			trigger_on(&auxtrace_snapshot_trigger);
+		if (rec->switch_output.enabled)
+			trigger_on(&switch_output_trigger);
+	} else {
+		signal(SIGUSR2, SIG_IGN);
+	}
+
+	session = perf_session__new(data, false, tool);
+	if (session == NULL) {
+		pr_err("Perf session creation failed.\n");
+		return -1;
+	}
+
+	fd = perf_data__fd(data);
+	rec->session = session;
+
+	record__init_features(rec);
+
+	if (forks) {
+		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
+						    argv, data->is_pipe,
+						    workload_exec_failed_signal);
+		if (err < 0) {
+			pr_err("Couldn't run the workload!\n");
+			status = err;
+			goto out_delete_session;
+		}
+	}
+
+	/*
+	 * If we have just single event and are sending data
+	 * through pipe, we need to force the ids allocation,
+	 * because we synthesize event name through the pipe
+	 * and need the id for that.
+	 */
+	if (data->is_pipe && rec->evlist->nr_entries == 1)
+		rec->opts.sample_id = true;
+
+	if (record__open(rec) != 0) {
+		err = -1;
+		goto out_child;
+	}
+
+	err = bpf__apply_obj_config();
+	if (err) {
+		char errbuf[BUFSIZ];
+
+		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+		pr_err("ERROR: Apply config to BPF failed: %s\n",
+			 errbuf);
+		goto out_child;
+	}
+
+	/*
+	 * Normally perf_session__new would do this, but it doesn't have the
+	 * evlist.
+	 */
+	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
+		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
+		rec->tool.ordered_events = false;
+	}
+
+	if (!rec->evlist->nr_groups)
+		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
+
+	if (data->is_pipe) {
+		err = perf_header__write_pipe(fd);
+		if (err < 0)
+			goto out_child;
+	} else {
+		err = perf_session__write_header(session, rec->evlist, fd, false);
+		if (err < 0)
+			goto out_child;
+	}
+
+	if (!rec->no_buildid
+	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
+		pr_err("Couldn't generate buildids. "
+		       "Use --no-buildid to profile anyway.\n");
+		err = -1;
+		goto out_child;
+	}
+
+	err = record__synthesize(rec, false);
+	if (err < 0)
+		goto out_child;
+
+	if (rec->realtime_prio) {
+		struct sched_param param;
+
+		param.sched_priority = rec->realtime_prio;
+		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+			pr_err("Could not set realtime priority.\n");
+			err = -1;
+			goto out_child;
+		}
+	}
+
+	/*
+	 * When perf is starting the traced process, all the events
+	 * (apart from group members) have enable_on_exec=1 set,
+	 * so don't spoil it by prematurely enabling them.
+	 */
+	if (!target__none(&opts->target) && !opts->initial_delay)
+		perf_evlist__enable(rec->evlist);
+
+	/*
+	 * Let the child rip
+	 */
+	if (forks) {
+		struct machine *machine = &session->machines.host;
+		union perf_event *event;
+		pid_t tgid;
+
+		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
+		if (event == NULL) {
+			err = -ENOMEM;
+			goto out_child;
+		}
+
+		/*
+		 * Some H/W events are generated before COMM event
+		 * which is emitted during exec(), so perf script
+		 * cannot see a correct process name for those events.
+		 * Synthesize COMM event to prevent it.
+		 */
+		tgid = perf_event__synthesize_comm(tool, event,
+						   rec->evlist->workload.pid,
+						   process_synthesized_event,
+						   machine);
+		free(event);
+
+		if (tgid == -1)
+			goto out_child;
+
+		event = malloc(sizeof(event->namespaces) +
+			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+			       machine->id_hdr_size);
+		if (event == NULL) {
+			err = -ENOMEM;
+			goto out_child;
+		}
+
+		/*
+		 * Synthesize NAMESPACES event for the command specified.
+		 */
+		perf_event__synthesize_namespaces(tool, event,
+						  rec->evlist->workload.pid,
+						  tgid, process_synthesized_event,
+						  machine);
+		free(event);
+
+		perf_evlist__start_workload(rec->evlist);
+	}
+
+	if (opts->initial_delay) {
+		usleep(opts->initial_delay * USEC_PER_MSEC);
+		perf_evlist__enable(rec->evlist);
+	}
+
+	trigger_ready(&auxtrace_snapshot_trigger);
+	trigger_ready(&switch_output_trigger);
+	perf_hooks__invoke_record_start();
+	for (;;) {
+		/* Sample count before this pass, to detect an idle pass. */
+		unsigned long long hits = rec->samples;
+
+		/*
+		 * rec->evlist->bkw_mmap_state is possible to be
+		 * BKW_MMAP_EMPTY here: when done == true and
+		 * hits != rec->samples in previous round.
+		 *
+		 * perf_evlist__toggle_bkw_mmap ensure we never
+		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
+		 */
+		if (trigger_is_hit(&switch_output_trigger) || done || draining)
+			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
+
+		if (record__mmap_read_all(rec) < 0) {
+			trigger_error(&auxtrace_snapshot_trigger);
+			trigger_error(&switch_output_trigger);
+			err = -1;
+			goto out_child;
+		}
+
+		if (auxtrace_record__snapshot_started) {
+			auxtrace_record__snapshot_started = 0;
+			if (!trigger_is_error(&auxtrace_snapshot_trigger))
+				record__read_auxtrace_snapshot(rec);
+			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
+				pr_err("AUX area tracing snapshot failed\n");
+				err = -1;
+				goto out_child;
+			}
+		}
+
+		if (trigger_is_hit(&switch_output_trigger)) {
+			/*
+			 * If switch_output_trigger is hit, the data in
+			 * overwritable ring buffer should have been collected,
+			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
+			 *
+			 * If SIGUSR2 raise after or during record__mmap_read_all(),
+			 * record__mmap_read_all() didn't collect data from
+			 * overwritable ring buffer. Read again.
+			 */
+			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
+				continue;
+			trigger_ready(&switch_output_trigger);
+
+			/*
+			 * Reenable events in overwrite ring buffer after
+			 * record__mmap_read_all(): we should have collected
+			 * data from it.
+			 */
+			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
+
+			/* 'waking' is unsigned long, hence %lu. */
+			if (!quiet)
+				fprintf(stderr, "[ perf record: dump data: Woken up %lu times ]\n",
+					waking);
+			waking = 0;
+			fd = record__switch_output(rec, false);
+			if (fd < 0) {
+				pr_err("Failed to switch to new file\n");
+				trigger_error(&switch_output_trigger);
+				err = fd;
+				goto out_child;
+			}
+
+			/* re-arm the alarm */
+			if (rec->switch_output.time)
+				alarm(rec->switch_output.time);
+		}
+
+		/* No new samples in this pass: either finish or wait for data. */
+		if (hits == rec->samples) {
+			if (done || draining)
+				break;
+			err = perf_evlist__poll(rec->evlist, -1);
+			/*
+			 * Propagate error, only if there's any. Ignore positive
+			 * number of returned events and interrupt error.
+			 */
+			if (err > 0 || (err < 0 && errno == EINTR))
+				err = 0;
+			waking++;
+
+			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
+				draining = true;
+		}
+
+		/*
+		 * When perf is starting the traced process, at the end events
+		 * die with the process and we wait for that. Thus no need to
+		 * disable events in this case.
+		 */
+		if (done && !disabled && !target__none(&opts->target)) {
+			trigger_off(&auxtrace_snapshot_trigger);
+			perf_evlist__disable(rec->evlist);
+			disabled = true;
+		}
+	}
+	trigger_off(&auxtrace_snapshot_trigger);
+	trigger_off(&switch_output_trigger);
+
+	if (forks && workload_exec_errno) {
+		char msg[STRERR_BUFSIZE];
+		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
+		pr_err("Workload failed: %s\n", emsg);
+		err = -1;
+		goto out_child;
+	}
+
+	/* 'waking' is unsigned long, hence %lu. */
+	if (!quiet)
+		fprintf(stderr, "[ perf record: Woken up %lu times to write data ]\n", waking);
+
+	if (target__none(&rec->opts.target))
+		record__synthesize_workload(rec, true);
+
+out_child:
+	if (forks) {
+		int exit_status;
+
+		if (!child_finished)
+			kill(rec->evlist->workload.pid, SIGTERM);
+
+		wait(&exit_status);
+
+		/* An error of our own takes precedence over the child's status. */
+		if (err < 0)
+			status = err;
+		else if (WIFEXITED(exit_status))
+			status = WEXITSTATUS(exit_status);
+		else if (WIFSIGNALED(exit_status))
+			signr = WTERMSIG(exit_status);
+	} else
+		status = err;
+
+	record__synthesize(rec, true);
+	/* this will be recalculated during process_buildids() */
+	rec->samples = 0;
+
+	if (!err) {
+		if (!rec->timestamp_filename) {
+			record__finish_output(rec);
+		} else {
+			fd = record__switch_output(rec, true);
+			if (fd < 0) {
+				status = fd;
+				goto out_delete_session;
+			}
+		}
+	}
+
+	perf_hooks__invoke_record_end();
+
+	if (!err && !quiet) {
+		char samples[128];
+		const char *postfix = rec->timestamp_filename ?
+					".<timestamp>" : "";
+
+		if (rec->samples && !rec->opts.full_auxtrace)
+			scnprintf(samples, sizeof(samples),
+				  " (%" PRIu64 " samples)", rec->samples);
+		else
+			samples[0] = '\0';
+
+		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
+			perf_data__size(data) / 1024.0 / 1024.0,
+			data->file.path, postfix, samples);
+	}
+
+out_delete_session:
+	perf_session__delete(session);
+	return status;
+}
+
+/*
+ * Emit debug output describing the selected callchain record mode, plus
+ * the stack dump size when the mode is DWARF.
+ */
+static void callchain_debug(struct callchain_param *callchain)
+{
+	static const char *mode_name[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
+
+	pr_debug("callchain: type %s\n", mode_name[callchain->record_mode]);
+
+	if (callchain->record_mode != CALLCHAIN_DWARF)
+		return;
+
+	pr_debug("callchain: stack dump size %d\n",
+		 callchain->dump_size);
+}
+
+/*
+ * Parse a call-graph option argument into @callchain.
+ *
+ * @record:    record options; sample_address is switched on for DWARF mode
+ * @callchain: callchain parameters to fill in
+ * @arg:       option argument, handed to parse_callchain_record_opt()
+ * @unset:     true for the negated form (--no-call-graph)
+ *
+ * Returns 0 on success, or the non-zero result of the argument parser.
+ */
+int record_opts__parse_callchain(struct record_opts *record,
+				 struct callchain_param *callchain,
+				 const char *arg, bool unset)
+{
+	int ret;
+
+	callchain->enabled = !unset;
+
+	/* --no-call-graph */
+	if (unset) {
+		callchain->record_mode = CALLCHAIN_NONE;
+		pr_debug("callchain: disabled\n");
+		return 0;
+	}
+
+	ret = parse_callchain_record_opt(arg, callchain);
+	if (ret)
+		return ret;
+
+	/* Enable data address sampling for DWARF unwind. */
+	if (callchain->record_mode == CALLCHAIN_DWARF)
+		record->sample_address = true;
+
+	callchain_debug(callchain);
+	return 0;
+}
+
+/*
+ * Option callback: opt->value is passed on as the record options and the
+ * global callchain_param receives the parsed settings.
+ */
+int record_parse_callchain_opt(const struct option *opt,
+			       const char *arg,
+			       int unset)
+{
+	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
+}
+
+/*
+ * Option callback that enables callchain recording on the
+ * callchain_param referenced by opt->value, defaulting the record mode
+ * to FP when none has been chosen yet.  Always returns 0.
+ */
+int record_callchain_opt(const struct option *opt,
+			 const char *arg __maybe_unused,
+			 int unset __maybe_unused)
+{
+	struct callchain_param *param = opt->value;
+
+	param->enabled = true;
+
+	/* Keep an already selected mode, otherwise fall back to FP. */
+	if (param->record_mode == CALLCHAIN_NONE)
+		param->record_mode = CALLCHAIN_FP;
+
+	callchain_debug(param);
+	return 0;
+}
+
+/*
+ * Config callback for the "record.*" variables.
+ *
+ * "record.build-id" accepts "cache", "no-cache" and "skip"; any other
+ * value yields -1.  "record.call-graph" is forwarded to
+ * perf_default_config() under the name "call-graph.record-mode".
+ * Every other variable is ignored and 0 is returned.
+ *
+ * @cb: the struct record being configured
+ */
+static int perf_record_config(const char *var, const char *value, void *cb)
+{
+	struct record *rec = cb;
+
+	if (strcmp(var, "record.build-id") == 0) {
+		if (strcmp(value, "cache") == 0)
+			rec->no_buildid_cache = false;
+		else if (strcmp(value, "no-cache") == 0)
+			rec->no_buildid_cache = true;
+		else if (strcmp(value, "skip") == 0)
+			rec->no_buildid = true;
+		else
+			return -1;
+
+		return 0;
+	}
+
+	if (strcmp(var, "record.call-graph") == 0)
+		return perf_default_config("call-graph.record-mode", value, cb);
+
+	return 0;
+}
+
+/* Associates a clock name accepted on the command line with its clock id. */
+struct clockid_map {
+	const char *name;
+	int clockid;
+};
+
+/* Initializer for one name -> clockid table entry. */
+#define CLOCKID_MAP(n, c)	\
+	{ .name = n, .clockid = (c), }
+
+/* Table terminator: an entry whose name is NULL. */
+#define CLOCKID_END	{ .name = NULL, }
+
+
+/*
+ * Add the missing ones, we need to build on many distros...
+ */
+#ifndef CLOCK_MONOTONIC_RAW
+#define CLOCK_MONOTONIC_RAW 4
+#endif
+#ifndef CLOCK_BOOTTIME
+#define CLOCK_BOOTTIME 7
+#endif
+#ifndef CLOCK_TAI
+#define CLOCK_TAI 11
+#endif
+
+/*
+ * Clock names understood by parse_clockid(), terminated by CLOCKID_END.
+ * Lookup is case-insensitive and tolerates a "CLOCK_" prefix.
+ */
+static const struct clockid_map clockids[] = {
+	/* available for all events, NMI safe */
+	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
+	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
+
+	/* available for some events */
+	CLOCKID_MAP("realtime", CLOCK_REALTIME),
+	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
+	CLOCKID_MAP("tai", CLOCK_TAI),
+
+	/* available for the lazy */
+	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
+	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
+	CLOCKID_MAP("real", CLOCK_REALTIME),
+	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
+
+	CLOCKID_END,
+};
+
+/*
+ * Option callback selecting the clock used for the recording.
+ *
+ * @opt:   option whose value points at the struct record_opts to update
+ * @str:   clock given either as a number or as a name from clockids[],
+ *         optionally prefixed with "CLOCK_"; NULL when no argument
+ * @unset: non-zero for the negated form, which clears use_clockid
+ *
+ * Returns 0 on success and -1 when a clockid was already set or the name
+ * is unknown.
+ */
+static int parse_clockid(const struct option *opt, const char *str, int unset)
+{
+	struct record_opts *opts = opt->value;
+	const struct clockid_map *entry;
+	const char *name = str;
+
+	if (unset) {
+		opts->use_clockid = 0;
+		return 0;
+	}
+
+	/* no arg passed */
+	if (str == NULL)
+		return 0;
+
+	/* no setting it twice */
+	if (opts->use_clockid)
+		return -1;
+
+	opts->use_clockid = true;
+
+	/* if its a number, we're done */
+	if (sscanf(str, "%d", &opts->clockid) == 1)
+		return 0;
+
+	/* allow a "CLOCK_" prefix to the name */
+	if (strncasecmp(name, "CLOCK_", 6) == 0)
+		name += 6;
+
+	for (entry = clockids; entry->name != NULL; entry++) {
+		if (strcasecmp(name, entry->name) == 0) {
+			opts->clockid = entry->clockid;
+			return 0;
+		}
+	}
+
+	/* Unknown clock: undo the flag and report the string as given. */
+	opts->use_clockid = false;
+	ui__warning("unknown clockid %s, check man page\n", str);
+	return -1;
+}
+
+/*
+ * Option callback for "pages[,pages]": the part before the comma sets
+ * opts->mmap_pages, the part after it sets opts->auxtrace_mmap_pages.
+ * An empty first part leaves mmap_pages untouched.
+ *
+ * Returns 0 on success, -EINVAL for a NULL argument, -ENOMEM when the
+ * argument cannot be duplicated, or the error from parsing a page count.
+ */
+static int record__parse_mmap_pages(const struct option *opt,
+				    const char *str,
+				    int unset __maybe_unused)
+{
+	struct record_opts *opts = opt->value;
+	char *copy, *comma;
+	unsigned int pages;
+	int ret = 0;
+
+	if (str == NULL)
+		return -EINVAL;
+
+	/* Work on a private copy so the comma can be overwritten. */
+	copy = strdup(str);
+	if (copy == NULL)
+		return -ENOMEM;
+
+	comma = strchr(copy, ',');
+	if (comma != NULL)
+		*comma = '\0';
+
+	if (*copy != '\0') {
+		ret = __perf_evlist__parse_mmap_pages(&pages, copy);
+		if (ret)
+			goto out_free;
+		opts->mmap_pages = pages;
+	}
+
+	if (comma != NULL) {
+		ret = __perf_evlist__parse_mmap_pages(&pages, comma + 1);
+		if (!ret)
+			opts->auxtrace_mmap_pages = pages;
+	}
+
+out_free:
+	free(copy);
+	return ret;
+}
+
+/*
+ * Warn when the --switch-output size threshold is smaller than half of
+ * the mmap size computed from rec->opts.mmap_pages.
+ */
+static void switch_output_size_warn(struct record *rec)
+{
+	struct switch_output *s = &rec->switch_output;
+	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
+	char buf[100];
+
+	wakeup_size /= 2;
+
+	/* The threshold is large enough, nothing to warn about. */
+	if (s->size >= wakeup_size)
+		return;
+
+	unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
+	pr_warning("WARNING: switch-output data size lower than "
+		   "wakeup kernel buffer size (%s) "
+		   "expect bigger perf.data sizes\n", buf);
+}
+
+/*
+ * Interpret the --switch-output argument stored in rec->switch_output.
+ *
+ * The argument is tried, in order, as the literal "signal", as a size
+ * with a B/K/M/G suffix and as a time with an s/m/h/d suffix.  On a match
+ * output switching and timestamped file names are enabled.
+ *
+ * Returns 0 when the option was not given or was parsed successfully,
+ * and -1 when the argument matches none of the forms.
+ */
+static int switch_output_setup(struct record *rec)
+{
+	struct switch_output *s = &rec->switch_output;
+	static struct parse_tag tags_size[] = {
+		{ .tag  = 'B', .mult = 1       },
+		{ .tag  = 'K', .mult = 1 << 10 },
+		{ .tag  = 'M', .mult = 1 << 20 },
+		{ .tag  = 'G', .mult = 1 << 30 },
+		{ .tag  = 0 },
+	};
+	static struct parse_tag tags_time[] = {
+		{ .tag  = 's', .mult = 1        },
+		{ .tag  = 'm', .mult = 60       },
+		{ .tag  = 'h', .mult = 60*60    },
+		{ .tag  = 'd', .mult = 60*60*24 },
+		{ .tag  = 0 },
+	};
+	unsigned long val;
+
+	if (!s->set)
+		return 0;
+
+	/* parse_tag_value() signals "no match" with (unsigned long) -1. */
+	if (strcmp(s->str, "signal") == 0) {
+		s->signal = true;
+		pr_debug("switch-output with SIGUSR2 signal\n");
+	} else if ((val = parse_tag_value(s->str, tags_size)) != (unsigned long) -1) {
+		s->size = val;
+		pr_debug("switch-output with %s size threshold\n", s->str);
+	} else if ((val = parse_tag_value(s->str, tags_time)) != (unsigned long) -1) {
+		s->time = val;
+		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
+			 s->str, s->time);
+	} else {
+		return -1;
+	}
+
+	rec->timestamp_filename = true;
+	s->enabled              = true;
+
+	if (s->size && !rec->opts.no_buffering)
+		switch_output_size_warn(rec);
+
+	return 0;
+}
+
+/* NULL-terminated usage strings for 'perf record'. */
+static const char * const __record_usage[] = {
+	"perf record [<options>] [<command>]",
+	"perf record [<options>] -- <command> [<options>]",
+	NULL
+};
+/* Non-static handle to the usage strings above. */
+const char * const *record_usage = __record_usage;
+
+/*
+ * XXX Ideally would be local to cmd_record() and passed to a record__new
+ * because we need to have access to it in record__exit, that is called
+ * after cmd_record() exits, but since record_options need to be accessible to
+ * builtin-script, leave it here.
+ *
+ * At least we don't touch it in all the other functions here directly.
+ *
+ * Just say no to tons of global variables, sigh.
+ */
+static struct record record = {
+	/* Default recording options. */
+	.opts = {
+		.sample_time	     = true,
+		.mmap_pages	     = UINT_MAX,
+		.user_freq	     = UINT_MAX,
+		.user_interval	     = ULLONG_MAX,
+		.freq		     = 4000,
+		.target		     = {
+			.uses_mmap   = true,
+			.default_per_cpu = true,
+		},
+		.proc_map_timeout     = 500,
+	},
+	/* Event callbacks used when the recorded data is processed. */
+	.tool = {
+		.sample		= process_sample_event,
+		.fork		= perf_event__process_fork,
+		.exit		= perf_event__process_exit,
+		.comm		= perf_event__process_comm,
+		.namespaces	= perf_event__process_namespaces,
+		.mmap		= perf_event__process_mmap,
+		.mmap2		= perf_event__process_mmap2,
+		.ordered_events	= true,
+	},
+};
+
+/* Call-graph help text: the shared CALLCHAIN_RECORD_HELP plus the default mode. */
+const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
+	"\n\t\t\t\tDefault: fp";
+
+/* NOTE(review): presumably set by a --dry-run option outside this view — confirm. */
+static bool dry_run;
+
+/*
+ * XXX Will stay a global variable till we fix builtin-script.c to stop messing
+ * with it and switch to use the library functions in perf_evlist that came
+ * from builtin-record.c, i.e. use record_opts,
+ * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
+ * using pipes, etc.
+ */
+static struct option __record_options[] = {	/* 'perf record' command line options; values land in 'record' and a few globals */
+	OPT_CALLBACK('e', "event", &record.evlist, "event",
+		     "event selector. use 'perf list' to list available events",
+		     parse_events_option),
+	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
+		     "event filter", parse_filter),
+	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
+			   NULL, "don't record events from perf itself",
+			   exclude_perf),
+	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
+		    "record events on existing process id"),
+	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
+		    "record events on existing thread id"),
+	OPT_INTEGER('r', "realtime", &record.realtime_prio,
+		    "collect data with this RT SCHED_FIFO priority"),
+	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
+		    "collect data without buffering"),
+	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
+		    "collect raw sample records from all opened counters"),
+	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
+			    "system-wide collection from all CPUs"),
+	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
+		    "list of cpus to monitor"),
+	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
+	OPT_STRING('o', "output", &record.data.file.path, "file",
+		    "output file name"),
+	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
+			&record.opts.no_inherit_set,
+			"child tasks do not inherit counters"),
+	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
+		    "synthesize non-sample events at the end of output"),
+	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
+	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
+		    "Fail if the specified frequency can't be used"),
+	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
+		     "profile at this frequency",
+		      record__parse_freq),
+	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
+		     "number of mmap data pages and AUX area tracing mmap pages",
+		     record__parse_mmap_pages),
+	OPT_BOOLEAN(0, "group", &record.opts.group,
+		    "put the counters into a counter group"),
+	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
+			   NULL, "enables call-graph recording" ,
+			   &record_callchain_opt),
+	OPT_CALLBACK(0, "call-graph", &record.opts,
+		     "record_mode[,record_size]", record_callchain_help,
+		     &record_parse_callchain_opt),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show counter open errors, etc)"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
+	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
+		    "per thread counts"),
+	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
+	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
+		    "Record the sample physical addresses"),
+	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
+	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
+			&record.opts.sample_time_set,
+			"Record the sample timestamps"),
+	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
+			"Record the sample period"),
+	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
+		    "don't sample"),
+	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
+			&record.no_buildid_cache_set,
+			"do not update the buildid cache"),
+	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
+			&record.no_buildid_set,
+			"do not collect buildids in perf.data"),
+	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
+		     "monitor event in cgroup name only",
+		     parse_cgroups),
+	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
+		  "ms to wait before starting measurement after program start"),
+	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
+		   "user to profile"),
+
+	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
+		     "branch any", "sample any taken branches",
+		     parse_branch_stack),
+
+	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
+		     "branch filter mask", "branch stack filter modes",
+		     parse_branch_stack),
+	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
+		    "sample by weight (on special events only)"),
+	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
+		    "sample transaction flags (special events only)"),
+	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
+		    "use per-thread mmaps"),
+	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
+		    "sample selected machine registers on interrupt,"
+		    " use -I ? to list register names", parse_regs),
+	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
+		    "sample selected machine registers on interrupt,"	/* the hint below must name this option, not -I */
+		    " use --user-regs=? to list register names", parse_regs),
+	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
+		    "Record running/enabled time of read (:S) events"),
+	OPT_CALLBACK('k', "clockid", &record.opts,
+	"clockid", "clockid to use for events, see clock_gettime()",
+	parse_clockid),
+	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
+			  "opts", "AUX area tracing Snapshot Mode", ""),
+	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
+			"per thread proc mmap processing timeout in ms"),
+	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
+		    "Record namespaces events"),
+	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
+		    "Record context switch events"),
+	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
+			 "Configure all used events to run in kernel space.",
+			 PARSE_OPT_EXCLUSIVE),
+	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
+			 "Configure all used events to run in user space.",
+			 PARSE_OPT_EXCLUSIVE),
+	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
+		   "clang binary to use for compiling BPF scriptlets"),
+	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
+		   "options passed to clang when compiling BPF scriptlets"),
+	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
+		   "file", "vmlinux pathname"),
+	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
+		    "Record build-id of all DSOs regardless of hits"),
+	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
+		    "append timestamp to output filename"),
+	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
+		    "Record timestamp boundary (time of first/last samples)"),
+	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
+			  &record.switch_output.set, "signal,size,time",
+			  "Switch output when receive SIGUSR2 or cross size,time threshold",
+			  "signal"),
+	OPT_BOOLEAN(0, "dry-run", &dry_run,	/* cmd_record() stops before recording when set */
+		    "Parse options then exit"),
+	OPT_END()
+};
+
+struct option *record_options = __record_options;	/* non-static pointer to the option table */
+
+int cmd_record(int argc, const char **argv)	/* 'perf record' entry: parse and validate options, then run __cmd_record() */
+{
+	int err;
+	struct record *rec = &record;
+	char errbuf[BUFSIZ];
+
+	setlocale(LC_ALL, "");
+
+#ifndef HAVE_LIBBPF_SUPPORT
+# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
+	set_nobuild('\0', "clang-path", true);	/* without libbpf these options are flagged as not built */
+	set_nobuild('\0', "clang-opt", true);
+# undef set_nobuild
+#endif
+
+#ifndef HAVE_BPF_PROLOGUE
+# if !defined (HAVE_DWARF_SUPPORT)
+#  define REASON  "NO_DWARF=1"
+# elif !defined (HAVE_LIBBPF_SUPPORT)
+#  define REASON  "NO_LIBBPF=1"
+# else
+#  define REASON  "this architecture doesn't support BPF prologue"
+# endif
+# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
+	set_nobuild('\0', "vmlinux", true);
+# undef set_nobuild
+# undef REASON
+#endif
+
+	rec->evlist = perf_evlist__new();
+	if (rec->evlist == NULL)
+		return -ENOMEM;
+
+	err = perf_config(perf_record_config, rec);	/* config values are applied before the command line is parsed */
+	if (err)
+		return err;	/* NOTE(review): returns without the cleanup done at 'out'; rec->evlist stays allocated */
+
+	argc = parse_options(argc, argv, record_options, record_usage,
+			    PARSE_OPT_STOP_AT_NON_OPTION);
+	if (quiet)
+		perf_quiet_option();
+
+	/* Make system wide (-a) the default target. */
+	if (!argc && target__none(&rec->opts.target))
+		rec->opts.target.system_wide = true;
+
+	if (nr_cgroups && !rec->opts.target.system_wide) {
+		usage_with_options_msg(record_usage, record_options,
+			"cgroup monitoring only available in system-wide mode");
+		/* NOTE(review): no return here - presumably usage_with_options_msg() does not return; confirm */
+	}
+	if (rec->opts.record_switch_events &&
+	    !perf_can_record_switch_events()) {
+		ui__error("kernel does not support recording context switch events\n");
+		parse_options_usage(record_usage, record_options, "switch-events", 0);
+		return -EINVAL;
+	}
+
+	if (switch_output_setup(rec)) {
+		parse_options_usage(record_usage, record_options, "switch-output", 0);
+		return -EINVAL;
+	}
+
+	if (rec->switch_output.time) {	/* time based output switching is driven by SIGALRM */
+		signal(SIGALRM, alarm_sig_handler);
+		alarm(rec->switch_output.time);
+	}
+
+	/*
+	 * Allow aliases to facilitate the lookup of symbols for address
+	 * filters. Refer to auxtrace_parse_filters().
+	 */
+	symbol_conf.allow_aliases = true;
+
+	symbol__init(NULL);
+
+	err = record__auxtrace_init(rec);
+	if (err)
+		goto out;
+
+	if (dry_run)	/* --dry-run: err is 0 here, so this leaves through the cleanup with success */
+		goto out;
+
+	err = bpf__setup_stdout(rec->evlist);
+	if (err) {
+		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
+		pr_err("ERROR: Setup BPF stdout failed: %s\n",
+			 errbuf);
+		goto out;
+	}
+
+	err = -ENOMEM;	/* result reported by the "Not enough memory" exit below */
+
+	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
+		pr_warning(
+"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
+"check /proc/sys/kernel/kptr_restrict.\n\n"
+"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
+"file is not found in the buildid cache or in the vmlinux path.\n\n"
+"Samples in kernel modules won't be resolved at all.\n\n"
+"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
+"even with a suitable vmlinux or kallsyms file.\n\n");
+
+	if (rec->no_buildid_cache || rec->no_buildid) {
+		disable_buildid_cache();
+	} else if (rec->switch_output.enabled) {
+		/*
+		 * In 'perf record --switch-output', disable buildid
+		 * generation by default to reduce data file switching
+		 * overhead. Still generate buildid if they are required
+		 * explicitly using
+		 *
+		 *  perf record --switch-output --no-no-buildid \
+		 *              --no-no-buildid-cache
+		 *
+		 * The code below is equivalent to:
+		 *
+		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
+		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
+		 *         disable_buildid_cache();
+		 */
+		bool disable = true;
+
+		if (rec->no_buildid_set && !rec->no_buildid)
+			disable = false;
+		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
+			disable = false;
+		if (disable) {
+			rec->no_buildid = true;
+			rec->no_buildid_cache = true;
+			disable_buildid_cache();
+		}
+	}
+
+	if (record.opts.overwrite)	/* overwrite mode implies tail synthesis */
+		record.opts.tail_synthesize = true;
+
+	if (rec->evlist->nr_entries == 0 &&	/* no -e given: fall back to the default event */
+	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
+		pr_err("Not enough memory for event selector list\n");
+		goto out;
+	}
+
+	if (rec->opts.target.tid && !rec->opts.no_inherit_set)	/* -t without an explicit inherit choice: don't inherit */
+		rec->opts.no_inherit = true;
+
+	err = target__validate(&rec->opts.target);
+	if (err) {	/* validation problems are only warned about; err is overwritten just below */
+		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
+		ui__warning("%s\n", errbuf);
+	}
+
+	err = target__parse_uid(&rec->opts.target);
+	if (err) {
+		int saved_errno = errno;	/* captured before the calls below can change errno */
+
+		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
+		ui__error("%s", errbuf);
+
+		err = -saved_errno;
+		goto out;
+	}
+
+	/* Enable ignoring missing threads when -u/-p option is defined. */
+	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
+
+	err = -ENOMEM;	/* NOTE(review): overwritten below; only matters if usage_with_options() returns - confirm */
+	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
+		usage_with_options(record_usage, record_options);
+
+	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
+	if (err)
+		goto out;
+
+	/*
+	 * We take all buildids when the file contains
+	 * AUX area tracing data because we do not decode the
+	 * trace because it would take too long.
+	 */
+	if (rec->opts.full_auxtrace)
+		rec->buildid_all = true;
+
+	if (record_opts__config(&rec->opts)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = __cmd_record(&record, argc, argv);
+out:	/* common exit for every path after symbol__init() */
+	perf_evlist__delete(rec->evlist);
+	symbol__exit();
+	auxtrace_record__free(rec->itr);
+	return err;
+}
+
+static void snapshot_sig_handler(int sig __maybe_unused)
+{
+	/* Start an AUX area snapshot when the snapshot trigger is ready. */
+	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
+		trigger_hit(&auxtrace_snapshot_trigger);
+		auxtrace_record__snapshot_started = 1;
+		if (auxtrace_record__snapshot_start(record.itr) != 0)
+			trigger_error(&auxtrace_snapshot_trigger);
+	}
+
+	/* Also request an output switch if switch_output_signal() says so. */
+	if (switch_output_signal(&record))
+		trigger_hit(&switch_output_trigger);
+}
+
+static void alarm_sig_handler(int sig __maybe_unused)
+{
+	/* cmd_record() arms SIGALRM when switch_output.time is non-zero. */
+	if (!switch_output_time(&record))
+		return;
+	trigger_hit(&switch_output_trigger);
+}
diff --git a/builtin-report.c b/builtin-report.c
new file mode 100644
index 0000000..0f198f6
--- /dev/null
+++ b/builtin-report.c
@@ -0,0 +1,1406 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-report.c
+ *
+ * Builtin report command: Analyze the perf.data input file,
+ * look up and read DSOs and symbol information and display
+ * a histogram of results, along various sorting keys.
+ */
+#include "builtin.h"
+
+#include "util/util.h"
+#include "util/config.h"
+
+#include "util/annotate.h"
+#include "util/color.h"
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/err.h>
+#include "util/symbol.h"
+#include "util/callchain.h"
+#include "util/values.h"
+
+#include "perf.h"
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/header.h"
+#include "util/session.h"
+#include "util/tool.h"
+
+#include <subcmd/parse-options.h>
+#include <subcmd/exec-cmd.h>
+#include "util/parse-events.h"
+
+#include "util/thread.h"
+#include "util/sort.h"
+#include "util/hist.h"
+#include "util/data.h"
+#include "arch/common.h"
+#include "util/time-utils.h"
+#include "util/auxtrace.h"
+#include "util/units.h"
+#include "util/branch.h"
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <regex.h>
+#include <signal.h>
+#include <linux/bitmap.h>
+#include <linux/stringify.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/mman.h>
+
+struct report {	/* state of one 'perf report' run */
+	struct perf_tool	tool;	/* embedded: callbacks recover the report via container_of() */
+	struct perf_session	*session;
+	bool			use_tui, use_gtk, use_stdio;
+	bool			show_full_info;
+	bool			show_threads;	/* collect PERF_RECORD_READ values and print them */
+	bool			inverted_callchain;
+	bool			mem_mode;	/* use hist_iter_mem; header shows total weight and sort order */
+	bool			stats_mode;
+	bool			tasks_mode;
+	bool			mmaps_mode;	/* tasks_setup() also handles mmap/mmap2 events */
+	bool			header;
+	bool			header_only;
+	bool			nonany_branch_mode;	/* set when the combined branch type lacks PERF_SAMPLE_BRANCH_ANY */
+	bool			group_set;	/* force a group leader when the data has no groups */
+	int			max_stack;	/* passed to hist_entry_iter__add() */
+	struct perf_read_values	show_threads_values;	/* filled by process_read_event() */
+	const char		*pretty_printing_style;	/* "raw" selects the raw thread values display */
+	const char		*cpu_list;	/* when set, samples are filtered through cpu_bitmap */
+	const char		*symbol_filter_str;
+	const char		*time_str;	/* printed in the header as "time slices" */
+	struct perf_time_interval *ptime_range;	/* with range_num: time ranges used to skip samples */
+	int			range_size;
+	int			range_num;
+	float			min_percent;	/* "report.percent-limit"; passed to the browsers */
+	u64			nr_entries;	/* total used for the progress bars */
+	u64			queue_size;	/* "report.queue-size" config value */
+	int			socket_filter;	/* copied into every hists->socket_filter */
+	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+	struct branch_type_stat	brtype_stat;	/* branch type counters, displayed in branch sort mode */
+};
+
+static int report__config(const char *var, const char *value, void *cb)
+{
+	struct report *rep = cb;
+	int ret = 0;
+
+	/*
+	 * Config callback handling the "report.*" keys. Keys that are not
+	 * handled here are accepted silently (return 0).
+	 */
+	if (strcmp(var, "report.group") == 0) {
+		symbol_conf.event_group = perf_config_bool(var, value);
+	} else if (strcmp(var, "report.percent-limit") == 0) {
+		double limit = strtof(value, NULL);
+
+		/* One limit feeds both the hist output and the callchains. */
+		rep->min_percent = limit;
+		callchain_param.min_percent = limit;
+	} else if (strcmp(var, "report.children") == 0) {
+		symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+	} else if (strcmp(var, "report.queue-size") == 0) {
+		/* The only key whose parse result is propagated. */
+		ret = perf_config_u64(&rep->queue_size, var, value);
+	} else if (strcmp(var, "report.sort_order") == 0) {
+		/* Keep a private copy of the configured sort order. */
+		default_sort_order = strdup(value);
+	}
+
+	return ret;
+}
+
+static int hist_iter__report_callback(struct hist_entry_iter *iter,
+				      struct addr_location *al, bool single,
+				      void *arg)
+{	/* default add_entry_cb (see process_sample_event): accounts the sample for annotation; 'arg' is the struct report */
+	int err = 0;
+	struct report *rep = arg;
+	struct hist_entry *he = iter->he;
+	struct perf_evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+	struct mem_info *mi;
+	struct branch_info *bi;
+
+	if (!ui__has_annotation())	/* skip the accounting when ui__has_annotation() is false */
+		return 0;
+
+	hist__account_cycles(sample->branch_stack, al, sample,
+			     rep->nonany_branch_mode);
+
+	if (sort__mode == SORT_MODE__BRANCH) {	/* count on both ends of the branch */
+		bi = he->branch_info;
+		err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx);
+		if (err)
+			goto out;
+
+		err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx);
+
+	} else if (rep->mem_mode) {	/* count on the data address, then on the sampled address */
+		mi = he->mem_info;
+		err = addr_map_symbol__inc_samples(&mi->daddr, sample, evsel->idx);
+		if (err)
+			goto out;
+
+		err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr);
+
+	} else if (symbol_conf.cumulate_callchain) {
+		if (single)	/* NOTE(review): 'single' presumably marks the entry of the sample itself, not a callchain parent - confirm */
+			err = hist_entry__inc_addr_samples(he, sample, evsel->idx,
+							   al->addr);
+	} else {
+		err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr);
+	}
+
+out:
+	return err;	/* 0, or the first non-zero result of the inc_samples helpers */
+}
+
+static int hist_iter__branch_callback(struct hist_entry_iter *iter,
+				      struct addr_location *al __maybe_unused,
+				      bool single __maybe_unused,
+				      void *arg)
+{
+	struct report *rep = arg;
+	struct perf_sample *sample = iter->sample;
+	struct perf_evsel *evsel = iter->evsel;
+	struct branch_info *bi;
+	int err;
+
+	/* Skip the accounting when ui__has_annotation() is false. */
+	if (!ui__has_annotation())
+		return 0;
+
+	hist__account_cycles(sample->branch_stack, al, sample,
+			     rep->nonany_branch_mode);
+
+	/* Count the sample on the branch source first, then on its target. */
+	bi = iter->he->branch_info;
+	err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx);
+	if (err)
+		return err;
+
+	err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx);
+	/* The branch type statistics are updated even if the target failed. */
+	branch_type_count(&rep->brtype_stat, &bi->flags,
+			  bi->from.addr, bi->to.addr);
+
+	return err;
+}
+
+/*
+ * Events in data file are not collected in groups, but we still want
+ * the group display. Set the artificial group and set the leader's
+ * forced_leader flag to notify the display code.
+ */
+static void setup_forced_leader(struct report *report,
+				struct perf_evlist *evlist)
+{
+	if (report->group_set && !evlist->nr_groups) {	/* group_set, but the data has no groups */
+		struct perf_evsel *leader = perf_evlist__first(evlist);
+
+		perf_evlist__set_leader(evlist);	/* NOTE(review): presumably groups all events under the first one - confirm */
+		leader->forced_leader = true;	/* lets the display code know the group is artificial */
+	}
+}
+
+static int process_feature_event(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_session *session __maybe_unused)
+{
+	struct report *rep = container_of(tool, struct report, tool);
+
+	/* Real feature records are handed to the generic processing. */
+	if (event->feat.feat_id < HEADER_LAST_FEATURE)
+		return perf_event__process_feature(tool, event, session);
+
+	if (event->feat.feat_id > HEADER_LAST_FEATURE) {
+		pr_err("failed: wrong feature ID: %" PRIu64 "\n",
+		       event->feat.feat_id);
+		return -1;
+	}
+
+	/*
+	 * HEADER_LAST_FEATURE: all features are in, force the group if needed.
+	 */
+	setup_forced_leader(rep, session->evlist);
+	return 0;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine)
+{	/* sample handler of the report tool: filters the sample, then adds it to the histograms */
+	struct report *rep = container_of(tool, struct report, tool);
+	struct addr_location al;
+	struct hist_entry_iter iter = {
+		.evsel 			= evsel,
+		.sample 		= sample,
+		.hide_unresolved 	= symbol_conf.hide_unresolved,
+		.add_entry_cb 		= hist_iter__report_callback,	/* replaced below in branch mode */
+	};
+	int ret = 0;
+
+	if (perf_time__ranges_skip_sample(rep->ptime_range, rep->range_num,
+					  sample->time)) {
+		return 0;	/* time filter says skip: dropped without error */
+	}
+
+	if (machine__resolve(machine, &al, sample) < 0) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			 event->header.type);
+		return -1;
+	}
+
+	if (symbol_conf.hide_unresolved && al.sym == NULL)
+		goto out_put;	/* ret is still 0 */
+
+	if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
+		goto out_put;	/* CPU not selected: ret is still 0 */
+
+	if (sort__mode == SORT_MODE__BRANCH) {
+		/*
+		 * A non-synthesized event might not have a branch stack if
+		 * branch stacks have been synthesized (using itrace options).
+		 */
+		if (!sample->branch_stack)
+			goto out_put;
+
+		iter.add_entry_cb = hist_iter__branch_callback;
+		iter.ops = &hist_iter_branch;
+	} else if (rep->mem_mode) {
+		iter.ops = &hist_iter_mem;
+	} else if (symbol_conf.cumulate_callchain) {
+		iter.ops = &hist_iter_cumulative;
+	} else {
+		iter.ops = &hist_iter_normal;
+	}
+
+	if (al.map != NULL)
+		al.map->dso->hit = 1;	/* report__warn_kptr_restrict() checks this flag on the kernel DSO */
+
+	ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);	/* 'rep' is the callbacks' 'arg' */
+	if (ret < 0)
+		pr_debug("problem adding hist entry, skipping event\n");
+out_put:	/* every path after a successful machine__resolve() releases 'al' here */
+	addr_location__put(&al);
+	return ret;
+}
+
+static int process_read_event(struct perf_tool *tool,
+			      union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct perf_evsel *evsel,
+			      struct machine *machine __maybe_unused)
+{	/* read event handler: records the counter value per pid/tid when rep->show_threads is set */
+	struct report *rep = container_of(tool, struct report, tool);
+
+	if (rep->show_threads) {
+		const char *name = evsel ? perf_evsel__name(evsel) : "unknown";
+		int err = perf_read_values_add_value(&rep->show_threads_values,
+					   event->read.pid, event->read.tid,
+					   evsel ? evsel->idx : -1,	/* evsel may be NULL (see 'name'): never dereference it blindly */
+					   name,
+					   event->read.value);
+		/* NOTE(review): -1 keeps events without an evsel under one "unknown" counter - confirm the sentinel */
+		if (err)
+			return err;	/* non-zero result of perf_read_values_add_value() */
+	}
+
+	return 0;
+}
+
+/* For pipe mode, sample_type is not currently set */
+static int report__setup_sample_type(struct report *rep)
+{	/* checks the recorded sample_type against the selected report modes and sets callchain/branch defaults; 0 or negative */
+	struct perf_session *session = rep->session;
+	u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+	bool is_pipe = perf_data__is_pipe(session->data);
+
+	if (session->itrace_synth_opts->callchain ||
+	    (!is_pipe &&
+	     perf_header__has_feat(&session->header, HEADER_AUXTRACE) &&
+	     !session->itrace_synth_opts->set))
+		sample_type |= PERF_SAMPLE_CALLCHAIN;	/* callchains are treated as present in these cases */
+
+	if (session->itrace_synth_opts->last_branch)
+		sample_type |= PERF_SAMPLE_BRANCH_STACK;
+
+	if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {	/* no callchains in the file */
+		if (perf_hpp_list.parent) {
+			ui__error("Selected --sort parent, but no "
+				    "callchain data. Did you call "
+				    "'perf record' without -g?\n");
+			return -EINVAL;
+		}
+		if (symbol_conf.use_callchain &&
+			!symbol_conf.show_branchflag_count) {
+			ui__error("Selected -g or --branch-history.\n"
+				  "But no callchain or branch data.\n"
+				  "Did you call 'perf record' without -g or -b?\n");
+			return -1;	/* NOTE(review): -1 here but -EINVAL for the other errors of this function */
+		}
+	} else if (!callchain_param.enabled &&
+		   callchain_param.mode != CHAIN_NONE &&
+		   !symbol_conf.use_callchain) {	/* turn callchain use on and register the params */
+			symbol_conf.use_callchain = true;
+			if (callchain_register_param(&callchain_param) < 0) {
+				ui__error("Can't register callchain params.\n");
+				return -EINVAL;
+			}
+	}
+
+	if (symbol_conf.cumulate_callchain) {
+		/* Silently ignore if callchain is missing */
+		if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+			symbol_conf.cumulate_callchain = false;
+			perf_hpp__cancel_cumulate();
+		}
+	}
+
+	if (sort__mode == SORT_MODE__BRANCH) {
+		if (!is_pipe &&
+		    !(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
+			ui__error("Selected -b but no branch data. "
+				  "Did you call perf record without -b?\n");
+			return -1;
+		}
+	}
+
+	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {	/* derive record_mode from the sample type */
+		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+		    (sample_type & PERF_SAMPLE_STACK_USER)) {
+			callchain_param.record_mode = CALLCHAIN_DWARF;
+			dwarf_callchain_users = true;
+		} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+			callchain_param.record_mode = CALLCHAIN_LBR;
+		else
+			callchain_param.record_mode = CALLCHAIN_FP;
+	}
+
+	/* ??? handle more cases than just ANY? */
+	if (!(perf_evlist__combined_branch_type(session->evlist) &
+				PERF_SAMPLE_BRANCH_ANY))
+		rep->nonany_branch_mode = true;	/* later passed to hist__account_cycles() */
+
+	return 0;
+}
+
+static void sig_handler(int sig __maybe_unused)
+{
+	session_done = 1;	/* NOTE(review): presumably polled by the session code to stop processing - not shown here */
+}
+
+static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report *rep,
+					      const char *evname, FILE *fp)
+{	/* prints the "# Samples: ..." header of one event to 'fp'; returns the summed fprintf() results, 0 when quiet */
+	size_t ret;
+	char unit;
+	unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+	u64 nr_events = hists->stats.total_period;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
+	char buf[512];
+	size_t size = sizeof(buf);
+	int socket_id = hists->socket_filter;
+
+	if (quiet)
+		return 0;
+
+	if (symbol_conf.filter_relative) {	/* count only what passed the filters */
+		nr_samples = hists->stats.nr_non_filtered_samples;
+		nr_events = hists->stats.total_non_filtered_period;
+	}
+
+	if (perf_evsel__is_group_event(evsel)) {
+		struct perf_evsel *pos;
+
+		perf_evsel__group_desc(evsel, buf, size);
+		evname = buf;	/* the group description replaces the caller's name */
+
+		for_each_group_member(pos, evsel) {
+			const struct hists *pos_hists = evsel__hists(pos);
+
+			if (symbol_conf.filter_relative) {
+				nr_samples += pos_hists->stats.nr_non_filtered_samples;
+				nr_events += pos_hists->stats.total_non_filtered_period;
+			} else {
+				nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE];
+				nr_events += pos_hists->stats.total_period;
+			}
+		}
+	}
+
+	nr_samples = convert_unit(nr_samples, &unit);
+	ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit);
+	if (evname != NULL) {
+		ret += fprintf(fp, " of event%s '%s'",
+			       evsel->nr_members > 1 ? "s" : "", evname);
+	}
+
+	if (rep->time_str)
+		ret += fprintf(fp, " (time slices: %s)", rep->time_str);
+	/* evname may be NULL (checked above as well), and strstr() must not get a NULL pointer */
+	if (symbol_conf.show_ref_callgraph && evname != NULL &&
+	    strstr(evname, "call-graph=no")) {
+		ret += fprintf(fp, ", show reference callgraph");
+	}
+
+	if (rep->mem_mode) {
+		ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
+		ret += fprintf(fp, "\n# Sort order   : %s", sort_order ? : default_mem_sort_order);
+	} else
+		ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events);
+
+	if (socket_id > -1)	/* the socket line is printed only for values above -1 */
+		ret += fprintf(fp, "\n# Processor Socket: %d", socket_id);
+
+	return ret + fprintf(fp, "\n#\n");
+}
+
+static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
+					 struct report *rep,
+					 const char *help)
+{	/* stdio output: per-event header and histogram, then optional thread values and branch type stats; always returns 0 */
+	struct perf_evsel *pos;
+
+	if (!quiet) {
+		fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n",
+			evlist->stats.total_lost_samples);
+	}
+
+	evlist__for_each_entry(evlist, pos) {
+		struct hists *hists = evsel__hists(pos);
+		const char *evname = perf_evsel__name(pos);
+
+		if (symbol_conf.event_group &&
+		    !perf_evsel__is_group_leader(pos))
+			continue;	/* in group view only the leaders are printed */
+
+		hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
+		hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout,
+			       symbol_conf.use_callchain ||
+			       symbol_conf.show_branchflag_count);
+		fprintf(stdout, "\n\n");
+	}
+
+	if (!quiet)
+		fprintf(stdout, "#\n# (%s)\n#\n", help);	/* the tip line */
+
+	if (rep->show_threads) {
+		bool style = !strcmp(rep->pretty_printing_style, "raw");	/* true asks for the "raw" display */
+		perf_read_values_display(stdout, &rep->show_threads_values,
+					 style);
+		perf_read_values_destroy(&rep->show_threads_values);	/* the values are not used after being shown */
+	}
+
+	if (sort__mode == SORT_MODE__BRANCH)
+		branch_type_stat_display(stdout, &rep->brtype_stat);
+
+	return 0;
+}
+
+static void report__warn_kptr_restrict(const struct report *rep)
+{	/* warns when kernel samples cannot be resolved reliably because kernel address maps were restricted */
+	struct map *kernel_map = machine__kernel_map(&rep->session->machines.host);
+	struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL;
+
+	if (perf_evlist__exclude_kernel(rep->session->evlist))
+		return;	/* no warning when perf_evlist__exclude_kernel() is true */
+
+	if (kernel_map == NULL ||
+	    (kernel_map->dso->hit &&
+	     (kernel_kmap->ref_reloc_sym == NULL ||	/* NOTE(review): kernel_kmap is not NULL-checked; relies on map__kmap() succeeding - confirm */
+	      kernel_kmap->ref_reloc_sym->addr == 0))) {
+		const char *desc =
+		    "As no suitable kallsyms nor vmlinux was found, kernel samples\n"
+		    "can't be resolved.";
+
+		if (kernel_map) {
+			const struct dso *kdso = kernel_map->dso;
+			if (!RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION])) {	/* symbols exist: warn about relocation instead */
+				desc = "If some relocation was applied (e.g. "
+				       "kexec) symbols may be misresolved.";
+			}
+		}
+
+		ui__warning(
+"Kernel address maps (/proc/{kallsyms,modules}) were restricted.\n\n"
+"Check /proc/sys/kernel/kptr_restrict before running 'perf record'.\n\n%s\n\n"
+"Samples in kernel modules can't be resolved as well.\n\n",
+		desc);
+	}
+}
+
+static int report__gtk_browse_hists(struct report *rep, const char *help)
+{
+	/* Look the GTK browser entry point up at run time via dlsym(). */
+	int (*browse)(struct perf_evlist *evlist, const char *help,
+		      struct hist_browser_timer *timer, float min_pcnt) =
+		dlsym(perf_gtk_handle, "perf_evlist__gtk_browse_hists");
+
+	if (!browse) {
+		ui__error("GTK browser not found!\n");
+		return -1;
+	}
+
+	return browse(rep->session->evlist, help, NULL, rep->min_percent);
+}
+
+static int report__browse_hists(struct report *rep)
+{
+	struct perf_session *session = rep->session;
+	struct perf_evlist *evlist = session->evlist;
+	const char *help = perf_tip(system_path(TIPDIR));
+	int key;
+
+	/*
+	 * Tip line shown by the browsers: the installed tips are tried first,
+	 * DOCDIR is the fallback for people who don't install perf ;-)
+	 */
+	if (!help)
+		help = perf_tip(DOCDIR);
+	if (!help)
+		help = "Cannot load tips.txt file, please install perf!";
+
+	/*
+	 * use_browser picks the front end: 1 is the TUI, 2 is GTK and
+	 * anything else ends up in the plain stdio output.
+	 */
+	if (use_browser == 2)
+		return report__gtk_browse_hists(rep, help);
+
+	if (use_browser != 1)
+		return perf_evlist__tty_browse_hists(evlist, rep, help);
+
+	key = perf_evlist__tui_browse_hists(evlist, help, NULL,
+					    rep->min_percent,
+					    &session->header.env,
+					    true);
+
+	/*
+	 * Usually the TUI hands back the last pressed key; only the request
+	 * to switch to another data file is passed on to the caller.
+	 */
+	return key == K_SWITCH_INPUT_DATA ? key : 0;
+}
+
+static int report__collapse_hists(struct report *rep)
+{	/* collapses the hists of every event; returns 0 or the first negative hists__collapse_resort() result */
+	struct ui_progress prog;
+	struct perf_evsel *pos;
+	int ret = 0;
+
+	ui_progress__init(&prog, rep->nr_entries, "Merging related events...");
+
+	evlist__for_each_entry(rep->session->evlist, pos) {
+		struct hists *hists = evsel__hists(pos);
+
+		if (pos->idx == 0)	/* the symbol filter string is set on the first event only */
+			hists->symbol_filter_str = rep->symbol_filter_str;
+
+		hists->socket_filter = rep->socket_filter;
+
+		ret = hists__collapse_resort(hists, &prog);
+		if (ret < 0)
+			break;	/* the progress bar is still finished below */
+
+		/* Non-group events are considered as leader */
+		if (symbol_conf.event_group &&
+		    !perf_evsel__is_group_leader(pos)) {
+			struct hists *leader_hists = evsel__hists(pos->leader);
+
+			hists__match(leader_hists, hists);	/* pair a member's hists with its leader's */
+			hists__link(leader_hists, hists);
+		}
+	}
+
+	ui_progress__finish();
+	return ret;
+}
+
+static void report__output_resort(struct report *rep)
+{
+	struct perf_evlist *evlist = rep->session->evlist;
+	struct ui_progress prog;
+	struct perf_evsel *evsel;
+
+	ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
+	evlist__for_each_entry(evlist, evsel)
+		perf_evsel__output_resort(evsel, &prog);
+
+	ui_progress__finish();
+}
+
+static void stats_setup(struct report *rep)
+{
+	/* Clear every callback of the tool; only no_warn is set. */
+	rep->tool = (struct perf_tool){ .no_warn = true };
+}
+
+static int stats_print(struct report *rep)
+{
+	/* Hand the session to perf_session__fprintf_nr_events() on stdout. */
+	perf_session__fprintf_nr_events(rep->session, stdout);
+
+	return 0;
+}
+
+static void tasks_setup(struct report *rep)
+{
+	/* Only comm/exit/fork are always handled; mmaps need mmaps_mode. */
+	rep->tool = (struct perf_tool){
+		.comm		= perf_event__process_comm,
+		.exit		= perf_event__process_exit,
+		.fork		= perf_event__process_fork,
+		.mmap		= rep->mmaps_mode ? perf_event__process_mmap : NULL,
+		.mmap2		= rep->mmaps_mode ? perf_event__process_mmap2 : NULL,
+		.ordered_events	= true,
+		.no_warn	= true,
+	};
+}
+
+struct task {	/* node of the task tree built by tasks_list() */
+	struct thread		*thread;
+	struct list_head	 list;	/* link in the parent's 'children' list */
+	struct list_head	 children;	/* tasks whose thread->ppid refers to this task's thread */
+};
+
+static struct task *tasks_list(struct task *task, struct machine *machine)
+{
+	/* Climb the parent chain, linking each task under its parent. */
+	for (;;) {
+		struct thread *parent;
+		struct task *parent_task;
+
+		if (!list_empty(&task->list))		/* Already listed. */
+			return NULL;
+		if (task->thread->ppid == -1)		/* Last one in the chain. */
+			return task;
+
+		parent = machine__find_thread(machine, -1, task->thread->ppid);
+		if (parent == NULL)
+			return ERR_PTR(-ENOENT);
+
+		parent_task = thread__priv(parent);
+		list_add_tail(&task->list, &parent_task->children);
+		task = parent_task;
+	}
+}
+
+/*
+ * Print one line per map in @maps to @fp, each prefixed by @indent columns
+ * of padding: address range, r/w/x and shared/private flags, page offset,
+ * inode and dso name.  Returns the sum of the fprintf() results.
+ */
+static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp)
+{
+	struct rb_node *node = rb_first(&maps->entries);
+	size_t total = 0;
+
+	while (node) {
+		struct map *map = rb_entry(node, struct map, rb_node);
+		const char rd = (map->prot & PROT_READ) ? 'r' : '-';
+		const char wr = (map->prot & PROT_WRITE) ? 'w' : '-';
+		const char ex = (map->prot & PROT_EXEC) ? 'x' : '-';
+		const char sh = (map->flags & MAP_SHARED) ? 's' : 'p';
+
+		total += fprintf(fp, "%*s  %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n",
+				 indent, "", map->start, map->end,
+				 rd, wr, ex, sh,
+				 map->pgoff,
+				 map->ino, map->dso->name);
+
+		node = rb_next(node);
+	}
+
+	return total;
+}
+
+/*
+ * Print the maps of every map type held by @mg, in type order, and return
+ * the accumulated result of maps__fprintf_task().
+ */
+static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp)
+{
+	int total = 0;
+	int type = 0;
+
+	while (type < MAP__NR_TYPES) {
+		total += maps__fprintf_task(&mg->maps[type], indent, fp);
+		++type;
+	}
+
+	return total;
+}
+
+/*
+ * Print @task and, recursively, all of its children to @fp.
+ *
+ * Each task gets a "pid tid ppid |" prefix followed by @level columns of
+ * padding and the comm string.  The width of that prefix is reused as the
+ * indentation of the task's map lines.
+ */
+static void task__print_level(struct task *task, FILE *fp, int level)
+{
+	struct thread *thread = task->thread;
+	struct task *kid;
+	int comm_indent;
+
+	comm_indent = fprintf(fp, "  %8d %8d %8d |%*s",
+			      thread->pid_, thread->tid, thread->ppid,
+			      level, "");
+	fprintf(fp, "%s\n", thread__comm_str(thread));
+
+	map_groups__fprintf_task(thread->mg, comm_indent, fp);
+
+	/* Iterating an empty children list is a no-op, no guard needed. */
+	list_for_each_entry(kid, &task->children, list) {
+		task__print_level(kid, fp, level + 1);
+	}
+}
+
+/*
+ * --tasks output: print all threads of the host machine to @fp as a tree
+ * ordered by parent/child relation.
+ *
+ * Returns 0 on success, -ENOMEM when the task array cannot be allocated and
+ * the error reported by tasks_list() when a parent cannot be resolved.
+ */
+static int tasks_print(struct report *rep, FILE *fp)
+{
+	struct machine *machine = &rep->session->machines.host;
+	unsigned int nr_threads = 0, next = 0, bucket;
+	struct task *tasks, *task;
+	struct rb_node *node;
+	LIST_HEAD(roots);
+
+	/*
+	 * No locking is needed while accessing machine->threads,
+	 * because --tasks is a single-threaded command.
+	 */
+
+	/* First pass: count the threads of all buckets. */
+	for (bucket = 0; bucket < THREADS__TABLE_SIZE; bucket++)
+		nr_threads += machine->threads[bucket].nr;
+
+	tasks = malloc(sizeof(*tasks) * nr_threads);
+	if (!tasks)
+		return -ENOMEM;
+
+	/* Second pass: bind one task to every thread. */
+	for (bucket = 0; bucket < THREADS__TABLE_SIZE; bucket++) {
+		struct threads *threads = &machine->threads[bucket];
+
+		node = rb_first(&threads->entries);
+		while (node) {
+			task = &tasks[next++];
+
+			task->thread = rb_entry(node, struct thread, rb_node);
+			INIT_LIST_HEAD(&task->children);
+			INIT_LIST_HEAD(&task->list);
+			thread__set_priv(task->thread, task);
+
+			node = rb_next(node);
+		}
+	}
+
+	/*
+	 * Walk every not yet linked task up to its first already processed
+	 * ancestor, hooking each one into its parent's children list.  A
+	 * task without a parent ends up in 'roots'.
+	 */
+	for (next = 0; next < nr_threads; next++) {
+		task = &tasks[next];
+
+		if (!list_empty(&task->list))
+			continue;
+
+		task = tasks_list(task, machine);
+		if (IS_ERR(task)) {
+			pr_err("Error: failed to process tasks\n");
+			free(tasks);
+			return PTR_ERR(task);
+		}
+
+		if (task)
+			list_add_tail(&task->list, &roots);
+	}
+
+	fprintf(fp, "# %8s %8s %8s  %s\n", "pid", "tid", "ppid", "comm");
+
+	list_for_each_entry(task, &roots, list) {
+		task__print_level(task, fp, 0);
+	}
+
+	free(tasks);
+	return 0;
+}
+
+/*
+ * Run one report pass over rep->session.
+ *
+ * Sets up the cpu filter, per-thread values and sample type, processes all
+ * events and then either prints the --stats/--tasks output or collapses,
+ * re-sorts and browses the histograms.
+ *
+ * Returns a non-zero error code when one of the setup or processing steps
+ * fails, 0 when there is nothing (more) to show, and otherwise whatever
+ * report__browse_hists() returns.
+ */
+static int __cmd_report(struct report *rep)
+{
+	int ret;
+	struct perf_session *session = rep->session;
+	struct perf_evsel *pos;
+	struct perf_data *data = session->data;
+
+	signal(SIGINT, sig_handler);
+
+	/* -C/--cpu: build the cpu bitmap and share it with the itrace options. */
+	if (rep->cpu_list) {
+		ret = perf_session__cpu_bitmap(session, rep->cpu_list,
+					       rep->cpu_bitmap);
+		if (ret) {
+			ui__error("failed to set cpu bitmap\n");
+			return ret;
+		}
+		session->itrace_synth_opts->cpu_bitmap = rep->cpu_bitmap;
+	}
+
+	if (rep->show_threads) {
+		ret = perf_read_values_init(&rep->show_threads_values);
+		if (ret)
+			return ret;
+	}
+
+	ret = report__setup_sample_type(rep);
+	if (ret) {
+		/* report__setup_sample_type() already showed error message */
+		return ret;
+	}
+
+	/* --stats and --tasks replace rep->tool before events are processed. */
+	if (rep->stats_mode)
+		stats_setup(rep);
+
+	if (rep->tasks_mode)
+		tasks_setup(rep);
+
+	ret = perf_session__process_events(session);
+	if (ret) {
+		ui__error("failed to process sample\n");
+		return ret;
+	}
+
+	/* --stats and --tasks print their own output and stop here. */
+	if (rep->stats_mode)
+		return stats_print(rep);
+
+	if (rep->tasks_mode)
+		return tasks_print(rep, stdout);
+
+	report__warn_kptr_restrict(rep);
+
+	/* Total number of hist entries over all events. */
+	evlist__for_each_entry(session->evlist, pos)
+		rep->nr_entries += evsel__hists(pos)->nr_entries;
+
+	/* Extra dumps that only apply when use_browser is 0. */
+	if (use_browser == 0) {
+		if (verbose > 3)
+			perf_session__fprintf(session, stdout);
+
+		if (verbose > 2)
+			perf_session__fprintf_dsos(session, stdout);
+
+		if (dump_trace) {
+			perf_session__fprintf_nr_events(session, stdout);
+			perf_evlist__fprintf_nr_events(session->evlist, stdout);
+			return 0;
+		}
+	}
+
+	ret = report__collapse_hists(rep);
+	if (ret) {
+		ui__error("failed to process hist entry\n");
+		return ret;
+	}
+
+	if (session_done())
+		return 0;
+
+	/*
+	 * Recalculate the number of entries after collapsing, since it
+	 * might have changed during the collapse phase.
+	 */
+	rep->nr_entries = 0;
+	evlist__for_each_entry(session->evlist, pos)
+		rep->nr_entries += evsel__hists(pos)->nr_entries;
+
+	if (rep->nr_entries == 0) {
+		ui__error("The %s file has no samples!\n", data->file.path);
+		return 0;
+	}
+
+	report__output_resort(rep);
+
+	return report__browse_hists(rep);
+}
+
+/*
+ * Option callback for -g/--call-graph.
+ *
+ * Records in the callchain_param behind opt->value whether callchains are
+ * enabled.  For the negated form (--no-call-graph) callchains are switched
+ * off and 0 is returned, otherwise @arg is passed on to
+ * parse_callchain_report_opt() and its result is returned.
+ */
+static int
+report_parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+	struct callchain_param *callchain = opt->value;
+
+	callchain->enabled = !unset;
+
+	if (!unset)
+		return parse_callchain_report_opt(arg);
+
+	/* --no-call-graph */
+	symbol_conf.use_callchain = false;
+	callchain->mode = CHAIN_NONE;
+
+	return 0;
+}
+
+/*
+ * Option callback for --ignore-callees: compile @arg as an extended regex
+ * into ignore_callees_regex and flag it as present.
+ *
+ * Returns 0 on success or when no argument was given, -1 (after printing
+ * the regerror() text) when the expression does not compile.
+ */
+int
+report_parse_ignore_callees_opt(const struct option *opt __maybe_unused,
+				const char *arg, int unset __maybe_unused)
+{
+	char errbuf[BUFSIZ];
+	int err;
+
+	if (!arg)
+		return 0;
+
+	err = regcomp(&ignore_callees_regex, arg, REG_EXTENDED);
+	if (err) {
+		regerror(err, &ignore_callees_regex, errbuf, sizeof(errbuf));
+		pr_err("Invalid --ignore-callees regex: %s\n%s", arg, errbuf);
+		return -1;
+	}
+
+	have_ignore_callees = 1;
+	return 0;
+}
+
+/*
+ * Option callback for -b/--branch-stack: store 1 in the int behind
+ * opt->value when the option is given and 0 when it is negated.
+ */
+static int
+parse_branch_mode(const struct option *opt,
+		  const char *str __maybe_unused, int unset)
+{
+	int *mode = opt->value;
+
+	if (unset)
+		*mode = 0;
+	else
+		*mode = 1;
+
+	return 0;
+}
+
+/*
+ * Option callback for --percent-limit: parse @str with strtof() and use the
+ * value as minimum percentage for both the report behind opt->value and the
+ * global callchain parameters.  No validation is done on @str.
+ */
+static int
+parse_percent_limit(const struct option *opt, const char *str,
+		    int unset __maybe_unused)
+{
+	struct report *rep = opt->value;
+	double limit;
+
+	limit = strtof(str, NULL);
+
+	callchain_param.min_percent = limit;
+	rep->min_percent = limit;
+
+	return 0;
+}
+
+/* Passed as the default value of the -g/--call-graph option in cmd_report(). */
+#define CALLCHAIN_DEFAULT_OPT  "graph,0.5,caller,function,percent"
+
+/* Help text of -g/--call-graph: the shared callchain help plus the default above. */
+const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
+				     CALLCHAIN_REPORT_HELP
+				     "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
+
+/*
+ * Entry point of 'perf report'.
+ *
+ * Parses the perf config and the command line into a struct report, opens
+ * the input as a perf_session, derives the branch/memory/hierarchy modes,
+ * selects the UI and hands over to __cmd_report().  When __cmd_report()
+ * returns K_SWITCH_INPUT_DATA the session is torn down and everything from
+ * the 'repeat' label onwards runs again.
+ *
+ * Returns 0 once __cmd_report() has run (its error codes are not
+ * propagated) or after a --header-only dump, and a non-zero error code when
+ * the setup fails.  Every failure after the session has been created leaves
+ * through the 'error' label, which frees the time ranges and the session.
+ */
+int cmd_report(int argc, const char **argv)
+{
+	struct perf_session *session;
+	struct itrace_synth_opts itrace_synth_opts = { .set = 0, };
+	struct stat st;
+	bool has_br_stack = false;
+	int branch_mode = -1;
+	bool branch_call_mode = false;
+	char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
+	const char * const report_usage[] = {
+		"perf report [<options>]",
+		NULL
+	};
+	struct report report = {
+		.tool = {
+			.sample		 = process_sample_event,
+			.mmap		 = perf_event__process_mmap,
+			.mmap2		 = perf_event__process_mmap2,
+			.comm		 = perf_event__process_comm,
+			.namespaces	 = perf_event__process_namespaces,
+			.exit		 = perf_event__process_exit,
+			.fork		 = perf_event__process_fork,
+			.lost		 = perf_event__process_lost,
+			.read		 = process_read_event,
+			.attr		 = perf_event__process_attr,
+			.tracing_data	 = perf_event__process_tracing_data,
+			.build_id	 = perf_event__process_build_id,
+			.id_index	 = perf_event__process_id_index,
+			.auxtrace_info	 = perf_event__process_auxtrace_info,
+			.auxtrace	 = perf_event__process_auxtrace,
+			.feature	 = process_feature_event,
+			.ordered_events	 = true,
+			.ordering_requires_timestamps = true,
+		},
+		.max_stack		 = PERF_MAX_STACK_DEPTH,
+		.pretty_printing_style	 = "normal",
+		.socket_filter		 = -1,
+	};
+	const struct option options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"),
+	OPT_BOOLEAN(0, "tasks", &report.tasks_mode, "Display recorded tasks"),
+	OPT_BOOLEAN(0, "mmaps", &report.mmaps_mode, "Display recorded tasks memory maps"),
+	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+		   "file", "vmlinux pathname"),
+	OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
+		    "don't load vmlinux even if found"),
+	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+		   "file", "kallsyms pathname"),
+	OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
+	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
+		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
+	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
+		    "Show a column with the number of samples"),
+	OPT_BOOLEAN('T', "threads", &report.show_threads,
+		    "Show per-thread event counters"),
+	OPT_STRING(0, "pretty", &report.pretty_printing_style, "key",
+		   "pretty printing style key: normal raw"),
+	OPT_BOOLEAN(0, "tui", &report.use_tui, "Use the TUI interface"),
+	OPT_BOOLEAN(0, "gtk", &report.use_gtk, "Use the GTK2 interface"),
+	OPT_BOOLEAN(0, "stdio", &report.use_stdio,
+		    "Use the stdio interface"),
+	OPT_BOOLEAN(0, "header", &report.header, "Show data header."),
+	OPT_BOOLEAN(0, "header-only", &report.header_only,
+		    "Show only data header."),
+	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
+		   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+		   " Please refer the man page for the complete list."),
+	OPT_STRING('F', "fields", &field_order, "key[,keys...]",
+		   "output field(s): overhead, period, sample plus all of sort keys"),
+	OPT_BOOLEAN(0, "show-cpu-utilization", &symbol_conf.show_cpu_utilization,
+		    "Show sample percentage for different cpu modes"),
+	OPT_BOOLEAN_FLAG(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
+		    "Show sample percentage for different cpu modes", PARSE_OPT_HIDDEN),
+	OPT_STRING('p', "parent", &parent_pattern, "regex",
+		   "regex filter to identify parent, see: '--sort parent'"),
+	OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
+		    "Only display entries with parent-match"),
+	OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
+			     "print_type,threshold[,print_limit],order,sort_key[,branch],value",
+			     report_callchain_help, &report_parse_callchain_opt,
+			     callchain_default_opt),
+	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
+		    "Accumulate callchains of children and show total overhead as well"),
+	OPT_INTEGER(0, "max-stack", &report.max_stack,
+		    "Set the maximum stack depth when parsing the callchain, "
+		    "anything beyond the specified depth will be ignored. "
+		    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
+	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
+		    "alias for inverted call graph"),
+	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+		   "ignore callees of these functions in call graphs",
+		   report_parse_ignore_callees_opt),
+	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
+		   "only consider symbols in these dsos"),
+	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
+		   "only consider symbols in these comms"),
+	OPT_STRING(0, "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
+		   "only consider symbols in these pids"),
+	OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
+		   "only consider symbols in these tids"),
+	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
+		   "only consider these symbols"),
+	OPT_STRING(0, "symbol-filter", &report.symbol_filter_str, "filter",
+		   "only show symbols that (partially) match with this filter"),
+	OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
+		   "width[,width...]",
+		   "don't try to adjust column width, use these fixed values"),
+	OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator",
+		   "separator for columns, no spaces will be added between "
+		   "columns '.' is reserved."),
+	OPT_BOOLEAN('U', "hide-unresolved", &symbol_conf.hide_unresolved,
+		    "Only display entries resolved to a symbol"),
+	OPT_CALLBACK(0, "symfs", NULL, "directory",
+		     "Look for files with symbols relative to this directory",
+		     symbol__config_symfs),
+	OPT_STRING('C', "cpu", &report.cpu_list, "cpu",
+		   "list of cpus to profile"),
+	OPT_BOOLEAN('I', "show-info", &report.show_full_info,
+		    "Display extended information about perf.data file"),
+	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
+		    "Interleave source code with assembly code (default)"),
+	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
+		    "Display raw encoding of assembly instructions (default)"),
+	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
+		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
+	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+		    "Show a column with the sum of periods"),
+	OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set,
+		    "Show event group information together"),
+	OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
+		    "use branch records for per branch histogram filling",
+		    parse_branch_mode),
+	OPT_BOOLEAN(0, "branch-history", &branch_call_mode,
+		    "add last branch records to call history"),
+	OPT_STRING(0, "objdump", &objdump_path, "path",
+		   "objdump binary to use for disassembly and annotations"),
+	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
+		    "Disable symbol demangling"),
+	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
+		    "Enable kernel symbol demangling"),
+	OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
+	OPT_CALLBACK(0, "percent-limit", &report, "percent",
+		     "Don't show entries under that percent", parse_percent_limit),
+	OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+		     "how to display percentage of filtered entries", parse_filter_percentage),
+	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
+			    "Instruction Tracing options",
+			    itrace_parse_synth_opts),
+	OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
+			"Show full source file name path for source lines"),
+	OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph,
+		    "Show callgraph from reference event"),
+	OPT_INTEGER(0, "socket-filter", &report.socket_filter,
+		    "only show processor socket that match with this filter"),
+	OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
+		    "Show raw trace event output (do not use print fmt or plugins)"),
+	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+		    "Show entries in a hierarchy"),
+	OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
+			     "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
+			     stdio__config_color, "always"),
+	OPT_STRING(0, "time", &report.time_str, "str",
+		   "Time span of interest (start,stop)"),
+	OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name,
+		    "Show inline function"),
+	OPT_END()
+	};
+	struct perf_data data = {
+		.mode  = PERF_DATA_MODE_READ,
+	};
+	int ret = hists__init();
+
+	if (ret < 0)
+		return ret;
+
+	ret = perf_config(report__config, &report);
+	if (ret)
+		return ret;
+
+	argc = parse_options(argc, argv, options, report_usage, 0);
+	if (argc) {
+		/*
+		 * Special case: if there's an argument left then assume that
+		 * it's a symbol filter:
+		 */
+		if (argc > 1)
+			usage_with_options(report_usage, options);
+
+		report.symbol_filter_str = argv[0];
+	}
+
+	/* --mmaps implies --tasks. */
+	if (report.mmaps_mode)
+		report.tasks_mode = true;
+
+	if (quiet)
+		perf_quiet_option();
+
+	if (symbol_conf.vmlinux_name &&
+	    access(symbol_conf.vmlinux_name, R_OK)) {
+		pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name);
+		return -EINVAL;
+	}
+	if (symbol_conf.kallsyms_name &&
+	    access(symbol_conf.kallsyms_name, R_OK)) {
+		pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name);
+		return -EINVAL;
+	}
+
+	if (report.inverted_callchain)
+		callchain_param.order = ORDER_CALLER;
+	if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
+		callchain_param.order = ORDER_CALLER;
+
+	if (itrace_synth_opts.callchain &&
+	    (int)itrace_synth_opts.callchain_sz > report.max_stack)
+		report.max_stack = itrace_synth_opts.callchain_sz;
+
+	/* No input given: read from stdin when it is a pipe, else perf.data. */
+	if (!input_name || !strlen(input_name)) {
+		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
+			input_name = "-";
+		else
+			input_name = "perf.data";
+	}
+
+repeat:
+	/*
+	 * Set up the input description on every pass, so that a re-run
+	 * requested through K_SWITCH_INPUT_DATA opens whatever input_name
+	 * refers to at that point rather than the path of the first pass.
+	 */
+	data.file.path = input_name;
+	data.force     = symbol_conf.force;
+
+	session = perf_session__new(&data, false, &report.tool);
+	if (session == NULL)
+		return -1;
+
+	if (report.queue_size) {
+		ordered_events__set_alloc_size(&session->ordered_events,
+					       report.queue_size);
+	}
+
+	session->itrace_synth_opts = &itrace_synth_opts;
+
+	report.session = session;
+
+	has_br_stack = perf_header__has_feat(&session->header,
+					     HEADER_BRANCH_STACK);
+
+	setup_forced_leader(&report, session->evlist);
+
+	if (itrace_synth_opts.last_branch)
+		has_br_stack = true;
+
+	if (has_br_stack && branch_call_mode)
+		symbol_conf.show_branchflag_count = true;
+
+	memset(&report.brtype_stat, 0, sizeof(struct branch_type_stat));
+
+	/*
+	 * Branch mode is a tristate:
+	 * -1 means default, so decide based on the file having branch data.
+	 * 0/1 means the user chose a mode.
+	 */
+	if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) &&
+	    !branch_call_mode) {
+		sort__mode = SORT_MODE__BRANCH;
+		symbol_conf.cumulate_callchain = false;
+	}
+	if (branch_call_mode) {
+		callchain_param.key = CCKEY_ADDRESS;
+		callchain_param.branch_callstack = 1;
+		symbol_conf.use_callchain = true;
+		callchain_register_param(&callchain_param);
+		if (sort_order == NULL)
+			sort_order = "srcline,symbol,dso";
+	}
+
+	if (report.mem_mode) {
+		if (sort__mode == SORT_MODE__BRANCH) {
+			pr_err("branch and mem mode incompatible\n");
+			/* ret may still hold 0 or K_SWITCH_INPUT_DATA here */
+			ret = -EINVAL;
+			goto error;
+		}
+		sort__mode = SORT_MODE__MEMORY;
+		symbol_conf.cumulate_callchain = false;
+	}
+
+	if (symbol_conf.report_hierarchy) {
+		/* disable incompatible options */
+		symbol_conf.cumulate_callchain = false;
+
+		if (field_order) {
+			pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+			parse_options_usage(report_usage, options, "F", 1);
+			parse_options_usage(NULL, options, "hierarchy", 0);
+			ret = -EINVAL;
+			goto error;
+		}
+
+		perf_hpp_list.need_collapse = true;
+	}
+
+	if (report.use_stdio)
+		use_browser = 0;
+	else if (report.use_tui)
+		use_browser = 1;
+	else if (report.use_gtk)
+		use_browser = 2;
+
+	/* Force tty output for header output and per-thread stat. */
+	if (report.header || report.header_only || report.show_threads)
+		use_browser = 0;
+	if (report.header || report.header_only)
+		report.tool.show_feat_hdr = SHOW_FEAT_HEADER;
+	if (report.show_full_info)
+		report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO;
+	if (report.stats_mode || report.tasks_mode)
+		use_browser = 0;
+	if (report.stats_mode && report.tasks_mode) {
+		pr_err("Error: --tasks and --mmaps can't be used together with --stats\n");
+		ret = -EINVAL;
+		goto error;
+	}
+
+	if (strcmp(input_name, "-") != 0)
+		setup_browser(true);
+	else
+		use_browser = 0;
+
+	if (setup_sorting(session->evlist) < 0) {
+		if (sort_order)
+			parse_options_usage(report_usage, options, "s", 1);
+		if (field_order)
+			parse_options_usage(sort_order ? NULL : report_usage,
+					    options, "F", 1);
+		ret = -1;
+		goto error;
+	}
+
+	if ((report.header || report.header_only) && !quiet) {
+		perf_session__fprintf_info(session, stdout,
+					   report.show_full_info);
+		if (report.header_only) {
+			/* Not a failure, the error label is only used for cleanup. */
+			ret = 0;
+			goto error;
+		}
+	} else if (use_browser == 0 && !quiet &&
+		   !report.stats_mode && !report.tasks_mode) {
+		fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n",
+		      stdout);
+	}
+
+	/*
+	 * Only in the TUI browser we are doing integrated annotation,
+	 * so don't allocate extra space that won't be used in the stdio
+	 * implementation.
+	 */
+	if (ui__has_annotation()) {
+		ret = symbol__annotation_init();
+		if (ret < 0)
+			goto error;
+		/*
+		 * For searching by name on the "Browse map details".
+		 * providing it only in verbose mode not to bloat too
+		 * much struct symbol.
+		 */
+		if (verbose > 0) {
+			/*
+			 * XXX: Need to provide a less kludgy way to ask for
+			 * more space per symbol, the u32 is for the index on
+			 * the ui browser.
+			 * See symbol__browser_index.
+			 */
+			symbol_conf.priv_size += sizeof(u32);
+			symbol_conf.sort_by_name = true;
+		}
+		annotation_config__init();
+	}
+
+	if (symbol__init(&session->header.env) < 0) {
+		ret = -1;
+		goto error;
+	}
+
+	report.ptime_range = perf_time__range_alloc(report.time_str,
+						    &report.range_size);
+	if (!report.ptime_range) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) {
+		if (session->evlist->first_sample_time == 0 &&
+		    session->evlist->last_sample_time == 0) {
+			pr_err("HINT: no first/last sample time found in perf data.\n"
+			       "Please use latest perf binary to execute 'perf record'\n"
+			       "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
+			ret = -EINVAL;
+			goto error;
+		}
+
+		report.range_num = perf_time__percent_parse_str(
+					report.ptime_range, report.range_size,
+					report.time_str,
+					session->evlist->first_sample_time,
+					session->evlist->last_sample_time);
+
+		if (report.range_num < 0) {
+			pr_err("Invalid time string\n");
+			ret = -EINVAL;
+			goto error;
+		}
+	} else {
+		report.range_num = 1;
+	}
+
+	if (session->tevent.pevent &&
+	    pevent_set_function_resolver(session->tevent.pevent,
+					 machine__resolve_kernel_addr,
+					 &session->machines.host) < 0) {
+		pr_err("%s: failed to set libtraceevent function resolver\n",
+		       __func__);
+		/* Leave through the common exit so nothing is leaked. */
+		ret = -1;
+		goto error;
+	}
+
+	sort__setup_elide(stdout);
+
+	ret = __cmd_report(&report);
+	if (ret == K_SWITCH_INPUT_DATA) {
+		/* The next pass allocates its own time ranges and session. */
+		zfree(&report.ptime_range);
+		perf_session__delete(session);
+		goto repeat;
+	} else
+		ret = 0;
+
+error:
+	zfree(&report.ptime_range);
+
+	perf_session__delete(session);
+	return ret;
+}
diff --git a/builtin-sched.c b/builtin-sched.c
new file mode 100644
index 0000000..4dfdee6
--- /dev/null
+++ b/builtin-sched.c
@@ -0,0 +1,3532 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/util.h"
+#include "util/evlist.h"
+#include "util/cache.h"
+#include "util/evsel.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/cloexec.h"
+#include "util/thread_map.h"
+#include "util/color.h"
+#include "util/stat.h"
+#include "util/callchain.h"
+#include "util/time-utils.h"
+
+#include <subcmd/parse-options.h>
+#include "util/trace-event.h"
+
+#include "util/debug.h"
+
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <inttypes.h>
+
+#include <errno.h>
+#include <semaphore.h>
+#include <pthread.h>
+#include <math.h>
+#include <api/fs/fs.h>
+#include <linux/time64.h>
+
+#include "sane_ctype.h"
+
+#define PR_SET_NAME		15               /* Set process name */
+#define MAX_CPUS		4096		/* size of the per-cpu arrays and bitmap declared below */
+#define COMM_LEN		20		/* size of task_desc::comm */
+#define SYM_LEN			129
+#define MAX_PID			1024000		/* fallback when kernel/pid_max cannot be read */
+
+struct sched_atom;
+
+/*
+ * Description of one task: its identity, the scheduling atoms recorded for
+ * it and the pthread/semaphore state associated with it.
+ * NOTE(review): presumably the per-task state of 'perf sched replay' -
+ * confirm against the users further down in the file.
+ */
+struct task_desc {
+	unsigned long		nr;
+	unsigned long		pid;
+	char			comm[COMM_LEN];
+
+	unsigned long		nr_events;	/* number of entries in atoms[] */
+	unsigned long		curr_event;
+	struct sched_atom	**atoms;	/* grown by one pointer per get_new_event() call */
+
+	pthread_t		thread;
+	sem_t			sleep_sem;
+
+	sem_t			ready_for_work;
+	sem_t			work_done_sem;
+
+	u64			cpu_usage;
+};
+
+/* Kind of a struct sched_atom, stored in its 'type' member. */
+enum sched_event_type {
+	SCHED_EVENT_RUN,	/* created by add_sched_event_run() */
+	SCHED_EVENT_SLEEP,	/* created by add_sched_event_sleep() */
+	SCHED_EVENT_WAKEUP,	/* created by add_sched_event_wakeup() */
+	SCHED_EVENT_MIGRATION,
+};
+
+/* One recorded scheduling event of a task, see task_desc::atoms. */
+struct sched_atom {
+	enum sched_event_type	type;
+	int			specific_wait;	/* set to 1 once a wakeup attached wait_sem to this atom */
+	u64			timestamp;
+	u64			duration;	/* accumulated run time of a RUN atom */
+	unsigned long		nr;		/* index of this atom in task_desc::atoms */
+	sem_t			*wait_sem;	/* shared by a SLEEP atom and the WAKEUP atom targeting it */
+	struct task_desc	*wakee;		/* WAKEUP atoms: the task being woken */
+};
+
+/*
+ * One character per task state.
+ * NOTE(review): the 6th and 7th characters are 'Z' and 'X', while the bits
+ * below are ordered EXIT_DEAD (16) before EXIT_ZOMBIE (32) - confirm that
+ * the code indexing this string expects that order.
+ */
+#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
+
+/* task state bitmask, copied from include/linux/sched.h */
+#define TASK_RUNNING		0
+#define TASK_INTERRUPTIBLE	1
+#define TASK_UNINTERRUPTIBLE	2
+#define __TASK_STOPPED		4
+#define __TASK_TRACED		8
+/* in tsk->exit_state */
+#define EXIT_DEAD		16
+#define EXIT_ZOMBIE		32
+#define EXIT_TRACE		(EXIT_ZOMBIE | EXIT_DEAD)
+/* in tsk->state again */
+#define TASK_DEAD		64
+#define TASK_WAKEKILL		128
+#define TASK_WAKING		256
+#define TASK_PARKED		512
+
+/* State stored in work_atom::state. */
+enum thread_state {
+	THREAD_SLEEPING = 0,
+	THREAD_WAIT_CPU,
+	THREAD_SCHED_IN,
+	THREAD_IGNORE
+};
+
+/*
+ * One entry of a work_atoms::work_list: a thread_state plus the timestamps
+ * and runtime belonging to it.
+ */
+struct work_atom {
+	struct list_head	list;		/* link in work_atoms::work_list */
+	enum thread_state	state;
+	u64			sched_out_time;
+	u64			wake_up_time;
+	u64			sched_in_time;
+	u64			runtime;
+};
+
+/*
+ * Per-thread collection of work_atom entries together with the latency and
+ * runtime totals kept for that thread.
+ */
+struct work_atoms {
+	struct list_head	work_list;	/* list of struct work_atom */
+	struct thread		*thread;
+	struct rb_node		node;		/* NOTE(review): presumably links into one of perf_sched's *_atom_root trees - confirm */
+	u64			max_lat;
+	u64			max_lat_at;
+	u64			total_lat;
+	u64			nb_atoms;
+	u64			total_runtime;
+	int			num_merged;
+};
+
+/* Comparison callback taking two work_atoms. */
+typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
+
+struct perf_sched;
+
+/*
+ * Set of event callbacks; perf_sched::tp_handler points at one such set.
+ * Every callback receives the perf_sched it is invoked for.
+ */
+struct trace_sched_handler {
+	int (*switch_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+			    struct perf_sample *sample, struct machine *machine);
+
+	int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+			     struct perf_sample *sample, struct machine *machine);
+
+	int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+			    struct perf_sample *sample, struct machine *machine);
+
+	/* PERF_RECORD_FORK event, not sched_process_fork tracepoint */
+	int (*fork_event)(struct perf_sched *sched, union perf_event *event,
+			  struct machine *machine);
+
+	int (*migrate_task_event)(struct perf_sched *sched,
+				  struct perf_evsel *evsel,
+				  struct perf_sample *sample,
+				  struct machine *machine);
+};
+
+/* Colors associated with the color_pids and color_cpus selections below. */
+#define COLOR_PIDS PERF_COLOR_BLUE
+#define COLOR_CPUS PERF_COLOR_BG_RED
+
+/*
+ * State embedded in perf_sched as 'map'.  Each *_str member sits next to a
+ * thread_map/cpu_map of the same name.
+ * NOTE(review): presumably the strings are the raw option values and the
+ * maps their parsed form - confirm against the option parsing code.
+ */
+struct perf_sched_map {
+	DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
+	int			*comp_cpus;
+	bool			 comp;
+	struct thread_map	*color_pids;
+	const char		*color_pids_str;
+	struct cpu_map		*color_cpus;
+	const char		*color_cpus_str;
+	struct cpu_map		*cpus;
+	const char		*cpus_str;
+};
+
+/*
+ * Global state of the perf sched command: the perf_tool callbacks, the task
+ * tables, per-cpu tracking arrays sized by MAX_CPUS, event counters,
+ * measurement/usage accumulators, the atom trees and sort lists, and the
+ * options of the map and timehist modes.
+ */
+struct perf_sched {
+	struct perf_tool tool;
+	const char	 *sort_order;
+	unsigned long	 nr_tasks;
+	struct task_desc **pid_to_task;	/* allocated on first use by register_pid() */
+	struct task_desc **tasks;
+	const struct trace_sched_handler *tp_handler;
+	pthread_mutex_t	 start_work_mutex;
+	pthread_mutex_t	 work_done_wait_mutex;
+	int		 profile_cpu;
+/*
+ * Track the current task - that way we can know whether there's any
+ * weird events, such as a task being switched away that is not current.
+ */
+	int		 max_cpu;
+	u32		 curr_pid[MAX_CPUS];
+	struct thread	 *curr_thread[MAX_CPUS];
+	char		 next_shortname1;
+	char		 next_shortname2;
+	unsigned int	 replay_repeat;
+	unsigned long	 nr_run_events;		/* RUN atoms created by add_sched_event_run() */
+	unsigned long	 nr_sleep_events;	/* SLEEP atoms created by add_sched_event_sleep() */
+	unsigned long	 nr_wakeup_events;	/* wakeups that got a semaphore attached */
+	unsigned long	 nr_sleep_corrections;
+	unsigned long	 nr_run_events_optimized;	/* RUN events merged into the previous atom */
+	unsigned long	 targetless_wakeups;	/* wakeups whose wakee's last atom is not a SLEEP */
+	unsigned long	 multitarget_wakeups;	/* wakeups whose target SLEEP atom already had a semaphore */
+	unsigned long	 nr_runs;
+	unsigned long	 nr_timestamps;
+	unsigned long	 nr_unordered_timestamps;
+	unsigned long	 nr_context_switch_bugs;
+	unsigned long	 nr_events;
+	unsigned long	 nr_lost_chunks;
+	unsigned long	 nr_lost_events;
+	u64		 run_measurement_overhead;	/* set by calibrate_run_measurement_overhead() */
+	u64		 sleep_measurement_overhead;	/* set by calibrate_sleep_measurement_overhead() */
+	u64		 start_time;
+	u64		 cpu_usage;
+	u64		 runavg_cpu_usage;
+	u64		 parent_cpu_usage;
+	u64		 runavg_parent_cpu_usage;
+	u64		 sum_runtime;
+	u64		 sum_fluct;
+	u64		 run_avg;
+	u64		 all_runtime;
+	u64		 all_count;
+	u64		 cpu_last_switched[MAX_CPUS];
+	struct rb_root	 atom_root, sorted_atom_root, merged_atom_root;
+	struct list_head sort_list, cmp_pid;
+	bool force;
+	bool skip_merge;
+	struct perf_sched_map map;
+
+	/* options for timehist command */
+	bool		summary;
+	bool		summary_only;
+	bool		idle_hist;
+	bool		show_callchain;
+	unsigned int	max_stack;
+	bool		show_cpu_visual;
+	bool		show_wakeups;
+	bool		show_next;
+	bool		show_migrations;
+	bool		show_state;
+	u64		skipped_samples;
+	const char	*time_str;
+	struct perf_time_interval ptime;
+	struct perf_time_interval hist_time;
+};
+
+/*
+ * per thread run time data
+ *
+ * The dt_* members each have a total_*_time counterpart further down.
+ * NOTE(review): presumably dt_* hold the value of the latest interval and
+ * total_* the running sum - confirm against the code updating them.
+ */
+struct thread_runtime {
+	u64 last_time;      /* time of previous sched in/out event */
+	u64 dt_run;         /* run time */
+	u64 dt_sleep;       /* time between CPU access by sleep (off cpu) */
+	u64 dt_iowait;      /* time between CPU access by iowait (off cpu) */
+	u64 dt_preempt;     /* time between CPU access by preempt (off cpu) */
+	u64 dt_delay;       /* time between wakeup and sched-in */
+	u64 ready_to_run;   /* time of wakeup */
+
+	struct stats run_stats;
+	u64 total_run_time;
+	u64 total_sleep_time;
+	u64 total_iowait_time;
+	u64 total_preempt_time;
+	u64 total_delay_time;
+
+	int last_state;
+
+	char shortname[3];
+	bool comm_changed;
+
+	u64 migrations;
+};
+
+/* per event run time data: a last-seen timestamp for each cpu slot */
+struct evsel_runtime {
+	u64 *last_time; /* time this event was last seen per cpu */
+	u32 ncpu;       /* highest cpu slot allocated */
+};
+
+/*
+ * per cpu idle time data
+ *
+ * Starts with a regular thread_runtime as its first member, followed by
+ * the members that only exist for idle threads.
+ */
+struct idle_thread_runtime {
+	struct thread_runtime	tr;
+	struct thread		*last_thread;
+	struct rb_root		sorted_root;
+	struct callchain_root	callchain;
+	struct callchain_cursor	cursor;
+};
+
+/*
+ * track idle times per cpu
+ * NOTE(review): presumably idle_threads is an array with one entry per cpu
+ * bounded by idle_max_cpu - confirm against the code allocating it.
+ */
+static struct thread **idle_threads;
+static int idle_max_cpu;
+static char idle_comm[] = "<idle>";	/* comm string used for the idle task */
+
+/*
+ * Return the current CLOCK_MONOTONIC time in nanoseconds.
+ *
+ * tv_sec is widened to u64 before it is scaled, so the multiplication is
+ * always done in 64 bits, whatever the width of time_t and of the
+ * NSEC_PER_SEC constant on the build target.  The result of
+ * clock_gettime() is not checked.
+ */
+static u64 get_nsecs(void)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+
+	return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+/*
+ * Busy-loop for about @nsecs nanoseconds: poll get_nsecs() until the time
+ * read, plus the calibrated run measurement overhead, reaches the start
+ * time plus @nsecs.  The clock is always read at least twice.
+ */
+static void burn_nsecs(struct perf_sched *sched, u64 nsecs)
+{
+	const u64 deadline = get_nsecs() + nsecs;
+	u64 now;
+
+	do {
+		now = get_nsecs();
+	} while (now + sched->run_measurement_overhead < deadline);
+}
+
+/*
+ * Sleep for @nsecs nanoseconds.
+ *
+ * The duration is split into whole seconds and the sub-second remainder
+ * with NSEC_PER_SEC, so that tv_sec and tv_nsec add up to exactly @nsecs.
+ * The result of nanosleep() is ignored, an interrupted sleep is not
+ * resumed.
+ */
+static void sleep_nsecs(u64 nsecs)
+{
+	struct timespec ts;
+
+	ts.tv_nsec = nsecs % NSEC_PER_SEC;
+	ts.tv_sec = nsecs / NSEC_PER_SEC;
+
+	nanosleep(&ts, NULL);
+}
+
+/*
+ * Measure how long a zero-length burn_nsecs() takes.  The smallest of ten
+ * samples (capped at NSEC_PER_SEC) is stored in
+ * sched->run_measurement_overhead and printed.
+ */
+static void calibrate_run_measurement_overhead(struct perf_sched *sched)
+{
+	u64 best = NSEC_PER_SEC;
+	int round;
+
+	for (round = 0; round < 10; round++) {
+		u64 start, elapsed;
+
+		start = get_nsecs();
+		burn_nsecs(sched, 0);
+		elapsed = get_nsecs() - start;
+
+		if (elapsed < best)
+			best = elapsed;
+	}
+
+	sched->run_measurement_overhead = best;
+
+	printf("run measurement overhead: %" PRIu64 " nsecs\n", best);
+}
+
+/*
+ * Measure how much longer than requested a 10000 ns sleep_nsecs() takes.
+ * The smallest of ten samples (capped at NSEC_PER_SEC), minus the 10000 ns
+ * asked for, is stored in sched->sleep_measurement_overhead and printed.
+ */
+static void calibrate_sleep_measurement_overhead(struct perf_sched *sched)
+{
+	u64 best = NSEC_PER_SEC;
+	int round;
+
+	for (round = 0; round < 10; round++) {
+		u64 start, elapsed;
+
+		start = get_nsecs();
+		sleep_nsecs(10000);
+		elapsed = get_nsecs() - start;
+
+		if (elapsed < best)
+			best = elapsed;
+	}
+
+	/* Remove the requested sleep time, leaving only the overhead. */
+	best -= 10000;
+	sched->sleep_measurement_overhead = best;
+
+	printf("sleep measurement overhead: %" PRIu64 " nsecs\n", best);
+}
+
+/*
+ * Allocate a zeroed sched_atom stamped with @timestamp and append it to
+ * @task's atoms[] array, which is grown by one slot.
+ *
+ * Returns the new atom, whose 'nr' is its index in the array.  Both
+ * allocations are treated as fatal when they fail (BUG_ON), so that the
+ * atom is never dereferenced as a NULL pointer.
+ */
+static struct sched_atom *
+get_new_event(struct task_desc *task, u64 timestamp)
+{
+	struct sched_atom *event = zalloc(sizeof(*event));
+	unsigned long idx = task->nr_events;
+	size_t size;
+
+	BUG_ON(!event);
+
+	event->timestamp = timestamp;
+	event->nr = idx;
+
+	task->nr_events++;
+	size = sizeof(struct sched_atom *) * task->nr_events;
+	task->atoms = realloc(task->atoms, size);
+	BUG_ON(!task->atoms);
+
+	task->atoms[idx] = event;
+
+	return event;
+}
+
+/* Return the most recently added atom of @task, or NULL when it has none. */
+static struct sched_atom *last_event(struct task_desc *task)
+{
+	return task->nr_events ? task->atoms[task->nr_events - 1] : NULL;
+}
+
+/*
+ * Record that @task ran for @duration.  If the task's latest atom is already
+ * a RUN atom the duration is folded into it (counted in
+ * nr_run_events_optimized); otherwise a new RUN atom is appended.
+ */
+static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task,
+				u64 timestamp, u64 duration)
+{
+	struct sched_atom *tail = last_event(task);
+	struct sched_atom *run;
+
+	if (!tail || tail->type != SCHED_EVENT_RUN) {
+		run = get_new_event(task, timestamp);
+		run->type = SCHED_EVENT_RUN;
+		run->duration = duration;
+
+		sched->nr_run_events++;
+		return;
+	}
+
+	/* Back-to-back runs: extend the existing atom instead of adding one. */
+	sched->nr_run_events_optimized++;
+	tail->duration += duration;
+}
+
+/*
+ * Append a WAKEUP atom to @task that targets @wakee.
+ *
+ * The wakeup is paired with the wakee's latest atom only when that atom is a
+ * SLEEP without a semaphore yet: a new semaphore is created and shared by
+ * both atoms.  Otherwise the wakeup stays unpaired and is counted as
+ * targetless (no trailing SLEEP) or multitarget (SLEEP already has a waker).
+ */
+static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task,
+				   u64 timestamp, struct task_desc *wakee)
+{
+	struct sched_atom *event, *wakee_event;
+
+	event = get_new_event(task, timestamp);
+	event->type = SCHED_EVENT_WAKEUP;
+	event->wakee = wakee;
+
+	wakee_event = last_event(wakee);
+	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
+		sched->targetless_wakeups++;
+		return;
+	}
+	if (wakee_event->wait_sem) {
+		sched->multitarget_wakeups++;
+		return;
+	}
+
+	wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
+	/* sem_init() on a NULL pointer would crash; fail loudly instead. */
+	BUG_ON(!wakee_event->wait_sem);
+	sem_init(wakee_event->wait_sem, 0, 0);
+	wakee_event->specific_wait = 1;
+	event->wait_sem = wakee_event->wait_sem;
+
+	sched->nr_wakeup_events++;
+}
+
+/* Append a SLEEP atom to @task and bump the sleep counter; @task_state is unused. */
+static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task,
+				  u64 timestamp, u64 task_state __maybe_unused)
+{
+	struct sched_atom *sleep_atom;
+
+	sleep_atom = get_new_event(task, timestamp);
+	sleep_atom->type = SCHED_EVENT_SLEEP;
+
+	sched->nr_sleep_events++;
+}
+
+/*
+ * Look up the replay task descriptor for @pid, creating it on first sight.
+ *
+ * The pid -> task table is allocated lazily, sized from the kernel/pid_max
+ * sysctl (MAX_PID if that cannot be read), and grown when a larger pid shows
+ * up.  A new task gets the next sequential number, a copy of @comm and an
+ * initial SLEEP atom, and is appended to sched->tasks.
+ *
+ * Allocation failures are fatal (BUG_ON).  Returns the existing or new task.
+ */
+static struct task_desc *register_pid(struct perf_sched *sched,
+				      unsigned long pid, const char *comm)
+{
+	struct task_desc *task;
+	static int pid_max;
+
+	if (sched->pid_to_task == NULL) {
+		if (sysctl__read_int("kernel/pid_max", &pid_max) < 0)
+			pid_max = MAX_PID;
+		BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL);
+	}
+	if (pid >= (unsigned long)pid_max) {
+		BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) *
+			sizeof(struct task_desc *))) == NULL);
+		/* Clear the freshly added slots; this also raises pid_max to pid + 1. */
+		while (pid >= (unsigned long)pid_max)
+			sched->pid_to_task[pid_max++] = NULL;
+	}
+
+	task = sched->pid_to_task[pid];
+
+	if (task)
+		return task;
+
+	task = zalloc(sizeof(*task));
+	BUG_ON(!task);
+	task->pid = pid;
+	task->nr = sched->nr_tasks;
+	/* Bounded copy: an over-long comm is truncated instead of overflowing. */
+	snprintf(task->comm, sizeof(task->comm), "%s", comm);
+	/*
+	 * every task starts in sleeping state - this gets ignored
+	 * if there's no wakeup pointing to this sleep state:
+	 */
+	add_sched_event_sleep(sched, task, 0, 0);
+
+	sched->pid_to_task[pid] = task;
+	sched->nr_tasks++;
+	sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *));
+	BUG_ON(!sched->tasks);
+	sched->tasks[task->nr] = task;
+
+	if (verbose > 0)
+		printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm);
+
+	return task;
+}
+
+
+/* Print one summary line (number, comm, pid, atom count) per registered task. */
+static void print_task_traces(struct perf_sched *sched)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx < sched->nr_tasks; idx++) {
+		struct task_desc *t = sched->tasks[idx];
+
+		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
+			t->nr, t->comm, t->pid, t->nr_events);
+	}
+}
+
+/*
+ * Chain the tasks into a ring: every task gets a WAKEUP atom (timestamp 0)
+ * aimed at the next task in sched->tasks, the last one wrapping to the first.
+ */
+static void add_cross_task_wakeups(struct perf_sched *sched)
+{
+	unsigned long cur, next;
+
+	for (cur = 0; cur < sched->nr_tasks; cur++) {
+		next = (cur + 1 == sched->nr_tasks) ? 0 : cur + 1;
+
+		add_sched_event_wakeup(sched, sched->tasks[cur], 0,
+				       sched->tasks[next]);
+	}
+}
+
+/*
+ * Replay one atom: RUN burns CPU for the recorded duration, SLEEP waits on
+ * the atom's semaphore and WAKEUP posts it (both only if a semaphore is
+ * attached), MIGRATION is a no-op.  Any other type, or a failing semaphore
+ * call, trips BUG_ON.
+ */
+static void perf_sched__process_event(struct perf_sched *sched,
+				      struct sched_atom *atom)
+{
+	int err = 0;
+
+	if (atom->type == SCHED_EVENT_RUN) {
+		burn_nsecs(sched, atom->duration);
+	} else if (atom->type == SCHED_EVENT_SLEEP) {
+		if (atom->wait_sem)
+			err = sem_wait(atom->wait_sem);
+		BUG_ON(err);
+	} else if (atom->type == SCHED_EVENT_WAKEUP) {
+		if (atom->wait_sem)
+			err = sem_post(atom->wait_sem);
+		BUG_ON(err);
+	} else if (atom->type != SCHED_EVENT_MIGRATION) {
+		BUG_ON(1);
+	}
+}
+
+/*
+ * Return the user plus system CPU time reported by getrusage(RUSAGE_SELF),
+ * converted to nanoseconds.  A getrusage() failure trips BUG_ON.
+ */
+static u64 get_cpu_usage_nsec_parent(void)
+{
+	struct rusage usage;
+	u64 user, sys;
+	int rc;
+
+	rc = getrusage(RUSAGE_SELF, &usage);
+	BUG_ON(rc);
+
+	user = usage.ru_utime.tv_sec * NSEC_PER_SEC + usage.ru_utime.tv_usec * NSEC_PER_USEC;
+	sys  = usage.ru_stime.tv_sec * NSEC_PER_SEC + usage.ru_stime.tv_usec * NSEC_PER_USEC;
+
+	return user + sys;
+}
+
+/*
+ * Open a PERF_COUNT_SW_TASK_CLOCK software counter and return its fd.
+ *
+ * On EMFILE with sched->force set, RLIMIT_NOFILE is raised by the number of
+ * tasks still to be set up (sched->nr_tasks - @cur_task) and the open is
+ * retried.  Every other failure prints a diagnostic and exits the process,
+ * so a returned fd is always valid.
+ */
+static int self_open_counters(struct perf_sched *sched, unsigned long cur_task)
+{
+	struct perf_event_attr attr;
+	char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE];
+	int fd, open_errno;
+	struct rlimit limit;
+	bool need_privilege = false;
+
+	memset(&attr, 0, sizeof(attr));
+	/* info is printed with %s on every failure path; keep it a valid string. */
+	info[0] = '\0';
+
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.config = PERF_COUNT_SW_TASK_CLOCK;
+
+force_again:
+	fd = sys_perf_event_open(&attr, 0, -1, -1,
+				 perf_event_open_cloexec_flag());
+
+	if (fd < 0) {
+		/* Save it now: getrlimit()/setrlimit() below may overwrite errno. */
+		open_errno = errno;
+
+		if (open_errno == EMFILE) {
+			if (sched->force) {
+				BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1);
+				limit.rlim_cur += sched->nr_tasks - cur_task;
+				if (limit.rlim_cur > limit.rlim_max) {
+					limit.rlim_max = limit.rlim_cur;
+					need_privilege = true;
+				}
+				if (setrlimit(RLIMIT_NOFILE, &limit) == -1) {
+					if (need_privilege && errno == EPERM)
+						strcpy(info, "Need privilege\n");
+				} else
+					goto force_again;
+			} else
+				strcpy(info, "Have a try with -f option\n");
+		}
+		pr_err("Error: sys_perf_event_open() syscall returned "
+		       "with %d (%s)\n%s", fd,
+		       str_error_r(open_errno, sbuf, sizeof(sbuf)), info);
+		exit(EXIT_FAILURE);
+	}
+	return fd;
+}
+
+/*
+ * Read the current u64 value of the counter behind @fd and return it.
+ * A read that does not deliver exactly sizeof(u64) bytes trips BUG_ON.
+ */
+static u64 get_cpu_usage_nsec_self(int fd)
+{
+	u64 value;
+	int nread;
+
+	nread = read(fd, &value, sizeof(value));
+	BUG_ON(nread != sizeof(value));
+
+	return value;
+}
+
+/*
+ * Start-up arguments handed to thread_func() through pthread_create().
+ * create_tasks() allocates one per task; thread_func() frees it after
+ * copying the fields out.
+ */
+struct sched_thread_parms {
+	struct task_desc  *task;	/* task whose atoms the thread replays */
+	struct perf_sched *sched;	/* shared replay state */
+	int fd;				/* counter fd from self_open_counters() */
+};
+
+/*
+ * Body of every replay thread.
+ *
+ * @ctx: heap-allocated struct sched_thread_parms; freed here once its fields
+ *       have been copied to locals.
+ *
+ * The thread names itself ":<comm>" and then loops forever: post
+ * ready_for_work, pass the start_work_mutex gate, replay all atoms of the
+ * task while sampling the counter fd before and after, publish the CPU time
+ * used, post work_done_sem and pass the work_done_wait_mutex gate.
+ *
+ * Returns NULL only when fd < 0; otherwise the loop never exits.
+ */
+static void *thread_func(void *ctx)
+{
+	struct sched_thread_parms *parms = ctx;
+	struct task_desc *this_task = parms->task;
+	struct perf_sched *sched = parms->sched;
+	u64 cpu_usage_0, cpu_usage_1;
+	unsigned long i, ret;
+	char comm2[22];
+	int fd = parms->fd;
+
+	zfree(&parms);
+
+	sprintf(comm2, ":%s", this_task->comm);
+	prctl(PR_SET_NAME, comm2);
+	if (fd < 0)
+		return NULL;
+again:
+	ret = sem_post(&this_task->ready_for_work);
+	BUG_ON(ret);
+	/*
+	 * Lock followed by an immediate unlock: nothing is protected, the pair
+	 * only blocks for as long as another thread holds start_work_mutex.
+	 */
+	ret = pthread_mutex_lock(&sched->start_work_mutex);
+	BUG_ON(ret);
+	ret = pthread_mutex_unlock(&sched->start_work_mutex);
+	BUG_ON(ret);
+
+	cpu_usage_0 = get_cpu_usage_nsec_self(fd);
+
+	for (i = 0; i < this_task->nr_events; i++) {
+		this_task->curr_event = i;
+		perf_sched__process_event(sched, this_task->atoms[i]);
+	}
+
+	cpu_usage_1 = get_cpu_usage_nsec_self(fd);
+	this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
+	ret = sem_post(&this_task->work_done_sem);
+	BUG_ON(ret);
+
+	/* Same gate idiom as above, this time on work_done_wait_mutex. */
+	ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
+	BUG_ON(ret);
+	ret = pthread_mutex_unlock(&sched->work_done_wait_mutex);
+	BUG_ON(ret);
+
+	goto again;
+}
+
+/*
+ * Start one replay thread per registered task.
+ *
+ * Both start_work_mutex and work_done_wait_mutex are taken before any thread
+ * is created and are still held on return.  Each thread gets a freshly
+ * allocated sched_thread_parms (task, sched, counter fd), re-initialised
+ * semaphores and a stack of max(16 KiB, PTHREAD_STACK_MIN).  Any failure
+ * trips BUG_ON.
+ */
+static void create_tasks(struct perf_sched *sched)
+{
+	struct task_desc *task;
+	pthread_attr_t attr;
+	unsigned long i;
+	int err;
+
+	err = pthread_attr_init(&attr);
+	BUG_ON(err);
+	err = pthread_attr_setstacksize(&attr,
+			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
+	BUG_ON(err);
+	err = pthread_mutex_lock(&sched->start_work_mutex);
+	BUG_ON(err);
+	err = pthread_mutex_lock(&sched->work_done_wait_mutex);
+	BUG_ON(err);
+	for (i = 0; i < sched->nr_tasks; i++) {
+		/* Ownership of parms passes to the new thread, which frees it. */
+		struct sched_thread_parms *parms = malloc(sizeof(*parms));
+		BUG_ON(parms == NULL);
+		parms->task = task = sched->tasks[i];
+		parms->sched = sched;
+		parms->fd = self_open_counters(sched, i);
+		sem_init(&task->sleep_sem, 0, 0);
+		sem_init(&task->ready_for_work, 0, 0);
+		sem_init(&task->work_done_sem, 0, 0);
+		task->curr_event = 0;
+		err = pthread_create(&task->thread, &attr, thread_func, parms);
+		BUG_ON(err);
+	}
+}
+
+/*
+ * Drive one replay iteration from the controlling thread.
+ *
+ * Sequence: release work_done_wait_mutex, wait until every task has posted
+ * ready_for_work, re-take work_done_wait_mutex, release start_work_mutex so
+ * the tasks begin replaying, wait for every work_done_sem, then re-take
+ * start_work_mutex.  On return both mutexes are held again.
+ *
+ * Along the way it sums the per-task CPU usage into sched->cpu_usage,
+ * measures its own CPU usage via getrusage(), and updates the running
+ * averages of both.
+ */
+static void wait_for_tasks(struct perf_sched *sched)
+{
+	u64 cpu_usage_0, cpu_usage_1;
+	struct task_desc *task;
+	unsigned long i, ret;
+
+	sched->start_time = get_nsecs();
+	sched->cpu_usage = 0;
+	pthread_mutex_unlock(&sched->work_done_wait_mutex);
+
+	for (i = 0; i < sched->nr_tasks; i++) {
+		task = sched->tasks[i];
+		ret = sem_wait(&task->ready_for_work);
+		BUG_ON(ret);
+		sem_init(&task->ready_for_work, 0, 0);
+	}
+	ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
+	BUG_ON(ret);
+
+	cpu_usage_0 = get_cpu_usage_nsec_parent();
+
+	/* Releasing this mutex lets the worker threads start replaying. */
+	pthread_mutex_unlock(&sched->start_work_mutex);
+
+	for (i = 0; i < sched->nr_tasks; i++) {
+		task = sched->tasks[i];
+		ret = sem_wait(&task->work_done_sem);
+		BUG_ON(ret);
+		sem_init(&task->work_done_sem, 0, 0);
+		sched->cpu_usage += task->cpu_usage;
+		task->cpu_usage = 0;
+	}
+
+	cpu_usage_1 = get_cpu_usage_nsec_parent();
+	/*
+	 * Running average: seeded with the first sample, afterwards the newest
+	 * sample carries a weight of 1/replay_repeat.
+	 */
+	if (!sched->runavg_cpu_usage)
+		sched->runavg_cpu_usage = sched->cpu_usage;
+	sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat;
+
+	sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
+	if (!sched->runavg_parent_cpu_usage)
+		sched->runavg_parent_cpu_usage = sched->parent_cpu_usage;
+	sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) +
+					 sched->parent_cpu_usage)/sched->replay_repeat;
+
+	ret = pthread_mutex_lock(&sched->start_work_mutex);
+	BUG_ON(ret);
+
+	/* Reset per-task state for the next iteration. */
+	for (i = 0; i < sched->nr_tasks; i++) {
+		task = sched->tasks[i];
+		sem_init(&task->sleep_sem, 0, 0);
+		task->curr_event = 0;
+	}
+}
+
+/*
+ * Run one replay iteration via wait_for_tasks(), time it with get_nsecs(),
+ * update the accumulated runtime/fluctuation statistics and the running
+ * average, and print one result line (wall time, running average, CPU usage
+ * and its running average, all in milliseconds).
+ */
+static void run_one_test(struct perf_sched *sched)
+{
+	u64 T0, T1, delta, avg_delta, fluct;
+
+	T0 = get_nsecs();
+	wait_for_tasks(sched);
+	T1 = get_nsecs();
+
+	delta = T1 - T0;
+	sched->sum_runtime += delta;
+	sched->nr_runs++;
+
+	/* fluct is the absolute distance of this run from the mean so far. */
+	avg_delta = sched->sum_runtime / sched->nr_runs;
+	if (delta < avg_delta)
+		fluct = avg_delta - delta;
+	else
+		fluct = delta - avg_delta;
+	sched->sum_fluct += fluct;
+	/* Seeded with the first sample; newest sample weighs 1/replay_repeat. */
+	if (!sched->run_avg)
+		sched->run_avg = delta;
+	sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat;
+
+	printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC);
+
+	printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC);
+
+	printf("cpu: %0.2f / %0.2f",
+		(double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC);
+
+#if 0
+	/*
+	 * rusage statistics done by the parent, these are less
+	 * accurate than the sched->sum_exec_runtime based statistics:
+	 */
+	printf(" [%0.2f / %0.2f]",
+		(double)sched->parent_cpu_usage / NSEC_PER_MSEC,
+		(double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC);
+#endif
+
+	printf("\n");
+
+	if (sched->nr_sleep_corrections)
+		printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections);
+	sched->nr_sleep_corrections = 0;
+}
+
+/*
+ * Sanity-check the calibration: time a one-millisecond burn and a
+ * one-millisecond sleep and print how long each actually took.
+ */
+static void test_calibrations(struct perf_sched *sched)
+{
+	u64 begin, end;
+
+	begin = get_nsecs();
+	burn_nsecs(sched, NSEC_PER_MSEC);
+	end = get_nsecs();
+	printf("the run test took %" PRIu64 " nsecs\n", end - begin);
+
+	begin = get_nsecs();
+	sleep_nsecs(NSEC_PER_MSEC);
+	end = get_nsecs();
+	printf("the sleep test took %" PRIu64 " nsecs\n", end - begin);
+}
+
+/*
+ * Replay handler for a wakeup sample: register the waking thread
+ * (sample->tid, comm "<unknown>" if new) and the woken task (the sample's
+ * "pid"/"comm" fields), then record a WAKEUP atom on the waker.
+ * Always returns 0.
+ */
+static int
+replay_wakeup_event(struct perf_sched *sched,
+		    struct perf_evsel *evsel, struct perf_sample *sample,
+		    struct machine *machine __maybe_unused)
+{
+	const char *wakee_comm = perf_evsel__strval(evsel, sample, "comm");
+	const u32 wakee_pid = perf_evsel__intval(evsel, sample, "pid");
+	struct task_desc *src, *dst;
+
+	if (verbose > 0) {
+		printf("sched_wakeup event %p\n", evsel);
+
+		printf(" ... pid %d woke up %s/%d\n", sample->tid, wakee_comm, wakee_pid);
+	}
+
+	src = register_pid(sched, sample->tid, "<unknown>");
+	dst = register_pid(sched, wakee_pid, wakee_comm);
+
+	add_sched_event_wakeup(sched, src, sample->time, dst);
+	return 0;
+}
+
+/*
+ * Replay handler for a context-switch sample.
+ *
+ * Samples from a CPU outside [0, MAX_CPUS) are ignored (returns 0).  The time
+ * since the previous switch on this CPU (0 for the first one) becomes a RUN
+ * atom on the outgoing task, followed by a SLEEP atom; both tasks are
+ * registered.  Returns -1 if time went backwards on this CPU, else 0.
+ */
+static int replay_switch_event(struct perf_sched *sched,
+			       struct perf_evsel *evsel,
+			       struct perf_sample *sample,
+			       struct machine *machine __maybe_unused)
+{
+	const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm");
+	const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
+	const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid");
+	const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+	const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+	struct task_desc *outgoing, __maybe_unused *incoming;
+	u64 last_switch, now = sample->time;
+	int cpu = sample->cpu;
+	s64 ran = 0;
+
+	if (verbose > 0)
+		printf("sched_switch event %p\n", evsel);
+
+	if (cpu < 0 || cpu >= MAX_CPUS)
+		return 0;
+
+	last_switch = sched->cpu_last_switched[cpu];
+	if (last_switch)
+		ran = now - last_switch;
+
+	if (ran < 0) {
+		pr_err("hm, delta: %" PRIu64 " < 0 ?\n", ran);
+		return -1;
+	}
+
+	pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
+		 prev_comm, prev_pid, next_comm, next_pid, ran);
+
+	outgoing = register_pid(sched, prev_pid, prev_comm);
+	incoming = register_pid(sched, next_pid, next_comm);
+
+	sched->cpu_last_switched[cpu] = now;
+
+	add_sched_event_run(sched, outgoing, now, ran);
+	add_sched_event_sleep(sched, outgoing, now, prev_state);
+
+	return 0;
+}
+
+/*
+ * Replay handler for a fork event: look up (or create) the child and parent
+ * threads and register both tids as replay tasks.  If either lookup fails
+ * only a debug message is emitted.  The thread references are dropped before
+ * returning.  Always returns 0.
+ */
+static int replay_fork_event(struct perf_sched *sched,
+			     union perf_event *event,
+			     struct machine *machine)
+{
+	struct thread *child, *parent;
+
+	child = machine__findnew_thread(machine, event->fork.pid,
+					event->fork.tid);
+	parent = machine__findnew_thread(machine, event->fork.ppid,
+					 event->fork.ptid);
+
+	if (child != NULL && parent != NULL) {
+		if (verbose > 0) {
+			printf("fork event\n");
+			printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid);
+			printf("...  child: %s/%d\n", thread__comm_str(child), child->tid);
+		}
+
+		register_pid(sched, parent->tid, thread__comm_str(parent));
+		register_pid(sched, child->tid, thread__comm_str(child));
+	} else {
+		pr_debug("thread does not exist on fork event: child %p, parent %p\n",
+				 child, parent);
+	}
+
+	thread__put(child);
+	thread__put(parent);
+	return 0;
+}
+
+/* One selectable sort key for the latency output. */
+struct sort_dimension {
+	const char		*name;	/* token matched by sort_dimension__add() */
+	sort_fn_t		cmp;	/* comparison callback for two work_atoms */
+	struct list_head	list;	/* link in the caller's sort list */
+};
+
+/*
+ * Per-thread runtime stats: allocate a zeroed thread_runtime, initialise its
+ * run_stats and attach it to @thread as private data.
+ * Returns the new structure, or NULL if the allocation failed.
+ */
+static struct thread_runtime *thread__init_runtime(struct thread *thread)
+{
+	struct thread_runtime *runtime = zalloc(sizeof(struct thread_runtime));
+
+	if (runtime) {
+		init_stats(&runtime->run_stats);
+		thread__set_priv(thread, runtime);
+	}
+
+	return runtime;
+}
+
+/*
+ * Return the thread_runtime stored in @thread's private data, creating it on
+ * first use.  Returns NULL (after a debug message) if it cannot be allocated.
+ */
+static struct thread_runtime *thread__get_runtime(struct thread *thread)
+{
+	struct thread_runtime *runtime = thread__priv(thread);
+
+	if (runtime != NULL)
+		return runtime;
+
+	runtime = thread__init_runtime(thread);
+	if (runtime == NULL)
+		pr_debug("Failed to malloc memory for runtime data.\n");
+
+	return runtime;
+}
+
+/*
+ * Compare @l and @r with each sort dimension on @list in order; the first
+ * non-zero result wins.  Returns 0 when all dimensions compare equal.
+ * The list must not be empty (BUG_ON).
+ */
+static int
+thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
+{
+	struct sort_dimension *dim;
+	int result = 0;
+
+	BUG_ON(list_empty(list));
+
+	list_for_each_entry(dim, list, list) {
+		result = dim->cmp(l, r);
+		if (result != 0)
+			break;
+	}
+
+	return result;
+}
+
+/*
+ * Find the work_atoms node for @thread in the rb-tree @root, ordered by
+ * @sort_list.  Returns the node, or NULL if the thread has none yet.
+ * A node that compares equal but belongs to another thread trips BUG_ON.
+ */
+static struct work_atoms *
+thread_atoms_search(struct rb_root *root, struct thread *thread,
+			 struct list_head *sort_list)
+{
+	struct work_atoms key = { .thread = thread };
+	struct rb_node *cursor;
+
+	for (cursor = root->rb_node; cursor; ) {
+		struct work_atoms *candidate;
+		int cmp;
+
+		candidate = container_of(cursor, struct work_atoms, node);
+		cmp = thread_lat_cmp(sort_list, &key, candidate);
+
+		if (cmp == 0) {
+			BUG_ON(thread != candidate->thread);
+			return candidate;
+		}
+
+		/* Larger keys live on the left in this tree. */
+		cursor = cmp > 0 ? cursor->rb_left : cursor->rb_right;
+	}
+	return NULL;
+}
+
+/*
+ * Insert @data into the rb-tree @root using the ordering given by
+ * @sort_list.  Entries comparing greater go left; equal or smaller go right,
+ * so duplicates are kept.
+ */
+static void
+__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
+			 struct list_head *sort_list)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		struct work_atoms *entry;
+
+		parent = *link;
+		entry = container_of(parent, struct work_atoms, node);
+
+		if (thread_lat_cmp(sort_list, data, entry) > 0)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	rb_link_node(&data->node, parent, link);
+	rb_insert_color(&data->node, root);
+}
+
+/*
+ * Create an empty work_atoms entry for @thread (taking a thread reference)
+ * and insert it into sched->atom_root, keyed by sched->cmp_pid.
+ * Returns 0 on success, -1 if the allocation failed.
+ */
+static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
+{
+	struct work_atoms *entry;
+
+	entry = zalloc(sizeof(*entry));
+	if (entry == NULL) {
+		pr_err("No memory at %s\n", __func__);
+		return -1;
+	}
+
+	INIT_LIST_HEAD(&entry->work_list);
+	entry->thread = thread__get(thread);
+	__thread_latency_insert(&sched->atom_root, entry, &sched->cmp_pid);
+	return 0;
+}
+
+/*
+ * Map @prev_state to its one-letter code by indexing
+ * TASK_STATE_TO_CHAR_STR.
+ *
+ * @prev_state comes straight from trace data, so values past the end of the
+ * table are answered with '?' instead of reading out of bounds.
+ *
+ * NOTE(review): the value is used as a direct table index here; if
+ * prev_state is actually a bit mask, a bit-position lookup would be needed -
+ * confirm against the tracepoint format.
+ */
+static char sched_out_state(u64 prev_state)
+{
+	const char *str = TASK_STATE_TO_CHAR_STR;
+
+	if (prev_state >= strlen(str))
+		return '?';
+
+	return str[prev_state];
+}
+
+/*
+ * Append a new work_atom to @atoms recording a sched-out at @timestamp.
+ * If @run_state is 'R' the atom starts in THREAD_WAIT_CPU with its wake-up
+ * time set to the sched-out time; otherwise it keeps the zeroed state.
+ * Returns 0 on success, -1 if the allocation failed.
+ */
+static int
+add_sched_out_event(struct work_atoms *atoms,
+		    char run_state,
+		    u64 timestamp)
+{
+	struct work_atom *atom = zalloc(sizeof(*atom));
+	if (!atom) {
+		/* Same wording and trailing newline as thread_atoms_insert(). */
+		pr_err("No memory at %s\n", __func__);
+		return -1;
+	}
+
+	atom->sched_out_time = timestamp;
+
+	if (run_state == 'R') {
+		atom->state = THREAD_WAIT_CPU;
+		atom->wake_up_time = atom->sched_out_time;
+	}
+
+	list_add_tail(&atom->list, &atoms->work_list);
+	return 0;
+}
+
+/*
+ * Add @delta to the runtime of the newest atom on @atoms and to the
+ * per-thread total.  The work list must not be empty (BUG_ON).
+ */
+static void
+add_runtime_event(struct work_atoms *atoms, u64 delta,
+		  u64 timestamp __maybe_unused)
+{
+	struct work_atom *newest;
+
+	BUG_ON(list_empty(&atoms->work_list));
+
+	newest = list_entry(atoms->work_list.prev, struct work_atom, list);
+	newest->runtime += delta;
+
+	atoms->total_runtime += delta;
+}
+
+/*
+ * Account a sched-in at @timestamp against the newest atom on @atoms.
+ *
+ * Nothing happens if the list is empty or the atom is not waiting for a CPU.
+ * A sched-in earlier than the wake-up marks the atom THREAD_IGNORE.
+ * Otherwise the wake-up-to-sched-in latency is added to the totals and the
+ * maximum (with the time it occurred) is updated.
+ */
+static void
+add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
+{
+	struct work_atom *newest;
+	u64 latency;
+
+	if (list_empty(&atoms->work_list))
+		return;
+
+	newest = list_entry(atoms->work_list.prev, struct work_atom, list);
+	if (newest->state != THREAD_WAIT_CPU)
+		return;
+
+	if (timestamp < newest->wake_up_time) {
+		newest->state = THREAD_IGNORE;
+		return;
+	}
+
+	newest->state = THREAD_SCHED_IN;
+	newest->sched_in_time = timestamp;
+	latency = timestamp - newest->wake_up_time;
+
+	atoms->nb_atoms++;
+	atoms->total_lat += latency;
+	if (latency > atoms->max_lat) {
+		atoms->max_lat = latency;
+		atoms->max_lat_at = timestamp;
+	}
+}
+
+/*
+ * Latency handler for a context-switch sample.
+ *
+ * Records a sched-out atom for the outgoing thread (state derived from
+ * prev_state) and a sched-in for the incoming thread, creating their
+ * work_atoms entries on first sight.  A thread first seen while being
+ * switched in gets an initial runnable ('R') atom.
+ *
+ * Returns 0 on success, -1 if time went backwards on this CPU or on any
+ * lookup/allocation failure.  Both thread references are dropped on every
+ * path taken after they were acquired.
+ */
+static int latency_switch_event(struct perf_sched *sched,
+				struct perf_evsel *evsel,
+				struct perf_sample *sample,
+				struct machine *machine)
+{
+	const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
+		  next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+	const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+	struct work_atoms *out_events, *in_events;
+	struct thread *sched_out, *sched_in;
+	u64 timestamp0, timestamp = sample->time;
+	int cpu = sample->cpu, err = -1;
+	s64 delta;
+
+	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
+
+	timestamp0 = sched->cpu_last_switched[cpu];
+	sched->cpu_last_switched[cpu] = timestamp;
+	if (timestamp0)
+		delta = timestamp - timestamp0;
+	else
+		delta = 0;
+
+	if (delta < 0) {
+		pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
+		return -1;
+	}
+
+	sched_out = machine__findnew_thread(machine, -1, prev_pid);
+	sched_in = machine__findnew_thread(machine, -1, next_pid);
+	if (sched_out == NULL || sched_in == NULL)
+		goto out_put;
+
+	out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
+	if (!out_events) {
+		if (thread_atoms_insert(sched, sched_out))
+			goto out_put;
+		out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
+		if (!out_events) {
+			pr_err("out-event: Internal tree error");
+			goto out_put;
+		}
+	}
+	/* Leave through out_put so the two thread references are not leaked. */
+	if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp))
+		goto out_put;
+
+	in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
+	if (!in_events) {
+		if (thread_atoms_insert(sched, sched_in))
+			goto out_put;
+		in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
+		if (!in_events) {
+			pr_err("in-event: Internal tree error");
+			goto out_put;
+		}
+		/*
+		 * A task came in that we have not heard about yet,
+		 * add an initial atom in runnable state:
+		 */
+		if (add_sched_out_event(in_events, 'R', timestamp))
+			goto out_put;
+	}
+	add_sched_in_event(in_events, timestamp);
+	err = 0;
+out_put:
+	thread__put(sched_out);
+	thread__put(sched_in);
+	return err;
+}
+
+/*
+ * Latency handler for a runtime sample: add the sample's "runtime" to the
+ * newest atom of the thread named by "pid", creating the thread's
+ * work_atoms entry (with an initial runnable atom) on first sight.
+ *
+ * Returns 0 on success, -1 if the thread cannot be found/created or on
+ * allocation failure.  The thread reference is dropped before returning.
+ */
+static int latency_runtime_event(struct perf_sched *sched,
+				 struct perf_evsel *evsel,
+				 struct perf_sample *sample,
+				 struct machine *machine)
+{
+	const u32 pid	   = perf_evsel__intval(evsel, sample, "pid");
+	const u64 runtime  = perf_evsel__intval(evsel, sample, "runtime");
+	struct thread *thread = machine__findnew_thread(machine, -1, pid);
+	struct work_atoms *atoms;
+	u64 timestamp = sample->time;
+	int cpu = sample->cpu, err = -1;
+
+	if (thread == NULL)
+		return -1;
+
+	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
+
+	/*
+	 * Search only once the thread is known to be valid: the tree
+	 * comparison dereferences the key's thread pointer.
+	 */
+	atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
+	if (!atoms) {
+		if (thread_atoms_insert(sched, thread))
+			goto out_put;
+		atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
+		if (!atoms) {
+			pr_err("in-event: Internal tree error");
+			goto out_put;
+		}
+		if (add_sched_out_event(atoms, 'R', timestamp))
+			goto out_put;
+	}
+
+	add_runtime_event(atoms, runtime, timestamp);
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
+}
+
+/*
+ * Latency handler for a wakeup sample.
+ *
+ * Looks up the woken thread (sample field "pid"), creating its work_atoms
+ * entry with an initial 'S' atom on first sight, and moves the newest atom
+ * to THREAD_WAIT_CPU with wake_up_time set to the sample time.  The update
+ * is skipped when the atom is not sleeping (unless a single CPU is being
+ * profiled) or when the sample is older than the atom's sched-out time.
+ *
+ * Returns 0 on success (including the skipped cases), -1 on lookup or
+ * allocation failure.  The thread reference is dropped before returning.
+ */
+static int latency_wakeup_event(struct perf_sched *sched,
+				struct perf_evsel *evsel,
+				struct perf_sample *sample,
+				struct machine *machine)
+{
+	const u32 pid	  = perf_evsel__intval(evsel, sample, "pid");
+	struct work_atoms *atoms;
+	struct work_atom *atom;
+	struct thread *wakee;
+	u64 timestamp = sample->time;
+	int err = -1;
+
+	wakee = machine__findnew_thread(machine, -1, pid);
+	if (wakee == NULL)
+		return -1;
+	atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
+	if (!atoms) {
+		if (thread_atoms_insert(sched, wakee))
+			goto out_put;
+		atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
+		if (!atoms) {
+			pr_err("wakeup-event: Internal tree error");
+			goto out_put;
+		}
+		if (add_sched_out_event(atoms, 'S', timestamp))
+			goto out_put;
+	}
+
+	BUG_ON(list_empty(&atoms->work_list));
+
+	/* Newest atom of this thread. */
+	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
+
+	/*
+	 * As we do not guarantee the wakeup event happens when
+	 * task is out of run queue, also may happen when task is
+	 * on run queue and wakeup only change ->state to TASK_RUNNING,
+	 * then we should not set the ->wake_up_time when wake up a
+	 * task which is on run queue.
+	 *
+	 * You WILL be missing events if you've recorded only
+	 * one CPU, or are only looking at only one, so don't
+	 * skip in this case.
+	 */
+	if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
+		goto out_ok;
+
+	sched->nr_timestamps++;
+	/* Wakeup stamped before the sched-out: count it and leave the atom alone. */
+	if (atom->sched_out_time > timestamp) {
+		sched->nr_unordered_timestamps++;
+		goto out_ok;
+	}
+
+	atom->state = THREAD_WAIT_CPU;
+	atom->wake_up_time = timestamp;
+out_ok:
+	err = 0;
+out_put:
+	thread__put(wakee);
+	return err;
+}
+
+/*
+ * Latency handler for a task-migration sample.
+ *
+ * Does nothing unless a single CPU is being profiled (profile_cpu != -1).
+ * Otherwise it looks up the migrating thread, creating its work_atoms entry
+ * (plus a replay task registration and an initial 'R' atom) on first sight,
+ * and resets the newest atom's sched-in, sched-out and wake-up times to the
+ * sample time.
+ *
+ * Returns 0 on success or when skipped, -1 on lookup/allocation failure.
+ * The thread reference is dropped before returning.
+ */
+static int latency_migrate_task_event(struct perf_sched *sched,
+				      struct perf_evsel *evsel,
+				      struct perf_sample *sample,
+				      struct machine *machine)
+{
+	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+	u64 timestamp = sample->time;
+	struct work_atoms *atoms;
+	struct work_atom *atom;
+	struct thread *migrant;
+	int err = -1;
+
+	/*
+	 * Only need to worry about migration when profiling one CPU.
+	 */
+	if (sched->profile_cpu == -1)
+		return 0;
+
+	migrant = machine__findnew_thread(machine, -1, pid);
+	if (migrant == NULL)
+		return -1;
+	atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
+	if (!atoms) {
+		if (thread_atoms_insert(sched, migrant))
+			goto out_put;
+		register_pid(sched, migrant->tid, thread__comm_str(migrant));
+		atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
+		if (!atoms) {
+			pr_err("migration-event: Internal tree error");
+			goto out_put;
+		}
+		if (add_sched_out_event(atoms, 'R', timestamp))
+			goto out_put;
+	}
+
+	BUG_ON(list_empty(&atoms->work_list));
+
+	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
+	atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;
+
+	sched->nr_timestamps++;
+
+	/*
+	 * NOTE(review): sched_out_time was just set to timestamp above, so this
+	 * comparison cannot be true as written - confirm whether the check was
+	 * meant to run before the assignment.
+	 */
+	if (atom->sched_out_time > timestamp)
+		sched->nr_unordered_timestamps++;
+	err = 0;
+out_put:
+	thread__put(migrant);
+	return err;
+}
+
+/*
+ * Print one row of the latency table for @work_list and add its runtime and
+ * atom count to the global totals.  Entries without any counted atom and
+ * threads whose comm is "swapper" are skipped.  The name column is padded
+ * with spaces to 24 characters.
+ */
+static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list)
+{
+	char max_lat_at[32];
+	int printed, pad;
+	u64 avg;
+
+	if (!work_list->nb_atoms)
+		return;
+	/*
+	 * Idle threads are not reported:
+	 */
+	if (!strcmp(thread__comm_str(work_list->thread), "swapper"))
+		return;
+
+	sched->all_runtime += work_list->total_runtime;
+	sched->all_count   += work_list->nb_atoms;
+
+	if (work_list->num_merged > 1)
+		printed = printf("  %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged);
+	else
+		printed = printf("  %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
+
+	for (pad = 24 - printed; pad > 0; pad--)
+		printf(" ");
+
+	avg = work_list->total_lat / work_list->nb_atoms;
+	timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
+
+	printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
+	      (double)work_list->total_runtime / NSEC_PER_MSEC,
+		 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
+		 (double)work_list->max_lat / NSEC_PER_MSEC,
+		 max_lat_at);
+}
+
+/*
+ * Sort callback: order work_atoms by thread tid.  Returns 0 only for the
+ * very same thread; distinct threads sharing a tid are ordered by address
+ * so the result is never 0 for them.
+ *
+ * The address tie-break compares instead of subtracting: truncating a
+ * pointer difference to int can flip its sign or collapse it to 0.
+ */
+static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
+{
+	if (l->thread == r->thread)
+		return 0;
+	if (l->thread->tid < r->thread->tid)
+		return -1;
+	if (l->thread->tid > r->thread->tid)
+		return 1;
+	return (unsigned long)l->thread < (unsigned long)r->thread ? -1 : 1;
+}
+
+/*
+ * Sort callback: order by average latency (total_lat / nb_atoms).
+ * An entry without atoms sorts first; @l is checked before @r.
+ */
+static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
+{
+	u64 left_avg, right_avg;
+
+	if (!l->nb_atoms)
+		return -1;
+	if (!r->nb_atoms)
+		return 1;
+
+	left_avg = l->total_lat / l->nb_atoms;
+	right_avg = r->total_lat / r->nb_atoms;
+
+	if (left_avg == right_avg)
+		return 0;
+
+	return left_avg < right_avg ? -1 : 1;
+}
+
+/* Sort callback: order by maximum latency; returns -1, 0 or 1. */
+static int max_cmp(struct work_atoms *l, struct work_atoms *r)
+{
+	if (l->max_lat == r->max_lat)
+		return 0;
+
+	return l->max_lat < r->max_lat ? -1 : 1;
+}
+
+/* Sort callback: order by number of counted atoms; returns -1, 0 or 1. */
+static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
+{
+	if (l->nb_atoms == r->nb_atoms)
+		return 0;
+
+	return l->nb_atoms < r->nb_atoms ? -1 : 1;
+}
+
+/* Sort callback: order by total runtime; returns -1, 0 or 1. */
+static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
+{
+	if (l->total_runtime == r->total_runtime)
+		return 0;
+
+	return l->total_runtime < r->total_runtime ? -1 : 1;
+}
+
+/*
+ * Append the sort dimension named @tok ("pid", "avg", "max", "switch" or
+ * "runtime") to @list.  The dimensions are static objects, so each one can
+ * be linked into a single list at a time.
+ * Returns 0 on success, -1 if @tok names no known dimension.
+ */
+static int sort_dimension__add(const char *tok, struct list_head *list)
+{
+	static struct sort_dimension dimensions[] = {
+		{ .name = "pid",	.cmp = pid_cmp,		},
+		{ .name = "avg",	.cmp = avg_cmp,		},
+		{ .name = "max",	.cmp = max_cmp,		},
+		{ .name = "switch",	.cmp = switch_cmp,	},
+		{ .name = "runtime",	.cmp = runtime_cmp,	},
+	};
+	size_t idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(dimensions); idx++) {
+		struct sort_dimension *dim = &dimensions[idx];
+
+		if (strcmp(dim->name, tok) != 0)
+			continue;
+
+		list_add_tail(&dim->list, list);
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Drain sched->atom_root and then sched->merged_atom_root, re-inserting
+ * every entry into sched->sorted_atom_root ordered by sched->sort_list.
+ */
+static void perf_sched__sort_lat(struct perf_sched *sched)
+{
+	struct rb_root *sources[] = {
+		&sched->atom_root,
+		&sched->merged_atom_root,
+	};
+	struct rb_node *node;
+	size_t idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(sources); idx++) {
+		struct rb_root *root = sources[idx];
+
+		while ((node = rb_first(root)) != NULL) {
+			struct work_atoms *data;
+
+			rb_erase(node, root);
+			data = rb_entry(node, struct work_atoms, node);
+			__thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
+		}
+	}
+}
+
+/*
+ * Tool callback for wakeup samples: forward to the active handler's
+ * wakeup_event hook and return its result, or 0 if no hook is set.
+ */
+static int process_sched_wakeup_event(struct perf_tool *tool,
+				      struct perf_evsel *evsel,
+				      struct perf_sample *sample,
+				      struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+	if (!sched->tp_handler->wakeup_event)
+		return 0;
+
+	return sched->tp_handler->wakeup_event(sched, evsel, sample, machine);
+}
+
+/*
+ * Overlay used by map__findnew_thread() and thread__has_color() to store a
+ * bool "color" flag in a thread's private-data pointer slot and read it
+ * back.
+ */
+union map_priv {
+	void	*ptr;	/* value as passed to/returned by thread__set_priv()/thread__priv() */
+	bool	 color;	/* flag view of the same storage */
+};
+
+/* Read back the color flag stored in @thread's private-data pointer. */
+static bool thread__has_color(struct thread *thread)
+{
+	union map_priv stored = {
+		.ptr = thread__priv(thread),
+	};
+	bool colored = stored.color;
+
+	return colored;
+}
+
+/*
+ * machine__findnew_thread() wrapper for map mode.  When a color-pid map is
+ * configured and the thread has no private data yet, the thread's private
+ * pointer is set to a map_priv whose color flag tells whether @tid is in
+ * sched->map.color_pids.  Returns the thread (possibly NULL).
+ */
+static struct thread*
+map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
+{
+	struct thread *thread = machine__findnew_thread(machine, pid, tid);
+	union map_priv priv = {
+		.color = false,
+	};
+
+	if (!sched->map.color_pids)
+		return thread;
+	if (!thread)
+		return thread;
+	if (thread__priv(thread))
+		return thread;
+
+	if (thread_map__has(sched->map.color_pids, tid))
+		priv.color = true;
+	thread__set_priv(thread, priv.ptr);
+
+	return thread;
+}
+
+/*
+ * Map-mode handler for a context-switch sample: prints one line of the CPU
+ * map.
+ *
+ * The incoming thread (sample field "next_pid") becomes the current thread
+ * of the sample's CPU and, on first sight, is given a two-character
+ * shortname ('.' for "swapper", otherwise the next letter/digit pair).
+ * The line shows, per displayed CPU, a '*' marker for the switching CPU and
+ * the shortname of whatever thread is current there, followed by the
+ * timestamp and - for new shortnames, changed comms or in verbose mode - the
+ * "shortname => comm:tid" legend.
+ *
+ * Returns 0 on success, -1 if time went backwards on this CPU or a thread /
+ * runtime structure could not be obtained.
+ */
+static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
+			    struct perf_sample *sample, struct machine *machine)
+{
+	const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+	struct thread *sched_in;
+	struct thread_runtime *tr;
+	int new_shortname;
+	u64 timestamp0, timestamp = sample->time;
+	s64 delta;
+	int i, this_cpu = sample->cpu;
+	int cpus_nr;
+	bool new_cpu = false;
+	const char *color = PERF_COLOR_NORMAL;
+	char stimestamp[32];
+
+	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
+
+	if (this_cpu > sched->max_cpu)
+		sched->max_cpu = this_cpu;
+
+	/*
+	 * Compact mode: columns are assigned to CPUs in the order they are
+	 * first seen; otherwise the column count follows max_cpu.
+	 */
+	if (sched->map.comp) {
+		cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
+		if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
+			sched->map.comp_cpus[cpus_nr++] = this_cpu;
+			new_cpu = true;
+		}
+	} else
+		cpus_nr = sched->max_cpu;
+
+	timestamp0 = sched->cpu_last_switched[this_cpu];
+	sched->cpu_last_switched[this_cpu] = timestamp;
+	if (timestamp0)
+		delta = timestamp - timestamp0;
+	else
+		delta = 0;
+
+	if (delta < 0) {
+		pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
+		return -1;
+	}
+
+	sched_in = map__findnew_thread(sched, machine, -1, next_pid);
+	if (sched_in == NULL)
+		return -1;
+
+	tr = thread__get_runtime(sched_in);
+	if (tr == NULL) {
+		thread__put(sched_in);
+		return -1;
+	}
+
+	/*
+	 * NOTE(review): the previous sched->curr_thread[this_cpu] value is
+	 * overwritten here without a thread__put() - confirm whether that
+	 * reference is released elsewhere.
+	 */
+	sched->curr_thread[this_cpu] = thread__get(sched_in);
+
+	printf("  ");
+
+	new_shortname = 0;
+	if (!tr->shortname[0]) {
+		if (!strcmp(thread__comm_str(sched_in), "swapper")) {
+			/*
+			 * Don't allocate a letter-number for swapper:0
+			 * as a shortname. Instead, we use '.' for it.
+			 */
+			tr->shortname[0] = '.';
+			tr->shortname[1] = ' ';
+		} else {
+			tr->shortname[0] = sched->next_shortname1;
+			tr->shortname[1] = sched->next_shortname2;
+
+			/* Advance the letter; after 'Z' wrap to 'A' and bump the digit. */
+			if (sched->next_shortname1 < 'Z') {
+				sched->next_shortname1++;
+			} else {
+				sched->next_shortname1 = 'A';
+				if (sched->next_shortname2 < '9')
+					sched->next_shortname2++;
+				else
+					sched->next_shortname2 = '0';
+			}
+		}
+		new_shortname = 1;
+	}
+
+	/* One column per displayed CPU. */
+	for (i = 0; i < cpus_nr; i++) {
+		int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
+		struct thread *curr_thread = sched->curr_thread[cpu];
+		struct thread_runtime *curr_tr;
+		const char *pid_color = color;
+		const char *cpu_color = color;
+
+		if (curr_thread && thread__has_color(curr_thread))
+			pid_color = COLOR_PIDS;
+
+		/* Skip CPUs excluded by the user-supplied CPU list. */
+		if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
+			continue;
+
+		if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
+			cpu_color = COLOR_CPUS;
+
+		if (cpu != this_cpu)
+			color_fprintf(stdout, color, " ");
+		else
+			color_fprintf(stdout, cpu_color, "*");
+
+		if (sched->curr_thread[cpu]) {
+			curr_tr = thread__get_runtime(sched->curr_thread[cpu]);
+			if (curr_tr == NULL) {
+				thread__put(sched_in);
+				return -1;
+			}
+			color_fprintf(stdout, pid_color, "%2s ", curr_tr->shortname);
+		} else
+			color_fprintf(stdout, color, "   ");
+	}
+
+	/* The switching CPU itself is filtered out: finish the line without details. */
+	if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
+		goto out;
+
+	timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
+	color_fprintf(stdout, color, "  %12s secs ", stimestamp);
+	if (new_shortname || tr->comm_changed || (verbose > 0 && sched_in->tid)) {
+		const char *pid_color = color;
+
+		if (thread__has_color(sched_in))
+			pid_color = COLOR_PIDS;
+
+		color_fprintf(stdout, pid_color, "%s => %s:%d",
+		       tr->shortname, thread__comm_str(sched_in), sched_in->tid);
+		tr->comm_changed = false;
+	}
+
+	if (sched->map.comp && new_cpu)
+		color_fprintf(stdout, color, " (CPU %d)", this_cpu);
+
+out:
+	color_fprintf(stdout, color, "\n");
+
+	thread__put(sched_in);
+
+	return 0;
+}
+
+/*
+ * Handler for sched:sched_switch samples.
+ *
+ * Bumps nr_context_switch_bugs when the task being switched out ("prev_pid")
+ * differs from the pid last recorded as current on the sample's CPU, passes
+ * the sample to the active tp_handler's switch_event callback when one is
+ * set, and finally records "next_pid" as the CPU's current pid.
+ *
+ * Returns the callback's result, or 0 when no callback is installed.
+ */
+static int process_sched_switch_event(struct perf_tool *tool,
+				      struct perf_evsel *evsel,
+				      struct perf_sample *sample,
+				      struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+	const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid");
+	const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+	int cpu = sample->cpu;
+	int ret = 0;
+
+	/*
+	 * (u32)-1 means no pid has been recorded for this CPU yet; otherwise
+	 * the task being switched away must be the one we think is current.
+	 */
+	if (sched->curr_pid[cpu] != (u32)-1 &&
+	    sched->curr_pid[cpu] != prev_pid)
+		sched->nr_context_switch_bugs++;
+
+	if (sched->tp_handler->switch_event != NULL)
+		ret = sched->tp_handler->switch_event(sched, evsel, sample, machine);
+
+	sched->curr_pid[cpu] = next_pid;
+
+	return ret;
+}
+
+/*
+ * Handler for sched:sched_stat_runtime samples: delegate to the active
+ * tp_handler's runtime_event callback, or report success (0) when the
+ * current mode did not install one.
+ */
+static int process_sched_runtime_event(struct perf_tool *tool,
+				       struct perf_evsel *evsel,
+				       struct perf_sample *sample,
+				       struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+	if (sched->tp_handler->runtime_event == NULL)
+		return 0;
+
+	return sched->tp_handler->runtime_event(sched, evsel, sample, machine);
+}
+
+/*
+ * Fork event callback: first feed the event to the generic perf machinery,
+ * then give the active tp_handler a chance to do its own fork processing.
+ *
+ * The result of perf_event__process_fork() is not checked here; the return
+ * value is the tp_handler callback's result, or 0 when none is installed.
+ */
+static int perf_sched__process_fork_event(struct perf_tool *tool,
+					  union perf_event *event,
+					  struct perf_sample *sample,
+					  struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+	/* generic bookkeeping done by the perf machinery */
+	perf_event__process_fork(tool, event, sample, machine);
+
+	/* command-specific processing, if the current mode wants any */
+	if (sched->tp_handler->fork_event == NULL)
+		return 0;
+
+	return sched->tp_handler->fork_event(sched, event, machine);
+}
+
+/*
+ * Handler for sched:sched_migrate_task samples: delegate to the active
+ * tp_handler's migrate_task_event callback, or return 0 when there is none.
+ */
+static int process_sched_migrate_task_event(struct perf_tool *tool,
+					    struct perf_evsel *evsel,
+					    struct perf_sample *sample,
+					    struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+	if (sched->tp_handler->migrate_task_event == NULL)
+		return 0;
+
+	return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine);
+}
+
+/*
+ * Signature of the per-tracepoint callbacks stored in evsel->handler and
+ * invoked by perf_sched__process_tracepoint_sample().
+ */
+typedef int (*tracepoint_handler)(struct perf_tool *tool,
+				  struct perf_evsel *evsel,
+				  struct perf_sample *sample,
+				  struct machine *machine);
+
+/*
+ * Sample callback: dispatch the sample to the tracepoint_handler attached to
+ * its evsel.  Samples whose evsel has no handler are accepted silently.
+ *
+ * Returns the handler's result, or 0 when there is no handler.
+ */
+static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused,
+						 union perf_event *event __maybe_unused,
+						 struct perf_sample *sample,
+						 struct perf_evsel *evsel,
+						 struct machine *machine)
+{
+	tracepoint_handler handler = evsel->handler;
+
+	if (handler == NULL)
+		return 0;
+
+	return handler(tool, evsel, sample, machine);
+}
+
+/*
+ * COMM event callback: run the generic comm processing, then flag the
+ * affected thread's runtime data so the new name gets printed on its next
+ * appearance.
+ *
+ * Returns the error from perf_event__process_comm() if it fails, -1 when the
+ * thread or its runtime data cannot be obtained, and 0 otherwise.
+ */
+static int perf_sched__process_comm(struct perf_tool *tool __maybe_unused,
+				    union perf_event *event,
+				    struct perf_sample *sample,
+				    struct machine *machine)
+{
+	struct thread_runtime *runtime;
+	struct thread *th;
+	int ret;
+
+	ret = perf_event__process_comm(tool, event, sample, machine);
+	if (ret != 0)
+		return ret;
+
+	th = machine__find_thread(machine, sample->pid, sample->tid);
+	if (th == NULL) {
+		pr_err("Internal error: can't find thread\n");
+		return -1;
+	}
+
+	ret = -1;
+	runtime = thread__get_runtime(th);
+	if (runtime != NULL) {
+		runtime->comm_changed = true;
+		ret = 0;
+	}
+
+	/* drop the reference taken by machine__find_thread() */
+	thread__put(th);
+
+	return ret;
+}
+
+/*
+ * Open the input perf data file, attach the sched tracepoint handlers and
+ * process every event in it.
+ *
+ * When the session contains tracepoint data, the event/lost counters of
+ * @sched are filled in from the session's evlist statistics after a
+ * successful run.  A session without traces is not treated as an error.
+ *
+ * Returns 0 on success and -1 if the session cannot be created, the handlers
+ * cannot be attached, or event processing fails.
+ */
+static int perf_sched__read_events(struct perf_sched *sched)
+{
+	const struct perf_evsel_str_handler handlers[] = {
+		{ "sched:sched_switch",	      process_sched_switch_event, },
+		{ "sched:sched_stat_runtime", process_sched_runtime_event, },
+		{ "sched:sched_wakeup",	      process_sched_wakeup_event, },
+		{ "sched:sched_wakeup_new",   process_sched_wakeup_event, },
+		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
+	};
+	struct perf_session *session;
+	struct perf_data data = {
+		.file      = {
+			.path = input_name,
+		},
+		.mode      = PERF_DATA_MODE_READ,
+		.force     = sched->force,
+	};
+	int rc = -1;
+
+	session = perf_session__new(&data, false, &sched->tool);
+	if (session == NULL) {
+		pr_debug("No Memory for session\n");
+		return -1;
+	}
+
+	symbol__init(&session->header.env);
+
+	if (perf_session__set_tracepoints_handlers(session, handlers))
+		goto out_delete;
+
+	if (perf_session__has_traces(session, "record -R")) {
+		int err = perf_session__process_events(session);
+		if (err) {
+			/* terminate the line like every other message here */
+			pr_err("Failed to process events, error %d\n", err);
+			goto out_delete;
+		}
+
+		sched->nr_events      = session->evlist->stats.nr_events[0];
+		sched->nr_lost_events = session->evlist->stats.total_lost;
+		sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST];
+	}
+
+	rc = 0;
+out_delete:
+	/* single exit path so the session is always released */
+	perf_session__delete(session);
+	return rc;
+}
+
+/*
+ * Print a nanosecond quantity as "msec.usec" followed by a space.  The
+ * millisecond part is right-aligned in a field of @width characters and the
+ * microsecond remainder is always three zero-padded digits.
+ */
+static inline void print_sched_time(unsigned long long nsecs, int width)
+{
+	unsigned long msecs = nsecs / NSEC_PER_MSEC;
+	unsigned long usecs;
+
+	/* what is left after removing the whole milliseconds */
+	nsecs -= msecs * NSEC_PER_MSEC;
+	usecs = nsecs / NSEC_PER_USEC;
+
+	printf("%*lu.%03lu ", width, msecs, usecs);
+}
+
+/*
+ * Return the evsel_runtime hanging off evsel->priv, allocating a zeroed one
+ * on first use.  Returns NULL if that allocation fails.
+ */
+static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel)
+{
+	struct evsel_runtime *er = evsel->priv;
+
+	if (er != NULL)
+		return er;
+
+	er = zalloc(sizeof(struct evsel_runtime));
+	evsel->priv = er;
+
+	return er;
+}
+
+/*
+ * Remember @timestamp as the last time this event was seen on @cpu.
+ *
+ * The per-cpu table is grown (to the next power of two above @cpu) when
+ * needed, with new slots zeroed.  If the runtime data or the table cannot be
+ * allocated the timestamp is silently dropped.
+ */
+static void perf_evsel__save_time(struct perf_evsel *evsel,
+				  u64 timestamp, u32 cpu)
+{
+	struct evsel_runtime *er = perf_evsel__get_runtime(evsel);
+
+	if (er == NULL)
+		return;
+
+	if ((er->last_time == NULL) || (cpu >= er->ncpu)) {
+		int idx, nr = __roundup_pow_of_two(cpu+1);
+		void *grown = realloc(er->last_time, nr * sizeof(u64));
+
+		/* on failure the old table is left untouched */
+		if (grown == NULL)
+			return;
+
+		er->last_time = grown;
+		for (idx = er->ncpu; idx < nr; idx++)
+			er->last_time[idx] = (u64) 0;
+
+		er->ncpu = nr;
+	}
+
+	er->last_time[cpu] = timestamp;
+}
+
+/*
+ * Return the timestamp last saved for this event on @cpu, or 0 when nothing
+ * has been recorded for that cpu (or the runtime data is unavailable).
+ */
+static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu)
+{
+	struct evsel_runtime *er = perf_evsel__get_runtime(evsel);
+
+	if (er != NULL && er->last_time != NULL && cpu < er->ncpu)
+		return er->last_time[cpu];
+
+	return 0;
+}
+
+/*
+ * Column width used when printing task names in timehist output; starts at
+ * 30 and is widened by timehist_get_commstr() when a longer name is seen.
+ */
+static int comm_width = 30;
+
+/*
+ * Format a display name for @thread:
+ *   pid 0       -> "comm"
+ *   tid == pid  -> "comm[tid]"
+ *   otherwise   -> "comm[tid/pid]"
+ *
+ * comm_width is raised if the formatted name is longer than the current
+ * column.  The result lives in a static buffer that is overwritten by the
+ * next call.
+ */
+static char *timehist_get_commstr(struct thread *thread)
+{
+	static char name[32];
+	const char *comm = thread__comm_str(thread);
+	pid_t pid = thread->pid_;
+	pid_t tid = thread->tid;
+	int len;
+
+	if (pid == 0)
+		len = scnprintf(name, sizeof(name), "%s", comm);
+	else if (tid == pid)
+		len = scnprintf(name, sizeof(name), "%s[%d]", comm, tid);
+	else
+		len = scnprintf(name, sizeof(name), "%s[%d/%d]", comm, tid, pid);
+
+	if (comm_width < len)
+		comm_width = len;
+
+	return name;
+}
+
+/*
+ * Print the three-line timehist table header: column titles, a units row and
+ * a dotted separator.  The optional cpu-visual and state columns are included
+ * only when the corresponding sched options are set, so the header lines up
+ * with the rows printed by timehist_print_sample().
+ */
+static void timehist_header(struct perf_sched *sched)
+{
+	u32 ncpus = sched->max_cpu + 1;
+	u32 i, j;
+
+	printf("%15s %6s ", "time", "cpu");
+
+	if (sched->show_cpu_visual) {
+		printf(" ");
+		/* one hex digit per cpu, wrapping back to 0 after f */
+		for (i = 0, j = 0; i < ncpus; ++i) {
+			printf("%x", j++);
+			if (j > 15)
+				j = 0;
+		}
+		printf(" ");
+	}
+
+	printf(" %-*s  %9s  %9s  %9s", comm_width,
+		"task name", "wait time", "sch delay", "run time");
+
+	if (sched->show_state)
+		printf("  %s", "state");
+
+	printf("\n");
+
+	/*
+	 * units row
+	 */
+	printf("%15s %-6s ", "", "");
+
+	/* blank padding as wide as the cpu-visual column */
+	if (sched->show_cpu_visual)
+		printf(" %*s ", ncpus, "");
+
+	printf(" %-*s  %9s  %9s  %9s", comm_width,
+	       "[tid/pid]", "(msec)", "(msec)", "(msec)");
+
+	if (sched->show_state)
+		printf("  %5s", "");
+
+	printf("\n");
+
+	/*
+	 * separator
+	 */
+	/* precision limits how much of graph_dotted_line each column shows */
+	printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line);
+
+	if (sched->show_cpu_visual)
+		printf(" %.*s ", ncpus, graph_dotted_line);
+
+	printf(" %.*s  %.9s  %.9s  %.9s", comm_width,
+		graph_dotted_line, graph_dotted_line, graph_dotted_line,
+		graph_dotted_line);
+
+	if (sched->show_state)
+		printf("  %.5s", graph_dotted_line);
+
+	printf("\n");
+}
+
+/*
+ * Map a task state bitmask to its one-letter code.
+ *
+ * Threads with tid 0 are always reported as 'I' (idle).  Otherwise the
+ * lowest set bit of @state selects a character from TASK_STATE_TO_CHAR_STR
+ * (index 0 when no bit is set); states beyond the table yield '?'.
+ */
+static char task_state_char(struct thread *thread, int state)
+{
+	static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
+	unsigned idx = 0;
+
+	/* 'I' for idle */
+	if (thread->tid == 0)
+		return 'I';
+
+	if (state)
+		idx = ffs(state);
+
+	/* the last array element is the string's NUL terminator */
+	if (idx >= sizeof(state_to_char) - 1)
+		return '?';
+
+	return state_to_char[idx];
+}
+
+/*
+ * Print one timehist row for a sched-out event of @thread at time @t:
+ * timestamp, cpu, optional cpu-visual column, task name, wait / sched-delay /
+ * run times, then the optional state, next-task and callchain columns.
+ *
+ * The thread's private data is read without a NULL check, so it must already
+ * have been set up by the time this is called.
+ */
+static void timehist_print_sample(struct perf_sched *sched,
+				  struct perf_evsel *evsel,
+				  struct perf_sample *sample,
+				  struct addr_location *al,
+				  struct thread *thread,
+				  u64 t, int state)
+{
+	struct thread_runtime *tr = thread__priv(thread);
+	const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
+	const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+	u32 max_cpus = sched->max_cpu + 1;
+	char tstr[64];
+	char nstr[30];
+	u64 wait_time;
+
+	timestamp__scnprintf_usec(t, tstr, sizeof(tstr));
+	printf("%15s [%04d] ", tstr, sample->cpu);
+
+	if (sched->show_cpu_visual) {
+		u32 i;
+		char c;
+
+		printf(" ");
+		for (i = 0; i < max_cpus; ++i) {
+			/* flag idle times with 'i'; others are sched events */
+			if (i == sample->cpu)
+				c = (thread->tid == 0) ? 'i' : 's';
+			else
+				c = ' ';
+			printf("%c", c);
+		}
+		printf(" ");
+	}
+
+	printf(" %-*s ", comm_width, timehist_get_commstr(thread));
+
+	/* "wait time" column is the sum of all off-cpu categories */
+	wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt;
+	print_sched_time(wait_time, 6);
+
+	print_sched_time(tr->dt_delay, 6);
+	print_sched_time(tr->dt_run, 6);
+
+	if (sched->show_state)
+		printf(" %5c ", task_state_char(thread, state));
+
+	if (sched->show_next) {
+		/* snprintf truncates to fit nstr if the name is too long */
+		snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid);
+		printf(" %-*s", comm_width, nstr);
+	}
+
+	/* keep the columns aligned with the wakeup rows */
+	if (sched->show_wakeups && !sched->show_next)
+		printf("  %-*s", comm_width, "");
+
+	/* no callchain is printed for the idle task */
+	if (thread->tid == 0)
+		goto out;
+
+	if (sched->show_callchain)
+		printf("  ");
+
+	sample__fprintf_sym(sample, al, 0,
+			    EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
+			    EVSEL__PRINT_CALLCHAIN_ARROW |
+			    EVSEL__PRINT_SKIP_IGNORED,
+			    &callchain_cursor, stdout);
+
+out:
+	printf("\n");
+}
+
+/*
+ * Explanation of delta-time stats:
+ *
+ *            t = time of current schedule out event
+ *        tprev = time of previous sched out event
+ *                also time of schedule-in event for current task
+ *    last_time = time of last sched change event for current task
+ *                (i.e, time process was last scheduled out)
+ * ready_to_run = time of wakeup for current task
+ *
+ * -----|------------|------------|------------|------
+ *    last         ready        tprev          t
+ *    time         to run
+ *
+ *      |-------- dt_wait --------|
+ *                   |- dt_delay -|-- dt_run --|
+ *
+ *   dt_run = run time of current task
+ *  dt_wait = time between last schedule out event for task and tprev
+ *            represents time spent off the cpu
+ * dt_delay = time between wakeup and schedule-in of task
+ *
+ * dt_wait is not stored as such: it is booked as dt_preempt, dt_iowait or
+ * dt_sleep depending on the state the task was in when last scheduled out.
+ */
+
+/*
+ * Recompute the per-event deltas of @r for a sched-out at @t whose matching
+ * sched-in was at @tprev, then fold them into the running totals and the
+ * run-time statistics.  With @tprev == 0 all deltas stay zero, but the
+ * (zero) run time is still added to the statistics.
+ */
+static void timehist_update_runtime_stats(struct thread_runtime *r,
+					 u64 t, u64 tprev)
+{
+	r->dt_delay   = 0;
+	r->dt_sleep   = 0;
+	r->dt_iowait  = 0;
+	r->dt_preempt = 0;
+	r->dt_run     = 0;
+
+	if (tprev) {
+		r->dt_run = t - tprev;
+		if (r->ready_to_run) {
+			/* inconsistent timestamps: leave dt_delay at 0 */
+			if (r->ready_to_run > tprev)
+				pr_debug("time travel: wakeup time for task > previous sched_switch event\n");
+			else
+				r->dt_delay = tprev - r->ready_to_run;
+		}
+
+		if (r->last_time > tprev)
+			pr_debug("time travel: last sched out time for task > previous sched_switch event\n");
+		else if (r->last_time) {
+			u64 dt_wait = tprev - r->last_time;
+
+			/* classify the off-cpu time by the previous state */
+			if (r->last_state == TASK_RUNNING)
+				r->dt_preempt = dt_wait;
+			else if (r->last_state == TASK_UNINTERRUPTIBLE)
+				r->dt_iowait = dt_wait;
+			else
+				r->dt_sleep = dt_wait;
+		}
+	}
+
+	update_stats(&r->run_stats, r->dt_run);
+
+	r->total_run_time     += r->dt_run;
+	r->total_delay_time   += r->dt_delay;
+	r->total_sleep_time   += r->dt_sleep;
+	r->total_iowait_time  += r->dt_iowait;
+	r->total_preempt_time += r->dt_preempt;
+}
+
+/*
+ * Tell whether a sample belongs to the idle task (pid 0 == swapper).
+ *
+ * For sched:sched_switch the task of interest is the one being switched
+ * out, so "prev_pid" is examined; for any other event the sample's own pid
+ * is used.
+ */
+static bool is_idle_sample(struct perf_sample *sample,
+			   struct perf_evsel *evsel)
+{
+	if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") != 0)
+		return sample->pid == 0;
+
+	return perf_evsel__intval(evsel, sample, "prev_pid") == 0;
+}
+
+/*
+ * Resolve the sample's callchain into the global callchain_cursor and mark
+ * the scheduler entry points ("schedule", "__schedule", "preempt_schedule")
+ * as ignored so they can be skipped when the chain is printed.
+ *
+ * Nothing is resolved when callchains are disabled or the sample carries
+ * none.  Resolution failures are reported only in verbose mode.
+ *
+ * The thread looked up here is used only for the duration of the call, so
+ * the reference obtained from machine__findnew_thread() is dropped on every
+ * exit path.
+ */
+static void save_task_callchain(struct perf_sched *sched,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine)
+{
+	struct callchain_cursor *cursor = &callchain_cursor;
+	struct thread *thread;
+
+	/* want main thread for process - has maps */
+	thread = machine__findnew_thread(machine, sample->pid, sample->pid);
+	if (thread == NULL) {
+		pr_debug("Failed to get thread for pid %d.\n", sample->pid);
+		return;
+	}
+
+	if (!symbol_conf.use_callchain || sample->callchain == NULL)
+		goto out_put;
+
+	/* two extra entries beyond the user-requested stack depth */
+	if (thread__resolve_callchain(thread, cursor, evsel, sample,
+				      NULL, NULL, sched->max_stack + 2) != 0) {
+		if (verbose > 0)
+			pr_err("Failed to resolve callchain. Skipping\n");
+
+		goto out_put;
+	}
+
+	callchain_cursor_commit(cursor);
+
+	while (true) {
+		struct callchain_cursor_node *node;
+		struct symbol *sym;
+
+		node = callchain_cursor_current(cursor);
+		if (node == NULL)
+			break;
+
+		sym = node->sym;
+		if (sym) {
+			if (!strcmp(sym->name, "schedule") ||
+			    !strcmp(sym->name, "__schedule") ||
+			    !strcmp(sym->name, "preempt_schedule"))
+				sym->ignore = 1;
+		}
+
+		callchain_cursor_advance(cursor);
+	}
+
+out_put:
+	thread__put(thread);
+}
+
+/*
+ * Turn @thread into an idle-task placeholder: give it the idle comm and
+ * attach a zeroed idle_thread_runtime with initialised stats, callchain root
+ * and cursor as its private data.
+ *
+ * Returns 0 on success or -ENOMEM if the runtime data cannot be allocated
+ * (the comm has already been set by then).
+ */
+static int init_idle_thread(struct thread *thread)
+{
+	struct idle_thread_runtime *idle_rt;
+
+	thread__set_comm(thread, idle_comm, 0);
+
+	idle_rt = zalloc(sizeof(struct idle_thread_runtime));
+	if (!idle_rt)
+		return -ENOMEM;
+
+	init_stats(&idle_rt->tr.run_stats);
+	callchain_init(&idle_rt->callchain);
+	callchain_cursor_reset(&idle_rt->cursor);
+
+	thread__set_priv(thread, idle_rt);
+
+	return 0;
+}
+
+/*
+ * Idle statistics are kept per cpu in a private thread struct for each
+ * cpu's idle task.  Allocate the table for @ncpu cpus and create and
+ * initialise every entry.
+ *
+ * Returns 0 on success or a negative errno.  On failure the entries created
+ * so far stay in the table; nothing is released here.
+ */
+static int init_idle_threads(int ncpu)
+{
+	int cpu;
+
+	idle_threads = zalloc(ncpu * sizeof(struct thread *));
+	if (idle_threads == NULL)
+		return -ENOMEM;
+
+	idle_max_cpu = ncpu;
+
+	for (cpu = 0; cpu < ncpu; cpu++) {
+		struct thread *idle = thread__new(0, 0);
+		int err;
+
+		idle_threads[cpu] = idle;
+		if (idle == NULL)
+			return -ENOMEM;
+
+		err = init_idle_thread(idle);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Delete every per-cpu idle thread that was created and release the table
+ * itself.  Does nothing if the table was never allocated.
+ */
+static void free_idle_threads(void)
+{
+	int cpu;
+
+	if (!idle_threads)
+		return;
+
+	for (cpu = 0; cpu < idle_max_cpu; cpu++) {
+		struct thread *idle = idle_threads[cpu];
+
+		if (idle != NULL)
+			thread__delete(idle);
+	}
+
+	free(idle_threads);
+}
+
+/*
+ * Return the private idle thread for @cpu, growing the table and creating
+ * the thread on demand.
+ *
+ * Returns NULL when @cpu is negative, when the table cannot be grown, or when
+ * the thread cannot be created or initialised.  A thread is stored in the
+ * table only once it is fully initialised, so a failed attempt never leaves
+ * behind an entry without idle runtime data for later callers to pick up.
+ */
+static struct thread *get_idle_thread(int cpu)
+{
+	/* a negative cpu would index in front of the table */
+	if (cpu < 0)
+		return NULL;
+
+	/*
+	 * expand/allocate array of pointers to local thread
+	 * structs if needed
+	 */
+	if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) {
+		int i, j = __roundup_pow_of_two(cpu+1);
+		void *p;
+
+		p = realloc(idle_threads, j * sizeof(struct thread *));
+		if (!p)
+			return NULL;
+
+		idle_threads = (struct thread **) p;
+		for (i = idle_max_cpu; i < j; ++i)
+			idle_threads[i] = NULL;
+
+		idle_max_cpu = j;
+	}
+
+	/* allocate a new thread struct if needed */
+	if (idle_threads[cpu] == NULL) {
+		struct thread *idle = thread__new(0, 0);
+
+		if (idle == NULL)
+			return NULL;
+
+		if (init_idle_thread(idle) < 0) {
+			/* do not publish a half-initialised thread */
+			thread__delete(idle);
+			return NULL;
+		}
+
+		idle_threads[cpu] = idle;
+	}
+
+	return idle_threads[cpu];
+}
+
+/*
+ * Copy the global callchain cursor into the idle thread's own cursor, but
+ * only when callchains are enabled and the sample actually has one.
+ */
+static void save_idle_callchain(struct idle_thread_runtime *itr,
+				struct perf_sample *sample)
+{
+	if (symbol_conf.use_callchain && sample->callchain != NULL)
+		callchain_cursor__copy(&itr->cursor, &callchain_cursor);
+}
+
+/*
+ * Pick the thread a timehist sample should be accounted to.
+ *
+ * Idle samples map to the per-cpu private idle thread.  Any other sample maps
+ * to the machine's thread for the sample's pid/tid; its callchain is saved
+ * and, in idle-hist mode, the thread is remembered as the last one that ran
+ * on the cpu (with the callchain copied when the cpu is about to go idle).
+ *
+ * Returns NULL if the thread (or, in idle-hist mode, the cpu's idle thread or
+ * its private data) cannot be obtained.
+ */
+static struct thread *timehist_get_thread(struct perf_sched *sched,
+					  struct perf_sample *sample,
+					  struct machine *machine,
+					  struct perf_evsel *evsel)
+{
+	struct thread *thread;
+
+	if (is_idle_sample(sample, evsel)) {
+		thread = get_idle_thread(sample->cpu);
+		if (thread == NULL)
+			pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
+
+	} else {
+		/* there were samples with tid 0 but non-zero pid */
+		thread = machine__findnew_thread(machine, sample->pid,
+						 sample->tid ?: sample->pid);
+		if (thread == NULL) {
+			pr_debug("Failed to get thread for tid %d. skipping sample.\n",
+				 sample->tid);
+		}
+
+		/* note: runs even when the lookup above failed */
+		save_task_callchain(sched, sample, evsel, machine);
+		if (sched->idle_hist) {
+			struct thread *idle;
+			struct idle_thread_runtime *itr;
+
+			idle = get_idle_thread(sample->cpu);
+			if (idle == NULL) {
+				pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
+				return NULL;
+			}
+
+			itr = thread__priv(idle);
+			if (itr == NULL)
+				return NULL;
+
+			/* stored as a plain pointer; no thread__get() is done here */
+			itr->last_thread = thread;
+
+			/* copy task callchain when entering to idle */
+			if (perf_evsel__intval(evsel, sample, "next_pid") == 0)
+				save_idle_callchain(itr, sample);
+		}
+	}
+
+	return thread;
+}
+
+/*
+ * Decide whether a sample should be left out of the timehist output.
+ *
+ * A sample is skipped when its thread is filtered (this also bumps
+ * sched->skipped_samples).  In idle-hist mode it is additionally skipped
+ * unless it is a sched:sched_switch event that enters or leaves idle, i.e.
+ * "prev_pid" or "next_pid" is 0.
+ */
+static bool timehist_skip_sample(struct perf_sched *sched,
+				 struct thread *thread,
+				 struct perf_evsel *evsel,
+				 struct perf_sample *sample)
+{
+	bool skip = false;
+
+	if (thread__is_filtered(thread)) {
+		sched->skipped_samples++;
+		skip = true;
+	}
+
+	if (!sched->idle_hist)
+		return skip;
+
+	/* only context switches matter for idle accounting */
+	if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") != 0)
+		return true;
+
+	/* task-to-task switch: the cpu neither enters nor leaves idle */
+	if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 &&
+	    perf_evsel__intval(evsel, sample, "next_pid") != 0)
+		return true;
+
+	return skip;
+}
+
+/*
+ * Print one "awakened:" row: timestamp, cpu, the waking task (taken from the
+ * sample's pid/tid) and the task that was woken (@awakened).
+ *
+ * The row is suppressed only if both tasks are filtered out.  @awakened is
+ * borrowed from the caller; the waker looked up here is released before
+ * returning.
+ */
+static void timehist_print_wakeup_event(struct perf_sched *sched,
+					struct perf_evsel *evsel,
+					struct perf_sample *sample,
+					struct machine *machine,
+					struct thread *awakened)
+{
+	struct thread *thread;
+	char tstr[64];
+
+	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+	if (thread == NULL)
+		return;
+
+	/* show wakeup unless both awakee and awaker are filtered */
+	if (timehist_skip_sample(sched, thread, evsel, sample) &&
+	    timehist_skip_sample(sched, awakened, evsel, sample)) {
+		goto out_put;
+	}
+
+	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
+	printf("%15s [%04d] ", tstr, sample->cpu);
+	if (sched->show_cpu_visual)
+		printf(" %*s ", sched->max_cpu + 1, "");
+
+	printf(" %-*s ", comm_width, timehist_get_commstr(thread));
+
+	/* dt spacer */
+	printf("  %9s  %9s  %9s ", "", "", "");
+
+	printf("awakened: %s", timehist_get_commstr(awakened));
+
+	printf("\n");
+
+out_put:
+	/* drop the reference taken by machine__findnew_thread() */
+	thread__put(thread);
+}
+
+/*
+ * timehist handler for wakeup events: record when the awakened task became
+ * ready to run (only the first wakeup since its last sched-out counts) and
+ * optionally print the wakeup.
+ *
+ * Returns 0 on success, -1 if the awakened thread or its runtime data cannot
+ * be obtained.  The thread reference taken for the lookup is released on all
+ * paths.
+ */
+static int timehist_sched_wakeup_event(struct perf_tool *tool,
+				       union perf_event *event __maybe_unused,
+				       struct perf_evsel *evsel,
+				       struct perf_sample *sample,
+				       struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+	struct thread *thread;
+	struct thread_runtime *tr = NULL;
+	/* want pid of awakened task not pid in sample */
+	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+	int rc = -1;
+
+	thread = machine__findnew_thread(machine, 0, pid);
+	if (thread == NULL)
+		return -1;
+
+	tr = thread__get_runtime(thread);
+	if (tr == NULL)
+		goto out_put;
+
+	if (tr->ready_to_run == 0)
+		tr->ready_to_run = sample->time;
+
+	/* show wakeups if requested */
+	if (sched->show_wakeups &&
+	    !perf_time__skip_sample(&sched->ptime, sample->time))
+		timehist_print_wakeup_event(sched, evsel, sample, machine, thread);
+
+	rc = 0;
+out_put:
+	/* drop the reference taken by machine__findnew_thread() */
+	thread__put(thread);
+	return rc;
+}
+
+/*
+ * Print one "migrated:" row: timestamp, cpu, optional cpu-visual column with
+ * an 'm' on the sample's cpu, the task that triggered the migration (from
+ * the sample's pid/tid), the migrated task and its origin/destination cpus.
+ *
+ * Nothing is printed in summary-only mode or when both tasks are filtered.
+ * @migrated is borrowed from the caller; the thread looked up here is
+ * released before returning.
+ */
+static void timehist_print_migration_event(struct perf_sched *sched,
+					struct perf_evsel *evsel,
+					struct perf_sample *sample,
+					struct machine *machine,
+					struct thread *migrated)
+{
+	struct thread *thread;
+	char tstr[64];
+	u32 max_cpus;
+	u32 ocpu, dcpu;
+
+	if (sched->summary_only)
+		return;
+
+	max_cpus = sched->max_cpu + 1;
+	ocpu = perf_evsel__intval(evsel, sample, "orig_cpu");
+	dcpu = perf_evsel__intval(evsel, sample, "dest_cpu");
+
+	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+	if (thread == NULL)
+		return;
+
+	if (timehist_skip_sample(sched, thread, evsel, sample) &&
+	    timehist_skip_sample(sched, migrated, evsel, sample)) {
+		goto out_put;
+	}
+
+	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
+	printf("%15s [%04d] ", tstr, sample->cpu);
+
+	if (sched->show_cpu_visual) {
+		u32 i;
+		char c;
+
+		printf("  ");
+		for (i = 0; i < max_cpus; ++i) {
+			c = (i == sample->cpu) ? 'm' : ' ';
+			printf("%c", c);
+		}
+		printf("  ");
+	}
+
+	printf(" %-*s ", comm_width, timehist_get_commstr(thread));
+
+	/* dt spacer */
+	printf("  %9s  %9s  %9s ", "", "", "");
+
+	printf("migrated: %s", timehist_get_commstr(migrated));
+	printf(" cpu %d => %d", ocpu, dcpu);
+
+	printf("\n");
+
+out_put:
+	/* drop the reference taken by machine__findnew_thread() */
+	thread__put(thread);
+}
+
+/*
+ * timehist handler for sched:sched_migrate_task: count the migration against
+ * the migrated task and print it (the print helper decides whether output is
+ * wanted).
+ *
+ * Returns 0 on success, -1 if the migrated thread or its runtime data cannot
+ * be obtained.  The thread reference taken for the lookup is released on all
+ * paths.
+ */
+static int timehist_migrate_task_event(struct perf_tool *tool,
+				       union perf_event *event __maybe_unused,
+				       struct perf_evsel *evsel,
+				       struct perf_sample *sample,
+				       struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+	struct thread *thread;
+	struct thread_runtime *tr = NULL;
+	/* want pid of migrated task not pid in sample */
+	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+	int rc = -1;
+
+	thread = machine__findnew_thread(machine, 0, pid);
+	if (thread == NULL)
+		return -1;
+
+	tr = thread__get_runtime(thread);
+	if (tr == NULL)
+		goto out_put;
+
+	tr->migrations++;
+
+	/* show migrations if requested */
+	timehist_print_migration_event(sched, evsel, sample, machine, thread);
+
+	rc = 0;
+out_put:
+	/* drop the reference taken by machine__findnew_thread() */
+	thread__put(thread);
+	return rc;
+}
+
+/*
+ * Core timehist handler for a sched-out (context switch) sample.
+ *
+ * Resolves the sample, picks the thread to account it to, clamps the
+ * [tprev, t] interval to the user's time window, updates the thread's
+ * runtime statistics (and, in idle-hist mode, those of the last task that
+ * ran before the cpu went idle) and prints the row unless only the summary
+ * was requested.
+ *
+ * The code at the "out" label also runs for skipped and failed samples, so
+ * the histogram time range and the evsel's per-cpu last-seen time are always
+ * kept current.
+ *
+ * Returns 0 on success (including skipped samples), -1 on failure.
+ */
+static int timehist_sched_change_event(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct perf_evsel *evsel,
+				       struct perf_sample *sample,
+				       struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+	struct perf_time_interval *ptime = &sched->ptime;
+	struct addr_location al;
+	struct thread *thread;
+	struct thread_runtime *tr = NULL;
+	u64 tprev, t = sample->time;
+	int rc = 0;
+	int state = perf_evsel__intval(evsel, sample, "prev_state");
+
+
+	if (machine__resolve(machine, &al, sample) < 0) {
+		pr_err("problem processing %d event. skipping it\n",
+		       event->header.type);
+		rc = -1;
+		goto out;
+	}
+
+	thread = timehist_get_thread(sched, sample, machine, evsel);
+	if (thread == NULL) {
+		rc = -1;
+		goto out;
+	}
+
+	if (timehist_skip_sample(sched, thread, evsel, sample))
+		goto out;
+
+	tr = thread__get_runtime(thread);
+	if (tr == NULL) {
+		rc = -1;
+		goto out;
+	}
+
+	/* time of the previous event of this type on this cpu */
+	tprev = perf_evsel__get_time(evsel, sample->cpu);
+
+	/*
+	 * If start time given:
+	 * - sample time is under window user cares about - skip sample
+	 * - tprev is under window user cares about  - reset to start of window
+	 */
+	if (ptime->start && ptime->start > t)
+		goto out;
+
+	if (tprev && ptime->start > tprev)
+		tprev = ptime->start;
+
+	/*
+	 * If end time given:
+	 * - previous sched event is out of window - we are done
+	 * - sample time is beyond window user cares about - reset it
+	 *   to close out stats for time window interest
+	 */
+	if (ptime->end) {
+		if (tprev > ptime->end)
+			goto out;
+
+		if (t > ptime->end)
+			t = ptime->end;
+	}
+
+	/* in idle-hist mode only the idle thread's stats are updated here */
+	if (!sched->idle_hist || thread->tid == 0) {
+		timehist_update_runtime_stats(tr, t, tprev);
+
+		if (sched->idle_hist) {
+			/*
+			 * idle threads carry an idle_thread_runtime as their
+			 * private data (see init_idle_thread()), and it is
+			 * accessed through ->tr elsewhere in this file
+			 */
+			struct idle_thread_runtime *itr = (void *)tr;
+			struct thread_runtime *last_tr;
+
+			BUG_ON(thread->tid != 0);
+
+			if (itr->last_thread == NULL)
+				goto out;
+
+			/* add current idle time as last thread's runtime */
+			last_tr = thread__get_runtime(itr->last_thread);
+			if (last_tr == NULL)
+				goto out;
+
+			timehist_update_runtime_stats(last_tr, t, tprev);
+			/*
+			 * remove delta time of last thread as it's not updated
+			 * and otherwise it will show an invalid value next
+			 * time.  we only care total run time and run stat.
+			 */
+			last_tr->dt_run = 0;
+			last_tr->dt_delay = 0;
+			last_tr->dt_sleep = 0;
+			last_tr->dt_iowait = 0;
+			last_tr->dt_preempt = 0;
+
+			/* charge this idle period to the saved callchain */
+			if (itr->cursor.nr)
+				callchain_append(&itr->callchain, &itr->cursor, t - tprev);
+
+			itr->last_thread = NULL;
+		}
+	}
+
+	if (!sched->summary_only)
+		timehist_print_sample(sched, evsel, sample, &al, thread, t, state);
+
+out:
+	/* track the first and last in-window timestamps for the summary */
+	if (sched->hist_time.start == 0 && t >= ptime->start)
+		sched->hist_time.start = t;
+	if (ptime->end == 0 || t <= ptime->end)
+		sched->hist_time.end = t;
+
+	if (tr) {
+		/* time of this sched_switch event becomes last time task seen */
+		tr->last_time = sample->time;
+
+		/* last state is used to determine where to account wait time */
+		tr->last_state = state;
+
+		/* sched out event for task so reset ready to run time */
+		tr->ready_to_run = 0;
+	}
+
+	perf_evsel__save_time(evsel, sample->time, sample->cpu);
+
+	return rc;
+}
+
+/*
+ * timehist handler for sched:sched_switch; a thin wrapper that forwards all
+ * arguments (including @machine, despite its __maybe_unused marker) to
+ * timehist_sched_change_event() and returns its result.
+ */
+static int timehist_sched_switch_event(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_evsel *evsel,
+			     struct perf_sample *sample,
+			     struct machine *machine __maybe_unused)
+{
+	return timehist_sched_change_event(tool, event, evsel, sample, machine);
+}
+
+/*
+ * LOST event callback: print the sample time followed by the number of
+ * events lost and the cpu they were lost on.  Always returns 0.
+ */
+static int process_lost(struct perf_tool *tool __maybe_unused,
+			union perf_event *event,
+			struct perf_sample *sample,
+			struct machine *machine __maybe_unused)
+{
+	char stamp[64];
+
+	timestamp__scnprintf_usec(sample->time, stamp, sizeof(stamp));
+
+	printf("%15s ", stamp);
+	printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu);
+
+	return 0;
+}
+
+
+/*
+ * Print one row of the run-time summary for thread @t: name, parent pid,
+ * number of runs, total run time, min / average / max run time, relative
+ * standard deviation and migration count.
+ */
+static void print_thread_runtime(struct thread *t,
+				 struct thread_runtime *r)
+{
+	double mean = avg_stats(&r->run_stats);
+	float stddev;
+
+	printf("%*s   %5d  %9" PRIu64 " ",
+	       comm_width, timehist_get_commstr(t), t->ppid,
+	       (u64) r->run_stats.n);
+
+	print_sched_time(r->total_run_time, 8);
+	/* stddev is expressed relative to the mean */
+	stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean);
+	print_sched_time(r->run_stats.min, 6);
+	printf(" ");
+	print_sched_time((u64) mean, 6);
+	printf(" ");
+	print_sched_time(r->run_stats.max, 6);
+	printf("  ");
+	printf("%5.2f", stddev);
+	printf("   %5" PRIu64, r->migrations);
+	printf("\n");
+}
+
+/*
+ * Print one row of the wait-time summary for thread @t: name, parent pid,
+ * number of runs, total run time, then total sleep, iowait, preempt and
+ * scheduling-delay times.
+ */
+static void print_thread_waittime(struct thread *t,
+				  struct thread_runtime *r)
+{
+	printf("%*s   %5d  %9" PRIu64 " ",
+	       comm_width, timehist_get_commstr(t), t->ppid,
+	       (u64) r->run_stats.n);
+
+	print_sched_time(r->total_run_time, 8);
+	print_sched_time(r->total_sleep_time, 6);
+	printf(" ");
+	print_sched_time(r->total_iowait_time, 6);
+	printf(" ");
+	print_sched_time(r->total_preempt_time, 6);
+	printf(" ");
+	print_sched_time(r->total_delay_time, 6);
+	printf("\n");
+}
+
+/*
+ * Accumulator handed to the per-thread summary callbacks through their
+ * priv argument.
+ */
+struct total_run_stats {
+	struct perf_sched *sched;	/* options consulted while printing */
+	u64  sched_count;		/* sum of run_stats.n over counted threads */
+	u64  task_count;		/* number of threads that were printed */
+	u64  total_run_time;		/* sum of the threads' total_run_time */
+};
+
+/*
+ * Per-thread summary callback body: for an unfiltered thread that has
+ * runtime data with at least one recorded run, add it to the totals in @priv
+ * (a struct total_run_stats) and print its wait-time or run-time row
+ * depending on the show_state option.  Always returns 0.
+ */
+static int __show_thread_runtime(struct thread *t, void *priv)
+{
+	struct total_run_stats *totals = priv;
+	struct thread_runtime *tr;
+
+	if (thread__is_filtered(t))
+		return 0;
+
+	tr = thread__priv(t);
+	if (!tr || !tr->run_stats.n)
+		return 0;
+
+	totals->task_count++;
+	totals->sched_count += tr->run_stats.n;
+	totals->total_run_time += tr->total_run_time;
+
+	if (totals->sched->show_state)
+		print_thread_waittime(t, tr);
+	else
+		print_thread_runtime(t, tr);
+
+	return 0;
+}
+
+/* Summary callback for threads that are still alive; dead ones are skipped. */
+static int show_thread_runtime(struct thread *t, void *priv)
+{
+	return t->dead ? 0 : __show_thread_runtime(t, priv);
+}
+
+/* Summary callback for terminated threads; live ones are skipped. */
+static int show_deadthread_runtime(struct thread *t, void *priv)
+{
+	return t->dead ? __show_thread_runtime(t, priv) : 0;
+}
+
+/*
+ * Print the callchain ending at @node on a single line, entries separated by
+ * " <- ".  The parent chain is printed first through recursion, so entries
+ * come out from the root of the tree towards @node.  Context markers
+ * (ip >= PERF_CONTEXT_MAX) and symbols flagged as ignored are left out.
+ *
+ * Returns the sum of the fprintf() results, 0 for a NULL @node.
+ */
+static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
+{
+	const char *sep = " <- ";
+	struct callchain_list *chain;
+	size_t ret = 0;
+	char bf[1024];
+	bool first;
+
+	if (node == NULL)
+		return 0;
+
+	ret = callchain__fprintf_folded(fp, node->parent);
+	/* no separator before the very first entry on the line */
+	first = (ret == 0);
+
+	list_for_each_entry(chain, &node->val, list) {
+		if (chain->ip >= PERF_CONTEXT_MAX)
+			continue;
+		if (chain->ms.sym && chain->ms.sym->ignore)
+			continue;
+		ret += fprintf(fp, "%s%s", first ? "" : sep,
+			       callchain_list__sym_name(chain, bf, sizeof(bf),
+							false));
+		first = false;
+	}
+
+	return ret;
+}
+
+/*
+ * Print the idle-time table for one cpu: for every callchain in the sorted
+ * tree @root, the accumulated idle time, the hit count and the folded
+ * callchain.
+ *
+ * Returns a running count of characters written; the header lines are not
+ * included in it.
+ */
+static size_t timehist_print_idlehist_callchain(struct rb_root *root)
+{
+	size_t ret = 0;
+	FILE *fp = stdout;
+	struct callchain_node *chain;
+	struct rb_node *rb_node = rb_first(root);
+
+	printf("  %16s  %8s  %s\n", "Idle time (msec)", "Count", "Callchains");
+	printf("  %.16s  %.8s  %.50s\n", graph_dotted_line, graph_dotted_line,
+	       graph_dotted_line);
+
+	while (rb_node) {
+		chain = rb_entry(rb_node, struct callchain_node, rb_node);
+		rb_node = rb_next(rb_node);
+
+		ret += fprintf(fp, "  ");
+		print_sched_time(chain->hit, 12);
+		/*
+		 * print_sched_time() returns nothing, so its output is
+		 * accounted for by hand as 2nd arg + 4.
+		 * NOTE(review): its "%*lu.%03lu " format emits at least
+		 * width + 5 characters, so 16 looks one short - confirm.
+		 */
+		ret += 16;
+		ret += fprintf(fp, " %8d  ", chain->count);
+		ret += callchain__fprintf_folded(fp, chain);
+		ret += fprintf(fp, "\n");
+	}
+
+	return ret;
+}
+
+/*
+ * Print the end-of-run timehist summary: a per-thread table (idle-time,
+ * wait-time or run-time flavour depending on the options) for live and then
+ * terminated tasks, per-cpu idle statistics, optionally the idle time broken
+ * down by callchain, and finally the overall totals.
+ *
+ * Everything after the per-thread tables is omitted when samples were
+ * skipped outside idle-hist mode.
+ */
+static void timehist_print_summary(struct perf_sched *sched,
+				   struct perf_session *session)
+{
+	struct machine *m = &session->machines.host;
+	struct total_run_stats totals;
+	u64 task_count;
+	struct thread *t;
+	struct thread_runtime *r;
+	int i;
+	/* length of the observed time window */
+	u64 hist_time = sched->hist_time.end - sched->hist_time.start;
+
+	memset(&totals, 0, sizeof(totals));
+	totals.sched = sched;
+
+	if (sched->idle_hist) {
+		printf("\nIdle-time summary\n");
+		printf("%*s  parent  sched-out  ", comm_width, "comm");
+		printf("  idle-time   min-idle    avg-idle    max-idle  stddev  migrations\n");
+	} else if (sched->show_state) {
+		printf("\nWait-time summary\n");
+		printf("%*s  parent   sched-in  ", comm_width, "comm");
+		printf("   run-time      sleep      iowait     preempt       delay\n");
+	} else {
+		printf("\nRuntime summary\n");
+		printf("%*s  parent   sched-in  ", comm_width, "comm");
+		printf("   run-time    min-run     avg-run     max-run  stddev  migrations\n");
+	}
+	printf("%*s            (count)  ", comm_width, "");
+	printf("     (msec)     (msec)      (msec)      (msec)       %s\n",
+	       sched->show_state ? "(msec)" : "%");
+	printf("%.117s\n", graph_dotted_line);
+
+	machine__for_each_thread(m, show_thread_runtime, &totals);
+	/* remember the live-task count to detect whether any dead task prints */
+	task_count = totals.task_count;
+	if (!task_count)
+		printf("<no still running tasks>\n");
+
+	printf("\nTerminated tasks:\n");
+	machine__for_each_thread(m, show_deadthread_runtime, &totals);
+	if (task_count == totals.task_count)
+		printf("<no terminated tasks>\n");
+
+	/* CPU idle stats not tracked when samples were skipped */
+	if (sched->skipped_samples && !sched->idle_hist)
+		return;
+
+	printf("\nIdle stats:\n");
+	for (i = 0; i < idle_max_cpu; ++i) {
+		t = idle_threads[i];
+		if (!t)
+			continue;
+
+		r = thread__priv(t);
+		if (r && r->run_stats.n) {
+			/* idle-task switches count as context switches too */
+			totals.sched_count += r->run_stats.n;
+			printf("    CPU %2d idle for ", i);
+			print_sched_time(r->total_run_time, 6);
+			printf(" msec  (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time);
+		} else
+			printf("    CPU %2d idle entire time window\n", i);
+	}
+
+	if (sched->idle_hist && symbol_conf.use_callchain) {
+		/* folded chains valued by period, for the per-cpu tables below */
+		callchain_param.mode  = CHAIN_FOLDED;
+		callchain_param.value = CCVAL_PERIOD;
+
+		callchain_register_param(&callchain_param);
+
+		printf("\nIdle stats by callchain:\n");
+		for (i = 0; i < idle_max_cpu; ++i) {
+			struct idle_thread_runtime *itr;
+
+			t = idle_threads[i];
+			if (!t)
+				continue;
+
+			itr = thread__priv(t);
+			if (itr == NULL)
+				continue;
+
+			callchain_param.sort(&itr->sorted_root, &itr->callchain,
+					     0, &callchain_param);
+
+			printf("  CPU %2d:", i);
+			print_sched_time(itr->tr.total_run_time, 6);
+			printf(" msec\n");
+			timehist_print_idlehist_callchain(&itr->sorted_root);
+			printf("\n");
+		}
+	}
+
+	printf("\n"
+	       "    Total number of unique tasks: %" PRIu64 "\n"
+	       "Total number of context switches: %" PRIu64 "\n",
+	       totals.task_count, totals.sched_count);
+
+	printf("           Total run time (msec): ");
+	print_sched_time(totals.total_run_time, 2);
+	printf("\n");
+
+	printf("    Total scheduling time (msec): ");
+	print_sched_time(hist_time, 2);
+	printf(" (x %d)\n", sched->max_cpu);
+}
+
+/*
+ * Signature of the per-evsel callbacks stored in evsel->handler and invoked
+ * by perf_timehist__process_sample().
+ */
+typedef int (*sched_handler)(struct perf_tool *tool, union perf_event *event,
+			     struct perf_evsel *evsel, struct perf_sample *sample,
+			     struct machine *machine);
+
+/*
+ * Sample callback installed for timehist: records the highest CPU number seen
+ * so far in sched->max_cpu and forwards the sample to the handler attached to
+ * the evsel, if there is one.
+ *
+ * Returns the handler's return value, or 0 when the evsel has no handler.
+ */
+static int perf_timehist__process_sample(struct perf_tool *tool,
+					 union perf_event *event,
+					 struct perf_sample *sample,
+					 struct perf_evsel *evsel,
+					 struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+	sched_handler handler = evsel->handler;
+	int cpu = sample->cpu;
+
+	if (sched->max_cpu < cpu)
+		sched->max_cpu = cpu;
+
+	if (handler == NULL)
+		return 0;
+
+	/* Note the argument order: the handler takes evsel before sample. */
+	return handler(tool, event, evsel, sample, machine);
+}
+
+/*
+ * Walk every evsel of the evlist: make sure its runtime data can be obtained
+ * and, if callchain display was requested but an evsel was recorded without
+ * PERF_SAMPLE_CALLCHAIN, switch callchain display off.
+ *
+ * Returns 0 on success, -1 if runtime data could not be obtained.
+ */
+static int timehist_check_attr(struct perf_sched *sched,
+			       struct perf_evlist *evlist)
+{
+	struct perf_evsel *pos;
+
+	list_for_each_entry(pos, &evlist->entries, node) {
+		if (perf_evsel__get_runtime(pos) == NULL) {
+			pr_err("Failed to allocate memory for evsel runtime data\n");
+			return -1;
+		}
+
+		if (!sched->show_callchain)
+			continue;
+
+		if (pos->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+			continue;
+
+		pr_info("Samples do not have callchains.\n");
+		sched->show_callchain = 0;
+		symbol_conf.use_callchain = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Run 'perf sched timehist': open the input file as a perf session, install
+ * the timehist tracepoint handlers, process all events and, when requested,
+ * print the summary.
+ *
+ * Returns 0 on success and a non-zero error value on failure.  Every failure
+ * after the session has been created leaves through the 'out' label so that
+ * the session and the idle-thread table are released.
+ */
+static int perf_sched__timehist(struct perf_sched *sched)
+{
+	const struct perf_evsel_str_handler handlers[] = {
+		{ "sched:sched_switch",       timehist_sched_switch_event, },
+		{ "sched:sched_wakeup",	      timehist_sched_wakeup_event, },
+		{ "sched:sched_wakeup_new",   timehist_sched_wakeup_event, },
+	};
+	const struct perf_evsel_str_handler migrate_handlers[] = {
+		{ "sched:sched_migrate_task", timehist_migrate_task_event, },
+	};
+	struct perf_data data = {
+		.file      = {
+			.path = input_name,
+		},
+		.mode      = PERF_DATA_MODE_READ,
+		.force     = sched->force,
+	};
+
+	struct perf_session *session;
+	struct perf_evlist *evlist;
+	int err = -1;
+
+	/*
+	 * event handlers for timehist option
+	 */
+	sched->tool.sample	 = perf_timehist__process_sample;
+	sched->tool.mmap	 = perf_event__process_mmap;
+	sched->tool.comm	 = perf_event__process_comm;
+	sched->tool.exit	 = perf_event__process_exit;
+	sched->tool.fork	 = perf_event__process_fork;
+	sched->tool.lost	 = process_lost;
+	sched->tool.attr	 = perf_event__process_attr;
+	sched->tool.tracing_data = perf_event__process_tracing_data;
+	sched->tool.build_id	 = perf_event__process_build_id;
+
+	sched->tool.ordered_events = true;
+	sched->tool.ordering_requires_timestamps = true;
+
+	symbol_conf.use_callchain = sched->show_callchain;
+
+	session = perf_session__new(&data, false, &sched->tool);
+	if (session == NULL)
+		return -ENOMEM;
+
+	evlist = session->evlist;
+
+	symbol__init(&session->header.env);
+
+	if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) {
+		pr_err("Invalid time string\n");
+		/* The session already exists: release it via the common exit. */
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (timehist_check_attr(sched, evlist) != 0)
+		goto out;
+
+	setup_pager();
+
+	/* setup per-evsel handlers */
+	if (perf_session__set_tracepoints_handlers(session, handlers))
+		goto out;
+
+	/* sched_switch event at a minimum needs to exist */
+	if (!perf_evlist__find_tracepoint_by_name(session->evlist,
+						  "sched:sched_switch")) {
+		pr_err("No sched_switch events found. Have you run 'perf sched record'?\n");
+		goto out;
+	}
+
+	if (sched->show_migrations &&
+	    perf_session__set_tracepoints_handlers(session, migrate_handlers))
+		goto out;
+
+	/* pre-allocate struct for per-CPU idle stats */
+	sched->max_cpu = session->header.env.nr_cpus_online;
+	if (sched->max_cpu == 0)
+		sched->max_cpu = 4;
+	if (init_idle_threads(sched->max_cpu))
+		goto out;
+
+	/* summary_only implies summary option, but don't overwrite summary if set */
+	if (sched->summary_only)
+		sched->summary = sched->summary_only;
+
+	if (!sched->summary_only)
+		timehist_header(sched);
+
+	err = perf_session__process_events(session);
+	if (err) {
+		pr_err("Failed to process events, error %d\n", err);
+		goto out;
+	}
+
+	sched->nr_events      = evlist->stats.nr_events[0];
+	sched->nr_lost_events = evlist->stats.total_lost;
+	sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST];
+
+	if (sched->summary)
+		timehist_print_summary(sched, session);
+
+out:
+	free_idle_threads();
+	perf_session__delete(session);
+
+	return err;
+}
+
+
+/*
+ * Print one INFO line for each kind of anomaly that was counted while
+ * processing events: unordered timestamps, lost events and context switch
+ * bugs.  A line is only printed when both its counter and the total it is
+ * compared against are non-zero.
+ */
+static void print_bad_events(struct perf_sched *sched)
+{
+	if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
+		double pct = (double)sched->nr_unordered_timestamps /
+			     (double)sched->nr_timestamps * 100.0;
+
+		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
+		       pct, sched->nr_unordered_timestamps, sched->nr_timestamps);
+	}
+
+	if (sched->nr_lost_events && sched->nr_events) {
+		double pct = (double)sched->nr_lost_events /
+			     (double)sched->nr_events * 100.0;
+
+		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
+		       pct, sched->nr_lost_events, sched->nr_events,
+		       sched->nr_lost_chunks);
+	}
+
+	if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
+		double pct = (double)sched->nr_context_switch_bugs /
+			     (double)sched->nr_timestamps * 100.0;
+
+		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
+		       pct, sched->nr_context_switch_bugs, sched->nr_timestamps);
+		/* Lost events are flagged as a possible cause. */
+		if (sched->nr_lost_events)
+			printf(" (due to lost events?)");
+		printf("\n");
+	}
+}
+
+/*
+ * Insert 'data' into the rb-tree 'root', which is keyed by the comm string of
+ * the entry's thread.
+ *
+ * If an entry with the same comm already exists, the statistics and work list
+ * of 'data' are folded into it and 'data' is released, including the thread
+ * reference it holds (the entries that survive have theirs dropped by
+ * perf_sched__lat()).  Otherwise 'data' itself is linked into the tree.
+ *
+ * The caller must have removed 'data' from any other tree beforehand.
+ */
+static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct work_atoms *this;
+	const char *comm = thread__comm_str(data->thread), *this_comm;
+
+	while (*new) {
+		int cmp;
+
+		this = container_of(*new, struct work_atoms, node);
+		parent = *new;
+
+		this_comm = thread__comm_str(this->thread);
+		cmp = strcmp(comm, this_comm);
+		if (cmp > 0) {
+			new = &((*new)->rb_left);
+		} else if (cmp < 0) {
+			new = &((*new)->rb_right);
+		} else {
+			this->num_merged++;
+			this->total_runtime += data->total_runtime;
+			this->nb_atoms += data->nb_atoms;
+			this->total_lat += data->total_lat;
+			list_splice(&data->work_list, &this->work_list);
+			if (this->max_lat < data->max_lat) {
+				this->max_lat = data->max_lat;
+				this->max_lat_at = data->max_lat_at;
+			}
+			/* 'data' disappears here: drop its thread reference too. */
+			thread__zput(data->thread);
+			zfree(&data);
+			return;
+		}
+	}
+
+	data->num_merged++;
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+}
+
+/*
+ * Unless merging is disabled (sched->skip_merge), drain sched->atom_root and
+ * re-insert every entry into sched->merged_atom_root via
+ * __merge_work_atoms().
+ */
+static void perf_sched__merge_lat(struct perf_sched *sched)
+{
+	struct rb_node *node;
+
+	if (sched->skip_merge)
+		return;
+
+	for (node = rb_first(&sched->atom_root); node != NULL;
+	     node = rb_first(&sched->atom_root)) {
+		struct work_atoms *atoms = rb_entry(node, struct work_atoms, node);
+
+		/* Unlink from the source tree before handing the entry over. */
+		rb_erase(node, &sched->atom_root);
+		__merge_work_atoms(&sched->merged_atom_root, atoms);
+	}
+}
+
+/*
+ * Run 'perf sched latency': read the events, merge and sort the per-task
+ * work atoms, then print the latency table followed by the totals and any
+ * bad-event notes.
+ *
+ * Returns 0 on success, -1 if the events could not be read.
+ */
+static int perf_sched__lat(struct perf_sched *sched)
+{
+	struct rb_node *node, *following;
+
+	setup_pager();
+
+	if (perf_sched__read_events(sched))
+		return -1;
+
+	perf_sched__merge_lat(sched);
+	perf_sched__sort_lat(sched);
+
+	printf("\n -----------------------------------------------------------------------------------------------------------------\n");
+	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at       |\n");
+	printf(" -----------------------------------------------------------------------------------------------------------------\n");
+
+	for (node = rb_first(&sched->sorted_atom_root); node; node = following) {
+		struct work_atoms *atoms = rb_entry(node, struct work_atoms, node);
+
+		output_lat_thread(sched, atoms);
+		following = rb_next(node);
+		/* The entry has been printed: drop its thread reference. */
+		thread__zput(atoms->thread);
+	}
+
+	printf(" -----------------------------------------------------------------------------------------------------------------\n");
+	printf("  TOTAL:                |%11.3f ms |%9" PRIu64 " |\n",
+		(double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count);
+
+	printf(" ---------------------------------------------------\n");
+
+	print_bad_events(sched);
+	printf("\n");
+
+	return 0;
+}
+
+/*
+ * Prepare the CPU-related state for 'perf sched map': record the number of
+ * configured processors in sched->max_cpu, allocate the compact-mode CPU
+ * array when compact mode is on, and parse the optional CPU list string.
+ *
+ * Returns 0 on success, -1 on allocation or parse failure.
+ */
+static int setup_map_cpus(struct perf_sched *sched)
+{
+	struct cpu_map *cpus;
+
+	sched->max_cpu  = sysconf(_SC_NPROCESSORS_CONF);
+
+	if (sched->map.comp) {
+		sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
+		if (sched->map.comp_cpus == NULL)
+			return -1;
+	}
+
+	/* No CPU list given: nothing more to set up. */
+	if (sched->map.cpus_str == NULL)
+		return 0;
+
+	cpus = cpu_map__new(sched->map.cpus_str);
+	if (cpus != NULL) {
+		sched->map.cpus = cpus;
+		return 0;
+	}
+
+	pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
+	return -1;
+}
+
+/*
+ * Parse the optional string of thread ids to highlight into a thread map and
+ * store it in sched->map.color_pids.
+ *
+ * Returns 0 on success or when no string was given, -1 on parse failure.
+ */
+static int setup_color_pids(struct perf_sched *sched)
+{
+	struct thread_map *threads;
+
+	if (sched->map.color_pids_str == NULL)
+		return 0;
+
+	threads = thread_map__new_by_tid_str(sched->map.color_pids_str);
+	if (threads != NULL) {
+		sched->map.color_pids = threads;
+		return 0;
+	}
+
+	pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
+	return -1;
+}
+
+/*
+ * Parse the optional string of CPUs to highlight into a cpu map and store it
+ * in sched->map.color_cpus.
+ *
+ * Returns 0 on success or when no string was given, -1 on parse failure.
+ */
+static int setup_color_cpus(struct perf_sched *sched)
+{
+	struct cpu_map *map;
+
+	if (!sched->map.color_cpus_str)
+		return 0;
+
+	map = cpu_map__new(sched->map.color_cpus_str);
+	if (!map) {
+		/* This is a CPU map, so word the error like setup_map_cpus(). */
+		pr_err("failed to get cpus map from %s\n", sched->map.color_cpus_str);
+		return -1;
+	}
+
+	sched->map.color_cpus = map;
+	return 0;
+}
+
+/*
+ * Run 'perf sched map': set up the CPU map and the highlight maps, read the
+ * events and report any bad events that were counted.
+ *
+ * Returns 0 on success, -1 if any setup step or the event read fails.
+ */
+static int perf_sched__map(struct perf_sched *sched)
+{
+	/* The setup steps run in this order and stop at the first failure. */
+	if (setup_map_cpus(sched) ||
+	    setup_color_pids(sched) ||
+	    setup_color_cpus(sched))
+		return -1;
+
+	setup_pager();
+
+	if (perf_sched__read_events(sched) != 0)
+		return -1;
+
+	print_bad_events(sched);
+	return 0;
+}
+
+/*
+ * Run 'perf sched replay': calibrate the measurement overheads, read the
+ * events, print the event counters, build the tasks and run the test
+ * sched->replay_repeat times.
+ *
+ * Returns 0 on success, -1 if the events could not be read.
+ */
+static int perf_sched__replay(struct perf_sched *sched)
+{
+	unsigned long iteration = 0;
+
+	calibrate_run_measurement_overhead(sched);
+	calibrate_sleep_measurement_overhead(sched);
+
+	test_calibrations(sched);
+
+	if (perf_sched__read_events(sched) != 0)
+		return -1;
+
+	printf("nr_run_events:        %ld\n", sched->nr_run_events);
+	printf("nr_sleep_events:      %ld\n", sched->nr_sleep_events);
+	printf("nr_wakeup_events:     %ld\n", sched->nr_wakeup_events);
+
+	/* The remaining counters are only reported when they are non-zero. */
+	if (sched->targetless_wakeups != 0)
+		printf("target-less wakeups:  %ld\n", sched->targetless_wakeups);
+	if (sched->multitarget_wakeups != 0)
+		printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups);
+	if (sched->nr_run_events_optimized != 0)
+		printf("run atoms optimized: %ld\n",
+			sched->nr_run_events_optimized);
+
+	print_task_traces(sched);
+	add_cross_task_wakeups(sched);
+
+	create_tasks(sched);
+	printf("------------------------------------------------------------\n");
+
+	while (iteration < sched->replay_repeat) {
+		run_one_test(sched);
+		iteration++;
+	}
+
+	return 0;
+}
+
+/*
+ * Split sched->sort_order on commas and spaces and register every key in
+ * sched->sort_list; an unknown key is reported through
+ * usage_with_options_msg().  A "pid" dimension is always added to
+ * sched->cmp_pid afterwards.
+ *
+ * The sort string is tokenized on a private copy because strtok_r() writes
+ * into its input.  If that copy cannot be allocated the keys are skipped with
+ * an error message instead of handing a NULL string to strtok_r().
+ */
+static void setup_sorting(struct perf_sched *sched, const struct option *options,
+			  const char * const usage_msg[])
+{
+	char *tmp, *tok, *str = strdup(sched->sort_order);
+
+	if (str == NULL) {
+		pr_err("Failed to allocate memory for sort keys\n");
+	} else {
+		for (tok = strtok_r(str, ", ", &tmp);
+				tok; tok = strtok_r(NULL, ", ", &tmp)) {
+			if (sort_dimension__add(tok, &sched->sort_list) < 0) {
+				usage_with_options_msg(usage_msg, options,
+						"Unknown --sort key: `%s'", tok);
+			}
+		}
+
+		free(str);
+	}
+
+	sort_dimension__add("pid", &sched->cmp_pid);
+}
+
+/*
+ * Run 'perf sched record': build an argument vector made of the fixed
+ * 'perf record' options below followed by the caller's arguments (argv[0],
+ * the subcommand name, is dropped) and hand it to cmd_record().
+ *
+ * Returns the value of cmd_record(), or -ENOMEM if the argument vector could
+ * not be allocated.
+ */
+static int __cmd_record(int argc, const char **argv)
+{
+	unsigned int rec_argc, i, j;
+	const char **rec_argv;
+	const char * const record_args[] = {
+		"record",
+		"-a",
+		"-R",
+		"-m", "1024",
+		"-c", "1",
+		"-e", "sched:sched_switch",
+		"-e", "sched:sched_stat_wait",
+		"-e", "sched:sched_stat_sleep",
+		"-e", "sched:sched_stat_iowait",
+		"-e", "sched:sched_stat_runtime",
+		"-e", "sched:sched_process_fork",
+		"-e", "sched:sched_wakeup",
+		"-e", "sched:sched_wakeup_new",
+		"-e", "sched:sched_migrate_task",
+	};
+	/*
+	 * Private list of the strings duplicated here.  It is kept separate
+	 * from rec_argv so they can still be freed if the callee reorders the
+	 * vector it is given.
+	 */
+	char *dup_args[ARRAY_SIZE(record_args)];
+	unsigned int nr_dup = 0;
+	int ret = -ENOMEM;
+
+	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(record_args); i++) {
+		dup_args[i] = strdup(record_args[i]);
+		if (dup_args[i] == NULL)
+			goto out_free;
+		nr_dup++;
+		rec_argv[i] = dup_args[i];
+	}
+
+	for (j = 1; j < (unsigned int)argc; j++, i++)
+		rec_argv[i] = argv[j];
+
+	BUG_ON(i != rec_argc);
+
+	ret = cmd_record(i, rec_argv);
+
+out_free:
+	for (i = 0; i < nr_dup; i++)
+		free(dup_args[i]);
+	free(rec_argv);
+
+	return ret;
+}
+
+/*
+ * Entry point of 'perf sched'.
+ *
+ * Parses the options common to all subcommands, then dispatches on argv[0]:
+ * "script" is forwarded to cmd_script(), "rec..." to __cmd_record(), and
+ * "lat...", "map", "rep..." and "timehist" each parse their own option table
+ * (which chains to the common one through OPT_PARENT) before running the
+ * matching perf_sched__*() function.
+ *
+ * Returns the value of the subcommand that was run, or -EINVAL when mutually
+ * exclusive timehist options were combined.
+ */
+int cmd_sched(int argc, const char **argv)
+{
+	const char default_sort_order[] = "avg, max, switch, runtime";
+	struct perf_sched sched = {
+		.tool = {
+			.sample		 = perf_sched__process_tracepoint_sample,
+			.comm		 = perf_sched__process_comm,
+			.namespaces	 = perf_event__process_namespaces,
+			.lost		 = perf_event__process_lost,
+			.fork		 = perf_sched__process_fork_event,
+			.ordered_events = true,
+		},
+		.cmp_pid	      = LIST_HEAD_INIT(sched.cmp_pid),
+		.sort_list	      = LIST_HEAD_INIT(sched.sort_list),
+		.start_work_mutex     = PTHREAD_MUTEX_INITIALIZER,
+		.work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER,
+		.sort_order	      = default_sort_order,
+		.replay_repeat	      = 10,
+		.profile_cpu	      = -1,
+		.next_shortname1      = 'A',
+		.next_shortname2      = '0',
+		.skip_merge           = 0,
+		.show_callchain	      = 1,
+		.max_stack            = 5,
+	};
+	/* Options shared by every subcommand. */
+	const struct option sched_options[] = {
+	OPT_STRING('i', "input", &input_name, "file",
+		    "input file name"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
+	OPT_END()
+	};
+	const struct option latency_options[] = {
+	OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
+		   "sort by key(s): runtime, switch, avg, max"),
+	OPT_INTEGER('C', "CPU", &sched.profile_cpu,
+		    "CPU to profile on"),
+	OPT_BOOLEAN('p', "pids", &sched.skip_merge,
+		    "latency stats per pid instead of per comm"),
+	OPT_PARENT(sched_options)
+	};
+	const struct option replay_options[] = {
+	OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
+		     "repeat the workload replay N times (-1: infinite)"),
+	OPT_PARENT(sched_options)
+	};
+	const struct option map_options[] = {
+	OPT_BOOLEAN(0, "compact", &sched.map.comp,
+		    "map output in compact mode"),
+	OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
+		   "highlight given pids in map"),
+	OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
+                    "highlight given CPUs in map"),
+	OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
+                    "display given CPUs in map"),
+	OPT_PARENT(sched_options)
+	};
+	const struct option timehist_options[] = {
+	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+		   "file", "vmlinux pathname"),
+	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+		   "file", "kallsyms pathname"),
+	OPT_BOOLEAN('g', "call-graph", &sched.show_callchain,
+		    "Display call chains if present (default on)"),
+	OPT_UINTEGER(0, "max-stack", &sched.max_stack,
+		   "Maximum number of functions to display backtrace."),
+	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+		    "Look for files with symbols relative to this directory"),
+	OPT_BOOLEAN('s', "summary", &sched.summary_only,
+		    "Show only syscall summary with statistics"),
+	OPT_BOOLEAN('S', "with-summary", &sched.summary,
+		    "Show all syscalls and summary with statistics"),
+	OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"),
+	OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"),
+	OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"),
+	OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"),
+	OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"),
+	OPT_STRING(0, "time", &sched.time_str, "str",
+		   "Time span for analysis (start,stop)"),
+	OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"),
+	OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
+		   "analyze events only for given process id(s)"),
+	OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
+		   "analyze events only for given thread id(s)"),
+	OPT_PARENT(sched_options)
+	};
+
+	const char * const latency_usage[] = {
+		"perf sched latency [<options>]",
+		NULL
+	};
+	const char * const replay_usage[] = {
+		"perf sched replay [<options>]",
+		NULL
+	};
+	const char * const map_usage[] = {
+		"perf sched map [<options>]",
+		NULL
+	};
+	const char * const timehist_usage[] = {
+		"perf sched timehist [<options>]",
+		NULL
+	};
+	const char *const sched_subcommands[] = { "record", "latency", "map",
+						  "replay", "script",
+						  "timehist", NULL };
+	/*
+	 * Left empty here; presumably filled in by parse_options_subcommand()
+	 * from sched_subcommands -- confirm against that helper.
+	 */
+	const char *sched_usage[] = {
+		NULL,
+		NULL
+	};
+	/* Per-mode event callback tables, selected through sched.tp_handler. */
+	struct trace_sched_handler lat_ops  = {
+		.wakeup_event	    = latency_wakeup_event,
+		.switch_event	    = latency_switch_event,
+		.runtime_event	    = latency_runtime_event,
+		.migrate_task_event = latency_migrate_task_event,
+	};
+	struct trace_sched_handler map_ops  = {
+		.switch_event	    = map_switch_event,
+	};
+	struct trace_sched_handler replay_ops  = {
+		.wakeup_event	    = replay_wakeup_event,
+		.switch_event	    = replay_switch_event,
+		.fork_event	    = replay_fork_event,
+	};
+	unsigned int i;
+
+	/* Every slot of curr_pid starts out as -1. */
+	for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
+		sched.curr_pid[i] = -1;
+
+	argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
+					sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+	/*
+	 * NOTE(review): argv[0] is read below, so usage_with_options() is
+	 * presumably expected not to return -- confirm.
+	 */
+	if (!argc)
+		usage_with_options(sched_usage, sched_options);
+
+	/*
+	 * Aliased to 'perf script' for now:
+	 */
+	if (!strcmp(argv[0], "script"))
+		return cmd_script(argc, argv);
+
+	/*
+	 * "rec", "lat" and "rep" are matched on their first three characters
+	 * only; "map" and "timehist" must match exactly.
+	 */
+	if (!strncmp(argv[0], "rec", 3)) {
+		return __cmd_record(argc, argv);
+	} else if (!strncmp(argv[0], "lat", 3)) {
+		sched.tp_handler = &lat_ops;
+		if (argc > 1) {
+			argc = parse_options(argc, argv, latency_options, latency_usage, 0);
+			if (argc)
+				usage_with_options(latency_usage, latency_options);
+		}
+		setup_sorting(&sched, latency_options, latency_usage);
+		return perf_sched__lat(&sched);
+	} else if (!strcmp(argv[0], "map")) {
+		if (argc) {
+			argc = parse_options(argc, argv, map_options, map_usage, 0);
+			if (argc)
+				usage_with_options(map_usage, map_options);
+		}
+		sched.tp_handler = &map_ops;
+		/* The map mode reuses the latency option table and usage for sorting. */
+		setup_sorting(&sched, latency_options, latency_usage);
+		return perf_sched__map(&sched);
+	} else if (!strncmp(argv[0], "rep", 3)) {
+		sched.tp_handler = &replay_ops;
+		if (argc) {
+			argc = parse_options(argc, argv, replay_options, replay_usage, 0);
+			if (argc)
+				usage_with_options(replay_usage, replay_options);
+		}
+		return perf_sched__replay(&sched);
+	} else if (!strcmp(argv[0], "timehist")) {
+		if (argc) {
+			argc = parse_options(argc, argv, timehist_options,
+					     timehist_usage, 0);
+			if (argc)
+				usage_with_options(timehist_usage, timehist_options);
+		}
+		/* Summary-only output cannot be combined with -w or -n. */
+		if ((sched.show_wakeups || sched.show_next) &&
+		    sched.summary_only) {
+			pr_err(" Error: -s and -[n|w] are mutually exclusive.\n");
+			parse_options_usage(timehist_usage, timehist_options, "s", true);
+			if (sched.show_wakeups)
+				parse_options_usage(NULL, timehist_options, "w", true);
+			if (sched.show_next)
+				parse_options_usage(NULL, timehist_options, "n", true);
+			return -EINVAL;
+		}
+
+		return perf_sched__timehist(&sched);
+	} else {
+		usage_with_options(sched_usage, sched_options);
+	}
+
+	return 0;
+}
diff --git a/builtin-script.c b/builtin-script.c
new file mode 100644
index 0000000..e0a9845
--- /dev/null
+++ b/builtin-script.c
@@ -0,0 +1,3515 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "builtin.h"
+
+#include "perf.h"
+#include "util/cache.h"
+#include "util/debug.h"
+#include <subcmd/exec-cmd.h>
+#include "util/header.h"
+#include <subcmd/parse-options.h>
+#include "util/perf_regs.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/trace-event.h"
+#include "util/util.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/sort.h"
+#include "util/data.h"
+#include "util/auxtrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/stat.h"
+#include "util/color.h"
+#include "util/string2.h"
+#include "util/thread-stack.h"
+#include "util/time-utils.h"
+#include "util/path.h"
+#include "print_binary.h"
+#include <linux/bitmap.h>
+#include <linux/kernel.h>
+#include <linux/stringify.h>
+#include <linux/time64.h>
+#include "asm/bug.h"
+#include "util/mem-events.h"
+#include "util/dump-insn.h"
+#include <dirent.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sane_ctype.h"
+
+/*
+ * File-scope state of this file.  Presumably most of these are set from the
+ * command-line options parsed further down -- confirm against the option
+ * table, which is not part of this section.
+ */
+static char const		*script_name;
+static char const		*generate_script_lang;
+static bool			debug_mode;
+static u64			last_timestamp;
+static u64			nr_unordered;
+static bool			no_callchain;
+static bool			latency_format;
+static bool			system_wide;
+static bool			print_flags;
+static bool			nanosecs;
+static const char		*cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+static struct perf_stat_config	stat_config;
+static int			max_blocks;
+
+/* Not static, so it has external linkage; starts out as PERF_MAX_STACK_DEPTH. */
+unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
+
+/*
+ * Output field selectors.  Each constant is a distinct single bit so that
+ * several can be OR-ed together into the 'fields' and 'invalid_fields' masks
+ * of output[].  Bits 0 to 29 are in use.
+ */
+enum perf_output_field {
+	PERF_OUTPUT_COMM            = 1U << 0,
+	PERF_OUTPUT_TID             = 1U << 1,
+	PERF_OUTPUT_PID             = 1U << 2,
+	PERF_OUTPUT_TIME            = 1U << 3,
+	PERF_OUTPUT_CPU             = 1U << 4,
+	PERF_OUTPUT_EVNAME          = 1U << 5,
+	PERF_OUTPUT_TRACE           = 1U << 6,
+	PERF_OUTPUT_IP              = 1U << 7,
+	PERF_OUTPUT_SYM             = 1U << 8,
+	PERF_OUTPUT_DSO             = 1U << 9,
+	PERF_OUTPUT_ADDR            = 1U << 10,
+	PERF_OUTPUT_SYMOFFSET       = 1U << 11,
+	PERF_OUTPUT_SRCLINE         = 1U << 12,
+	PERF_OUTPUT_PERIOD          = 1U << 13,
+	PERF_OUTPUT_IREGS	    = 1U << 14,
+	PERF_OUTPUT_BRSTACK	    = 1U << 15,
+	PERF_OUTPUT_BRSTACKSYM	    = 1U << 16,
+	PERF_OUTPUT_DATA_SRC	    = 1U << 17,
+	PERF_OUTPUT_WEIGHT	    = 1U << 18,
+	PERF_OUTPUT_BPF_OUTPUT	    = 1U << 19,
+	PERF_OUTPUT_CALLINDENT	    = 1U << 20,
+	PERF_OUTPUT_INSN	    = 1U << 21,
+	PERF_OUTPUT_INSNLEN	    = 1U << 22,
+	PERF_OUTPUT_BRSTACKINSN	    = 1U << 23,
+	PERF_OUTPUT_BRSTACKOFF	    = 1U << 24,
+	PERF_OUTPUT_SYNTH           = 1U << 25,
+	PERF_OUTPUT_PHYS_ADDR       = 1U << 26,
+	PERF_OUTPUT_UREGS	    = 1U << 27,
+	PERF_OUTPUT_METRIC	    = 1U << 28,
+	PERF_OUTPUT_MISC            = 1U << 29,
+};
+
+/*
+ * Table pairing each field name string with its PERF_OUTPUT_* bit.
+ * output_field2str() searches it by 'field' to turn a bit back into its name.
+ */
+struct output_option {
+	const char *str;
+	enum perf_output_field field;
+} all_output_options[] = {
+	{.str = "comm",  .field = PERF_OUTPUT_COMM},
+	{.str = "tid",   .field = PERF_OUTPUT_TID},
+	{.str = "pid",   .field = PERF_OUTPUT_PID},
+	{.str = "time",  .field = PERF_OUTPUT_TIME},
+	{.str = "cpu",   .field = PERF_OUTPUT_CPU},
+	{.str = "event", .field = PERF_OUTPUT_EVNAME},
+	{.str = "trace", .field = PERF_OUTPUT_TRACE},
+	{.str = "ip",    .field = PERF_OUTPUT_IP},
+	{.str = "sym",   .field = PERF_OUTPUT_SYM},
+	{.str = "dso",   .field = PERF_OUTPUT_DSO},
+	{.str = "addr",  .field = PERF_OUTPUT_ADDR},
+	{.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
+	{.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
+	{.str = "period", .field = PERF_OUTPUT_PERIOD},
+	{.str = "iregs", .field = PERF_OUTPUT_IREGS},
+	{.str = "uregs", .field = PERF_OUTPUT_UREGS},
+	{.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
+	{.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
+	{.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
+	{.str = "weight",   .field = PERF_OUTPUT_WEIGHT},
+	{.str = "bpf-output",   .field = PERF_OUTPUT_BPF_OUTPUT},
+	{.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
+	{.str = "insn", .field = PERF_OUTPUT_INSN},
+	{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
+	{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
+	{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
+	{.str = "synth", .field = PERF_OUTPUT_SYNTH},
+	{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
+	{.str = "metric", .field = PERF_OUTPUT_METRIC},
+	{.str = "misc", .field = PERF_OUTPUT_MISC},
+};
+
+/*
+ * Index space of output[]: the PERF_TYPE_* values plus one extra slot,
+ * OUTPUT_TYPE_SYNTH, placed at PERF_TYPE_MAX.  output_type() and attr_type()
+ * translate between that slot and PERF_TYPE_SYNTH.
+ */
+enum {
+	OUTPUT_TYPE_SYNTH = PERF_TYPE_MAX,
+	OUTPUT_TYPE_MAX
+};
+
+/*
+ * Per event type output configuration, indexed by output_type(attr->type).
+ *
+ *   user_set       - read by output_set_by_user() and
+ *                    perf_evsel__do_check_stype(); presumably set when the
+ *                    user selected fields for this type explicitly -- the
+ *                    code that sets it is outside this section.
+ *   print_ip_opts  - EVSEL__PRINT_* flags computed by set_print_ip_opts().
+ *   fields         - mask of PERF_OUTPUT_* bits tested by PRINT_FIELD().
+ *   invalid_fields - mask of PERF_OUTPUT_* bits; presumably the fields that
+ *                    may not be requested for this type -- confirm at the
+ *                    point of use.
+ */
+/* default set to maintain compatibility with current format */
+static struct {
+	bool user_set;
+	bool wildcard_set;
+	unsigned int print_ip_opts;
+	u64 fields;
+	u64 invalid_fields;
+} output[OUTPUT_TYPE_MAX] = {
+
+	[PERF_TYPE_HARDWARE] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
+			      PERF_OUTPUT_PERIOD,
+
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+	},
+
+	[PERF_TYPE_SOFTWARE] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
+			      PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT,
+
+		.invalid_fields = PERF_OUTPUT_TRACE,
+	},
+
+	/* The only type initialised here without an invalid_fields mask. */
+	[PERF_TYPE_TRACEPOINT] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+				  PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+				  PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE
+	},
+
+	[PERF_TYPE_RAW] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
+			      PERF_OUTPUT_PERIOD |  PERF_OUTPUT_ADDR |
+			      PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT |
+			      PERF_OUTPUT_PHYS_ADDR,
+
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+	},
+
+	[PERF_TYPE_BREAKPOINT] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
+			      PERF_OUTPUT_PERIOD,
+
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+	},
+
+	[OUTPUT_TYPE_SYNTH] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
+			      PERF_OUTPUT_SYNTH,
+
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+	},
+};
+
+/*
+ * Per-evsel script state, reached through evsel->priv (see evsel_script()).
+ * perf_evsel_script__new() names the file "<data path>.<evsel name>.dump"
+ * and opens it for writing.
+ */
+struct perf_evsel_script {
+       char *filename;		/* allocated by asprintf(), owned by this struct */
+       FILE *fp;		/* stream opened on 'filename' */
+       u64  samples;		/* count reported by perf_evsel_script__fprintf() */
+       /* For metric output */
+       u64  val;
+       int  gnum;
+};
+
+/* Return the evsel's private pointer viewed as its script state. */
+static inline struct perf_evsel_script *evsel_script(struct perf_evsel *evsel)
+{
+	struct perf_evsel_script *es = (struct perf_evsel_script *)evsel->priv;
+
+	return es;
+}
+
+/*
+ * Allocate the script state for 'evsel' and open its dump file, named
+ * "<data file path>.<evsel name>.dump", for writing.
+ *
+ * Returns the new object, or NULL if the allocation, the file name
+ * construction or the fopen() fails; nothing is leaked on failure.
+ */
+static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel,
+							struct perf_data *data)
+{
+	struct perf_evsel_script *es = zalloc(sizeof(*es));
+
+	if (es == NULL)
+		return NULL;
+
+	if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)
+		goto out_free;
+
+	es->fp = fopen(es->filename, "w");
+	if (es->fp != NULL)
+		return es;
+
+	/* fopen() failed: the file name was allocated and must be released. */
+	zfree(&es->filename);
+out_free:
+	free(es);
+	return NULL;
+}
+
+/*
+ * Destroy a script state object: close its dump file, release the file name
+ * and free the object itself.  'es' must not be NULL.
+ */
+static void perf_evsel_script__delete(struct perf_evsel_script *es)
+{
+	fclose(es->fp);
+	es->fp = NULL;
+
+	zfree(&es->filename);
+
+	free(es);
+}
+
+/*
+ * Print a one-line report about the dump file of 'es' to 'fp': its size in
+ * MB, its name and the number of samples.
+ *
+ * If the size cannot be obtained with fstat() it is reported as 0 rather
+ * than read from an uninitialised struct stat.
+ *
+ * Returns the value of fprintf().
+ */
+static int perf_evsel_script__fprintf(struct perf_evsel_script *es, FILE *fp)
+{
+	struct stat st;
+	double mbytes = 0.0;
+
+	if (fstat(fileno(es->fp), &st) == 0)
+		mbytes = st.st_size / 1024.0 / 1024.0;
+
+	return fprintf(fp, "[ perf script: Wrote %.3f MB %s (%" PRIu64 " samples) ]\n",
+		       mbytes, es->filename, es->samples);
+}
+
+/*
+ * Map a perf_event_attr type to its index in output[]: PERF_TYPE_SYNTH goes
+ * to OUTPUT_TYPE_SYNTH, every other value is returned unchanged.
+ */
+static inline int output_type(unsigned int type)
+{
+	if (type == PERF_TYPE_SYNTH)
+		return OUTPUT_TYPE_SYNTH;
+
+	return type;
+}
+
+/*
+ * Inverse of output_type(): map an output[] index back to the attr type.
+ * OUTPUT_TYPE_SYNTH becomes PERF_TYPE_SYNTH, anything else is unchanged.
+ */
+static inline unsigned int attr_type(unsigned int type)
+{
+	if (type == OUTPUT_TYPE_SYNTH)
+		return PERF_TYPE_SYNTH;
+
+	return type;
+}
+
+/* Return true if any entry of output[] has its user_set flag set. */
+static bool output_set_by_user(void)
+{
+	int type = 0;
+
+	while (type < OUTPUT_TYPE_MAX) {
+		if (output[type].user_set)
+			return true;
+		++type;
+	}
+
+	return false;
+}
+
+/*
+ * Look 'field' up in all_output_options and return the name of the first
+ * entry whose bit value equals it, or "" if there is none.
+ */
+static const char *output_field2str(enum perf_output_field field)
+{
+	const struct output_option *opt = all_output_options;
+	const struct output_option *end = opt + ARRAY_SIZE(all_output_options);
+
+	for (; opt < end; ++opt) {
+		if (opt->field == field)
+			return opt->str;
+	}
+
+	return "";
+}
+
+/*
+ * Non-zero when output field 'x' (the suffix of a PERF_OUTPUT_* constant) is
+ * enabled for the event type of 'attr'.  The expansion refers to a variable
+ * named 'attr', which must be in scope wherever the macro is used.
+ */
+#define PRINT_FIELD(x)  (output[output_type(attr->type)].fields & PERF_OUTPUT_##x)
+
+/*
+ * Check that the samples of 'evsel' carry 'sample_type', which output field
+ * 'field' depends on.
+ *
+ * If the bit is present nothing happens.  If it is missing and the user did
+ * not select fields for this event type, 'field' is removed from the default
+ * set with a debug message.  If it is missing and the user did select fields,
+ * that is an error unless 'allow_user_set' is true.
+ *
+ * Returns 0 when output can go ahead, -1 on error.
+ */
+static int perf_evsel__do_check_stype(struct perf_evsel *evsel,
+				      u64 sample_type, const char *sample_msg,
+				      enum perf_output_field field,
+				      bool allow_user_set)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	int type = output_type(attr->type);
+	const char *evname;
+
+	if (attr->sample_type & sample_type)
+		return 0;
+
+	if (!output[type].user_set) {
+		/* user did not ask for it explicitly so remove from the default list */
+		output[type].fields &= ~field;
+		evname = perf_evsel__name(evsel);
+		pr_debug("Samples for '%s' event do not have %s attribute set. "
+			 "Skipping '%s' field.\n",
+			 evname, sample_msg, output_field2str(field));
+		return 0;
+	}
+
+	if (allow_user_set)
+		return 0;
+
+	evname = perf_evsel__name(evsel);
+	pr_err("Samples for '%s' event do not have %s attribute set. "
+	       "Cannot print '%s' field.\n",
+	       evname, sample_msg, output_field2str(field));
+	return -1;
+}
+
+/*
+ * Strict form of perf_evsel__do_check_stype(): a field the user selected
+ * whose sample bit is missing is always an error.
+ */
+static int perf_evsel__check_stype(struct perf_evsel *evsel,
+				   u64 sample_type, const char *sample_msg,
+				   enum perf_output_field field)
+{
+	const bool allow_user_set = false;
+
+	return perf_evsel__do_check_stype(evsel, sample_type, sample_msg,
+					  field, allow_user_set);
+}
+
+/*
+ * Validate the output fields enabled for the event type of 'evsel' against
+ * what the recorded samples contain, and against each other.
+ *
+ * Fields that depend on a sample_type bit go through
+ * perf_evsel__check_stype(); ADDR and CPU use the lenient
+ * perf_evsel__do_check_stype() form when the file has the HEADER_AUXTRACE
+ * feature.  Field combinations that cannot be printed (for instance a symbol
+ * with neither IP nor ADDR) are rejected.
+ *
+ * Nothing is checked when the file has the HEADER_STAT feature.
+ *
+ * Returns 0 if the configuration is usable, -EINVAL otherwise.
+ */
+static int perf_evsel__check_attr(struct perf_evsel *evsel,
+				  struct perf_session *session)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	bool allow_user_set;
+
+	if (perf_header__has_feat(&session->header, HEADER_STAT))
+		return 0;
+
+	allow_user_set = perf_header__has_feat(&session->header,
+					       HEADER_AUXTRACE);
+
+	if (PRINT_FIELD(TRACE) &&
+		!perf_session__has_traces(session, "record -R"))
+		return -EINVAL;
+
+	if (PRINT_FIELD(IP)) {
+		if (perf_evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP",
+					    PERF_OUTPUT_IP))
+			return -EINVAL;
+	}
+
+	if (PRINT_FIELD(ADDR) &&
+		perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
+					   PERF_OUTPUT_ADDR, allow_user_set))
+		return -EINVAL;
+
+	if (PRINT_FIELD(DATA_SRC) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC",
+					PERF_OUTPUT_DATA_SRC))
+		return -EINVAL;
+
+	if (PRINT_FIELD(WEIGHT) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT",
+					PERF_OUTPUT_WEIGHT))
+		return -EINVAL;
+
+	/*
+	 * The checks above may have cleared default fields, so the
+	 * combination checks below see the updated field set.
+	 */
+	if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
+		pr_err("Display of symbols requested but neither sample IP nor "
+			   "sample address\nis selected. Hence, no addresses to convert "
+		       "to symbols.\n");
+		return -EINVAL;
+	}
+	if (PRINT_FIELD(SYMOFFSET) && !PRINT_FIELD(SYM)) {
+		pr_err("Display of offsets requested but symbol is not "
+		       "selected.\n");
+		return -EINVAL;
+	}
+	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
+	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) {
+		pr_err("Display of DSO requested but no address to convert.  Select\n"
+		       "sample IP, sample address, brstack, brstacksym, or brstackoff.\n");
+		return -EINVAL;
+	}
+	if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
+		pr_err("Display of source line number requested but sample IP is not\n"
+		       "selected. Hence, no address to lookup the source line number.\n");
+		return -EINVAL;
+	}
+	if (PRINT_FIELD(BRSTACKINSN) &&
+	    !(perf_evlist__combined_branch_type(session->evlist) &
+	      PERF_SAMPLE_BRANCH_ANY)) {
+		pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
+		       "Hint: run 'perf record -b ...'\n");
+		return -EINVAL;
+	}
+	/* PID and TID both come from PERF_SAMPLE_TID and are dropped together. */
+	if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
+					PERF_OUTPUT_TID|PERF_OUTPUT_PID))
+		return -EINVAL;
+
+	if (PRINT_FIELD(TIME) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME",
+					PERF_OUTPUT_TIME))
+		return -EINVAL;
+
+	if (PRINT_FIELD(CPU) &&
+		perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
+					   PERF_OUTPUT_CPU, allow_user_set))
+		return -EINVAL;
+
+	if (PRINT_FIELD(IREGS) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS",
+					PERF_OUTPUT_IREGS))
+		return -EINVAL;
+
+	if (PRINT_FIELD(UREGS) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS",
+					PERF_OUTPUT_UREGS))
+		return -EINVAL;
+
+	if (PRINT_FIELD(PHYS_ADDR) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
+					PERF_OUTPUT_PHYS_ADDR))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Rebuild output[type].print_ip_opts for attr's event type from the
+ * currently selected IP, SYM, DSO, SYMOFFSET and SRCLINE output fields.
+ */
+static void set_print_ip_opts(struct perf_event_attr *attr)
+{
+	unsigned int idx = output_type(attr->type);
+
+	output[idx].print_ip_opts =
+		(PRINT_FIELD(IP)        ? EVSEL__PRINT_IP        : 0) |
+		(PRINT_FIELD(SYM)       ? EVSEL__PRINT_SYM       : 0) |
+		(PRINT_FIELD(DSO)       ? EVSEL__PRINT_DSO       : 0) |
+		(PRINT_FIELD(SYMOFFSET) ? EVSEL__PRINT_SYMOFFSET : 0) |
+		(PRINT_FIELD(SRCLINE)   ? EVSEL__PRINT_SRCLINE   : 0);
+}
+
+/*
+ * Verify that every event type the user asked for exists in the session
+ * and that its samples carry the data needed for the selected fields,
+ * then settle the callchain-related defaults.
+ *
+ * Returns 0 on success, -1 on the first failed check.
+ */
+static int perf_session__check_output_opt(struct perf_session *session)
+{
+	struct perf_evsel *evsel;
+	unsigned int j;
+
+	for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
+		evsel = perf_session__find_first_evtype(session, attr_type(j));
+
+		if (evsel == NULL) {
+			/*
+			 * An event type named explicitly on the command line
+			 * must exist, even when its field list is empty
+			 * (i.e. show nothing).
+			 */
+			if (output[j].user_set && !output[j].wildcard_set &&
+			    j != OUTPUT_TYPE_SYNTH) {
+				pr_err("%s events do not exist. "
+				       "Remove corresponding -F option to proceed.\n",
+				       event_type(j));
+				return -1;
+			}
+			continue;
+		}
+
+		if (output[j].fields && perf_evsel__check_attr(evsel, session))
+			return -1;
+
+		set_print_ip_opts(&evsel->attr);
+	}
+
+	if (!no_callchain) {
+		bool saw_evsel = false;
+		bool saw_callchain = false;
+
+		evlist__for_each_entry(session->evlist, evsel) {
+			saw_evsel = true;
+			if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
+				saw_callchain = true;
+				break;
+			}
+		}
+		/* an empty evlist leaves use_callchain untouched */
+		if (saw_evsel && !saw_callchain)
+			symbol_conf.use_callchain = false;
+	}
+
+	/*
+	 * Default for tracepoints: print symbols only if callchains are
+	 * present and the user did not pick the tracepoint fields.
+	 */
+	if (!symbol_conf.use_callchain ||
+	    output[PERF_TYPE_TRACEPOINT].user_set)
+		return 0;
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		struct perf_event_attr *attr = &evsel->attr;
+
+		if (attr->type != PERF_TYPE_TRACEPOINT)
+			continue;
+		if (!(attr->sample_type & PERF_SAMPLE_CALLCHAIN))
+			continue;
+
+		output[PERF_TYPE_TRACEPOINT].fields |= PERF_OUTPUT_IP;
+		output[PERF_TYPE_TRACEPOINT].fields |= PERF_OUTPUT_SYM;
+		output[PERF_TYPE_TRACEPOINT].fields |= PERF_OUTPUT_DSO;
+		set_print_ip_opts(attr);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Print the interrupt-time registers of a sample as "name:0xvalue " pairs,
+ * one pair per bit set in attr->sample_regs_intr.
+ *
+ * Returns the number of characters printed, or 0 when the sample carries
+ * no register values.
+ */
+static int perf_sample__fprintf_iregs(struct perf_sample *sample,
+				      struct perf_event_attr *attr, FILE *fp)
+{
+	struct regs_dump *regs = &sample->intr_regs;
+	uint64_t mask = attr->sample_regs_intr;
+	unsigned i = 0, r;
+	int printed = 0;
+
+	/*
+	 * regs is the address of a struct member; what has to be checked
+	 * before indexing is the values array, exactly as
+	 * perf_sample__fprintf_uregs() does.
+	 */
+	if (!regs || !regs->regs)
+		return 0;
+
+	/* values are stored densely, in ascending order of set mask bit */
+	for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
+		u64 val = regs->regs[i++];
+		printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
+	}
+
+	return printed;
+}
+
+/*
+ * Print the user-space registers of a sample: first " ABI:<n> ", then one
+ * "name:0xvalue " pair per bit set in attr->sample_regs_user.
+ *
+ * Returns the number of characters printed, or 0 when the sample carries
+ * no register values.
+ */
+static int perf_sample__fprintf_uregs(struct perf_sample *sample,
+				      struct perf_event_attr *attr, FILE *fp)
+{
+	struct regs_dump *uregs = &sample->user_regs;
+	uint64_t reg_mask = attr->sample_regs_user;
+	unsigned bit, slot = 0;
+	int printed;
+
+	if (!uregs || !uregs->regs)
+		return 0;
+
+	printed = fprintf(fp, " ABI:%" PRIu64 " ", uregs->abi);
+
+	for_each_set_bit(bit, (unsigned long *) &reg_mask, sizeof(reg_mask) * 8) {
+		u64 value = uregs->regs[slot];
+
+		slot++;
+		printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(bit), value);
+	}
+
+	return printed;
+}
+
+/*
+ * Print the leading columns of an output line: comm, pid/tid, cpu, misc
+ * flags and timestamp, each only when the corresponding field is selected.
+ *
+ * @type is only consulted by the MISC column, to choose which
+ * record-specific misc bits to decode.
+ *
+ * Returns the number of characters printed.
+ */
+static int perf_sample__fprintf_start(struct perf_sample *sample,
+				      struct thread *thread,
+				      struct perf_evsel *evsel,
+				      u32 type, FILE *fp)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	unsigned long secs;
+	unsigned long long nsecs;
+	int printed = 0;
+
+	/* comm width depends on latency format and on IP+callchain output */
+	if (PRINT_FIELD(COMM)) {
+		if (latency_format)
+			printed += fprintf(fp, "%8.8s ", thread__comm_str(thread));
+		else if (PRINT_FIELD(IP) && symbol_conf.use_callchain)
+			printed += fprintf(fp, "%s ", thread__comm_str(thread));
+		else
+			printed += fprintf(fp, "%16s ", thread__comm_str(thread));
+	}
+
+	if (PRINT_FIELD(PID) && PRINT_FIELD(TID))
+		printed += fprintf(fp, "%5d/%-5d ", sample->pid, sample->tid);
+	else if (PRINT_FIELD(PID))
+		printed += fprintf(fp, "%5d ", sample->pid);
+	else if (PRINT_FIELD(TID))
+		printed += fprintf(fp, "%5d ", sample->tid);
+
+	if (PRINT_FIELD(CPU)) {
+		if (latency_format)
+			printed += fprintf(fp, "%3d ", sample->cpu);
+		else
+			printed += fprintf(fp, "[%03d] ", sample->cpu);
+	}
+
+	if (PRINT_FIELD(MISC)) {
+		int ret = 0;
+
+		/* true when every bit of PERF_RECORD_MISC_<m> is set in sample->misc */
+		#define has(m) \
+			(sample->misc & PERF_RECORD_MISC_##m) == PERF_RECORD_MISC_##m
+
+		if (has(KERNEL))
+			ret += fprintf(fp, "K");
+		if (has(USER))
+			ret += fprintf(fp, "U");
+		if (has(HYPERVISOR))
+			ret += fprintf(fp, "H");
+		if (has(GUEST_KERNEL))
+			ret += fprintf(fp, "G");
+		if (has(GUEST_USER))
+			ret += fprintf(fp, "g");
+
+		switch (type) {
+		case PERF_RECORD_MMAP:
+		case PERF_RECORD_MMAP2:
+			if (has(MMAP_DATA))
+				ret += fprintf(fp, "M");
+			break;
+		case PERF_RECORD_COMM:
+			if (has(COMM_EXEC))
+				ret += fprintf(fp, "E");
+			break;
+		case PERF_RECORD_SWITCH:
+		case PERF_RECORD_SWITCH_CPU_WIDE:
+			if (has(SWITCH_OUT)) {
+				ret += fprintf(fp, "S");
+				if (sample->misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT)
+					ret += fprintf(fp, "p");
+			}
+		/* no break: falls through to default, which only breaks */
+		default:
+			break;
+		}
+
+		#undef has
+
+		/* pad the flag letters to a 6-column field (at least one space) */
+		ret += fprintf(fp, "%*s", 6 - ret, " ");
+		printed += ret;
+	}
+
+	if (PRINT_FIELD(TIME)) {
+		/* split the nanosecond timestamp into seconds + remainder */
+		nsecs = sample->time;
+		secs = nsecs / NSEC_PER_SEC;
+		nsecs -= secs * NSEC_PER_SEC;
+
+		if (nanosecs)
+			printed += fprintf(fp, "%5lu.%09llu: ", secs, nsecs);
+		else {
+			char sample_time[32];
+			timestamp__scnprintf_usec(sample->time, sample_time, sizeof(sample_time));
+			printed += fprintf(fp, "%12s: ", sample_time);
+		}
+	}
+
+	return printed;
+}
+
+/*
+ * Prediction marker for a branch entry: 'P' when flagged predicted, 'M'
+ * when only flagged mispredicted, '-' when neither flag is set.
+ */
+static inline char
+mispred_str(struct branch_entry *br)
+{
+	if (br->flags.predicted)
+		return 'P';
+
+	if (br->flags.mispred)
+		return 'M';
+
+	return '-';
+}
+
+/*
+ * Print every branch-stack entry as
+ * " 0x<from>[(dso)]/0x<to>[(dso)]/<pred>/<tx>/<abort>/<cycles> ".
+ * The DSO names appear only when the DSO field is selected.
+ *
+ * Returns the number of characters printed, 0 for a missing or empty
+ * branch stack.
+ */
+static int perf_sample__fprintf_brstack(struct perf_sample *sample,
+					struct thread *thread,
+					struct perf_event_attr *attr, FILE *fp)
+{
+	struct branch_stack *br = sample->branch_stack;
+	struct addr_location from_al, to_al;
+	int printed = 0;
+	u64 idx;
+
+	if (!br || !br->nr)
+		return 0;
+
+	for (idx = 0; idx < br->nr; idx++) {
+		struct branch_entry *entry = &br->entries[idx];
+		u64 from = entry->from;
+		u64 to = entry->to;
+
+		/* the maps are only needed for the DSO names */
+		if (PRINT_FIELD(DSO)) {
+			memset(&from_al, 0, sizeof(from_al));
+			memset(&to_al, 0, sizeof(to_al));
+			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &from_al);
+			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &to_al);
+		}
+
+		printed += fprintf(fp, " 0x%"PRIx64, from);
+		if (PRINT_FIELD(DSO)) {
+			printed += fprintf(fp, "(");
+			printed += map__fprintf_dsoname(from_al.map, fp);
+			printed += fprintf(fp, ")");
+		}
+
+		printed += fprintf(fp, "/0x%"PRIx64, to);
+		if (PRINT_FIELD(DSO)) {
+			printed += fprintf(fp, "(");
+			printed += map__fprintf_dsoname(to_al.map, fp);
+			printed += fprintf(fp, ")");
+		}
+
+		printed += fprintf(fp, "/%c/%c/%c/%d ",
+				   mispred_str(entry),
+				   entry->flags.in_tx ? 'X' : '-',
+				   entry->flags.abort ? 'A' : '-',
+				   entry->flags.cycles);
+	}
+
+	return printed;
+}
+
+/*
+ * Print every branch-stack entry with both endpoints resolved to
+ * symbol+offset: "<from sym>[(dso)]/<to sym>[(dso)]/<pred>/<tx>/<abort>/<cycles> ".
+ * The DSO names appear only when the DSO field is selected.
+ *
+ * Returns the number of characters printed, 0 for a missing or empty
+ * branch stack.
+ */
+static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
+					   struct thread *thread,
+					   struct perf_event_attr *attr, FILE *fp)
+{
+	struct branch_stack *br = sample->branch_stack;
+	struct addr_location from_al, to_al;
+	int printed = 0;
+	u64 idx;
+
+	if (!br || !br->nr)
+		return 0;
+
+	for (idx = 0; idx < br->nr; idx++) {
+		struct branch_entry *entry = &br->entries[idx];
+		u64 from, to;
+
+		memset(&from_al, 0, sizeof(from_al));
+		memset(&to_al, 0, sizeof(to_al));
+		from = entry->from;
+		to   = entry->to;
+
+		/* resolve the source, then the target, of the branch */
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &from_al);
+		if (from_al.map)
+			from_al.sym = map__find_symbol(from_al.map, from_al.addr);
+
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &to_al);
+		if (to_al.map)
+			to_al.sym = map__find_symbol(to_al.map, to_al.addr);
+
+		printed += symbol__fprintf_symname_offs(from_al.sym, &from_al, fp);
+		if (PRINT_FIELD(DSO)) {
+			printed += fprintf(fp, "(");
+			printed += map__fprintf_dsoname(from_al.map, fp);
+			printed += fprintf(fp, ")");
+		}
+
+		printed += fprintf(fp, "%c", '/');
+
+		printed += symbol__fprintf_symname_offs(to_al.sym, &to_al, fp);
+		if (PRINT_FIELD(DSO)) {
+			printed += fprintf(fp, "(");
+			printed += map__fprintf_dsoname(to_al.map, fp);
+			printed += fprintf(fp, ")");
+		}
+
+		printed += fprintf(fp, "/%c/%c/%c/%d ",
+				   mispred_str(entry),
+				   entry->flags.in_tx ? 'X' : '-',
+				   entry->flags.abort ? 'A' : '-',
+				   entry->flags.cycles);
+	}
+
+	return printed;
+}
+
+/*
+ * Print every branch-stack entry like the plain brstack printer, but with
+ * each endpoint passed through map__map_ip() when it falls in a map whose
+ * dso does not have adjust_symbols set.
+ *
+ * Returns the number of characters printed, 0 for a missing or empty
+ * branch stack.
+ */
+static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
+					   struct thread *thread,
+					   struct perf_event_attr *attr, FILE *fp)
+{
+	struct branch_stack *br = sample->branch_stack;
+	struct addr_location from_al, to_al;
+	int printed = 0;
+	u64 idx;
+
+	if (!br || !br->nr)
+		return 0;
+
+	for (idx = 0; idx < br->nr; idx++) {
+		struct branch_entry *entry = &br->entries[idx];
+		u64 from, to;
+
+		memset(&from_al, 0, sizeof(from_al));
+		memset(&to_al, 0, sizeof(to_al));
+		from = entry->from;
+		to   = entry->to;
+
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &from_al);
+		if (from_al.map && !from_al.map->dso->adjust_symbols)
+			from = map__map_ip(from_al.map, from);
+
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &to_al);
+		if (to_al.map && !to_al.map->dso->adjust_symbols)
+			to = map__map_ip(to_al.map, to);
+
+		printed += fprintf(fp, " 0x%"PRIx64, from);
+		if (PRINT_FIELD(DSO)) {
+			printed += fprintf(fp, "(");
+			printed += map__fprintf_dsoname(from_al.map, fp);
+			printed += fprintf(fp, ")");
+		}
+
+		printed += fprintf(fp, "/0x%"PRIx64, to);
+		if (PRINT_FIELD(DSO)) {
+			printed += fprintf(fp, "(");
+			printed += map__fprintf_dsoname(to_al.map, fp);
+			printed += fprintf(fp, ")");
+		}
+
+		printed += fprintf(fp, "/%c/%c/%c/%d ",
+				   mispred_str(entry),
+				   entry->flags.in_tx ? 'X' : '-',
+				   entry->flags.abort ? 'A' : '-',
+				   entry->flags.cycles);
+	}
+
+	return printed;
+}
+/* Size in bytes of the code buffer that grab_bb() callers pass in. */
+#define MAXBB 16384UL
+
+/*
+ * Read the code bytes of the block [start, end] (plus MAXINSN extra bytes)
+ * into @buffer.
+ *
+ * *cpumode is set from whether @start is a kernel address; *is64bit is set
+ * from the resolved dso once the read has been attempted.  @last only
+ * selects which debug message is logged for an over-long block.
+ *
+ * Returns:
+ *   0       when @start or @end is zero, the block is too long, or the
+ *           address cannot be resolved to a usable dso;
+ *   -ENXIO  when @start and @end are on different sides of the
+ *           kernel/user boundary (the caller patches this up);
+ *   otherwise the result of dso__data_read_offset(), which may be <= 0.
+ */
+static int grab_bb(u8 *buffer, u64 start, u64 end,
+		    struct machine *machine, struct thread *thread,
+		    bool *is64bit, u8 *cpumode, bool last)
+{
+	long offset, len;
+	struct addr_location al;
+	bool kernel;
+
+	if (!start || !end)
+		return 0;
+
+	kernel = machine__kernel_ip(machine, start);
+	if (kernel)
+		*cpumode = PERF_RECORD_MISC_KERNEL;
+	else
+		*cpumode = PERF_RECORD_MISC_USER;
+
+	/*
+	 * Block overlaps between kernel and user.
+	 * This can happen due to ring filtering
+	 * On Intel CPUs the entry into the kernel is filtered,
+	 * but the exit is not. Let the caller patch it up.
+	 */
+	if (kernel != machine__kernel_ip(machine, end)) {
+		pr_debug("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n", start, end);
+		return -ENXIO;
+	}
+
+	memset(&al, 0, sizeof(al));
+	/*
+	 * The read below asks for end - start + MAXINSN bytes, so anything
+	 * longer than MAXBB - MAXINSN would not fit the caller's buffer.
+	 * The subtraction is unsigned: end < start also lands here.
+	 */
+	if (end - start > MAXBB - MAXINSN) {
+		if (last)
+			pr_debug("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end);
+		else
+			pr_debug("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start);
+		return 0;
+	}
+
+	thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al);
+	if (!al.map || !al.map->dso) {
+		pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
+		return 0;
+	}
+	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) {
+		pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
+		return 0;
+	}
+
+	/* Load maps to ensure dso->is_64_bit has been updated */
+	map__load(al.map);
+
+	offset = al.map->map_ip(al.map, start);
+	len = dso__data_read_offset(al.map->dso, machine, offset, (u8 *)buffer,
+				    end - start + MAXINSN);
+
+	*is64bit = al.map->dso->is_64_bit;
+	if (len <= 0)
+		pr_debug("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n",
+			start, end);
+	return len;
+}
+
+/*
+ * Print the branch instruction at @ip followed by a '#' annotation built
+ * from the branch entry's flags.  When the entry has a cycle count it is
+ * printed too, along with an IPC figure if @insn is non-zero.
+ *
+ * Returns the number of characters printed, including the newline.
+ */
+static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
+			    struct perf_insn *x, u8 *inbuf, int len,
+			    int insn, FILE *fp)
+{
+	int printed;
+
+	printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip,
+			  dump_insn(x, ip, inbuf, len, NULL),
+			  en->flags.predicted ? " PRED" : "",
+			  en->flags.mispred ? " MISPRED" : "",
+			  en->flags.in_tx ? " INTX" : "",
+			  en->flags.abort ? " ABORT" : "");
+
+	if (en->flags.cycles) {
+		printed += fprintf(fp, " %d cycles", en->flags.cycles);
+		if (insn)
+			printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles);
+	}
+
+	printed += fprintf(fp, "\n");
+
+	return printed;
+}
+
+/*
+ * Print a "\t<symbol>[+off]:[srcline]\n" header for @addr unless the
+ * address still falls inside *lastsym, then remember the new symbol in
+ * *lastsym.
+ *
+ * Returns the number of characters printed; 0 when nothing was printed
+ * (same symbol as before, or no symbol found).
+ */
+static int ip__fprintf_sym(uint64_t addr, struct thread *thread,
+			   u8 cpumode, int cpu, struct symbol **lastsym,
+			   struct perf_event_attr *attr, FILE *fp)
+{
+	struct addr_location al;
+	int off, printed = 0;
+
+	memset(&al, 0, sizeof(al));
+
+	/* try the function maps first, then fall back to the variable maps */
+	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
+	if (!al.map)
+		thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
+				      addr, &al);
+	/* still inside the previously printed symbol: no new header */
+	if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end)
+		return 0;
+
+	al.cpu = cpu;
+	al.sym = NULL;
+	if (al.map)
+		al.sym = map__find_symbol(al.map, al.addr);
+
+	if (!al.sym)
+		return 0;
+
+	/* NOTE(review): intent of the second offset form is not evident here - confirm */
+	if (al.addr < al.sym->end)
+		off = al.addr - al.sym->start;
+	else
+		off = al.addr - al.map->start - al.sym->start;
+	printed += fprintf(fp, "\t%s", al.sym->name);
+	if (off)
+		printed += fprintf(fp, "%+d", off);
+	printed += fprintf(fp, ":");
+	if (PRINT_FIELD(SRCLINE))
+		printed += map__fprintf_srcline(al.map, al.addr, "\t", fp);
+	printed += fprintf(fp, "\n");
+	*lastsym = al.sym;
+
+	return printed;
+}
+
+/*
+ * Print the instructions of the code blocks described by the sample's
+ * branch stack.  Entries are walked from the highest index down to 0:
+ * the block between entries[i + 1].to and entries[i].from is dumped one
+ * instruction per line, ending with the branch itself.  Finally the block
+ * from entries[0].to up to sample->ip is printed, unless the sample hit the
+ * branch itself or the last entry is flagged as an abort.
+ *
+ * At most max_blocks + 1 entries are used when max_blocks is non-zero.
+ *
+ * Returns the number of characters printed, 0 for a missing or empty
+ * branch stack.
+ */
+static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
+					    struct thread *thread,
+					    struct perf_event_attr *attr,
+					    struct machine *machine, FILE *fp)
+{
+	struct branch_stack *br = sample->branch_stack;
+	u64 start, end;
+	/*
+	 * NOTE(review): ilen is only ever written through dump_insn()'s last
+	 * argument; the loops below assume dump_insn() always stores to it -
+	 * confirm.
+	 */
+	int i, insn, len, nr, ilen, printed = 0;
+	struct perf_insn x;
+	/* NOTE(review): MAXBB bytes of automatic storage */
+	u8 buffer[MAXBB];
+	unsigned off;
+	struct symbol *lastsym = NULL;
+
+	if (!(br && br->nr))
+		return 0;
+	nr = br->nr;
+	if (max_blocks && nr > max_blocks + 1)
+		nr = max_blocks + 1;
+
+	x.thread = thread;
+	x.cpu = sample->cpu;
+
+	printed += fprintf(fp, "%c", '\n');
+
+	/* Handle first from jump, of which we don't know the entry. */
+	len = grab_bb(buffer, br->entries[nr-1].from,
+			br->entries[nr-1].from,
+			machine, thread, &x.is64bit, &x.cpumode, false);
+	if (len > 0) {
+		printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
+					   x.cpumode, x.cpu, &lastsym, attr, fp);
+		printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
+					    &x, buffer, len, 0, fp);
+	}
+
+	/* Print all blocks */
+	for (i = nr - 2; i >= 0; i--) {
+		if (br->entries[i].from || br->entries[i].to)
+			pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i,
+				 br->entries[i].from,
+				 br->entries[i].to);
+		start = br->entries[i + 1].to;
+		end   = br->entries[i].from;
+
+		len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
+		/* Patch up missing kernel transfers due to ring filters */
+		if (len == -ENXIO && i > 0) {
+			/* skip one entry: retry with the next branch source as the end */
+			end = br->entries[--i].from;
+			pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end);
+			len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
+		}
+		if (len <= 0)
+			continue;
+
+		/* insn counts the instructions before the branch, for the IPC figure */
+		insn = 0;
+		for (off = 0;; off += ilen) {
+			uint64_t ip = start + off;
+
+			printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
+			if (ip == end) {
+				printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp);
+				break;
+			} else {
+				printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
+						   dump_insn(&x, ip, buffer + off, len - off, &ilen));
+				/* a zero length would never advance: stop the block */
+				if (ilen == 0)
+					break;
+				insn++;
+			}
+		}
+	}
+
+	/*
+	 * Hit the branch? In this case we are already done, and the target
+	 * has not been executed yet.
+	 */
+	if (br->entries[0].from == sample->ip)
+		goto out;
+	if (br->entries[0].flags.abort)
+		goto out;
+
+	/*
+	 * Print the final block up to the sample
+	 */
+	start = br->entries[0].to;
+	end = sample->ip;
+	len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
+	/*
+	 * NOTE(review): grab_bb() returns before writing cpumode when start
+	 * or end is zero; x.cpumode then holds whatever an earlier call
+	 * stored, if any - confirm this is acceptable.
+	 */
+	printed += ip__fprintf_sym(start, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
+	if (len <= 0) {
+		/* Print at least last IP if basic block did not work */
+		len = grab_bb(buffer, sample->ip, sample->ip,
+			      machine, thread, &x.is64bit, &x.cpumode, false);
+		if (len <= 0)
+			goto out;
+
+		printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip,
+			dump_insn(&x, sample->ip, buffer, len, NULL));
+		goto out;
+	}
+	for (off = 0; off <= end - start; off += ilen) {
+		printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", start + off,
+				   dump_insn(&x, start + off, buffer + off, len - off, &ilen));
+		if (ilen == 0)
+			break;
+	}
+out:
+	return printed;
+}
+
+/*
+ * Print sample->addr in hex and, when the address correlates with a symbol
+ * for this event, the symbol (with optional offset) and the DSO name,
+ * depending on the selected fields.
+ *
+ * Returns the number of characters printed.
+ */
+static int perf_sample__fprintf_addr(struct perf_sample *sample,
+				     struct thread *thread,
+				     struct perf_event_attr *attr, FILE *fp)
+{
+	struct addr_location al;
+	int printed;
+
+	printed = fprintf(fp, "%16" PRIx64, sample->addr);
+
+	/* only the raw address is printed when it cannot be symbolized */
+	if (!sample_addr_correlates_sym(attr))
+		return printed;
+
+	thread__resolve(thread, &al, sample);
+
+	if (PRINT_FIELD(SYM)) {
+		printed += fprintf(fp, " ");
+		printed += PRINT_FIELD(SYMOFFSET) ?
+			   symbol__fprintf_symname_offs(al.sym, &al, fp) :
+			   symbol__fprintf_symname(al.sym, fp);
+	}
+
+	if (PRINT_FIELD(DSO)) {
+		printed += fprintf(fp, " (");
+		printed += map__fprintf_dsoname(al.map, fp);
+		printed += fprintf(fp, ")");
+	}
+
+	return printed;
+}
+
+/*
+ * Print the symbol name (or raw address) of a call/return style sample,
+ * indented by four columns per thread-stack level, then pad the column to
+ * a width that is remembered across calls.
+ *
+ * Returns the number of characters printed, or the negative fprintf()
+ * result if printing the name/address failed.
+ */
+static int perf_sample__fprintf_callindent(struct perf_sample *sample,
+					   struct perf_evsel *evsel,
+					   struct thread *thread,
+					   struct addr_location *al, FILE *fp)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	size_t depth = thread_stack__depth(thread);
+	struct addr_location addr_al;
+	const char *sym_name = NULL;
+	static int column_width;
+	u64 raw_ip = 0;
+	int len = 0;
+
+	/*
+	 * A 'return' has already been popped off the stack, so bump the
+	 * depth back up to line it up with its 'call'.
+	 */
+	if (thread->ts && (sample->flags & PERF_IP_FLAG_RETURN))
+		depth += 1;
+
+	if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
+		/* calls are labelled by their target: sample->addr */
+		if (!sample_addr_correlates_sym(attr)) {
+			raw_ip = sample->addr;
+		} else {
+			thread__resolve(thread, &addr_al, sample);
+			if (addr_al.sym)
+				sym_name = addr_al.sym->name;
+			else
+				raw_ip = sample->addr;
+		}
+	} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
+		/* returns are labelled by where they come from: sample->ip */
+		if (al->sym)
+			sym_name = al->sym->name;
+		else
+			raw_ip = sample->ip;
+	}
+
+	if (sym_name)
+		len = fprintf(fp, "%*s%s", (int)depth * 4, "", sym_name);
+	else if (raw_ip)
+		len = fprintf(fp, "%*s%16" PRIx64, (int)depth * 4, "", raw_ip);
+
+	if (len < 0)
+		return len;
+
+	/*
+	 * Only move the column width when the text overflows it or is much
+	 * shorter than it, so consecutive lines stay aligned.
+	 */
+	if (len > column_width || (len && len < column_width - 52))
+		column_width = round_up(len + 4, 32);
+
+	if (len < column_width)
+		len += fprintf(fp, "%*s", column_width - len, "");
+
+	return len;
+}
+
+/*
+ * Print the instruction-related fields that are selected: instruction
+ * length, raw instruction bytes in hex, and the branch-stack disassembly.
+ *
+ * Returns the number of characters printed.
+ */
+static int perf_sample__fprintf_insn(struct perf_sample *sample,
+				     struct perf_event_attr *attr,
+				     struct thread *thread,
+				     struct machine *machine, FILE *fp)
+{
+	int total = 0;
+
+	if (PRINT_FIELD(INSNLEN))
+		total += fprintf(fp, " ilen: %d", sample->insn_len);
+
+	if (PRINT_FIELD(INSN)) {
+		int byte = 0;
+
+		total += fprintf(fp, " insn:");
+		while (byte < sample->insn_len) {
+			total += fprintf(fp, " %02x", (unsigned char)sample->insn[byte]);
+			byte++;
+		}
+	}
+
+	if (PRINT_FIELD(BRSTACKINSN))
+		total += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp);
+
+	return total;
+}
+
+/*
+ * Print one branch-style sample: optional call indentation, the source IP
+ * (with callchain or symbol information), " => " followed by the target
+ * address, an optional trailing source line, the instruction fields and a
+ * final newline.
+ *
+ * Returns the number of characters printed.
+ */
+static int perf_sample__fprintf_bts(struct perf_sample *sample,
+				    struct perf_evsel *evsel,
+				    struct thread *thread,
+				    struct addr_location *al,
+				    struct machine *machine, FILE *fp)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	unsigned int type = output_type(attr->type);
+	bool print_srcline_last = false;
+	int printed = 0;
+
+	if (PRINT_FIELD(CALLINDENT))
+		printed += perf_sample__fprintf_callindent(sample, evsel, thread, al, fp);
+
+	/* print branch_from information */
+	if (PRINT_FIELD(IP)) {
+		unsigned int print_opts = output[type].print_ip_opts;
+		struct callchain_cursor *cursor = NULL;
+
+		/* cursor stays NULL unless the callchain resolves successfully */
+		if (symbol_conf.use_callchain && sample->callchain &&
+		    thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+					      sample, NULL, NULL, scripting_max_stack) == 0)
+			cursor = &callchain_cursor;
+
+		if (cursor == NULL) {
+			printed += fprintf(fp, " ");
+			/*
+			 * Without a callchain the source line is held back
+			 * and printed after the branch target instead.
+			 */
+			if (print_opts & EVSEL__PRINT_SRCLINE) {
+				print_srcline_last = true;
+				print_opts &= ~EVSEL__PRINT_SRCLINE;
+			}
+		} else
+			printed += fprintf(fp, "\n");
+
+		printed += sample__fprintf_sym(sample, al, 0, print_opts, cursor, fp);
+	}
+
+	/* print branch_to information */
+	/* also shown by default when the samples carry an address and the
+	 * user did not pick the fields for this event type */
+	if (PRINT_FIELD(ADDR) ||
+	    ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
+	     !output[type].user_set)) {
+		printed += fprintf(fp, " => ");
+		printed += perf_sample__fprintf_addr(sample, thread, attr, fp);
+	}
+
+	if (print_srcline_last)
+		printed += map__fprintf_srcline(al->map, al->addr, "\n  ", fp);
+
+	printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
+	return printed + fprintf(fp, "\n");
+}
+
+/*
+ * Short names for exact PERF_IP_FLAG_* combinations.  The table is scanned
+ * in order by perf_sample__fprintf_flags(), which compares the sample flags
+ * (with PERF_IP_FLAG_IN_TX masked out) for equality against .flags.
+ * The entry with a NULL name terminates the table.
+ */
+static struct {
+	u32 flags;
+	const char *name;
+} sample_flags[] = {
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "jcc"},
+	{PERF_IP_FLAG_BRANCH, "jmp"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT, "int"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT, "iret"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET, "syscall"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET, "sysret"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "async"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |	PERF_IP_FLAG_INTERRUPT, "hw int"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"},
+	{0, NULL}
+};
+
+/*
+ * Print the sample flags column.  If the flags (ignoring the in-transaction
+ * bit) match an entry of sample_flags[], the entry's name is printed,
+ * followed by "(x)" when the in-transaction bit is set.  Otherwise one
+ * character per set bit is printed: the matching PERF_IP_FLAG_CHARS
+ * character, or '?' for bits beyond that string.
+ *
+ * Returns the fprintf() result.
+ */
+static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
+{
+	const char *flag_chars = PERF_IP_FLAG_CHARS;
+	const int nr_known = strlen(PERF_IP_FLAG_CHARS);
+	const u32 lookup = flags & ~PERF_IP_FLAG_IN_TX;
+	bool in_tx = flags & PERF_IP_FLAG_IN_TX;
+	char str[33];
+	int bit, pos = 0;
+
+	for (bit = 0; sample_flags[bit].name; bit++) {
+		if (sample_flags[bit].flags == lookup)
+			return fprintf(fp, "  %-7s%4s ", sample_flags[bit].name,
+				       in_tx ? "(x)" : "");
+	}
+
+	/* no named combination: spell the set bits out, lowest bit first */
+	for (bit = 0; bit < 32; bit++, flags >>= 1) {
+		if (!(flags & 1))
+			continue;
+		str[pos++] = bit < nr_known ? flag_chars[bit] : '?';
+	}
+	str[pos] = 0;
+
+	return fprintf(fp, "  %-11s ", str);
+}
+
+/*
+ * State carried across sample__fprintf_bpf_output() callbacks while one
+ * raw BPF output buffer is dumped.
+ */
+struct printer_data {
+	int line_no;		/* incremented at every BINARY_PRINT_LINE_END */
+	bool hit_nul;		/* a NUL byte has been seen while still printable */
+	bool is_printable;	/* cleared by a non-printable byte or data after a NUL */
+};
+
+/*
+ * binary__fprintf() callback that renders one element of the BPF output hex
+ * dump.  While printing character data it also tracks, in the
+ * struct printer_data passed via @extra, whether the buffer looks like a
+ * NUL-terminated printable string.
+ *
+ * Returns the number of characters printed for this element.
+ */
+static int sample__fprintf_bpf_output(enum binary_printer_ops op,
+				      unsigned int val,
+				      void *extra, FILE *fp)
+{
+	struct printer_data *state = extra;
+	unsigned char ch = (unsigned char)val;
+	int printed = 0;
+
+	switch (op) {
+	case BINARY_PRINT_DATA_BEGIN:
+		return fprintf(fp, "\n");
+	case BINARY_PRINT_LINE_BEGIN:
+		/* only the first line carries the label */
+		return fprintf(fp, "%17s", !state->line_no ? "BPF output:" :
+							     "           ");
+	case BINARY_PRINT_ADDR:
+		return fprintf(fp, " %04x:", val);
+	case BINARY_PRINT_NUM_DATA:
+		return fprintf(fp, " %02x", val);
+	case BINARY_PRINT_NUM_PAD:
+		return fprintf(fp, "   ");
+	case BINARY_PRINT_SEP:
+		return fprintf(fp, "  ");
+	case BINARY_PRINT_CHAR_DATA:
+		/* any non-NUL byte after a NUL rules out "plain string" */
+		if (state->hit_nul && ch)
+			state->is_printable = false;
+
+		if (isprint(ch))
+			return fprintf(fp, "%c", ch);
+
+		printed = fprintf(fp, "%c", '.');
+		if (state->is_printable) {
+			if (ch == '\0')
+				state->hit_nul = true;
+			else
+				state->is_printable = false;
+		}
+		return printed;
+	case BINARY_PRINT_CHAR_PAD:
+		return fprintf(fp, " ");
+	case BINARY_PRINT_LINE_END:
+		printed = fprintf(fp, "\n");
+		state->line_no++;
+		return printed;
+	case BINARY_PRINT_DATA_END:
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Hex-dump the raw data of a BPF output sample, 8 bytes per line, and add a
+ * "BPF string:" line when the data turned out to be printable text ending
+ * in NUL.
+ *
+ * Returns the number of characters printed.
+ */
+static int perf_sample__fprintf_bpf_output(struct perf_sample *sample, FILE *fp)
+{
+	struct printer_data state = {
+		.line_no	= 0,
+		.hit_nul	= false,
+		.is_printable	= true,
+	};
+	unsigned int nr_bytes = sample->raw_size;
+	int printed;
+
+	printed = binary__fprintf(sample->raw_data, nr_bytes, 8,
+				  sample__fprintf_bpf_output, &state, fp);
+
+	if (state.is_printable && state.hit_nul)
+		printed += fprintf(fp, "%17s \"%s\"\n", "BPF string:", (char *)(sample->raw_data));
+
+	return printed;
+}
+
+/*
+ * Pad a column that already holds @len characters out to @spacing
+ * characters.  Nothing is printed when @len is not positive or already
+ * reaches @spacing.
+ *
+ * Returns the number of padding characters printed.
+ */
+static int perf_sample__fprintf_spacing(int len, int spacing, FILE *fp)
+{
+	if (len <= 0 || len >= spacing)
+		return 0;
+
+	return fprintf(fp, "%*s", spacing - len, "");
+}
+
+/*
+ * Pad a synthesized-event column holding @len characters out to 34
+ * characters.  Returns the number of padding characters printed.
+ */
+static int perf_sample__fprintf_pt_spacing(int len, FILE *fp)
+{
+	const int column_width = 34;
+
+	return perf_sample__fprintf_spacing(len, column_width, fp);
+}
+
+/*
+ * Print a synthesized ptwrite sample: its ip field and 64-bit payload,
+ * padded to the common synthesized-event column width.
+ *
+ * Returns the number of characters printed, 0 if the sample data is too
+ * small for the structure.
+ */
+static int perf_sample__fprintf_synth_ptwrite(struct perf_sample *sample, FILE *fp)
+{
+	struct perf_synth_intel_ptwrite *ptw = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *ptw))
+		return 0;
+
+	len = fprintf(fp, " IP: %u payload: %#" PRIx64 " ",
+		      ptw->ip, le64_to_cpu(ptw->payload));
+	len += perf_sample__fprintf_pt_spacing(len, fp);
+
+	return len;
+}
+
+/*
+ * Print a synthesized mwait sample: its hints and extensions fields in hex,
+ * padded to the common synthesized-event column width.
+ *
+ * Returns the number of characters printed, 0 if the sample data is too
+ * small for the structure.
+ */
+static int perf_sample__fprintf_synth_mwait(struct perf_sample *sample, FILE *fp)
+{
+	struct perf_synth_intel_mwait *mwait = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *mwait))
+		return 0;
+
+	len = fprintf(fp, " hints: %#x extensions: %#x ",
+		      mwait->hints, mwait->extensions);
+	len += perf_sample__fprintf_pt_spacing(len, fp);
+
+	return len;
+}
+
+/*
+ * Print a synthesized pwre sample: its hw, cstate and subcstate fields,
+ * padded to the common synthesized-event column width.
+ *
+ * Returns the number of characters printed, 0 if the sample data is too
+ * small for the structure.
+ */
+static int perf_sample__fprintf_synth_pwre(struct perf_sample *sample, FILE *fp)
+{
+	struct perf_synth_intel_pwre *pwre = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *pwre))
+		return 0;
+
+	len = fprintf(fp, " hw: %u cstate: %u sub-cstate: %u ",
+		      pwre->hw, pwre->cstate, pwre->subcstate);
+	len += perf_sample__fprintf_pt_spacing(len, fp);
+
+	return len;
+}
+
+/*
+ * Print a synthesized exstop sample: its ip field, padded to the common
+ * synthesized-event column width.
+ *
+ * Returns the number of characters printed, 0 if the sample data is too
+ * small for the structure.
+ */
+static int perf_sample__fprintf_synth_exstop(struct perf_sample *sample, FILE *fp)
+{
+	struct perf_synth_intel_exstop *exstop = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *exstop))
+		return 0;
+
+	len = fprintf(fp, " IP: %u ", exstop->ip);
+	len += perf_sample__fprintf_pt_spacing(len, fp);
+
+	return len;
+}
+
+/*
+ * Print a synthesized pwrx sample: its deepest_cstate, last_cstate and
+ * wake_reason fields, padded to the common synthesized-event column width.
+ *
+ * Returns the number of characters printed, 0 if the sample data is too
+ * small for the structure.
+ */
+static int perf_sample__fprintf_synth_pwrx(struct perf_sample *sample, FILE *fp)
+{
+	struct perf_synth_intel_pwrx *pwrx = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *pwrx))
+		return 0;
+
+	len = fprintf(fp, " deepest cstate: %u last cstate: %u wake reason: %#x ",
+		      pwrx->deepest_cstate, pwrx->last_cstate,
+		      pwrx->wake_reason);
+	len += perf_sample__fprintf_pt_spacing(len, fp);
+
+	return len;
+}
+
+/*
+ * Print a synthesized cbr sample: the cbr value, the freq field divided by
+ * 1000 with rounding (labelled MHz) and, when max_nonturbo is non-zero, cbr
+ * as a rounded percentage of max_nonturbo.  The result is padded to the
+ * common synthesized-event column width.
+ *
+ * Returns the number of characters printed, 0 if the sample data is too
+ * small for the structure.
+ */
+static int perf_sample__fprintf_synth_cbr(struct perf_sample *sample, FILE *fp)
+{
+	struct perf_synth_intel_cbr *cbr = perf_sample__synth_ptr(sample);
+	unsigned int mhz, pct;
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *cbr))
+		return 0;
+
+	/* round to the nearest unit when dividing by 1000 */
+	mhz = (le32_to_cpu(cbr->freq) + 500) / 1000;
+	len = fprintf(fp, " cbr: %2u freq: %4u MHz ", cbr->cbr, mhz);
+
+	if (cbr->max_nonturbo) {
+		/* computed in tenths of a percent, then rounded */
+		pct = (5 + (1000 * cbr->cbr) / cbr->max_nonturbo) / 10;
+		len += fprintf(fp, "(%3u%%) ", pct);
+	}
+
+	len += perf_sample__fprintf_pt_spacing(len, fp);
+
+	return len;
+}
+
+/*
+ * Dispatch a synthesized sample to the printer matching
+ * evsel->attr.config.
+ *
+ * Returns the number of characters printed, 0 for an unknown config.
+ */
+static int perf_sample__fprintf_synth(struct perf_sample *sample,
+				      struct perf_evsel *evsel, FILE *fp)
+{
+	int printed;
+
+	switch (evsel->attr.config) {
+	case PERF_SYNTH_INTEL_PTWRITE:
+		printed = perf_sample__fprintf_synth_ptwrite(sample, fp);
+		break;
+	case PERF_SYNTH_INTEL_MWAIT:
+		printed = perf_sample__fprintf_synth_mwait(sample, fp);
+		break;
+	case PERF_SYNTH_INTEL_PWRE:
+		printed = perf_sample__fprintf_synth_pwre(sample, fp);
+		break;
+	case PERF_SYNTH_INTEL_EXSTOP:
+		printed = perf_sample__fprintf_synth_exstop(sample, fp);
+		break;
+	case PERF_SYNTH_INTEL_PWRX:
+		printed = perf_sample__fprintf_synth_pwrx(sample, fp);
+		break;
+	case PERF_SYNTH_INTEL_CBR:
+		printed = perf_sample__fprintf_synth_cbr(sample, fp);
+		break;
+	default:
+		printed = 0;
+		break;
+	}
+
+	return printed;
+}
+
+/*
+ * Top-level state of the script command; passed to the sample printing
+ * code, which reaches the event list through ->session.
+ *
+ * NOTE(review): the meaning of the individual fields is not established
+ * by the code in view and is inferred from their names only - confirm
+ * against the users of each field.
+ */
+struct perf_script {
+	struct perf_tool	tool;
+	struct perf_session	*session;
+	bool			show_task_events;
+	bool			show_mmap_events;
+	bool			show_switch_events;
+	bool			show_namespace_events;
+	bool			show_lost_events;
+	bool			show_round_events;
+	bool			allocated;
+	bool			per_event_dump;
+	struct cpu_map		*cpus;
+	struct thread_map	*threads;
+	int			name_width;
+	const char              *time_str;
+	struct perf_time_interval *ptime_range;
+	int			range_size;
+	int			range_num;
+};
+
+/*
+ * Return the length of the longest event name in @evlist, 0 when the list
+ * is empty.
+ */
+static int perf_evlist__max_name_len(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+	int longest = 0;
+
+	evlist__for_each_entry(evlist, evsel) {
+		int len = strlen(perf_evsel__name(evsel));
+
+		if (len > longest)
+			longest = len;
+	}
+
+	return longest;
+}
+
+/*
+ * Print a data_src value as 16-column hex followed by its decoded text.
+ * The column is left-justified to the widest line printed so far, which is
+ * remembered across calls.
+ *
+ * Returns the fprintf() result.
+ */
+static int data_src__fprintf(u64 data_src, FILE *fp)
+{
+	struct mem_info mi = { .data_src.val = data_src };
+	static int widest;
+	char decoded[100];
+	char line[100];
+	int len;
+
+	perf_script__meminfo_scnprintf(decoded, sizeof(decoded), &mi);
+
+	len = scnprintf(line, sizeof(line), "%16" PRIx64 " %s", data_src, decoded);
+	if (len > widest)
+		widest = len;
+
+	return fprintf(fp, "%-*s", widest, line);
+}
+
+/*
+ * Context handed to the metric printing callbacks (script_print_metric()
+ * and script_new_line()) so they can print the line prefix for the sample
+ * being processed and write to the right stream.
+ */
+struct metric_ctx {
+	struct perf_sample	*sample;
+	struct thread		*thread;
+	struct perf_evsel	*evsel;
+	FILE 			*fp;
+};
+
+/*
+ * print_metric callback: emit one "metric:" line for a computed value,
+ * prefixed with the usual leading sample columns.
+ *
+ * @ctx:   struct metric_ctx with the sample being printed and the stream
+ * @color: colour passed to color_fprintf(); plain output when NULL
+ * @fmt:   printf-style format for @val; nothing is printed when NULL
+ * @unit:  text printed after the value
+ * @val:   the metric value
+ */
+static void script_print_metric(void *ctx, const char *color,
+			        const char *fmt,
+			        const char *unit, double val)
+{
+	struct metric_ctx *mctx = ctx;
+
+	if (!fmt)
+		return;
+	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+				   PERF_RECORD_SAMPLE, mctx->fp);
+	fputs("\tmetric: ", mctx->fp);
+	if (color)
+		color_fprintf(mctx->fp, color, fmt, val);
+	else
+		/* same stream as the rest of the line, not stdout */
+		fprintf(mctx->fp, fmt, val);
+	fprintf(mctx->fp, " %s\n", unit);
+}
+
+/*
+ * new_line callback: begin another "metric:" line by printing the leading
+ * sample columns followed by the "\tmetric: " label.
+ */
+static void script_new_line(void *ctx)
+{
+	struct metric_ctx *mctx = ctx;
+	FILE *out = mctx->fp;
+
+	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+				   PERF_RECORD_SAMPLE, out);
+	fputs("\tmetric: ", out);
+}
+
+/*
+ * Feed @sample into the shadow stats and, once a sample has been seen for
+ * every member of the evsel's group, print the metrics for the whole group.
+ */
+static void perf_sample__fprint_metric(struct perf_script *script,
+				       struct thread *thread,
+				       struct perf_evsel *evsel,
+				       struct perf_sample *sample,
+				       FILE *fp)
+{
+	static bool shadow_stats_ready;
+	struct metric_ctx mctx = {
+		.sample = sample,
+		.thread = thread,
+		.evsel  = evsel,
+		.fp     = fp,
+	};
+	struct perf_stat_output_ctx out = {
+		.print_metric = script_print_metric,
+		.new_line = script_new_line,
+		.ctx = &mctx,
+		.force_header = false,
+	};
+	struct perf_evsel *member;
+	u64 scaled;
+
+	/* One-time initialisation of the shadow stats. */
+	if (!shadow_stats_ready) {
+		perf_stat__init_shadow_stats();
+		shadow_stats_ready = true;
+	}
+
+	if (!evsel->stats)
+		perf_evlist__alloc_stats(script->session->evlist, false);
+
+	/* The first sample of a group starts from clean shadow stats. */
+	if (evsel_script(evsel->leader)->gnum++ == 0)
+		perf_stat__reset_shadow_stats();
+
+	scaled = sample->period * evsel->scale;
+	perf_stat__update_shadow_stats(evsel, scaled, sample->cpu, &rt_stat);
+	evsel_script(evsel)->val = scaled;
+
+	if (evsel_script(evsel->leader)->gnum != evsel->leader->nr_members)
+		return;
+
+	/* Group complete: print every member, then re-arm the counter. */
+	for_each_group_member (member, evsel->leader) {
+		perf_stat__print_shadow_stats(member,
+					      evsel_script(member)->val,
+					      sample->cpu,
+					      &out,
+					      NULL,
+					      &rt_stat);
+	}
+	evsel_script(evsel->leader)->gnum = 0;
+}
+
+static void process_event(struct perf_script *script,
+			  struct perf_sample *sample, struct perf_evsel *evsel,
+			  struct addr_location *al,
+			  struct machine *machine)
+{	/* Built-in printer, used by process_sample_event() when no scripting_ops is set: writes one sample to the evsel's stream. */
+	struct thread *thread = al->thread;
+	struct perf_event_attr *attr = &evsel->attr;
+	unsigned int type = output_type(attr->type);
+	struct perf_evsel_script *es = evsel->priv;	/* assigned by perf_script__setup_per_event_dump() */
+	FILE *fp = es->fp;
+
+	if (output[type].fields == 0)	/* no fields enabled for this event type: print nothing */
+		return;
+
+	++es->samples;	/* count only samples that are actually printed */
+
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_SAMPLE, fp);
+
+	if (PRINT_FIELD(PERIOD))
+		fprintf(fp, "%10" PRIu64 " ", sample->period);
+
+	if (PRINT_FIELD(EVNAME)) {
+		const char *evname = perf_evsel__name(evsel);
+
+		if (!script->name_width)	/* computed once, on first use */
+			script->name_width = perf_evlist__max_name_len(script->session->evlist);
+
+		fprintf(fp, "%*s: ", script->name_width, evname ?: "[unknown]");
+	}
+
+	if (print_flags)
+		perf_sample__fprintf_flags(sample->flags, fp);
+
+	if (is_bts_event(attr)) {	/* BTS samples are printed by their own helper; the fields below are skipped */
+		perf_sample__fprintf_bts(sample, evsel, thread, al, machine, fp);
+		return;
+	}
+
+	if (PRINT_FIELD(TRACE)) {
+		event_format__fprintf(evsel->tp_format, sample->cpu,
+				      sample->raw_data, sample->raw_size, fp);
+	}
+
+	if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH))
+		perf_sample__fprintf_synth(sample, evsel, fp);
+
+	if (PRINT_FIELD(ADDR))
+		perf_sample__fprintf_addr(sample, thread, attr, fp);
+
+	if (PRINT_FIELD(DATA_SRC))
+		data_src__fprintf(sample->data_src, fp);
+
+	if (PRINT_FIELD(WEIGHT))
+		fprintf(fp, "%16" PRIu64, sample->weight);
+
+	if (PRINT_FIELD(IP)) {
+		struct callchain_cursor *cursor = NULL;	/* stays NULL unless a callchain was resolved below */
+
+		if (symbol_conf.use_callchain && sample->callchain &&
+		    thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+					      sample, NULL, NULL, scripting_max_stack) == 0)
+			cursor = &callchain_cursor;
+
+		fputc(cursor ? '\n' : ' ', fp);	/* a resolved callchain starts on a new line */
+		sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, fp);
+	}
+
+	if (PRINT_FIELD(IREGS))
+		perf_sample__fprintf_iregs(sample, attr, fp);
+
+	if (PRINT_FIELD(UREGS))
+		perf_sample__fprintf_uregs(sample, attr, fp);
+
+	if (PRINT_FIELD(BRSTACK))	/* at most one of the three branch-stack formats is printed */
+		perf_sample__fprintf_brstack(sample, thread, attr, fp);
+	else if (PRINT_FIELD(BRSTACKSYM))
+		perf_sample__fprintf_brstacksym(sample, thread, attr, fp);
+	else if (PRINT_FIELD(BRSTACKOFF))
+		perf_sample__fprintf_brstackoff(sample, thread, attr, fp);
+
+	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
+		perf_sample__fprintf_bpf_output(sample, fp);
+	perf_sample__fprintf_insn(sample, attr, thread, machine, fp);	/* called unconditionally; NOTE(review): presumably checks its own field flags - confirm */
+
+	if (PRINT_FIELD(PHYS_ADDR))
+		fprintf(fp, "%16" PRIx64, sample->phys_addr);
+	fprintf(fp, "\n");
+
+	if (PRINT_FIELD(METRIC))	/* metric lines follow the sample line */
+		perf_sample__fprint_metric(script, thread, evsel, sample, fp);
+}
+
+static struct scripting_ops	*scripting_ops;	/* backend selected by parse_scriptname(); while NULL the built-in printers are used */
+
+/*
+ * Built-in stat printer: one row per (thread, cpu) pair of @counter, preceded
+ * by a column header the first time the function is called.
+ */
+static void __process_stat(struct perf_evsel *counter, u64 tstamp)
+{
+	static int header_done;
+	int nr_threads = thread_map__nr(counter->threads);
+	int nr_cpus = perf_evsel__nr_cpus(counter);
+	int t, c;
+
+	if (counter->system_wide)
+		nr_threads = 1;
+
+	if (!header_done) {
+		header_done = 1;
+		printf("%3s %8s %15s %15s %15s %15s %s\n",
+		       "CPU", "THREAD", "VAL", "ENA", "RUN", "TIME", "EVENT");
+	}
+
+	for (t = 0; t < nr_threads; t++) {
+		for (c = 0; c < nr_cpus; c++) {
+			struct perf_counts_values *v =
+				perf_counts(counter->counts, c, t);
+
+			printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n",
+				counter->cpus->map[c],
+				thread_map__pid(counter->threads, t),
+				v->val,
+				v->ena,
+				v->run,
+				tstamp,
+				perf_evsel__name(counter));
+		}
+	}
+}
+
+/* Hand a stat update to the script backend if it has a hook, else print it. */
+static void process_stat(struct perf_evsel *counter, u64 tstamp)
+{
+	if (!scripting_ops || !scripting_ops->process_stat) {
+		__process_stat(counter, tstamp);
+		return;
+	}
+
+	scripting_ops->process_stat(&stat_config, counter, tstamp);
+}
+
+/* Forward a stat interval to the script backend; no-op without such a hook. */
+static void process_stat_interval(u64 tstamp)
+{
+	if (!scripting_ops)
+		return;
+	if (!scripting_ops->process_stat_interval)
+		return;
+
+	scripting_ops->process_stat_interval(tstamp);
+}
+
+/* Run the Perl and then the Python scripting setup routines. */
+static void setup_scripting(void)
+{
+	setup_perl_scripting();
+
+	setup_python_scripting();
+}
+
+/* Flush the active script backend; returns 0 when none is selected. */
+static int flush_scripting(void)
+{
+	if (!scripting_ops)
+		return 0;
+
+	return scripting_ops->flush_script();
+}
+
+/* Log the stop and stop the active script backend; returns 0 when none is selected. */
+static int cleanup_scripting(void)
+{
+	pr_debug("\nperf script stopped\n");
+
+	if (!scripting_ops)
+		return 0;
+
+	return scripting_ops->stop_script();
+}
+
+/*
+ * Sample handler: applies the time-range filter, handles debug mode (which
+ * only checks timestamp ordering), resolves the sample and passes it to the
+ * script backend or to the built-in printer.
+ *
+ * Returns -1 when the sample cannot be resolved, 0 otherwise.
+ */
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct addr_location al;
+
+	if (perf_time__ranges_skip_sample(script->ptime_range,
+					  script->range_num, sample->time))
+		return 0;
+
+	if (debug_mode) {
+		if (last_timestamp > sample->time) {
+			pr_err("Samples misordered, previous: %" PRIu64
+				" this: %" PRIu64 "\n", last_timestamp,
+				sample->time);
+			nr_unordered++;
+		}
+		last_timestamp = sample->time;
+		return 0;
+	}
+
+	if (machine__resolve(machine, &al, sample) < 0) {
+		pr_err("problem processing %d event, skipping it.\n",
+		       event->header.type);
+		return -1;
+	}
+
+	/* Filtered samples and samples from CPUs outside cpu_list are dropped. */
+	if (!al.filtered &&
+	    (!cpu_list || test_bit(sample->cpu, cpu_bitmap))) {
+		if (scripting_ops)
+			scripting_ops->process_event(event, sample, evsel, &al);
+		else
+			process_event(script, sample, evsel, &al, machine);
+	}
+
+	addr_location__put(&al);
+	return 0;
+}
+
+/*
+ * Attr handler: after the generic processing, set up the IP print options and
+ * check the attr for the newly added evsel, but only for the first evsel of
+ * each known (or synthesized) event type.
+ */
+static int process_attr(struct perf_tool *tool, union perf_event *event,
+			struct perf_evlist **pevlist)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct perf_evsel *added, *other;
+	struct perf_evlist *evlist;
+	int err = perf_event__process_attr(tool, event, pevlist);
+
+	if (err)
+		return err;
+
+	evlist = *pevlist;
+	added = perf_evlist__last(*pevlist);
+
+	if (added->attr.type >= PERF_TYPE_MAX &&
+	    added->attr.type != PERF_TYPE_SYNTH)
+		return 0;
+
+	/* Another evsel of the same type has already been handled. */
+	evlist__for_each_entry(evlist, other) {
+		if (other != added && other->attr.type == added->attr.type)
+			return 0;
+	}
+
+	set_print_ip_opts(&added->attr);
+
+	if (!added->attr.sample_type)
+		return 0;
+
+	return perf_evsel__check_attr(added, script->session);
+}
+
+/*
+ * COMM handler: run the default COMM processing, then print the event to
+ * stdout. Without sample_id_all the sample fields are filled from the event.
+ */
+static int process_comm_event(struct perf_tool *tool,
+			      union perf_event *event,
+			      struct perf_sample *sample,
+			      struct machine *machine)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct perf_evsel *evsel = perf_evlist__id2evsel(script->session->evlist,
+							 sample->id);
+	struct thread *thread;
+	int err = -1;
+
+	thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid);
+	if (!thread) {
+		pr_debug("problem processing COMM event, skipping it.\n");
+		return -1;
+	}
+
+	if (perf_event__process_comm(tool, event, sample, machine) >= 0) {
+		if (!evsel->attr.sample_id_all) {
+			sample->cpu = 0;
+			sample->time = 0;
+			sample->tid = event->comm.tid;
+			sample->pid = event->comm.pid;
+		}
+		perf_sample__fprintf_start(sample, thread, evsel,
+					   PERF_RECORD_COMM, stdout);
+		perf_event__fprintf(event, stdout);
+		err = 0;
+	}
+
+	thread__put(thread);
+	return err;
+}
+
+/*
+ * NAMESPACES handler: run the default processing, then print the event to
+ * stdout. Without sample_id_all the sample fields are filled from the event.
+ */
+static int process_namespaces_event(struct perf_tool *tool,
+				    union perf_event *event,
+				    struct perf_sample *sample,
+				    struct machine *machine)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct perf_evsel *evsel = perf_evlist__id2evsel(script->session->evlist,
+							 sample->id);
+	struct thread *thread;
+	int err = -1;
+
+	thread = machine__findnew_thread(machine, event->namespaces.pid,
+					 event->namespaces.tid);
+	if (!thread) {
+		pr_debug("problem processing NAMESPACES event, skipping it.\n");
+		return -1;
+	}
+
+	if (perf_event__process_namespaces(tool, event, sample, machine) >= 0) {
+		if (!evsel->attr.sample_id_all) {
+			sample->cpu = 0;
+			sample->time = 0;
+			sample->tid = event->namespaces.tid;
+			sample->pid = event->namespaces.pid;
+		}
+		perf_sample__fprintf_start(sample, thread, evsel,
+					   PERF_RECORD_NAMESPACES, stdout);
+		perf_event__fprintf(event, stdout);
+		err = 0;
+	}
+
+	thread__put(thread);
+	return err;
+}
+
+/*
+ * FORK handler: run the default processing first, then print the event to
+ * stdout. Without sample_id_all the sample time/tid/pid come from the event.
+ */
+static int process_fork_event(struct perf_tool *tool,
+			      union perf_event *event,
+			      struct perf_sample *sample,
+			      struct machine *machine)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct perf_evsel *evsel = perf_evlist__id2evsel(script->session->evlist,
+							 sample->id);
+	struct thread *thread;
+
+	if (perf_event__process_fork(tool, event, sample, machine) < 0)
+		return -1;
+
+	thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
+	if (!thread) {
+		pr_debug("problem processing FORK event, skipping it.\n");
+		return -1;
+	}
+
+	if (!evsel->attr.sample_id_all) {
+		sample->cpu = 0;
+		sample->time = event->fork.time;
+		sample->tid = event->fork.tid;
+		sample->pid = event->fork.pid;
+	}
+
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_FORK, stdout);
+	perf_event__fprintf(event, stdout);
+
+	thread__put(thread);
+	return 0;
+}
+/*
+ * EXIT handler: print the event to stdout first, then run the default EXIT
+ * processing. The pid/tid are read through the event's 'fork' member.
+ */
+static int process_exit_event(struct perf_tool *tool,
+			      union perf_event *event,
+			      struct perf_sample *sample,
+			      struct machine *machine)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct perf_evsel *evsel = perf_evlist__id2evsel(script->session->evlist,
+							 sample->id);
+	struct thread *thread;
+	int ret;
+
+	thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
+	if (!thread) {
+		pr_debug("problem processing EXIT event, skipping it.\n");
+		return -1;
+	}
+
+	if (!evsel->attr.sample_id_all) {
+		sample->cpu = 0;
+		sample->time = 0;
+		sample->tid = event->fork.tid;
+		sample->pid = event->fork.pid;
+	}
+
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_EXIT, stdout);
+	perf_event__fprintf(event, stdout);
+
+	ret = perf_event__process_exit(tool, event, sample, machine) < 0 ? -1 : 0;
+
+	thread__put(thread);
+	return ret;
+}
+
+/*
+ * MMAP handler: run the default processing, then print the event to stdout.
+ * Without sample_id_all the sample tid/pid come from the event.
+ */
+static int process_mmap_event(struct perf_tool *tool,
+			      union perf_event *event,
+			      struct perf_sample *sample,
+			      struct machine *machine)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct perf_evsel *evsel = perf_evlist__id2evsel(script->session->evlist,
+							 sample->id);
+	struct thread *thread;
+
+	if (perf_event__process_mmap(tool, event, sample, machine) < 0)
+		return -1;
+
+	thread = machine__findnew_thread(machine, event->mmap.pid, event->mmap.tid);
+	if (!thread) {
+		pr_debug("problem processing MMAP event, skipping it.\n");
+		return -1;
+	}
+
+	if (!evsel->attr.sample_id_all) {
+		sample->cpu = 0;
+		sample->time = 0;
+		sample->tid = event->mmap.tid;
+		sample->pid = event->mmap.pid;
+	}
+
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_MMAP, stdout);
+	perf_event__fprintf(event, stdout);
+
+	thread__put(thread);
+	return 0;
+}
+
+/*
+ * MMAP2 handler: run the default processing, then print the event to stdout.
+ * Without sample_id_all the sample tid/pid come from the event.
+ */
+static int process_mmap2_event(struct perf_tool *tool,
+			      union perf_event *event,
+			      struct perf_sample *sample,
+			      struct machine *machine)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct perf_evsel *evsel = perf_evlist__id2evsel(script->session->evlist,
+							 sample->id);
+	struct thread *thread;
+
+	if (perf_event__process_mmap2(tool, event, sample, machine) < 0)
+		return -1;
+
+	thread = machine__findnew_thread(machine, event->mmap2.pid, event->mmap2.tid);
+	if (!thread) {
+		pr_debug("problem processing MMAP2 event, skipping it.\n");
+		return -1;
+	}
+
+	if (!evsel->attr.sample_id_all) {
+		sample->cpu = 0;
+		sample->time = 0;
+		sample->tid = event->mmap2.tid;
+		sample->pid = event->mmap2.pid;
+	}
+
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_MMAP2, stdout);
+	perf_event__fprintf(event, stdout);
+
+	thread__put(thread);
+	return 0;
+}
+
+/*
+ * Context-switch handler: run the default processing, then print the event to
+ * stdout using the thread identified by the sample's pid/tid.
+ */
+static int process_switch_event(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct machine *machine)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct perf_evsel *evsel = perf_evlist__id2evsel(script->session->evlist,
+							 sample->id);
+	struct thread *thread;
+
+	if (perf_event__process_switch(tool, event, sample, machine) < 0)
+		return -1;
+
+	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+	if (!thread) {
+		pr_debug("problem processing SWITCH event, skipping it.\n");
+		return -1;
+	}
+
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_SWITCH, stdout);
+	perf_event__fprintf(event, stdout);
+
+	thread__put(thread);
+	return 0;
+}
+
+/*
+ * LOST handler: print the event to stdout using the thread identified by the
+ * sample's pid/tid. Returns -1 when that thread cannot be obtained.
+ */
+static int
+process_lost_event(struct perf_tool *tool,
+		   union perf_event *event,
+		   struct perf_sample *sample,
+		   struct machine *machine)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct perf_evsel *evsel = perf_evlist__id2evsel(script->session->evlist,
+							 sample->id);
+	struct thread *thread = machine__findnew_thread(machine, sample->pid,
+							sample->tid);
+
+	if (!thread)
+		return -1;
+
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_LOST, stdout);
+	perf_event__fprintf(event, stdout);
+
+	thread__put(thread);
+	return 0;
+}
+
+/* FINISHED_ROUND handler: just print the event to stdout. Always returns 0. */
+static int
+process_finished_round_event(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct ordered_events *oe __maybe_unused)
+{
+	perf_event__fprintf(event, stdout);
+
+	return 0;
+}
+
+/* Signal handler (installed for SIGINT by __cmd_script()): raise session_done. */
+static void sig_handler(int sig __maybe_unused)
+{
+	session_done = 1;
+}
+
+/*
+ * Delete the per-evsel output objects, stopping at the first evsel that has
+ * none (the point where perf_script__fopen_per_event_dump() gave up).
+ */
+static void perf_script__fclose_per_event_dump(struct perf_script *script)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(script->session->evlist, pos) {
+		struct perf_evsel_script *es = pos->priv;
+
+		if (es == NULL)
+			break;
+
+		perf_evsel_script__delete(es);
+		pos->priv = NULL;
+	}
+}
+
+/*
+ * Create an output object for every evsel that does not have one yet.
+ * On failure everything created so far is torn down and -1 is returned.
+ */
+static int perf_script__fopen_per_event_dump(struct perf_script *script)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(script->session->evlist, pos) {
+		/*
+		 * Skip evsels that are already set up: we may be called twice
+		 * in cases like Intel PT, once for the intel_pt// and dummy
+		 * events, then for the evsels synthesized from the auxtrace
+		 * info.
+		 *
+		 * See perf_script__process_auxtrace_info.
+		 */
+		if (pos->priv == NULL) {
+			pos->priv = perf_evsel_script__new(pos, script->session->data);
+			if (pos->priv == NULL) {
+				perf_script__fclose_per_event_dump(script);
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Give every evsel an output object: its own one in per-event-dump mode,
+ * otherwise a single shared object that writes to stdout.
+ */
+static int perf_script__setup_per_event_dump(struct perf_script *script)
+{
+	static struct perf_evsel_script shared_stdout;
+	struct perf_evsel *pos;
+
+	if (!script->per_event_dump) {
+		shared_stdout.fp = stdout;
+
+		evlist__for_each_entry(script->session->evlist, pos)
+			pos->priv = &shared_stdout;
+
+		return 0;
+	}
+
+	return perf_script__fopen_per_event_dump(script);
+}
+
+/* Print each evsel's output-object summary to stdout, then delete the object. */
+static void perf_script__exit_per_event_dump_stats(struct perf_script *script)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(script->session->evlist, pos) {
+		perf_evsel_script__fprintf(pos->priv, stdout);
+		perf_evsel_script__delete(pos->priv);
+		pos->priv = NULL;
+	}
+}
+
+/*
+ * Run the session: install the optional event printers selected in @script,
+ * set up the per-evsel output objects and process all events.
+ *
+ * Returns -1 if the output objects cannot be set up, otherwise the result of
+ * perf_session__process_events().
+ */
+static int __cmd_script(struct perf_script *script)
+{
+	struct perf_tool *tool = &script->tool;
+	int err;
+
+	signal(SIGINT, sig_handler);
+
+	/* Swap in the printing variants of the handlers that were asked for. */
+	if (script->show_task_events) {
+		tool->comm = process_comm_event;
+		tool->fork = process_fork_event;
+		tool->exit = process_exit_event;
+	}
+
+	if (script->show_mmap_events) {
+		tool->mmap = process_mmap_event;
+		tool->mmap2 = process_mmap2_event;
+	}
+
+	if (script->show_switch_events)
+		tool->context_switch = process_switch_event;
+
+	if (script->show_namespace_events)
+		tool->namespaces = process_namespaces_event;
+
+	if (script->show_lost_events)
+		tool->lost = process_lost_event;
+
+	if (script->show_round_events) {
+		tool->ordered_events = false;
+		tool->finished_round = process_finished_round_event;
+	}
+
+	if (perf_script__setup_per_event_dump(script) != 0) {
+		pr_err("Couldn't create the per event dump files\n");
+		return -1;
+	}
+
+	err = perf_session__process_events(script->session);
+
+	if (script->per_event_dump)
+		perf_script__exit_per_event_dump_stats(script);
+
+	if (debug_mode)
+		pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered);
+
+	return err;
+}
+
+struct script_spec {	/* one registered language specifier, linked on script_specs */
+	struct list_head	node;
+	struct scripting_ops	*ops;	/* backend returned by script_spec__lookup() */
+	char			spec[0];	/* specifier string, stored inline by script_spec__new() */
+};
+
+static LIST_HEAD(script_specs);	/* appended to by script_spec__add() */
+
+/*
+ * Allocate a script_spec with a copy of @spec stored inline after the struct.
+ * Returns NULL if the allocation fails.
+ */
+static struct script_spec *script_spec__new(const char *spec,
+					    struct scripting_ops *ops)
+{
+	struct script_spec *entry = malloc(sizeof(*entry) + strlen(spec) + 1);
+
+	if (entry == NULL)
+		return NULL;
+
+	strcpy(entry->spec, spec);
+	entry->ops = ops;
+
+	return entry;
+}
+
+/* Append @s to the tail of the script_specs list. */
+static void script_spec__add(struct script_spec *s)
+{
+	struct list_head *entry = &s->node;
+
+	list_add_tail(entry, &script_specs);
+}
+
+/* Case-insensitive lookup of @spec in script_specs; NULL when not registered. */
+static struct script_spec *script_spec__find(const char *spec)
+{
+	struct script_spec *pos;
+
+	list_for_each_entry(pos, &script_specs, node) {
+		if (!strcasecmp(pos->spec, spec))
+			return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * Register @ops under the specifier @spec.
+ * Returns 0 on success, -1 if @spec is already registered or on allocation
+ * failure.
+ */
+int script_spec_register(const char *spec, struct scripting_ops *ops)
+{
+	struct script_spec *entry;
+
+	if (script_spec__find(spec))
+		return -1;
+
+	entry = script_spec__new(spec, ops);
+	if (!entry)
+		return -1;
+
+	script_spec__add(entry);
+	return 0;
+}
+
+/* Return the backend registered for @spec, or NULL if there is none. */
+static struct scripting_ops *script_spec__lookup(const char *spec)
+{
+	struct script_spec *found = script_spec__find(spec);
+
+	return found ? found->ops : NULL;
+}
+
+/* Print every registered specifier and its backend name to stderr. */
+static void list_available_languages(void)
+{
+	struct script_spec *pos;
+
+	fputs("\n", stderr);
+	fputs("Scripting language extensions (used in "
+		"perf script -s [spec:]script.[spec]):\n\n", stderr);
+
+	list_for_each_entry(pos, &script_specs, node) {
+		fprintf(stderr, "  %-42s [%s]\n", pos->spec, pos->ops->name);
+	}
+
+	fputs("\n", stderr);
+}
+
+/*
+ * Option callback parsing a "[spec:]script" argument.
+ *
+ * "lang" lists the registered languages and exits. Otherwise the backend is
+ * taken from the "spec:" prefix when there is one, else from the text after
+ * the last '.' of the script name. On success scripting_ops is set and
+ * script_name holds a heap copy of the script path.
+ *
+ * Returns 0 on success, -1 for an unknown/invalid specifier or extension and
+ * -ENOMEM if the script name cannot be duplicated.
+ */
+static int parse_scriptname(const struct option *opt __maybe_unused,
+			    const char *str, int unset __maybe_unused)
+{
+	char spec[PATH_MAX];
+	const char *script, *ext;
+	int len;
+
+	if (strcmp(str, "lang") == 0) {
+		list_available_languages();
+		exit(0);
+	}
+
+	script = strchr(str, ':');
+	if (script) {
+		len = script - str;
+		if (len >= PATH_MAX) {
+			fprintf(stderr, "invalid language specifier");
+			return -1;
+		}
+		/* strncpy() does not terminate here; done explicitly below. */
+		strncpy(spec, str, len);
+		spec[len] = '\0';
+		scripting_ops = script_spec__lookup(spec);
+		if (!scripting_ops) {
+			fprintf(stderr, "invalid language specifier");
+			return -1;
+		}
+		script++;
+	} else {
+		script = str;
+		ext = strrchr(script, '.');
+		if (!ext) {
+			fprintf(stderr, "invalid script extension");
+			return -1;
+		}
+		scripting_ops = script_spec__lookup(++ext);
+		if (!scripting_ops) {
+			fprintf(stderr, "invalid script extension");
+			return -1;
+		}
+	}
+
+	/* Do not report success with a NULL script_name. */
+	script_name = strdup(script);
+	if (!script_name)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Option callback parsing an output field list: "[type:]field[,field...]".
+ *
+ * Without +/- prefixes the listed fields replace the current selection (for
+ * the named type, or for all types when no type is given). With +/- prefixes
+ * the fields are added to / removed from the current selection instead; this
+ * applies both with and without a type prefix. Plain and +/- fields cannot be
+ * mixed in one list. "flags" is handled separately through print_flags.
+ *
+ * Returns 0 on success, -ENOMEM if @arg cannot be duplicated and -EINVAL for
+ * an unknown type or field, an invalid field for the given type, an empty
+ * untyped list, or a mix of plain and +/- fields.
+ */
+static int parse_output_fields(const struct option *opt __maybe_unused,
+			    const char *arg, int unset __maybe_unused)
+{
+	char *tok, *strtok_saveptr = NULL;
+	int i, imax = ARRAY_SIZE(all_output_options);
+	int j;
+	int rc = 0;
+	char *str = strdup(arg);
+	int type = -1;
+	enum { DEFAULT, SET, ADD, REMOVE } change = DEFAULT;
+
+	if (!str)
+		return -ENOMEM;
+
+	/* first word can state for which event type the user is specifying
+	 * the fields. If no type exists, the specified fields apply to all
+	 * event types found in the file minus the invalid fields for a type.
+	 */
+	tok = strchr(str, ':');
+	if (tok) {
+		*tok = '\0';
+		tok++;
+		if (!strcmp(str, "hw"))
+			type = PERF_TYPE_HARDWARE;
+		else if (!strcmp(str, "sw"))
+			type = PERF_TYPE_SOFTWARE;
+		else if (!strcmp(str, "trace"))
+			type = PERF_TYPE_TRACEPOINT;
+		else if (!strcmp(str, "raw"))
+			type = PERF_TYPE_RAW;
+		else if (!strcmp(str, "break"))
+			type = PERF_TYPE_BREAKPOINT;
+		else if (!strcmp(str, "synth"))
+			type = OUTPUT_TYPE_SYNTH;
+		else {
+			fprintf(stderr, "Invalid event type in field string.\n");
+			rc = -EINVAL;
+			goto out;
+		}
+
+		/*
+		 * Don't override this type's current fields for +-, same as
+		 * the untyped case below.
+		 */
+		if (strchr(tok, '+') || strchr(tok, '-'))
+			goto parse;
+
+		if (output[type].user_set)
+			pr_warning("Overriding previous field request for %s events.\n",
+				   event_type(type));
+
+		output[type].fields = 0;
+		output[type].user_set = true;
+		output[type].wildcard_set = false;
+
+	} else {
+		tok = str;
+		if (strlen(str) == 0) {
+			fprintf(stderr,
+				"Cannot set fields to 'none' for all event types.\n");
+			rc = -EINVAL;
+			goto out;
+		}
+
+		/* Don't override defaults for +- */
+		if (strchr(str, '+') || strchr(str, '-'))
+			goto parse;
+
+		if (output_set_by_user())
+			pr_warning("Overriding previous field request for all events.\n");
+
+		for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
+			output[j].fields = 0;
+			output[j].user_set = true;
+			output[j].wildcard_set = true;
+		}
+	}
+
+parse:
+	for (tok = strtok_r(tok, ",", &strtok_saveptr); tok; tok = strtok_r(NULL, ",", &strtok_saveptr)) {
+		if (*tok == '+') {
+			if (change == SET)
+				goto out_badmix;
+			change = ADD;
+			tok++;
+		} else if (*tok == '-') {
+			if (change == SET)
+				goto out_badmix;
+			change = REMOVE;
+			tok++;
+		} else {
+			if (change != SET && change != DEFAULT)
+				goto out_badmix;
+			change = SET;
+		}
+
+		for (i = 0; i < imax; ++i) {
+			if (strcmp(tok, all_output_options[i].str) == 0)
+				break;
+		}
+		/* "flags" is not in all_output_options; it toggles print_flags. */
+		if (i == imax && strcmp(tok, "flags") == 0) {
+			print_flags = change == REMOVE ? false : true;
+			continue;
+		}
+		if (i == imax) {
+			fprintf(stderr, "Invalid field requested.\n");
+			rc = -EINVAL;
+			goto out;
+		}
+
+		if (type == -1) {
+			/* add user option to all events types for
+			 * which it is valid
+			 */
+			for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
+				if (output[j].invalid_fields & all_output_options[i].field) {
+					pr_warning("\'%s\' not valid for %s events. Ignoring.\n",
+						   all_output_options[i].str, event_type(j));
+				} else {
+					if (change == REMOVE)
+						output[j].fields &= ~all_output_options[i].field;
+					else
+						output[j].fields |= all_output_options[i].field;
+				}
+			}
+		} else {
+			if (output[type].invalid_fields & all_output_options[i].field) {
+				fprintf(stderr, "\'%s\' not valid for %s events.\n",
+					 all_output_options[i].str, event_type(type));
+
+				rc = -EINVAL;
+				goto out;
+			}
+			/* Honour "-field" for a single type as well. */
+			if (change == REMOVE)
+				output[type].fields &= ~all_output_options[i].field;
+			else
+				output[type].fields |= all_output_options[i].field;
+		}
+	}
+
+	if (type >= 0) {
+		if (output[type].fields == 0) {
+			pr_debug("No fields requested for %s type. "
+				 "Events will not be displayed.\n", event_type(type));
+		}
+	}
+	goto out;
+
+out_badmix:
+	fprintf(stderr, "Cannot mix +-field with overridden fields\n");
+	rc = -EINVAL;
+out:
+	free(str);
+	return rc;
+}
+
+#define for_each_lang(scripts_path, scripts_dir, lang_dirent)	/* visit each directory entry of scripts_dir except "." and ".." */	\
+	while ((lang_dirent = readdir(scripts_dir)) != NULL)		\
+		if ((lang_dirent->d_type == DT_DIR ||			\
+		     (lang_dirent->d_type == DT_UNKNOWN &&		\
+		      is_directory(scripts_path, lang_dirent))) &&	\
+		    (strcmp(lang_dirent->d_name, ".")) &&		\
+		    (strcmp(lang_dirent->d_name, "..")))
+
+#define for_each_script(lang_path, lang_dir, script_dirent)	/* visit each entry of lang_dir that is not a directory */	\
+	while ((script_dirent = readdir(lang_dir)) != NULL)		\
+		if (script_dirent->d_type != DT_DIR &&			\
+		    (script_dirent->d_type != DT_UNKNOWN ||		\
+		     !is_directory(lang_path, script_dirent)))
+
+
+#define RECORD_SUFFIX			"-record"
+#define REPORT_SUFFIX			"-report"	/* stripped by get_script_root() in list_available_scripts() */
+
+struct script_desc {	/* one script shown by list_available_scripts(), linked on script_descs */
+	struct list_head	node;
+	char			*name;	/* heap copy made by script_desc__new() */
+	char			*half_liner;	/* text after "description:" found by read_script_info() */
+	char			*args;	/* text after "args:" found by read_script_info() */
+};
+
+static LIST_HEAD(script_descs);	/* appended to by script_desc__add() */
+
+/*
+ * Allocate a zeroed script_desc and give it a heap copy of @name (if any).
+ *
+ * Returns NULL if either allocation fails, so a descriptor created with a
+ * name never has name == NULL; script_desc__find() passes the name of every
+ * listed descriptor to strcasecmp().
+ */
+static struct script_desc *script_desc__new(const char *name)
+{
+	struct script_desc *s = zalloc(sizeof(*s));
+
+	if (s == NULL)
+		return NULL;
+
+	if (name) {
+		s->name = strdup(name);
+		if (s->name == NULL) {
+			free(s);
+			return NULL;
+		}
+	}
+
+	return s;
+}
+
+/* Release the strings owned by @s and then @s itself. */
+static void script_desc__delete(struct script_desc *s)
+{
+	/* Owned strings first ... */
+	zfree(&s->name);
+	zfree(&s->half_liner);
+	zfree(&s->args);
+
+	/* ... then the descriptor. */
+	free(s);
+}
+
+/* Append @s to the tail of the script_descs list. */
+static void script_desc__add(struct script_desc *s)
+{
+	struct list_head *entry = &s->node;
+
+	list_add_tail(entry, &script_descs);
+}
+
+/* Case-insensitive lookup of @name in script_descs; NULL when not present. */
+static struct script_desc *script_desc__find(const char *name)
+{
+	struct script_desc *pos;
+
+	list_for_each_entry(pos, &script_descs, node) {
+		if (!strcasecmp(pos->name, name))
+			return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the descriptor named @name, creating and listing a new one if it
+ * does not exist yet. Returns NULL when a new one cannot be created.
+ */
+static struct script_desc *script_desc__findnew(const char *name)
+{
+	struct script_desc *desc = script_desc__find(name);
+
+	if (desc == NULL) {
+		desc = script_desc__new(name);
+		if (desc != NULL)
+			script_desc__add(desc);
+	}
+
+	return desc;
+}
+
+/*
+ * If @str is strictly longer than @suffix and ends with it, return a pointer
+ * to where the suffix starts inside @str; otherwise return NULL.
+ */
+static const char *ends_with(const char *str, const char *suffix)
+{
+	size_t suffix_len = strlen(suffix);
+	size_t str_len = strlen(str);
+	const char *tail;
+
+	if (str_len <= suffix_len)
+		return NULL;
+
+	tail = str + (str_len - suffix_len);
+
+	return strncmp(tail, suffix, suffix_len) ? NULL : tail;
+}
+
+/*
+ * Scan the '#' comment lines of @filename (skipping "#!" lines) for
+ * "description:" and "args:" entries and store heap copies of their text in
+ * @desc->half_liner and @desc->args.
+ *
+ * A later entry replaces an earlier one; the earlier copy is freed, also when
+ * the same descriptor is filled from more than one file.
+ *
+ * Returns 0 on success, -1 if @desc is NULL or the file cannot be opened.
+ */
+static int read_script_info(struct script_desc *desc, const char *filename)
+{
+	char line[BUFSIZ], *p;
+	FILE *fp;
+
+	if (!desc)
+		return -1;
+
+	fp = fopen(filename, "r");
+	if (!fp)
+		return -1;
+
+	while (fgets(line, sizeof(line), fp)) {
+		p = ltrim(line);
+		if (strlen(p) == 0)
+			continue;
+		if (*p != '#')
+			continue;
+		p++;
+		if (strlen(p) && *p == '!')
+			continue;
+
+		p = ltrim(p);
+		/* Drop the trailing newline kept by fgets(). */
+		if (strlen(p) && p[strlen(p) - 1] == '\n')
+			p[strlen(p) - 1] = '\0';
+
+		if (!strncmp(p, "description:", strlen("description:"))) {
+			p += strlen("description:");
+			free(desc->half_liner);
+			desc->half_liner = strdup(ltrim(p));
+			continue;
+		}
+
+		if (!strncmp(p, "args:", strlen("args:"))) {
+			p += strlen("args:");
+			free(desc->args);
+			desc->args = strdup(ltrim(p));
+			continue;
+		}
+	}
+
+	fclose(fp);
+
+	return 0;
+}
+
+/*
+ * Return a heap copy of the entry's file name with @suffix cut off, or NULL
+ * if the name does not end with @suffix or the copy cannot be allocated.
+ * The caller frees the result.
+ */
+static char *get_script_root(struct dirent *script_dirent, const char *suffix)
+{
+	char *root = strdup(script_dirent->d_name);
+	char *cut;
+
+	if (root == NULL)
+		return NULL;
+
+	cut = (char *)ends_with(root, suffix);
+	if (cut == NULL) {
+		free(root);
+		return NULL;
+	}
+
+	*cut = '\0';
+
+	return root;
+}
+
+/*
+ * Option callback: list every "<name>-report" script found under
+ * "<exec path>/scripts/<lang>/bin" together with its args and description,
+ * then exit(0). Exits with -1 if the scripts directory cannot be opened.
+ */
+static int list_available_scripts(const struct option *opt __maybe_unused,
+				  const char *s __maybe_unused,
+				  int unset __maybe_unused)
+{
+	struct dirent *script_dirent, *lang_dirent;
+	char scripts_path[MAXPATHLEN];
+	DIR *scripts_dir, *lang_dir;
+	char script_path[MAXPATHLEN];
+	char lang_path[MAXPATHLEN];
+	struct script_desc *desc;
+	char first_half[BUFSIZ];
+	char *script_root;
+
+	snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
+
+	scripts_dir = opendir(scripts_path);
+	if (!scripts_dir) {
+		fprintf(stdout,
+			"open(%s) failed.\n"
+			"Check \"PERF_EXEC_PATH\" env to set scripts dir.\n",
+			scripts_path);
+		exit(-1);
+	}
+
+	for_each_lang(scripts_path, scripts_dir, lang_dirent) {
+		scnprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
+			  lang_dirent->d_name);
+		lang_dir = opendir(lang_path);
+		if (!lang_dir)
+			continue;
+
+		for_each_script(lang_path, lang_dir, script_dirent) {
+			script_root = get_script_root(script_dirent, REPORT_SUFFIX);
+			if (script_root) {
+				desc = script_desc__findnew(script_root);
+				/* findnew returns NULL on allocation failure */
+				if (desc) {
+					scnprintf(script_path, MAXPATHLEN, "%s/%s",
+						  lang_path, script_dirent->d_name);
+					read_script_info(desc, script_path);
+				}
+				free(script_root);
+			}
+		}
+		closedir(lang_dir);
+	}
+	closedir(scripts_dir);
+
+	fprintf(stdout, "List of available trace scripts:\n");
+	list_for_each_entry(desc, &script_descs, node) {
+		/* name + args may exceed first_half: bound the write */
+		snprintf(first_half, sizeof(first_half), "%s %s", desc->name,
+			 desc->args ? desc->args : "");
+		fprintf(stdout, "  %-36s %s\n", first_half,
+			desc->half_liner ? desc->half_liner : "");
+	}
+
+	exit(0);
+}
+
+/*
+ * Some scripts specify the required events in their "xxx-record" file,
+ * this function will check if the events in perf.data match those
+ * mentioned in the "xxx-record".
+ *
+ * Fixme: All existing "xxx-record" files use the simple "-e event " format,
+ * which is covered well now. New parsing code should be added to cover
+ * more complex formats such as event groups.
+ *
+ * Every token following a "-e" on a non-comment line is compared, by exact
+ * name, against the events of @session. The token is compared in place, so
+ * its length is not limited by any local buffer.
+ *
+ * Returns 0 if all such events are present, -1 if one is missing or the
+ * "<dir_name>/bin/<scriptname>-record" file cannot be opened.
+ */
+static int check_ev_match(char *dir_name, char *scriptname,
+			struct perf_session *session)
+{
+	char filename[MAXPATHLEN];
+	char line[BUFSIZ], *p;
+	struct perf_evsel *pos;
+	int match, len;
+	FILE *fp;
+
+	scnprintf(filename, MAXPATHLEN, "%s/bin/%s-record", dir_name, scriptname);
+
+	fp = fopen(filename, "r");
+	if (!fp)
+		return -1;
+
+	while (fgets(line, sizeof(line), fp)) {
+		p = ltrim(line);
+		if (*p == '#')
+			continue;
+
+		while (strlen(p)) {
+			p = strstr(p, "-e");
+			if (!p)
+				break;
+
+			p += 2;
+			p = ltrim(p);
+			/* the event name runs up to the next blank or tab */
+			len = strcspn(p, " \t");
+			if (!len)
+				break;
+
+			match = 0;
+			evlist__for_each_entry(session->evlist, pos) {
+				const char *name = perf_evsel__name(pos);
+
+				if (strlen(name) == (size_t)len &&
+				    !strncmp(name, p, len)) {
+					match = 1;
+					break;
+				}
+			}
+
+			if (!match) {
+				fclose(fp);
+				return -1;
+			}
+		}
+	}
+
+	fclose(fp);
+	return 0;
+}
+
+/*
+ * Return -1 if none is found, otherwise the actual scripts number.
+ *
+ * Currently the only user of this function is the script browser, which
+ * will list all statically runnable scripts, select one, execute it and
+ * show the output in a perf browser.
+ *
+ * For each accepted script, scripts_path_array[i] receives its full path and
+ * scripts_array[i] its file name up to the first '.'. Entries whose name has
+ * no '.' are skipped, as are "top." scripts and scripts whose record file
+ * does not match the events of the input file.
+ *
+ * NOTE(review): nothing here bounds 'i' or the length written into the
+ * caller's buffers; their capacities are not visible from this function.
+ */
+int find_scripts(char **scripts_array, char **scripts_path_array)
+{
+	struct dirent *script_dirent, *lang_dirent;
+	char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN];
+	DIR *scripts_dir, *lang_dir;
+	struct perf_session *session;
+	struct perf_data data = {
+		.file      = {
+			.path = input_name,
+		},
+		.mode      = PERF_DATA_MODE_READ,
+	};
+	char *temp;
+	int i = 0;
+
+	session = perf_session__new(&data, false, NULL);
+	if (!session)
+		return -1;
+
+	snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
+
+	scripts_dir = opendir(scripts_path);
+	if (!scripts_dir) {
+		perf_session__delete(session);
+		return -1;
+	}
+
+	for_each_lang(scripts_path, scripts_dir, lang_dirent) {
+		scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path,
+			  lang_dirent->d_name);
+#ifndef HAVE_LIBPERL_SUPPORT
+		if (strstr(lang_path, "perl"))
+			continue;
+#endif
+#ifndef HAVE_LIBPYTHON_SUPPORT
+		if (strstr(lang_path, "python"))
+			continue;
+#endif
+
+		lang_dir = opendir(lang_path);
+		if (!lang_dir)
+			continue;
+
+		for_each_script(lang_path, lang_dir, script_dirent) {
+			/* Skip those real time scripts: xxxtop.p[yl] */
+			if (strstr(script_dirent->d_name, "top."))
+				continue;
+
+			/* No extension: the name length below would be bogus */
+			temp = strchr(script_dirent->d_name, '.');
+			if (!temp)
+				continue;
+
+			sprintf(scripts_path_array[i], "%s/%s", lang_path,
+				script_dirent->d_name);
+			snprintf(scripts_array[i],
+				(temp - script_dirent->d_name) + 1,
+				"%s", script_dirent->d_name);
+
+			if (check_ev_match(lang_path,
+					scripts_array[i], session))
+				continue;
+
+			i++;
+		}
+		closedir(lang_dir);
+	}
+
+	closedir(scripts_dir);
+	perf_session__delete(session);
+	return i;
+}
+
+/* Return the strdup()ed path of the script matching script_root, or NULL. */
+static char *get_script_path(const char *script_root, const char *suffix)
+{
+	struct dirent *script_dirent, *lang_dirent;
+	char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN];
+	char script_path[MAXPATHLEN];
+	DIR *scripts_dir, *lang_dir;
+	char *__script_root;
+
+	snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
+
+	scripts_dir = opendir(scripts_path);
+	if (!scripts_dir)
+		return NULL;
+
+	for_each_lang(scripts_path, scripts_dir, lang_dirent) {
+		scnprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
+			  lang_dirent->d_name);
+		lang_dir = opendir(lang_path);
+		if (!lang_dir)
+			continue;
+
+		for_each_script(lang_path, lang_dir, script_dirent) {
+			__script_root = get_script_root(script_dirent, suffix);
+			if (__script_root && !strcmp(script_root, __script_root)) {
+				free(__script_root);
+				/* d_name is invalid once lang_dir is closed */
+				scnprintf(script_path, MAXPATHLEN, "%s/%s",
+					  lang_path, script_dirent->d_name);
+				closedir(lang_dir);
+				closedir(scripts_dir);
+				return strdup(script_path);
+			}
+			free(__script_root);
+		}
+		closedir(lang_dir);
+	}
+	closedir(scripts_dir);
+	return NULL;
+}
+
+static bool is_top_script(const char *script_path)
+{
+	return ends_with(script_path, "top") != NULL; /* name ends in "top"? */
+}
+
+/* Count the '<' characters in the script's args (presumably one per <arg>). */
+static int has_required_arg(char *script_path)
+{
+	struct script_desc *desc = script_desc__new(NULL);
+	int n_args = 0;
+	char *p;
+
+	if (!desc)	/* presumably NULL on allocation failure: nothing to do */
+		return 0;
+
+	/* A failed read or a missing args string both count as zero args */
+	if (read_script_info(desc, script_path) || !desc->args)
+		goto out;
+
+	for (p = desc->args; *p; p++)
+		if (*p == '<')
+			n_args++;
+out:
+	script_desc__delete(desc);
+
+	return n_args;
+}
+
+static int have_cmd(int argc, const char **argv)
+{
+	char **__argv = malloc(sizeof(const char *) * argc);
+
+	if (!__argv) {
+		pr_err("malloc failed\n");
+		return -1;
+	}
+	/* Parse record_options on a scratch copy of argv, not on argv itself */
+	memcpy(__argv, argv, sizeof(const char *) * argc);
+	argc = parse_options(argc, (const char **)__argv, record_options,
+			     NULL, PARSE_OPT_STOP_AT_NON_OPTION);
+	free(__argv);
+	/* Presumably nothing left over means no command: go system wide */
+	system_wide = (argc == 0);
+
+	return 0;
+}
+
+static void script__setup_sample_type(struct perf_script *script)
+{
+	u64 type = perf_evlist__combined_sample_type(script->session->evlist);
+
+	/* The callchain record mode is only chosen when callchains are in use */
+	if (!symbol_conf.use_callchain && !symbol_conf.cumulate_callchain)
+		return;
+
+	if ((type & PERF_SAMPLE_REGS_USER) && (type & PERF_SAMPLE_STACK_USER)) {
+		callchain_param.record_mode = CALLCHAIN_DWARF;
+		dwarf_callchain_users = true;
+	} else {
+		callchain_param.record_mode = (type & PERF_SAMPLE_BRANCH_STACK) ?
+					      CALLCHAIN_LBR : CALLCHAIN_FP;
+	}
+}
+
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+				    union perf_event *event,
+				    struct perf_session *session)
+{
+	struct stat_round_event *round = &event->stat_round;
+	struct perf_evsel *counter;
+
+	evlist__for_each_entry(session->evlist, counter) {
+		perf_stat_process_counter(&stat_config, counter);
+		process_stat(counter, round->time);
+	}
+	/* Counters done (processing results are not checked); now the round */
+	process_stat_interval(round->time);
+	return 0;
+}
+
+static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_session *session __maybe_unused)
+{
+	perf_event__read_stat_config(&stat_config, &event->stat_config);
+	return 0;	/* unconditional: no result of the read is checked */
+}
+
+static int set_maps(struct perf_script *script)
+{
+	struct perf_evlist *evlist = script->session->evlist;
+
+	if (!script->cpus || !script->threads)
+		return 0;	/* not an error: wait until both maps are set */
+
+	if (WARN_ONCE(script->allocated, "stats double allocation\n"))
+		return -EINVAL;
+	/* Attach both maps to the evlist, then allocate its stats once */
+	perf_evlist__set_maps(evlist, script->cpus, script->threads);
+
+	if (perf_evlist__alloc_stats(evlist, true))
+		return -ENOMEM;
+
+	script->allocated = true;
+	return 0;
+}
+
+static
+int process_thread_map_event(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_session *session __maybe_unused)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+	if (script->threads) {
+		pr_warning("Extra thread map event, ignoring.\n");
+		return 0;
+	}
+	/* First thread map seen: build it from the event payload */
+	script->threads = thread_map__new_event(&event->thread_map);
+	if (!script->threads)
+		return -ENOMEM;
+	/* set_maps() does nothing until the cpu map is known as well */
+	return set_maps(script);
+}
+
+static
+int process_cpu_map_event(struct perf_tool *tool,
+			  union perf_event *event,
+			  struct perf_session *session __maybe_unused)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+	if (script->cpus) {
+		pr_warning("Extra cpu map event, ignoring.\n");
+		return 0;
+	}
+	/* First cpu map seen: build it from the event payload */
+	script->cpus = cpu_map__new_data(&event->cpu_map.data);
+	if (!script->cpus)
+		return -ENOMEM;
+	/* set_maps() does nothing until the thread map is known as well */
+	return set_maps(script);
+}
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+/* Process the auxtrace info, then run perf_script__setup_per_event_dump(). */
+static int perf_script__process_auxtrace_info(struct perf_tool *tool,
+					      union perf_event *event,
+					      struct perf_session *session)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	int ret = perf_event__process_auxtrace_info(tool, event, session);
+
+	/* The per-event dump setup only happens after a successful process */
+	if (ret)
+		return ret;
+
+	return perf_script__setup_per_event_dump(script);
+}
+#else
+#define perf_script__process_auxtrace_info 0
+#endif
+
+/*
+ * 'perf script' entry point: run a record/report script (or a live
+ * record | report pipeline), or else process the perf.data input itself.
+ */
+int cmd_script(int argc, const char **argv)
+{
+	bool show_full_info = false;
+	bool header = false;
+	bool header_only = false;
+	bool script_started = false;
+	char *rec_script_path = NULL;
+	char *rep_script_path = NULL;
+	struct perf_session *session;
+	struct itrace_synth_opts itrace_synth_opts = { .set = false, };
+	char *script_path = NULL;
+	const char **__argv;
+	int i, j, err = 0;
+	struct perf_script script = {
+		.tool = {
+			.sample		 = process_sample_event,
+			.mmap		 = perf_event__process_mmap,
+			.mmap2		 = perf_event__process_mmap2,
+			.comm		 = perf_event__process_comm,
+			.namespaces	 = perf_event__process_namespaces,
+			.exit		 = perf_event__process_exit,
+			.fork		 = perf_event__process_fork,
+			.attr		 = process_attr,
+			.event_update   = perf_event__process_event_update,
+			.tracing_data	 = perf_event__process_tracing_data,
+			.feature	 = perf_event__process_feature,
+			.build_id	 = perf_event__process_build_id,
+			.id_index	 = perf_event__process_id_index,
+			.auxtrace_info	 = perf_script__process_auxtrace_info,
+			.auxtrace	 = perf_event__process_auxtrace,
+			.auxtrace_error	 = perf_event__process_auxtrace_error,
+			.stat		 = perf_event__process_stat_event,
+			.stat_round	 = process_stat_round_event,
+			.stat_config	 = process_stat_config_event,
+			.thread_map	 = process_thread_map_event,
+			.cpu_map	 = process_cpu_map_event,
+			.ordered_events	 = true,
+			.ordering_requires_timestamps = true,
+		},
+	};
+	struct perf_data data = {
+		.mode = PERF_DATA_MODE_READ,
+	};
+	const struct option options[] = {
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_INCR('v', "verbose", &verbose,
+		 "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('L', "Latency", &latency_format,
+		    "show latency attributes (irqs/preemption disabled, etc)"),
+	OPT_CALLBACK_NOOPT('l', "list", NULL, NULL, "list available scripts",
+			   list_available_scripts),
+	OPT_CALLBACK('s', "script", NULL, "name",
+		     "script file name (lang:script name, script name, or *)",
+		     parse_scriptname),
+	OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
+		   "generate perf-script.xx script in specified language"),
+	OPT_STRING('i', "input", &input_name, "file", "input file name"),
+	OPT_BOOLEAN('d', "debug-mode", &debug_mode,
+		   "do various checks like samples ordering and lost events"),
+	OPT_BOOLEAN(0, "header", &header, "Show data header."),
+	OPT_BOOLEAN(0, "header-only", &header_only, "Show only data header."),
+	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+		   "file", "vmlinux pathname"),
+	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+		   "file", "kallsyms pathname"),
+	OPT_BOOLEAN('G', "hide-call-graph", &no_callchain,
+		    "When printing symbols do not display call chain"),
+	OPT_CALLBACK(0, "symfs", NULL, "directory",
+		     "Look for files with symbols relative to this directory",
+		     symbol__config_symfs),
+	OPT_CALLBACK('F', "fields", NULL, "str",
+		     "comma separated output fields prepend with 'type:'. "
+		     "+field to add and -field to remove. "
+		     "Valid types: hw,sw,trace,raw,synth. "
+		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
+		     "addr,symoff,period,iregs,uregs,brstack,brstacksym,flags,"
+		     "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr",
+		     parse_output_fields),
+	OPT_BOOLEAN('a', "all-cpus", &system_wide,
+		    "system-wide collection from all CPUs"),
+	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
+		   "only consider these symbols"),
+	OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
+		   "Stop display of callgraph at these symbols"),
+	OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
+	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
+		   "only display events for these comms"),
+	OPT_STRING(0, "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
+		   "only consider symbols in these pids"),
+	OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
+		   "only consider symbols in these tids"),
+	OPT_UINTEGER(0, "max-stack", &scripting_max_stack,
+		     "Set the maximum stack depth when parsing the callchain, "
+		     "anything beyond the specified depth will be ignored. "
+		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
+	OPT_BOOLEAN('I', "show-info", &show_full_info,
+		    "display extended information from perf.data file"),
+	OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
+		    "Show the path of [kernel.kallsyms]"),
+	OPT_BOOLEAN('\0', "show-task-events", &script.show_task_events,
+		    "Show the fork/comm/exit events"),
+	OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events,
+		    "Show the mmap events"),
+	OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,
+		    "Show context switch events (if recorded)"),
+	OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
+		    "Show namespace events (if recorded)"),
+	OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events,
+		    "Show lost events (if recorded)"),
+	OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events,
+		    "Show round events (if recorded)"),
+	OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
+		    "Dump trace output to files named by the monitored events"),
+	OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
+	OPT_INTEGER(0, "max-blocks", &max_blocks,
+		    "Maximum number of code blocks to dump with brstackinsn"),
+	OPT_BOOLEAN(0, "ns", &nanosecs,
+		    "Use 9 decimal places when displaying time"),
+	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
+			    "Instruction Tracing options",
+			    itrace_parse_synth_opts),
+	OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
+			"Show full source file name path for source lines"),
+	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
+			"Enable symbol demangling"),
+	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
+			"Enable kernel symbol demangling"),
+	OPT_STRING(0, "time", &script.time_str, "str",
+		   "Time span of interest (start,stop)"),
+	OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name,
+		    "Show inline function"),
+	OPT_END()
+	};
+	const char * const script_subcommands[] = { "record", "report", NULL };
+	const char *script_usage[] = {
+		"perf script [<options>]",
+		"perf script [<options>] record <script> [<record-options>] <command>",
+		"perf script [<options>] report <script> [script-args]",
+		"perf script [<options>] <script> [<record-options>] <command>",
+		"perf script [<options>] <top-script> [script-args]",
+		NULL
+	};
+
+	perf_set_singlethreaded();
+
+	setup_scripting();
+
+	argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+
+	data.file.path = input_name;
+	data.force     = symbol_conf.force;
+
+	if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
+		rec_script_path = get_script_path(argv[1], RECORD_SUFFIX);
+		if (!rec_script_path)
+			return cmd_record(argc, argv);
+	}
+
+	if (argc > 1 && !strncmp(argv[0], "rep", strlen("rep"))) {
+		rep_script_path = get_script_path(argv[1], REPORT_SUFFIX);
+		if (!rep_script_path) {
+			fprintf(stderr,
+				"Please specify a valid report script "
+				"(see 'perf script -l' for listing)\n");
+			return -1;
+		}
+	}
+
+	if (itrace_synth_opts.callchain &&
+	    itrace_synth_opts.callchain_sz > scripting_max_stack)
+		scripting_max_stack = itrace_synth_opts.callchain_sz;
+
+	/* make sure PERF_EXEC_PATH is set for scripts */
+	set_argv_exec_path(get_argv_exec_path());
+
+	if (argc && !script_name && !rec_script_path && !rep_script_path) {
+		int live_pipe[2];
+		int rep_args;
+		pid_t pid;
+
+		rec_script_path = get_script_path(argv[0], RECORD_SUFFIX);
+		rep_script_path = get_script_path(argv[0], REPORT_SUFFIX);
+
+		if (!rec_script_path && !rep_script_path) {
+			usage_with_options_msg(script_usage, options,
+				"Couldn't find script `%s'\n\n See perf"
+				" script -l for available scripts.\n", argv[0]);
+		}
+
+		if (is_top_script(argv[0])) {
+			rep_args = argc - 1;
+		} else {
+			int rec_args;
+
+			rep_args = has_required_arg(rep_script_path);
+			rec_args = (argc - 1) - rep_args;
+			if (rec_args < 0) {
+				usage_with_options_msg(script_usage, options,
+					"`%s' script requires options."
+					"\n\n See perf script -l for available "
+					"scripts and options.\n", argv[0]);
+			}
+		}
+
+		if (pipe(live_pipe) < 0) {
+			perror("failed to create pipe");
+			return -1;
+		}
+
+		pid = fork();
+		if (pid < 0) {
+			perror("failed to fork");
+			return -1;
+		}
+
+		if (!pid) {
+			j = 0;
+
+			dup2(live_pipe[1], 1);
+			close(live_pipe[0]);
+
+			if (is_top_script(argv[0])) {
+				system_wide = true;
+			} else if (!system_wide) {
+				if (have_cmd(argc - rep_args, &argv[rep_args]) != 0) {
+					err = -1;
+					goto out;
+				}
+			}
+
+			__argv = malloc((argc + 6) * sizeof(const char *));
+			if (!__argv) {
+				pr_err("malloc failed\n");
+				err = -ENOMEM;
+				goto out;
+			}
+
+			__argv[j++] = "/bin/sh";
+			__argv[j++] = rec_script_path;
+			if (system_wide)
+				__argv[j++] = "-a";
+			__argv[j++] = "-q";
+			__argv[j++] = "-o";
+			__argv[j++] = "-";
+			for (i = rep_args + 1; i < argc; i++)
+				__argv[j++] = argv[i];
+			__argv[j++] = NULL;
+
+			execvp("/bin/sh", (char **)__argv);
+			free(__argv);
+			exit(-1);
+		}
+
+		dup2(live_pipe[0], 0);
+		close(live_pipe[1]);
+
+		__argv = malloc((argc + 4) * sizeof(const char *));
+		if (!__argv) {
+			pr_err("malloc failed\n");
+			err = -ENOMEM;
+			goto out;
+		}
+
+		j = 0;
+		__argv[j++] = "/bin/sh";
+		__argv[j++] = rep_script_path;
+		for (i = 1; i < rep_args + 1; i++)
+			__argv[j++] = argv[i];
+		__argv[j++] = "-i";
+		__argv[j++] = "-";
+		__argv[j++] = NULL;
+
+		execvp("/bin/sh", (char **)__argv);
+		free(__argv);
+		exit(-1);
+	}
+
+	if (rec_script_path)
+		script_path = rec_script_path;
+	if (rep_script_path)
+		script_path = rep_script_path;
+
+	if (script_path) {
+		j = 0;
+
+		if (!rec_script_path)
+			system_wide = false;
+		else if (!system_wide) {
+			if (have_cmd(argc - 1, &argv[1]) != 0) {
+				err = -1;
+				goto out;
+			}
+		}
+
+		__argv = malloc((argc + 2) * sizeof(const char *));
+		if (!__argv) {
+			pr_err("malloc failed\n");
+			err = -ENOMEM;
+			goto out;
+		}
+
+		__argv[j++] = "/bin/sh";
+		__argv[j++] = script_path;
+		if (system_wide)
+			__argv[j++] = "-a";
+		for (i = 2; i < argc; i++)
+			__argv[j++] = argv[i];
+		__argv[j++] = NULL;
+
+		execvp("/bin/sh", (char **)__argv);
+		free(__argv);
+		exit(-1);
+	}
+
+	if (!script_name)
+		setup_pager();
+
+	session = perf_session__new(&data, false, &script.tool);
+	if (session == NULL)
+		return -1;
+
+	if (header || header_only) {
+		script.tool.show_feat_hdr = SHOW_FEAT_HEADER;
+		perf_session__fprintf_info(session, stdout, show_full_info);
+		if (header_only)
+			goto out_delete;
+	}
+	if (show_full_info)
+		script.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO;
+
+	if (symbol__init(&session->header.env) < 0)
+		goto out_delete;
+
+	script.session = session;
+	script__setup_sample_type(&script);
+
+	if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
+		itrace_synth_opts.thread_stack = true;
+
+	session->itrace_synth_opts = &itrace_synth_opts;
+
+	if (cpu_list) {
+		err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
+		if (err < 0)
+			goto out_delete;
+		itrace_synth_opts.cpu_bitmap = cpu_bitmap;
+	}
+
+	symbol_conf.use_callchain = !no_callchain;
+
+	if (session->tevent.pevent &&
+	    pevent_set_function_resolver(session->tevent.pevent,
+					 machine__resolve_kernel_addr,
+					 &session->machines.host) < 0) {
+		pr_err("%s: failed to set libtraceevent function resolver\n", __func__);
+		err = -1;
+		goto out_delete;
+	}
+
+	if (generate_script_lang) {
+		struct stat perf_stat;
+		int input;
+
+		if (output_set_by_user()) {
+			fprintf(stderr,
+				"custom fields not supported for generated scripts");
+			err = -EINVAL;
+			goto out_delete;
+		}
+
+		input = open(data.file.path, O_RDONLY);	/* input_name */
+		if (input < 0) {
+			err = -errno;
+			perror("failed to open file");
+			goto out_delete;
+		}
+
+		err = fstat(input, &perf_stat);
+		if (err < 0)
+			perror("failed to stat file");
+		close(input);	/* only opened for the size check; do not leak it */
+		if (err < 0)
+			goto out_delete;
+
+		if (!perf_stat.st_size) {
+			fprintf(stderr, "zero-sized file, nothing to do!\n");
+			goto out_delete;
+		}
+
+		scripting_ops = script_spec__lookup(generate_script_lang);
+		if (!scripting_ops) {
+			fprintf(stderr, "invalid language specifier");
+			err = -ENOENT;
+			goto out_delete;
+		}
+
+		err = scripting_ops->generate_script(session->tevent.pevent,
+						     "perf-script");
+		goto out_delete;
+	}
+
+	if (script_name) {
+		err = scripting_ops->start_script(script_name, argc, argv);
+		if (err)
+			goto out_delete;
+		pr_debug("perf script started with script %s\n\n", script_name);
+		script_started = true;
+	}
+
+	err = perf_session__check_output_opt(session);
+	if (err < 0)
+		goto out_delete;
+
+	script.ptime_range = perf_time__range_alloc(script.time_str,
+						    &script.range_size);
+	if (!script.ptime_range) {
+		err = -ENOMEM;
+		goto out_delete;
+	}
+
+	/* needs to be parsed after looking up reference time */
+	if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) {
+		if (session->evlist->first_sample_time == 0 &&
+		    session->evlist->last_sample_time == 0) {
+			pr_err("HINT: no first/last sample time found in perf data.\n"
+			       "Please use latest perf binary to execute 'perf record'\n"
+			       "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
+			err = -EINVAL;
+			goto out_delete;
+		}
+
+		script.range_num = perf_time__percent_parse_str(
+					script.ptime_range, script.range_size,
+					script.time_str,
+					session->evlist->first_sample_time,
+					session->evlist->last_sample_time);
+
+		if (script.range_num < 0) {
+			pr_err("Invalid time string\n");
+			err = -EINVAL;
+			goto out_delete;
+		}
+	} else {
+		script.range_num = 1;
+	}
+
+	err = __cmd_script(&script);
+
+	flush_scripting();
+
+out_delete:
+	zfree(&script.ptime_range);
+
+	perf_evlist__free_stats(session->evlist);
+	perf_session__delete(session);
+
+	if (script_started)
+		cleanup_scripting();
+out:
+	return err;
+}
diff --git a/builtin-stat.c b/builtin-stat.c
new file mode 100644
index 0000000..f17dc60
--- /dev/null
+++ b/builtin-stat.c
@@ -0,0 +1,3071 @@
+/*
+ * builtin-stat.c
+ *
+ * Builtin stat command: Give a precise performance counters summary
+ * overview about any workload, CPU or specific PID.
+ *
+ * Sample output:
+
+   $ perf stat ./hackbench 10
+
+  Time: 0.118
+
+  Performance counter stats for './hackbench 10':
+
+       1708.761321 task-clock                #   11.037 CPUs utilized
+            41,190 context-switches          #    0.024 M/sec
+             6,735 CPU-migrations            #    0.004 M/sec
+            17,318 page-faults               #    0.010 M/sec
+     5,205,202,243 cycles                    #    3.046 GHz
+     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
+     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
+     2,603,501,247 instructions              #    0.50  insns per cycle
+                                             #    1.48  stalled cycles per insn
+       484,357,498 branches                  #  283.455 M/sec
+         6,388,934 branch-misses             #    1.32% of all branches
+
+        0.154822978  seconds time elapsed
+
+ *
+ * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ *
+ * Improvements and fixes by:
+ *
+ *   Arjan van de Ven <arjan@linux.intel.com>
+ *   Yanmin Zhang <yanmin.zhang@intel.com>
+ *   Wu Fengguang <fengguang.wu@intel.com>
+ *   Mike Galbraith <efault@gmx.de>
+ *   Paul Mackerras <paulus@samba.org>
+ *   Jaswinder Singh Rajput <jaswinder@kernel.org>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "perf.h"
+#include "builtin.h"
+#include "util/cgroup.h"
+#include "util/util.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-events.h"
+#include "util/pmu.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/debug.h"
+#include "util/drv_configs.h"
+#include "util/color.h"
+#include "util/stat.h"
+#include "util/header.h"
+#include "util/cpumap.h"
+#include "util/thread.h"
+#include "util/thread_map.h"
+#include "util/counts.h"
+#include "util/group.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/string2.h"
+#include "util/metricgroup.h"
+#include "asm/bug.h"
+
+#include <linux/time64.h>
+#include <api/fs/fs.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <inttypes.h>
+#include <locale.h>
+#include <math.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sane_ctype.h"
+
+#define DEFAULT_SEPARATOR	" "
+#define CNTR_NOT_SUPPORTED	"<not supported>"
+#define CNTR_NOT_COUNTED	"<not counted>"
+#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
+
+static void print_counters(struct timespec *ts, int argc, const char **argv);
+
+/* Default events used for perf stat -T */
+static const char *transaction_attrs = {
+	"task-clock,"
+	"{"
+	"instructions,"
+	"cycles,"
+	"cpu/cycles-t/,"
+	"cpu/tx-start/,"
+	"cpu/el-start/,"
+	"cpu/cycles-ct/"
+	"}"
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char * transaction_limited_attrs = {
+	"task-clock,"
+	"{"
+	"instructions,"
+	"cycles,"
+	"cpu/cycles-t/,"
+	"cpu/tx-start/"
+	"}"
+};
+
+static const char * topdown_attrs[] = {
+	"topdown-total-slots",
+	"topdown-slots-retired",
+	"topdown-recovery-bubbles",
+	"topdown-fetch-bubbles",
+	"topdown-slots-issued",
+	NULL,
+};
+
+static const char *smi_cost_attrs = {
+	"{"
+	"msr/aperf/,"
+	"msr/smi/,"
+	"cycles"
+	"}"
+};
+
+static struct perf_evlist	*evsel_list;
+
+static struct rblist		 metric_events;
+
+static struct target target = {
+	.uid	= UINT_MAX,
+};
+
+typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);
+
+static int			run_count			=  1;
+static bool			no_inherit			= false;
+static volatile pid_t		child_pid			= -1;
+static bool			null_run			=  false;
+static int			detailed_run			=  0;
+static bool			transaction_run;
+static bool			topdown_run			= false;
+static bool			smi_cost			= false;
+static bool			smi_reset			= false;
+static bool			big_num				=  true;
+static int			big_num_opt			=  -1;
+static const char		*csv_sep			= NULL;
+static bool			csv_output			= false;
+static bool			group				= false;
+static const char		*pre_cmd			= NULL;
+static const char		*post_cmd			= NULL;
+static bool			sync_run			= false;
+static unsigned int		initial_delay			= 0;
+static unsigned int		unit_width			= 4; /* strlen("unit") */
+static bool			forever				= false;
+static bool			metric_only			= false;
+static bool			force_metric_only		= false;
+static bool			no_merge			= false;
+static struct timespec		ref_time;
+static struct cpu_map		*aggr_map;
+static aggr_get_id_t		aggr_get_id;
+static bool			append_file;
+static bool			interval_count;
+static const char		*output_name;
+static int			output_fd;
+static int			print_free_counters_hint;
+static int			print_mixed_hw_group_error;
+
+struct perf_stat {
+	bool			 record;
+	struct perf_data	 data;
+	struct perf_session	*session;
+	u64			 bytes_written;
+	struct perf_tool	 tool;
+	bool			 maps_allocated;
+	struct cpu_map		*cpus;
+	struct thread_map	*threads;
+	enum aggr_mode		 aggr_mode;
+};
+
+static struct perf_stat		perf_stat;
+#define STAT_RECORD		perf_stat.record
+
+static volatile int done = 0;
+
+static struct perf_stat_config stat_config = {
+	.aggr_mode	= AGGR_GLOBAL,
+	.scale		= true,
+};
+
+static bool is_duration_time(struct perf_evsel *evsel)
+{
+	return strcmp(evsel->name, "duration_time") == 0; /* exact name match */
+}
+
+static inline void diff_timespec(struct timespec *r, struct timespec *a,
+				 struct timespec *b)
+{
+	bool borrow = a->tv_nsec < b->tv_nsec;
+
+	/* r = a - b; take one second when the nanoseconds would go negative */
+	r->tv_sec = a->tv_sec - b->tv_sec - (borrow ? 1 : 0);
+	r->tv_nsec = a->tv_nsec - b->tv_nsec;
+	if (borrow)
+		r->tv_nsec += NSEC_PER_SEC;
+}
+
+static void perf_stat__reset_stats(void)
+{
+	int i;
+
+	perf_evlist__reset_stats(evsel_list);
+	perf_stat__reset_shadow_stats();
+	/* ...and each of the stats_num entries in stat_config.stats[] */
+	for (i = 0; i < stat_config.stats_num; i++)
+		perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
+}
+
+static int create_perf_stat_counter(struct perf_evsel *evsel)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	struct perf_evsel *leader = evsel->leader;
+
+	if (stat_config.scale) {
+		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+				    PERF_FORMAT_TOTAL_TIME_RUNNING;
+	}
+
+	/*
+	 * The event is part of non trivial group, let's enable
+	 * the group read (for leader) and ID retrieval for all
+	 * members.
+	 */
+	if (leader->nr_members > 1)
+		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
+
+	attr->inherit = !no_inherit;
+
+	/*
+	 * Some events get initialized with sample_(period/type) set,
+	 * like tracepoints. Clear it up for counting.
+	 */
+	attr->sample_period = 0;
+
+	/*
+	 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
+	 * while avoiding that older tools show confusing messages.
+	 *
+	 * However for pipe sessions we need to keep it zero,
+	 * because script's perf_evsel__check_attr is triggered
+	 * by attr->sample_type != 0, and we can't run it on
+	 * stat sessions.
+	 */
+	if (!(STAT_RECORD && perf_stat.data.is_pipe))
+		attr->sample_type = PERF_SAMPLE_IDENTIFIER;
+
+	/*
+	 * Disabling all counters initially, they will be enabled
+	 * either manually by us or by kernel via enable_on_exec
+	 * set later.
+	 */
+	if (perf_evsel__is_group_leader(evsel)) {
+		attr->disabled = 1;
+
+		/*
+		 * In case of initial_delay we enable tracee
+		 * events manually.
+		 */
+		if (target__none(&target) && !initial_delay)
+			attr->enable_on_exec = 1;
+	}
+	/* CPU targets without per-thread mode open per cpu, the rest per thread */
+	if (target__has_cpu(&target) && !target__has_per_thread(&target))
+		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
+
+	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
+}
+
+/*
+ * Does the counter have nsecs as a unit?
+ *
+ * Only the software cpu-clock and task-clock events are treated that way.
+ * Returns 1 for those two events and 0 for every other counter.
+ */
+static inline int nsec_counter(struct perf_evsel *evsel)
+{
+	return perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
+	       perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
+}
+
+static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
+		pr_err("failed to write perf data, error: %m\n");
+		return -1;
+	}
+	/* Account for the event that was just written to perf_stat.data */
+	perf_stat.bytes_written += event->header.size;
+	return 0;
+}
+
+static int write_stat_round_event(u64 tm, u64 type)
+{
+	return perf_event__synthesize_stat_round(NULL, tm, type,
+						 process_synthesized_event,
+						 NULL); /* written by the handler above */
+}
+
+#define WRITE_STAT_ROUND_EVENT(time, interval) \
+	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
+
+#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+
+static int
+perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
+			     struct perf_counts_values *count)
+{
+	struct perf_sample_id *sid = SID(counter, cpu, thread); /* sample_id[cpu][thread] */
+
+	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
+					   process_synthesized_event, NULL);
+}
+
+/*
+ * Read out the results of a single counter:
+ * do not aggregate counts across CPUs in system-wide mode
+ */
+static int read_counter(struct perf_evsel *counter)
+{
+	int nthreads = thread_map__nr(evsel_list->threads);
+	int ncpus, cpu, thread;
+
+	if (target__has_cpu(&target) && !target__has_per_thread(&target))
+		ncpus = perf_evsel__nr_cpus(counter);
+	else
+		ncpus = 1;
+
+	if (!counter->supported)
+		return -ENOENT;
+
+	if (counter->system_wide)
+		nthreads = 1;
+
+	for (thread = 0; thread < nthreads; thread++) {
+		for (cpu = 0; cpu < ncpus; cpu++) {
+			struct perf_counts_values *count;
+
+			count = perf_counts(counter->counts, cpu, thread);
+
+			/*
+			 * The leader's group read loads data into its group members
+			 * (via perf_evsel__read_counter) and sets their count->loaded.
+			 */
+			if (!count->loaded &&
+			    perf_evsel__read_counter(counter, cpu, thread)) {
+				counter->counts->scaled = -1;
+				perf_counts(counter->counts, cpu, thread)->ena = 0;
+				perf_counts(counter->counts, cpu, thread)->run = 0;
+				return -1;
+			}
+			/* Consume the flag so that the next pass reads again */
+			count->loaded = false;
+
+			if (STAT_RECORD) {
+				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+					pr_err("failed to write stat event\n");
+					return -1;
+				}
+			}
+
+			if (verbose > 1) {
+				fprintf(stat_config.output,
+					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+						perf_evsel__name(counter),
+						cpu,
+						count->val, count->ena, count->run);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void read_counters(void)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evsel_list, evsel) {
+		if (read_counter(evsel)) {
+			pr_debug("failed to read counter %s\n", evsel->name);
+			continue;
+		}
+		/* Only counters that were read successfully get processed */
+		if (perf_stat_process_counter(&stat_config, evsel))
+			pr_warning("failed to process counter %s\n", evsel->name);
+	}
+}
+
+static void process_interval(void)
+{
+	struct timespec ts, rs;
+
+	read_counters();
+	/* rs = time elapsed since ref_time; it stamps this interval's output */
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	diff_timespec(&rs, &ts, &ref_time);
+
+	if (STAT_RECORD) {
+		if (WRITE_STAT_ROUND_EVENT((u64)rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
+			pr_err("failed to write stat round event\n");
+	}
+	/* interval is presumably in ms; scale to ns in 64 bit to avoid overflow */
+	init_stats(&walltime_nsecs_stats);
+	update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL);
+	print_counters(&rs, 0, NULL);
+}
+
+/*
+ * Sleep for the configured initial delay (if any), then enable the events
+ * when that is this function's job.
+ */
+static void enable_counters(void)
+{
+	if (initial_delay)
+		usleep(initial_delay * USEC_PER_MSEC);
+
+	/*
+	 * We need to enable counters only if:
+	 * - we don't have tracee (attaching to task or cpu)
+	 * - we have initial delay configured
+	 * so there is nothing to do when neither holds.
+	 */
+	if (target__none(&target) && !initial_delay)
+		return;
+
+	perf_evlist__enable(evsel_list);
+}
+
+/*
+ * Stop the events before they are read, when no workload was forked.
+ */
+static void disable_counters(void)
+{
+	if (target__none(&target))
+		return;
+
+	/*
+	 * If we don't have tracee (attaching to task or cpu), counters may
+	 * still be running. To get accurate group ratios, we must stop groups
+	 * from counting before reading their constituent counters.
+	 */
+	perf_evlist__disable(evsel_list);
+}
+
+static volatile int workload_exec_errno;
+
+/*
+ * perf_evlist__prepare_workload will send a SIGUSR1
+ * if the fork fails, since we asked by setting its
+ * want_signal to true.
+ *
+ * This handler only stores the integer carried in the signal's si_value in
+ * workload_exec_errno; __run_perf_stat() checks that variable once the
+ * workload has been waited for and reports it as the failure reason.
+ */
+static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
+					void *ucontext __maybe_unused)
+{
+	workload_exec_errno = info->si_value.sival_int;
+}
+
+/*
+ * Synthesize the events that describe this stat session: the attrs (pipe
+ * output only), the extra attr data, the thread map, the cpu map and the
+ * stat config.  Each one is delivered through process_synthesized_event().
+ *
+ * @is_pipe: true when the output goes to a pipe.
+ *
+ * Returns 0 on success, or the negative error of the first step that failed.
+ */
+static int perf_stat_synthesize_config(bool is_pipe)
+{
+	int err;
+
+	if (is_pipe) {
+		err = perf_event__synthesize_attrs(NULL, perf_stat.session,
+						   process_synthesized_event);
+		if (err < 0) {
+			pr_err("Couldn't synthesize attrs.\n");
+			return err;
+		}
+	}
+
+	/*
+	 * The result of this step used to be overwritten by the next call
+	 * without being looked at; treat it like every other step.
+	 */
+	err = perf_event__synthesize_extra_attr(NULL,
+						evsel_list,
+						process_synthesized_event,
+						is_pipe);
+	if (err < 0) {
+		pr_err("Couldn't synthesize extra attrs.\n");
+		return err;
+	}
+
+	err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
+						process_synthesized_event,
+						NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize thread map.\n");
+		return err;
+	}
+
+	err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
+					     process_synthesized_event, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize cpu map.\n");
+		return err;
+	}
+
+	err = perf_event__synthesize_stat_config(NULL, &stat_config,
+						 process_synthesized_event, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize config.\n");
+		return err;
+	}
+
+	return 0;
+}
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
+/*
+ * Register the file descriptor of every (cpu, thread) slot of @counter with
+ * evsel_list.
+ *
+ * Returns 0 on success, -1 as soon as one registration fails.
+ */
+static int __store_counter_ids(struct perf_evsel *counter)
+{
+	int x, y;
+
+	for (x = 0; x < xyarray__max_x(counter->fd); x++) {
+		for (y = 0; y < xyarray__max_y(counter->fd); y++) {
+			if (perf_evlist__id_add_fd(evsel_list, counter, x, y,
+						   FD(counter, x, y)) < 0)
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate id storage sized by @counter's cpu and thread maps, then record
+ * the ids of all its file descriptors.
+ *
+ * Returns -ENOMEM when the allocation fails, otherwise the result of
+ * __store_counter_ids().
+ */
+static int store_counter_ids(struct perf_evsel *counter)
+{
+	if (perf_evsel__alloc_id(counter, counter->cpus->nr,
+				 counter->threads->nr))
+		return -ENOMEM;
+
+	return __store_counter_ids(counter);
+}
+
+/*
+ * Ids are stored when recording, or when the event's read_format asks for
+ * PERF_FORMAT_ID.
+ */
+static bool perf_evsel__should_store_id(struct perf_evsel *counter)
+{
+	if (STAT_RECORD)
+		return true;
+
+	return (counter->attr.read_format & PERF_FORMAT_ID) != 0;
+}
+
+/*
+ * Break up the group that @evsel belongs to after opening @evsel failed.
+ *
+ * Every event on evsel_list whose leader is @evsel's leader is turned into
+ * its own leader with nr_members reset to 0.  Members that come before
+ * @evsel in the list are closed first; @evsel and the ones after it are not.
+ *
+ * Returns the former group leader, which is where the caller restarts.
+ */
+static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
+{
+	struct perf_evsel *c2, *leader;
+	bool is_open = true;
+
+	leader = evsel->leader;
+	pr_debug("Weak group for %s/%d failed\n",
+			leader->name, leader->nr_members);
+
+	/*
+	 * for_each_group_member doesn't work here because it doesn't
+	 * include the first entry.
+	 */
+	evlist__for_each_entry(evsel_list, c2) {
+		/* From the failing event onwards nothing is closed. */
+		if (c2 == evsel)
+			is_open = false;
+		if (c2->leader == leader) {
+			if (is_open)
+				perf_evsel__close(c2);
+			c2->leader = c2;
+			c2->nr_members = 0;
+		}
+	}
+	return leader;
+}
+
+/*
+ * Perform one measurement run.
+ *
+ * Steps, in order: prepare the workload when a command was given (argc > 0),
+ * open a counter for every event on evsel_list (with weak-group, fallback
+ * and per-thread retries), apply filters and driver configs, write the
+ * header and synthesized config when recording, start the workload and/or
+ * enable the counters, wait (printing intervals if configured), disable the
+ * counters, read them and close the event list.
+ *
+ * Returns -1 (or a negative error from the record setup) on failure,
+ * otherwise WEXITSTATUS() of the status collected from the workload; status
+ * stays 0 when no workload was forked.
+ */
+static int __run_perf_stat(int argc, const char **argv)
+{
+	int interval = stat_config.interval;
+	int times = stat_config.times;
+	int timeout = stat_config.timeout;
+	char msg[BUFSIZ];
+	unsigned long long t0, t1;
+	struct perf_evsel *counter;
+	struct timespec ts;
+	size_t l;
+	int status = 0;
+	const bool forks = (argc > 0);
+	bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
+	struct perf_evsel_config_term *err_term;
+
+	/*
+	 * Sleep period of the wait loops below: the interval, else the
+	 * timeout, else one second.  interval and timeout are split here as
+	 * millisecond values.
+	 */
+	if (interval) {
+		ts.tv_sec  = interval / USEC_PER_MSEC;
+		ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
+	} else if (timeout) {
+		ts.tv_sec  = timeout / USEC_PER_MSEC;
+		ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
+	} else {
+		ts.tv_sec  = 1;
+		ts.tv_nsec = 0;
+	}
+
+	if (forks) {
+		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
+						  workload_exec_failed_signal) < 0) {
+			perror("failed to prepare workload");
+			return -1;
+		}
+		child_pid = evsel_list->workload.pid;
+	}
+
+	if (group)
+		perf_evlist__set_leader(evsel_list);
+
+	evlist__for_each_entry(evsel_list, counter) {
+try_again:
+		if (create_perf_stat_counter(counter) < 0) {
+
+			/* Weak group failed. Reset the group. */
+			if ((errno == EINVAL || errno == EBADF) &&
+			    counter->leader != counter &&
+			    counter->weak_group) {
+				counter = perf_evsel__reset_weak_group(counter);
+				goto try_again;
+			}
+
+			/*
+			 * PPC returns ENXIO for HW counters until 2.6.37
+			 * (behavior changed with commit b0a873e).
+			 */
+			if (errno == EINVAL || errno == ENOSYS ||
+			    errno == ENOENT || errno == EOPNOTSUPP ||
+			    errno == ENXIO) {
+				if (verbose > 0)
+					ui__warning("%s event is not supported by the kernel.\n",
+						    perf_evsel__name(counter));
+				counter->supported = false;
+
+				/*
+				 * Skip the unsupported event unless it is the
+				 * leader of a group with more than one member;
+				 * that case falls through to the error exit.
+				 */
+				if ((counter->leader != counter) ||
+				    !(counter->leader->nr_members > 1))
+					continue;
+			} else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+                                if (verbose > 0)
+                                        ui__warning("%s\n", msg);
+                                goto try_again;
+			} else if (target__has_per_thread(&target) &&
+				   evsel_list->threads &&
+				   evsel_list->threads->err_thread != -1) {
+				/*
+				 * For global --per-thread case, skip current
+				 * error thread.
+				 */
+				if (!thread_map__remove(evsel_list->threads,
+							evsel_list->threads->err_thread)) {
+					evsel_list->threads->err_thread = -1;
+					goto try_again;
+				}
+			}
+
+			/* Unrecoverable: report, stop the workload, give up. */
+			perf_evsel__open_strerror(counter, &target,
+						  errno, msg, sizeof(msg));
+			ui__error("%s\n", msg);
+
+			if (child_pid != -1)
+				kill(child_pid, SIGTERM);
+
+			return -1;
+		}
+		counter->supported = true;
+
+		/* Track the widest unit string for column alignment. */
+		l = strlen(counter->unit);
+		if (l > unit_width)
+			unit_width = l;
+
+		if (perf_evsel__should_store_id(counter) &&
+		    store_counter_ids(counter))
+			return -1;
+	}
+
+	if (perf_evlist__apply_filters(evsel_list, &counter)) {
+		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
+			counter->filter, perf_evsel__name(counter), errno,
+			str_error_r(errno, msg, sizeof(msg)));
+		return -1;
+	}
+
+	if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
+		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
+		      err_term->val.drv_cfg, perf_evsel__name(counter), errno,
+		      str_error_r(errno, msg, sizeof(msg)));
+		return -1;
+	}
+
+	if (STAT_RECORD) {
+		int err, fd = perf_data__fd(&perf_stat.data);
+
+		if (is_pipe) {
+			err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
+		} else {
+			err = perf_session__write_header(perf_stat.session, evsel_list,
+							 fd, false);
+		}
+
+		if (err < 0)
+			return err;
+
+		err = perf_stat_synthesize_config(is_pipe);
+		if (err < 0)
+			return err;
+	}
+
+	/*
+	 * Enable counters and exec the command:
+	 */
+	t0 = rdclock();
+	clock_gettime(CLOCK_MONOTONIC, &ref_time);
+
+	if (forks) {
+		perf_evlist__start_workload(evsel_list);
+		enable_counters();
+
+		if (interval || timeout) {
+			/*
+			 * Poll the child without blocking; a timeout ends
+			 * the loop after a single sleep, an interval prints
+			 * once per sleep until the child exits or the
+			 * requested number of intervals has been printed.
+			 */
+			while (!waitpid(child_pid, &status, WNOHANG)) {
+				nanosleep(&ts, NULL);
+				if (timeout)
+					break;
+				process_interval();
+				if (interval_count && !(--times))
+					break;
+			}
+		}
+		waitpid(child_pid, &status, 0);
+
+		/* Set by workload_exec_failed_signal(). */
+		if (workload_exec_errno) {
+			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
+			pr_err("Workload failed: %s\n", emsg);
+			return -1;
+		}
+
+		if (WIFSIGNALED(status))
+			psignal(WTERMSIG(status), argv[0]);
+	} else {
+		enable_counters();
+		while (!done) {
+			nanosleep(&ts, NULL);
+			if (timeout)
+				break;
+			if (interval) {
+				process_interval();
+				if (interval_count && !(--times))
+					break;
+			}
+		}
+	}
+
+	disable_counters();
+
+	t1 = rdclock();
+
+	update_stats(&walltime_nsecs_stats, t1 - t0);
+
+	/*
+	 * Closing a group leader splits the group, and as we only disable
+	 * group leaders, results in remaining events becoming enabled. To
+	 * avoid arbitrary skew, we must read all counters before closing any
+	 * group leaders.
+	 */
+	read_counters();
+	perf_evlist__close(evsel_list);
+
+	return WEXITSTATUS(status);
+}
+
+/*
+ * Wrap one measurement run with the optional pre command, sync() and post
+ * command.
+ *
+ * Returns the first non-zero result among the pre command, the run itself
+ * and the post command, or 0 when all of them succeed.
+ */
+static int run_perf_stat(int argc, const char **argv)
+{
+	int ret;
+
+	if (pre_cmd) {
+		ret = system(pre_cmd);
+		if (ret)
+			return ret;
+	}
+
+	if (sync_run)
+		sync();
+
+	ret = __run_perf_stat(argc, argv);
+	if (ret || !post_cmd)
+		return ret;
+
+	return system(post_cmd);
+}
+
+/*
+ * Print how long the counter ran relative to how long it was enabled.
+ *
+ * CSV output always gets the run time and the percentage (100.0 when @ena
+ * is 0); normal output only gets the percentage, and only when @run and
+ * @ena differ.
+ */
+static void print_running(u64 run, u64 ena)
+{
+	FILE *out = stat_config.output;
+
+	if (!csv_output) {
+		if (run != ena)
+			fprintf(out, "  (%.2f%%)", 100.0 * run / ena);
+		return;
+	}
+
+	fprintf(out, "%s%" PRIu64 "%s%.2f",
+		csv_sep, run, csv_sep,
+		ena ? 100.0 * run / ena : 100.0);
+}
+
+/*
+ * Print the relative standard deviation obtained from
+ * rel_stddev_stats(@total, @avg).  CSV output always prints it; normal
+ * output skips a value of zero.
+ */
+static void print_noise_pct(double total, double avg)
+{
+	double pct = rel_stddev_stats(total, avg);
+
+	if (csv_output) {
+		fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
+		return;
+	}
+
+	if (pct)
+		fprintf(stat_config.output, "  ( +-%6.2f%% )", pct);
+}
+
+/*
+ * Print the noise of @evsel's first result stat relative to @avg.  Nothing
+ * is printed when only a single run was made.
+ */
+static void print_noise(struct perf_evsel *evsel, double avg)
+{
+	if (run_count != 1) {
+		struct perf_stat_evsel *ps = evsel->stats;
+
+		print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
+	}
+}
+
+/*
+ * Print the leading aggregation columns of an output line for the current
+ * stat_config.aggr_mode.
+ *
+ * @evsel: event used to look up the cpu (AGGR_NONE) or the thread
+ *         (AGGR_THREAD) that @id indexes.
+ * @id:    aggregation id; its meaning depends on the mode.
+ * @nr:    count printed in the second column for core/socket modes.
+ *
+ * Field widths collapse to 0 for CSV output.  Nothing is printed for
+ * AGGR_GLOBAL, AGGR_UNSET or unknown modes.
+ */
+static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
+{
+	switch (stat_config.aggr_mode) {
+	case AGGR_CORE:
+		fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
+			cpu_map__id_to_socket(id),
+			csv_output ? 0 : -8,
+			cpu_map__id_to_cpu(id),
+			csv_sep,
+			csv_output ? 0 : 4,
+			nr,
+			csv_sep);
+		break;
+	case AGGR_SOCKET:
+		fprintf(stat_config.output, "S%*d%s%*d%s",
+			csv_output ? 0 : -5,
+			id,
+			csv_sep,
+			csv_output ? 0 : 4,
+			nr,
+			csv_sep);
+			break;
+	case AGGR_NONE:
+		/* @id is an index into the event's cpu map here. */
+		fprintf(stat_config.output, "CPU%*d%s",
+			csv_output ? 0 : -4,
+			perf_evsel__cpus(evsel)->map[id], csv_sep);
+		break;
+	case AGGR_THREAD:
+		/* @id is an index into the event's thread map here. */
+		fprintf(stat_config.output, "%*s-%*d%s",
+			csv_output ? 0 : 16,
+			thread_map__comm(evsel->threads, id),
+			csv_output ? 0 : -8,
+			thread_map__pid(evsel->threads, id),
+			csv_sep);
+		break;
+	case AGGR_GLOBAL:
+	case AGGR_UNSET:
+	default:
+		break;
+	}
+}
+
+/*
+ * State shared between printout() and the print_metric and new_line
+ * callbacks; it is passed to them as the opaque ctx pointer.
+ */
+struct outstate {
+	FILE *fh;		/* stream the callbacks write to */
+	bool newline;		/* set by new_line_std(), consumed by print_metric_std() */
+	const char *prefix;	/* text repeated at the start of continuation lines */
+	int  nfields;		/* separators new_line_csv() emits to skip the leading columns */
+	int  id, nr;		/* arguments handed to aggr_printout() */
+	struct perf_evsel *evsel;	/* event being printed */
+};
+
+#define METRIC_LEN  35
+
+/*
+ * new_line callback for normal output: only remember that a line break is
+ * wanted; print_metric_std() emits it when the next metric is printed.
+ */
+static void new_line_std(void *ctx)
+{
+	struct outstate *os = ctx;
+
+	os->newline = true;
+}
+
+/*
+ * Start a continuation line in normal output: newline, prefix, aggregation
+ * columns, then blank padding in place of the columns that precede the
+ * metric (with extra padding in AGGR_NONE mode).
+ */
+static void do_new_line_std(struct outstate *os)
+{
+	fputc('\n', os->fh);
+	fputs(os->prefix, os->fh);
+	aggr_printout(os->evsel, os->id, os->nr);
+	if (stat_config.aggr_mode == AGGR_NONE)
+		fprintf(os->fh, "        ");
+	fprintf(os->fh, "                                                 ");
+}
+
+/*
+ * print_metric callback for normal output.
+ *
+ * With no @unit or no @fmt, METRIC_LEN blanks are written in place of a
+ * metric.  Otherwise " # <value> <unit>" is written, starting a
+ * continuation line first if one was requested through new_line_std(), and
+ * padded so the metric takes METRIC_LEN columns.
+ */
+static void print_metric_std(void *ctx, const char *color, const char *fmt,
+			     const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	bool pending_newline = os->newline;
+	int width;
+
+	/* The request is consumed whether or not a metric is printed. */
+	os->newline = false;
+
+	if (!unit || !fmt) {
+		fprintf(out, "%-*s", METRIC_LEN, "");
+		return;
+	}
+
+	if (pending_newline)
+		do_new_line_std(os);
+
+	width = fprintf(out, " # ");
+	if (color)
+		width += color_fprintf(out, color, fmt, val);
+	else
+		width += fprintf(out, fmt, val);
+
+	fprintf(out, " %-*s", METRIC_LEN - width - 1, unit);
+}
+
+/*
+ * new_line callback for CSV output: start a new line, repeat the prefix
+ * and aggregation columns, then emit one separator per leading field so
+ * the next metric lands in the metric columns.
+ */
+static void new_line_csv(void *ctx)
+{
+	struct outstate *os = ctx;
+	int remaining;
+
+	fputc('\n', os->fh);
+
+	if (os->prefix)
+		fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+
+	aggr_printout(os->evsel, os->id, os->nr);
+
+	for (remaining = os->nfields; remaining > 0; remaining--)
+		fputs(csv_sep, os->fh);
+}
+
+/*
+ * print_metric callback for CSV output.
+ *
+ * With no @unit or no @fmt, two empty fields are written.  Otherwise the
+ * value is formatted with @fmt, cut down to its leading run of digits and
+ * dots, and written together with @unit stripped of leading whitespace.
+ */
+static void print_metric_csv(void *ctx,
+			     const char *color __maybe_unused,
+			     const char *fmt, const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *num, *end;
+
+	if (!unit || !fmt) {
+		fprintf(out, "%s%s", csv_sep, csv_sep);
+		return;
+	}
+
+	snprintf(buf, sizeof(buf), fmt, val);
+	num = ltrim(buf);
+
+	/* Terminate the string right after the numeric part. */
+	for (end = num; isdigit(*end) || *end == '.'; end++)
+		;
+	*end = 0;
+
+	while (isspace(*unit))
+		unit++;
+
+	fprintf(out, "%s%s%s%s", csv_sep, num, csv_sep, unit);
+}
+
+#define METRIC_ONLY_LEN 20
+
+/*
+ * Filter out some columns that don't work well in metrics only mode.
+ *
+ * Returns false for a NULL @unit and for units containing "/sec", "hz",
+ * "Hz" or "CPUs utilized"; true for everything else.
+ */
+static bool valid_only_metric(const char *unit)
+{
+	static const char * const rejected[] = {
+		"/sec", "hz", "Hz", "CPUs utilized",
+	};
+	size_t i;
+
+	if (!unit)
+		return false;
+
+	for (i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++) {
+		if (strstr(unit, rejected[i]))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Make units that start with "of all" self-describing by putting the event
+ * name in front of them.
+ *
+ * @buf must have room for 1024 bytes.  Returns @buf when the unit was
+ * rewritten, otherwise @unit itself.
+ */
+static const char *fixunit(char *buf, struct perf_evsel *evsel,
+			   const char *unit)
+{
+	if (strncmp(unit, "of all", 6) != 0)
+		return unit;
+
+	snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), unit);
+	return buf;
+}
+
+/*
+ * print_metric callback for metric-only normal output: print just the
+ * value and pad it to the column width.
+ *
+ * Units rejected by valid_only_metric() are skipped.  The column is
+ * METRIC_ONLY_LEN wide, or strlen(unit) + 1 when the (fixed-up) unit is
+ * longer than that.
+ */
+static void print_metric_only(void *ctx, const char *color, const char *fmt,
+			      const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	int n;
+	char buf[1024];
+	unsigned mlen = METRIC_ONLY_LEN;
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(buf, os->evsel, unit);
+	if (color)
+		n = color_fprintf(out, color, fmt, val);
+	else
+		n = fprintf(out, fmt, val);
+	/* Cap the printed width at METRIC_ONLY_LEN when computing the padding. */
+	if (n > METRIC_ONLY_LEN)
+		n = METRIC_ONLY_LEN;
+	if (mlen < strlen(unit))
+		mlen = strlen(unit) + 1;
+	fprintf(out, "%*s", mlen - n, "");
+}
+
+/*
+ * print_metric callback for metric-only CSV output: write the numeric part
+ * of the formatted value followed by the separator.
+ *
+ * Units rejected by valid_only_metric() are skipped.
+ */
+static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
+				  const char *fmt,
+				  const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	/* NOTE(review): the fixed-up unit is not used after this point. */
+	unit = fixunit(tbuf, os->evsel, unit);
+	snprintf(buf, sizeof buf, fmt, val);
+	/* Keep only the leading run of digits and dots. */
+	ends = vals = ltrim(buf);
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	fprintf(out, "%s%s", vals, csv_sep);
+}
+
+/* new_line callback for metric-only output: deliberately does nothing. */
+static void new_line_metric(void *ctx __maybe_unused)
+{
+}
+
+/*
+ * print_metric callback used to build the metric-only header line: write
+ * the (fixed-up) unit as the column title instead of a value.
+ *
+ * Units rejected by valid_only_metric() produce no column.
+ */
+static void print_metric_header(void *ctx, const char *color __maybe_unused,
+				const char *fmt __maybe_unused,
+				const char *unit, double val __maybe_unused)
+{
+	struct outstate *os = ctx;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+
+	unit = fixunit(tbuf, os->evsel, unit);
+
+	if (!csv_output) {
+		fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
+		return;
+	}
+
+	fprintf(os->fh, "%s%s", unit, csv_sep);
+}
+
+/*
+ * Print the value columns of a nanosecond-based counter: the aggregation
+ * columns, @avg converted to milliseconds, the unit, the event name and,
+ * if the event has one, its cgroup name.
+ *
+ * In normal output " (msec)" is appended to the name; the name is built in
+ * a 25 byte buffer with scnprintf(), so longer names are cut short.
+ */
+static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
+{
+	FILE *output = stat_config.output;
+	double msecs = avg / NSEC_PER_MSEC;
+	const char *fmt_v, *fmt_n;
+	char name[25];
+
+	fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
+	fmt_n = csv_output ? "%s" : "%-25s";
+
+	aggr_printout(evsel, id, nr);
+
+	scnprintf(name, sizeof(name), "%s%s",
+		  perf_evsel__name(evsel), csv_output ? "" : " (msec)");
+
+	fprintf(output, fmt_v, msecs, csv_sep);
+
+	if (csv_output)
+		fprintf(output, "%s%s", evsel->unit, csv_sep);
+	else
+		fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);
+
+	fprintf(output, fmt_n, name);
+
+	if (evsel->cgrp)
+		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
+}
+
+/*
+ * Pick the cpu value that is passed to the shadow-stat helpers for
+ * aggregation id @id.
+ *
+ * Returns 0 when there is no aggr_get_id callback or in AGGR_GLOBAL mode,
+ * @id itself in AGGR_NONE mode, and otherwise the first cpu of @evsel whose
+ * aggregation id equals @id (0 if there is none).
+ */
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+	int idx;
+
+	if (!aggr_get_id)
+		return 0;
+
+	switch (stat_config.aggr_mode) {
+	case AGGR_NONE:
+		return id;
+	case AGGR_GLOBAL:
+		return 0;
+	default:
+		break;
+	}
+
+	for (idx = 0; idx < perf_evsel__nr_cpus(evsel); idx++) {
+		int candidate = perf_evsel__cpus(evsel)->map[idx];
+
+		if (aggr_get_id(evsel_list->cpus, candidate) == id)
+			return candidate;
+	}
+
+	return 0;
+}
+
+/*
+ * Print the value columns of a plain counter: the aggregation columns,
+ * @avg, the unit (when the event has one), the event name and, if the
+ * event has one, its cgroup name.
+ *
+ * Two decimals are shown when the event's scale is not a whole number;
+ * big_num selects the thousands-grouping flag in normal output.
+ */
+static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
+{
+	FILE *output = stat_config.output;
+	double sc = evsel->scale;
+	bool fractional = floor(sc) != sc;
+	const char *fmt;
+
+	if (csv_output)
+		fmt = fractional ? "%.2f%s" : "%.0f%s";
+	else if (big_num)
+		fmt = fractional ? "%'18.2f%s" : "%'18.0f%s";
+	else
+		fmt = fractional ? "%18.2f%s" : "%18.0f%s";
+
+	aggr_printout(evsel, id, nr);
+
+	fprintf(output, fmt, avg, csv_sep);
+
+	if (evsel->unit) {
+		int width = csv_output ? 0 : unit_width;
+
+		fprintf(output, "%-*s%s", width, evsel->unit, csv_sep);
+	}
+
+	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
+
+	if (evsel->cgrp)
+		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
+}
+
+/*
+ * Report whether the non-software events seen together with @counter use
+ * more than one attr.type.
+ *
+ * Returns false right away when @counter has fewer than two group members.
+ * Otherwise every event on @counter's evlist is inspected (the walk is not
+ * restricted to the members of @counter's group) and true is returned as
+ * soon as two differing non-software types are found.
+ */
+static bool is_mixed_hw_group(struct perf_evsel *counter)
+{
+	struct perf_evlist *evlist = counter->evlist;
+	u32 pmu_type = counter->attr.type;
+	struct perf_evsel *pos;
+
+	if (counter->nr_members < 2)
+		return false;
+
+	evlist__for_each_entry(evlist, pos) {
+		/* software events can be part of any hardware group */
+		if (pos->attr.type == PERF_TYPE_SOFTWARE)
+			continue;
+		/* Adopt the first non-software type as the reference. */
+		if (pmu_type == PERF_TYPE_SOFTWARE) {
+			pmu_type = pos->attr.type;
+			continue;
+		}
+		if (pmu_type != pos->attr.type)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Print one counter: its value columns followed by its shadow metrics.
+ *
+ * @id, @nr:   aggregation id and count, forwarded to aggr_printout().
+ * @counter:   event being printed.
+ * @uval:      scaled value to print.
+ * @prefix:    text repeated on continuation lines (may be NULL).
+ * @run, @ena: running and enabled times; if either is 0, or the counts are
+ *             marked with scaled == -1, the counter is reported as not
+ *             counted / not supported instead of printing a value.
+ * @noise:     average handed to print_noise().
+ * @st:        runtime stats passed on to the shadow-stat printer.
+ *
+ * The print_metric/new_line callbacks are chosen from metric_only and
+ * csv_output.  Noise and running time are printed before the metrics in
+ * CSV output and after them in normal output.
+ */
+static void printout(int id, int nr, struct perf_evsel *counter, double uval,
+		     char *prefix, u64 run, u64 ena, double noise,
+		     struct runtime_stat *st)
+{
+	struct perf_stat_output_ctx out;
+	struct outstate os = {
+		.fh = stat_config.output,
+		.prefix = prefix ? prefix : "",
+		.id = id,
+		.nr = nr,
+		.evsel = counter,
+	};
+	print_metric_t pm = print_metric_std;
+	void (*nl)(void *);
+
+	if (metric_only) {
+		nl = new_line_metric;
+		if (csv_output)
+			pm = print_metric_only_csv;
+		else
+			pm = print_metric_only;
+	} else
+		nl = new_line_std;
+
+	if (csv_output && !metric_only) {
+		/* Number of aggregation columns printed per mode. */
+		static int aggr_fields[] = {
+			[AGGR_GLOBAL] = 0,
+			[AGGR_THREAD] = 1,
+			[AGGR_NONE] = 1,
+			[AGGR_SOCKET] = 2,
+			[AGGR_CORE] = 2,
+		};
+
+		pm = print_metric_csv;
+		nl = new_line_csv;
+		/*
+		 * Columns new_line_csv() has to skip: three fixed ones plus
+		 * the aggregation columns and the optional cgroup column.
+		 */
+		os.nfields = 3;
+		os.nfields += aggr_fields[stat_config.aggr_mode];
+		if (counter->cgrp)
+			os.nfields++;
+	}
+	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+		if (metric_only) {
+			pm(&os, NULL, "", "", 0);
+			return;
+		}
+		aggr_printout(counter, id, nr);
+
+		fprintf(stat_config.output, "%*s%s",
+			csv_output ? 0 : 18,
+			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
+			csv_sep);
+
+		/* Remember to print the hints in print_footer(). */
+		if (counter->supported) {
+			print_free_counters_hint = 1;
+			if (is_mixed_hw_group(counter))
+				print_mixed_hw_group_error = 1;
+		}
+
+		fprintf(stat_config.output, "%-*s%s",
+			csv_output ? 0 : unit_width,
+			counter->unit, csv_sep);
+
+		fprintf(stat_config.output, "%*s",
+			csv_output ? 0 : -25,
+			perf_evsel__name(counter));
+
+		if (counter->cgrp)
+			fprintf(stat_config.output, "%s%s",
+				csv_sep, counter->cgrp->name);
+
+		/* An empty metric keeps the columns aligned. */
+		if (!csv_output)
+			pm(&os, NULL, NULL, "", 0);
+		print_noise(counter, noise);
+		print_running(run, ena);
+		if (csv_output)
+			pm(&os, NULL, NULL, "", 0);
+		return;
+	}
+
+	if (metric_only)
+		/* nothing */;
+	else if (nsec_counter(counter))
+		nsec_printout(id, nr, counter, uval);
+	else
+		abs_printout(id, nr, counter, uval);
+
+	out.print_metric = pm;
+	out.new_line = nl;
+	out.ctx = &os;
+	out.force_header = false;
+
+	if (csv_output && !metric_only) {
+		print_noise(counter, noise);
+		print_running(run, ena);
+	}
+
+	perf_stat__print_shadow_stats(counter, uval,
+				first_shadow_cpu(counter, id),
+				&out, &metric_events, st);
+	if (!csv_output && !metric_only) {
+		print_noise(counter, noise);
+		print_running(run, ena);
+	}
+}
+
+/*
+ * Feed per-aggregation totals into the shadow stats.
+ *
+ * For every id in aggr_map and every event on evsel_list, the thread-0
+ * values of the cpus that map to that id are summed and handed to
+ * perf_stat__update_shadow_stats() against rt_stat.
+ */
+static void aggr_update_shadow(void)
+{
+	int cpu, s2, id, s;
+	u64 val;
+	struct perf_evsel *counter;
+
+	for (s = 0; s < aggr_map->nr; s++) {
+		id = aggr_map->map[s];
+		evlist__for_each_entry(evsel_list, counter) {
+			val = 0;
+			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+				/*
+				 * NOTE(review): the id is looked up in
+				 * evsel_list->cpus here, whereas aggr_cb()
+				 * uses the counter's own cpu map - confirm
+				 * this difference is intended.
+				 */
+				s2 = aggr_get_id(evsel_list->cpus, cpu);
+				if (s2 != id)
+					continue;
+				val += perf_counts(counter->counts, cpu, 0)->val;
+			}
+			perf_stat__update_shadow_stats(counter, val,
+					first_shadow_cpu(counter, id),
+					&rt_stat);
+		}
+	}
+}
+
+/*
+ * Make @counter's name tell which PMU it belongs to.
+ *
+ * Nothing is done when the name was already uniquified, when there is no
+ * pmu_name, or when the name already starts with pmu_name.  Otherwise a
+ * name containing '/' becomes "<pmu_name><text from the '/' on>" and any
+ * other name becomes "<name> [<pmu_name>]".  The counter is marked as
+ * uniquified even when building the new name fails.
+ */
+static void uniquify_event_name(struct perf_evsel *counter)
+{
+	char *new_name;
+	char *config;
+	int len;
+
+	if (counter->uniquified_name || !counter->pmu_name)
+		return;
+
+	if (!strncmp(counter->name, counter->pmu_name,
+		     strlen(counter->pmu_name)))
+		return;
+
+	config = strchr(counter->name, '/');
+	if (config)
+		len = asprintf(&new_name, "%s%s", counter->pmu_name, config);
+	else
+		len = asprintf(&new_name, "%s [%s]", counter->name,
+			       counter->pmu_name);
+
+	/* Only swap the name in when asprintf() produced one. */
+	if (len > 0) {
+		free(counter->name);
+		counter->name = new_name;
+	}
+
+	counter->uniquified_name = true;
+}
+
+/*
+ * Invoke @cb on the events that directly follow @counter on evsel_list and
+ * are aliases of it.
+ *
+ * An alias has the same name, scale, cgroup, unit and nsec_counter() result
+ * as @counter.  The walk stops at the first event that differs.  Each alias
+ * is flagged with merged_stat so collect_data() skips it later, and @cb is
+ * called for it with first == false.
+ */
+static void collect_all_aliases(struct perf_evsel *counter,
+			    void (*cb)(struct perf_evsel *counter, void *data,
+				       bool first),
+			    void *data)
+{
+	struct perf_evsel *alias;
+
+	alias = list_prepare_entry(counter, &(evsel_list->entries), node);
+	list_for_each_entry_continue (alias, &evsel_list->entries, node) {
+		if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
+		    alias->scale != counter->scale ||
+		    alias->cgrp != counter->cgrp ||
+		    strcmp(alias->unit, counter->unit) ||
+		    nsec_counter(alias) != nsec_counter(counter))
+			break;
+		alias->merged_stat = true;
+		cb(alias, data, false);
+	}
+}
+
+/*
+ * Collect the data of @counter through @cb.
+ *
+ * Returns false, without calling @cb, when @counter was already merged
+ * into an earlier event.  Otherwise @cb is called for @counter with
+ * first == true; after that the name is uniquified when merging is
+ * disabled, or the aliases are collected too when the event asks for
+ * automatic merging, and true is returned.
+ */
+static bool collect_data(struct perf_evsel *counter,
+			    void (*cb)(struct perf_evsel *counter, void *data,
+				       bool first),
+			    void *data)
+{
+	if (counter->merged_stat)
+		return false;
+
+	cb(counter, data, true);
+
+	if (no_merge) {
+		uniquify_event_name(counter);
+		return true;
+	}
+
+	if (counter->auto_merge_stats)
+		collect_all_aliases(counter, cb, data);
+
+	return true;
+}
+
+/* Accumulator passed to aggr_cb() and counter_cb() through collect_data(). */
+struct aggr_data {
+	u64 ena, run, val;	/* summed enabled time, running time and value */
+	int id;			/* aggregation id to match (read by aggr_cb()) */
+	int nr;			/* matching cpus counted by aggr_cb() on the first call */
+	int cpu;		/* cpu index to read (read by counter_cb()) */
+};
+
+/*
+ * collect_data() callback: add the thread-0 counts of every cpu of
+ * @counter whose aggregation id equals ad->id to the accumulator.
+ *
+ * @data:  struct aggr_data to accumulate into.
+ * @first: true for the primary event; only then are the matching cpus
+ *         counted in ad->nr.
+ */
+static void aggr_cb(struct perf_evsel *counter, void *data, bool first)
+{
+	struct aggr_data *ad = data;
+	int cpu, s2;
+
+	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+		struct perf_counts_values *counts;
+
+		s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
+		if (s2 != ad->id)
+			continue;
+		if (first)
+			ad->nr++;
+		counts = perf_counts(counter->counts, cpu, 0);
+		/*
+		 * When any result is bad, make them all bad (zero the
+		 * enabled and running times) to give consistent output in
+		 * interval mode.
+		 */
+		if (counts->ena == 0 || counts->run == 0 ||
+		    counter->counts->scaled == -1) {
+			ad->ena = 0;
+			ad->run = 0;
+			break;
+		}
+		ad->val += counts->val;
+		ad->ena += counts->ena;
+		ad->run += counts->run;
+	}
+}
+
+/*
+ * Print the counters aggregated per id of aggr_map.
+ *
+ * @prefix: text printed at the start of every output line (may be NULL).
+ *
+ * Without metric_only each counter gets its own line; with metric_only all
+ * counters of one aggregation id share a single line.
+ */
+static void print_aggr(char *prefix)
+{
+	FILE *output = stat_config.output;
+	struct perf_evsel *counter;
+	int s, id, nr;
+	double uval;
+	u64 ena, run, val;
+	bool first;
+
+	/*
+	 * Both are needed: aggr_map is walked below and aggr_get_id is
+	 * called by aggr_update_shadow() and aggr_cb().  The previous test
+	 * only returned when both were missing.
+	 */
+	if (!aggr_map || !aggr_get_id)
+		return;
+
+	aggr_update_shadow();
+
+	/*
+	 * With metric_only everything is on a single line.
+	 * Without each counter has its own line.
+	 */
+	for (s = 0; s < aggr_map->nr; s++) {
+		struct aggr_data ad;
+		if (prefix && metric_only)
+			fprintf(output, "%s", prefix);
+
+		ad.id = id = aggr_map->map[s];
+		first = true;
+		evlist__for_each_entry(evsel_list, counter) {
+			if (is_duration_time(counter))
+				continue;
+
+			/* Fresh totals for every counter. */
+			ad.val = ad.ena = ad.run = 0;
+			ad.nr = 0;
+			if (!collect_data(counter, aggr_cb, &ad))
+				continue;
+			nr = ad.nr;
+			ena = ad.ena;
+			run = ad.run;
+			val = ad.val;
+			/* The id columns are printed once per metric-only line. */
+			if (first && metric_only) {
+				first = false;
+				aggr_printout(counter, id, nr);
+			}
+			if (prefix && !metric_only)
+				fprintf(output, "%s", prefix);
+
+			uval = val * counter->scale;
+			printout(id, nr, counter, uval, prefix, run, ena, 1.0,
+				 &rt_stat);
+			if (!metric_only)
+				fputc('\n', output);
+		}
+		if (metric_only)
+			fputc('\n', output);
+	}
+}
+
+/*
+ * qsort() comparator for struct perf_aggr_thread_value: orders entries by
+ * descending val.
+ *
+ * The values are compared rather than subtracted: the difference of two
+ * wide counts does not fit the int return value, so its truncation could
+ * report the wrong order.
+ */
+static int cmp_val(const void *a, const void *b)
+{
+	const struct perf_aggr_thread_value *va = a;
+	const struct perf_aggr_thread_value *vb = b;
+
+	return (vb->val > va->val) - (vb->val < va->val);
+}
+
+/*
+ * Build an array with the per-thread totals of @counter, sorted with
+ * cmp_val().
+ *
+ * @nthreads, @ncpus: dimensions of the counts to sum over.
+ * @ret:              if not NULL, receives the number of entries filled in.
+ *
+ * Threads whose scaled total is zero are left out when per-thread mode is
+ * enabled globally.  Returns a calloc()ed array the caller must free(), or
+ * NULL when the allocation fails.
+ */
+static struct perf_aggr_thread_value *sort_aggr_thread(
+					struct perf_evsel *counter,
+					int nthreads, int ncpus,
+					int *ret)
+{
+	struct perf_aggr_thread_value *buf, *slot;
+	int cpu, thread, used = 0;
+
+	buf = calloc(nthreads, sizeof(*buf));
+	if (!buf)
+		return NULL;
+
+	for (thread = 0; thread < nthreads; thread++) {
+		u64 ena = 0, run = 0, val = 0;
+		double uval;
+
+		for (cpu = 0; cpu < ncpus; cpu++) {
+			struct perf_counts_values *c;
+
+			c = perf_counts(counter->counts, cpu, thread);
+			val += c->val;
+			ena += c->ena;
+			run += c->run;
+		}
+
+		uval = val * counter->scale;
+
+		/*
+		 * Skip value 0 when enabling --per-thread globally,
+		 * otherwise too many 0 output.
+		 */
+		if (uval == 0.0 && target__has_per_thread(&target))
+			continue;
+
+		slot = &buf[used++];
+		slot->counter = counter;
+		slot->id = thread;
+		slot->uval = uval;
+		slot->val = val;
+		slot->run = run;
+		slot->ena = ena;
+	}
+
+	qsort(buf, used, sizeof(*buf), cmp_val);
+
+	if (ret)
+		*ret = used;
+
+	return buf;
+}
+
+/*
+ * Print @counter once per thread, in the order produced by
+ * sort_aggr_thread().
+ *
+ * The per-thread runtime stats in stat_config.stats are used when they
+ * exist, the global rt_stat otherwise.
+ */
+static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
+{
+	FILE *output = stat_config.output;
+	int nthreads = thread_map__nr(counter->threads);
+	int ncpus = cpu_map__nr(counter->cpus);
+	struct perf_aggr_thread_value *buf;
+	int i, nr_sorted;
+
+	buf = sort_aggr_thread(counter, nthreads, ncpus, &nr_sorted);
+	if (!buf) {
+		perror("cannot sort aggr thread");
+		return;
+	}
+
+	for (i = 0; i < nr_sorted; i++) {
+		struct perf_aggr_thread_value *v = &buf[i];
+		struct runtime_stat *st;
+
+		if (prefix)
+			fprintf(output, "%s", prefix);
+
+		st = stat_config.stats ? &stat_config.stats[v->id] : &rt_stat;
+
+		printout(v->id, 0, v->counter, v->uval,
+			 prefix, v->run, v->ena, 1.0, st);
+		fputc('\n', output);
+	}
+
+	free(buf);
+}
+
+/*
+ * Accumulator for counter_aggr_cb(): sums of the averages of the three
+ * result stats of an event and its aliases.
+ */
+struct caggr_data {
+	double avg, avg_enabled, avg_running;
+};
+
+/*
+ * collect_data() callback: add the averages of @counter's res_stats[0..2]
+ * to the struct caggr_data in @data.  print_counter_aggr() uses them as
+ * the value, the enabled time and the running time respectively.
+ */
+static void counter_aggr_cb(struct perf_evsel *counter, void *data,
+			    bool first __maybe_unused)
+{
+	struct caggr_data *cd = data;
+	struct perf_stat_evsel *ps = counter->stats;
+
+	cd->avg += avg_stats(&ps->res_stats[0]);
+	cd->avg_enabled += avg_stats(&ps->res_stats[1]);
+	cd->avg_running += avg_stats(&ps->res_stats[2]);
+}
+
+/*
+ * Print out the results of a single counter:
+ * aggregated counts in system-wide mode
+ *
+ * Nothing is printed for a counter that collect_data() reports as already
+ * merged.  @prefix (may be NULL) is printed first unless metric_only is
+ * set, and the line is terminated unless metric_only is set.
+ */
+static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
+{
+	FILE *output = stat_config.output;
+	double uval;
+	struct caggr_data cd = { .avg = 0.0 };
+
+	if (!collect_data(counter, counter_aggr_cb, &cd))
+		return;
+
+	if (prefix && !metric_only)
+		fprintf(output, "%s", prefix);
+
+	uval = cd.avg * counter->scale;
+	/* The unscaled average doubles as the noise reference. */
+	printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled,
+		 cd.avg, &rt_stat);
+	if (!metric_only)
+		fprintf(output, "\n");
+}
+
+/*
+ * collect_data() callback: add the thread-0 counts of @counter on cpu
+ * ad->cpu to the struct aggr_data in @data.
+ */
+static void counter_cb(struct perf_evsel *counter, void *data,
+		       bool first __maybe_unused)
+{
+	struct aggr_data *ad = data;
+	struct perf_counts_values *c;
+
+	c = perf_counts(counter->counts, ad->cpu, 0);
+
+	ad->val += c->val;
+	ad->ena += c->ena;
+	ad->run += c->run;
+}
+
+/*
+ * Print out the results of a single counter:
+ * does not use aggregated count in system-wide
+ *
+ * One line is printed per cpu of @counter.  The function returns as soon
+ * as collect_data() reports the counter as already merged.
+ */
+static void print_counter(struct perf_evsel *counter, char *prefix)
+{
+	FILE *output = stat_config.output;
+	int cpu;
+
+	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+		struct aggr_data ad = { .cpu = cpu };
+
+		if (!collect_data(counter, counter_cb, &ad))
+			return;
+
+		if (prefix)
+			fprintf(output, "%s", prefix);
+
+		printout(cpu, 0, counter, ad.val * counter->scale, prefix,
+			 ad.run, ad.ena, 1.0, &rt_stat);
+
+		fputc('\n', output);
+	}
+}
+
+/*
+ * Print one line per cpu of evsel_list holding the metrics of every event
+ * (duration_time events excluded), read from the thread-0 counts.
+ *
+ * @prefix (may be NULL) starts each line; the cpu column is printed once
+ * per line, ahead of the first event.
+ */
+static void print_no_aggr_metric(char *prefix)
+{
+	struct perf_evsel *counter;
+	int nrcpus = evsel_list->cpus->nr;
+	int cpu;
+
+	for (cpu = 0; cpu < nrcpus; cpu++) {
+		bool first = true;
+
+		if (prefix)
+			fputs(prefix, stat_config.output);
+
+		evlist__for_each_entry(evsel_list, counter) {
+			struct perf_counts_values *c;
+
+			if (is_duration_time(counter))
+				continue;
+
+			if (first) {
+				aggr_printout(counter, cpu, 0);
+				first = false;
+			}
+
+			c = perf_counts(counter->counts, cpu, 0);
+
+			printout(cpu, 0, counter, c->val * counter->scale,
+				 prefix, c->run, c->ena, 1.0, &rt_stat);
+		}
+
+		fputc('\n', stat_config.output);
+	}
+}
+
+/*
+ * Number of blanks print_metric_headers() writes ahead of the metric
+ * titles in normal output, indexed by aggregation mode.
+ */
+static int aggr_header_lens[] = {
+	[AGGR_CORE] = 18,
+	[AGGR_SOCKET] = 12,
+	[AGGR_NONE] = 6,
+	[AGGR_THREAD] = 24,
+	[AGGR_GLOBAL] = 0,
+};
+
+/*
+ * Titles of the aggregation columns in the metric-only CSV header, indexed
+ * by aggregation mode.  The metric titles are appended right after the
+ * string, so every non-empty entry has to end with a separator; the socket
+ * entry used to lack it.
+ */
+static const char *aggr_header_csv[] = {
+	[AGGR_CORE] 	= 	"core,cpus,",
+	[AGGR_SOCKET] 	= 	"socket,cpus,",
+	[AGGR_NONE] 	= 	"cpu,",
+	[AGGR_THREAD] 	= 	"comm-pid,",
+	[AGGR_GLOBAL] 	=	""
+};
+
+/*
+ * Print the header line for metric-only output.
+ *
+ * @prefix:    text written first (may be NULL).
+ * @no_indent: suppress the blank lead-in of normal output.
+ *
+ * CSV output gets "time," (interval mode only) and the aggregation column
+ * titles; then every event except duration_time is run through the
+ * shadow-stat printer with print_metric_header() as callback so that it
+ * contributes its metric titles.
+ */
+static void print_metric_headers(const char *prefix, bool no_indent)
+{
+	struct perf_stat_output_ctx out;
+	struct perf_evsel *counter;
+	struct outstate os = {
+		.fh = stat_config.output
+	};
+
+	if (prefix)
+		fprintf(stat_config.output, "%s", prefix);
+
+	if (csv_output) {
+		if (stat_config.interval)
+			fputs("time,", stat_config.output);
+		fputs(aggr_header_csv[stat_config.aggr_mode],
+			stat_config.output);
+	} else if (!no_indent) {
+		fprintf(stat_config.output, "%*s",
+			aggr_header_lens[stat_config.aggr_mode], "");
+	}
+
+	/* The callbacks are the same for every event. */
+	out.ctx = &os;
+	out.print_metric = print_metric_header;
+	out.new_line = new_line_metric;
+	out.force_header = true;
+
+	/* Print metrics headers only */
+	evlist__for_each_entry(evsel_list, counter) {
+		if (is_duration_time(counter))
+			continue;
+
+		os.evsel = counter;
+		perf_stat__print_shadow_stats(counter, 0,
+					      0,
+					      &out,
+					      &metric_events,
+					      &rt_stat);
+	}
+
+	fputc('\n', stat_config.output);
+}
+
+/*
+ * Prepare the output of one interval.
+ *
+ * @prefix: buffer that receives the "<sec>.<nsec><csv_sep>" timestamp that
+ *          starts every line of this interval.  Its size is not passed in,
+ *          so the caller has to provide enough room.
+ * @ts:     time of this interval.
+ *
+ * The column header is printed before the first interval and again every
+ * 25 intervals: the mode-specific title line for normal output, and the
+ * metric titles when metric_only is set.
+ */
+static void print_interval(char *prefix, struct timespec *ts)
+{
+	FILE *output = stat_config.output;
+	static int num_print_interval;
+
+	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
+
+	if (num_print_interval == 0 && !csv_output) {
+		switch (stat_config.aggr_mode) {
+		case AGGR_SOCKET:
+			fprintf(output, "#           time socket cpus");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_CORE:
+			fprintf(output, "#           time core         cpus");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_NONE:
+			fprintf(output, "#           time CPU");
+			if (!metric_only)
+				fprintf(output, "                counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_THREAD:
+			fprintf(output, "#           time             comm-pid");
+			if (!metric_only)
+				fprintf(output, "                  counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_GLOBAL:
+		default:
+			fprintf(output, "#           time");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			/* falls through to the break below */
+		case AGGR_UNSET:
+			break;
+		}
+	}
+
+	if (num_print_interval == 0 && metric_only)
+		print_metric_headers(" ", true);
+	/* Wrap the counter so the header is repeated every 25 intervals. */
+	if (++num_print_interval == 25)
+		num_print_interval = 0;
+}
+
+/*
+ * Print the " Performance counter stats for ..." banner of normal output.
+ *
+ * The target is described as system wide, a cpu list, the command line
+ * taken from @argv ("pipe" when @argv is NULL), a process id or a thread
+ * id, followed by the number of runs when there was more than one.
+ * Nothing is printed for CSV output; stdout is flushed in either case.
+ */
+static void print_header(int argc, const char **argv)
+{
+	FILE *output = stat_config.output;
+	int i;
+
+	fflush(stdout);
+
+	if (!csv_output) {
+		fprintf(output, "\n");
+		fprintf(output, " Performance counter stats for ");
+		if (target.system_wide)
+			fprintf(output, "\'system wide");
+		else if (target.cpu_list)
+			fprintf(output, "\'CPU(s) %s", target.cpu_list);
+		else if (!target__has_task(&target)) {
+			fprintf(output, "\'%s", argv ? argv[0] : "pipe");
+			for (i = 1; argv && (i < argc); i++)
+				fprintf(output, " %s", argv[i]);
+		} else if (target.pid)
+			fprintf(output, "process id \'%s", target.pid);
+		else
+			fprintf(output, "thread id \'%s", target.tid);
+
+		/* Closing quote for whichever description was printed. */
+		fprintf(output, "\'");
+		if (run_count > 1)
+			fprintf(output, " (%d runs)", run_count);
+		fprintf(output, ":\n\n");
+	}
+}
+
+/*
+ * Print the trailer of a non-interval, non-CSV report: the average elapsed
+ * wall time (plus noise percentage for multiple runs) and, when flagged,
+ * hints about the NMI watchdog and mixed-PMU groups.
+ */
+static void print_footer(void)
+{
+	FILE *output = stat_config.output;
+	int nmi;
+
+	if (!null_run)
+		fputs("\n", output);
+
+	fprintf(output, " %17.9f seconds time elapsed",
+		avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC);
+
+	if (run_count > 1) {
+		fputs("                                        ", output);
+		print_noise_pct(stddev_stats(&walltime_nsecs_stats),
+				avg_stats(&walltime_nsecs_stats));
+	}
+	fputs("\n\n", output);
+
+	/* Only consult the sysctl when some counter asked for the hint. */
+	if (print_free_counters_hint) {
+		if (sysctl__read_int("kernel/nmi_watchdog", &nmi) >= 0 && nmi > 0)
+			fputs(
+"Some events weren't counted. Try disabling the NMI watchdog:\n"
+"	echo 0 > /proc/sys/kernel/nmi_watchdog\n"
+"	perf stat ...\n"
+"	echo 1 > /proc/sys/kernel/nmi_watchdog\n", output);
+	}
+
+	if (print_mixed_hw_group_error) {
+		fputs("The events in group usually have to be from "
+		      "the same PMU. Try reorganizing the group.\n", output);
+	}
+}
+
+/*
+ * Top-level output routine: emit the interval prefix or the banner, the
+ * metric headers when in metric-only mode, then every counter according to
+ * the configured aggregation mode, and finally the footer for plain
+ * (non-interval, non-CSV) runs.  The output stream is flushed at the end.
+ */
+static void print_counters(struct timespec *ts, int argc, const char **argv)
+{
+	char buf[64];
+	char *prefix = NULL;
+	struct perf_evsel *counter;
+	int interval = stat_config.interval;
+
+	/* Do not print anything if we record to the pipe. */
+	if (STAT_RECORD && perf_stat.data.is_pipe)
+		return;
+
+	if (interval) {
+		prefix = buf;
+		print_interval(prefix, ts);
+	} else {
+		print_header(argc, argv);
+	}
+
+	if (metric_only) {
+		static int num_print_iv;
+
+		if (!interval && num_print_iv == 0)
+			print_metric_headers(prefix, false);
+		if (num_print_iv++ == 25)
+			num_print_iv = 0;
+		if (prefix && stat_config.aggr_mode == AGGR_GLOBAL)
+			fputs(prefix, stat_config.output);
+	}
+
+	switch (stat_config.aggr_mode) {
+	case AGGR_SOCKET:
+	case AGGR_CORE:
+		print_aggr(prefix);
+		break;
+	case AGGR_NONE:
+		if (metric_only) {
+			print_no_aggr_metric(prefix);
+			break;
+		}
+		evlist__for_each_entry(evsel_list, counter) {
+			if (!is_duration_time(counter))
+				print_counter(counter, prefix);
+		}
+		break;
+	case AGGR_GLOBAL:
+		evlist__for_each_entry(evsel_list, counter) {
+			if (!is_duration_time(counter))
+				print_counter_aggr(counter, prefix);
+		}
+		if (metric_only)
+			fputc('\n', stat_config.output);
+		break;
+	case AGGR_THREAD:
+		evlist__for_each_entry(evsel_list, counter) {
+			if (!is_duration_time(counter))
+				print_aggr_thread(counter, prefix);
+		}
+		break;
+	case AGGR_UNSET:
+	default:
+		break;
+	}
+
+	if (!(interval || csv_output))
+		print_footer();
+
+	fflush(stat_config.output);
+}
+
+/* Number of the last signal seen by skip_signal(); -1 while none was seen. */
+static volatile int signr = -1;
+
+/*
+ * Signal handler: remember the signal number in signr and invalidate
+ * child_pid.  The done flag is raised only when child_pid was already -1
+ * or interval mode is configured.
+ */
+static void skip_signal(int signo)
+{
+	if ((child_pid == -1) || stat_config.interval)
+		done = 1;
+
+	signr = signo;
+	/*
+	 * render child_pid harmless
+	 * won't send SIGTERM to a random
+	 * process in case of race condition
+	 * and fast PID recycling
+	 */
+	child_pid = -1;
+}
+
+/*
+ * Exit handler (registered with atexit() in cmd_stat()): with SIGCHLD
+ * blocked, send SIGTERM to the child if child_pid is still valid, then
+ * restore the previous signal mask.  If skip_signal() recorded a signal,
+ * reset it to its default disposition and re-raise it against this
+ * process.
+ */
+static void sig_atexit(void)
+{
+	sigset_t set, oset;
+
+	/*
+	 * avoid race condition with SIGCHLD handler
+	 * in skip_signal() which is modifying child_pid
+	 * goal is to avoid send SIGTERM to a random
+	 * process
+	 */
+	sigemptyset(&set);
+	sigaddset(&set, SIGCHLD);
+	sigprocmask(SIG_BLOCK, &set, &oset);
+
+	if (child_pid != -1)
+		kill(child_pid, SIGTERM);
+
+	sigprocmask(SIG_SETMASK, &oset, NULL);
+
+	/* No signal was recorded: nothing to re-raise. */
+	if (signr == -1)
+		return;
+
+	signal(signr, SIG_DFL);
+	kill(getpid(), signr);
+}
+
+/*
+ * Option callback for -B/--big-num: record 1 when the option was given and
+ * 0 when it was negated.  Always returns 0.
+ */
+static int stat__set_big_num(const struct option *opt __maybe_unused,
+			     const char *s __maybe_unused, int unset)
+{
+	if (unset)
+		big_num_opt = 0;
+	else
+		big_num_opt = 1;
+
+	return 0;
+}
+
+/*
+ * Option callback for --metric-only: set metric_only from the option
+ * (cleared when negated) and note that the user chose explicitly, so later
+ * defaults do not override it.  Always returns 0.
+ */
+static int enable_metric_only(const struct option *opt __maybe_unused,
+			      const char *s __maybe_unused, int unset)
+{
+	metric_only = unset ? false : true;
+	force_metric_only = true;
+
+	return 0;
+}
+
+/*
+ * Option callback for -M/--metrics: hand the metric list string to
+ * metricgroup__parse_groups() together with the file-level metric_events
+ * and return its result.
+ */
+static int parse_metric_groups(const struct option *opt,
+			       const char *str,
+			       int unset __maybe_unused)
+{
+	int ret = metricgroup__parse_groups(opt, str, &metric_events);
+
+	return ret;
+}
+
+/*
+ * Command-line option table for 'perf stat'.  It is handed to the option
+ * parser by cmd_stat() and reused by __cmd_record(); each entry binds a
+ * switch to a file-level variable or to a callback defined above.
+ */
+static const struct option stat_options[] = {
+	OPT_BOOLEAN('T', "transaction", &transaction_run,
+		    "hardware transaction statistics"),
+	OPT_CALLBACK('e', "event", &evsel_list, "event",
+		     "event selector. use 'perf list' to list available events",
+		     parse_events_option),
+	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
+		     "event filter", parse_filter),
+	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
+		    "child tasks do not inherit counters"),
+	OPT_STRING('p', "pid", &target.pid, "pid",
+		   "stat events on existing process id"),
+	OPT_STRING('t', "tid", &target.tid, "tid",
+		   "stat events on existing thread id"),
+	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
+		    "system-wide collection from all CPUs"),
+	OPT_BOOLEAN('g', "group", &group,
+		    "put the counters into a counter group"),
+	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show counter open errors, etc)"),
+	OPT_INTEGER('r', "repeat", &run_count,
+		    "repeat command and print average + stddev (max: 100, forever: 0)"),
+	OPT_BOOLEAN('n', "null", &null_run,
+		    "null run - dont start any counters"),
+	OPT_INCR('d', "detailed", &detailed_run,
+		    "detailed run - start a lot of events"),
+	OPT_BOOLEAN('S', "sync", &sync_run,
+		    "call sync() before starting a run"),
+	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
+			   "print large numbers with thousands\' separators",
+			   stat__set_big_num),
+	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
+		    "list of cpus to monitor in system-wide"),
+	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
+		    "disable CPU count aggregation", AGGR_NONE),
+	OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"),
+	OPT_STRING('x', "field-separator", &csv_sep, "separator",
+		   "print counts with custom separator"),
+	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
+		     "monitor event in cgroup name only", parse_cgroups),
+	OPT_STRING('o', "output", &output_name, "file", "output file name"),
+	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
+	OPT_INTEGER(0, "log-fd", &output_fd,
+		    "log output to fd, instead of stderr"),
+	OPT_STRING(0, "pre", &pre_cmd, "command",
+			"command to run prior to the measured command"),
+	OPT_STRING(0, "post", &post_cmd, "command",
+			"command to run after to the measured command"),
+	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
+		    "print counts at regular interval in ms "
+		    "(overhead is possible for values <= 100ms)"),
+	OPT_INTEGER(0, "interval-count", &stat_config.times,
+		    "print counts for fixed number of times"),
+	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
+		    "stop workload and print counts after a timeout period in ms (>= 10ms)"),
+	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
+		     "aggregate counts per processor socket", AGGR_SOCKET),
+	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
+		     "aggregate counts per physical processor core", AGGR_CORE),
+	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
+		     "aggregate counts per thread", AGGR_THREAD),
+	OPT_UINTEGER('D', "delay", &initial_delay,
+		     "ms to wait before starting measurement after program start"),
+	OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
+			"Only print computed metrics. No raw values", enable_metric_only),
+	OPT_BOOLEAN(0, "topdown", &topdown_run,
+			"measure topdown level 1 statistics"),
+	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
+			"measure SMI cost"),
+	OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
+		     "monitor specified metrics or metric groups (separated by ,)",
+		     parse_metric_groups),
+	OPT_END()
+};
+
+/* Socket id lookup for slot @cpu of @map, with no extra lookup data. */
+static int perf_stat__get_socket(struct cpu_map *map, int cpu)
+{
+	int socket_id = cpu_map__get_socket(map, cpu, NULL);
+
+	return socket_id;
+}
+
+/* Core id lookup for slot @cpu of @map, with no extra lookup data. */
+static int perf_stat__get_core(struct cpu_map *map, int cpu)
+{
+	int core_id = cpu_map__get_core(map, cpu, NULL);
+
+	return core_id;
+}
+
+/* Return the largest value stored in @map, or -1 when the map is empty. */
+static int cpu_map__get_max(struct cpu_map *map)
+{
+	int highest = -1;
+	int pos = 0;
+
+	while (pos < map->nr) {
+		int cpu = map->map[pos++];
+
+		if (highest < cpu)
+			highest = cpu;
+	}
+
+	return highest;
+}
+
+static struct cpu_map *cpus_aggr_map;
+
+/*
+ * Return the aggregation id for slot @idx of @map, memoized per CPU in
+ * cpus_aggr_map: @get_id is only called while the cached entry is still -1.
+ * Returns -1 when @idx is outside the map.
+ */
+static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
+{
+	int *cached;
+
+	if (idx >= map->nr)
+		return -1;
+
+	cached = &cpus_aggr_map->map[map->map[idx]];
+	if (*cached == -1)
+		*cached = get_id(map, idx);
+
+	return *cached;
+}
+
+/* Memoized socket id lookup, routed through perf_stat__get_aggr(). */
+static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
+{
+	aggr_get_id_t lookup = perf_stat__get_socket;
+
+	return perf_stat__get_aggr(lookup, map, idx);
+}
+
+/* Memoized core id lookup, routed through perf_stat__get_aggr(). */
+static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
+{
+	aggr_get_id_t lookup = perf_stat__get_core;
+
+	return perf_stat__get_aggr(lookup, map, idx);
+}
+
+/*
+ * Prepare live-mode aggregation: build the socket or core map when that
+ * mode is selected and install the matching cached id lookup, then allocate
+ * the per-CPU id cache.
+ *
+ * Returns 0 on success, -1 if the socket/core map cannot be built, or
+ * -ENOMEM if the cache cannot be allocated.
+ */
+static int perf_stat_init_aggr_mode(void)
+{
+	int top_cpu;
+
+	if (stat_config.aggr_mode == AGGR_SOCKET) {
+		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
+			perror("cannot build socket map");
+			return -1;
+		}
+		aggr_get_id = perf_stat__get_socket_cached;
+	} else if (stat_config.aggr_mode == AGGR_CORE) {
+		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
+			perror("cannot build core map");
+			return -1;
+		}
+		aggr_get_id = perf_stat__get_core_cached;
+	}
+	/* Every other mode needs no aggregation map. */
+
+	/*
+	 * The evsel_list->cpus is the base we operate on,
+	 * taking the highest cpu number to be the size of
+	 * the aggregation translate cpumap.
+	 */
+	top_cpu = cpu_map__get_max(evsel_list->cpus);
+	cpus_aggr_map = cpu_map__empty_new(top_cpu + 1);
+	if (!cpus_aggr_map)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Drop the references on both aggregation maps and clear the pointers. */
+static void perf_stat__exit_aggr_mode(void)
+{
+	cpu_map__put(aggr_map);
+	aggr_map = NULL;
+
+	cpu_map__put(cpus_aggr_map);
+	cpus_aggr_map = NULL;
+}
+
+/*
+ * Translate slot @idx of @map into a CPU number, validated against the
+ * recorded environment @env.
+ *
+ * Returns the CPU number, or -1 when @idx is outside the map or the CPU is
+ * not below env->nr_cpus_avail.
+ */
+static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
+{
+	int cpu;
+
+	/*
+	 * Valid slots are 0..nr-1; idx == nr would read one element past the
+	 * array.  This matches the bound used by perf_stat__get_aggr().
+	 */
+	if (idx >= map->nr)
+		return -1;
+
+	cpu = map->map[idx];
+
+	if (cpu >= env->nr_cpus_avail)
+		return -1;
+
+	return cpu;
+}
+
+/*
+ * Socket id of slot @idx of @map, taken from the perf_env passed as @data.
+ * Returns -1 when the slot does not resolve to a CPU.
+ */
+static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
+{
+	struct perf_env *env = data;
+	int cpu = perf_env__get_cpu(env, map, idx);
+
+	if (cpu == -1)
+		return -1;
+
+	return env->cpu[cpu].socket_id;
+}
+
+/*
+ * Global core id of slot @idx of @map, taken from the perf_env passed as
+ * @data.  Returns -1 when the slot does not resolve to a CPU.
+ */
+static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
+{
+	struct perf_env *env = data;
+	int cpu = perf_env__get_cpu(env, map, idx);
+	int socket_id;
+
+	if (cpu == -1)
+		return -1;
+
+	socket_id = env->cpu[cpu].socket_id;
+
+	/*
+	 * Encode socket in upper 16 bits
+	 * core_id is relative to socket, and
+	 * we need a global id. So we combine
+	 * socket + core id.
+	 */
+	return (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
+}
+
+/* Build the socket map for @cpus into @sockp using ids recorded in @env. */
+static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
+				      struct cpu_map **sockp)
+{
+	int ret = cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
+
+	return ret;
+}
+
+/* Build the core map for @cpus into @corep using ids recorded in @env. */
+static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
+				    struct cpu_map **corep)
+{
+	int ret = cpu_map__build_map(cpus, corep, perf_env__get_core, env);
+
+	return ret;
+}
+
+/* Socket id lookup backed by the environment stored in the session header. */
+static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
+{
+	struct perf_env *env = &perf_stat.session->header.env;
+
+	return perf_env__get_socket(map, idx, env);
+}
+
+/* Core id lookup backed by the environment stored in the session header. */
+static int perf_stat__get_core_file(struct cpu_map *map, int idx)
+{
+	struct perf_env *env = &perf_stat.session->header.env;
+
+	return perf_env__get_core(map, idx, env);
+}
+
+/*
+ * Report-mode counterpart of perf_stat_init_aggr_mode(): build the socket
+ * or core map from the environment stored in @st's session header and
+ * install the matching id lookup.
+ *
+ * Returns 0 on success or -1 if the map cannot be built.
+ */
+static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
+{
+	struct perf_env *env = &st->session->header.env;
+
+	if (stat_config.aggr_mode == AGGR_SOCKET) {
+		if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
+			perror("cannot build socket map");
+			return -1;
+		}
+		aggr_get_id = perf_stat__get_socket_file;
+	} else if (stat_config.aggr_mode == AGGR_CORE) {
+		if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
+			perror("cannot build core map");
+			return -1;
+		}
+		aggr_get_id = perf_stat__get_core_file;
+	}
+	/* Every other mode needs no aggregation map. */
+
+	return 0;
+}
+
+/*
+ * Compact @attr in place so that it only lists events the "cpu" PMU has,
+ * then build a comma-separated string of the survivors in a freshly
+ * malloc()ed buffer stored in *@str (wrapped in '{' '}' when @use_group).
+ * With no surviving event *@str is the empty string.
+ *
+ * Returns 0 on success, -1 if the allocation fails.
+ */
+static int topdown_filter_events(const char **attr, char **str, bool use_group)
+{
+	int kept = 0;
+	int total = 0;
+	int n;
+	char *dst;
+
+	for (n = 0; attr[n]; n++) {
+		if (!pmu_have_event("cpu", attr[n]))
+			continue;
+		total += strlen(attr[n]) + 1;
+		attr[kept++] = attr[n];
+	}
+	attr[kept] = NULL;
+
+	/* One byte per separator is counted above; add NUL and both braces. */
+	*str = malloc(total + 1 + 2);
+	if (!*str)
+		return -1;
+
+	dst = *str;
+	if (kept == 0) {
+		*dst = 0;
+		return 0;
+	}
+
+	if (use_group)
+		*dst++ = '{';
+
+	for (n = 0; n < kept; n++) {
+		size_t name_len = strlen(attr[n]);
+
+		memcpy(dst, attr[n], name_len);
+		dst += name_len;
+		*dst++ = ',';
+	}
+
+	/* Replace the trailing comma with the closer or the terminator. */
+	if (use_group) {
+		dst[-1] = '}';
+		*dst = 0;
+	} else {
+		dst[-1] = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Weak default: report that topdown events should not be grouped and that
+ * no warning is needed.  Its return value is used as the use_group argument
+ * of topdown_filter_events().
+ */
+__weak bool arch_topdown_check_group(bool *warn)
+{
+	*warn = false;
+	return false;
+}
+
+/*
+ * Weak default: does nothing.  Called when arch_topdown_check_group() asked
+ * for a warning.
+ */
+__weak void arch_topdown_group_warn(void)
+{
+}
+
+/*
+ * Add default attributes, if there were no attributes specified or
+ * if -d/--detailed, -d -d or -d -d -d is used.
+ *
+ * The special modes are handled first: a null run adds nothing, while
+ * transaction and SMI-cost runs set up their own event strings and return.
+ * A topdown run adds its events and then falls through to the common tail,
+ * which adds the basic default events if the list is still empty and
+ * appends the detailed cache events according to detailed_run.
+ *
+ * Returns 0 on success and a negative value on failure.
+ */
+static int add_default_attributes(void)
+{
+	int err;
+	struct perf_event_attr default_attrs0[] = {
+
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
+
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
+};
+	struct perf_event_attr frontend_attrs[] = {
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
+};
+	struct perf_event_attr backend_attrs[] = {
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
+};
+	struct perf_event_attr default_attrs1[] = {
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
+
+};
+
+/*
+ * Detailed stats (-d), covering the L1 and last level data caches:
+ */
+	struct perf_event_attr detailed_attrs[] = {
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_LL			<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_LL			<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+};
+
+/*
+ * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
+ */
+	struct perf_event_attr very_detailed_attrs[] = {
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+
+};
+
+/*
+ * Very, very detailed stats (-d -d -d), adding prefetch events:
+ */
+	struct perf_event_attr very_very_detailed_attrs[] = {
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+};
+
+	/* Set attrs if no event is selected and !null_run: */
+	if (null_run)
+		return 0;
+
+	if (transaction_run) {
+		struct parse_events_error errinfo;
+
+		/* Use the full event set only when the PMU has both events. */
+		if (pmu_have_event("cpu", "cycles-ct") &&
+		    pmu_have_event("cpu", "el-start"))
+			err = parse_events(evsel_list, transaction_attrs,
+					   &errinfo);
+		else
+			err = parse_events(evsel_list,
+					   transaction_limited_attrs,
+					   &errinfo);
+		if (err) {
+			fprintf(stderr, "Cannot set up transaction events\n");
+			return -1;
+		}
+		return 0;
+	}
+
+	if (smi_cost) {
+		int smi;
+
+		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
+			fprintf(stderr, "freeze_on_smi is not supported.\n");
+			return -1;
+		}
+
+		if (!smi) {
+			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
+				fprintf(stderr, "Failed to set freeze_on_smi.\n");
+				return -1;
+			}
+			/* Remember that freeze_on_smi was switched on here. */
+			smi_reset = true;
+		}
+
+		if (pmu_have_event("msr", "aperf") &&
+		    pmu_have_event("msr", "smi")) {
+			if (!force_metric_only)
+				metric_only = true;
+			err = parse_events(evsel_list, smi_cost_attrs, NULL);
+		} else {
+			fprintf(stderr, "To measure SMI cost, it needs "
+				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
+			return -1;
+		}
+		if (err) {
+			fprintf(stderr, "Cannot set up SMI cost events\n");
+			return -1;
+		}
+		return 0;
+	}
+
+	if (topdown_run) {
+		char *str = NULL;
+		bool warn = false;
+
+		if (stat_config.aggr_mode != AGGR_GLOBAL &&
+		    stat_config.aggr_mode != AGGR_CORE) {
+			pr_err("top down event configuration requires --per-core mode\n");
+			return -1;
+		}
+		stat_config.aggr_mode = AGGR_CORE;
+		if (nr_cgroups || !target__has_cpu(&target)) {
+			pr_err("top down event configuration requires system-wide mode (-a)\n");
+			return -1;
+		}
+
+		if (!force_metric_only)
+			metric_only = true;
+		if (topdown_filter_events(topdown_attrs, &str,
+				arch_topdown_check_group(&warn)) < 0) {
+			pr_err("Out of memory\n");
+			return -1;
+		}
+		if (topdown_attrs[0] && str) {
+			if (warn)
+				arch_topdown_group_warn();
+			err = parse_events(evsel_list, str, NULL);
+			if (err) {
+				fprintf(stderr,
+					"Cannot set up top down events %s: %d\n",
+					str, err);
+				free(str);
+				return -1;
+			}
+		} else {
+			fprintf(stderr, "System does not support topdown\n");
+			/* str is allocated even when no event survived. */
+			free(str);
+			return -1;
+		}
+		free(str);
+	}
+
+	if (!evsel_list->nr_entries) {
+		/* With a CPU target the first default becomes cpu-clock. */
+		if (target__has_cpu(&target))
+			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
+
+		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+			return -1;
+		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
+			if (perf_evlist__add_default_attrs(evsel_list,
+						frontend_attrs) < 0)
+				return -1;
+		}
+		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
+			if (perf_evlist__add_default_attrs(evsel_list,
+						backend_attrs) < 0)
+				return -1;
+		}
+		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
+			return -1;
+	}
+
+	/* Detailed events get appended to the event list: */
+
+	if (detailed_run <  1)
+		return 0;
+
+	/* Append detailed run extra attributes: */
+	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
+		return -1;
+
+	if (detailed_run < 2)
+		return 0;
+
+	/* Append very detailed run extra attributes: */
+	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
+		return -1;
+
+	if (detailed_run < 3)
+		return 0;
+
+	/* Append very, very detailed run extra attributes: */
+	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
+}
+
+/* Usage lines passed to parse_options() by __cmd_record(); NULL-terminated. */
+static const char * const stat_record_usage[] = {
+	"perf stat record [<options>]",
+	NULL,
+};
+
+/*
+ * Enable every header feature in [HEADER_FIRST_FEATURE, HEADER_LAST_FEATURE)
+ * for @session, then switch off the build-id, tracing-data, branch-stack
+ * and auxtrace features again.
+ */
+static void init_features(struct perf_session *session)
+{
+	static const int excluded[] = {
+		HEADER_BUILD_ID,
+		HEADER_TRACING_DATA,
+		HEADER_BRANCH_STACK,
+		HEADER_AUXTRACE,
+	};
+	unsigned int k;
+	int feat = HEADER_FIRST_FEATURE;
+
+	while (feat < HEADER_LAST_FEATURE) {
+		perf_header__set_feat(&session->header, feat);
+		feat++;
+	}
+
+	for (k = 0; k < sizeof(excluded) / sizeof(excluded[0]); k++)
+		perf_header__clear_feat(&session->header, excluded[k]);
+}
+
+/*
+ * Set up 'perf stat record': parse the record options, reject -r, create
+ * the output session on perf_stat.data and attach evsel_list to it.
+ *
+ * Returns the argument count left after option parsing, or -1 on failure.
+ */
+static int __cmd_record(int argc, const char **argv)
+{
+	struct perf_data *data = &perf_stat.data;
+	struct perf_session *session;
+
+	argc = parse_options(argc, argv, stat_options, stat_record_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+
+	if (output_name)
+		data->file.path = output_name;
+
+	if (forever || run_count != 1) {
+		pr_err("Cannot use -r option with perf stat record.\n");
+		return -1;
+	}
+
+	session = perf_session__new(data, false, NULL);
+	if (!session) {
+		pr_err("Perf session creation failed.\n");
+		return -1;
+	}
+
+	init_features(session);
+
+	session->evlist = evsel_list;
+	perf_stat.record = true;
+	perf_stat.session = session;
+
+	return argc;
+}
+
+/*
+ * Handle a stat-round event from a recorded session: process every counter,
+ * add the round time to the wall-clock stats for a final round, and print
+ * the counters (with a timestamp in interval mode when the round carries a
+ * time).  Always returns 0.
+ */
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+				    union perf_event *event,
+				    struct perf_session *session)
+{
+	struct stat_round_event *rnd = &event->stat_round;
+	const char **argv = session->header.env.cmdline_argv;
+	int argc = session->header.env.nr_cmdline;
+	struct timespec stamp;
+	struct timespec *ts = NULL;
+	struct perf_evsel *counter;
+
+	evlist__for_each_entry(evsel_list, counter) {
+		perf_stat_process_counter(&stat_config, counter);
+	}
+
+	if (rnd->type == PERF_STAT_ROUND_TYPE__FINAL)
+		update_stats(&walltime_nsecs_stats, rnd->time);
+
+	if (stat_config.interval && rnd->time) {
+		stamp.tv_sec  = rnd->time / NSEC_PER_SEC;
+		stamp.tv_nsec = rnd->time % NSEC_PER_SEC;
+		ts = &stamp;
+	}
+
+	print_counters(ts, argc, argv);
+
+	return 0;
+}
+
+/*
+ * Handle a stat-config event: load the recorded configuration into
+ * stat_config, let an aggregation mode given on the report command line
+ * override it, and initialize aggregation for pipe or file input.  With an
+ * empty CPU map nothing is initialized, and a warning is issued if a mode
+ * was requested.  Always returns 0.
+ */
+static
+int process_stat_config_event(struct perf_tool *tool,
+			      union perf_event *event,
+			      struct perf_session *session __maybe_unused)
+{
+	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+	bool mode_requested;
+
+	perf_event__read_stat_config(&stat_config, &event->stat_config);
+
+	mode_requested = (st->aggr_mode != AGGR_UNSET);
+
+	if (cpu_map__empty(st->cpus)) {
+		if (mode_requested)
+			pr_warning("warning: processing task data, aggregation mode not set\n");
+		return 0;
+	}
+
+	if (mode_requested)
+		stat_config.aggr_mode = st->aggr_mode;
+
+	if (!perf_stat.data.is_pipe)
+		perf_stat_init_aggr_mode_file(st);
+	else
+		perf_stat_init_aggr_mode();
+
+	return 0;
+}
+
+/*
+ * Once both the cpu and thread maps of @st are known, attach them to
+ * evsel_list and allocate the per-event stats.
+ *
+ * Returns 0 on success or while a map is still missing, -EINVAL on a
+ * repeated allocation, -ENOMEM if the stats cannot be allocated.
+ */
+static int set_maps(struct perf_stat *st)
+{
+	if (!(st->cpus && st->threads))
+		return 0;
+
+	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
+		return -EINVAL;
+
+	perf_evlist__set_maps(evsel_list, st->cpus, st->threads);
+
+	if (perf_evlist__alloc_stats(evsel_list, true) != 0)
+		return -ENOMEM;
+
+	st->maps_allocated = true;
+
+	return 0;
+}
+
+/*
+ * Handle a thread-map event: build the thread map once and try to complete
+ * the maps setup.  Further thread-map events are ignored with a warning.
+ *
+ * Returns the result of set_maps(), 0 for an ignored event, or -ENOMEM if
+ * the map cannot be created.
+ */
+static
+int process_thread_map_event(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_session *session __maybe_unused)
+{
+	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+
+	if (!st->threads) {
+		st->threads = thread_map__new_event(&event->thread_map);
+		return st->threads ? set_maps(st) : -ENOMEM;
+	}
+
+	pr_warning("Extra thread map event, ignoring.\n");
+	return 0;
+}
+
+/*
+ * Handle a cpu-map event: build the cpu map once and try to complete the
+ * maps setup.  Further cpu-map events are ignored with a warning.
+ *
+ * Returns the result of set_maps(), 0 for an ignored event, or -ENOMEM if
+ * the map cannot be created.
+ */
+static
+int process_cpu_map_event(struct perf_tool *tool,
+			  union perf_event *event,
+			  struct perf_session *session __maybe_unused)
+{
+	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+	struct cpu_map *cpus;
+
+	if (!st->cpus) {
+		cpus = cpu_map__new_data(&event->cpu_map.data);
+		if (cpus == NULL)
+			return -ENOMEM;
+
+		st->cpus = cpus;
+		return set_maps(st);
+	}
+
+	pr_warning("Extra cpu map event, ignoring.\n");
+	return 0;
+}
+
+/*
+ * Allocate and initialize @nthreads runtime_stat slots in @config.
+ * Returns 0 on success, -1 if the allocation fails.
+ */
+static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
+{
+	int idx;
+
+	config->stats = calloc(nthreads, sizeof(*config->stats));
+	if (config->stats == NULL)
+		return -1;
+
+	config->stats_num = nthreads;
+
+	for (idx = 0; idx < nthreads; idx++)
+		runtime_stat__init(config->stats + idx);
+
+	return 0;
+}
+
+/*
+ * Tear down what runtime_stat_new() set up: exit every slot and free the
+ * array.  Does nothing when no array was allocated.
+ */
+static void runtime_stat_delete(struct perf_stat_config *config)
+{
+	int idx = 0;
+
+	if (config->stats == NULL)
+		return;
+
+	while (idx < config->stats_num) {
+		runtime_stat__exit(&config->stats[idx]);
+		idx++;
+	}
+
+	free(config->stats);
+}
+
+/* Usage lines passed to parse_options() by __cmd_report(); NULL-terminated. */
+static const char * const stat_report_usage[] = {
+	"perf stat report [<options>]",
+	NULL,
+};
+
+/*
+ * State shared by 'perf stat record' and 'perf stat report'.  The tool
+ * callbacks are the handlers run while a recorded session is processed;
+ * aggr_mode stays AGGR_UNSET unless a report option overrides it.
+ */
+static struct perf_stat perf_stat = {
+	.tool = {
+		.attr		= perf_event__process_attr,
+		.event_update	= perf_event__process_event_update,
+		.thread_map	= process_thread_map_event,
+		.cpu_map	= process_cpu_map_event,
+		.stat_config	= process_stat_config_event,
+		.stat		= perf_event__process_stat_event,
+		.stat_round	= process_stat_round_event,
+	},
+	.aggr_mode = AGGR_UNSET,
+};
+
+/*
+ * Implement 'perf stat report': parse the report options, open the input
+ * (stdin when it is a FIFO and no file was named, otherwise "perf.data" by
+ * default), replay its events through perf_stat.tool and print to stderr.
+ *
+ * Returns 0 on success, -1 if the session cannot be created, or the error
+ * returned by perf_session__process_events().
+ */
+static int __cmd_report(int argc, const char **argv)
+{
+	struct perf_session *session;
+	const struct option options[] = {
+	OPT_STRING('i', "input", &input_name, "file", "input file name"),
+	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
+		     "aggregate counts per processor socket", AGGR_SOCKET),
+	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
+		     "aggregate counts per physical processor core", AGGR_CORE),
+	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
+		     "disable CPU count aggregation", AGGR_NONE),
+	OPT_END()
+	};
+	struct stat st;
+	int ret;
+
+	argc = parse_options(argc, argv, options, stat_report_usage, 0);
+
+	if (!input_name || !strlen(input_name)) {
+		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
+			input_name = "-";
+		else
+			input_name = "perf.data";
+	}
+
+	perf_stat.data.file.path = input_name;
+	perf_stat.data.mode      = PERF_DATA_MODE_READ;
+
+	session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
+	if (session == NULL)
+		return -1;
+
+	perf_stat.session  = session;
+	stat_config.output = stderr;
+	evsel_list         = session->evlist;
+
+	ret = perf_session__process_events(session);
+
+	/* Release the session on failure as well as on success. */
+	perf_session__delete(session);
+	return ret;
+}
+
+/*
+ * Make system wide (-a) the default target if
+ * no target was specified and one of following
+ * conditions is met:
+ *
+ *   - there's no workload specified
+ *   - there is workload specified but all requested
+ *     events are system wide events
+ *
+ * @forks is non-zero when a workload was given on the command line.
+ */
+static void setup_system_wide(int forks)
+{
+	struct perf_evsel *counter;
+
+	if (!target__none(&target))
+		return;
+
+	if (!forks) {
+		target.system_wide = true;
+		return;
+	}
+
+	/* One event that is not system wide keeps the target as it is. */
+	evlist__for_each_entry(evsel_list, counter) {
+		if (!counter->system_wide)
+			return;
+	}
+
+	if (evsel_list->nr_entries)
+		target.system_wide = true;
+}
+
+int cmd_stat(int argc, const char **argv)
+{
+	const char * const stat_usage[] = {
+		"perf stat [<options>] [<command>]",
+		NULL
+	};
+	int status = -EINVAL, run_idx;
+	const char *mode;
+	FILE *output = stderr;
+	unsigned int interval, timeout;
+	const char * const stat_subcommands[] = { "record", "report" };
+
+	setlocale(LC_ALL, "");
+
+	evsel_list = perf_evlist__new();
+	if (evsel_list == NULL)
+		return -ENOMEM;
+
+	parse_events__shrink_config_terms();
+	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
+					(const char **) stat_usage,
+					PARSE_OPT_STOP_AT_NON_OPTION);
+	perf_stat__collect_metric_expr(evsel_list);
+	perf_stat__init_shadow_stats();
+
+	if (csv_sep) {
+		csv_output = true;
+		if (!strcmp(csv_sep, "\\t"))
+			csv_sep = "\t";
+	} else
+		csv_sep = DEFAULT_SEPARATOR;
+
+	if (argc && !strncmp(argv[0], "rec", 3)) {
+		argc = __cmd_record(argc, argv);
+		if (argc < 0)
+			return -1;
+	} else if (argc && !strncmp(argv[0], "rep", 3))
+		return __cmd_report(argc, argv);
+
+	interval = stat_config.interval;
+	timeout = stat_config.timeout;
+
+	/*
+	 * For record command the -o is already taken care of.
+	 */
+	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
+		output = NULL;
+
+	if (output_name && output_fd) {
+		fprintf(stderr, "cannot use both --output and --log-fd\n");
+		parse_options_usage(stat_usage, stat_options, "o", 1);
+		parse_options_usage(NULL, stat_options, "log-fd", 0);
+		goto out;
+	}
+
+	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
+		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
+		goto out;
+	}
+
+	if (metric_only && run_count > 1) {
+		fprintf(stderr, "--metric-only is not supported with -r\n");
+		goto out;
+	}
+
+	if (output_fd < 0) {
+		fprintf(stderr, "argument to --log-fd must be a > 0\n");
+		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
+		goto out;
+	}
+
+	if (!output) {
+		struct timespec tm;
+		mode = append_file ? "a" : "w";
+
+		output = fopen(output_name, mode);
+		if (!output) {
+			perror("failed to create output file");
+			return -1;
+		}
+		clock_gettime(CLOCK_REALTIME, &tm);
+		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
+	} else if (output_fd > 0) {
+		mode = append_file ? "a" : "w";
+		output = fdopen(output_fd, mode);
+		if (!output) {
+			perror("Failed opening logfd");
+			return -errno;
+		}
+	}
+
+	stat_config.output = output;
+
+	/*
+	 * let the spreadsheet do the pretty-printing
+	 */
+	if (csv_output) {
+		/* User explicitly passed -B? */
+		if (big_num_opt == 1) {
+			fprintf(stderr, "-B option not supported with -x\n");
+			parse_options_usage(stat_usage, stat_options, "B", 1);
+			parse_options_usage(NULL, stat_options, "x", 1);
+			goto out;
+		} else /* Nope, so disable big number formatting */
+			big_num = false;
+	} else if (big_num_opt == 0) /* User passed --no-big-num */
+		big_num = false;
+
+	setup_system_wide(argc);
+
+	if (run_count < 0) {
+		pr_err("Run count must be a positive number\n");
+		parse_options_usage(stat_usage, stat_options, "r", 1);
+		goto out;
+	} else if (run_count == 0) {
+		forever = true;
+		run_count = 1;
+	}
+
+	if ((stat_config.aggr_mode == AGGR_THREAD) &&
+		!target__has_task(&target)) {
+		if (!target.system_wide || target.cpu_list) {
+			fprintf(stderr, "The --per-thread option is only "
+				"available when monitoring via -p -t -a "
+				"options or only --per-thread.\n");
+			parse_options_usage(NULL, stat_options, "p", 1);
+			parse_options_usage(NULL, stat_options, "t", 1);
+			goto out;
+		}
+	}
+
+	/*
+	 * no_aggr, cgroup are for system-wide only
+	 * --per-thread is aggregated per thread, we dont mix it with cpu mode
+	 */
+	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
+	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
+	    !target__has_cpu(&target)) {
+		fprintf(stderr, "both cgroup and no-aggregation "
+			"modes only available in system-wide mode\n");
+
+		parse_options_usage(stat_usage, stat_options, "G", 1);
+		parse_options_usage(NULL, stat_options, "A", 1);
+		parse_options_usage(NULL, stat_options, "a", 1);
+		goto out;
+	}
+
+	if (add_default_attributes())
+		goto out;
+
+	target__validate(&target);
+
+	if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
+		target.per_thread = true;
+
+	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
+		if (target__has_task(&target)) {
+			pr_err("Problems finding threads of monitor\n");
+			parse_options_usage(stat_usage, stat_options, "p", 1);
+			parse_options_usage(NULL, stat_options, "t", 1);
+		} else if (target__has_cpu(&target)) {
+			perror("failed to parse CPUs map");
+			parse_options_usage(stat_usage, stat_options, "C", 1);
+			parse_options_usage(NULL, stat_options, "a", 1);
+		}
+		goto out;
+	}
+
+	/*
+	 * Initialize thread_map with comm names,
+	 * so we could print it out on output.
+	 */
+	if (stat_config.aggr_mode == AGGR_THREAD) {
+		thread_map__read_comms(evsel_list->threads);
+		if (target.system_wide) {
+			if (runtime_stat_new(&stat_config,
+				thread_map__nr(evsel_list->threads))) {
+				goto out;
+			}
+		}
+	}
+
+	if (stat_config.times && interval)
+		interval_count = true;
+	else if (stat_config.times && !interval) {
+		pr_err("interval-count option should be used together with "
+				"interval-print.\n");
+		parse_options_usage(stat_usage, stat_options, "interval-count", 0);
+		parse_options_usage(stat_usage, stat_options, "I", 1);
+		goto out;
+	}
+
+	if (timeout && timeout < 100) {
+		if (timeout < 10) {
+			pr_err("timeout must be >= 10ms.\n");
+			parse_options_usage(stat_usage, stat_options, "timeout", 0);
+			goto out;
+		} else
+			pr_warning("timeout < 100ms. "
+				   "The overhead percentage could be high in some cases. "
+				   "Please proceed with caution.\n");
+	}
+	if (timeout && interval) {
+		pr_err("timeout option is not supported with interval-print.\n");
+		parse_options_usage(stat_usage, stat_options, "timeout", 0);
+		parse_options_usage(stat_usage, stat_options, "I", 1);
+		goto out;
+	}
+
+	if (perf_evlist__alloc_stats(evsel_list, interval))
+		goto out;
+
+	if (perf_stat_init_aggr_mode())
+		goto out;
+
+	/*
+	 * We dont want to block the signals - that would cause
+	 * child tasks to inherit that and Ctrl-C would not work.
+	 * What we want is for Ctrl-C to work in the exec()-ed
+	 * task, but being ignored by perf stat itself:
+	 */
+	atexit(sig_atexit);
+	if (!forever)
+		signal(SIGINT,  skip_signal);
+	signal(SIGCHLD, skip_signal);
+	signal(SIGALRM, skip_signal);
+	signal(SIGABRT, skip_signal);
+
+	status = 0;
+	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
+		if (run_count != 1 && verbose > 0)
+			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
+				run_idx + 1);
+
+		status = run_perf_stat(argc, argv);
+		if (forever && status != -1) {
+			print_counters(NULL, argc, argv);
+			perf_stat__reset_stats();
+		}
+	}
+
+	if (!forever && status != -1 && !interval)
+		print_counters(NULL, argc, argv);
+
+	if (STAT_RECORD) {
+		/*
+		 * We synthesize the kernel mmap record just so that older tools
+		 * don't emit warnings about not being able to resolve symbols
+		 * due to /proc/sys/kernel/kptr_restrict settings and instead provide
+		 * a saner message about no samples being in the perf.data file.
+		 *
+		 * This also serves to suppress a warning about f_header.data.size == 0
+		 * in header.c at the moment 'perf stat record' gets introduced, which
+		 * is not really needed once we start adding the stat specific PERF_RECORD_
+		 * records, but the need to suppress the kptr_restrict messages in older
+		 * tools remain  -acme
+		 */
+		int fd = perf_data__fd(&perf_stat.data);
+		int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
+							     process_synthesized_event,
+							     &perf_stat.session->machines.host);
+		if (err) {
+			pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
+				   "older tools may produce warnings about this file\n.");
+		}
+
+		if (!interval) {
+			if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
+				pr_err("failed to write stat round event\n");
+		}
+
+		if (!perf_stat.data.is_pipe) {
+			perf_stat.session->header.data_size += perf_stat.bytes_written;
+			perf_session__write_header(perf_stat.session, evsel_list, fd, true);
+		}
+
+		perf_session__delete(perf_stat.session);
+	}
+
+	perf_stat__exit_aggr_mode();
+	perf_evlist__free_stats(evsel_list);
+out:
+	if (smi_cost && smi_reset)
+		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
+
+	perf_evlist__delete(evsel_list);
+
+	runtime_stat_delete(&stat_config);
+
+	return status;
+}
diff --git a/builtin-timechart.c b/builtin-timechart.c
new file mode 100644
index 0000000..813698a
--- /dev/null
+++ b/builtin-timechart.c
@@ -0,0 +1,2018 @@
+/*
+ * builtin-timechart.c - make an svg timechart of system activity
+ *
+ * (C) Copyright 2009 Intel Corporation
+ *
+ * Authors:
+ *     Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <traceevent/event-parse.h>
+
+#include "builtin.h"
+
+#include "util/util.h"
+
+#include "util/color.h"
+#include <linux/list.h>
+#include "util/cache.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/time64.h>
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/callchain.h"
+
+#include "perf.h"
+#include "util/header.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-events.h"
+#include "util/event.h"
+#include "util/session.h"
+#include "util/svghelper.h"
+#include "util/tool.h"
+#include "util/data.h"
+#include "util/debug.h"
+
+#define SUPPORT_OLD_POWER_EVENTS 1	/* enables the "#ifdef SUPPORT_OLD_POWER_EVENTS" sections of this file */
+#define PWR_EVENT_EXIT -1	/* "state" value that process_sample_cpu_idle() takes as leaving the C-state */
+
+struct per_pid;		/* forward declarations: struct timechart only stores pointers to these */
+struct power_event;
+struct wake_event;
+
+struct timechart {
+	struct perf_tool	tool;	/* callbacks get back to the timechart from this member via container_of() */
+	struct per_pid		*all_data;	/* head of the per_pid list, see find_create_pid() */
+	struct power_event	*power_events;	/* head of the power_event list (newest first) */
+	struct wake_event	*wake_events;	/* head of the wake_event list (newest first) */
+	int			proc_num;
+	unsigned int		numcpus;	/* upper bound of the cpu walk in end_sample_processing() */
+	u64			min_freq,	/* Lowest CPU frequency seen */
+				max_freq,	/* Highest CPU frequency seen */
+				turbo_frequency,	/* set to max_freq once a frequency 1000 below it is seen */
+				first_time, last_time;	/* lowest / highest sample time seen by process_sample_event() */
+	bool			power_only,
+				tasks_only,
+				with_backtrace,
+				topology;
+	bool			force;
+	/* IO related settings */
+	bool			io_only,
+				skip_eagain;	/* draw_io_bars() leaves out samples that failed with -EAGAIN */
+	u64			io_events;	/* io samples completed by pid_end_io_sample() */
+	u64			min_time,	/* io samples are stretched to at least this duration */
+				merge_dist;	/* largest gap over which matching io samples are merged */
+};
+
+struct per_pidcomm;	/* forward declarations of the per-comm entry and its two sample lists */
+struct cpu_sample;
+struct io_sample;
+
+/*
+ * Datastructure layout:
+ * We keep a list of "pid"s, matching the kernel's notion of a task struct.
+ * Each "pid" entry has a list of "comm"s.
+ *	This is because we want to track different programs differently, while
+ *	exec will reuse the original pid (by design).
+ * Each comm has a list of samples that will be used to draw the
+ * final graph.
+ */
+
+struct per_pid {
+	struct per_pid *next;	/* next entry of the timechart's all_data list */
+
+	int		pid;	/* key looked up by find_create_pid() */
+	int		ppid;	/* parent pid, recorded by pid_fork() */
+
+	u64		start_time;	/* fork time, or the earliest sample start seen */
+	u64		end_time;	/* set by pid_exit() */
+	u64		total_time;	/* summed duration of the TYPE_RUNNING samples */
+	u64		total_bytes;	/* bytes summed over completed io samples */
+	int		display;
+
+	struct per_pidcomm *all;	/* head of this pid's comm entries */
+	struct per_pidcomm *current;	/* comm entry that new samples are attached to */
+};
+
+
+struct per_pidcomm {
+	struct per_pidcomm *next;	/* next comm entry of the same pid */
+
+	u64		start_time;	/* earliest sample start seen for this entry */
+	u64		end_time;	/* set by pid_exit() on the pid's current entry */
+	u64		total_time;	/* summed duration of the TYPE_RUNNING samples */
+	u64		max_bytes;	/* largest byte count of a single (or merged) io sample */
+	u64		total_bytes;	/* bytes summed over completed io samples */
+
+	int		Y;	/* row assigned by draw_io_bars(); 0 when the entry is not displayed */
+	int		display;	/* draw_io_bars() skips entries where this is 0 */
+
+	long		state;	/* one of the TYPE_* values */
+	u64		state_since;	/* timestamp at which "state" was entered */
+
+	char		*comm;	/* strdup()ed name; NULL for entries created without a name */
+
+	struct cpu_sample *samples;	/* newest first */
+	struct io_sample  *io_samples;	/* newest first */
+};
+
+struct sample_wrapper {		/* list node: a timestamp followed by "data" payload bytes */
+	struct sample_wrapper *next;
+
+	u64		timestamp;
+	unsigned char	data[];	/* C99 flexible array member (was the GNU zero-length data[0]) */
+};
+
+#define TYPE_NONE	0	/* values of per_pidcomm.state and cpu_sample.type */
+#define TYPE_RUNNING	1	/* set for the incoming task in sched_switch() */
+#define TYPE_WAITING	2	/* set by sched_wakeup(), or on switch-out with prev_state == 0 */
+#define TYPE_BLOCKED	3	/* set on switch-out when (prev_state & 2) is set */
+
+struct cpu_sample {
+	struct cpu_sample *next;	/* next (older) sample of the same comm entry */
+
+	u64 start_time;
+	u64 end_time;
+	int type;	/* one of the TYPE_* values */
+	int cpu;
+	const char *backtrace;	/* text as produced by cat_backtrace(), or NULL */
+};
+
+enum {	/* values of io_sample.type */
+	IOTYPE_READ,	/* drawn by draw_io_bars() with svg_ubox(), style "disk" */
+	IOTYPE_WRITE,	/* drawn with svg_lbox(), style "disk" */
+	IOTYPE_SYNC,	/* drawn with svg_fbox(), style "sync" */
+	IOTYPE_TX,	/* drawn with svg_lbox(), style "net" */
+	IOTYPE_RX,	/* drawn with svg_ubox(), style "net" */
+	IOTYPE_POLL,	/* drawn with svg_fbox(), style "poll" */
+};
+
+struct io_sample {
+	struct io_sample *next;	/* next (older) sample of the same comm entry */
+
+	u64 start_time;
+	u64 end_time;	/* 0 until pid_end_io_sample() closes the sample */
+	u64 bytes;	/* non-negative syscall result of byte-carrying types */
+	int type;	/* one of the IOTYPE_* values */
+	int fd;
+	int err;	/* negative syscall result, 0 when the call did not fail */
+	int merges;	/* number of older samples merged into this one */
+};
+
+#define CSTATE 1	/* power_event.type: idle period queued by c_state_end() */
+#define PSTATE 2	/* power_event.type: frequency period (p_state_change(), end_sample_processing()) */
+
+struct power_event {
+	struct power_event *next;	/* next (older) event of the timechart's power_events list */
+	int type;	/* CSTATE or PSTATE */
+	int state;	/* C-state number, or frequency for PSTATE events */
+	u64 start_time;
+	u64 end_time;
+	int cpu;
+};
+
+struct wake_event {
+	struct wake_event *next;	/* next (older) event of the timechart's wake_events list */
+	int waker;	/* pid of the waker; -1 when woken from hard/soft irq context */
+	int wakee;	/* pid of the task being woken */
+	u64 time;
+	const char *backtrace;	/* text as produced by cat_backtrace(), or NULL */
+};
+
+struct process_filter {	/* node of a singly linked list */
+	char			*name;
+	int			pid;
+	struct process_filter	*next;
+};
+
+static struct process_filter *process_filter;	/* NOTE(review): presumably the head of that list; its users are not in this part of the file */
+
+
+/* Return the per_pid entry of @pid; a zeroed one is pushed on all_data if there is none. */
+static struct per_pid *find_create_pid(struct timechart *tchart, int pid)
+{
+	struct per_pid *entry;
+
+	for (entry = tchart->all_data; entry != NULL; entry = entry->next) {
+		if (entry->pid == pid)
+			return entry;
+	}
+	entry = zalloc(sizeof(*entry));
+	assert(entry != NULL);	/* running out of memory is not handled here */
+	entry->next = tchart->all_data;
+	entry->pid = pid;
+	tchart->all_data = entry;
+	return entry;
+}
+
+/*
+ * Select the comm entry for @comm as @pid's current one: reuse the first entry
+ * that has this name or no name yet (naming it), else push a new entry.
+ */
+static void pid_set_comm(struct timechart *tchart, int pid, char *comm)
+{
+	struct per_pid *owner = find_create_pid(tchart, pid);
+	struct per_pidcomm *entry;
+
+	for (entry = owner->all; entry; entry = entry->next) {
+		if (!entry->comm)
+			entry->comm = strdup(comm);
+		else if (strcmp(entry->comm, comm) != 0)
+			continue;
+		break;
+	}
+	if (!entry) {
+		entry = zalloc(sizeof(*entry));
+		assert(entry != NULL);
+		entry->comm = strdup(comm);
+		entry->next = owner->all;
+		owner->all = entry;
+	}
+	owner->current = entry;
+}
+
+/* Record a fork: set the parent, inherit its comm if the child has none, stamp start times. */
+static void pid_fork(struct timechart *tchart, int pid, int ppid, u64 timestamp)
+{
+	struct per_pid *child = find_create_pid(tchart, pid);
+	struct per_pid *parent = find_create_pid(tchart, ppid);
+	struct per_pidcomm *pcomm = parent->current;
+
+	child->ppid = ppid;
+	if (!child->current && pcomm && pcomm->comm)
+		pid_set_comm(tchart, pid, pcomm->comm);
+	child->start_time = timestamp;
+
+	if (child->current && child->current->start_time == 0)
+		child->current->start_time = child->current->state_since = timestamp;
+}
+
+/* Stamp @timestamp as the end time of @pid and of its current comm entry, if any. */
+static void pid_exit(struct timechart *tchart, int pid, u64 timestamp)
+{
+	struct per_pid *task = find_create_pid(tchart, pid);
+	task->end_time = timestamp;
+	if (task->current != NULL)
+		task->current->end_time = timestamp;
+}
+
+/* Prepend a cpu_sample [start, end] of kind @type to @pid's current comm entry
+ * (created unnamed if missing); update running time and earliest start times. */
+static void pid_put_sample(struct timechart *tchart, int pid, int type,
+			   unsigned int cpu, u64 start, u64 end,
+			   const char *backtrace)
+{
+	struct per_pid *task = find_create_pid(tchart, pid);
+	struct per_pidcomm *comm = task->current;
+	struct cpu_sample *node;
+
+	if (comm == NULL) {
+		comm = zalloc(sizeof(*comm));
+		assert(comm != NULL);
+		comm->next = task->all;
+		task->all = comm;
+		task->current = comm;
+	}
+
+	node = zalloc(sizeof(*node));
+	assert(node != NULL);
+	node->backtrace = backtrace;
+	node->cpu = cpu;
+	node->type = type;
+	node->start_time = start;
+	node->end_time = end;
+	node->next = comm->samples;
+	comm->samples = node;
+
+	if (type == TYPE_RUNNING && start > 0 && end > start) {
+		comm->total_time += end - start;
+		task->total_time += end - start;
+	}
+
+	if (!comm->start_time || start < comm->start_time)
+		comm->start_time = start;
+	if (!task->start_time || start < task->start_time)
+		task->start_time = start;
+}
+
+#define MAX_CPUS 4096	/* number of slots in each of the per-cpu tables below, which are indexed by cpu id */
+
+static u64 cpus_cstate_start_times[MAX_CPUS];	/* timestamp of the last c_state_start() */
+static int cpus_cstate_state[MAX_CPUS];	/* state passed to the last c_state_start() */
+static u64 cpus_pstate_start_times[MAX_CPUS];	/* timestamp of the last accepted p_state_change() */
+static u64 cpus_pstate_state[MAX_CPUS];	/* frequency passed to the last accepted p_state_change() */
+
+/* Comm event callback: make the event's comm the current comm entry of its tid. */
+static int process_comm_event(struct perf_tool *tool, union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	pid_set_comm(container_of(tool, struct timechart, tool),
+		     event->comm.tid, event->comm.comm);
+	return 0;
+}
+
+/* Fork event callback: hand the event's pid, parent pid and time to pid_fork(). */
+static int process_fork_event(struct perf_tool *tool, union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	pid_fork(container_of(tool, struct timechart, tool),
+		 event->fork.pid, event->fork.ppid, event->fork.time);
+	return 0;
+}
+
+/* Exit event callback: hand the pid and time (read from event->fork) to pid_exit(). */
+static int process_exit_event(struct perf_tool *tool, union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	pid_exit(container_of(tool, struct timechart, tool),
+		 event->fork.pid, event->fork.time);
+	return 0;
+}
+
+#ifdef SUPPORT_OLD_POWER_EVENTS
+static int use_old_power_events;	/* NOTE(review): not referenced in this part of the file; presumably selects the old power tracepoints -- confirm */
+#endif
+
+static bool cpu_id_valid(int cpu)
+{
+	return cpu >= 0 && cpu < MAX_CPUS;	/* cpu ids come from the trace data */
+}
+
+/* Remember that @cpu entered C-state @state at @timestamp; bad cpu ids are ignored. */
+static void c_state_start(int cpu, u64 timestamp, int state)
+{
+	if (!cpu_id_valid(cpu))
+		return;
+	cpus_cstate_start_times[cpu] = timestamp;
+	cpus_cstate_state[cpu] = state;
+}
+
+/* Queue a CSTATE event running from the last c_state_start() on @cpu to @timestamp. */
+static void c_state_end(struct timechart *tchart, int cpu, u64 timestamp)
+{
+	struct power_event *pwr = cpu_id_valid(cpu) ? zalloc(sizeof(*pwr)) : NULL;
+
+	if (!pwr)	/* bad cpu id or out of memory: drop the event */
+		return;
+	pwr->state = cpus_cstate_state[cpu];
+	pwr->start_time = cpus_cstate_start_times[cpu];
+	pwr->end_time = timestamp;
+	pwr->cpu = cpu;
+	pwr->type = CSTATE;
+	pwr->next = tchart->power_events;
+	tchart->power_events = pwr;
+}
+
+/* Close the frequency period of @cpu as a PSTATE event and start a new one at @new_freq. */
+static void p_state_change(struct timechart *tchart, int cpu, u64 timestamp, u64 new_freq)
+{
+	struct power_event *pwr;
+
+	if (new_freq > 8000000 || !cpu_id_valid(cpu)) /* detect invalid data */
+		return;
+	pwr = zalloc(sizeof(*pwr));
+	if (!pwr)
+		return;
+	pwr->state = cpus_pstate_state[cpu];
+	pwr->start_time = cpus_pstate_start_times[cpu];
+	if (!pwr->start_time)	/* no earlier change recorded for this cpu */
+		pwr->start_time = tchart->first_time;
+	pwr->end_time = timestamp;
+	pwr->cpu = cpu;
+	pwr->type = PSTATE;
+	pwr->next = tchart->power_events;
+	tchart->power_events = pwr;
+	cpus_pstate_state[cpu] = new_freq;
+	cpus_pstate_start_times[cpu] = timestamp;
+	if (new_freq > tchart->max_freq)
+		tchart->max_freq = new_freq;
+	if (new_freq < tchart->min_freq || tchart->min_freq == 0)
+		tchart->min_freq = new_freq;
+	if (new_freq == tchart->max_freq - 1000)	/* one step below the maximum */
+		tchart->turbo_frequency = tchart->max_freq;
+}
+
+/*
+ * Queue a wake_event for @wakee (the waker is recorded as -1 for wakeups from
+ * hard/soft irq context).  A wakee whose current comm is in TYPE_NONE or
+ * TYPE_BLOCKED becomes TYPE_WAITING; a blocked interval is emitted as a sample.
+ */
+static void sched_wakeup(struct timechart *tchart, int cpu, u64 timestamp,
+			 int waker, int wakee, u8 flags, const char *backtrace)
+{
+	const bool from_irq = flags & (TRACE_FLAG_HARDIRQ | TRACE_FLAG_SOFTIRQ);
+	struct wake_event *we = zalloc(sizeof(*we));
+	struct per_pidcomm *comm;
+	struct per_pid *target;
+
+	if (we == NULL)
+		return;
+	we->time = timestamp;
+	we->waker = from_irq ? -1 : waker;
+	we->wakee = wakee;
+	we->backtrace = backtrace;
+	we->next = tchart->wake_events;
+	tchart->wake_events = we;
+
+	target = find_create_pid(tchart, wakee);
+	comm = target ? target->current : NULL;
+	if (!comm || (comm->state != TYPE_NONE && comm->state != TYPE_BLOCKED))
+		return;
+	if (comm->state == TYPE_BLOCKED)
+		pid_put_sample(tchart, target->pid, TYPE_BLOCKED, cpu,
+			       comm->state_since, timestamp, NULL);
+	comm->state_since = timestamp;
+	comm->state = TYPE_WAITING;
+}
+
+/*
+ * Handle a context switch on @cpu: emit the outgoing task's interval as a
+ * running sample and the incoming task's previous state as a sample, mark the
+ * incoming task TYPE_RUNNING and set the outgoing task's state from @prev_state. */
+static void sched_switch(struct timechart *tchart, int cpu, u64 timestamp,
+			 int prev_pid, int next_pid, u64 prev_state,
+			 const char *backtrace)
+{
+	struct per_pid *out_task = find_create_pid(tchart, prev_pid);
+	struct per_pid *in_task = find_create_pid(tchart, next_pid);
+	struct per_pidcomm *out = out_task->current;
+	struct per_pidcomm *in = in_task ? in_task->current : NULL;
+
+	if (out && out->state != TYPE_NONE)
+		pid_put_sample(tchart, prev_pid, TYPE_RUNNING, cpu,
+			       out->state_since, timestamp, backtrace);
+
+	if (in) {
+		if (in->state != TYPE_NONE)
+			pid_put_sample(tchart, next_pid, in->state, cpu,
+				       in->state_since, timestamp, backtrace);
+		in->state_since = timestamp;
+		in->state = TYPE_RUNNING;
+	}
+
+	if (!out)
+		return;
+	out->state_since = timestamp;
+	if (prev_state == 0)
+		out->state = TYPE_WAITING;
+	else
+		out->state = (prev_state & 2) ? TYPE_BLOCKED : TYPE_NONE;
+}
+
+/*
+ * Render the sample's callchain as text, one "..... <ip> [symbol]" line per
+ * entry, into a buffer allocated by open_memstream().  Returns NULL if the
+ * stream cannot be opened or the callchain turns out to be corrupted.
+ */
+static const char *cat_backtrace(union perf_event *event,
+				 struct perf_sample *sample,
+				 struct machine *machine)
+{
+	struct addr_location al, tal;
+	unsigned int i;
+	char *p = NULL;
+	size_t p_len;
+	u8 cpumode = PERF_RECORD_MISC_USER;
+	struct ip_callchain *chain = sample->callchain;
+	bool corrupted = false;
+	FILE *f = open_memstream(&p, &p_len);
+
+	if (!f) {
+		perror("open_memstream error");
+		return NULL;
+	}
+
+	if (!chain)
+		goto exit;
+
+	if (machine__resolve(machine, &al, sample) < 0) {
+		fprintf(stderr, "problem processing %d event, skipping it.\n",
+			event->header.type);
+		goto exit;
+	}
+
+	for (i = 0; i < chain->nr; i++) {
+		u64 ip = callchain_param.order == ORDER_CALLEE ?
+			 chain->ips[i] : chain->ips[chain->nr - i - 1];
+
+		if (ip >= PERF_CONTEXT_MAX) {
+			switch (ip) {
+			case PERF_CONTEXT_HV:
+				cpumode = PERF_RECORD_MISC_HYPERVISOR;
+				break;
+			case PERF_CONTEXT_KERNEL:
+				cpumode = PERF_RECORD_MISC_KERNEL;
+				break;
+			case PERF_CONTEXT_USER:
+				cpumode = PERF_RECORD_MISC_USER;
+				break;
+			default:
+				pr_debug("invalid callchain context: "
+					 "%"PRId64"\n", (s64) ip);
+				/* It seems the callchain is corrupted. Discard all. */
+				corrupted = true;
+				goto exit_put;
+			}
+			continue;
+		}
+
+		tal.filtered = 0;
+		thread__find_addr_location(al.thread, cpumode,
+					   MAP__FUNCTION, ip, &tal);
+
+		if (tal.sym)
+			fprintf(f, "..... %016" PRIx64 " %s\n", ip,
+				tal.sym->name);
+		else
+			fprintf(f, "..... %016" PRIx64 "\n", ip);
+	}
+exit_put:
+	addr_location__put(&al);
+exit:
+	fclose(f);
+	/* fclose() stores the final buffer in p, so it can only be dropped now */
+	if (corrupted)
+		zfree(&p);
+	return p;
+}
+
+typedef int (*tracepoint_handler)(struct timechart *tchart,
+				  struct perf_evsel *evsel,
+				  struct perf_sample *sample,
+				  const char *backtrace);	/* type through which process_sample_event() calls evsel->handler */
+
+/*
+ * Sample callback: extend [first_time, last_time] with the sample's timestamp
+ * when the event carries PERF_SAMPLE_TIME, then pass the sample and its
+ * rendered backtrace to the evsel's handler, if one is set.
+ */
+static int process_sample_event(struct perf_tool *tool, union perf_event *event,
+				struct perf_sample *sample, struct perf_evsel *evsel,
+				struct machine *machine)
+{
+	struct timechart *tchart = container_of(tool, struct timechart, tool);
+	tracepoint_handler handler = evsel->handler;
+
+	if (evsel->attr.sample_type & PERF_SAMPLE_TIME) {
+		if (tchart->first_time == 0 || sample->time < tchart->first_time)
+			tchart->first_time = sample->time;
+		if (sample->time > tchart->last_time)
+			tchart->last_time = sample->time;
+	}
+
+	if (handler == NULL)
+		return 0;
+	return handler(tchart, evsel, sample, cat_backtrace(event, sample, machine));
+}
+
+/* cpu_idle sample handler: a "state" of PWR_EVENT_EXIT ends the C-state, any other starts one. */
+static int
+process_sample_cpu_idle(struct timechart *tchart __maybe_unused,
+			struct perf_evsel *evsel, struct perf_sample *sample,
+			const char *backtrace __maybe_unused)
+{
+	const u32 idle_state = perf_evsel__intval(evsel, sample, "state");
+	const u32 cpu = perf_evsel__intval(evsel, sample, "cpu_id");
+
+	if (idle_state != (u32)PWR_EVENT_EXIT)
+		c_state_start(cpu, sample->time, idle_state);
+	else
+		c_state_end(tchart, cpu, sample->time);
+	return 0;
+}
+
+/* cpu_frequency sample handler: pass the "state" field as new frequency to p_state_change(). */
+static int
+process_sample_cpu_frequency(struct timechart *tchart, struct perf_evsel *evsel,
+			     struct perf_sample *sample,
+			     const char *backtrace __maybe_unused)
+{
+	const u32 freq = perf_evsel__intval(evsel, sample, "state");
+	const u32 cpu = perf_evsel__intval(evsel, sample, "cpu_id");
+
+	p_state_change(tchart, cpu, sample->time, freq);
+	return 0;
+}
+
+/* Wakeup sample handler: read flags, waker ("common_pid") and wakee ("pid") for sched_wakeup(). */
+static int
+process_sample_sched_wakeup(struct timechart *tchart, struct perf_evsel *evsel,
+			    struct perf_sample *sample, const char *backtrace)
+{
+	const u8 irq_flags = perf_evsel__intval(evsel, sample, "common_flags");
+	const int waker_pid = perf_evsel__intval(evsel, sample, "common_pid");
+	const int wakee_pid = perf_evsel__intval(evsel, sample, "pid");
+
+	sched_wakeup(tchart, sample->cpu, sample->time,
+		     waker_pid, wakee_pid, irq_flags, backtrace);
+	return 0;
+}
+
+/* Switch sample handler: read "prev_pid", "next_pid" and "prev_state" from the
+ * sample and forward them, with the sample's cpu and time, to sched_switch(). */
+static int
+process_sample_sched_switch(struct timechart *tchart, struct perf_evsel *evsel,
+			    struct perf_sample *sample, const char *backtrace)
+{
+	const int outgoing = perf_evsel__intval(evsel, sample, "prev_pid");
+	const int incoming = perf_evsel__intval(evsel, sample, "next_pid");
+	const u64 out_state = perf_evsel__intval(evsel, sample, "prev_state");
+
+	sched_switch(tchart, sample->cpu, sample->time,
+		     outgoing, incoming, out_state, backtrace);
+	return 0;
+}
+
+#ifdef SUPPORT_OLD_POWER_EVENTS
+/* power_start sample handler: the "value" field is the C-state entered on "cpu_id". */
+static int
+process_sample_power_start(struct timechart *tchart __maybe_unused,
+			   struct perf_evsel *evsel, struct perf_sample *sample,
+			   const char *backtrace __maybe_unused)
+{
+	const u64 cpu = perf_evsel__intval(evsel, sample, "cpu_id");
+	const u64 cstate = perf_evsel__intval(evsel, sample, "value");
+
+	c_state_start(cpu, sample->time, cstate);
+	return 0;
+}
+
+/* power_end sample handler: end the C-state of the cpu the sample was taken on. */
+static int process_sample_power_end(struct timechart *tchart,
+				    struct perf_evsel *evsel __maybe_unused,
+				    struct perf_sample *sample,
+				    const char *backtrace __maybe_unused)
+{
+	c_state_end(tchart, sample->cpu, sample->time);
+	return 0;
+}
+
+/* power_frequency sample handler: pass the "value" field as new frequency to p_state_change(). */
+static int
+process_sample_power_frequency(struct timechart *tchart, struct perf_evsel *evsel,
+			       struct perf_sample *sample,
+			       const char *backtrace __maybe_unused)
+{
+	const u64 cpu = perf_evsel__intval(evsel, sample, "cpu_id");
+	const u64 freq = perf_evsel__intval(evsel, sample, "value");
+
+	p_state_change(tchart, cpu, sample->time, freq);
+	return 0;
+}
+#endif /* SUPPORT_OLD_POWER_EVENTS */
+
+/*
+ * After the last sample we need to wrap up the current P state of each CPU:
+ * queue one final PSTATE event per cpu, running from the cpu's last recorded
+ * frequency change up to the last sample time.
+ *
+ * A cpu without a recorded change starts at the first sample time, and a
+ * frequency of 0 is reported as the lowest frequency seen.
+ *
+ * The current C state of a cpu is not closed out here.
+ *
+ * The walk is clamped to MAX_CPUS so that it cannot read past the per-cpu
+ * state tables, and it stops silently when an allocation fails.
+ *
+ * NOTE(review): the inclusive "cpu <= numcpus" bound visits numcpus + 1
+ * slots.  It is kept unchanged because whether numcpus is a count or the
+ * highest cpu index cannot be told from this function -- confirm.
+ */
+static void end_sample_processing(struct timechart *tchart)
+{
+	u64 cpu;
+	struct power_event *pwr;
+
+	for (cpu = 0; cpu <= tchart->numcpus && cpu < MAX_CPUS; cpu++) {
+		pwr = zalloc(sizeof(*pwr));
+		if (!pwr)
+			return;
+
+		pwr->state = cpus_pstate_state[cpu];
+		pwr->start_time = cpus_pstate_start_times[cpu];
+		pwr->end_time = tchart->last_time;
+		pwr->cpu = cpu;
+		pwr->type = PSTATE;
+		pwr->next = tchart->power_events;
+
+		/* no frequency change recorded for this cpu */
+		if (!pwr->start_time)
+			pwr->start_time = tchart->first_time;
+		/* no frequency recorded for this cpu */
+		if (!pwr->state)
+			pwr->state = tchart->min_freq;
+
+		tchart->power_events = pwr;
+	}
+}
+
+
+/*
+ * Open an io_sample of kind @type on @fd at @start for @pid's current comm
+ * entry (created unnamed if missing).  If the newest sample is still open, it
+ * is dropped and this start is skipped.  Returns 0, or -ENOMEM.
+ */
+static int pid_begin_io_sample(struct timechart *tchart, int pid, int type,
+			       u64 start, int fd)
+{
+	struct per_pid *task = find_create_pid(tchart, pid);
+	struct per_pidcomm *comm = task->current;
+	struct io_sample *newest, *node;
+
+	if (comm == NULL) {
+		comm = zalloc(sizeof(*comm));
+		if (comm == NULL)
+			return -ENOMEM;
+		comm->next = task->all;
+		task->all = comm;
+		task->current = comm;
+	}
+
+	newest = comm->io_samples;
+	if (newest != NULL && newest->start_time != 0 && newest->end_time == 0) {
+		pr_warning("Skip invalid start event: "
+			   "previous event already started!\n");
+		/* drop the started event: an end for it may never arrive */
+		comm->io_samples = newest->next;
+		free(newest);
+		return 0;
+	}
+
+	node = zalloc(sizeof(*node));
+	if (node == NULL)
+		return -ENOMEM;
+	node->fd = fd;
+	node->type = type;
+	node->start_time = start;
+	node->next = newest;
+	comm->io_samples = node;
+
+	if (!comm->start_time || start < comm->start_time)
+		comm->start_time = start;
+	return 0;
+}
+
+/*
+ * Close the newest io_sample of @pid's current comm entry at time @end with
+ * the syscall result @ret.  A negative @ret is stored as the sample's error;
+ * otherwise read/write/tx/rx samples add @ret to the byte counters.
+ * The sample is stretched to at least min_time, kept from overlapping its
+ * predecessor, and absorbs the predecessor when type, error and fd match and
+ * the two are at most merge_dist apart.
+ * Returns -1 if the pid has no comm entry, 0 otherwise.
+ */
+static int pid_end_io_sample(struct timechart *tchart, int pid, int type,
+			     u64 end, long ret)
+{
+	struct per_pid *task = find_create_pid(tchart, pid);
+	struct per_pidcomm *comm = task->current;
+	struct io_sample *cur, *older;
+
+	if (comm == NULL) {
+		pr_warning("Invalid pidcomm!\n");
+		return -1;
+	}
+
+	cur = comm->io_samples;
+	if (cur == NULL) /* skip partially captured events */
+		return 0;
+
+	if (cur->end_time != 0) {
+		pr_warning("Skip invalid end event: "
+			   "previous event already ended!\n");
+		return 0;
+	}
+	if (type != cur->type) {
+		pr_warning("Skip invalid end event: invalid event type!\n");
+		return 0;
+	}
+
+	older = cur->next;
+	cur->end_time = end;
+
+	/* we want to be able to see small and fast transfers, so make them
+	 * at least min_time long, but don't overlap them */
+	if (cur->end_time - cur->start_time < tchart->min_time)
+		cur->end_time = cur->start_time + tchart->min_time;
+	if (older != NULL && cur->start_time < older->end_time) {
+		if (older->err) /* try to make errors more visible */
+			cur->start_time = older->end_time;
+		else
+			older->end_time = cur->start_time;
+	}
+
+	if (ret < 0) {
+		cur->err = ret;
+	} else if (type == IOTYPE_READ || type == IOTYPE_WRITE ||
+		   type == IOTYPE_TX || type == IOTYPE_RX) {
+		if ((u64)ret > comm->max_bytes)
+			comm->max_bytes = ret;
+		comm->total_bytes += ret;
+		task->total_bytes += ret;
+		cur->bytes = ret;
+	}
+
+	/* merge two requests to make svg smaller and render-friendly */
+	if (older != NULL && older->type == cur->type &&
+	    older->err == cur->err && older->fd == cur->fd &&
+	    older->end_time + tchart->merge_dist >= cur->start_time) {
+		cur->bytes += older->bytes;
+		cur->merges += older->merges + 1;
+		cur->start_time = older->start_time;
+		cur->next = older->next;
+		free(older);
+
+		if (!cur->err && cur->bytes > comm->max_bytes)
+			comm->max_bytes = cur->bytes;
+	}
+
+	tchart->io_events++;
+	return 0;
+}
+
+/* Read-entry handler: open an IOTYPE_READ sample for the sampled tid on the "fd" field. */
+static int
+process_enter_read(struct timechart *tchart, struct perf_evsel *evsel,
+		   struct perf_sample *sample)
+{
+	const long fd = perf_evsel__intval(evsel, sample, "fd");
+
+	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_READ, sample->time, fd);
+}
+
+/* Read-exit handler: close the sampled tid's IOTYPE_READ sample with the "ret" field. */
+static int
+process_exit_read(struct timechart *tchart, struct perf_evsel *evsel,
+		  struct perf_sample *sample)
+{
+	const long ret = perf_evsel__intval(evsel, sample, "ret");
+
+	return pid_end_io_sample(tchart, sample->tid, IOTYPE_READ, sample->time, ret);
+}
+
+/* Write-entry handler: open an IOTYPE_WRITE sample for the sampled tid on the "fd" field. */
+static int
+process_enter_write(struct timechart *tchart, struct perf_evsel *evsel,
+		    struct perf_sample *sample)
+{
+	const long fd = perf_evsel__intval(evsel, sample, "fd");
+
+	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_WRITE, sample->time, fd);
+}
+
+/* Write-exit handler: close the sampled tid's IOTYPE_WRITE sample with the "ret" field. */
+static int
+process_exit_write(struct timechart *tchart, struct perf_evsel *evsel,
+		   struct perf_sample *sample)
+{
+	const long ret = perf_evsel__intval(evsel, sample, "ret");
+
+	return pid_end_io_sample(tchart, sample->tid, IOTYPE_WRITE, sample->time, ret);
+}
+
+/* Sync-entry handler: open an IOTYPE_SYNC sample for the sampled tid on the "fd" field. */
+static int
+process_enter_sync(struct timechart *tchart, struct perf_evsel *evsel,
+		   struct perf_sample *sample)
+{
+	const long fd = perf_evsel__intval(evsel, sample, "fd");
+
+	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_SYNC, sample->time, fd);
+}
+
+/* Sync-exit handler: close the sampled tid's IOTYPE_SYNC sample with the "ret" field. */
+static int
+process_exit_sync(struct timechart *tchart, struct perf_evsel *evsel,
+		  struct perf_sample *sample)
+{
+	const long ret = perf_evsel__intval(evsel, sample, "ret");
+
+	return pid_end_io_sample(tchart, sample->tid, IOTYPE_SYNC, sample->time, ret);
+}
+
+/* Tx-entry handler: open an IOTYPE_TX sample for the sampled tid on the "fd" field. */
+static int
+process_enter_tx(struct timechart *tchart, struct perf_evsel *evsel,
+		 struct perf_sample *sample)
+{
+	const long fd = perf_evsel__intval(evsel, sample, "fd");
+
+	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_TX, sample->time, fd);
+}
+
+/* Tx-exit handler: close the sampled tid's IOTYPE_TX sample with the "ret" field. */
+static int
+process_exit_tx(struct timechart *tchart, struct perf_evsel *evsel,
+		struct perf_sample *sample)
+{
+	const long ret = perf_evsel__intval(evsel, sample, "ret");
+
+	return pid_end_io_sample(tchart, sample->tid, IOTYPE_TX, sample->time, ret);
+}
+
+/* Rx-entry handler: open an IOTYPE_RX sample for the sampled tid on the "fd" field. */
+static int
+process_enter_rx(struct timechart *tchart, struct perf_evsel *evsel,
+		 struct perf_sample *sample)
+{
+	const long fd = perf_evsel__intval(evsel, sample, "fd");
+
+	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_RX, sample->time, fd);
+}
+
+/* Rx-exit handler: close the sampled tid's IOTYPE_RX sample with the "ret" field. */
+static int
+process_exit_rx(struct timechart *tchart, struct perf_evsel *evsel,
+		struct perf_sample *sample)
+{
+	const long ret = perf_evsel__intval(evsel, sample, "ret");
+
+	return pid_end_io_sample(tchart, sample->tid, IOTYPE_RX, sample->time, ret);
+}
+
+/* Poll-entry handler: open an IOTYPE_POLL sample for the sampled tid on the "fd" field. */
+static int
+process_enter_poll(struct timechart *tchart, struct perf_evsel *evsel,
+		   struct perf_sample *sample)
+{
+	const long fd = perf_evsel__intval(evsel, sample, "fd");
+
+	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_POLL, sample->time, fd);
+}
+
+/* Poll-exit handler: close the sampled tid's IOTYPE_POLL sample with the "ret" field. */
+static int
+process_exit_poll(struct timechart *tchart, struct perf_evsel *evsel,
+		  struct perf_sample *sample)
+{
+	const long ret = perf_evsel__intval(evsel, sample, "ret");
+
+	return pid_end_io_sample(tchart, sample->tid, IOTYPE_POLL, sample->time, ret);
+}
+
+/* True when @a belongs after @b: larger ppid, or equal ppid and larger pid. */
+static bool per_pid_sorts_after(const struct per_pid *a, const struct per_pid *b)
+{
+	if (a->ppid != b->ppid)
+		return a->ppid > b->ppid;
+	return a->pid > b->pid;
+}
+
+/*
+ * Sort the pid datastructure: tchart->all_data is rebuilt as a list ordered
+ * by ppid first, then by pid, lowest to highest.
+ *
+ * This is an insertion sort.  Entries are unlinked from the head of the old
+ * list one at a time and spliced into the new list in front of the first
+ * entry that sorts after them, or appended when there is none.
+ *
+ * Entries with equal keys therefore end up behind those inserted earlier.
+ */
+static void sort_pids(struct timechart *tchart)
+{
+	struct per_pid *sorted = NULL;
+
+	while (tchart->all_data) {
+		struct per_pid *node = tchart->all_data;
+		struct per_pid *before = NULL;	/* last entry that stays ahead of node */
+		struct per_pid *scan;
+
+		/* unlink the head of the unsorted list */
+		tchart->all_data = node->next;
+
+		/* find the first sorted entry that has to follow node */
+		for (scan = sorted; scan; scan = scan->next) {
+			if (per_pid_sorts_after(scan, node))
+				break;
+			before = scan;
+		}
+
+		/* splice node in between "before" and "scan" */
+		node->next = scan;
+		if (before)
+			before->next = node;
+		else
+			sorted = node;
+	}
+
+	tchart->all_data = sorted;
+}
+
+
+/*
+ * Emit all power events to the svg.  Two pass drawing so that the P state
+ * bars are on top of the C state blocks; a P state of 0 is first replaced,
+ * in the event itself, by the lowest frequency seen.
+ */
+static void draw_c_p_states(struct timechart *tchart)
+{
+	struct power_event *ev;
+
+	for (ev = tchart->power_events; ev; ev = ev->next) {
+		if (ev->type != CSTATE)
+			continue;
+		svg_cstate(ev->cpu, ev->start_time, ev->end_time, ev->state);
+	}
+
+	for (ev = tchart->power_events; ev; ev = ev->next) {
+		if (ev->type != PSTATE)
+			continue;
+
+		if (ev->state == 0)
+			ev->state = tchart->min_freq;
+		svg_pstate(ev->cpu, ev->start_time, ev->end_time, ev->state);
+	}
+}
+
+/* Replace *@name with a copy of @comm; without a comm the name stays unset. */
+static void wake_task_name(char **name, const char *comm)
+{
+	free(*name);	/* the fallback scan may visit several entries */
+	*name = comm ? strdup(comm) : NULL;
+}
+
+/*
+ * Draw every wake event.  The rows of waker and wakee come from the comm
+ * entries of the matching pids: entries with a row whose lifetime covers the
+ * wakeup are tried first, then any entry.  Unnamed tasks are shown as "[pid]".
+ */
+static void draw_wakeups(struct timechart *tchart)
+{
+	struct wake_event *we;
+	struct per_pid *p;
+	struct per_pidcomm *c;
+
+	for (we = tchart->wake_events; we; we = we->next) {
+		int from = 0, to = 0;
+		char *task_from = NULL, *task_to = NULL;
+		char pid_from[40], pid_to[40];
+
+		/* locate the column of the waker and wakee */
+		for (p = tchart->all_data; p; p = p->next) {
+			if (p->pid != we->waker && p->pid != we->wakee)
+				continue;
+			for (c = p->all; c; c = c->next) {
+				if (!c->Y || c->start_time > we->time || c->end_time < we->time)
+					continue;
+				if (p->pid == we->waker && !from) {
+					from = c->Y;
+					wake_task_name(&task_from, c->comm);
+				}
+				if (p->pid == we->wakee && !to) {
+					to = c->Y;
+					wake_task_name(&task_to, c->comm);
+				}
+			}
+			for (c = p->all; c; c = c->next) {
+				if (p->pid == we->waker && !from) {
+					from = c->Y;
+					wake_task_name(&task_from, c->comm);
+				}
+				if (p->pid == we->wakee && !to) {
+					to = c->Y;
+					wake_task_name(&task_to, c->comm);
+				}
+			}
+		}
+
+		snprintf(pid_from, sizeof(pid_from), "[%i]", we->waker);
+		snprintf(pid_to, sizeof(pid_to), "[%i]", we->wakee);
+
+		if (we->waker == -1)
+			svg_interrupt(we->time, to, we->backtrace);
+		else if (from && to && abs(from - to) == 1)
+			svg_wakeline(we->time, from, to, we->backtrace);
+		else
+			svg_partial_wakeline(we->time, from,
+					     task_from ? task_from : pid_from, to,
+					     task_to ? task_to : pid_to, we->backtrace);
+
+		free(task_from);
+		free(task_to);
+	}
+}
+
+/*
+ * Draw the cpu usage: one svg_process() box for every TYPE_RUNNING sample of
+ * every comm entry of every pid.
+ * Samples of any other type are skipped.
+ */
+static void draw_cpu_usage(struct timechart *tchart)
+{
+	struct per_pid *task;
+
+	for (task = tchart->all_data; task; task = task->next) {
+		struct per_pidcomm *comm;
+
+		for (comm = task->all; comm; comm = comm->next) {
+			struct cpu_sample *run;
+
+			for (run = comm->samples; run; run = run->next) {
+				if (run->type != TYPE_RUNNING)
+					continue;
+
+				svg_process(run->cpu,
+					    run->start_time, run->end_time,
+					    task->pid, comm->comm,
+					    run->backtrace);
+			}
+		}
+	}
+}
+
+/*
+ * Draw one row per displayed comm entry: a box, a bar per io sample (scaled by
+ * bytes / max_bytes; full height for sync, poll and errors) and a text label.
+ * Rows are numbered from 1 and stored in c->Y; hidden entries get Y = 0. */
+static void draw_io_bars(struct timechart *tchart)
+{
+	const char *suf;
+	double bytes;
+	char comm[256];
+	struct per_pid *p;
+	struct per_pidcomm *c;
+	struct io_sample *sample;
+	int Y = 1;
+
+	for (p = tchart->all_data; p; p = p->next) {
+		for (c = p->all; c; c = c->next) {
+			if (!c->display) {
+				c->Y = 0;
+				continue;
+			}
+
+			svg_box(Y, c->start_time, c->end_time, "process3");
+			for (sample = c->io_samples; sample; sample = sample->next) {
+				/* max_bytes stays 0 until some call moved data: avoid 0/0 */
+				double h = c->max_bytes ?
+					(double)sample->bytes / c->max_bytes : 0;
+
+				if (tchart->skip_eagain &&
+				    sample->err == -EAGAIN)
+					continue;
+
+				if (sample->err)
+					h = 1;
+
+				if (sample->type == IOTYPE_SYNC)
+					svg_fbox(Y,
+						sample->start_time,
+						sample->end_time,
+						1,
+						sample->err ? "error" : "sync",
+						sample->fd,
+						sample->err,
+						sample->merges);
+				else if (sample->type == IOTYPE_POLL)
+					svg_fbox(Y,
+						sample->start_time,
+						sample->end_time,
+						1,
+						sample->err ? "error" : "poll",
+						sample->fd,
+						sample->err,
+						sample->merges);
+				else if (sample->type == IOTYPE_READ)
+					svg_ubox(Y,
+						sample->start_time,
+						sample->end_time,
+						h,
+						sample->err ? "error" : "disk",
+						sample->fd,
+						sample->err,
+						sample->merges);
+				else if (sample->type == IOTYPE_WRITE)
+					svg_lbox(Y,
+						sample->start_time,
+						sample->end_time,
+						h,
+						sample->err ? "error" : "disk",
+						sample->fd,
+						sample->err,
+						sample->merges);
+				else if (sample->type == IOTYPE_RX)
+					svg_ubox(Y,
+						sample->start_time,
+						sample->end_time,
+						h,
+						sample->err ? "error" : "net",
+						sample->fd,
+						sample->err,
+						sample->merges);
+				else if (sample->type == IOTYPE_TX)
+					svg_lbox(Y,
+						sample->start_time,
+						sample->end_time,
+						h,
+						sample->err ? "error" : "net",
+						sample->fd,
+						sample->err,
+						sample->merges);
+			}
+
+			suf = "";
+			bytes = c->total_bytes;
+			if (bytes > 1024) {
+				bytes = bytes / 1024;
+				suf = "K";
+			}
+			if (bytes > 1024) {
+				bytes = bytes / 1024;
+				suf = "M";
+			}
+			if (bytes > 1024) {
+				bytes = bytes / 1024;
+				suf = "G";
+			}
+
+			snprintf(comm, sizeof(comm), "%s:%i (%3.1f %sbytes)",
+				 c->comm ?: "", p->pid, bytes, suf);
+			svg_text(Y, c->start_time, comm);
+
+			c->Y = Y;
+			Y++;
+		}
+	}
+}
+
+/*
+ * Draw one row per displayed task/comm pair, starting at row
+ * 2 * numcpus + 2 and recording the row in c->Y (0 when not displayed).
+ * Running, blocked and waiting samples are drawn with svg_running(),
+ * svg_blocked() and svg_waiting(); rows with a comm are labelled
+ * "comm:pid (time)", in seconds above 5 s of total_time, otherwise in ms.
+ */
+static void draw_process_bars(struct timechart *tchart)
+{
+	struct per_pid *p;
+	struct per_pidcomm *c;
+	struct cpu_sample *sample;
+	int Y = 2 * tchart->numcpus + 2;
+
+	for (p = tchart->all_data; p; p = p->next) {
+		for (c = p->all; c; c = c->next) {
+			if (!c->display) {
+				c->Y = 0;
+				continue;
+			}
+
+			svg_box(Y, c->start_time, c->end_time, "process");
+			for (sample = c->samples; sample; sample = sample->next) {
+				if (sample->type == TYPE_RUNNING)
+					svg_running(Y, sample->cpu,
+						    sample->start_time,
+						    sample->end_time,
+						    sample->backtrace);
+				if (sample->type == TYPE_BLOCKED)
+					svg_blocked(Y, sample->cpu,
+						    sample->start_time,
+						    sample->end_time,
+						    sample->backtrace);
+				if (sample->type == TYPE_WAITING)
+					svg_waiting(Y, sample->cpu,
+						    sample->start_time,
+						    sample->end_time,
+						    sample->backtrace);
+			}
+
+			if (c->comm) {
+				char comm[256];
+
+				if (c->total_time > 5000000000) /* 5 seconds */
+					snprintf(comm, sizeof(comm), "%s:%i (%2.2fs)", c->comm, p->pid,
+						 c->total_time / (double)NSEC_PER_SEC);
+				else
+					snprintf(comm, sizeof(comm), "%s:%i (%3.1fms)", c->comm, p->pid,
+						 c->total_time / (double)NSEC_PER_MSEC);
+
+				svg_text(Y, c->start_time, comm);
+			}
+			c->Y = Y++;
+		}
+	}
+}
+/* Prepend a pid/comm filter built from @string to the process_filter list. */
+static void add_process_filter(const char *string)
+{
+	struct process_filter *filt = malloc(sizeof(*filt));
+	/* passes_filter() strcmp()s ->name, so never queue a NULL name */
+	if (filt)
+		filt->name = strdup(string);
+	if (!filt || !filt->name) {
+		free(filt);
+		return;
+	}
+	filt->pid  = (int)strtoull(string, NULL, 10);
+	filt->next = process_filter;
+	process_filter = filt;
+}
+/* Return 1 if no filter is installed or one matches p->pid or c->comm. */
+static int passes_filter(struct per_pid *p, struct per_pidcomm *c)
+{
+	struct process_filter *filt;
+
+	if (!process_filter)
+		return 1;
+
+	for (filt = process_filter; filt; filt = filt->next) {
+		if (filt->pid && p->pid == filt->pid)
+			return 1;
+		/* comm may be unset: the draw routines check it before use */
+		if (c->comm && strcmp(filt->name, c->comm) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Mark for display every task/comm pair accepted by passes_filter(), and
+ * every pid owning such a pair.  Placeholder times are resolved on the way:
+ * a start_time of 1 becomes tchart->first_time and an end_time of 0 (no
+ * exit marker) becomes tchart->last_time.  Returns the number of pairs
+ * marked for display.
+ */
+static int determine_display_tasks_filtered(struct timechart *tchart)
+{
+	struct per_pid *pid_it;
+	struct per_pidcomm *pc;
+	int shown = 0;
+
+	for (pid_it = tchart->all_data; pid_it; pid_it = pid_it->next) {
+		pid_it->display = 0;
+		if (pid_it->start_time == 1)
+			pid_it->start_time = tchart->first_time;
+		/* no exit marker, task kept running to the end */
+		if (pid_it->end_time == 0)
+			pid_it->end_time = tchart->last_time;
+
+		for (pc = pid_it->all; pc; pc = pc->next) {
+			pc->display = 0;
+			if (pc->start_time == 1)
+				pc->start_time = tchart->first_time;
+
+			if (passes_filter(pid_it, pc)) {
+				pc->display = 1;
+				pid_it->display = 1;
+				shown++;
+			}
+
+			if (pc->end_time == 0)
+				pc->end_time = tchart->last_time;
+		}
+	}
+
+	return shown;
+}
+
+/*
+ * Mark for display every pid and every task/comm pair whose total_time is
+ * at least @threshold.  A start_time of 1 is replaced by tchart->first_time
+ * and an end_time of 0 (no exit marker) by tchart->last_time.  Returns the
+ * number of task/comm pairs marked for display.
+ */
+static int determine_display_tasks(struct timechart *tchart, u64 threshold)
+{
+	struct per_pid *pid_it;
+	struct per_pidcomm *pc;
+	int shown = 0;
+
+	for (pid_it = tchart->all_data; pid_it; pid_it = pid_it->next) {
+		if (pid_it->start_time == 1)
+			pid_it->start_time = tchart->first_time;
+
+		/* no exit marker, task kept running to the end */
+		if (pid_it->end_time == 0)
+			pid_it->end_time = tchart->last_time;
+
+		pid_it->display = 0;
+		if (pid_it->total_time >= threshold)
+			pid_it->display = 1;
+
+		for (pc = pid_it->all; pc; pc = pc->next) {
+			if (pc->start_time == 1)
+				pc->start_time = tchart->first_time;
+			if (pc->end_time == 0)
+				pc->end_time = tchart->last_time;
+
+			pc->display = 0;
+			if (pc->total_time >= threshold) {
+				pc->display = 1;
+				shown++;
+			}
+		}
+	}
+
+	return shown;
+}
+
+/*
+ * Mark for display every task/comm pair whose total_bytes is at least
+ * @threshold and return how many were marked.  An end_time of 0 (no exit
+ * marker) is replaced by timechart->last_time.  Unlike the cpu-time
+ * variants, pid-level display flags and start times are left untouched.
+ */
+static int determine_display_io_tasks(struct timechart *timechart, u64 threshold)
+{
+	struct per_pid *pid_it;
+	struct per_pidcomm *pc;
+	int shown = 0;
+
+	for (pid_it = timechart->all_data; pid_it; pid_it = pid_it->next) {
+		/* no exit marker, task kept running to the end */
+		if (pid_it->end_time == 0)
+			pid_it->end_time = timechart->last_time;
+
+		for (pc = pid_it->all; pc; pc = pc->next) {
+			if (pc->end_time == 0)
+				pc->end_time = timechart->last_time;
+
+			pc->display = 0;
+			if (pc->total_bytes >= threshold) {
+				pc->display = 1;
+				shown++;
+			}
+		}
+	}
+
+	return shown;
+}
+
+#define BYTES_THRESH (1 * 1024 * 1024)
+#define TIME_THRESH 10000000
+/*
+ * Select the tasks to display and write the chart to @filename.  The
+ * threshold starts at BYTES_THRESH (IO charts) or TIME_THRESH and is divided
+ * by ten until at least proc_num tasks qualify or it reaches zero; with a
+ * process filter installed the selection runs only once.
+ */
+static void write_svg_file(struct timechart *tchart, const char *filename)
+{
+	int thresh = tchart->io_events ? BYTES_THRESH : TIME_THRESH;
+	int count;
+	u64 cpu;
+
+	if (tchart->power_only)
+		tchart->proc_num = 0;
+
+	/* We'd like to show at least proc_num tasks; be less picky if we have fewer */
+	do {
+		if (process_filter)
+			count = determine_display_tasks_filtered(tchart);
+		else if (tchart->io_events)
+			count = determine_display_io_tasks(tchart, thresh);
+		else
+			count = determine_display_tasks(tchart, thresh);
+		thresh /= 10;
+	} while (!process_filter && thresh && count < tchart->proc_num);
+
+	if (!tchart->proc_num)
+		count = 0;
+
+	if (tchart->io_events) {
+		open_svg(filename, 0, count, tchart->first_time, tchart->last_time);
+		svg_time_grid(0.5);
+		svg_io_legenda();
+		draw_io_bars(tchart);
+		svg_close();
+		return;
+	}
+
+	open_svg(filename, tchart->numcpus, count, tchart->first_time, tchart->last_time);
+	svg_time_grid(0);
+	svg_legenda();
+	for (cpu = 0; cpu < tchart->numcpus; cpu++)
+		svg_cpu_box(cpu, tchart->max_freq, tchart->turbo_frequency);
+
+	draw_cpu_usage(tchart);
+	if (tchart->proc_num)
+		draw_process_bars(tchart);
+	if (!tchart->tasks_only)
+		draw_c_p_states(tchart);
+	if (tchart->proc_num)
+		draw_wakeups(tchart);
+	svg_close();
+}
+
+/*
+ * perf header section callback (@data is the struct timechart): HEADER_NRCPUS
+ * records env.nr_cpus_avail as the cpu count; HEADER_CPU_TOPOLOGY builds the
+ * svg topology map when tchart->topology is set.  Always returns 0.
+ */
+static int process_header(struct perf_file_section *section __maybe_unused,
+			  struct perf_header *ph,
+			  int feat,
+			  int fd __maybe_unused,
+			  void *data)
+{
+	struct timechart *tchart = data;
+
+	if (feat == HEADER_NRCPUS) {
+		tchart->numcpus = ph->env.nr_cpus_avail;
+		return 0;
+	}
+
+	/* every other feature is ignored, and so is topology unless requested */
+	if (feat != HEADER_CPU_TOPOLOGY || !tchart->topology)
+		return 0;
+
+	if (svg_build_topology_map(ph->env.sibling_cores,
+				   ph->env.nr_sibling_cores,
+				   ph->env.sibling_threads,
+				   ph->env.nr_sibling_threads))
+		fprintf(stderr, "problem building topology\n");
+
+	return 0;
+}
+/* Replay the input session into @tchart, then write @output_name. */
+static int __cmd_timechart(struct timechart *tchart, const char *output_name)
+{
+	const struct perf_evsel_str_handler power_tracepoints[] = {
+		{ "power:cpu_idle",		process_sample_cpu_idle },
+		{ "power:cpu_frequency",	process_sample_cpu_frequency },
+		{ "sched:sched_wakeup",		process_sample_sched_wakeup },
+		{ "sched:sched_switch",		process_sample_sched_switch },
+#ifdef SUPPORT_OLD_POWER_EVENTS
+		{ "power:power_start",		process_sample_power_start },
+		{ "power:power_end",		process_sample_power_end },
+		{ "power:power_frequency",	process_sample_power_frequency },
+#endif
+		/* IO syscall entry tracepoints */
+		{ "syscalls:sys_enter_read",		process_enter_read },
+		{ "syscalls:sys_enter_pread64",		process_enter_read },
+		{ "syscalls:sys_enter_readv",		process_enter_read },
+		{ "syscalls:sys_enter_preadv",		process_enter_read },
+		{ "syscalls:sys_enter_write",		process_enter_write },
+		{ "syscalls:sys_enter_pwrite64",	process_enter_write },
+		{ "syscalls:sys_enter_writev",		process_enter_write },
+		{ "syscalls:sys_enter_pwritev",		process_enter_write },
+		{ "syscalls:sys_enter_sync",		process_enter_sync },
+		{ "syscalls:sys_enter_sync_file_range",	process_enter_sync },
+		{ "syscalls:sys_enter_fsync",		process_enter_sync },
+		{ "syscalls:sys_enter_msync",		process_enter_sync },
+		{ "syscalls:sys_enter_recvfrom",	process_enter_rx },
+		{ "syscalls:sys_enter_recvmmsg",	process_enter_rx },
+		{ "syscalls:sys_enter_recvmsg",		process_enter_rx },
+		{ "syscalls:sys_enter_sendto",		process_enter_tx },
+		{ "syscalls:sys_enter_sendmsg",		process_enter_tx },
+		{ "syscalls:sys_enter_sendmmsg",	process_enter_tx },
+		{ "syscalls:sys_enter_epoll_pwait",	process_enter_poll },
+		{ "syscalls:sys_enter_epoll_wait",	process_enter_poll },
+		{ "syscalls:sys_enter_poll",		process_enter_poll },
+		{ "syscalls:sys_enter_ppoll",		process_enter_poll },
+		{ "syscalls:sys_enter_pselect6",	process_enter_poll },
+		{ "syscalls:sys_enter_select",		process_enter_poll },
+		/* IO syscall exit tracepoints */
+		{ "syscalls:sys_exit_read",		process_exit_read },
+		{ "syscalls:sys_exit_pread64",		process_exit_read },
+		{ "syscalls:sys_exit_readv",		process_exit_read },
+		{ "syscalls:sys_exit_preadv",		process_exit_read },
+		{ "syscalls:sys_exit_write",		process_exit_write },
+		{ "syscalls:sys_exit_pwrite64",		process_exit_write },
+		{ "syscalls:sys_exit_writev",		process_exit_write },
+		{ "syscalls:sys_exit_pwritev",		process_exit_write },
+		{ "syscalls:sys_exit_sync",		process_exit_sync },
+		{ "syscalls:sys_exit_sync_file_range",	process_exit_sync },
+		{ "syscalls:sys_exit_fsync",		process_exit_sync },
+		{ "syscalls:sys_exit_msync",		process_exit_sync },
+		{ "syscalls:sys_exit_recvfrom",		process_exit_rx },
+		{ "syscalls:sys_exit_recvmmsg",		process_exit_rx },
+		{ "syscalls:sys_exit_recvmsg",		process_exit_rx },
+		{ "syscalls:sys_exit_sendto",		process_exit_tx },
+		{ "syscalls:sys_exit_sendmsg",		process_exit_tx },
+		{ "syscalls:sys_exit_sendmmsg",		process_exit_tx },
+		{ "syscalls:sys_exit_epoll_pwait",	process_exit_poll },
+		{ "syscalls:sys_exit_epoll_wait",	process_exit_poll },
+		{ "syscalls:sys_exit_poll",		process_exit_poll },
+		{ "syscalls:sys_exit_ppoll",		process_exit_poll },
+		{ "syscalls:sys_exit_pselect6",		process_exit_poll },
+		{ "syscalls:sys_exit_select",		process_exit_poll },
+	};
+	struct perf_data data = {
+		.file      = {
+			.path = input_name,
+		},
+		.mode      = PERF_DATA_MODE_READ,
+		.force     = tchart->force,
+	};
+
+	struct perf_session *session = perf_session__new(&data, false,
+							 &tchart->tool);
+	int ret = -EINVAL;	/* result when no traces are found or handlers cannot be set */
+
+	if (session == NULL)
+		return -1;
+
+	symbol__init(&session->header.env);
+	/* process_header() picks up the cpu count and, if requested, the topology */
+	(void)perf_header__process_sections(&session->header,
+					    perf_data__fd(session->data),
+					    tchart,
+					    process_header);
+
+	if (!perf_session__has_traces(session, "timechart record"))
+		goto out_delete;
+
+	if (perf_session__set_tracepoints_handlers(session,
+						   power_tracepoints)) {
+		pr_err("Initializing session tracepoint handlers failed\n");
+		goto out_delete;
+	}
+
+	ret = perf_session__process_events(session);
+	if (ret)
+		goto out_delete;
+
+	end_sample_processing(tchart);
+
+	sort_pids(tchart);
+
+	write_svg_file(tchart, output_name);
+
+	pr_info("Written %2.1f seconds of trace to %s.\n",
+		(tchart->last_time - tchart->first_time) / (double)NSEC_PER_SEC, output_name);
+out_delete:
+	perf_session__delete(session);
+	return ret;
+}
+/*
+ * Append "-e <event> --filter <filter>" at *@pp for each valid tracepoint in
+ * @events; returns how many of the 4-per-event argv slots went unused.
+ */
+static unsigned int io_record_add_events(const char ***pp,
+					 const char * const *events,
+					 unsigned int nr_events, char *filter)
+{
+	const char **p = *pp;
+	unsigned int unused = 0, i;
+
+	for (i = 0; i < nr_events; i++) {
+		if (!is_valid_tracepoint(events[i])) {
+			unused += 4;
+			continue;
+		}
+
+		*p++ = "-e";
+		*p++ = strdup(events[i]);
+		*p++ = "--filter";
+		*p++ = filter;
+	}
+
+	*pp = p;
+	return unused;
+}
+
+/*
+ * Build the argument vector for "perf record" with every valid disk, network
+ * and poll syscall tracepoint, each filtered with "common_pid != <getpid()>",
+ * append the caller's @argv and run cmd_record().  Returns -ENOMEM when the
+ * vector or the filter cannot be allocated, else the cmd_record() result.
+ */
+static int timechart__io_record(int argc, const char **argv)
+{
+	unsigned int rec_argc, i;
+	const char **rec_argv;
+	const char **p;
+	char *filter = NULL;
+
+	const char * const common_args[] = {
+		"record", "-a", "-R", "-c", "1",
+	};
+	unsigned int common_args_nr = ARRAY_SIZE(common_args);
+
+	const char * const disk_events[] = {
+		"syscalls:sys_enter_read",
+		"syscalls:sys_enter_pread64",
+		"syscalls:sys_enter_readv",
+		"syscalls:sys_enter_preadv",
+		"syscalls:sys_enter_write",
+		"syscalls:sys_enter_pwrite64",
+		"syscalls:sys_enter_writev",
+		"syscalls:sys_enter_pwritev",
+		"syscalls:sys_enter_sync",
+		"syscalls:sys_enter_sync_file_range",
+		"syscalls:sys_enter_fsync",
+		"syscalls:sys_enter_msync",
+
+		"syscalls:sys_exit_read",
+		"syscalls:sys_exit_pread64",
+		"syscalls:sys_exit_readv",
+		"syscalls:sys_exit_preadv",
+		"syscalls:sys_exit_write",
+		"syscalls:sys_exit_pwrite64",
+		"syscalls:sys_exit_writev",
+		"syscalls:sys_exit_pwritev",
+		"syscalls:sys_exit_sync",
+		"syscalls:sys_exit_sync_file_range",
+		"syscalls:sys_exit_fsync",
+		"syscalls:sys_exit_msync",
+	};
+	unsigned int disk_events_nr = ARRAY_SIZE(disk_events);
+
+	const char * const net_events[] = {
+		"syscalls:sys_enter_recvfrom",
+		"syscalls:sys_enter_recvmmsg",
+		"syscalls:sys_enter_recvmsg",
+		"syscalls:sys_enter_sendto",
+		"syscalls:sys_enter_sendmsg",
+		"syscalls:sys_enter_sendmmsg",
+
+		"syscalls:sys_exit_recvfrom",
+		"syscalls:sys_exit_recvmmsg",
+		"syscalls:sys_exit_recvmsg",
+		"syscalls:sys_exit_sendto",
+		"syscalls:sys_exit_sendmsg",
+		"syscalls:sys_exit_sendmmsg",
+	};
+	unsigned int net_events_nr = ARRAY_SIZE(net_events);
+
+	const char * const poll_events[] = {
+		"syscalls:sys_enter_epoll_pwait",
+		"syscalls:sys_enter_epoll_wait",
+		"syscalls:sys_enter_poll",
+		"syscalls:sys_enter_ppoll",
+		"syscalls:sys_enter_pselect6",
+		"syscalls:sys_enter_select",
+
+		"syscalls:sys_exit_epoll_pwait",
+		"syscalls:sys_exit_epoll_wait",
+		"syscalls:sys_exit_poll",
+		"syscalls:sys_exit_ppoll",
+		"syscalls:sys_exit_pselect6",
+		"syscalls:sys_exit_select",
+	};
+	unsigned int poll_events_nr = ARRAY_SIZE(poll_events);
+
+	rec_argc = common_args_nr + argc +
+		4 * (disk_events_nr + net_events_nr + poll_events_nr);
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
+	if (asprintf(&filter, "common_pid != %d", getpid()) < 0) {
+		free(rec_argv);
+		return -ENOMEM;
+	}
+
+	p = rec_argv;
+	for (i = 0; i < common_args_nr; i++)
+		*p++ = strdup(common_args[i]);
+
+	/* drop the slots that were reserved for invalid tracepoints */
+	rec_argc -= io_record_add_events(&p, disk_events, disk_events_nr, filter);
+	rec_argc -= io_record_add_events(&p, net_events, net_events_nr, filter);
+	rec_argc -= io_record_add_events(&p, poll_events, poll_events_nr, filter);
+
+	for (i = 0; i < (unsigned int)argc; i++)
+		*p++ = argv[i];
+
+	return cmd_record(rec_argc, rec_argv);
+}
+
+/*
+ * Build the argument vector for "perf record" from the common arguments plus,
+ * depending on @tchart: "-g" (with_backtrace), the sched tracepoints (unless
+ * power_only) and the power tracepoints (unless tasks_only), append the
+ * caller's @argv and run cmd_record().  Returns -ENOMEM if the vector cannot
+ * be allocated, otherwise whatever cmd_record() returns.
+ */
+static int timechart__record(struct timechart *tchart, int argc, const char **argv)
+{
+	unsigned int rec_argc, i;
+	const char **rec_argv;
+	const char **p;
+
+	const char * const common_args[] = {
+		"record", "-a", "-R", "-c", "1",
+	};
+	unsigned int common_args_nr = ARRAY_SIZE(common_args);
+
+	const char * const backtrace_args[] = {
+		"-g",
+	};
+	unsigned int backtrace_args_no = ARRAY_SIZE(backtrace_args);
+
+	const char * const power_args[] = {
+		"-e", "power:cpu_frequency",
+		"-e", "power:cpu_idle",
+	};
+	unsigned int power_args_nr = ARRAY_SIZE(power_args);
+
+	const char * const old_power_args[] = {
+#ifdef SUPPORT_OLD_POWER_EVENTS
+		"-e", "power:power_start",
+		"-e", "power:power_end",
+		"-e", "power:power_frequency",
+#endif
+	};
+	unsigned int old_power_args_nr = ARRAY_SIZE(old_power_args);
+
+	const char * const tasks_args[] = {
+		"-e", "sched:sched_wakeup",
+		"-e", "sched:sched_switch",
+	};
+	unsigned int tasks_args_nr = ARRAY_SIZE(tasks_args);
+
+#ifdef SUPPORT_OLD_POWER_EVENTS
+	/* use the old events only when cpu_idle is missing but power_start exists */
+	if (!is_valid_tracepoint("power:cpu_idle") &&
+	    is_valid_tracepoint("power:power_start")) {
+		use_old_power_events = 1;
+		power_args_nr = 0;
+	} else {
+		old_power_args_nr = 0;
+	}
+#endif
+
+	if (tchart->power_only)
+		tasks_args_nr = 0;
+
+	if (tchart->tasks_only) {
+		power_args_nr = 0;
+		old_power_args_nr = 0;
+	}
+
+	if (!tchart->with_backtrace)
+		backtrace_args_no = 0;
+
+	rec_argc = common_args_nr + tasks_args_nr + power_args_nr +
+		old_power_args_nr + backtrace_args_no + argc;
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
+	/* built-in arguments are strdup()ed into the new vector */
+	p = rec_argv;
+	for (i = 0; i < common_args_nr; i++)
+		*p++ = strdup(common_args[i]);
+	for (i = 0; i < backtrace_args_no; i++)
+		*p++ = strdup(backtrace_args[i]);
+	for (i = 0; i < tasks_args_nr; i++)
+		*p++ = strdup(tasks_args[i]);
+	for (i = 0; i < power_args_nr; i++)
+		*p++ = strdup(power_args[i]);
+	for (i = 0; i < old_power_args_nr; i++)
+		*p++ = strdup(old_power_args[i]);
+	/* the caller's arguments are passed through as they are */
+	for (i = 0; i < (unsigned int)argc; i++)
+		*p++ = argv[i];
+
+	return cmd_record(rec_argc, rec_argv);
+}
+/* Option callback: register @arg, when present, as a process filter. */
+static int parse_process(const struct option *opt __maybe_unused,
+			 const char *arg, int __maybe_unused unset)
+{
+	if (!arg)
+		return 0;
+	add_process_filter(arg);
+	return 0;
+}
+/* --highlight: a non-zero number is a duration, anything else a task name. */
+static int parse_highlight(const struct option *opt __maybe_unused,
+			   const char *arg, int __maybe_unused unset)
+{
+	unsigned long duration;
+
+	if (!arg)	/* nothing to parse; parse_process() tolerates this too */
+		return 0;
+	if (svg_highlight || svg_highlight_name)
+		return -1;
+	duration = strtoul(arg, NULL, 0);
+	if (duration)
+		svg_highlight = duration;
+	else
+		svg_highlight_name = strdup(arg);
+	return (duration || svg_highlight_name) ? 0 : -1; /* -1: strdup() failed */
+}
+/*
+ * Parse "<number>[m|u|n]s" into the u64 at opt->value, in nanoseconds (a bare
+ * number is ns).  Returns -1, leaving the value alone, on a bad number or unit.
+ */
+static int
+parse_time(const struct option *opt, const char *arg, int __maybe_unused unset)
+{
+	char unit = 'n';
+	u64 parsed;
+
+	/* reject input that does not start with a number instead of ignoring it */
+	if (sscanf(arg, "%" PRIu64 "%cs", &parsed, &unit) <= 0)
+		return -1;
+
+	if (unit == 'm')
+		parsed *= NSEC_PER_MSEC;
+	else if (unit == 'u')
+		parsed *= NSEC_PER_USEC;
+	else if (unit != 'n')
+		return -1;
+
+	*(u64 *)opt->value = parsed;
+	return 0;
+}
+/* Entry point of "perf timechart": parse options, then record or render. */
+int cmd_timechart(int argc, const char **argv)
+{
+	struct timechart tchart = {
+		.tool = {
+			.comm		 = process_comm_event,
+			.fork		 = process_fork_event,
+			.exit		 = process_exit_event,
+			.sample		 = process_sample_event,
+			.ordered_events	 = true,
+		},
+		.proc_num = 15,
+		.min_time = NSEC_PER_MSEC,
+		.merge_dist = 1000,
+	};
+	const char *output_name = "output.svg";
+	const struct option timechart_common_options[] = {
+	OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"),
+	OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, "output processes data only"),
+	OPT_END()
+	};
+	const struct option timechart_options[] = {
+	OPT_STRING('i', "input", &input_name, "file", "input file name"),
+	OPT_STRING('o', "output", &output_name, "file", "output file name"),
+	OPT_INTEGER('w', "width", &svg_page_width, "page width"),
+	OPT_CALLBACK(0, "highlight", NULL, "duration or task name",
+		      "highlight tasks. Pass duration in ns or process name.",
+		       parse_highlight),
+	OPT_CALLBACK('p', "process", NULL, "process",
+		      "process selector. Pass a pid or process name.",
+		       parse_process),
+	OPT_CALLBACK(0, "symfs", NULL, "directory",
+		     "Look for files with symbols relative to this directory",
+		     symbol__config_symfs),
+	OPT_INTEGER('n', "proc-num", &tchart.proc_num,
+		    "min. number of tasks to print"),
+	OPT_BOOLEAN('t', "topology", &tchart.topology,
+		    "sort CPUs according to topology"),
+	OPT_BOOLEAN(0, "io-skip-eagain", &tchart.skip_eagain,
+		    "skip EAGAIN errors"),
+	OPT_CALLBACK(0, "io-min-time", &tchart.min_time, "time",
+		     "all IO faster than min-time will visually appear longer",
+		     parse_time),
+	OPT_CALLBACK(0, "io-merge-dist", &tchart.merge_dist, "time",
+		     "merge events that are merge-dist us apart",
+		     parse_time),
+	OPT_BOOLEAN('f', "force", &tchart.force, "don't complain, do it"),
+	OPT_PARENT(timechart_common_options),
+	};
+	const char * const timechart_subcommands[] = { "record", NULL };
+	const char *timechart_usage[] = {
+		"perf timechart [<options>] {record}",
+		NULL
+	};
+	const struct option timechart_record_options[] = {
+	OPT_BOOLEAN('I', "io-only", &tchart.io_only,
+		    "record only IO data"),
+	OPT_BOOLEAN('g', "callchain", &tchart.with_backtrace, "record callchain"),
+	OPT_PARENT(timechart_common_options),
+	};
+	const char * const timechart_record_usage[] = {
+		"perf timechart record [<options>]",
+		NULL
+	};
+	argc = parse_options_subcommand(argc, argv, timechart_options, timechart_subcommands,
+			timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+
+	if (tchart.power_only && tchart.tasks_only) {
+		pr_err("-P and -T options cannot be used at the same time.\n");
+		return -1;
+	}
+	/* any first word starting with "rec" selects record mode */
+	if (argc && !strncmp(argv[0], "rec", 3)) {
+		argc = parse_options(argc, argv, timechart_record_options,
+				     timechart_record_usage,
+				     PARSE_OPT_STOP_AT_NON_OPTION);
+		/* checked again: the record options also accept -P and -T */
+		if (tchart.power_only && tchart.tasks_only) {
+			pr_err("-P and -T options cannot be used at the same time.\n");
+			return -1;
+		}
+
+		if (tchart.io_only)
+			return timechart__io_record(argc, argv);
+		else
+			return timechart__record(&tchart, argc, argv);
+	} else if (argc)
+		usage_with_options(timechart_usage, timechart_options);
+
+	setup_pager();
+
+	return __cmd_timechart(&tchart, output_name);
+}
diff --git a/builtin-top.c b/builtin-top.c
new file mode 100644
index 0000000..f39bd60
--- /dev/null
+++ b/builtin-top.c
@@ -0,0 +1,1516 @@
+/*
+ * builtin-top.c
+ *
+ * Builtin top command: Display a continuously updated profile of
+ * any workload, CPU or specific PID.
+ *
+ * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ *		 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Improvements and fixes by:
+ *
+ *   Arjan van de Ven <arjan@linux.intel.com>
+ *   Yanmin Zhang <yanmin.zhang@intel.com>
+ *   Wu Fengguang <fengguang.wu@intel.com>
+ *   Mike Galbraith <efault@gmx.de>
+ *   Paul Mackerras <paulus@samba.org>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/annotate.h"
+#include "util/config.h"
+#include "util/color.h"
+#include "util/drv_configs.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/event.h"
+#include "util/machine.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/thread_map.h"
+#include "util/top.h"
+#include <linux/rbtree.h>
+#include <subcmd/parse-options.h>
+#include "util/parse-events.h"
+#include "util/cpumap.h"
+#include "util/xyarray.h"
+#include "util/sort.h"
+#include "util/term.h"
+#include "util/intlist.h"
+#include "util/parse-branch-options.h"
+#include "arch/common.h"
+
+#include "util/debug.h"
+
+#include <assert.h>
+#include <elf.h>
+#include <fcntl.h>
+
+#include <stdio.h>
+#include <termios.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <errno.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <poll.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/mman.h>
+
+#include <linux/stringify.h>
+#include <linux/time64.h>
+#include <linux/types.h>
+
+#include "sane_ctype.h"
+
+static volatile int done;	/* NOTE(review): users are outside this view; presumably a quit flag */
+static volatile int resize;	/* set to 1 by winch_sig() */
+
+#define HEADER_LINE_NR  5	/* rows subtracted from the terminal height for print_entries */
+/* Set top->print_entries to the terminal row count minus HEADER_LINE_NR. */
+static void perf_top__update_print_entries(struct perf_top *top)
+{
+	top->print_entries = top->winsize.ws_row - HEADER_LINE_NR;
+}
+/* Signal handler: only sets the resize flag (presumably installed for SIGWINCH). */
+static void winch_sig(int sig __maybe_unused)
+{
+	resize = 1;
+}
+/* Re-read the terminal dimensions into top->winsize and recompute print_entries. */
+static void perf_top__resize(struct perf_top *top)
+{
+	get_term_dimensions(&top->winsize);
+	perf_top__update_print_entries(top);
+}
+/*
+ * Make @he the annotated entry (top->sym_filter_entry).  Returns 0 on
+ * success, -1 or the symbol__annotate() error when it cannot be annotated.
+ */
+static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
+{
+	struct perf_evsel *evsel;
+	struct symbol *sym;
+	struct annotation *notes;
+	struct map *map;
+	int err = 0;
+
+	if (!he || !he->ms.sym)
+		return -1;
+
+	/* he->hists may only be read once he is known to be non-NULL */
+	evsel = hists_to_evsel(he->hists);
+	sym = he->ms.sym;
+	map = he->ms.map;
+
+	/* We can't annotate with just /proc/kallsyms */
+	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+	    !dso__is_kcore(map->dso)) {
+		pr_err("Can't annotate %s: No vmlinux file was found in the "
+		       "path\n", sym->name);
+		sleep(1);
+		return -1;
+	}
+
+	notes = symbol__annotation(sym);
+	pthread_mutex_lock(&notes->lock);
+
+	/* an existing annotation is reused and reported as success */
+	if (notes->src == NULL) {
+		if (symbol__alloc_hist(sym) < 0) {
+			pthread_mutex_unlock(&notes->lock);
+			pr_err("Not enough memory for annotating '%s' symbol!\n",
+			       sym->name);
+			sleep(1);
+			return -1;
+		}
+		err = symbol__annotate(sym, map, evsel, 0, NULL);
+	}
+
+	if (err == 0) {
+		top->sym_filter_entry = he;
+	} else {
+		char msg[BUFSIZ];
+		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
+		pr_err("Couldn't annotate %s: %s\n", sym->name, msg);
+	}
+
+	pthread_mutex_unlock(&notes->lock);
+	return err;
+}
+
+/* Zero the annotation histograms of the symbol behind @he. */
+static void __zero_source_counters(struct hist_entry *he)
+{
+	symbol__annotate_zero_histograms(he->ms.sym);
+}
+/* Emit the "Out of bounds address" warning for @ip and mark @map as warned. */
+static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
+{
+	struct utsname uts;
+	const bool no_uts = uname(&uts) != 0;
+	const char binding = sym->binding == STB_GLOBAL ? 'g' :
+			     sym->binding == STB_LOCAL  ? 'l' : 'w';
+
+	ui__warning("Out of bounds address found:\n\n"
+		    "Addr:   %" PRIx64 "\n"
+		    "DSO:    %s %c\n"
+		    "Map:    %" PRIx64 "-%" PRIx64 "\n"
+		    "Symbol: %" PRIx64 "-%" PRIx64 " %c %s\n"
+		    "Arch:   %s\n"
+		    "Kernel: %s\n"
+		    "Tools:  %s\n\n"
+		    "Not all samples will be on the annotation output.\n\n"
+		    "Please report to linux-kernel@vger.kernel.org\n",
+		    ip, map->dso->long_name, dso__symtab_origin(map->dso),
+		    map->start, map->end, sym->start, sym->end,
+		    binding, sym->name,
+		    no_uts ? "[unknown]" : uts.machine,
+		    no_uts ? "[unknown]" : uts.release, perf_version_string);
+	if (use_browser <= 0)
+		sleep(5);
+	map->erange_warned = true;
+}
+/* Account the sample at @ip to @he's symbol histogram; warn on -ERANGE/-ENOMEM. */
+static void perf_top__record_precise_ip(struct perf_top *top,
+					struct hist_entry *he,
+					struct perf_sample *sample,
+					int counter, u64 ip)
+{
+	struct annotation *notes;
+	struct symbol *sym = he->ms.sym;
+	int err = 0;
+	/* with use_browser == 0, only the symbol being annotated is tracked */
+	if (sym == NULL || (use_browser == 0 &&
+			    (top->sym_filter_entry == NULL ||
+			     top->sym_filter_entry->ms.sym != sym)))
+		return;
+
+	notes = symbol__annotation(sym);
+	/* never block here: the sample is dropped if the lock is already held */
+	if (pthread_mutex_trylock(&notes->lock))
+		return;
+
+	err = hist_entry__inc_addr_samples(he, sample, counter, ip);
+
+	pthread_mutex_unlock(&notes->lock);
+
+	if (unlikely(err)) {
+		/*
+		 * This function is now called with he->hists->lock held.
+		 * Release it before going to sleep.
+		 */
+		pthread_mutex_unlock(&he->hists->lock);
+
+		if (err == -ERANGE && !he->ms.map->erange_warned)
+			ui__warn_map_erange(he->ms.map, sym, ip);
+		else if (err == -ENOMEM) {
+			pr_err("Not enough memory for annotating '%s' symbol!\n",
+			       sym->name);
+			sleep(1);
+		}
+
+		pthread_mutex_lock(&he->hists->lock);
+	}
+}
+/* Print the annotation of top->sym_filter_entry's symbol, if one is selected. */
+static void perf_top__show_details(struct perf_top *top)
+{
+	struct hist_entry *he = top->sym_filter_entry;
+	struct perf_evsel *evsel;
+	struct annotation *notes;
+	struct symbol *symbol;
+	int more;
+
+	if (!he)
+		return;
+
+	/* he->hists may only be read once he is known to be non-NULL */
+	evsel = hists_to_evsel(he->hists);
+	symbol = he->ms.sym;
+	notes = symbol__annotation(symbol);
+
+	pthread_mutex_lock(&notes->lock);
+	symbol__calc_percent(symbol, evsel);
+	if (notes->src == NULL)
+		goto out_unlock;
+
+	printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
+	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
+
+	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel,
+				       0, top->sym_pcnt_filter, top->print_entries, 4);
+
+	if (top->evlist->enabled) {
+		if (top->zero)
+			symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
+		else
+			symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
+	}
+	if (more != 0)
+		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
+out_unlock:
+	pthread_mutex_unlock(&notes->lock);
+}
+/*
+ * Redraw the display on stdout: header, lost-chunk warning, then either the
+ * annotation of the selected symbol or the resorted hist entry table.
+ */
+static void perf_top__print_sym_table(struct perf_top *top)
+{
+	char bf[160];
+	int printed = 0;
+	const int win_width = top->winsize.ws_col - 1;
+	struct perf_evsel *evsel = top->sym_evsel;
+	struct hists *hists = evsel__hists(evsel);
+
+	puts(CONSOLE_CLEAR);
+
+	perf_top__header_snprintf(top, bf, sizeof(bf));
+	printf("%s\n", bf);
+
+	perf_top__reset_sample_counters(top);
+	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
+
+	if (!top->record_opts.overwrite &&
+	    (hists->stats.nr_lost_warned !=
+	    hists->stats.nr_events[PERF_RECORD_LOST])) {
+		hists->stats.nr_lost_warned =
+			      hists->stats.nr_events[PERF_RECORD_LOST];
+		color_fprintf(stdout, PERF_COLOR_RED,
+			      "WARNING: LOST %d chunks, Check IO/CPU overload",
+			      hists->stats.nr_lost_warned);
+		printed++;
+	}
+
+	if (top->sym_filter_entry) {
+		perf_top__show_details(top);
+		return;
+	}
+
+	/* entries are only aged (or dropped) while the evlist is enabled */
+	if (top->evlist->enabled && top->zero)
+		hists__delete_entries(hists);
+	else if (top->evlist->enabled)
+		hists__decay_entries(hists, top->hide_user_symbols,
+				     top->hide_kernel_symbols);
+
+	hists__collapse_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
+
+	hists__output_recalc_col_len(hists, top->print_entries - printed);
+	putchar('\n');
+	hists__fprintf(hists, false, top->print_entries - printed, win_width,
+		       top->min_percent, stdout, symbol_conf.use_callchain);
+}
+
+/*
+ * Print "@msg: " and read one line from stdin.  *@target is updated only when
+ * the line holds nothing but decimal digits (an empty line yields 0).
+ */
+static void prompt_integer(int *target, const char *msg)
+{
+	char *buf = NULL, *p;
+	size_t dummy = 0;
+
+	fprintf(stdout, "\n%s: ", msg);
+	if (getline(&buf, &dummy, stdin) < 0)
+		goto out_free;	/* getline() may have allocated buf even on failure */
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = 0;
+
+	for (p = buf; *p; p++) {
+		if (!isdigit(*p))
+			goto out_free;
+	}
+	*target = strtoul(buf, NULL, 10);
+out_free:
+	free(buf);
+}
+/* Prompt for a percentage; *@target changes only for a valid 0..100 answer. */
+static void prompt_percent(int *target, const char *msg)
+{
+	int tmp = -1;	/* stays out of range if prompt_integer() rejects the input */
+
+	prompt_integer(&tmp, msg);
+	if (tmp >= 0 && tmp <= 100)
+		*target = tmp;
+}
+/* Ask for a symbol name with prompt @msg and start annotating the first
+ * hist entry whose symbol has that name; the symbol annotated so far, if
+ * any, has its histograms zeroed and is deselected first. */
+static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
+{
+	char *line = malloc(0), *nl;
+	struct hist_entry *active = top->sym_filter_entry, *entry, *found = NULL;
+	struct hists *hists = evsel__hists(top->sym_evsel);
+	struct rb_node *node;
+	size_t cap = 0;
+
+	/* zero counters of active symbol */
+	if (active) {
+		__zero_source_counters(active);
+		top->sym_filter_entry = NULL;
+	}
+
+	fprintf(stdout, "\n%s: ", msg);
+	if (getline(&line, &cap, stdin) < 0)
+		goto out_free;
+
+	nl = strchr(line, '\n');
+	if (nl)
+		*nl = 0;
+
+	for (node = rb_first(&hists->entries); node; node = rb_next(node)) {
+		entry = rb_entry(node, struct hist_entry, rb_node);
+		if (entry->ms.sym && !strcmp(line, entry->ms.sym->name)) {
+			found = entry;
+			break;
+		}
+	}
+
+	if (found) {
+		perf_top__parse_source(top, found);
+	} else {
+		fprintf(stderr, "Sorry, %s is not active.\n", line);
+		sleep(1);
+	}
+out_free:
+	free(line);
+}
+
+/*
+ * Print the list of mapped keys on stdout together with the current value
+ * of the setting each key controls.
+ */
+static void perf_top__print_mapped_keys(struct perf_top *top)
+{
+	char *name = top->sym_filter_entry ?
+		     top->sym_filter_entry->ms.sym->name : NULL;
+	const char *hide_kernel = top->hide_kernel_symbols ? "yes" : "no";
+	const char *hide_user = top->hide_user_symbols ? "yes" : "no";
+
+	fprintf(stdout, "\nMapped keys:\n");
+	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top->delay_secs);
+	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
+
+	if (top->evlist->nr_entries > 1)
+		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", perf_evsel__name(top->sym_evsel));
+
+	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
+
+	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
+	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name ? name : "NULL");
+	fprintf(stdout, "\t[S]     stop annotation.\n");
+
+	fprintf(stdout,
+		"\t[K]     hide kernel symbols.             \t(%s)\n", hide_kernel);
+	fprintf(stdout,
+		"\t[U]     hide user symbols.               \t(%s)\n", hide_user);
+	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top->zero ? 1 : 0);
+	fprintf(stdout, "\t[qQ]    quit.\n");
+}
+
+static int perf_top__key_mapped(struct perf_top *top, int c)
+{
+	switch (c) {
+		case 'd':
+		case 'e':
+		case 'f':
+		case 'z':
+		case 'q':
+		case 'Q':
+		case 'K':
+		case 'U':
+		case 'F':
+		case 's':
+		case 'S':
+			return 1;
+		case 'E':
+			return top->evlist->nr_entries > 1 ? 1 : 0;
+		default:
+			break;
+	}
+
+	return 0;
+}
+
+static bool perf_top__handle_keypress(struct perf_top *top, int c)
+{
+	bool ret = true;
+
+	if (!perf_top__key_mapped(top, c)) {
+		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+		struct termios save;
+
+		perf_top__print_mapped_keys(top);
+		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
+		fflush(stdout);
+
+		set_term_quiet_input(&save);
+
+		poll(&stdin_poll, 1, -1);
+		c = getc(stdin);
+
+		tcsetattr(0, TCSAFLUSH, &save);
+		if (!perf_top__key_mapped(top, c))
+			return ret;
+	}
+
+	switch (c) {
+		case 'd':
+			prompt_integer(&top->delay_secs, "Enter display delay");
+			if (top->delay_secs < 1)
+				top->delay_secs = 1;
+			break;
+		case 'e':
+			prompt_integer(&top->print_entries, "Enter display entries (lines)");
+			if (top->print_entries == 0) {
+				perf_top__resize(top);
+				signal(SIGWINCH, winch_sig);
+			} else {
+				signal(SIGWINCH, SIG_DFL);
+			}
+			break;
+		case 'E':
+			if (top->evlist->nr_entries > 1) {
+				/* Select 0 as the default event: */
+				int counter = 0;
+
+				fprintf(stderr, "\nAvailable events:");
+
+				evlist__for_each_entry(top->evlist, top->sym_evsel)
+					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
+
+				prompt_integer(&counter, "Enter details event counter");
+
+				if (counter >= top->evlist->nr_entries) {
+					top->sym_evsel = perf_evlist__first(top->evlist);
+					fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
+					sleep(1);
+					break;
+				}
+				evlist__for_each_entry(top->evlist, top->sym_evsel)
+					if (top->sym_evsel->idx == counter)
+						break;
+			} else
+				top->sym_evsel = perf_evlist__first(top->evlist);
+			break;
+		case 'f':
+			prompt_integer(&top->count_filter, "Enter display event count filter");
+			break;
+		case 'F':
+			prompt_percent(&top->sym_pcnt_filter,
+				       "Enter details display event filter (percent)");
+			break;
+		case 'K':
+			top->hide_kernel_symbols = !top->hide_kernel_symbols;
+			break;
+		case 'q':
+		case 'Q':
+			printf("exiting.\n");
+			if (top->dump_symtab)
+				perf_session__fprintf_dsos(top->session, stderr);
+			ret = false;
+			break;
+		case 's':
+			perf_top__prompt_symbol(top, "Enter details symbol");
+			break;
+		case 'S':
+			if (!top->sym_filter_entry)
+				break;
+			else {
+				struct hist_entry *syme = top->sym_filter_entry;
+
+				top->sym_filter_entry = NULL;
+				__zero_source_counters(syme);
+			}
+			break;
+		case 'U':
+			top->hide_user_symbols = !top->hide_user_symbols;
+			break;
+		case 'z':
+			top->zero = !top->zero;
+			break;
+		default:
+			break;
+	}
+
+	return ret;
+}
+
+/* Re-sort the hists of t->sym_evsel; also the hist_browser_timer callback. */
+static void perf_top__sort_new_samples(void *arg)
+{
+	struct perf_top *t = arg;
+	struct perf_evsel *evsel = t->sym_evsel;
+	struct hists *hists;
+
+	perf_top__reset_sample_counters(t);
+	if (t->evlist->selected != NULL)
+		t->sym_evsel = t->evlist->selected;
+	/* NOTE(review): evsel predates the update above - confirm that is intended */
+	hists = evsel__hists(evsel);
+	/* with events enabled: zero mode deletes all entries, otherwise decay them */
+	if (t->evlist->enabled) {
+		if (t->zero) {
+			hists__delete_entries(hists);
+		} else {
+			hists__decay_entries(hists, t->hide_user_symbols,
+					     t->hide_kernel_symbols);
+		}
+	}
+
+	hists__collapse_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
+}
+
+/* TUI display thread: run the hists browser, then flag termination via done. */
+static void *display_thread_tui(void *arg)
+{
+	struct perf_evsel *pos;
+	struct perf_top *top = arg;
+	const char *help = "For a higher level overview, try: perf top --sort comm,dso";
+	struct hist_browser_timer hbt = {
+		.timer		= perf_top__sort_new_samples,
+		.arg		= top,
+		.refresh	= top->delay_secs,
+	};
+
+	/* perf must setns(2) to read symbols from other namespaces, which is
+	 * not permitted while the fs_struct has multiple users: unshare(2) the
+	 * fs so that we can keep entering the namespaces we're observing.
+	 */
+	unshare(CLONE_FS);
+
+	perf_top__sort_new_samples(top);
+
+	/*
+	 * Initialize the uid_filter_str, in the future the TUI will allow
+	 * Zooming in/out UIDs. For now just use whatever the user passed
+	 * via --uid.
+	 */
+	evlist__for_each_entry(top->evlist, pos) {
+		struct hists *hists = evsel__hists(pos);
+		hists->uid_filter_str = top->record_opts.target.uid_str;
+	}
+
+	perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
+				      top->min_percent,
+				      &top->session->header.env,
+				      !top->record_opts.overwrite);
+
+	done = 1;
+	return NULL;
+}
+
+static void display_sig(int sig __maybe_unused)
+{
+	done = 1;
+}
+
+static void display_setup_sig(void)
+{
+	signal(SIGSEGV, sighandler_dump_stack);
+	signal(SIGFPE, sighandler_dump_stack);
+	signal(SIGINT,  display_sig);
+	signal(SIGQUIT, display_sig);
+	signal(SIGTERM, display_sig);
+}
+
+/* stdio display thread: redraw the symbol table, then poll stdin for a key. */
+static void *display_thread(void *arg)
+{
+	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+	struct termios save;
+	struct perf_top *top = arg;
+	int delay_msecs, c;
+
+	/* perf must setns(2) to read symbols from other namespaces, which is
+	 * not permitted while the fs_struct has multiple users: unshare(2) the
+	 * fs so that we can keep entering the namespaces we're observing.
+	 */
+	unshare(CLONE_FS);
+
+	display_setup_sig();
+	pthread__unblock_sigwinch();
+repeat:
+	delay_msecs = top->delay_secs * MSEC_PER_SEC;
+	set_term_quiet_input(&save);
+	/* trash return: the character read here is deliberately discarded */
+	getc(stdin);
+
+	while (!done) {
+		perf_top__print_sym_table(top);
+		/*
+		 * Either timeout expired or we got an EINTR due to SIGWINCH,
+		 * refresh screen in both cases.
+		 */
+		switch (poll(&stdin_poll, 1, delay_msecs)) {
+		case 0:
+			continue;
+		case -1:
+			if (errno == EINTR)
+				continue;
+			__fallthrough;
+		default:
+			c = getc(stdin);
+			tcsetattr(0, TCSAFLUSH, &save);
+
+			if (perf_top__handle_keypress(top, c))
+				goto repeat;
+			done = 1;
+		}
+	}
+
+	tcsetattr(0, TCSAFLUSH, &save);
+	return NULL;
+}
+
+static int hist_iter__top_callback(struct hist_entry_iter *iter,
+				   struct addr_location *al, bool single,
+				   void *arg)
+{
+	struct perf_top *top = arg;
+	struct hist_entry *he = iter->he;
+	struct perf_evsel *evsel = iter->evsel;
+
+	if (perf_hpp_list.sym && single)
+		perf_top__record_precise_ip(top, he, iter->sample, evsel->idx, al->addr);
+
+	hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
+		     !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
+	return 0;
+}
+
+static void perf_event__process_sample(struct perf_tool *tool,
+				       const union perf_event *event,
+				       struct perf_evsel *evsel,
+				       struct perf_sample *sample,
+				       struct machine *machine)
+{
+	struct perf_top *top = container_of(tool, struct perf_top, tool);
+	struct addr_location al;
+	int err;
+
+	if (!machine && perf_guest) {
+		static struct intlist *seen;
+
+		if (!seen)
+			seen = intlist__new(NULL);
+
+		if (!intlist__has_entry(seen, sample->pid)) {
+			pr_err("Can't find guest [%d]'s kernel information\n",
+				sample->pid);
+			intlist__add(seen, sample->pid);
+		}
+		return;
+	}
+
+	if (!machine) {
+		pr_err("%u unprocessable samples recorded.\r",
+		       top->session->evlist->stats.nr_unprocessable_samples++);
+		return;
+	}
+
+	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
+		top->exact_samples++;
+
+	if (machine__resolve(machine, &al, sample) < 0)
+		return;
+
+	if (!machine->kptr_restrict_warned &&
+	    symbol_conf.kptr_restrict &&
+	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
+		if (!perf_evlist__exclude_kernel(top->session->evlist)) {
+			ui__warning(
+"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
+"Check /proc/sys/kernel/kptr_restrict.\n\n"
+"Kernel%s samples will not be resolved.\n",
+			  al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
+			  " modules" : "");
+			if (use_browser <= 0)
+				sleep(5);
+		}
+		machine->kptr_restrict_warned = true;
+	}
+
+	if (al.sym == NULL) {
+		const char *msg = "Kernel samples will not be resolved.\n";
+		/*
+		 * As we do lazy loading of symtabs we only will know if the
+		 * specified vmlinux file is invalid when we actually have a
+		 * hit in kernel space and then try to load it. So if we get
+		 * here and there are _no_ symbols in the DSO backing the
+		 * kernel map, bail out.
+		 *
+		 * We may never get here, for instance, if we use -K/
+		 * --hide-kernel-symbols, even if the user specifies an
+		 * invalid --vmlinux ;-)
+		 */
+		if (!machine->kptr_restrict_warned && !top->vmlinux_warned &&
+		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
+		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
+			if (symbol_conf.vmlinux_name) {
+				char serr[256];
+				dso__strerror_load(al.map->dso, serr, sizeof(serr));
+				ui__warning("The %s file can't be used: %s\n%s",
+					    symbol_conf.vmlinux_name, serr, msg);
+			} else {
+				ui__warning("A vmlinux file was not found.\n%s",
+					    msg);
+			}
+
+			if (use_browser <= 0)
+				sleep(5);
+			top->vmlinux_warned = true;
+		}
+	}
+
+	if (al.sym == NULL || !al.sym->idle) {
+		struct hists *hists = evsel__hists(evsel);
+		struct hist_entry_iter iter = {
+			.evsel		= evsel,
+			.sample 	= sample,
+			.add_entry_cb 	= hist_iter__top_callback,
+		};
+
+		if (symbol_conf.cumulate_callchain)
+			iter.ops = &hist_iter_cumulative;
+		else
+			iter.ops = &hist_iter_normal;
+
+		pthread_mutex_lock(&hists->lock);
+
+		err = hist_entry_iter__add(&iter, &al, top->max_stack, top);
+		if (err < 0)
+			pr_err("Problem incrementing symbol period, skipping event\n");
+
+		pthread_mutex_unlock(&hists->lock);
+	}
+
+	addr_location__put(&al);
+}
+
+static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
+{
+	struct record_opts *opts = &top->record_opts;
+	struct perf_evlist *evlist = top->evlist;
+	struct perf_sample sample;
+	struct perf_evsel *evsel;
+	struct perf_mmap *md;
+	struct perf_session *session = top->session;
+	union perf_event *event;
+	struct machine *machine;
+	int ret;
+
+	md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
+	if (perf_mmap__read_init(md) < 0)
+		return;
+
+	while ((event = perf_mmap__read_event(md)) != NULL) {
+		ret = perf_evlist__parse_sample(evlist, event, &sample);
+		if (ret) {
+			pr_err("Can't parse sample, err = %d\n", ret);
+			goto next_event;
+		}
+
+		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
+		assert(evsel != NULL);
+
+		if (event->header.type == PERF_RECORD_SAMPLE)
+			++top->samples;
+
+		switch (sample.cpumode) {
+		case PERF_RECORD_MISC_USER:
+			++top->us_samples;
+			if (top->hide_user_symbols)
+				goto next_event;
+			machine = &session->machines.host;
+			break;
+		case PERF_RECORD_MISC_KERNEL:
+			++top->kernel_samples;
+			if (top->hide_kernel_symbols)
+				goto next_event;
+			machine = &session->machines.host;
+			break;
+		case PERF_RECORD_MISC_GUEST_KERNEL:
+			++top->guest_kernel_samples;
+			machine = perf_session__find_machine(session,
+							     sample.pid);
+			break;
+		case PERF_RECORD_MISC_GUEST_USER:
+			++top->guest_us_samples;
+			/*
+			 * TODO: we don't process guest user from host side
+			 * except simple counting.
+			 */
+			goto next_event;
+		default:
+			if (event->header.type == PERF_RECORD_SAMPLE)
+				goto next_event;
+			machine = &session->machines.host;
+			break;
+		}
+
+
+		if (event->header.type == PERF_RECORD_SAMPLE) {
+			perf_event__process_sample(&top->tool, event, evsel,
+						   &sample, machine);
+		} else if (event->header.type < PERF_RECORD_MAX) {
+			hists__inc_nr_events(evsel__hists(evsel), event->header.type);
+			machine__process_event(machine, event, &sample);
+		} else
+			++session->evlist->stats.nr_unknown_events;
+next_event:
+		perf_mmap__consume(md);
+	}
+
+	perf_mmap__read_done(md);
+}
+
+static void perf_top__mmap_read(struct perf_top *top)
+{
+	bool overwrite = top->record_opts.overwrite;
+	struct perf_evlist *evlist = top->evlist;
+	unsigned long long start, end;
+	int i;
+
+	start = rdclock();
+	if (overwrite)
+		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
+
+	for (i = 0; i < top->evlist->nr_mmaps; i++)
+		perf_top__mmap_read_idx(top, i);
+
+	if (overwrite) {
+		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+	}
+	end = rdclock();
+
+	if ((end - start) > (unsigned long long)top->delay_secs * NSEC_PER_SEC)
+		ui__warning("Too slow to read ring buffer.\n"
+			    "Please try increasing the period (-c) or\n"
+			    "decreasing the freq (-F) or\n"
+			    "limiting the number of CPUs (-C)\n");
+}
+
+/*
+ * Check the per-event overwrite term.
+ * perf top requires a consistent overwrite term across all events:
+ * - No event has a per-event term
+ *   E.g. "cpu/cpu-cycles/,cpu/instructions/"
+ *   Nothing changes, return 0.
+ * - All events have the same per-event term
+ *   E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/"
+ *   Use the per-event setting to replace opts->overwrite if
+ *   they differ, then return 0.
+ * - Events have different per-event terms
+ *   E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/"
+ *   Return -1
+ * - Some events set a per-event term, but others do not
+ *   E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/"
+ *   Return -1
+ */
+static int perf_top__overwrite_check(struct perf_top *top)
+{
+	struct record_opts *opts = &top->record_opts;
+	struct perf_evlist *evlist = top->evlist;
+	struct perf_evsel_config_term *term;
+	struct list_head *config_terms;
+	struct perf_evsel *evsel;
+	int set, overwrite = -1;
+
+	evlist__for_each_entry(evlist, evsel) {
+		set = -1;
+		config_terms = &evsel->config_terms;
+		list_for_each_entry(term, config_terms, list) {
+			if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
+				set = term->val.overwrite ? 1 : 0;
+		}
+
+		/* no term for current and previous event (likely) */
+		if ((overwrite < 0) && (set < 0))
+			continue;
+
+		/* has term for both current and previous event, compare */
+		if ((overwrite >= 0) && (set >= 0) && (overwrite != set))
+			return -1;
+
+		/* no term for current event but has term for previous one */
+		if ((overwrite >= 0) && (set < 0))
+			return -1;
+
+		/* has term for current event */
+		if ((overwrite < 0) && (set >= 0)) {
+			/* if it's first event, set overwrite */
+			if (evsel == perf_evlist__first(evlist))
+				overwrite = set;
+			else
+				return -1;
+		}
+	}
+
+	if ((overwrite >= 0) && (opts->overwrite != overwrite))
+		opts->overwrite = overwrite;
+
+	return 0;
+}
+
+static int perf_top_overwrite_fallback(struct perf_top *top,
+				       struct perf_evsel *evsel)
+{
+	struct record_opts *opts = &top->record_opts;
+	struct perf_evlist *evlist = top->evlist;
+	struct perf_evsel *counter;
+
+	if (!opts->overwrite)
+		return 0;
+
+	/* only fall back when first event fails */
+	if (evsel != perf_evlist__first(evlist))
+		return 0;
+
+	evlist__for_each_entry(evlist, counter)
+		counter->attr.write_backward = false;
+	opts->overwrite = false;
+	pr_debug2("fall back to non-overwrite mode\n");
+	return 1;
+}
+
+static int perf_top__start_counters(struct perf_top *top)
+{
+	char msg[BUFSIZ];
+	struct perf_evsel *counter;
+	struct perf_evlist *evlist = top->evlist;
+	struct record_opts *opts = &top->record_opts;
+
+	if (perf_top__overwrite_check(top)) {
+		ui__error("perf top only support consistent per-event "
+			  "overwrite setting for all events\n");
+		goto out_err;
+	}
+
+	perf_evlist__config(evlist, opts, &callchain_param);
+
+	evlist__for_each_entry(evlist, counter) {
+try_again:
+		if (perf_evsel__open(counter, top->evlist->cpus,
+				     top->evlist->threads) < 0) {
+
+			/*
+			 * Specially handle overwrite fall back.
+			 * Because perf top is the only tool which has
+			 * overwrite mode by default, support
+			 * both overwrite and non-overwrite mode, and
+			 * require consistent mode for all events.
+			 *
+			 * May move it to generic code with more tools
+			 * have similar attribute.
+			 */
+			if (perf_missing_features.write_backward &&
+			    perf_top_overwrite_fallback(top, counter))
+				goto try_again;
+
+			if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+				if (verbose > 0)
+					ui__warning("%s\n", msg);
+				goto try_again;
+			}
+
+			perf_evsel__open_strerror(counter, &opts->target,
+						  errno, msg, sizeof(msg));
+			ui__error("%s\n", msg);
+			goto out_err;
+		}
+	}
+
+	if (perf_evlist__mmap(evlist, opts->mmap_pages) < 0) {
+		ui__error("Failed to mmap with %d (%s)\n",
+			    errno, str_error_r(errno, msg, sizeof(msg)));
+		goto out_err;
+	}
+
+	return 0;
+
+out_err:
+	return -1;
+}
+
+/* Check the callchain setup against the sort keys: -EINVAL when -g is used
+ * without "sym" or when registering the parameters fails, 0 otherwise. */
+static int callchain_param__setup_sample_type(struct callchain_param *callchain)
+{
+	if (!perf_hpp_list.sym && callchain->enabled) {
+		ui__error("Selected -g but \"sym\" not present in --sort/-s.\n");
+		return -EINVAL;
+	}
+	if (perf_hpp_list.sym && callchain->mode != CHAIN_NONE &&
+	    callchain_register_param(callchain) < 0) {
+		ui__error("Can't register callchain params.\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Run 'perf top': set up the session, open and mmap the counters, start the
+ * display thread and read the ring buffers until done; returns 0 on success. */
+static int __cmd_top(struct perf_top *top)
+{
+	char msg[512];
+	struct perf_evsel *pos;
+	struct perf_evsel_config_term *err_term;
+	struct perf_evlist *evlist = top->evlist;
+	struct record_opts *opts = &top->record_opts;
+	pthread_t thread;
+	int ret;
+
+	top->session = perf_session__new(NULL, false, NULL);
+	if (top->session == NULL)
+		return -1;
+
+	if (!objdump_path) {
+		ret = perf_env__lookup_objdump(&top->session->header.env);
+		if (ret)
+			goto out_delete;
+	}
+
+	ret = callchain_param__setup_sample_type(&callchain_param);
+	if (ret)
+		goto out_delete;
+
+	ret = perf_session__register_idle_thread(top->session);
+	if (ret < 0)	/* propagate the failure: ret was 0 before this call */
+		goto out_delete;
+
+	if (top->nr_threads_synthesize > 1)
+		perf_set_multithreaded();
+
+	machine__synthesize_threads(&top->session->machines.host, &opts->target,
+				    top->evlist->threads, false,
+				    opts->proc_map_timeout,
+				    top->nr_threads_synthesize);
+
+	if (top->nr_threads_synthesize > 1)
+		perf_set_singlethreaded();
+
+	if (perf_hpp_list.socket) {
+		ret = perf_env__read_cpu_topology_map(&perf_env);
+		if (ret < 0)
+			goto out_err_cpu_topo;
+	}
+
+	ret = perf_top__start_counters(top);
+	if (ret)
+		goto out_delete;
+
+	ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term);
+	if (ret) {
+		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
+			err_term->val.drv_cfg, perf_evsel__name(pos), errno,
+			str_error_r(errno, msg, sizeof(msg)));
+		goto out_delete;
+	}
+
+	top->session->evlist = top->evlist;
+	perf_session__set_id_hdr_size(top->session);
+
+	/*
+	 * When perf is starting the traced process, all the events (apart from
+	 * group members) have enable_on_exec=1 set, so don't spoil it by
+	 * prematurely enabling them.
+	 *
+	 * XXX 'top' still doesn't start workloads like record, trace, but should,
+	 * so leave the check here.
+	 */
+	if (!target__none(&opts->target))
+		perf_evlist__enable(top->evlist);
+
+	/* Wait for a minimal set of events before starting the snapshot */
+	perf_evlist__poll(top->evlist, 100);
+
+	perf_top__mmap_read(top);
+
+	ret = -1;
+	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
+							    display_thread), top)) {
+		ui__error("Could not create display thread.\n");
+		goto out_delete;
+	}
+
+	if (top->realtime_prio) {
+		struct sched_param param;
+
+		param.sched_priority = top->realtime_prio;
+		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+			ui__error("Could not set realtime priority.\n");
+			goto out_join;
+		}
+	}
+
+	while (!done) {
+		u64 hits = top->samples;
+
+		perf_top__mmap_read(top);
+
+		if (opts->overwrite || (hits == top->samples))
+			ret = perf_evlist__poll(top->evlist, 100);
+
+		if (resize) {
+			perf_top__resize(top);
+			resize = 0;
+		}
+	}
+
+	ret = 0;
+out_join:
+	pthread_join(thread, NULL);
+out_delete:
+	perf_session__delete(top->session);
+	top->session = NULL;
+
+	return ret;
+
+out_err_cpu_topo:
+	ui__error("Could not read the CPU topology map: %s\n",
+		  str_error_r(-ret, msg, sizeof(msg)));
+	goto out_delete;
+}
+
+static int
+callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+	symbol_conf.use_callchain = true;
+	return record_callchain_opt(opt, arg, unset);
+}
+
+static int
+parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+	struct callchain_param *callchain = opt->value;
+
+	callchain->enabled = !unset;
+	callchain->record_mode = CALLCHAIN_FP;
+
+	/*
+	 * --no-call-graph
+	 */
+	if (unset) {
+		symbol_conf.use_callchain = false;
+		callchain->record_mode = CALLCHAIN_NONE;
+		return 0;
+	}
+
+	return parse_callchain_top_opt(arg);
+}
+
+static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
+{
+	if (!strcmp(var, "top.call-graph")) {
+		var = "call-graph.record-mode";
+		return perf_default_config(var, value, cb);
+	}
+	if (!strcmp(var, "top.children")) {
+		symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+		return 0;
+	}
+
+	return 0;
+}
+
+static int
+parse_percent_limit(const struct option *opt, const char *arg,
+		    int unset __maybe_unused)
+{
+	struct perf_top *top = opt->value;
+
+	top->min_percent = strtof(arg, NULL);
+	return 0;
+}
+
+const char top_callchain_help[] = CALLCHAIN_RECORD_HELP CALLCHAIN_REPORT_HELP
+	"\n\t\t\t\tDefault: fp,graph,0.5,caller,function";
+
+/* 'perf top' entry point: parse the options, set up the evlist and symbols, then
+ * run __cmd_top().  Returns 0 on success and a non-zero status on any failure. */
+int cmd_top(int argc, const char **argv)
+{
+	char errbuf[BUFSIZ];
+	struct perf_top top = {
+		.count_filter	     = 5,
+		.delay_secs	     = 2,
+		.record_opts = {
+			.mmap_pages	= UINT_MAX,
+			.user_freq	= UINT_MAX,
+			.user_interval	= ULLONG_MAX,
+			.freq		= 4000, /* 4 KHz */
+			.target		= {
+				.uses_mmap   = true,
+			},
+			.proc_map_timeout    = 500,
+			.overwrite	= 1,
+		},
+		.max_stack	     = sysctl_perf_event_max_stack,
+		.sym_pcnt_filter     = 5,
+		.nr_threads_synthesize = UINT_MAX,
+	};
+	struct record_opts *opts = &top.record_opts;
+	struct target *target = &opts->target;
+	const struct option options[] = {
+	OPT_CALLBACK('e', "event", &top.evlist, "event",
+		     "event selector. use 'perf list' to list available events",
+		     parse_events_option),
+	OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
+	OPT_STRING('p', "pid", &target->pid, "pid",
+		    "profile events on existing process id"),
+	OPT_STRING('t', "tid", &target->tid, "tid",
+		    "profile events on existing thread id"),
+	OPT_BOOLEAN('a', "all-cpus", &target->system_wide,
+			    "system-wide collection from all CPUs"),
+	OPT_STRING('C', "cpu", &target->cpu_list, "cpu",
+		    "list of cpus to monitor"),
+	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+		   "file", "vmlinux pathname"),
+	OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
+		    "don't load vmlinux even if found"),
+	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
+		    "hide kernel symbols"),
+	OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages",
+		     "number of mmap data pages",
+		     perf_evlist__parse_mmap_pages),
+	OPT_INTEGER('r', "realtime", &top.realtime_prio,
+		    "collect data with this RT SCHED_FIFO priority"),
+	OPT_INTEGER('d', "delay", &top.delay_secs,
+		    "number of seconds to delay between refreshes"),
+	OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
+			    "dump the symbol table used for profiling"),
+	OPT_INTEGER('f', "count-filter", &top.count_filter,
+		    "only display functions with more events than this"),
+	OPT_BOOLEAN(0, "group", &opts->group,
+			    "put the counters into a counter group"),
+	OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
+		    "child tasks do not inherit counters"),
+	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
+		    "symbol to annotate"),
+	OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"),
+	OPT_CALLBACK('F', "freq", &top.record_opts, "freq or 'max'",
+		     "profile at this frequency",
+		      record__parse_freq),
+	OPT_INTEGER('E', "entries", &top.print_entries,
+		    "display this many functions"),
+	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
+		    "hide user symbols"),
+	OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
+	OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show counter open errors, etc)"),
+	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
+		   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+		   " Please refer the man page for the complete list."),
+	OPT_STRING(0, "fields", &field_order, "key[,keys...]",
+		   "output field(s): overhead, period, sample plus all of sort keys"),
+	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
+		    "Show a column with the number of samples"),
+	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
+			   NULL, "enables call-graph recording and display",
+			   &callchain_opt),
+	OPT_CALLBACK(0, "call-graph", &callchain_param,
+		     "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]",
+		     top_callchain_help, &parse_callchain_opt),
+	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
+		    "Accumulate callchains of children and show total overhead as well"),
+	OPT_INTEGER(0, "max-stack", &top.max_stack,
+		    "Set the maximum stack depth when parsing the callchain. "
+		    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
+	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+		   "ignore callees of these functions in call graphs",
+		   report_parse_ignore_callees_opt),
+	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+		    "Show a column with the sum of periods"),
+	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
+		   "only consider symbols in these dsos"),
+	OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
+		   "only consider symbols in these comms"),
+	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
+		   "only consider these symbols"),
+	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
+		    "Interleave source code with assembly code (default)"),
+	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
+		    "Display raw encoding of assembly instructions (default)"),
+	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
+		    "Enable kernel symbol demangling"),
+	OPT_STRING(0, "objdump", &objdump_path, "path",
+		    "objdump binary to use for disassembly and annotations"),
+	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
+		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
+	OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
+	OPT_CALLBACK(0, "percent-limit", &top, "percent",
+		     "Don't show entries under that percent", parse_percent_limit),
+	OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+		     "How to display percentage of filtered entries", parse_filter_percentage),
+	OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
+		   "width[,width...]",
+		   "don't try to adjust column width, use these fixed values"),
+	OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
+			"per thread proc mmap processing timeout in ms"),
+	OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
+		     "branch any", "sample any taken branches",
+		     parse_branch_stack),
+	OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
+		     "branch filter mask", "branch stack filter modes",
+		     parse_branch_stack),
+	OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
+		    "Show raw trace event output (do not use print fmt or plugins)"),
+	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+		    "Show entries in a hierarchy"),
+	OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
+	OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
+			"number of thread to run event synthesize"),
+	OPT_END()
+	};
+	const char * const top_usage[] = { "perf top [<options>]", NULL };
+	int status = hists__init();
+
+	if (status < 0)
+		return status;
+
+	top.evlist = perf_evlist__new();
+	if (top.evlist == NULL)
+		return -ENOMEM;
+
+	status = perf_config(perf_top_config, &top);
+	if (status)
+		goto out_delete_evlist;	/* don't leak top.evlist */
+
+	argc = parse_options(argc, argv, options, top_usage, 0);
+	if (argc)
+		usage_with_options(top_usage, options);
+
+	if (!top.evlist->nr_entries &&
+	    perf_evlist__add_default(top.evlist) < 0) {
+		pr_err("Not enough memory for event selector list\n");
+		status = -ENOMEM;
+		goto out_delete_evlist;
+	}
+
+	if (symbol_conf.report_hierarchy) {
+		/* disable incompatible options */
+		symbol_conf.event_group = false;
+		symbol_conf.cumulate_callchain = false;
+
+		if (field_order) {
+			pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+			parse_options_usage(top_usage, options, "fields", 0);
+			parse_options_usage(NULL, options, "hierarchy", 0);
+			status = -EINVAL;
+			goto out_delete_evlist;
+		}
+	}
+
+	sort__mode = SORT_MODE__TOP;
+	/* display thread wants entries to be collapsed in a different tree */
+	perf_hpp_list.need_collapse = 1;
+
+	if (top.use_stdio)
+		use_browser = 0;
+	else if (top.use_tui)
+		use_browser = 1;
+
+	setup_browser(false);
+
+	if (setup_sorting(top.evlist) < 0) {
+		if (sort_order)
+			parse_options_usage(top_usage, options, "s", 1);
+		if (field_order)
+			parse_options_usage(sort_order ? NULL : top_usage,
+					    options, "fields", 0);
+		status = -EINVAL;
+		goto out_delete_evlist;
+	}
+
+	status = target__validate(target);
+	if (status) {
+		target__strerror(target, status, errbuf, BUFSIZ);
+		ui__warning("%s\n", errbuf);
+	}
+
+	status = target__parse_uid(target);
+	if (status) {
+		int saved_errno = errno;
+
+		target__strerror(target, status, errbuf, BUFSIZ);
+		ui__error("%s\n", errbuf);
+		status = -saved_errno;
+		goto out_delete_evlist;
+	}
+
+	if (target__none(target))
+		target->system_wide = true;
+
+	if (perf_evlist__create_maps(top.evlist, target) < 0) {
+		ui__error("Couldn't create thread/CPU maps: %s\n",
+			  errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
+		status = -1;
+		goto out_delete_evlist;
+	}
+
+	symbol_conf.nr_events = top.evlist->nr_entries;
+
+	if (top.delay_secs < 1)
+		top.delay_secs = 1;
+
+	if (record_opts__config(opts)) {
+		status = -EINVAL;
+		goto out_delete_evlist;
+	}
+
+	top.sym_evsel = perf_evlist__first(top.evlist);
+
+	if (!callchain_param.enabled) {
+		symbol_conf.cumulate_callchain = false;
+		perf_hpp__cancel_cumulate();
+	}
+
+	if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
+		callchain_param.order = ORDER_CALLER;
+
+	status = symbol__annotation_init();
+	if (status < 0)
+		goto out_delete_evlist;
+	annotation_config__init();
+	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
+	status = symbol__init(NULL);
+	if (status < 0)
+		goto out_delete_evlist;
+
+	sort__setup_elide(stdout);
+	get_term_dimensions(&top.winsize);
+	if (top.print_entries == 0) {
+		perf_top__update_print_entries(&top);
+		signal(SIGWINCH, winch_sig);
+	}
+
+	status = __cmd_top(&top);
+
+out_delete_evlist:
+	perf_evlist__delete(top.evlist);
+
+	return status;
+}
diff --git a/builtin-trace.c b/builtin-trace.c
new file mode 100644
index 0000000..3ad17ee
--- /dev/null
+++ b/builtin-trace.c
@@ -0,0 +1,3243 @@
+/*
+ * builtin-trace.c
+ *
+ * Builtin 'trace' command:
+ *
+ * Display a continuously updated trace of any workload, CPU, specific PID,
+ * system wide, etc.  Default format is loosely strace like, but any other
+ * event may be specified using --event.
+ *
+ * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Initially based on the 'trace' prototype by Thomas Gleixner:
+ *
+ * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <traceevent/event-parse.h>
+#include <api/fs/tracing_path.h>
+#include "builtin.h"
+#include "util/cgroup.h"
+#include "util/color.h"
+#include "util/debug.h"
+#include "util/env.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include <subcmd/exec-cmd.h>
+#include "util/machine.h"
+#include "util/path.h"
+#include "util/session.h"
+#include "util/thread.h"
+#include <subcmd/parse-options.h>
+#include "util/strlist.h"
+#include "util/intlist.h"
+#include "util/thread_map.h"
+#include "util/stat.h"
+#include "trace/beauty/beauty.h"
+#include "trace-event.h"
+#include "util/parse-events.h"
+#include "util/bpf-loader.h"
+#include "callchain.h"
+#include "print_binary.h"
+#include "string2.h"
+#include "syscalltbl.h"
+#include "rb_resort.h"
+
+#include <errno.h>
+#include <inttypes.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/err.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/stringify.h>
+#include <linux/time64.h>
+#include <fcntl.h>
+
+#include "sane_ctype.h"
+
+/* Fallback used only when the headers included above do not define O_CLOEXEC. */
+#ifndef O_CLOEXEC
+# define O_CLOEXEC		02000000
+#endif
+
+/*
+ * Fallback used only when the headers do not define it; this value is the
+ * offset given to the fcntl_linux_specific_cmds string table further down.
+ */
+#ifndef F_LINUX_SPECIFIC_BASE
+# define F_LINUX_SPECIFIC_BASE	1024
+#endif
+
+/*
+ * State of one 'perf trace' session. Only the members whose use is visible
+ * near this definition are annotated; the rest are consumed further down
+ * the file.
+ */
+struct trace {
+	struct perf_tool	tool;
+	struct syscalltbl	*sctbl;
+	struct {
+		int		max;
+		struct syscall  *table;
+		struct {
+			struct perf_evsel *sys_enter,
+					  *sys_exit;
+		}		events;
+	} syscalls;
+	struct record_opts	opts;
+	struct perf_evlist	*evlist;
+	/* machine searched by pid__scnprintf_fd() to resolve a pid to a thread */
+	struct machine		*host;
+	struct thread		*current;
+	struct cgroup		*cgroup;
+	/* subtracted from every sample timestamp before it is printed */
+	u64			base_time;
+	/* stream used for trace output, e.g. the "LOST events" notice */
+	FILE			*output;
+	unsigned long		nr_events;
+	struct strlist		*ev_qualifier;
+	struct {
+		size_t		nr;
+		int		*entries;
+	}			ev_qualifier_ids;
+	struct {
+		size_t		nr;
+		pid_t		*entries;
+	}			filter_pids;
+	/* in milliseconds: trace__filter_duration() scales it by NSEC_PER_MSEC */
+	double			duration_filter;
+	double			runtime_ms;
+	struct {
+		/* proc_getname counts fd path lookups that had to go to /proc */
+		u64		vfs_getname,
+				proc_getname;
+	} stats;
+	unsigned int		max_stack;
+	unsigned int		min_stack;
+	bool			not_ev_qualifier;
+	/* when false, thread__fd_path() never reads /proc for missing fd paths */
+	bool			live;
+	bool			full_time;
+	bool			sched;
+	/* when set, each entry head is prefixed with the tid (see show_comm) */
+	bool			multiple_threads;
+	bool			summary;
+	bool			summary_only;
+	bool			failure_only;
+	/* with multiple_threads: also print "comm/" before the tid */
+	bool			show_comm;
+	bool			print_sample;
+	bool			show_tool_stats;
+	bool			trace_syscalls;
+	bool			kernel_syscallchains;
+	bool			force;
+	/* when false, filename arguments are printed as raw pointer values */
+	bool			vfs_getname;
+	int			trace_pgfaults;
+	int			open_id;
+};
+
+/*
+ * Accessor for one tracepoint field: 'offset' is the byte offset of the field
+ * inside sample->raw_data, and the union holds the reader installed by
+ * tp_field__init_uint() (integer) or tp_field__init_ptr() (pointer).
+ * Only the member that was initialised may be called.
+ */
+struct tp_field {
+	int offset;
+	union {
+		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
+		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
+	};
+};
+
+/*
+ * Generates tp_field__u<bits>(): copies a u<bits> out of sample->raw_data at
+ * field->offset and returns it widened to u64. memcpy() is used rather than a
+ * pointer cast, so no particular alignment of the field is relied upon.
+ */
+#define TP_UINT_FIELD(bits) \
+static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
+{ \
+	u##bits value; \
+	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
+	return value;  \
+}
+
+/* Native byte order readers for 8, 16, 32 and 64 bit fields. */
+TP_UINT_FIELD(8);
+TP_UINT_FIELD(16);
+TP_UINT_FIELD(32);
+TP_UINT_FIELD(64);
+
+/*
+ * Generates tp_field__swapped_u<bits>(): same as tp_field__u<bits>() but the
+ * value is passed through bswap_<bits>() before being returned.
+ */
+#define TP_UINT_FIELD__SWAPPED(bits) \
+static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
+{ \
+	u##bits value; \
+	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
+	return bswap_##bits(value);\
+}
+
+/* No 8 bit variant: a single byte has nothing to swap. */
+TP_UINT_FIELD__SWAPPED(16);
+TP_UINT_FIELD__SWAPPED(32);
+TP_UINT_FIELD__SWAPPED(64);
+
+/*
+ * Point @field at the integer described by @format_field.
+ *
+ * The offset is always recorded; the reader is chosen from the field width
+ * (1, 2, 4 or 8 bytes) and, for multi-byte widths, from @needs_swap.
+ *
+ * Returns 0 on success, -1 when the width is not one of the supported sizes
+ * (in which case no reader is installed).
+ */
+static int tp_field__init_uint(struct tp_field *field,
+			       struct format_field *format_field,
+			       bool needs_swap)
+{
+	field->offset = format_field->offset;
+
+	if (format_field->size == 1)
+		field->integer = tp_field__u8;
+	else if (format_field->size == 2)
+		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
+	else if (format_field->size == 4)
+		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
+	else if (format_field->size == 8)
+		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
+	else
+		return -1;
+
+	return 0;
+}
+
+/* Return the address of @field's payload inside the raw data of @sample. */
+static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
+{
+	const int off = field->offset;
+
+	return sample->raw_data + off;
+}
+
+/*
+ * Set @field up as a pointer accessor for @format_field: install the pointer
+ * reader and remember where the field lives. Cannot fail; always returns 0.
+ */
+static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
+{
+	field->pointer = tp_field__ptr;
+	field->offset  = format_field->offset;
+
+	return 0;
+}
+
+/*
+ * Field accessors stored in evsel->priv for the syscall tracepoints.
+ * 'args' and 'ret' share storage through the anonymous union, so a given
+ * evsel uses one or the other, never both.
+ */
+struct syscall_tp {
+	struct tp_field id;
+	union {
+		struct tp_field args, ret;
+	};
+};
+
+/*
+ * Look up tracepoint field @name in @evsel and initialise @field as an
+ * integer accessor for it, honouring the evsel's byte-swap requirement.
+ *
+ * Returns -1 if the field does not exist, otherwise whatever
+ * tp_field__init_uint() returns.
+ */
+static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
+					  struct tp_field *field,
+					  const char *name)
+{
+	struct format_field *ff = perf_evsel__field(evsel, name);
+
+	return ff != NULL ? tp_field__init_uint(field, ff, evsel->needs_swap) : -1;
+}
+
+/*
+ * Initialise the integer member 'name' of the struct syscall_tp hanging off
+ * evsel->priv; the tracepoint field looked up has the same spelling as the
+ * member (stringified with #name). evsel->priv must already be allocated.
+ */
+#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
+	({ struct syscall_tp *sc = evsel->priv;\
+	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
+
+/*
+ * Look up tracepoint field @name in @evsel and initialise @field as a
+ * pointer accessor for it.
+ *
+ * Returns -1 if the field does not exist, otherwise whatever
+ * tp_field__init_ptr() returns.
+ */
+static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
+					 struct tp_field *field,
+					 const char *name)
+{
+	struct format_field *ff = perf_evsel__field(evsel, name);
+
+	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
+}
+
+/*
+ * Pointer flavour of perf_evsel__init_sc_tp_uint_field(): initialise member
+ * 'name' of the struct syscall_tp in evsel->priv as a pointer accessor for
+ * the tracepoint field of the same name.
+ */
+#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
+	({ struct syscall_tp *sc = evsel->priv;\
+	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
+
+/*
+ * Dispose of @evsel together with its private data: evsel->priv is released
+ * via zfree() first, then the evsel itself is deleted.
+ */
+static void perf_evsel__delete_priv(struct perf_evsel *evsel)
+{
+	zfree(&evsel->priv);
+	perf_evsel__delete(evsel);
+}
+
+/*
+ * Attach a struct syscall_tp to @evsel, resolve its "id" field and install
+ * @handler as the sample handler.
+ *
+ * Returns 0 on success, -ENOMEM if the private area cannot be allocated and
+ * -ENOENT if the "id" field cannot be set up (the private area is released
+ * again in that case).
+ */
+static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
+{
+	evsel->priv = malloc(sizeof(struct syscall_tp));
+	if (evsel->priv == NULL)
+		return -ENOMEM;
+
+	if (perf_evsel__init_sc_tp_uint_field(evsel, id)) {
+		zfree(&evsel->priv);
+		return -ENOENT;
+	}
+
+	evsel->handler = handler;
+	return 0;
+}
+
+/*
+ * Create the evsel for the syscall tracepoint named @direction and prepare
+ * it with perf_evsel__init_syscall_tp().
+ *
+ * "raw_syscalls" is tried first, then "syscalls" as a fallback. Returns the
+ * new evsel, or NULL if neither tracepoint is available or initialisation
+ * fails (the evsel is destroyed in the latter case).
+ */
+static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
+{
+	struct perf_evsel *tp = perf_evsel__newtp("raw_syscalls", direction);
+
+	if (IS_ERR(tp)) {
+		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
+		tp = perf_evsel__newtp("syscalls", direction);
+		if (IS_ERR(tp))
+			return NULL;
+	}
+
+	if (perf_evsel__init_syscall_tp(tp, handler) == 0)
+		return tp;
+
+	perf_evsel__delete_priv(tp);
+	return NULL;
+}
+
+/*
+ * Read integer member 'name' of the evsel's struct syscall_tp from 'sample'
+ * by calling the reader installed for that member.
+ */
+#define perf_evsel__sc_tp_uint(evsel, name, sample) \
+	({ struct syscall_tp *fields = evsel->priv; \
+	   fields->name.integer(&fields->name, sample); })
+
+/* Same as perf_evsel__sc_tp_uint() but through the pointer reader. */
+#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
+	({ struct syscall_tp *fields = evsel->priv; \
+	   fields->name.pointer(&fields->name, sample); })
+
+/*
+ * Format @val into @bf using string table @sa.
+ *
+ * @val is rebased by sa->offset; when the result indexes into the table the
+ * matching string is printed, otherwise @val itself is printed with @intfmt.
+ * Returns what scnprintf() returns.
+ */
+size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
+{
+	const int idx = val - sa->offset;
+
+	if (idx >= 0 && idx < sa->nr_entries)
+		return scnprintf(bf, size, "%s", sa->entries[idx]);
+
+	return scnprintf(bf, size, intfmt, val);
+}
+
+/*
+ * Syscall argument flavour of strarray__scnprintf(): the string table comes
+ * from arg->parm and the value from arg->val; @intfmt is the fallback format
+ * for values outside the table.
+ */
+static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
+						const char *intfmt,
+						struct syscall_arg *arg)
+{
+	struct strarray *table = arg->parm;
+
+	return strarray__scnprintf(table, bf, size, intfmt, arg->val);
+}
+
+/*
+ * Print arg->val through the string table in arg->parm, falling back to
+ * decimal ("%d") for values the table does not cover.
+ */
+static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
+					      struct syscall_arg *arg)
+{
+	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
+}
+
+/* Name used for this formatter in the syscall_fmts[] table. */
+#define SCA_STRARRAY syscall_arg__scnprintf_strarray
+
+/*
+ * A list of string tables that are tried in order by
+ * syscall_arg__scnprintf_strarrays().
+ */
+struct strarrays {
+	int		nr_entries;
+	struct strarray **entries;
+};
+
+/*
+ * Define 'strarrays__<array>' wrapping the given array of struct strarray
+ * pointers; the entry count is taken from the array with ARRAY_SIZE().
+ */
+#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
+	.nr_entries = ARRAY_SIZE(array), \
+	.entries = array, \
+}
+
+/*
+ * Print arg->val using the first table in arg->parm (a struct strarrays)
+ * whose range covers it.
+ *
+ * If the covering table has no string for that slot the search stops and,
+ * as when no table covers the value at all, the value is printed in decimal.
+ */
+size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
+					struct syscall_arg *arg)
+{
+	struct strarrays *tables = arg->parm;
+	int t = 0;
+
+	while (t < tables->nr_entries) {
+		struct strarray *sa = tables->entries[t++];
+		int idx = arg->val - sa->offset;
+
+		if (idx < 0 || idx >= sa->nr_entries)
+			continue;
+
+		if (sa->entries[idx] != NULL)
+			return scnprintf(bf, size, "%s", sa->entries[idx]);
+
+		/* in range but unnamed: give up on the remaining tables */
+		break;
+	}
+
+	return scnprintf(bf, size, "%d", arg->val);
+}
+
+#ifndef AT_FDCWD
+#define AT_FDCWD	-100
+#endif
+
+/*
+ * Formatter for the directory fd of the *at() syscalls: AT_FDCWD is shown as
+ * "CWD", any other value is handed to the regular fd formatter.
+ */
+static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
+					   struct syscall_arg *arg)
+{
+	const int dirfd = arg->val;
+
+	if (dirfd != AT_FDCWD)
+		return syscall_arg__scnprintf_fd(bf, size, arg);
+
+	return scnprintf(bf, size, "CWD");
+}
+
+#define SCA_FDAT syscall_arg__scnprintf_fd_at
+
+/*
+ * Forward declaration: the definition comes later in this file, but the
+ * syscall_fmts[] table needs the name before that.
+ */
+static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
+					      struct syscall_arg *arg);
+
+#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
+
+/*
+ * Plain numeric formatters for a syscall argument.
+ * NOTE(review): the three functions print the same arg->val with "%#lx",
+ * "%d" and "%ld"; confirm against the declaration of struct syscall_arg
+ * that the "%d" case matches the member's type.
+ */
+
+/* Print arg->val in hexadecimal with a "0x" prefix. */
+size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
+{
+	return scnprintf(bf, size, "%#lx", arg->val);
+}
+
+/* Print arg->val as a decimal int. */
+size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
+{
+	return scnprintf(bf, size, "%d", arg->val);
+}
+
+/* Print arg->val as a decimal long. */
+size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
+{
+	return scnprintf(bf, size, "%ld", arg->val);
+}
+
+/*
+ * String tables used by the strarray formatters. The position of a string
+ * is significant: entry i names the value (i + offset), where the offset is
+ * the second argument of DEFINE_STRARRAY_OFFSET().
+ */
+static const char *bpf_cmd[] = {
+	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
+	"MAP_GET_NEXT_KEY", "PROG_LOAD",
+};
+static DEFINE_STRARRAY(bpf_cmd);
+
+/* First entry names value 1, hence the offset. */
+static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
+static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
+
+static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
+static DEFINE_STRARRAY(itimers);
+
+static const char *keyctl_options[] = {
+	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
+	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
+	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
+	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
+	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
+};
+static DEFINE_STRARRAY(keyctl_options);
+
+/* "DATA" and "HOLE" are only listed when the libc headers define them. */
+static const char *whences[] = { "SET", "CUR", "END",
+#ifdef SEEK_DATA
+"DATA",
+#endif
+#ifdef SEEK_HOLE
+"HOLE",
+#endif
+};
+static DEFINE_STRARRAY(whences);
+
+/* fcntl commands starting at value 0. */
+static const char *fcntl_cmds[] = {
+	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
+	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
+	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
+	"GETOWNER_UIDS",
+};
+static DEFINE_STRARRAY(fcntl_cmds);
+
+/*
+ * fcntl commands numbered from F_LINUX_SPECIFIC_BASE. The [5] designator
+ * leaves slots 3 and 4 as NULL, which the strarrays formatter treats as
+ * "no name" and prints numerically.
+ */
+static const char *fcntl_linux_specific_cmds[] = {
+	"SETLEASE", "GETLEASE", "NOTIFY", [5] =	"CANCELLK", "DUPFD_CLOEXEC",
+	"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
+	"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
+};
+
+static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
+
+/* Both fcntl tables, in the order they are searched. */
+static struct strarray *fcntl_cmds_arrays[] = {
+	&strarray__fcntl_cmds,
+	&strarray__fcntl_linux_specific_cmds,
+};
+
+static DEFINE_STRARRAYS(fcntl_cmds_arrays);
+
+/* Names for the 'resource' argument of the *rlimit syscalls, from value 0. */
+static const char *rlimit_resources[] = {
+	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
+	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
+	"RTTIME",
+};
+static DEFINE_STRARRAY(rlimit_resources);
+
+/* Names for the 'how' argument of rt_sigprocmask, from value 0. */
+static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
+static DEFINE_STRARRAY(sighow);
+
+/* Clock ids, from value 0. */
+static const char *clockid[] = {
+	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
+	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
+	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
+};
+static DEFINE_STRARRAY(clockid);
+
+/* Socket address families, from value 0. */
+static const char *socket_families[] = {
+	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
+	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
+	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
+	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
+	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
+	"ALG", "NFC", "VSOCK",
+};
+static DEFINE_STRARRAY(socket_families);
+
+/*
+ * Formatter for the 'mode' argument of access().
+ *
+ * F_OK is printed as "F". Otherwise one letter is emitted, without any
+ * separator, for each of R_OK, W_OK and X_OK that is set; bits that remain
+ * afterwards are appended as "|0x...".
+ */
+static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
+						 struct syscall_arg *arg)
+{
+	static const struct {
+		int	    bit;
+		const char *letter;
+	} known[] = {
+		{ R_OK, "R" },
+		{ W_OK, "W" },
+		{ X_OK, "X" },
+	};
+	size_t printed = 0, i;
+	int mode = arg->val;
+
+	if (mode == F_OK) /* 0 */
+		return scnprintf(bf, size, "F");
+
+	for (i = 0; i < ARRAY_SIZE(known); i++) {
+		if (!(mode & known[i].bit))
+			continue;
+
+		printed += scnprintf(bf + printed, size - printed, "%s", known[i].letter);
+		mode &= ~known[i].bit;
+	}
+
+	if (mode)
+		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
+
+	return printed;
+}
+
+#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
+
+/*
+ * Forward declaration: the definition comes later in this file, but the
+ * syscall_fmts[] table needs the name before that.
+ */
+static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
+					      struct syscall_arg *arg);
+
+#define SCA_FILENAME syscall_arg__scnprintf_filename
+
+/*
+ * Formatter for the 'flags' argument of pipe2().
+ *
+ * O_CLOEXEC and O_NONBLOCK are printed by name, in that order and joined
+ * with '|'; whatever bits are left over are appended in hexadecimal.
+ */
+static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
+						struct syscall_arg *arg)
+{
+	static const struct {
+		int	    bit;
+		const char *name;
+	} known[] = {
+		{ O_CLOEXEC,  "CLOEXEC"  },
+		{ O_NONBLOCK, "NONBLOCK" },
+	};
+	int printed = 0, flags = arg->val;
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(known); i++) {
+		if (!(flags & known[i].bit))
+			continue;
+
+		printed += scnprintf(bf + printed, size - printed, "%s%s",
+				     printed ? "|" : "", known[i].name);
+		flags &= ~known[i].bit;
+	}
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
+
+#ifndef GRND_NONBLOCK
+#define GRND_NONBLOCK	0x0001
+#endif
+#ifndef GRND_RANDOM
+#define GRND_RANDOM	0x0002
+#endif
+
+/*
+ * Formatter for the 'flags' argument of getrandom().
+ *
+ * GRND_RANDOM and GRND_NONBLOCK are printed by name, in that order and
+ * joined with '|'; whatever bits are left over are appended in hexadecimal.
+ */
+static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
+						   struct syscall_arg *arg)
+{
+	static const struct {
+		int	    bit;
+		const char *name;
+	} known[] = {
+		{ GRND_RANDOM,   "RANDOM"   },
+		{ GRND_NONBLOCK, "NONBLOCK" },
+	};
+	int printed = 0, flags = arg->val;
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(known); i++) {
+		if (!(flags & known[i].bit))
+			continue;
+
+		printed += scnprintf(bf + printed, size - printed, "%s%s",
+				     printed ? "|" : "", known[i].name);
+		flags &= ~known[i].bit;
+	}
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
+
+/*
+ * Initializer for a struct syscall_arg_fmt that prints the argument through
+ * 'strarray__<array>'. The 'name' parameter is not used by the expansion; in
+ * the table it only documents which syscall argument is being formatted.
+ */
+#define STRARRAY(name, array) \
+	  { .scnprintf	= SCA_STRARRAY, \
+	    .parm	= &strarray__##array, }
+
+#include "trace/beauty/arch_errno_names.c"
+#include "trace/beauty/eventfd.c"
+#include "trace/beauty/futex_op.c"
+#include "trace/beauty/futex_val3.c"
+#include "trace/beauty/mmap.c"
+#include "trace/beauty/mode_t.c"
+#include "trace/beauty/msg_flags.c"
+#include "trace/beauty/open_flags.c"
+#include "trace/beauty/perf_event_open.c"
+#include "trace/beauty/pid.c"
+#include "trace/beauty/sched_policy.c"
+#include "trace/beauty/seccomp.c"
+#include "trace/beauty/signum.c"
+#include "trace/beauty/socket_type.c"
+#include "trace/beauty/waitid_options.c"
+
+/*
+ * How to pretty-print one syscall argument:
+ *  scnprintf: formatter writing into bf/size
+ *  parm:      opaque data for the formatter, e.g. a string table
+ *  name:      argument name, set explicitly by a few syscall_fmts[] entries
+ *  show_zero: NOTE(review): presumably asks for the argument to be printed
+ *             even when its value is 0; the consumer is not in this part of
+ *             the file, confirm there.
+ */
+struct syscall_arg_fmt {
+	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
+	void	   *parm;
+	const char *name;
+	bool	   show_zero;
+};
+
+/*
+ * Per-syscall formatting overrides.
+ *
+ * IMPORTANT: syscall_fmt__find() looks entries up with bsearch() using
+ * strcmp() on .name, so this table MUST be kept sorted by .name in strcmp()
+ * order (e.g. "getpgid" sorts before "getpid", "open_by_handle_at" before
+ * "openat"). An entry that is out of place may silently never be found.
+ *
+ *  name:    syscall name, the search key
+ *  alias:   alternative name for the same syscall
+ *  arg:     per-argument formatters, indexed by argument position
+ *  nr_args: number of arguments, for entries that name them explicitly
+ *  errpid, timeout, hexret: flags consumed by the return value printing
+ *           code, which is outside this part of the file
+ */
+static struct syscall_fmt {
+	const char *name;
+	const char *alias;
+	struct syscall_arg_fmt arg[6];
+	u8	   nr_args;
+	bool	   errpid;
+	bool	   timeout;
+	bool	   hexret;
+} syscall_fmts[] = {
+	{ .name	    = "access",
+	  .arg = { [1] = { .scnprintf = SCA_ACCMODE,  /* mode */ }, }, },
+	{ .name	    = "bpf",
+	  .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
+	{ .name	    = "brk",	    .hexret = true,
+	  .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
+	{ .name     = "clock_gettime",
+	  .arg = { [0] = STRARRAY(clk_id, clockid), }, },
+	{ .name	    = "clone",	    .errpid = true, .nr_args = 5,
+	  .arg = { [0] = { .name = "flags",	    .scnprintf = SCA_CLONE_FLAGS, },
+		   [1] = { .name = "child_stack",   .scnprintf = SCA_HEX, },
+		   [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
+		   [3] = { .name = "child_tidptr",  .scnprintf = SCA_HEX, },
+		   [4] = { .name = "tls",	    .scnprintf = SCA_HEX, }, }, },
+	{ .name	    = "close",
+	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
+	{ .name	    = "epoll_ctl",
+	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
+	{ .name	    = "eventfd2",
+	  .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
+	{ .name	    = "fchmodat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
+	{ .name	    = "fchownat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
+	{ .name	    = "fcntl",
+	  .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
+			   .parm      = &strarrays__fcntl_cmds_arrays,
+			   .show_zero = true, },
+		   [2] = { .scnprintf =  SCA_FCNTL_ARG, /* arg */ }, }, },
+	{ .name	    = "flock",
+	  .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
+	{ .name	    = "fstat", .alias = "newfstat", },
+	{ .name	    = "fstatat", .alias = "newfstatat", },
+	{ .name	    = "futex",
+	  .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
+		   [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
+	{ .name	    = "futimesat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
+	{ .name	    = "getitimer",
+	  .arg = { [0] = STRARRAY(which, itimers), }, },
+	/* "getpgid" < "getpid" in strcmp() order: 'g' sorts before 'i' */
+	{ .name	    = "getpgid",    .errpid = true, },
+	{ .name	    = "getpid",	    .errpid = true, },
+	{ .name	    = "getppid",    .errpid = true, },
+	{ .name	    = "getrandom",
+	  .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
+	{ .name	    = "getrlimit",
+	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
+	{ .name	    = "gettid",	    .errpid = true, },
+	{ .name	    = "ioctl",
+	  .arg = {
+#if defined(__i386__) || defined(__x86_64__)
+/*
+ * FIXME: Make this available to all arches.
+ */
+		   [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
+		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
+#else
+		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
+#endif
+	{ .name	    = "kcmp",	    .nr_args = 5,
+	  .arg = { [0] = { .name = "pid1",	.scnprintf = SCA_PID, },
+		   [1] = { .name = "pid2",	.scnprintf = SCA_PID, },
+		   [2] = { .name = "type",	.scnprintf = SCA_KCMP_TYPE, },
+		   [3] = { .name = "idx1",	.scnprintf = SCA_KCMP_IDX, },
+		   [4] = { .name = "idx2",	.scnprintf = SCA_KCMP_IDX, }, }, },
+	{ .name	    = "keyctl",
+	  .arg = { [0] = STRARRAY(option, keyctl_options), }, },
+	{ .name	    = "kill",
+	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+	{ .name	    = "linkat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
+	{ .name	    = "lseek",
+	  .arg = { [2] = STRARRAY(whence, whences), }, },
+	{ .name	    = "lstat", .alias = "newlstat", },
+	{ .name     = "madvise",
+	  .arg = { [0] = { .scnprintf = SCA_HEX,      /* start */ },
+		   [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
+	{ .name	    = "mkdirat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
+	{ .name	    = "mknodat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
+	{ .name	    = "mlock",
+	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
+	{ .name	    = "mlockall",
+	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
+	{ .name	    = "mmap",	    .hexret = true,
+/* The standard mmap maps to old_mmap on s390x */
+#if defined(__s390x__)
+	.alias = "old_mmap",
+#endif
+	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* addr */ },
+		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
+		   [3] = { .scnprintf = SCA_MMAP_FLAGS,	/* flags */ }, }, },
+	{ .name	    = "mprotect",
+	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
+		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ }, }, },
+	{ .name	    = "mq_unlink",
+	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
+	{ .name	    = "mremap",	    .hexret = true,
+	  .arg = { [0] = { .scnprintf = SCA_HEX,	  /* addr */ },
+		   [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
+		   [4] = { .scnprintf = SCA_HEX,	  /* new_addr */ }, }, },
+	{ .name	    = "munlock",
+	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
+	{ .name	    = "munmap",
+	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
+	{ .name	    = "name_to_handle_at",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+	{ .name	    = "newfstatat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+	{ .name	    = "open",
+	  .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
+	{ .name	    = "open_by_handle_at",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
+		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
+	{ .name	    = "openat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
+		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
+	{ .name	    = "perf_event_open",
+	  .arg = { [2] = { .scnprintf = SCA_INT,	/* cpu */ },
+		   [3] = { .scnprintf = SCA_FD,		/* group_fd */ },
+		   [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
+	{ .name	    = "pipe2",
+	  .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
+	{ .name	    = "pkey_alloc",
+	  .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS,	/* access_rights */ }, }, },
+	{ .name	    = "pkey_free",
+	  .arg = { [0] = { .scnprintf = SCA_INT,	/* key */ }, }, },
+	{ .name	    = "pkey_mprotect",
+	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
+		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
+		   [3] = { .scnprintf = SCA_INT,	/* pkey */ }, }, },
+	{ .name	    = "poll", .timeout = true, },
+	{ .name	    = "ppoll", .timeout = true, },
+	{ .name	    = "prctl", .alias = "arch_prctl",
+	  .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
+		   [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
+		   [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
+	{ .name	    = "pread", .alias = "pread64", },
+	{ .name	    = "preadv", .alias = "pread", },
+	{ .name	    = "prlimit64",
+	  .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
+	{ .name	    = "pwrite", .alias = "pwrite64", },
+	{ .name	    = "readlinkat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+	{ .name	    = "recvfrom",
+	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+	{ .name	    = "recvmmsg",
+	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+	{ .name	    = "recvmsg",
+	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+	{ .name	    = "renameat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+	{ .name	    = "rt_sigaction",
+	  .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+	{ .name	    = "rt_sigprocmask",
+	  .arg = { [0] = STRARRAY(how, sighow), }, },
+	{ .name	    = "rt_sigqueueinfo",
+	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+	{ .name	    = "rt_tgsigqueueinfo",
+	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+	{ .name	    = "sched_setscheduler",
+	  .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
+	{ .name	    = "seccomp",
+	  .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP,	   /* op */ },
+		   [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
+	{ .name	    = "select", .timeout = true, },
+	{ .name	    = "sendmmsg",
+	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+	{ .name	    = "sendmsg",
+	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+	{ .name	    = "sendto",
+	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+	{ .name	    = "set_tid_address", .errpid = true, },
+	{ .name	    = "setitimer",
+	  .arg = { [0] = STRARRAY(which, itimers), }, },
+	{ .name	    = "setrlimit",
+	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
+	{ .name	    = "socket",
+	  .arg = { [0] = STRARRAY(family, socket_families),
+		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
+	{ .name	    = "socketpair",
+	  .arg = { [0] = STRARRAY(family, socket_families),
+		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
+	{ .name	    = "stat", .alias = "newstat", },
+	{ .name	    = "statx",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT,	 /* fdat */ },
+		   [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
+		   [3] = { .scnprintf = SCA_STATX_MASK,	 /* mask */ }, }, },
+	{ .name	    = "swapoff",
+	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
+	{ .name	    = "swapon",
+	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
+	{ .name	    = "symlinkat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+	{ .name	    = "tgkill",
+	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+	{ .name	    = "tkill",
+	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+	{ .name	    = "uname", .alias = "newuname", },
+	{ .name	    = "unlinkat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+	{ .name	    = "utimensat",
+	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
+	{ .name	    = "wait4",	    .errpid = true,
+	  .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
+	{ .name	    = "waitid",	    .errpid = true,
+	  .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
+};
+
+/*
+ * bsearch() comparator: @name is the syscall name being looked up (a C
+ * string), @fmtp the table entry it is compared against.
+ */
+static int syscall_fmt__cmp(const void *name, const void *fmtp)
+{
+	const char *key = name;
+	const struct syscall_fmt *entry = fmtp;
+
+	return strcmp(key, entry->name);
+}
+
+/*
+ * Binary search syscall_fmts[] for the entry whose .name equals @name.
+ * Returns the entry, or NULL if there is none.
+ */
+static struct syscall_fmt *syscall_fmt__find(const char *name)
+{
+	return bsearch(name, syscall_fmts, ARRAY_SIZE(syscall_fmts),
+		       sizeof(syscall_fmts[0]), syscall_fmt__cmp);
+}
+
+/*
+ * Description of one syscall as used by the tracer.
+ * 'fmt' is the matching syscall_fmts[] entry, if any, and 'arg_fmt' holds the
+ * per-argument formatters. NOTE(review): the remaining members are filled in
+ * by code outside this part of the file; see there for their exact meaning.
+ */
+struct syscall {
+	struct event_format *tp_format;
+	int		    nr_args;
+	struct format_field *args;
+	const char	    *name;
+	bool		    is_exit;
+	struct syscall_fmt  *fmt;
+	struct syscall_arg_fmt *arg_fmt;
+};
+
+/*
+ * Print a syscall duration, given in nanoseconds, as "(%6.3f ms): ".
+ *
+ * The 'calculated' flag exists because the duration is not always known, for
+ * instance when a session starts while some thread is already blocked in a
+ * syscall such as 'poll'. In that case the space between the parentheses is
+ * filled with blanks instead of a number.
+ *
+ * Known durations are coloured: red from 1 ms up, yellow from 0.01 ms up,
+ * normal below that. Returns the number of characters printed.
+ */
+static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
+{
+	const double ms = (double)t / NSEC_PER_MSEC;
+	const char *color;
+	size_t printed = fprintf(fp, "(");
+
+	if (!calculated) {
+		printed += fprintf(fp, "         ");
+		return printed + fprintf(fp, "): ");
+	}
+
+	if (ms >= 1.0)
+		color = PERF_COLOR_RED;
+	else if (ms >= 0.01)
+		color = PERF_COLOR_YELLOW;
+	else
+		color = PERF_COLOR_NORMAL;
+
+	printed += color_fprintf(fp, color, "%6.3f ms", ms);
+
+	return printed + fprintf(fp, "): ");
+}
+
+/**
+ * Per-thread tracing state, stored as the thread's private data.
+ *
+ * nr_events: incremented by thread__trace() on every successful lookup.
+ * filename.ptr: The filename char pointer that will be vfs_getname'd
+ * filename.entry_str_pos: Where to insert the string translated from
+ *                         filename.ptr by the vfs_getname tracepoint/kprobe.
+ * ret_scnprintf: syscall args may set this to a different syscall return
+ *                formatter, for instance, fcntl may return fds, file flags, etc.
+ * paths.table: fd -> path name cache, indexed by fd; entries are heap strings
+ *              or NULL.
+ * paths.max: highest valid index of paths.table, -1 while the table is empty.
+ */
+struct thread_trace {
+	u64		  entry_time;
+	bool		  entry_pending;
+	unsigned long	  nr_events;
+	unsigned long	  pfmaj, pfmin;
+	char		  *entry_str;
+	double		  runtime_ms;
+	size_t		  (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
+        struct {
+		unsigned long ptr;
+		short int     entry_str_pos;
+		bool	      pending_open;
+		unsigned int  namelen;
+		char	      *name;
+	} filename;
+	struct {
+		int	  max;
+		char	  **table;
+	} paths;
+
+	struct intlist *syscall_stats;
+};
+
+/*
+ * Allocate a zeroed struct thread_trace with an empty fd path cache
+ * (paths.max == -1) and a fresh syscall_stats list.
+ *
+ * Returns NULL when the allocation fails; the caller, thread__trace(), copes
+ * with that by dropping the sample. All initialisation has to stay inside
+ * the NULL check, otherwise an allocation failure is dereferenced.
+ */
+static struct thread_trace *thread_trace__new(void)
+{
+	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
+
+	if (ttrace) {
+		ttrace->paths.max = -1;
+		ttrace->syscall_stats = intlist__new(NULL);
+	}
+
+	return ttrace;
+}
+
+/*
+ * Return the struct thread_trace attached to @thread, creating and attaching
+ * one on first use, and count the event against it.
+ *
+ * When @thread is NULL or the state cannot be allocated, a warning is printed
+ * on @fp and NULL is returned.
+ */
+static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
+{
+	struct thread_trace *ttrace = NULL;
+
+	if (thread != NULL) {
+		if (thread__priv(thread) == NULL)
+			thread__set_priv(thread, thread_trace__new());
+
+		ttrace = thread__priv(thread);
+	}
+
+	if (ttrace == NULL) {
+		color_fprintf(fp, PERF_COLOR_RED,
+			      "WARNING: not enough memory, dropping samples!\n");
+		return NULL;
+	}
+
+	++ttrace->nr_events;
+	return ttrace;
+}
+
+
+/*
+ * Let an argument formatter choose how the return value of the syscall being
+ * formatted gets printed: @ret_scnprintf is stored in the private state of
+ * the thread that @arg belongs to. That state must already exist.
+ */
+void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
+				    size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
+{
+	struct thread_trace *state = thread__priv(arg->thread);
+
+	state->ret_scnprintf = ret_scnprintf;
+}
+
+/*
+ * Bit flags. NOTE(review): presumably the values stored in
+ * trace->trace_pgfaults to select major and/or minor page faults; the users
+ * are outside this part of the file, confirm there.
+ */
+#define TRACE_PFMAJ		(1 << 0)
+#define TRACE_PFMIN		(1 << 1)
+
+/* NOTE(review): presumably the size of thread_trace->entry_str; confirm at the allocation site. */
+static const size_t trace__entry_str_size = 2048;
+
+/*
+ * Remember @pathname as the path behind @fd in @thread's fd path cache,
+ * growing the cache when @fd is beyond its current end.
+ *
+ * The string is duplicated; a name cached earlier for the same fd is
+ * released so that replacing an entry does not leak it.
+ *
+ * Returns 0 on success, -1 if memory could not be allocated. The thread must
+ * already have its struct thread_trace attached and @fd must not be negative.
+ */
+static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
+{
+	struct thread_trace *ttrace = thread__priv(thread);
+	char *copy;
+
+	if (fd > ttrace->paths.max) {
+		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
+
+		if (npath == NULL)
+			return -1;
+
+		/*
+		 * Clear only the new slots. With paths.max == -1 (empty cache)
+		 * this covers the whole table, so no special case is needed.
+		 */
+		memset(npath + ttrace->paths.max + 1, 0,
+		       (fd - ttrace->paths.max) * sizeof(char *));
+
+		ttrace->paths.table = npath;
+		ttrace->paths.max   = fd;
+	}
+
+	/* duplicate first, in case @pathname is the entry being replaced */
+	copy = strdup(pathname);
+	free(ttrace->paths.table[fd]);
+	ttrace->paths.table[fd] = copy;
+
+	return copy != NULL ? 0 : -1;
+}
+
+/*
+ * Resolve @fd of @thread through the /proc fd symlink and store the result
+ * in the thread's fd path cache.
+ *
+ * The per-task directory is used when @thread is not the thread group
+ * leader. Returns 0 on success, -1 if the link cannot be examined or read,
+ * does not fit the local buffer, or cannot be cached.
+ */
+static int thread__read_fd_path(struct thread *thread, int fd)
+{
+	char linkname[PATH_MAX], pathname[PATH_MAX];
+	struct stat st;
+	int len;
+
+	if (thread->pid_ != thread->tid) {
+		scnprintf(linkname, sizeof(linkname),
+			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
+	} else {
+		scnprintf(linkname, sizeof(linkname),
+			  "/proc/%d/fd/%d", thread->pid_, fd);
+	}
+
+	if (lstat(linkname, &st) < 0)
+		return -1;
+
+	/* leave room for the terminating NUL */
+	if (st.st_size + 1 > (off_t)sizeof(pathname))
+		return -1;
+
+	len = readlink(linkname, pathname, sizeof(pathname));
+	if (len < 0 || len > st.st_size)
+		return -1;
+
+	/* readlink() does not terminate the string */
+	pathname[len] = '\0';
+
+	return trace__set_fd_pathname(thread, fd, pathname);
+}
+
+/*
+ * Return the cached path for @fd of @thread, or NULL if it is unknown.
+ *
+ * On a cache miss the path is read from /proc, but only in live mode; each
+ * such attempt is counted in trace->stats.proc_getname.
+ */
+static const char *thread__fd_path(struct thread *thread, int fd,
+				   struct trace *trace)
+{
+	struct thread_trace *ttrace = thread__priv(thread);
+	bool cached;
+
+	if (ttrace == NULL || fd < 0)
+		return NULL;
+
+	cached = fd <= ttrace->paths.max && ttrace->paths.table[fd] != NULL;
+	if (!cached) {
+		if (!trace->live)
+			return NULL;
+
+		++trace->stats.proc_getname;
+
+		if (thread__read_fd_path(thread, fd) != 0)
+			return NULL;
+	}
+
+	return ttrace->paths.table[fd];
+}
+
+/*
+ * Formatter for file descriptor arguments: prints the number, followed by
+ * "<path>" when the path behind the descriptor is known.
+ */
+size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
+{
+	const int fd = arg->val;
+	size_t len = scnprintf(bf, size, "%d", fd);
+	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
+
+	if (path == NULL)
+		return len;
+
+	return len + scnprintf(bf + len, size - len, "<%s>", path);
+}
+
+/*
+ * Like syscall_arg__scnprintf_fd(), but for a descriptor belonging to the
+ * process @pid: the thread is looked up on trace->host and the reference
+ * taken by the lookup is dropped before returning. When no such thread is
+ * known only the number is printed.
+ */
+size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
+{
+	size_t len = scnprintf(bf, size, "%d", fd);
+	struct thread *owner = machine__find_thread(trace->host, pid, pid);
+	const char *path;
+
+	if (owner == NULL)
+		return len;
+
+	path = thread__fd_path(owner, fd, trace);
+	if (path != NULL)
+		len += scnprintf(bf + len, size - len, "<%s>", path);
+
+	thread__put(owner);
+
+	return len;
+}
+
+/*
+ * Formatter for the fd argument of close(): print it like any other fd, then
+ * drop its entry from the thread's path cache so that a later reuse of the
+ * same number does not show a stale path.
+ */
+static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
+					      struct syscall_arg *arg)
+{
+	const int fd = arg->val;
+	const size_t len = syscall_arg__scnprintf_fd(bf, size, arg);
+	struct thread_trace *state = thread__priv(arg->thread);
+
+	if (state == NULL || fd < 0 || fd > state->paths.max)
+		return len;
+
+	zfree(&state->paths.table[fd]);
+
+	return len;
+}
+
+/*
+ * Record, in @thread's private state, the user pointer @ptr of a filename
+ * argument and the position @bf inside the entry string where its text is
+ * to be inserted later. @bf must point into the thread's entry_str.
+ */
+static void thread__set_filename_pos(struct thread *thread, const char *bf,
+				     unsigned long ptr)
+{
+	struct thread_trace *state = thread__priv(thread);
+
+	state->filename.entry_str_pos = bf - state->entry_str;
+	state->filename.ptr = ptr;
+}
+
+/*
+ * Formatter for filename (user pointer) arguments.
+ *
+ * Without vfs_getname support the raw pointer value is printed. With it,
+ * nothing is printed now: the pointer and the current output position are
+ * recorded so the resolved name can be inserted later, and 0 is returned.
+ */
+static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
+					      struct syscall_arg *arg)
+{
+	unsigned long ptr = arg->val;
+
+	/* 'ptr' is an unsigned long: "%#x" would be a format mismatch */
+	if (!arg->trace->vfs_getname)
+		return scnprintf(bf, size, "%#lx", ptr);
+
+	thread__set_filename_pos(arg->thread, bf, ptr);
+	return 0;
+}
+
+/*
+ * Return true when duration @t is below the configured duration filter
+ * (trace->duration_filter scaled by NSEC_PER_MSEC), i.e. when the event
+ * should be filtered out.
+ */
+static bool trace__filter_duration(struct trace *trace, double t)
+{
+	return (trace->duration_filter * NSEC_PER_MSEC) > t;
+}
+
+/*
+ * Print @tstamp relative to trace->base_time, scaled by NSEC_PER_MSEC, with
+ * three decimals and a trailing space.  Returns fprintf()'s result.
+ */
+static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
+{
+	const double rel_ms = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
+
+	return fprintf(fp, "%10.3f ", rel_ms);
+}
+
+/*
+ * A tstamp of 0 means "no timestamp available".  That happens, for
+ * instance, when ttrace->entry_time is used for a thread whose sys_exit
+ * arrives without a matching sys_enter: a "poll" issued before the tracing
+ * session started, or a sys_enter lost to a ring buffer overflow.  In that
+ * case a '?' placeholder of the same width is printed instead.
+ */
+static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
+{
+	if (tstamp == 0)
+		return fprintf(fp, "         ? ");
+
+	return __trace__fprintf_tstamp(trace, tstamp, fp);
+}
+
+/*
+ * Flags written by sig_handler().  They are volatile sig_atomic_t because
+ * that is the object type C guarantees can be safely assigned from an
+ * asynchronous signal handler; a plain bool gives no such guarantee.
+ */
+static volatile sig_atomic_t done = false;
+static volatile sig_atomic_t interrupted = false;
+
+/*
+ * Signal handler: request termination and record whether the signal that
+ * asked for it was SIGINT.
+ */
+static void sig_handler(int sig)
+{
+	done = true;
+	interrupted = sig == SIGINT;
+}
+
+/*
+ * Print the common prefix of a trace line: timestamp, duration and, when
+ * several threads are being traced, "comm/" (only if show_comm is set)
+ * followed by the tid.  Returns the number of characters printed.
+ */
+static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
+					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
+{
+	size_t len = trace__fprintf_tstamp(trace, tstamp, fp);
+
+	len += fprintf_duration(duration, duration_calculated, fp);
+
+	if (!trace->multiple_threads)
+		return len;
+
+	if (trace->show_comm)
+		len += fprintf(fp, "%.14s/", thread__comm_str(thread));
+
+	len += fprintf(fp, "%d ", thread->tid);
+	return len;
+}
+
+/*
+ * Process a non-sample perf event.  PERF_RECORD_LOST is additionally
+ * reported in red on trace->output before being handed to
+ * machine__process_lost_event(); everything else goes straight to
+ * machine__process_event().
+ */
+static int trace__process_event(struct trace *trace, struct machine *machine,
+				union perf_event *event, struct perf_sample *sample)
+{
+	if (event->header.type != PERF_RECORD_LOST)
+		return machine__process_event(machine, event, sample);
+
+	color_fprintf(trace->output, PERF_COLOR_RED,
+		      "LOST %" PRIu64 " events!\n", event->lost.lost);
+	return machine__process_lost_event(machine, event, sample);
+}
+
+/*
+ * perf_tool style callback: recover the enclosing struct trace from @tool
+ * and forward the event to trace__process_event().
+ */
+static int trace__tool_process(struct perf_tool *tool,
+			       union perf_event *event,
+			       struct perf_sample *sample,
+			       struct machine *machine)
+{
+	/* @tool is the 'tool' member embedded in struct trace. */
+	struct trace *trace = container_of(tool, struct trace, tool);
+	return trace__process_event(trace, machine, event, sample);
+}
+
+/*
+ * Kernel address resolver registered with the trace event code.
+ *
+ * When kptr_restrict is in effect no resolution is attempted: a warning is
+ * emitted once per machine (tracked in machine->kptr_restrict_warned) and
+ * NULL is returned from then on.  Otherwise the request is forwarded to
+ * machine__resolve_kernel_addr().
+ */
+static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
+{
+	struct machine *machine = vmachine;
+
+	if (!machine->kptr_restrict_warned && !symbol_conf.kptr_restrict)
+		return machine__resolve_kernel_addr(vmachine, addrp, modp);
+
+	if (!machine->kptr_restrict_warned) {
+		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
+			   "Check /proc/sys/kernel/kptr_restrict.\n\n"
+			   "Kernel samples will not be resolved.\n");
+		machine->kptr_restrict_warned = true;
+	}
+
+	return NULL;
+}
+
+/*
+ * Initialize symbol handling, create the host machine in trace->host,
+ * register the kernel address resolver and synthesize the threads in
+ * @evlist.
+ *
+ * Returns 0 on success.  If registering the resolver or synthesizing the
+ * threads fails, symbol__exit() is called before the error is returned;
+ * trace->host is left as set.
+ */
+static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
+{
+	int err = symbol__init(NULL);
+
+	if (err)
+		return err;
+
+	trace->host = machine__new_host();
+	if (trace->host == NULL)
+		return -ENOMEM;
+
+	err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
+	if (err >= 0) {
+		err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
+						    evlist->threads, trace__tool_process, false,
+						    trace->opts.proc_map_timeout, 1);
+	}
+
+	if (err)
+		symbol__exit();
+
+	return err;
+}
+
+/*
+ * Tear down the state set up for symbol resolution: call machine__exit()
+ * on trace->host, clear the pointer, then call symbol__exit().
+ */
+static void trace__symbols__exit(struct trace *trace)
+{
+	machine__exit(trace->host);
+	/* Avoid leaving a stale pointer behind. */
+	trace->host = NULL;
+
+	symbol__exit();
+}
+
+/*
+ * Allocate sc->arg_fmt with @nr_args zeroed entries and, when the syscall
+ * has a static format description (sc->fmt), seed each entry from it.
+ *
+ * A request for exactly 6 args is replaced by sc->fmt->nr_args when the
+ * static format provides a non-zero count.
+ *
+ * Returns 0 on success, -1 if the allocation fails.
+ */
+static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
+{
+	int i;
+
+	if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
+		nr_args = sc->fmt->nr_args;
+
+	sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
+	if (sc->arg_fmt == NULL)
+		return -1;
+
+	if (sc->fmt != NULL) {
+		for (i = 0; i < nr_args; i++)
+			sc->arg_fmt[i] = sc->fmt->arg[i];
+	}
+
+	sc->nr_args = nr_args;
+	return 0;
+}
+
+/*
+ * Pick a default formatter for every argument in sc->args that does not
+ * already have one in the static syscall format (sc->fmt), based on the
+ * tracepoint field's type, name and flags.  Always returns 0.
+ */
+static int syscall__set_arg_fmts(struct syscall *sc)
+{
+	struct format_field *field;
+	int idx = 0, len;
+
+	for (field = sc->args; field; field = field->next, ++idx) {
+		/* An explicit formatter in the static table wins. */
+		if (sc->fmt && sc->fmt->arg[idx].scnprintf)
+			continue;
+
+		if (strcmp(field->type, "const char *") == 0 &&
+			 (strcmp(field->name, "filename") == 0 ||
+			  strcmp(field->name, "path") == 0 ||
+			  strcmp(field->name, "pathname") == 0))
+			sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
+		else if (field->flags & FIELD_IS_POINTER)
+			sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
+		else if (strcmp(field->type, "pid_t") == 0)
+			sc->arg_fmt[idx].scnprintf = SCA_PID;
+		else if (strcmp(field->type, "umode_t") == 0)
+			sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
+		/* Integer typed field whose name ends in "fd". */
+		else if ((strcmp(field->type, "int") == 0 ||
+			  strcmp(field->type, "unsigned int") == 0 ||
+			  strcmp(field->type, "long") == 0) &&
+			 (len = strlen(field->name)) >= 2 &&
+			 strcmp(field->name + len - 2, "fd") == 0) {
+			/*
+			 * /sys/kernel/tracing/events/syscalls/sys_enter*
+			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
+			 * 65 int
+			 * 23 unsigned int
+			 * 7 unsigned long
+			 *
+			 * NOTE(review): the survey above lists "unsigned long"
+			 * but the condition compares against "long"; confirm
+			 * which one is intended.
+			 */
+			sc->arg_fmt[idx].scnprintf = SCA_FD;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Fill in trace->syscalls.table[id]: name, static format, tracepoint
+ * format, argument list and per-argument formatters.  The table is grown
+ * (and the new tail zeroed) when @id is beyond its current maximum.
+ *
+ * Returns 0 on success and -1 when the syscall name is unknown, an
+ * allocation fails or the sys_enter tracepoint format cannot be read.
+ */
+static int trace__read_syscall_info(struct trace *trace, int id)
+{
+	char tp_name[128];
+	struct syscall *sc;
+	const char *name = syscalltbl__name(trace->sctbl, id);
+
+	if (name == NULL)
+		return -1;
+
+	if (id > trace->syscalls.max) {
+		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
+
+		if (nsyscalls == NULL)
+			return -1;
+
+		/* Zero only the newly added entries; max == -1 is handled as "table empty". */
+		if (trace->syscalls.max != -1) {
+			memset(nsyscalls + trace->syscalls.max + 1, 0,
+			       (id - trace->syscalls.max) * sizeof(*sc));
+		} else {
+			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
+		}
+
+		trace->syscalls.table = nsyscalls;
+		trace->syscalls.max   = id;
+	}
+
+	sc = trace->syscalls.table + id;
+	sc->name = name;
+
+	sc->fmt  = syscall_fmt__find(sc->name);
+
+	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
+	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
+
+	/* Retry with the alias from the static format, if there is one. */
+	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
+		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
+		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
+	}
+
+	/* Without a tracepoint format, ask for 6 argument slots. */
+	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
+		return -1;
+
+	if (IS_ERR(sc->tp_format))
+		return -1;
+
+	sc->args = sc->tp_format->format.fields;
+	/*
+	 * The first field may be '__syscall_nr' or 'nr', holding the syscall
+	 * number.  It is not needed here, so skip it when present; older
+	 * kernels do not have it.
+	 */
+	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
+		sc->args = sc->args->next;
+		--sc->nr_args;
+	}
+
+	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
+
+	return syscall__set_arg_fmts(sc);
+}
+
+/*
+ * Translate the syscall names and globs in trace->ev_qualifier into the
+ * syscall ids stored in trace->ev_qualifier_ids.
+ *
+ * Each entry is first tried as an exact syscall name and then as a glob,
+ * which may expand to several ids (the id array is grown in steps of 8).
+ * Entries matching nothing are listed on trace->output and make the call
+ * fail with -EINVAL after all entries have been examined.  Allocation
+ * failures fail with -ENOMEM.
+ *
+ * On any failure the id array is freed and its count reset to 0.
+ * Returns 0 on success.
+ */
+static int trace__validate_ev_qualifier(struct trace *trace)
+{
+	int err = 0, i;
+	size_t nr_allocated;
+	struct str_node *pos;
+
+	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
+	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
+						 sizeof(trace->ev_qualifier_ids.entries[0]));
+
+	if (trace->ev_qualifier_ids.entries == NULL) {
+		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
+		       trace->output);
+		/* Out of memory, not a bad argument; also reset the count below. */
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	nr_allocated = trace->ev_qualifier_ids.nr;
+	i = 0;
+
+	strlist__for_each_entry(pos, trace->ev_qualifier) {
+		const char *sc = pos->s;
+		int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
+
+		if (id < 0) {
+			id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
+			if (id >= 0)
+				goto matches;
+
+			/* First bad name opens the error line, later ones are appended. */
+			if (err == 0) {
+				fputs("Error:\tInvalid syscall ", trace->output);
+				err = -EINVAL;
+			} else {
+				fputs(", ", trace->output);
+			}
+
+			fputs(sc, trace->output);
+		}
+matches:
+		trace->ev_qualifier_ids.entries[i++] = id;
+		if (match_next == -1)
+			continue;
+
+		/* A glob matched: collect the remaining matches too. */
+		while (1) {
+			id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
+			if (id < 0)
+				break;
+			if (nr_allocated == trace->ev_qualifier_ids.nr) {
+				void *entries;
+
+				nr_allocated += 8;
+				entries = realloc(trace->ev_qualifier_ids.entries,
+						  nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
+				if (entries == NULL) {
+					err = -ENOMEM;
+					fputs("\nError:\t Not enough memory for parsing\n", trace->output);
+					goto out_free;
+				}
+				trace->ev_qualifier_ids.entries = entries;
+			}
+			trace->ev_qualifier_ids.nr++;
+			trace->ev_qualifier_ids.entries[i++] = id;
+		}
+	}
+
+	if (err < 0) {
+		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
+		      "\nHint:\tand: 'man syscalls'\n", trace->output);
+out_free:
+		zfree(&trace->ev_qualifier_ids.entries);
+		trace->ev_qualifier_ids.nr = 0;
+	}
+
+	return err;
+}
+
+/*
+ * The raw syscall arguments are a sequence of longs, but they live in the
+ * event's raw_data, which follows a u32 raw_size and is therefore never
+ * 8-byte aligned.  Reading them through a pointer cast would mean
+ * unaligned (possibly extended) loads, so the requested slot is copied
+ * into an aligned local with memcpy() instead.
+ *
+ * Returns argument number @idx of @arg->args.
+ */
+unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
+{
+	unsigned long value;
+
+	memcpy(&value, arg->args + sizeof(unsigned long) * idx, sizeof(value));
+	return value;
+}
+
+/*
+ * Print the label of the current argument followed by ": ".  The name from
+ * sc->arg_fmt is used when there is one, otherwise a generic "arg<idx>".
+ */
+static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
+				      struct syscall_arg *arg)
+{
+	const char *label = sc->arg_fmt ? sc->arg_fmt[arg->idx].name : NULL;
+
+	if (label == NULL)
+		return scnprintf(bf, size, "arg%d: ", arg->idx);
+
+	return scnprintf(bf, size, "%s: ", label);
+}
+
+/*
+ * Print the value of the current argument.  If the argument has a formatter
+ * in sc->arg_fmt it is invoked with arg->val (and arg->parm, when the
+ * format supplies one) filled in; otherwise the value is printed with
+ * "%ld".
+ */
+static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
+				     struct syscall_arg *arg, unsigned long val)
+{
+	if (sc->arg_fmt == NULL || sc->arg_fmt[arg->idx].scnprintf == NULL)
+		return scnprintf(bf, size, "%ld", val);
+
+	arg->val = val;
+	if (sc->arg_fmt[arg->idx].parm)
+		arg->parm = sc->arg_fmt[arg->idx].parm;
+
+	return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
+}
+
+/*
+ * Format all arguments of syscall @sc, read from the raw @args buffer, into
+ * @bf as a comma separated "name: value" list.  Returns the number of
+ * characters written.
+ *
+ * Two cases are handled: the tracepoint format was read (sc->args is the
+ * field list) or it could not be read (raw args are printed with generic or
+ * static names).
+ *
+ * Note: the result of thread__priv() is dereferenced without a NULL check.
+ */
+static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
+				      unsigned char *args, struct trace *trace,
+				      struct thread *thread)
+{
+	size_t printed = 0;
+	unsigned long val;
+	/* Bit in arg.mask that corresponds to the current argument index. */
+	u8 bit = 1;
+	struct syscall_arg arg = {
+		.args	= args,
+		.idx	= 0,
+		.mask	= 0,
+		.trace  = trace,
+		.thread = thread,
+	};
+	struct thread_trace *ttrace = thread__priv(thread);
+
+	/*
+	 * Things like fcntl will set this in its 'cmd' formatter to pick the
+	 * right formatter for the return value (an fd? file flags?), which is
+	 * not needed for syscalls that always return a given type, say an fd.
+	 */
+	ttrace->ret_scnprintf = NULL;
+
+	if (sc->args != NULL) {
+		struct format_field *field;
+
+		for (field = sc->args; field;
+		     field = field->next, ++arg.idx, bit <<= 1) {
+			/* Skip arguments whose bit is set in arg.mask. */
+			if (arg.mask & bit)
+				continue;
+
+			val = syscall_arg__val(&arg, arg.idx);
+
+			/*
+			 * Suppress this argument if its value is zero, unless
+			 * its format asks for zeros to be shown or uses a
+			 * strarray formatter, and has a parm associated.
+			 */
+			if (val == 0 &&
+			    !(sc->arg_fmt &&
+			      (sc->arg_fmt[arg.idx].show_zero ||
+			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
+			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
+			      sc->arg_fmt[arg.idx].parm))
+				continue;
+
+			printed += scnprintf(bf + printed, size - printed,
+					     "%s%s: ", printed ? ", " : "", field->name);
+			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
+		}
+	} else if (IS_ERR(sc->tp_format)) {
+		/*
+		 * If we managed to read the tracepoint /format file, then we
+		 * may end up not having any args, like with gettid(), so only
+		 * print the raw args when we didn't manage to read it.
+		 */
+		while (arg.idx < sc->nr_args) {
+			if (arg.mask & bit)
+				goto next_arg;
+			val = syscall_arg__val(&arg, arg.idx);
+			if (printed)
+				printed += scnprintf(bf + printed, size - printed, ", ");
+			printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
+			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
+next_arg:
+			++arg.idx;
+			bit <<= 1;
+		}
+	}
+
+	return printed;
+}
+
+/*
+ * Signature of the per-evsel sample handlers stored in evsel->handler and
+ * invoked with the trace session, the evsel, the raw event and the parsed
+ * sample.
+ */
+typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
+				  union perf_event *event,
+				  struct perf_sample *sample);
+
+/*
+ * Return the struct syscall for @id, reading its description with
+ * trace__read_syscall_info() the first time it is needed.
+ *
+ * Returns NULL for negative ids and when the description cannot be read;
+ * both cases are only reported on trace->output at raised verbosity.
+ */
+static struct syscall *trace__syscall_info(struct trace *trace,
+					   struct perf_evsel *evsel, int id)
+{
+
+	if (id < 0) {
+
+		/*
+		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
+		 * before that, leaving at a higher verbosity level till that is
+		 * explained. Reproduced with plain ftrace with:
+		 *
+		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
+		 * grep "NR -1 " /t/trace_pipe
+		 *
+		 * After generating some load on the machine.
+		 */
+		if (verbose > 1) {
+			/* Running count of invalid ids seen, shown in the message. */
+			static u64 n;
+			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
+				id, perf_evsel__name(evsel), ++n);
+		}
+		return NULL;
+	}
+
+	/* Not in the table yet (or no name recorded): try to read it now. */
+	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
+	    trace__read_syscall_info(trace, id))
+		goto out_cant_read;
+
+	/* Re-check: the entry must exist and have a name at this point. */
+	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
+		goto out_cant_read;
+
+	return &trace->syscalls.table[id];
+
+out_cant_read:
+	if (verbose > 0) {
+		fprintf(trace->output, "Problems reading syscall %d", id);
+		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
+			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
+		fputs(" information\n", trace->output);
+	}
+	return NULL;
+}
+
+/*
+ * Account one completed syscall @id in the per-thread statistics.
+ *
+ * The stats object for the syscall is created on first use.  The duration
+ * fed into it is sample->time minus ttrace->entry_time, or 0 when there is
+ * no entry time or the sample is not later than it.  Allocation failures
+ * are silently ignored.
+ */
+static void thread__update_stats(struct thread_trace *ttrace,
+				 int id, struct perf_sample *sample)
+{
+	struct int_node *node = intlist__findnew(ttrace->syscall_stats, id);
+	struct stats *st;
+	u64 elapsed;
+
+	if (node == NULL)
+		return;
+
+	st = node->priv;
+	if (st == NULL) {
+		st = malloc(sizeof(*st));
+		if (st == NULL)
+			return;
+
+		init_stats(st);
+		node->priv = st;
+	}
+
+	elapsed = (ttrace->entry_time && sample->time > ttrace->entry_time) ?
+		  sample->time - ttrace->entry_time : 0;
+
+	update_stats(st, elapsed);
+}
+
+/*
+ * If the current thread has a syscall entry that was formatted but not yet
+ * printed, print it now followed by ") ..." and mark it as no longer
+ * pending.  Nothing is printed in failure-only mode or when there is no
+ * current thread.  Returns the number of characters printed.
+ */
+static int trace__printf_interrupted_entry(struct trace *trace)
+{
+	struct thread_trace *ttrace;
+	size_t len;
+
+	if (trace->failure_only)
+		return 0;
+
+	if (trace->current == NULL)
+		return 0;
+
+	ttrace = thread__priv(trace->current);
+	if (!ttrace->entry_pending)
+		return 0;
+
+	len  = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
+	len += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
+
+	ttrace->entry_pending = false;
+	return len;
+}
+
+/*
+ * When trace->print_sample is set, print a one line summary of @sample:
+ * event name, timestamp scaled by NSEC_PER_MSEC, comm, pid/tid and cpu.
+ * Returns the number of characters printed, 0 when disabled.
+ */
+static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
+				 struct perf_sample *sample, struct thread *thread)
+{
+	double ts;
+
+	if (!trace->print_sample)
+		return 0;
+
+	ts = (double)sample->time / NSEC_PER_MSEC;
+
+	return fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
+		       perf_evsel__name(evsel), ts,
+		       thread__comm_str(thread),
+		       sample->pid, sample->tid, sample->cpu);
+}
+
+/*
+ * Handler for the sys_enter tracepoint.
+ *
+ * Formats "name(args" into the thread's entry_str.  For exit/exit_group the
+ * line is printed right away (no sys_exit will follow); for everything else
+ * it is left pending so that the sys_exit handler can complete it.
+ *
+ * Returns 0 on success, -1 if the syscall or thread state is unavailable.
+ */
+static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
+			    union perf_event *event __maybe_unused,
+			    struct perf_sample *sample)
+{
+	char *msg;
+	void *args;
+	size_t printed = 0;
+	struct thread *thread;
+	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
+	struct syscall *sc = trace__syscall_info(trace, evsel, id);
+	struct thread_trace *ttrace;
+
+	if (sc == NULL)
+		return -1;
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+	ttrace = thread__trace(thread, trace->output);
+	if (ttrace == NULL)
+		goto out_put;
+
+	trace__fprintf_sample(trace, evsel, sample, thread);
+
+	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
+
+	/* The entry buffer is allocated on first use and then reused. */
+	if (ttrace->entry_str == NULL) {
+		ttrace->entry_str = malloc(trace__entry_str_size);
+		if (!ttrace->entry_str)
+			goto out_put;
+	}
+
+	/* Flush a still pending entry of the current thread first. */
+	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
+		trace__printf_interrupted_entry(trace);
+
+	ttrace->entry_time = sample->time;
+	msg = ttrace->entry_str;
+	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
+
+	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
+					   args, trace, thread);
+
+	if (sc->is_exit) {
+		if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
+			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
+			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
+		}
+	} else {
+		ttrace->entry_pending = true;
+		/* See trace__vfs_getname & trace__sys_exit */
+		ttrace->filename.pending_open = false;
+	}
+
+	/* Track the thread whose entry was formatted last. */
+	if (trace->current != thread) {
+		thread__put(trace->current);
+		trace->current = thread__get(thread);
+	}
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
+}
+
+/*
+ * Resolve the callchain of @sample into @cursor.  The stack depth limit is
+ * the evsel's own sample_max_stack when set, trace->max_stack otherwise.
+ * Returns 0 on success and -1 if either the sample or its callchain could
+ * not be resolved.
+ */
+static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
+				    struct perf_sample *sample,
+				    struct callchain_cursor *cursor)
+{
+	struct addr_location al;
+	int max_stack = trace->max_stack;
+
+	if (evsel->attr.sample_max_stack)
+		max_stack = evsel->attr.sample_max_stack;
+
+	if (machine__resolve(trace->host, &al, sample) < 0)
+		return -1;
+
+	if (thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Print the callchain currently held in the global callchain_cursor to
+ * trace->output, showing symbol and DSO and falling back to the raw
+ * address for unknown entries.  Returns sample__fprintf_callchain()'s
+ * result.
+ */
+static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
+{
+	/* TODO: user-configurable print_opts */
+	const unsigned int print_opts = EVSEL__PRINT_SYM |
+				        EVSEL__PRINT_DSO |
+				        EVSEL__PRINT_UNKNOWN_AS_ADDR;
+
+	/* NOTE(review): 38 is presumably the left alignment column - confirm. */
+	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
+}
+
+/*
+ * Map errno value @err to its symbolic name for the architecture recorded
+ * in the environment of @evsel.
+ */
+static const char *errno_to_name(struct perf_evsel *evsel, int err)
+{
+	const char *arch = perf_env__arch(perf_evsel__env(evsel));
+
+	return arch_syscalls__strerrno(arch, err);
+}
+
+/*
+ * Handler for the sys_exit tracepoint.
+ *
+ * Updates the per-thread statistics, records the pathname of a just opened
+ * fd, applies the duration / min-stack / summary / failure-only filters and
+ * finally prints the completed syscall line with its return value formatted
+ * according to the syscall's static format (errno, timeout, hex, pid, ...).
+ *
+ * Returns 0 on success, -1 if the syscall or thread state is unavailable.
+ */
+static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
+			   union perf_event *event __maybe_unused,
+			   struct perf_sample *sample)
+{
+	long ret;
+	u64 duration = 0;
+	bool duration_calculated = false;
+	struct thread *thread;
+	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
+	struct syscall *sc = trace__syscall_info(trace, evsel, id);
+	struct thread_trace *ttrace;
+
+	if (sc == NULL)
+		return -1;
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+	ttrace = thread__trace(thread, trace->output);
+	if (ttrace == NULL)
+		goto out_put;
+
+	trace__fprintf_sample(trace, evsel, sample, thread);
+
+	if (trace->summary)
+		thread__update_stats(ttrace, id, sample);
+
+	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
+
+	/* A successful open: associate the returned fd with the pending name. */
+	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
+		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
+		ttrace->filename.pending_open = false;
+		++trace->stats.vfs_getname;
+	}
+
+	/* Without an entry time no duration can be computed. */
+	if (ttrace->entry_time) {
+		duration = sample->time - ttrace->entry_time;
+		if (trace__filter_duration(trace, duration))
+			goto out;
+		duration_calculated = true;
+	} else if (trace->duration_filter)
+		goto out;
+
+	/* callchain_ret: 0 = none, 1 = resolved and to be printed, < 0 = error. */
+	if (sample->callchain) {
+		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+		if (callchain_ret == 0) {
+			if (callchain_cursor.nr < trace->min_stack)
+				goto out;
+			callchain_ret = 1;
+		}
+	}
+
+	if (trace->summary_only || (ret >= 0 && trace->failure_only))
+		goto out;
+
+	trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
+
+	/* Entry not pending: it was either printed already or never seen. */
+	if (ttrace->entry_pending) {
+		fprintf(trace->output, "%-70s", ttrace->entry_str);
+	} else {
+		fprintf(trace->output, " ... [");
+		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
+		fprintf(trace->output, "]: %s()", sc->name);
+	}
+
+	/* The labels below are shared between branches of this if/else chain. */
+	if (sc->fmt == NULL) {
+		if (ret < 0)
+			goto errno_print;
+signed_print:
+		fprintf(trace->output, ") = %ld", ret);
+	} else if (ret < 0) {
+errno_print: {
+		char bf[STRERR_BUFSIZE];
+		const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
+			   *e = errno_to_name(evsel, -ret);
+
+		fprintf(trace->output, ") = -1 %s %s", e, emsg);
+	}
+	} else if (ret == 0 && sc->fmt->timeout)
+		fprintf(trace->output, ") = 0 Timeout");
+	else if (ttrace->ret_scnprintf) {
+		/* Return value formatter chosen while formatting the arguments. */
+		char bf[1024];
+		struct syscall_arg arg = {
+			.val	= ret,
+			.thread	= thread,
+			.trace	= trace,
+		};
+		ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
+		ttrace->ret_scnprintf = NULL;
+		fprintf(trace->output, ") = %s", bf);
+	} else if (sc->fmt->hexret)
+		fprintf(trace->output, ") = %#lx", ret);
+	else if (sc->fmt->errpid) {
+		struct thread *child = machine__find_thread(trace->host, ret, ret);
+
+		/*
+		 * NOTE(review): when the child thread is not found nothing is
+		 * printed for the return value before the newline below -
+		 * confirm this is intended.
+		 */
+		if (child != NULL) {
+			fprintf(trace->output, ") = %ld", ret);
+			if (child->comm_set)
+				fprintf(trace->output, " (%s)", thread__comm_str(child));
+			thread__put(child);
+		}
+	} else
+		goto signed_print;
+
+	fputc('\n', trace->output);
+
+	if (callchain_ret > 0)
+		trace__fprintf_callchain(trace, sample);
+	else if (callchain_ret < 0)
+		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+out:
+	ttrace->entry_pending = false;
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
+}
+
+/*
+ * Handler for the probe:vfs_getname event.
+ *
+ * Saves the pathname carried by the event in the thread's filename state
+ * (marking an open as pending) and, if a filename argument position was
+ * recorded while formatting the syscall entry, splices the pathname into
+ * the entry string at that position.  Always returns 0.
+ */
+static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
+			      union perf_event *event __maybe_unused,
+			      struct perf_sample *sample)
+{
+	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+	struct thread_trace *ttrace;
+	size_t filename_len, entry_str_len, to_move;
+	ssize_t remaining_space;
+	char *pos;
+	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
+
+	if (!thread)
+		goto out;
+
+	ttrace = thread__priv(thread);
+	if (!ttrace)
+		goto out_put;
+
+	/* NOTE(review): filename is not checked for NULL before strlen() - confirm it cannot be. */
+	filename_len = strlen(filename);
+	if (filename_len == 0)
+		goto out_put;
+
+	/* Grow the saved-name buffer only when the new name is longer. */
+	if (ttrace->filename.namelen < filename_len) {
+		char *f = realloc(ttrace->filename.name, filename_len + 1);
+
+		if (f == NULL)
+			goto out_put;
+
+		ttrace->filename.namelen = filename_len;
+		ttrace->filename.name = f;
+	}
+
+	strcpy(ttrace->filename.name, filename);
+	ttrace->filename.pending_open = true;
+
+	/* No filename argument position recorded: nothing to splice. */
+	if (!ttrace->filename.ptr)
+		goto out_put;
+
+	entry_str_len = strlen(ttrace->entry_str);
+	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
+	if (remaining_space <= 0)
+		goto out_put;
+
+	/* Too long to fit: keep only the tail of the pathname. */
+	if (filename_len > (size_t)remaining_space) {
+		filename += filename_len - remaining_space;
+		filename_len = remaining_space;
+	}
+
+	/* Shift the rest of the entry string right and insert the name. */
+	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
+	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
+	memmove(pos + filename_len, pos, to_move);
+	memcpy(pos, filename, filename_len);
+
+	ttrace->filename.ptr = 0;
+	ttrace->filename.entry_str_pos = 0;
+out_put:
+	thread__put(thread);
+out:
+	return 0;
+}
+
+/*
+ * Handler for sched_stat_runtime samples: add the event's runtime, scaled
+ * by NSEC_PER_MSEC, to both the per-thread and the global runtime
+ * counters.  If no per-thread state is available the raw event fields are
+ * dumped to trace->output instead.  Always returns 0.
+ */
+static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
+				     union perf_event *event __maybe_unused,
+				     struct perf_sample *sample)
+{
+	u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
+	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
+	struct thread *thread = machine__findnew_thread(trace->host,
+							sample->pid,
+							sample->tid);
+	struct thread_trace *ttrace = thread__trace(thread, trace->output);
+
+	if (ttrace != NULL) {
+		ttrace->runtime_ms += runtime_ms;
+		trace->runtime_ms += runtime_ms;
+	} else {
+		fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
+		       evsel->name,
+		       perf_evsel__strval(evsel, sample, "comm"),
+		       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
+		       runtime,
+		       perf_evsel__intval(evsel, sample, "vruntime"));
+	}
+
+	thread__put(thread);
+	return 0;
+}
+
+/*
+ * binary__fprintf() callback used for BPF output events: only the character
+ * data is printed, with non-printable bytes shown as '.'.  Every other
+ * printer operation produces no output and returns 0.
+ */
+static int bpf_output__printer(enum binary_printer_ops op,
+			       unsigned int val, void *extra __maybe_unused, FILE *fp)
+{
+	unsigned char ch = (unsigned char)val;
+
+	if (op != BINARY_PRINT_CHAR_DATA)
+		return 0;
+
+	return fprintf(fp, "%c", isprint(ch) ? ch : '.');
+}
+
+/*
+ * Print the raw payload of a BPF output @sample to trace->output through
+ * binary__fprintf(), using bpf_output__printer() as the per-item callback.
+ */
+static void bpf_output__fprintf(struct trace *trace,
+				struct perf_sample *sample)
+{
+	/* NOTE(review): 8 is presumably the number of bytes per line - confirm. */
+	binary__fprintf(sample->raw_data, sample->raw_size, 8,
+			bpf_output__printer, NULL, trace->output);
+}
+
+/*
+ * Generic handler for events other than syscall enter/exit: prints the
+ * timestamp, the event name and its payload (BPF output or formatted
+ * tracepoint fields), followed by the callchain when one was resolved.
+ * Samples whose callchain is shorter than trace->min_stack are skipped.
+ * Always returns 0.
+ */
+static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
+				union perf_event *event __maybe_unused,
+				struct perf_sample *sample)
+{
+	/* 0 = no callchain, 1 = resolved and to be printed, < 0 = error. */
+	int callchain_ret = 0;
+
+	if (sample->callchain) {
+		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+		if (callchain_ret == 0) {
+			if (callchain_cursor.nr < trace->min_stack)
+				goto out;
+			callchain_ret = 1;
+		}
+	}
+
+	/* Flush a pending syscall entry before printing this event. */
+	trace__printf_interrupted_entry(trace);
+	trace__fprintf_tstamp(trace, sample->time, trace->output);
+
+	/* Empty duration column, printed only when syscalls are traced too. */
+	if (trace->trace_syscalls)
+		fprintf(trace->output, "(         ): ");
+
+	fprintf(trace->output, "%s:", evsel->name);
+
+	if (perf_evsel__is_bpf_output(evsel)) {
+		bpf_output__fprintf(trace, sample);
+	} else if (evsel->tp_format) {
+		event_format__fprintf(evsel->tp_format, sample->cpu,
+				      sample->raw_data, sample->raw_size,
+				      trace->output);
+	}
+
+	fprintf(trace->output, "\n");
+
+	if (callchain_ret > 0)
+		trace__fprintf_callchain(trace, sample);
+	else if (callchain_ret < 0)
+		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+out:
+	return 0;
+}
+
+/*
+ * Print the location described by @al to @f.
+ *
+ * The DSO long name followed by '@' is printed when asked for (or when
+ * verbose) and a map is known.  Then comes "symbol+offset" when asked for
+ * (or when verbose) and a symbol is known; otherwise the address is
+ * printed in hex, taken from @al when it has a map and from the sample
+ * when it has none.
+ */
+static void print_location(FILE *f, struct perf_sample *sample,
+			   struct addr_location *al,
+			   bool print_dso, bool print_sym)
+{
+	const bool chatty = verbose > 0;
+
+	if ((chatty || print_dso) && al->map)
+		fprintf(f, "%s@", al->map->dso->long_name);
+
+	if ((chatty || print_sym) && al->sym) {
+		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
+			al->addr - al->sym->start);
+		return;
+	}
+
+	if (al->map)
+		fprintf(f, "0x%" PRIx64, al->addr);
+	else
+		fprintf(f, "0x%" PRIx64, sample->addr);
+}
+
+/*
+ * Handler for the software page fault events.
+ *
+ * Counts the fault as major or minor in the thread's state and, unless only
+ * a summary was requested, prints a line with the faulting code location,
+ * the faulting data address and the kind of map it was found in, followed
+ * by the callchain when one was resolved.
+ *
+ * Returns 0 on success, -1 if the sample was skipped because of the
+ * min_stack filter or because no per-thread state could be obtained.
+ */
+static int trace__pgfault(struct trace *trace,
+			  struct perf_evsel *evsel,
+			  union perf_event *event __maybe_unused,
+			  struct perf_sample *sample)
+{
+	struct thread *thread;
+	struct addr_location al;
+	/* 'd' = found in a variable map, 'x' = in a function map, '?' = not found. */
+	char map_type = 'd';
+	struct thread_trace *ttrace;
+	int err = -1;
+	int callchain_ret = 0;
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+	if (sample->callchain) {
+		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+		if (callchain_ret == 0) {
+			if (callchain_cursor.nr < trace->min_stack)
+				goto out_put;
+			callchain_ret = 1;
+		}
+	}
+
+	ttrace = thread__trace(thread, trace->output);
+	if (ttrace == NULL)
+		goto out_put;
+
+	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
+		ttrace->pfmaj++;
+	else
+		ttrace->pfmin++;
+
+	if (trace->summary_only)
+		goto out;
+
+	/* First location: the instruction that faulted. */
+	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
+			      sample->ip, &al);
+
+	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
+
+	fprintf(trace->output, "%sfault [",
+		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
+		"maj" : "min");
+
+	print_location(trace->output, sample, &al, false, true);
+
+	fprintf(trace->output, "] => ");
+
+	/* Second location: the address that was accessed. */
+	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
+				   sample->addr, &al);
+
+	/* Not in a variable map: retry as a function map address. */
+	if (!al.map) {
+		thread__find_addr_location(thread, sample->cpumode,
+					   MAP__FUNCTION, sample->addr, &al);
+
+		if (al.map)
+			map_type = 'x';
+		else
+			map_type = '?';
+	}
+
+	print_location(trace->output, sample, &al, true, false);
+
+	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+
+	if (callchain_ret > 0)
+		trace__fprintf_callchain(trace, sample);
+	else if (callchain_ret < 0)
+		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+out:
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
+}
+
+/*
+ * Latch the timestamp of the first suitable sample as trace->base_time, the
+ * origin for relative timestamps.  Nothing is done once a base time is set
+ * or when full timestamps were requested.
+ */
+static void trace__set_base_time(struct trace *trace,
+				 struct perf_evsel *evsel,
+				 struct perf_sample *sample)
+{
+	if (trace->base_time != 0 || trace->full_time)
+		return;
+
+	/*
+	 * Don't trust sample->time unconditionally: BPF events were not
+	 * setting PERF_SAMPLE_TIME, and other events may legitimately come
+	 * without it in the future, i.e. events we only care about for the
+	 * piece of information they carry and not for when they happened
+	 * (vfs_getname comes to mind).
+	 */
+	if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
+		return;
+
+	trace->base_time = sample->time;
+}
+
+/*
+ * perf_tool sample callback: unless the sample's thread is filtered out,
+ * make sure the base time is set and dispatch the sample to the handler
+ * attached to its evsel, counting it in trace->nr_events.  The handler's
+ * return value is ignored.  Always returns 0.
+ */
+static int trace__process_sample(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_sample *sample,
+				 struct perf_evsel *evsel,
+				 struct machine *machine __maybe_unused)
+{
+	struct trace *trace = container_of(tool, struct trace, tool);
+	tracepoint_handler handler = evsel->handler;
+	struct thread *thread;
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+	if (!(thread && thread__is_filtered(thread))) {
+		trace__set_base_time(trace, evsel, sample);
+
+		if (handler) {
+			++trace->nr_events;
+			handler(trace, evsel, event, sample);
+		}
+	}
+
+	thread__put(thread);
+	return 0;
+}
+
+/*
+ * Implement 'perf trace record': build an argument vector for 'perf record'
+ * that selects the syscall enter/exit tracepoints and/or the page fault
+ * events according to the trace configuration, append the user supplied
+ * @argv and run cmd_record() with it.
+ *
+ * Returns cmd_record()'s result, -ENOMEM if the argument vector cannot be
+ * allocated, or -1 if no usable syscall tracepoints exist.
+ */
+static int trace__record(struct trace *trace, int argc, const char **argv)
+{
+	unsigned int rec_argc, i, j;
+	const char **rec_argv;
+	int err;
+	const char * const record_args[] = {
+		"record",
+		"-R",
+		"-m", "1024",
+		"-c", "1",
+	};
+
+	const char * const sc_args[] = { "-e", };
+	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
+	const char * const majpf_args[] = { "-e", "major-faults" };
+	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
+	const char * const minpf_args[] = { "-e", "minor-faults" };
+	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
+
+	/* +1 is for the event string below */
+	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
+		majpf_args_nr + minpf_args_nr + argc;
+	/* One extra, zeroed slot keeps the vector NULL terminated. */
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
+	j = 0;
+	for (i = 0; i < ARRAY_SIZE(record_args); i++)
+		rec_argv[j++] = record_args[i];
+
+	if (trace->trace_syscalls) {
+		for (i = 0; i < sc_args_nr; i++)
+			rec_argv[j++] = sc_args[i];
+
+		/* event string may be different for older kernels - e.g., RHEL6 */
+		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
+			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
+		else if (is_valid_tracepoint("syscalls:sys_enter"))
+			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
+		else {
+			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
+			free(rec_argv);
+			return -1;
+		}
+	}
+
+	if (trace->trace_pgfaults & TRACE_PFMAJ)
+		for (i = 0; i < majpf_args_nr; i++)
+			rec_argv[j++] = majpf_args[i];
+
+	if (trace->trace_pgfaults & TRACE_PFMIN)
+		for (i = 0; i < minpf_args_nr; i++)
+			rec_argv[j++] = minpf_args[i];
+
+	for (i = 0; i < (unsigned int)argc; i++)
+		rec_argv[j++] = argv[i];
+
+	err = cmd_record(j, rec_argv);
+
+	/* Only the vector is owned here; the strings it points to are not. */
+	free(rec_argv);
+	return err;
+}
+
+static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
+
+/*
+ * Try to add the probe:vfs_getname tracepoint to @evlist, with
+ * trace__vfs_getname as its handler.
+ *
+ * Returns false when the tracepoint can't be instantiated or has no
+ * "pathname" field, true once the event was added to the list.
+ */
+static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
+{
+	struct perf_evsel *getname = perf_evsel__newtp("probe", "vfs_getname");
+	bool added = false;
+
+	if (IS_ERR(getname))
+		return added;
+
+	if (perf_evsel__field(getname, "pathname") != NULL) {
+		getname->handler = trace__vfs_getname;
+		perf_evlist__add(evlist, getname);
+		added = true;
+	} else {
+		/* Not the probe we need, don't keep it around */
+		perf_evsel__delete(getname);
+	}
+
+	return added;
+}
+
+/*
+ * Create a software event for the page fault counter selected by @config,
+ * sampling every occurrence (sample_period = 1) and handled by
+ * trace__pgfault.
+ *
+ * Returns the new evsel, or NULL if perf_evsel__new() fails.
+ */
+static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
+{
+	struct perf_event_attr attr = {
+		.type	   = PERF_TYPE_SOFTWARE,
+		.config	   = config,
+		.mmap_data = 1,
+	};
+	struct perf_evsel *pgfault;
+
+	attr.sample_period = 1;
+	event_attr_init(&attr);
+
+	pgfault = perf_evsel__new(&attr);
+	if (pgfault == NULL)
+		return NULL;
+
+	pgfault->handler = trace__pgfault;
+	return pgfault;
+}
+
+/*
+ * Dispatch one event read from the ring buffers.
+ *
+ * Anything that isn't a PERF_RECORD_SAMPLE goes to trace__process_event().
+ * Samples are routed to the handler of the evsel matching sample->id;
+ * samples with an unknown id and tracepoint samples without a raw payload
+ * are reported on trace->output and dropped.
+ */
+static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
+{
+	struct perf_evsel *evsel;
+	tracepoint_handler handler;
+
+	if (event->header.type != PERF_RECORD_SAMPLE) {
+		trace__process_event(trace, trace->host, event, sample);
+		return;
+	}
+
+	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
+	if (evsel == NULL) {
+		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
+		return;
+	}
+
+	trace__set_base_time(trace, evsel, sample);
+
+	/* A tracepoint handler has nothing to decode without the raw payload */
+	if (evsel->attr.type == PERF_TYPE_TRACEPOINT && sample->raw_data == NULL) {
+		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
+		       perf_evsel__name(evsel), sample->tid,
+		       sample->cpu, sample->raw_size);
+		return;
+	}
+
+	handler = evsel->handler;
+	handler(trace, evsel, event, sample);
+}
+
+/*
+ * Create the sys_enter and sys_exit syscall tracepoint events, set up the
+ * accessors for their "args" and "ret" fields and add both events to
+ * trace->evlist, also recording them in trace->syscalls.events.
+ *
+ * Returns 0 on success, -1 on failure, in which case whatever event had
+ * already been created is deleted again.
+ */
+static int trace__add_syscall_newtp(struct trace *trace)
+{
+	struct perf_evlist *evlist = trace->evlist;
+	struct perf_evsel *sys_enter, *sys_exit;
+
+	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
+	if (sys_enter == NULL)
+		return -1;
+
+	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
+		goto out_delete_sys_enter;
+
+	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
+	if (sys_exit == NULL)
+		goto out_delete_sys_enter;
+
+	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
+		goto out_delete_sys_exit;
+
+	perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
+	perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
+
+	perf_evlist__add(evlist, sys_enter);
+	perf_evlist__add(evlist, sys_exit);
+
+	/*
+	 * Only the user space part of the callchain leading to the syscall
+	 * is of interest, unless kernel_syscallchains was asked for, which
+	 * is meant as a debugging aid.
+	 */
+	if (callchain_param.enabled && !trace->kernel_syscallchains)
+		sys_exit->attr.exclude_callchain_kernel = 1;
+
+	trace->syscalls.events.sys_enter = sys_enter;
+	trace->syscalls.events.sys_exit  = sys_exit;
+
+	return 0;
+
+out_delete_sys_exit:
+	perf_evsel__delete_priv(sys_exit);
+out_delete_sys_enter:
+	perf_evsel__delete_priv(sys_enter);
+	return -1;
+}
+
+/*
+ * Build an "id" tracepoint filter expression out of the syscall ids in
+ * trace->ev_qualifier_ids (negated when trace->not_ev_qualifier is set)
+ * and append it to both the sys_enter and the sys_exit events.
+ *
+ * Returns -1 with errno set to ENOMEM if the expression can't be
+ * allocated, -1 if appending to sys_enter fails, otherwise the result of
+ * appending to sys_exit.
+ */
+static int trace__set_ev_qualifier_filter(struct trace *trace)
+{
+	struct perf_evsel *sys_enter = trace->syscalls.events.sys_enter;
+	struct perf_evsel *sys_exit = trace->syscalls.events.sys_exit;
+	int err = -1;
+	char *expr = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
+					      trace->ev_qualifier_ids.nr,
+					      trace->ev_qualifier_ids.entries);
+
+	if (expr == NULL) {
+		errno = ENOMEM;
+		return err;
+	}
+
+	/* sys_exit only gets the filter if sys_enter accepted it */
+	if (perf_evsel__append_tp_filter(sys_enter, expr) == 0)
+		err = perf_evsel__append_tp_filter(sys_exit, expr);
+
+	free(expr);
+	return err;
+}
+
+/*
+ * Install a pid filter on trace->evlist made of our own pid plus, if one
+ * of our ancestors has the comm "sshd", the tid of that ancestor.
+ *
+ * NOTE(review): going by the function name this is presumably meant to
+ * avoid a feedback loop where the tool traces the activity caused by its
+ * own output - confirm with the callers.
+ *
+ * Returns what perf_evlist__set_filter_pids() returns.
+ */
+static int trace__set_filter_loop_pids(struct trace *trace)
+{
+	pid_t pids[32] = {
+		getpid(),
+	};
+	unsigned int nr = 1;
+	struct thread *cur, *parent;
+
+	/* Walk up the parent chain, stopping at the first "sshd" found */
+	for (cur = machine__find_thread(trace->host, pids[0], pids[0]);
+	     cur != NULL && nr < ARRAY_SIZE(pids);
+	     cur = parent) {
+		parent = machine__find_thread(trace->host, cur->ppid, cur->ppid);
+		if (parent == NULL)
+			break;
+
+		if (strcmp(thread__comm_str(parent), "sshd") == 0) {
+			pids[nr++] = parent->tid;
+			break;
+		}
+	}
+
+	return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
+}
+/*
+ * Live tracing: add the events selected in @trace to trace->evlist,
+ * optionally prepare the workload given in @argv, open and mmap the
+ * events, then read and handle events until interrupted or until the
+ * ring buffers were drained after 'done' was set. The thread summary and
+ * the tool stats are printed at the end when asked for and err is zero.
+ *
+ * The evlist is deleted and trace->evlist set to NULL on all paths.
+ *
+ * Returns the value left in err by the last step that set it; it starts
+ * at -1 and is negative on the setup failure paths that set it.
+ */
+static int trace__run(struct trace *trace, int argc, const char **argv)
+{
+	struct perf_evlist *evlist = trace->evlist;
+	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
+	int err = -1, i;
+	unsigned long before;
+	const bool forks = argc > 0; /* a workload command line was given */
+	bool draining = false;
+
+	trace->live = true;
+
+	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
+		goto out_error_raw_syscalls;
+
+	if (trace->trace_syscalls)
+		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
+
+	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
+		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
+		if (pgfault_maj == NULL)
+			goto out_error_mem;
+		perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
+		perf_evlist__add(evlist, pgfault_maj);
+	}
+
+	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
+		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
+		if (pgfault_min == NULL)
+			goto out_error_mem;
+		perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
+		perf_evlist__add(evlist, pgfault_min);
+	}
+
+	if (trace->sched &&
+	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
+				   trace__sched_stat_runtime))
+		goto out_error_sched_stat_runtime;
+
+	/*
+	 * If a global cgroup was set, apply it to all the events without an
+	 * explicit cgroup. I.e.:
+	 *
+	 * 	trace -G A -e sched:*switch
+	 *
+	 * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
+	 * _and_ sched:sched_switch to the 'A' cgroup, while:
+	 *
+	 * trace -e sched:*switch -G A
+	 *
+	 * will only set the sched:sched_switch event to the 'A' cgroup, all the
+	 * other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
+	 * a cgroup (on the root cgroup, sys wide, etc).
+	 *
+	 * Multiple cgroups:
+	 *
+	 * trace -G A -e sched:*switch -G B
+	 *
+	 * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
+	 * to the 'B' cgroup.
+	 *
+	 * evlist__set_default_cgroup() grabs a reference of the passed cgroup
+	 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
+	 */
+	if (trace->cgroup)
+		evlist__set_default_cgroup(trace->evlist, trace->cgroup);
+
+	err = perf_evlist__create_maps(evlist, &trace->opts.target);
+	if (err < 0) {
+		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
+		goto out_delete_evlist;
+	}
+
+	err = trace__symbols_init(trace, evlist);
+	if (err < 0) {
+		fprintf(trace->output, "Problems initializing symbol libraries!\n");
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__config(evlist, &trace->opts, &callchain_param);
+
+	signal(SIGCHLD, sig_handler);
+	signal(SIGINT, sig_handler);
+
+	if (forks) {
+		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
+						    argv, false, NULL);
+		if (err < 0) {
+			fprintf(trace->output, "Couldn't run the workload!\n");
+			goto out_delete_evlist;
+		}
+	}
+
+	err = perf_evlist__open(evlist);
+	if (err < 0)
+		goto out_error_open;
+
+	err = bpf__apply_obj_config();
+	if (err) {
+		char errbuf[BUFSIZ];
+
+		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+		pr_err("ERROR: Apply config to BPF failed: %s\n",
+			 errbuf);
+		goto out_error_open;
+	}
+
+	/*
+	 * Better not use !target__has_task() here because we need to cover the
+	 * case where no threads were specified in the command line, but a
+	 * workload was, and in that case we will fill in the thread_map when
+	 * we fork the workload in perf_evlist__prepare_workload.
+	 */
+	if (trace->filter_pids.nr > 0)
+		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
+	else if (thread_map__pid(evlist->threads, 0) == -1)
+		err = trace__set_filter_loop_pids(trace);
+
+	if (err < 0)
+		goto out_error_mem;
+
+	if (trace->ev_qualifier_ids.nr > 0) {
+		err = trace__set_ev_qualifier_filter(trace);
+		if (err < 0)
+			goto out_errno;
+
+		pr_debug("event qualifier tracepoint filter: %s\n",
+			 trace->syscalls.events.sys_exit->filter);
+	}
+
+	err = perf_evlist__apply_filters(evlist, &evsel);
+	if (err < 0)
+		goto out_error_apply_filters;
+
+	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
+	if (err < 0)
+		goto out_error_mmap;
+
+	if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
+		perf_evlist__enable(evlist);
+
+	if (forks)
+		perf_evlist__start_workload(evlist);
+
+	if (trace->opts.initial_delay) {
+		usleep(trace->opts.initial_delay * 1000); /* initial_delay * 1000 usecs */
+		perf_evlist__enable(evlist);
+	}
+
+	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
+				  evlist->threads->nr > 1 ||
+				  perf_evlist__first(evlist)->attr.inherit;
+
+	/*
+	 * Now that we already used evsel->attr to ask the kernel to setup the
+	 * events, lets reuse evsel->attr.sample_max_stack as the limit in
+	 * trace__resolve_callchain(), allowing per-event max-stack settings
+	 * to override an explicitly set --max-stack global setting.
+	 */
+	evlist__for_each_entry(evlist, evsel) {
+		if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
+		    evsel->attr.sample_max_stack == 0)
+			evsel->attr.sample_max_stack = trace->max_stack;
+	}
+again:
+	before = trace->nr_events; /* to tell if this pass read any event */
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		union perf_event *event;
+		struct perf_mmap *md;
+
+		md = &evlist->mmap[i];
+		if (perf_mmap__read_init(md) < 0)
+			continue;
+
+		while ((event = perf_mmap__read_event(md)) != NULL) {
+			struct perf_sample sample;
+
+			++trace->nr_events;
+
+			err = perf_evlist__parse_sample(evlist, event, &sample);
+			if (err) {
+				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
+				goto next_event;
+			}
+
+			trace__handle_event(trace, event, &sample);
+next_event:
+			perf_mmap__consume(md);
+
+			if (interrupted)
+				goto out_disable;
+
+			if (done && !draining) { /* stop the events, keep reading what is left */
+				perf_evlist__disable(evlist);
+				draining = true;
+			}
+		}
+		perf_mmap__read_done(md);
+	}
+
+	if (trace->nr_events == before) {
+		int timeout = done ? 100 : -1; /* passed to perf_evlist__poll(): 100 once 'done' is set, -1 otherwise */
+
+		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
+			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
+				draining = true;
+
+			goto again;
+		}
+	} else {
+		goto again;
+	}
+
+out_disable:
+	thread__zput(trace->current);
+
+	perf_evlist__disable(evlist);
+
+	if (!err) {
+		if (trace->summary)
+			trace__fprintf_thread_summary(trace, trace->output);
+
+		if (trace->show_tool_stats) {
+			fprintf(trace->output, "Stats:\n "
+					       " vfs_getname : %" PRIu64 "\n"
+					       " proc_getname: %" PRIu64 "\n",
+				trace->stats.vfs_getname,
+				trace->stats.proc_getname);
+		}
+	}
+
+out_delete_evlist:
+	trace__symbols__exit(trace);
+
+	perf_evlist__delete(evlist);
+	cgroup__put(trace->cgroup);
+	trace->evlist = NULL;
+	trace->live = false;
+	return err;
+{ /* error reporting, only reachable via the goto labels; the block scopes errbuf */
+	char errbuf[BUFSIZ];
+
+out_error_sched_stat_runtime:
+	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
+	goto out_error;
+
+out_error_raw_syscalls:
+	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
+	goto out_error;
+
+out_error_mmap:
+	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
+	goto out_error;
+
+out_error_open:
+	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+
+out_error:
+	fprintf(trace->output, "%s\n", errbuf);
+	goto out_delete_evlist;
+
+out_error_apply_filters:
+	fprintf(trace->output,
+		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
+		evsel->filter, perf_evsel__name(evsel), errno,
+		str_error_r(errno, errbuf, sizeof(errbuf)));
+	goto out_delete_evlist;
+}
+out_error_mem:
+	fprintf(trace->output, "Not enough memory to run!\n");
+	goto out_delete_evlist;
+
+out_errno:
+	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
+	goto out_delete_evlist;
+}
+
+/*
+ * Replay mode: process the events stored in the perf.data file named by
+ * input_name instead of tracing live.
+ *
+ * Sets up trace->tool with the perf_event__process_*() callbacks, hooks
+ * trace__sys_enter/trace__sys_exit to the (raw_)syscalls tracepoints found
+ * in the file, trace__pgfault to the software page fault events, and then
+ * processes all events in timestamp order. The thread summary is printed
+ * at the end if requested and processing succeeded.
+ *
+ * Returns -1 if the session can't be created or set up, otherwise the
+ * result of the last setup step or of perf_session__process_events().
+ */
+static int trace__replay(struct trace *trace)
+{
+	const struct perf_evsel_str_handler handlers[] = {
+		{ "probe:vfs_getname",	     trace__vfs_getname, },
+	};
+	struct perf_data data = {
+		.file      = {
+			.path = input_name,
+		},
+		.mode      = PERF_DATA_MODE_READ,
+		.force     = trace->force,
+	};
+	struct perf_session *session;
+	struct perf_evsel *evsel;
+	int err = -1;
+
+	trace->tool.sample	  = trace__process_sample;
+	trace->tool.mmap	  = perf_event__process_mmap;
+	trace->tool.mmap2	  = perf_event__process_mmap2;
+	trace->tool.comm	  = perf_event__process_comm;
+	trace->tool.exit	  = perf_event__process_exit;
+	trace->tool.fork	  = perf_event__process_fork;
+	trace->tool.attr	  = perf_event__process_attr;
+	trace->tool.tracing_data  = perf_event__process_tracing_data;
+	trace->tool.build_id	  = perf_event__process_build_id;
+	trace->tool.namespaces	  = perf_event__process_namespaces;
+
+	trace->tool.ordered_events = true;
+	trace->tool.ordering_requires_timestamps = true;
+
+	/* add tid to output */
+	trace->multiple_threads = true;
+
+	session = perf_session__new(&data, false, &trace->tool);
+	if (session == NULL)
+		return -1;
+
+	if (trace->opts.target.pid)
+		symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
+
+	if (trace->opts.target.tid)
+		symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
+
+	if (symbol__init(&session->header.env) < 0)
+		goto out;
+
+	trace->host = &session->machines.host;
+
+	err = perf_session__set_tracepoints_handlers(session, handlers);
+	if (err)
+		goto out;
+
+	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+						     "raw_syscalls:sys_enter");
+	/* older kernels have syscalls tp versus raw_syscalls */
+	if (evsel == NULL)
+		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+							     "syscalls:sys_enter");
+
+	/* Not having the event in the file is fine, failing to set it up isn't */
+	if (evsel &&
+	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
+	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
+		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
+		goto out;
+	}
+
+	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+						     "raw_syscalls:sys_exit");
+	if (evsel == NULL)
+		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+							     "syscalls:sys_exit");
+	if (evsel &&
+	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
+	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
+		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
+		goto out;
+	}
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
+		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
+		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
+		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
+			evsel->handler = trace__pgfault;
+	}
+
+	setup_pager();
+
+	err = perf_session__process_events(session);
+	if (err)
+		/* Newline terminated, like the other pr_err() messages here */
+		pr_err("Failed to process events, error %d\n", err);
+	else if (trace->summary)
+		trace__fprintf_thread_summary(trace, trace->output);
+
+out:
+	perf_session__delete(session);
+
+	return err;
+}
+
+/*
+ * Print the heading of the per thread summary to @fp.
+ *
+ * Returns what fprintf() returned, converted to size_t.
+ */
+static size_t trace__fprintf_threads_header(FILE *fp)
+{
+	return fprintf(fp, "\n Summary of events:\n\n");
+}
+/*
+ * Resorted view of the per syscall stats of a thread. Each entry carries
+ * the syscall id, its stats and the total time spent in it in msecs (the
+ * number of calls times the average duration); entries are compared with
+ * a->msecs > b->msecs. The body below fills in one entry from an int_node
+ * whose ->i is the syscall id and whose ->priv points to the stats.
+ *
+ * NOTE(review): the resulting order (presumably longest total time first)
+ * depends on how DEFINE_RESORT_RB uses the comparison, which is defined
+ * elsewhere - confirm there.
+ */
+DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
+	struct stats 	*stats;
+	double		msecs;
+	int		syscall;
+)
+{
+	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
+	struct stats *stats = source->priv;
+
+	entry->syscall = source->i;
+	entry->stats   = stats;
+	entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0; /* 0 when there are no stats */
+}
+/*
+ * Print to @fp a table with one line per syscall that has stats in
+ * @ttrace->syscall_stats: name, number of calls, total, min, avg and max
+ * time in msecs and the stddev as a percentage of the average.
+ *
+ * Returns the number of characters printed, 0 if the resorted copy of the
+ * stats can't be set up.
+ */
+static size_t thread__dump_stats(struct thread_trace *ttrace,
+				 struct trace *trace, FILE *fp)
+{
+	size_t printed = 0;
+	struct syscall *sc;
+	struct rb_node *nd;
+	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
+
+	if (syscall_stats == NULL)
+		return 0;
+
+	printed += fprintf(fp, "\n");
+
+	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
+	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
+	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
+
+	resort_rb__for_each_entry(nd, syscall_stats) {
+		struct stats *stats = syscall_stats_entry->stats;
+		if (stats) { /* entries without stats are not printed */
+			double min = (double)(stats->min) / NSEC_PER_MSEC;
+			double max = (double)(stats->max) / NSEC_PER_MSEC;
+			double avg = avg_stats(stats);
+			double pct;
+			u64 n = (u64) stats->n;
+
+			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; /* computed before avg is scaled to msecs */
+			avg /= NSEC_PER_MSEC;
+
+			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
+			printed += fprintf(fp, "   %-15s", sc->name);
+			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
+					   n, syscall_stats_entry->msecs, min, avg);
+			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
+		}
+	}
+
+	resort_rb__delete(syscall_stats);
+	printed += fprintf(fp, "\n\n");
+
+	return printed;
+}
+
+/*
+ * Print the summary for one thread to @fp: comm and tid, its number of
+ * events and the share of trace->nr_events that represents, major and
+ * minor fault counts when non-zero, the runtime in msecs when trace->sched
+ * is set, followed by the per syscall table from thread__dump_stats().
+ *
+ * Returns the number of characters printed, 0 for a thread without a
+ * thread_trace attached.
+ */
+static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
+{
+	struct thread_trace *ttrace = thread__priv(thread);
+	size_t printed;
+	double ratio;
+
+	if (ttrace == NULL)
+		return 0;
+
+	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
+
+	printed  = fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
+	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
+	printed += fprintf(fp, "%.1f%%", ratio);
+
+	if (ttrace->pfmaj)
+		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
+
+	if (ttrace->pfmin)
+		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
+
+	/* Either way the line ends here, with or without the runtime */
+	if (trace->sched) {
+		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
+	} else if (fputc('\n', fp) != EOF) {
+		printed++;
+	}
+
+	return printed + thread__dump_stats(ttrace, trace, fp);
+}
+
+/* Number of events accounted to @ttrace, 0 when there is no @ttrace. */
+static unsigned long thread__nr_events(struct thread_trace *ttrace)
+{
+	if (ttrace == NULL)
+		return 0;
+
+	return ttrace->nr_events;
+}
+/*
+ * Resorted view of threads, compared by the number of events accounted in
+ * each thread's ->priv (a thread without one counts as 0 events). The body
+ * below makes an entry point to the thread embedding the rb_node.
+ *
+ * NOTE(review): whether this yields ascending or descending order depends
+ * on how DEFINE_RESORT_RB uses the comparison, defined elsewhere.
+ */
+DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
+	struct thread *thread;
+)
+{
+	entry->thread = rb_entry(nd, struct thread, rb_node);
+}
+
+/*
+ * Print the summary heading followed by the summary of every thread of
+ * trace->host, one threads table bucket at a time, each bucket resorted
+ * by number of events.
+ *
+ * Returns the number of characters printed, or 0, after printing an error
+ * message to @fp, if resorting a bucket fails.
+ */
+static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
+{
+	size_t total = trace__fprintf_threads_header(fp);
+	struct rb_node *node;
+	int i;
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
+
+		if (threads == NULL) {
+			fputs("Error sorting output by nr_events!\n", fp);
+			return 0;
+		}
+
+		resort_rb__for_each_entry(node, threads) {
+			total += trace__fprintf_thread(fp, threads_entry->thread, trace);
+		}
+
+		resort_rb__delete(threads);
+	}
+
+	return total;
+}
+/*
+ * Option callback: store @str, converted with atof(), in
+ * trace->duration_filter, with opt->value pointing to the struct trace.
+ * Always returns 0.
+ *
+ * NOTE(review): atof() doesn't report conversion errors, so a string that
+ * isn't a number is presumably stored as 0 rather than rejected.
+ */
+static int trace__set_duration(const struct option *opt, const char *str,
+			       int unset __maybe_unused)
+{
+	struct trace *trace = opt->value;
+
+	trace->duration_filter = atof(str);
+	return 0;
+}
+
+/*
+ * Option callback: turn the CSV list of pids in @str into
+ * trace->filter_pids, with our own pid prepended as entry 0.
+ *
+ * Returns 0 on success, -1 if the list can't be created from @str or the
+ * array can't be allocated, in which case trace->filter_pids is left
+ * empty.
+ */
+static int trace__set_filter_pids(const struct option *opt, const char *str,
+				  int unset __maybe_unused)
+{
+	int ret = -1;
+	size_t i;
+	struct trace *trace = opt->value;
+	/*
+	 * FIXME: introduce a intarray class, plain parse csv and create a
+	 * { int nr, int entries[] } struct...
+	 */
+	struct intlist *list = intlist__new(str);
+
+	if (list == NULL)
+		return -1;
+
+	/* +1 for our own pid, stored in entries[0] */
+	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
+	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
+
+	if (trace->filter_pids.entries == NULL) {
+		/* Don't leave a non-zero count next to a NULL array */
+		trace->filter_pids.nr = 0;
+		goto out;
+	}
+
+	trace->filter_pids.entries[0] = getpid();
+
+	for (i = 1; i < trace->filter_pids.nr; ++i)
+		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
+
+	ret = 0;
+out:
+	/* The pids were copied out, the list isn't needed on either path */
+	intlist__delete(list);
+	return ret;
+}
+
+/*
+ * Open @filename for writing and make it trace->output.
+ *
+ * An existing, non-empty @filename is first renamed to "<filename>.old",
+ * after removing a previous file of that name; failures of those two
+ * steps are ignored.
+ *
+ * Returns 0 on success, -errno if fopen() fails.
+ */
+static int trace__open_output(struct trace *trace, const char *filename)
+{
+	struct stat st;
+
+	if (stat(filename, &st) == 0 && st.st_size != 0) {
+		char backup[PATH_MAX];
+
+		scnprintf(backup, sizeof(backup), "%s.old", filename);
+		unlink(backup);
+		rename(filename, backup);
+	}
+
+	trace->output = fopen(filename, "w");
+	if (trace->output == NULL)
+		return -errno;
+
+	return 0;
+}
+
+/*
+ * Option callback: OR the page fault kinds named by @str ("all", "maj" or
+ * "min") into the int that opt->value points to.
+ *
+ * Returns 0 on success, -1 for any other @str, leaving the int untouched.
+ */
+static int parse_pagefaults(const struct option *opt, const char *str,
+			    int unset __maybe_unused)
+{
+	int *trace_pgfaults = opt->value;
+	int kinds;
+
+	if (!strcmp(str, "all"))
+		kinds = TRACE_PFMAJ | TRACE_PFMIN;
+	else if (!strcmp(str, "maj"))
+		kinds = TRACE_PFMAJ;
+	else if (!strcmp(str, "min"))
+		kinds = TRACE_PFMIN;
+	else
+		return -1;
+
+	*trace_pgfaults |= kinds;
+	return 0;
+}
+
+/* Make @handler the handler of every event currently in @evlist. */
+static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		pos->handler = handler;
+	}
+}
+
+/*
+ * XXX: Hackish, just splitting the combined -e+--event (syscalls
+ * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
+ * existing facilities unchanged (trace->ev_qualifier + parse_options()).
+ *
+ * It'd be better to introduce a parse_options() variant that would return a
+ * list with the terms it didn't match to an event...
+ *
+ * Each comma separated term of @str that is a syscall name, a glob matching
+ * one, or the name of a readable file in the strace groups directory goes
+ * to lists[1], which becomes trace->ev_qualifier. All other terms go to
+ * lists[0], which is handed to parse_events_option(). A leading '!' sets
+ * trace->not_ev_qualifier.
+ *
+ * @str is modified in place while being split (',' -> '\0') and restored
+ * before returning.
+ *
+ * Returns -1 on failure here, otherwise 0 or, when there are non-syscall
+ * terms, whatever parse_events_option() returns for them.
+ */
+static int trace__parse_events_option(const struct option *opt, const char *str,
+				      int unset __maybe_unused)
+{
+	struct trace *trace = (struct trace *)opt->value;
+	const char *s = str;
+	char *sep = NULL, *lists[2] = { NULL, NULL, };
+	int len = strlen(str) + 1, err = -1, list, idx;
+	char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
+	char group_name[PATH_MAX];
+
+	if (strace_groups_dir == NULL)
+		return -1;
+
+	if (*s == '!') {
+		++s;
+		trace->not_ev_qualifier = true;
+	}
+
+	while (1) {
+		if ((sep = strchr(s, ',')) != NULL)
+			*sep = '\0';
+
+		list = 0;
+		if (syscalltbl__id(trace->sctbl, s) >= 0 ||
+		    syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
+			list = 1;
+		} else {
+			path__join(group_name, sizeof(group_name), strace_groups_dir, s);
+			if (access(group_name, R_OK) == 0)
+				list = 1;
+		}
+
+		/*
+		 * Each list is allocated with room for all of @str, so
+		 * appending terms taken from @str can't overflow it.
+		 */
+		if (lists[list]) {
+			sprintf(lists[list] + strlen(lists[list]), ",%s", s);
+		} else {
+			lists[list] = malloc(len);
+			if (lists[list] == NULL)
+				goto out;
+			strcpy(lists[list], s);
+		}
+
+		if (!sep)
+			break;
+
+		*sep = ',';
+		s = sep + 1;
+	}
+
+	if (lists[1] != NULL) {
+		struct strlist_config slist_config = {
+			.dirname = strace_groups_dir,
+		};
+
+		trace->ev_qualifier = strlist__new(lists[1], &slist_config);
+		if (trace->ev_qualifier == NULL) {
+			fputs("Not enough memory to parse event qualifier", trace->output);
+			goto out;
+		}
+
+		if (trace__validate_ev_qualifier(trace))
+			goto out;
+	}
+
+	err = 0;
+
+	if (lists[0]) {
+		struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
+					       "event selector. use 'perf list' to list available events",
+					       parse_events_option);
+		err = parse_events_option(&o, lists[0], 0);
+	}
+out:
+	/*
+	 * The groups directory name and the two lists were only needed
+	 * while parsing, release them on both the success and error paths.
+	 */
+	free(strace_groups_dir);
+	free(lists[0]);
+	free(lists[1]);
+
+	if (sep)
+		*sep = ',';
+
+	return err;
+}
+
+/*
+ * Option callback for the cgroup option: while trace->evlist is still
+ * empty the cgroup named by @str is stored in trace->cgroup, otherwise
+ * the option is handed over to parse_cgroups().
+ *
+ * Returns 0 in the first case, what parse_cgroups() returns in the second.
+ */
+static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
+{
+	struct trace *trace = opt->value;
+
+	if (list_empty(&trace->evlist->entries)) {
+		trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
+		return 0;
+	}
+
+	return parse_cgroups(opt, str, unset);
+}
+/*
+ * Entry point for 'perf trace'.
+ *
+ * Parses the command line into a struct trace and then either hands over
+ * to trace__record() ('perf trace record ...'), replays the file given
+ * with -i/--input via trace__replay(), or traces live via trace__run().
+ *
+ * Returns the value of whichever of those ran, or an error value when the
+ * setup fails before getting there.
+ */
+int cmd_trace(int argc, const char **argv)
+{
+	const char *trace_usage[] = {
+		"perf trace [<options>] [<command>]",
+		"perf trace [<options>] -- <command> [<options>]",
+		"perf trace record [<options>] [<command>]",
+		"perf trace record [<options>] -- <command> [<options>]",
+		NULL
+	};
+	struct trace trace = {
+		.syscalls = {
+			. max = -1,
+		},
+		.opts = {
+			.target = {
+				.uid	   = UINT_MAX,
+				.uses_mmap = true,
+			},
+			.user_freq     = UINT_MAX,
+			.user_interval = ULLONG_MAX,
+			.no_buffering  = true,
+			.mmap_pages    = UINT_MAX, /* sentinel, still UINT_MAX below if the user didn't set it */
+			.proc_map_timeout  = 500,
+		},
+		.output = stderr,
+		.show_comm = true,
+		.trace_syscalls = true,
+		.kernel_syscallchains = false,
+		.max_stack = UINT_MAX, /* sentinel, replaced by a default below if the user didn't set it */
+	};
+	const char *output_name = NULL;
+	const struct option trace_options[] = {
+	OPT_CALLBACK('e', "event", &trace, "event",
+		     "event/syscall selector. use 'perf list' to list available events",
+		     trace__parse_events_option),
+	OPT_BOOLEAN(0, "comm", &trace.show_comm,
+		    "show the thread COMM next to its id"),
+	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
+	OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
+		     trace__parse_events_option),
+	OPT_STRING('o', "output", &output_name, "file", "output file name"),
+	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
+	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
+		    "trace events on existing process id"),
+	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
+		    "trace events on existing thread id"),
+	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
+		     "pids to filter (by the kernel)", trace__set_filter_pids),
+	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
+		    "system-wide collection from all CPUs"),
+	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
+		    "list of cpus to monitor"),
+	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
+		    "child tasks do not inherit counters"),
+	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
+		     "number of mmap data pages",
+		     perf_evlist__parse_mmap_pages),
+	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
+		   "user to profile"),
+	OPT_CALLBACK(0, "duration", &trace, "float",
+		     "show only events with duration > N.M ms",
+		     trace__set_duration),
+	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
+	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+	OPT_BOOLEAN('T', "time", &trace.full_time,
+		    "Show full timestamp, not time relative to first start"),
+	OPT_BOOLEAN(0, "failure", &trace.failure_only,
+		    "Show only syscalls that failed"),
+	OPT_BOOLEAN('s', "summary", &trace.summary_only,
+		    "Show only syscall summary with statistics"),
+	OPT_BOOLEAN('S', "with-summary", &trace.summary,
+		    "Show all syscalls and summary with statistics"),
+	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
+		     "Trace pagefaults", parse_pagefaults, "maj"),
+	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
+	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+	OPT_CALLBACK(0, "call-graph", &trace.opts,
+		     "record_mode[,record_size]", record_callchain_help,
+		     &record_parse_callchain_opt),
+	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
+		    "Show the kernel callchains on the syscall exit path"),
+	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
+		     "Set the minimum stack depth when parsing the callchain, "
+		     "anything below the specified depth will be ignored."),
+	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
+		     "Set the maximum stack depth when parsing the callchain, "
+		     "anything beyond the specified depth will be ignored. "
+		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
+	OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
+			"print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
+	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
+			"per thread proc mmap processing timeout in ms"),
+	OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
+		     trace__parse_cgroups),
+	OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
+		     "ms to wait before starting measurement after program "
+		     "start"),
+	OPT_END()
+	};
+	bool __maybe_unused max_stack_user_set = true;
+	bool mmap_pages_user_set = true;
+	const char * const trace_subcommands[] = { "record", NULL };
+	int err;
+	char bf[BUFSIZ];
+
+	signal(SIGSEGV, sighandler_dump_stack);
+	signal(SIGFPE, sighandler_dump_stack);
+
+	trace.evlist = perf_evlist__new();
+	trace.sctbl = syscalltbl__new();
+
+	if (trace.evlist == NULL || trace.sctbl == NULL) {
+		pr_err("Not enough memory to run!\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
+				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+
+	if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
+		usage_with_options_msg(trace_usage, trace_options,
+				       "cgroup monitoring only available in system-wide mode");
+	}
+
+	err = bpf__setup_stdout(trace.evlist);
+	if (err) {
+		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
+		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
+		goto out;
+	}
+
+	err = -1; /* for the 'goto out' paths below that don't set err themselves */
+
+	if (trace.trace_pgfaults) {
+		trace.opts.sample_address = true;
+		trace.opts.sample_time = true;
+	}
+
+	if (trace.opts.mmap_pages == UINT_MAX)
+		mmap_pages_user_set = false;
+
+	if (trace.max_stack == UINT_MAX) {
+		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
+		max_stack_user_set = false;
+	}
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
+		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
+	}
+#endif
+
+	if (callchain_param.enabled) {
+		if (!mmap_pages_user_set && geteuid() == 0)
+			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
+
+		symbol_conf.use_callchain = true;
+	}
+
+	if (trace.evlist->nr_entries > 0)
+		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
+
+	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
+		return trace__record(&trace, argc-1, &argv[1]);
+
+	/* summary_only implies summary option, but don't overwrite summary if set */
+	if (trace.summary_only)
+		trace.summary = trace.summary_only;
+
+	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
+	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
+		pr_err("Please specify something to trace.\n");
+		return -1;
+	}
+
+	if (!trace.trace_syscalls && trace.ev_qualifier) {
+		pr_err("The -e option can't be used with --no-syscalls.\n");
+		goto out;
+	}
+
+	if (output_name != NULL) {
+		err = trace__open_output(&trace, output_name);
+		if (err < 0) {
+			perror("failed to create output file");
+			goto out;
+		}
+	}
+
+	trace.open_id = syscalltbl__id(trace.sctbl, "open");
+
+	err = target__validate(&trace.opts.target);
+	if (err) {
+		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
+		fprintf(trace.output, "%s", bf);
+		goto out_close;
+	}
+
+	err = target__parse_uid(&trace.opts.target);
+	if (err) {
+		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
+		fprintf(trace.output, "%s", bf);
+		goto out_close;
+	}
+
+	if (!argc && target__none(&trace.opts.target)) /* no workload and no target: trace system wide */
+		trace.opts.target.system_wide = true;
+
+	if (input_name)
+		err = trace__replay(&trace);
+	else
+		err = trace__run(&trace, argc, argv);
+
+out_close:
+	if (output_name != NULL) /* only close what trace__open_output() opened */
+		fclose(trace.output);
+out:
+	return err;
+}
diff --git a/builtin-version.c b/builtin-version.c
new file mode 100644
index 0000000..50df168
--- /dev/null
+++ b/builtin-version.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "builtin.h"
+#include "perf.h"
+#include "color.h"
+#include <linux/compiler.h>
+#include <tools/config.h>
+#include <stdio.h>
+#include <string.h>
+#include <subcmd/parse-options.h>
+
+int version_verbose;	/* when 1, cmd_version() also prints the build options */
+
+static struct version {
+	bool	build_options;	/* set by --build-options */
+} version;
+
+/* parse_options() walks this table until it reaches the OPT_END() entry. */
+static struct option version_options[] = {
+	OPT_BOOLEAN(0, "build-options", &version.build_options,
+		    "display the build options"),
+	OPT_END(),
+};
+
+static const char * const version_usage[] = {
+	"perf version [<options>]",
+	NULL
+};
+
+/*
+ * Print @status as "[ xxx ]": red when it is exactly "OFF", green otherwise.
+ */
+static void on_off_print(const char *status)
+{
+	const char *colour = strcmp(status, "OFF") ? PERF_COLOR_GREEN : PERF_COLOR_RED;
+
+	printf("[ ");
+	color_fprintf(stdout, colour, "%-3s", status);
+	printf(" ]");
+}
+
+/* Print one report line: @name right-aligned, the coloured @status, "# @macro". */
+static void status_print(const char *name, const char *macro, const char *status)
+{
+	fprintf(stdout, "%22s: ", name);
+	on_off_print(status);
+	fprintf(stdout, "  # %s\n", macro);
+}
+
+/* Print the line for feature __m: "on" if IS_BUILTIN(__d) is true, else "OFF". */
+#define STATUS(__d, __m)					\
+	status_print(#__m, #__d, IS_BUILTIN(__d) ? "on" : "OFF")
+
+/*
+ * Print one status line per optional build feature.  The libaudit line is
+ * emitted only when HAVE_SYSCALL_TABLE_SUPPORT is not defined.
+ */
+static void library_status(void)
+{
+	STATUS(HAVE_DWARF_SUPPORT, dwarf);
+	STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations);
+	STATUS(HAVE_GLIBC_SUPPORT, glibc);
+	STATUS(HAVE_GTK2_SUPPORT, gtk2);
+#ifndef HAVE_SYSCALL_TABLE_SUPPORT
+	STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit);
+#endif
+	STATUS(HAVE_SYSCALL_TABLE_SUPPORT, syscall_table);
+	STATUS(HAVE_LIBBFD_SUPPORT, libbfd);
+	STATUS(HAVE_LIBELF_SUPPORT, libelf);
+	STATUS(HAVE_LIBNUMA_SUPPORT, libnuma);
+	STATUS(HAVE_LIBNUMA_SUPPORT, numa_num_possible_cpus);
+	STATUS(HAVE_LIBPERL_SUPPORT, libperl);
+	STATUS(HAVE_LIBPYTHON_SUPPORT, libpython);
+	STATUS(HAVE_SLANG_SUPPORT, libslang);
+	STATUS(HAVE_LIBCRYPTO_SUPPORT, libcrypto);
+	STATUS(HAVE_LIBUNWIND_SUPPORT, libunwind);
+	STATUS(HAVE_DWARF_SUPPORT, libdw-dwarf-unwind);
+	STATUS(HAVE_ZLIB_SUPPORT, zlib);
+	STATUS(HAVE_LZMA_SUPPORT, lzma);
+	STATUS(HAVE_AUXTRACE_SUPPORT, get_cpuid);
+	STATUS(HAVE_LIBBPF_SUPPORT, bpf);
+}
+
+/* 'perf version': print the version, plus the build options when requested. */
+int cmd_version(int argc, const char **argv)
+{
+	argc = parse_options(argc, argv, version_options, version_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+
+	fprintf(stdout, "perf version %s\n", perf_version_string);
+	/* requested by --build-options, or by version_verbose being 1 */
+	if (version_verbose == 1 || version.build_options)
+		library_status();
+	return 0;
+}
diff --git a/builtin.h b/builtin.h
new file mode 100644
index 0000000..05745f3
--- /dev/null
+++ b/builtin.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BUILTIN_H
+#define BUILTIN_H
+
+#include "util/util.h"
+
+extern const char perf_usage_string[];
+extern const char perf_more_info_string[];
+
+void list_common_cmds_help(void);
+const char *help_unknown_cmd(const char *cmd);
+/* Entry points of the built-in perf commands, one function per command. */
+int cmd_annotate(int argc, const char **argv);
+int cmd_bench(int argc, const char **argv);
+int cmd_buildid_cache(int argc, const char **argv);
+int cmd_buildid_list(int argc, const char **argv);
+int cmd_config(int argc, const char **argv);
+int cmd_c2c(int argc, const char **argv);
+int cmd_diff(int argc, const char **argv);
+int cmd_evlist(int argc, const char **argv);
+int cmd_help(int argc, const char **argv);
+int cmd_sched(int argc, const char **argv);
+int cmd_kallsyms(int argc, const char **argv);
+int cmd_list(int argc, const char **argv);
+int cmd_record(int argc, const char **argv);
+int cmd_report(int argc, const char **argv);
+int cmd_stat(int argc, const char **argv);
+int cmd_timechart(int argc, const char **argv);
+int cmd_top(int argc, const char **argv);
+int cmd_script(int argc, const char **argv);
+int cmd_version(int argc, const char **argv);
+int cmd_probe(int argc, const char **argv);
+int cmd_kmem(int argc, const char **argv);
+int cmd_lock(int argc, const char **argv);
+int cmd_kvm(int argc, const char **argv);
+int cmd_test(int argc, const char **argv);
+int cmd_trace(int argc, const char **argv);
+int cmd_inject(int argc, const char **argv);
+int cmd_mem(int argc, const char **argv);
+int cmd_data(int argc, const char **argv);
+int cmd_ftrace(int argc, const char **argv);
+
+int find_scripts(char **scripts_array, char **scripts_path_array);
+#endif
diff --git a/check-headers.sh b/check-headers.sh
new file mode 100755
index 0000000..9aff89b
--- /dev/null
+++ b/check-headers.sh
@@ -0,0 +1,89 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+HEADERS='
+include/uapi/drm/drm.h
+include/uapi/drm/i915_drm.h
+include/uapi/linux/fcntl.h
+include/uapi/linux/kcmp.h
+include/uapi/linux/kvm.h
+include/uapi/linux/perf_event.h
+include/uapi/linux/prctl.h
+include/uapi/linux/sched.h
+include/uapi/linux/stat.h
+include/uapi/linux/vhost.h
+include/uapi/sound/asound.h
+include/linux/hash.h
+include/uapi/linux/hw_breakpoint.h
+arch/x86/include/asm/disabled-features.h
+arch/x86/include/asm/required-features.h
+arch/x86/include/asm/cpufeatures.h
+arch/arm/include/uapi/asm/perf_regs.h
+arch/arm64/include/uapi/asm/perf_regs.h
+arch/powerpc/include/uapi/asm/perf_regs.h
+arch/s390/include/uapi/asm/perf_regs.h
+arch/x86/include/uapi/asm/perf_regs.h
+arch/x86/include/uapi/asm/kvm.h
+arch/x86/include/uapi/asm/kvm_perf.h
+arch/x86/include/uapi/asm/svm.h
+arch/x86/include/uapi/asm/unistd.h
+arch/x86/include/uapi/asm/vmx.h
+arch/powerpc/include/uapi/asm/kvm.h
+arch/s390/include/uapi/asm/kvm.h
+arch/s390/include/uapi/asm/kvm_perf.h
+arch/s390/include/uapi/asm/ptrace.h
+arch/s390/include/uapi/asm/sie.h
+arch/arm/include/uapi/asm/kvm.h
+arch/arm64/include/uapi/asm/kvm.h
+arch/alpha/include/uapi/asm/errno.h
+arch/mips/include/asm/errno.h
+arch/mips/include/uapi/asm/errno.h
+arch/parisc/include/uapi/asm/errno.h
+arch/powerpc/include/uapi/asm/errno.h
+arch/sparc/include/uapi/asm/errno.h
+arch/x86/include/uapi/asm/errno.h
+arch/powerpc/include/uapi/asm/unistd.h
+include/asm-generic/bitops/arch_hweight.h
+include/asm-generic/bitops/const_hweight.h
+include/asm-generic/bitops/__fls.h
+include/asm-generic/bitops/fls.h
+include/asm-generic/bitops/fls64.h
+include/linux/coresight-pmu.h
+include/uapi/asm-generic/errno.h
+include/uapi/asm-generic/errno-base.h
+include/uapi/asm-generic/ioctls.h
+include/uapi/asm-generic/mman-common.h
+'
+
+check () {	# usage: check FILE [DIFF-OPTION...]
+  file=$1
+  # every remaining argument is a diff option; wrap each one in double quotes
+  shift
+  opts=
+  while [ -n "$*" ]; do
+    opts="$opts \"$1\""
+    shift
+  done
+  # the quotes are re-parsed by eval below, so options with spaces stay whole
+  cmd="diff $opts ../$file ../../$file > /dev/null"
+  # warn when ../../$file is missing or diff reports a difference
+  test -f ../../$file &&
+  eval $cmd || echo "Warning: Kernel ABI header at 'tools/$file' differs from latest version at '$file'" >&2
+}
+
+
+# Check if we have the kernel headers (tools/perf/../../include), else
+# we're probably on a detached tarball, so no point in trying to check
+# differences.
+test -d ../../include || exit 0
+
+# simple diff check of every file in $HEADERS (-B: ignore blank-line changes)
+for i in $HEADERS; do
+  check $i -B
+done
+
+# diff with extra ignore lines (-I RE: ignore changes on lines matching RE)
+check arch/x86/lib/memcpy_64.S        -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"
+check arch/x86/lib/memset_64.S        -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"
+check include/uapi/asm-generic/mman.h -I "^#include <\(uapi/\)*asm-generic/mman-common.h>"
+check include/uapi/linux/mman.h       -I "^#include <\(uapi/\)*asm/mman.h>"
diff --git a/command-list.txt b/command-list.txt
new file mode 100644
index 0000000..2d0caf2
--- /dev/null
+++ b/command-list.txt
@@ -0,0 +1,32 @@
+#
+# List of known perf commands.
+# command name			category [deprecated] [common]
+#
+perf-annotate			mainporcelain common
+perf-archive			mainporcelain common
+perf-bench			mainporcelain common
+perf-buildid-cache		mainporcelain common
+perf-buildid-list		mainporcelain common
+perf-data			mainporcelain common
+perf-diff			mainporcelain common
+perf-c2c			mainporcelain common
+perf-config			mainporcelain common
+perf-evlist			mainporcelain common
+perf-ftrace			mainporcelain common
+perf-inject			mainporcelain common
+perf-kallsyms			mainporcelain common
+perf-kmem			mainporcelain common
+perf-kvm			mainporcelain common
+perf-list			mainporcelain common
+perf-lock			mainporcelain common
+perf-mem			mainporcelain common
+perf-probe			mainporcelain full
+perf-record			mainporcelain common
+perf-report			mainporcelain common
+perf-sched			mainporcelain common
+perf-script			mainporcelain common
+perf-stat			mainporcelain common
+perf-test			mainporcelain common
+perf-timechart			mainporcelain common
+perf-top			mainporcelain common
+perf-trace			mainporcelain audit
diff --git a/design.txt b/design.txt
new file mode 100644
index 0000000..a28dca2
--- /dev/null
+++ b/design.txt
@@ -0,0 +1,462 @@
+
+Performance Counters for Linux
+------------------------------
+
+Performance counters are special hardware registers available on most modern
+CPUs. These registers count the number of certain types of hw events: such
+as instructions executed, cache misses suffered, or branches mis-predicted -
+without slowing down the kernel or applications. These registers can also
+trigger interrupts when a threshold number of events have passed - and can
+thus be used to profile the code that runs on that CPU.
+
+The Linux Performance Counter subsystem provides an abstraction of these
+hardware capabilities. It provides per task and per CPU counters, counter
+groups, and it provides event capabilities on top of those.  It
+provides "virtual" 64-bit counters, regardless of the width of the
+underlying hardware counters.
+
+Performance counters are accessed via special file descriptors.
+There's one file descriptor per virtual counter used.
+
+The special file descriptor is opened via the sys_perf_event_open()
+system call:
+
+   int sys_perf_event_open(struct perf_event_attr *hw_event_uptr,
+			     pid_t pid, int cpu, int group_fd,
+			     unsigned long flags);
+
+The syscall returns the new fd. The fd can be used via the normal
+VFS system calls: read() can be used to read the counter, fcntl()
+can be used to set the blocking mode, etc.
+
+Multiple counters can be kept open at a time, and the counters
+can be poll()ed.
+
+When creating a new counter fd, 'perf_event_attr' is:
+
+struct perf_event_attr {
+        /*
+         * The MSB of the config word signifies if the rest contains cpu
+         * specific (raw) counter configuration data, if unset, the next
+         * 7 bits are an event type and the rest of the bits are the event
+         * identifier.
+         */
+        __u64                   config;
+
+        __u64                   irq_period;
+        __u32                   record_type;
+        __u32                   read_format;
+
+        __u64                   disabled       :  1, /* off by default        */
+                                inherit        :  1, /* children inherit it   */
+                                pinned         :  1, /* must always be on PMU */
+                                exclusive      :  1, /* only group on PMU     */
+                                exclude_user   :  1, /* don't count user      */
+                                exclude_kernel :  1, /* ditto kernel          */
+                                exclude_hv     :  1, /* ditto hypervisor      */
+                                exclude_idle   :  1, /* don't count when idle */
+                                mmap           :  1, /* include mmap data     */
+                                munmap         :  1, /* include munmap data   */
+                                comm           :  1, /* include comm data     */
+
+                                __reserved_1   : 52;
+
+        __u32                   extra_config_len;
+        __u32                   wakeup_events;  /* wakeup every n events */
+
+        __u64                   __reserved_2;
+        __u64                   __reserved_3;
+};
+
+The 'config' field specifies what the counter should count.  It
+is divided into 3 bit-fields:
+
+raw_type: 1 bit   (most significant bit)	0x8000_0000_0000_0000
+type:	  7 bits  (next most significant)	0x7f00_0000_0000_0000
+event_id: 56 bits (least significant)		0x00ff_ffff_ffff_ffff
+
+If 'raw_type' is 1, then the counter will count a hardware event
+specified by the remaining 63 bits of event_config.  The encoding is
+machine-specific.
+
+If 'raw_type' is 0, then the 'type' field says what kind of counter
+this is, with the following encoding:
+
+enum perf_type_id {
+	PERF_TYPE_HARDWARE		= 0,
+	PERF_TYPE_SOFTWARE		= 1,
+	PERF_TYPE_TRACEPOINT		= 2,
+};
+
+A counter of PERF_TYPE_HARDWARE will count the hardware event
+specified by 'event_id':
+
+/*
+ * Generalized performance counter event types, used by the hw_event.event_id
+ * parameter of the sys_perf_event_open() syscall:
+ */
+enum perf_hw_id {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_HW_CPU_CYCLES		= 0,
+	PERF_COUNT_HW_INSTRUCTIONS		= 1,
+	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
+	PERF_COUNT_HW_CACHE_MISSES		= 3,
+	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_HW_BRANCH_MISSES		= 5,
+	PERF_COUNT_HW_BUS_CYCLES		= 6,
+};
+
+These are standardized types of events that work relatively uniformly
+on all CPUs that implement Performance Counters support under Linux,
+although there may be variations (e.g., different CPUs might count
+cache references and misses at different levels of the cache hierarchy).
+If a CPU is not able to count the selected event, then the system call
+will return -EINVAL.
+
+More hw_event_types are supported as well, but they are CPU-specific
+and accessed as raw events.  For example, to count "External bus
+cycles while bus lock signal asserted" events on Intel Core CPUs, pass
+in a 0x4064 event_id value and set hw_event.raw_type to 1.
+
+A counter of type PERF_TYPE_SOFTWARE will count one of the available
+software events, selected by 'event_id':
+
+/*
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum perf_sw_ids {
+	PERF_COUNT_SW_CPU_CLOCK		= 0,
+	PERF_COUNT_SW_TASK_CLOCK	= 1,
+	PERF_COUNT_SW_PAGE_FAULTS	= 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES	= 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS	= 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN	= 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ	= 6,
+	PERF_COUNT_SW_ALIGNMENT_FAULTS	= 7,
+	PERF_COUNT_SW_EMULATION_FAULTS	= 8,
+};
+
+Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
+tracer is available, and event_id values can be obtained from
+/debug/tracing/events/*/*/id
+
+
+Counters come in two flavours: counting counters and sampling
+counters.  A "counting" counter is one that is used for counting the
+number of events that occur, and is characterised by having
+irq_period = 0.
+
+
+A read() on a counter returns the current value of the counter and possible
+additional values as specified by 'read_format', each value is a u64 (8 bytes)
+in size.
+
+/*
+ * Bits that can be set in hw_event.read_format to request that
+ * reads on the counter should return the indicated quantities,
+ * in increasing order of bit value, after the counter value.
+ */
+enum perf_event_read_format {
+        PERF_FORMAT_TOTAL_TIME_ENABLED  =  1,
+        PERF_FORMAT_TOTAL_TIME_RUNNING  =  2,
+};
+
+Using these additional values one can establish the overcommit ratio for a
+particular counter allowing one to take the round-robin scheduling effect
+into account.
+
+
+A "sampling" counter is one that is set up to generate an interrupt
+every N events, where N is given by 'irq_period'.  A sampling counter
+has irq_period > 0. The record_type controls what data is recorded on each
+interrupt:
+
+/*
+ * Bits that can be set in hw_event.record_type to request information
+ * in the overflow packets.
+ */
+enum perf_event_record_format {
+        PERF_RECORD_IP          = 1U << 0,
+        PERF_RECORD_TID         = 1U << 1,
+        PERF_RECORD_TIME        = 1U << 2,
+        PERF_RECORD_ADDR        = 1U << 3,
+        PERF_RECORD_GROUP       = 1U << 4,
+        PERF_RECORD_CALLCHAIN   = 1U << 5,
+};
+
+Such (and other) events will be recorded in a ring-buffer, which is
+available to user-space using mmap() (see below).
+
+The 'disabled' bit specifies whether the counter starts out disabled
+or enabled.  If it is initially disabled, it can be enabled by ioctl
+or prctl (see below).
+
+The 'inherit' bit, if set, specifies that this counter should count
+events on descendant tasks as well as the task specified.  This only
+applies to new descendants, not to any existing descendants at the
+time the counter is created (nor to any new descendants of existing
+descendants).
+
+The 'pinned' bit, if set, specifies that the counter should always be
+on the CPU if at all possible.  It only applies to hardware counters
+and only to group leaders.  If a pinned counter cannot be put onto the
+CPU (e.g. because there are not enough hardware counters or because of
+a conflict with some other event), then the counter goes into an
+'error' state, where reads return end-of-file (i.e. read() returns 0)
+until the counter is subsequently enabled or disabled.
+
+The 'exclusive' bit, if set, specifies that when this counter's group
+is on the CPU, it should be the only group using the CPU's counters.
+In future, this will allow sophisticated monitoring programs to supply
+extra configuration information via 'extra_config_len' to exploit
+advanced features of the CPU's Performance Monitor Unit (PMU) that are
+not otherwise accessible and that might disrupt other hardware
+counters.
+
+The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a
+way to request that counting of events be restricted to times when the
+CPU is in user, kernel and/or hypervisor mode.
+
+The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap
+operations, these can be used to relate userspace IP addresses to actual
+code, even after the mapping (or even the whole process) is gone,
+these events are recorded in the ring-buffer (see below).
+
+The 'comm' bit allows tracking of process comm data on process creation.
+This too is recorded in the ring-buffer (see below).
+
+The 'pid' parameter to the sys_perf_event_open() system call allows the
+counter to be specific to a task:
+
+ pid == 0: if the pid parameter is zero, the counter is attached to the
+ current task.
+
+ pid > 0: the counter is attached to a specific task (if the current task
+ has sufficient privilege to do so)
+
+ pid < 0: all tasks are counted (per cpu counters)
+
+The 'cpu' parameter allows a counter to be made specific to a CPU:
+
+ cpu >= 0: the counter is restricted to a specific CPU
+ cpu == -1: the counter counts on all CPUs
+
+(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.)
+
+A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts
+events of that task and 'follows' that task to whatever CPU the task
+gets scheduled to. Per task counters can be created by any user, for
+their own tasks.
+
+A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
+all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
+
+The 'flags' parameter is currently unused and must be zero.
+
+The 'group_fd' parameter allows counter "groups" to be set up.  A
+counter group has one counter which is the group "leader".  The leader
+is created first, with group_fd = -1 in the sys_perf_event_open call
+that creates it.  The rest of the group members are created
+subsequently, with group_fd giving the fd of the group leader.
+(A single counter on its own is created with group_fd = -1 and is
+considered to be a group with only 1 member.)
+
+A counter group is scheduled onto the CPU as a unit, that is, it will
+only be put onto the CPU if all of the counters in the group can be
+put onto the CPU.  This means that the values of the member counters
+can be meaningfully compared, added, divided (to get ratios), etc.,
+with each other, since they have counted events for the same set of
+executed instructions.
+
+
+As stated, asynchronous events, like counter overflow or PROT_EXEC mmap
+tracking are logged into a ring-buffer. This ring-buffer is created and
+accessed through mmap().
+
+The mmap size should be 1+2^n pages, where the first page is a meta-data page
+(struct perf_event_mmap_page) that contains various bits of information such
+as where the ring-buffer head is.
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_event_mmap_page {
+        __u32   version;                /* version number of this structure */
+        __u32   compat_version;         /* lowest version this is compat with */
+
+        /*
+         * Bits needed to read the hw counters in user-space.
+         *
+         *   u32 seq;
+         *   s64 count;
+         *
+         *   do {
+         *     seq = pc->lock;
+         *
+         *     barrier()
+         *     if (pc->index) {
+         *       count = pmc_read(pc->index - 1);
+         *       count += pc->offset;
+         *     } else
+         *       goto regular_read;
+         *
+         *     barrier();
+         *   } while (pc->lock != seq);
+         *
+         * NOTE: for obvious reasons this only works on self-monitoring
+         *       processes.
+         */
+        __u32   lock;                   /* seqlock for synchronization */
+        __u32   index;                  /* hardware counter identifier */
+        __s64   offset;                 /* add to hardware counter value */
+
+        /*
+         * Control data for the mmap() data buffer.
+         *
+         * User-space reading this value should issue an rmb(), on SMP capable
+         * platforms, after reading this value -- see perf_event_wakeup().
+         */
+        __u32   data_head;              /* head in the data section */
+};
+
+NOTE: the hw-counter userspace bits are arch specific and are currently only
+      implemented on powerpc.
+
+The following 2^n pages are the ring-buffer which contains events of the form:
+
+#define PERF_RECORD_MISC_KERNEL          (1 << 0)
+#define PERF_RECORD_MISC_USER            (1 << 1)
+#define PERF_RECORD_MISC_OVERFLOW        (1 << 2)
+
+struct perf_event_header {
+        __u32   type;
+        __u16   misc;
+        __u16   size;
+};
+
+enum perf_event_type {
+
+        /*
+         * The MMAP events record the PROT_EXEC mappings so that we can
+         * correlate userspace IPs to code. They have the following structure:
+         *
+         * struct {
+         *      struct perf_event_header        header;
+         *
+         *      u32                             pid, tid;
+         *      u64                             addr;
+         *      u64                             len;
+         *      u64                             pgoff;
+         *      char                            filename[];
+         * };
+         */
+        PERF_RECORD_MMAP                 = 1,
+        PERF_RECORD_MUNMAP               = 2,
+
+        /*
+         * struct {
+         *      struct perf_event_header        header;
+         *
+         *      u32                             pid, tid;
+         *      char                            comm[];
+         * };
+         */
+        PERF_RECORD_COMM                 = 3,
+
+        /*
+         * When header.misc & PERF_RECORD_MISC_OVERFLOW the event_type field
+         * will be PERF_RECORD_*
+         *
+         * struct {
+         *      struct perf_event_header        header;
+         *
+         *      { u64                   ip;       } && PERF_RECORD_IP
+         *      { u32                   pid, tid; } && PERF_RECORD_TID
+         *      { u64                   time;     } && PERF_RECORD_TIME
+         *      { u64                   addr;     } && PERF_RECORD_ADDR
+         *
+         *      { u64                   nr;
+         *        { u64 event, val; }   cnt[nr];  } && PERF_RECORD_GROUP
+         *
+         *      { u16                   nr,
+         *                              hv,
+         *                              kernel,
+         *                              user;
+         *        u64                   ips[nr];  } && PERF_RECORD_CALLCHAIN
+         * };
+         */
+};
+
+NOTE: PERF_RECORD_CALLCHAIN is arch specific and currently only implemented
+      on x86.
+
+Notification of new events is possible through poll()/select()/epoll() and
+fcntl() managing signals.
+
+Normally a notification is generated for every page filled, however one can
+additionally set perf_event_attr.wakeup_events to generate one every
+so many counter overflow events.
+
+Future work will include a splice() interface to the ring-buffer.
+
+
+Counters can be enabled and disabled in two ways: via ioctl and via
+prctl.  When a counter is disabled, it doesn't count or generate
+events but does continue to exist and maintain its count value.
+
+An individual counter can be enabled with
+
+	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
+
+or disabled with
+
+	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
+
+For a counter group, pass PERF_IOC_FLAG_GROUP as the third argument.
+Enabling or disabling the leader of a group enables or disables the
+whole group; that is, while the group leader is disabled, none of the
+counters in the group will count.  Enabling or disabling a member of a
+group other than the leader only affects that counter - disabling a
+non-leader stops that counter from counting but doesn't affect any
+other counter.
+
+Additionally, non-inherited overflow counters can use
+
+	ioctl(fd, PERF_EVENT_IOC_REFRESH, nr);
+
+to enable a counter for 'nr' events, after which it gets disabled again.
+
+A process can enable or disable all the counter groups that are
+attached to it, using prctl:
+
+	prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+	prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+This applies to all counters on the current process, whether created
+by this process or by another, and doesn't affect any counters that
+this process has created on other processes.  It only enables or
+disables the group leaders, not any other members in the groups.
+
+
+Arch requirements
+-----------------
+
+If your architecture does not have hardware performance metrics, you can
+still use the generic software counters based on hrtimers for sampling.
+
+So to start with, in order to add HAVE_PERF_EVENTS to your Kconfig, you
+will need at least this:
+	- asm/perf_event.h - a basic stub will suffice at first
+	- support for atomic64 types (and associated helper functions)
+
+If your architecture does have hardware capabilities, you can override the
+weak stub hw_perf_event_init() to register hardware counters.
+
+Architectures that have d-cache aliasing issues, such as Sparc and ARM,
+should select PERF_USE_VMALLOC in order to avoid these for perf mmap().
diff --git a/jvmti/Build b/jvmti/Build
new file mode 100644
index 0000000..eaeb8cb
--- /dev/null
+++ b/jvmti/Build
@@ -0,0 +1,8 @@
+jvmti-y += libjvmti.o
+jvmti-y += jvmti_agent.o
+
+CFLAGS_jvmti         = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux
+CFLAGS_REMOVE_jvmti  = -Wmissing-declarations
+CFLAGS_REMOVE_jvmti += -Wstrict-prototypes
+CFLAGS_REMOVE_jvmti += -Wextra
+CFLAGS_REMOVE_jvmti += -Wwrite-strings
diff --git a/jvmti/jvmti_agent.c b/jvmti/jvmti_agent.c
new file mode 100644
index 0000000..0c6d100
--- /dev/null
+++ b/jvmti/jvmti_agent.c
@@ -0,0 +1,466 @@
+/*
+ * jvmti_agent.c: JVMTI agent interface
+ *
+ * Adapted from the Oprofile code in opagent.c:
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#include <sys/types.h>
+#include <sys/stat.h> /* for mkdir() */
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <syscall.h> /* for gettid() */
+#include <err.h>
+
+#include "jvmti_agent.h"
+#include "../util/jitdump.h"
+
+#define JIT_LANG "java"
+
+static char jit_path[PATH_MAX];	/* filled in by debug_cache_init() */
+static void *marker_addr;	/* mapping made by perf_open_marker_file() */
+/* Return the caller's thread id by invoking the gettid system call directly. */
+static inline pid_t gettid(void)
+{
+	return (pid_t)syscall(__NR_gettid);
+}
+
+/*
+ * Read the start of /proc/self/exe and store the e_machine field of its ELF
+ * header in hdr->elf_mach.
+ * Returns 0 on success, -1 if the file cannot be opened, a read comes up
+ * short, or the file does not begin with the ELF magic bytes.
+ */
+static int get_e_machine(struct jitheader *hdr)
+{
+	static const char elf_magic[4] = { 0x7f, 'E', 'L', 'F' };
+	struct {
+		uint16_t e_type;
+		uint16_t e_machine;
+	} info;
+	char ident[16];
+	int rc = -1;
+	int fd;
+
+	fd = open("/proc/self/exe", O_RDONLY);
+	if (fd == -1)
+		return -1;
+
+	/* the 16 identification bytes are followed by e_type and e_machine */
+	if (read(fd, ident, sizeof(ident)) == sizeof(ident) &&
+	    !memcmp(ident, elf_magic, sizeof(elf_magic)) &&
+	    read(fd, &info, sizeof(info)) == sizeof(info)) {
+		hdr->elf_mach = info.e_machine;
+		rc = 0;
+	}
+
+	close(fd);
+	return rc;
+}
+
+static int use_arch_timestamp;
+
+/* Read the x86 time-stamp counter with RDTSC; 0 on any other architecture. */
+static inline uint64_t get_arch_timestamp(void)
+{
+#if defined(__i386__) || defined(__x86_64__)
+	unsigned int lo, hi;
+
+	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
+
+	return ((uint64_t)hi << 32) | lo;
+#else
+	return 0;
+#endif
+}
+
+#define NSEC_PER_SEC	1000000000
+static int perf_clk_id = CLOCK_MONOTONIC;
+
+/* Convert *ts to a nanosecond count: tv_sec * NSEC_PER_SEC + tv_nsec. */
+static inline uint64_t timespec_to_ns(const struct timespec *ts)
+{
+	return (uint64_t)ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
+/*
+ * Current timestamp: the arch counter if use_arch_timestamp is set, else
+ * clock_gettime(perf_clk_id) in nanoseconds, or 0 if that call fails.
+ */
+static inline uint64_t perf_get_timestamp(void)
+{
+	struct timespec now;
+
+	if (use_arch_timestamp)
+		return get_arch_timestamp();
+
+	if (clock_gettime(perf_clk_id, &now) != 0)
+		return 0;
+	return timespec_to_ns(&now);
+}
+
+/*
+ * Create the jit cache directory for this run and leave its path in
+ * jit_path: <base>/.debug/jit/java-jit-YYYYMMDD.XXXXXXXX, where <base> is
+ * $JITDUMPDIR, else $HOME, else ".", the date is today's local date and the
+ * suffix is made unique by mkdtemp().
+ *
+ * Returns 0 on success, -1 after a warning when a directory cannot be made.
+ */
+static int debug_cache_init(void)
+{
+	char stamp[32];
+	const char *base;
+	struct tm tm;
+	time_t now;
+
+	time(&now);
+	localtime_r(&now, &tm);
+
+	/* base directory: $JITDUMPDIR, then $HOME, then the current directory */
+	base = getenv("JITDUMPDIR");
+	if (!base)
+		base = getenv("HOME");
+	if (!base)
+		base = ".";
+
+	strftime(stamp, sizeof(stamp), JIT_LANG"-jit-%Y%m%d", &tm);
+
+	/* mkdir() makes one level at a time: .debug first, then .debug/jit */
+	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base);
+	if (mkdir(jit_path, 0755) == -1 && errno != EEXIST) {
+		warn("jvmti: cannot create jit cache dir %s", jit_path);
+		return -1;
+	}
+
+	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base);
+	if (mkdir(jit_path, 0755) == -1 && errno != EEXIST) {
+		warn("cannot create jit cache dir %s", jit_path);
+		return -1;
+	}
+
+	/* mkdtemp() rewrites the template in place and returns it on success */
+	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, stamp);
+	if (mkdtemp(jit_path) != jit_path) {
+		warn("cannot create jit cache dir %s", jit_path);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Map one page of the jitdump file @fd and record it in marker_addr, which
+ * stays untouched (never MAP_FAILED) on failure.  Returns 0 or -1.
+ */
+static int perf_open_marker_file(int fd)
+{
+	long pgsz = sysconf(_SC_PAGESIZE);
+	void *addr;
+
+	if (pgsz == -1)
+		return -1;
+	/*
+	 * The mapping yields an MMAP record in perf.data, live or later via
+	 * /proc/PID/maps, which marks the jitdump file for perf report and
+	 * annotate.  PROT_EXEC gets it captured even without perf record -d.
+	 */
+	addr = mmap(NULL, pgsz, PROT_READ|PROT_EXEC, MAP_PRIVATE, fd, 0);
+	if (addr == MAP_FAILED)
+		return -1;
+
+	marker_addr = addr;
+	return 0;
+}
+
+/*
+ * Unmap the page recorded in marker_addr.  Nothing is done when marker_addr
+ * is NULL or the page size cannot be determined.
+ */
+static void perf_close_marker_file(void)
+{
+	long page_size;
+
+	if (marker_addr == NULL)
+		return;
+	page_size = sysconf(_SC_PAGESIZE);
+	if (page_size != -1)
+		munmap(marker_addr, page_size);
+}
+
+/*
+ * Turn on use_arch_timestamp when the JITDUMP_USE_ARCH_TIMESTAMP environment
+ * variable is set to anything other than the empty string or "0".
+ */
+static void
+init_arch_timestamp(void)
+{
+	const char *val = getenv("JITDUMP_USE_ARCH_TIMESTAMP");
+
+	if (val && val[0] != '\0' && strcmp(val, "0") != 0)
+		use_arch_timestamp = 1;
+}
+
+/*
+ * Create the jitdump file for this process and write its header.
+ *
+ * The file is <jit_path>/jit-<pid>.dump; its first page is also mmap'ed as a
+ * marker (see perf_open_marker_file()).
+ *
+ * Returns the stdio stream of the dump file as an opaque handle, or NULL on
+ * failure. On every failure path the file descriptor/stream and the marker
+ * mapping acquired so far are released.
+ */
+void *jvmti_open(void)
+{
+	char dump_path[PATH_MAX];
+	struct jitheader header;
+	int fd;
+	FILE *fp;
+
+	init_arch_timestamp();
+
+	/*
+	 * check if clockid is supported
+	 */
+	if (!perf_get_timestamp()) {
+		if (use_arch_timestamp)
+			warnx("jvmti: arch timestamp not supported");
+		else
+			warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+	}
+
+	memset(&header, 0, sizeof(header));
+
+	/* without the cache directory jit_path is unusable (warning already printed) */
+	if (debug_cache_init())
+		return NULL;
+
+	/*
+	 * jitdump file name
+	 */
+	snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+
+	fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666);
+	if (fd == -1) {
+		warn("jvmti: cannot create %s", dump_path);
+		return NULL;
+	}
+
+	/*
+	 * create perf.data marker for the jitdump file
+	 */
+	if (perf_open_marker_file(fd)) {
+		warnx("jvmti: failed to create marker file");
+		close(fd);
+		return NULL;
+	}
+
+	fp = fdopen(fd, "w+");
+	if (!fp) {
+		warn("jvmti: cannot create %s", dump_path);
+		/* no stream owns fd yet, so close it directly; fp must not reach fclose() */
+		close(fd);
+		goto error_unmap;
+	}
+
+	warnx("jvmti: jitdump in %s", dump_path);
+
+	if (get_e_machine(&header)) {
+		warn("get_e_machine failed\n");
+		goto error;
+	}
+
+	header.magic      = JITHEADER_MAGIC;
+	header.version    = JITHEADER_VERSION;
+	header.total_size = sizeof(header);
+	header.pid        = getpid();
+
+	header.timestamp = perf_get_timestamp();
+
+	if (use_arch_timestamp)
+		header.flags |= JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
+	if (!fwrite(&header, sizeof(header), 1, fp)) {
+		warn("jvmti: cannot write dumpfile header");
+		goto error;
+	}
+	return fp;
+error:
+	/* fclose() also closes the underlying fd */
+	fclose(fp);
+error_unmap:
+	perf_close_marker_file();
+	return NULL;
+}
+
+/*
+ * Write the JIT_CODE_CLOSE record, then close the dump stream and drop the
+ * marker mapping.
+ *
+ * @agent: handle returned by jvmti_open()
+ *
+ * The stream and the mapping are released even when the final record cannot
+ * be written. Returns 0 on success, -1 if @agent is NULL, if the record could
+ * not be written, or if fclose() reports an error.
+ */
+int
+jvmti_close(void *agent)
+{
+	struct jr_code_close rec;
+	FILE *fp = agent;
+	int ret = 0;
+
+	if (!fp) {
+		warnx("jvmti: invalid fd in close_agent");
+		return -1;
+	}
+
+	rec.p.id = JIT_CODE_CLOSE;
+	rec.p.total_size = sizeof(rec);
+
+	rec.p.timestamp = perf_get_timestamp();
+
+	if (!fwrite(&rec, sizeof(rec), 1, fp))
+		ret = -1;
+
+	/* fclose() flushes what is still buffered, so its failure is a write failure */
+	if (fclose(fp))
+		ret = -1;
+
+	perf_close_marker_file();
+
+	return ret;
+}
+
+/*
+ * Append a JIT_CODE_LOAD record for one piece of generated code.
+ *
+ * @agent: handle returned by jvmti_open()
+ * @sym:   NUL-terminated symbol name, stored with its terminator
+ * @vma:   address of the code, recorded as both vma and code_addr
+ * @code:  the code bytes to copy into the record, or NULL to store none
+ * @size:  size of the code in bytes; a size of 0 is silently ignored
+ *
+ * Returns 0 on success (or when @size is 0), -1 if @agent is NULL or if any
+ * part of the record could not be written.
+ */
+int
+jvmti_write_code(void *agent, char const *sym,
+	uint64_t vma, void const *code, unsigned int const size)
+{
+	static int code_generation = 1;
+	struct jr_code_load rec;
+	size_t sym_len;
+	FILE *fp = agent;
+	int ret = -1;
+
+	/* don't care about 0 length function, no samples */
+	if (size == 0)
+		return 0;
+
+	if (!fp) {
+		warnx("jvmti: invalid fd in write_native_code");
+		return -1;
+	}
+
+	sym_len = strlen(sym) + 1;
+
+	rec.p.id           = JIT_CODE_LOAD;
+	rec.p.total_size   = sizeof(rec) + sym_len;
+	rec.p.timestamp    = perf_get_timestamp();
+
+	rec.code_size  = size;
+	rec.vma        = vma;
+	rec.code_addr  = vma;
+	rec.pid	       = getpid();
+	rec.tid	       = gettid();
+
+	if (code)
+		rec.p.total_size += size;
+
+	/*
+	 * If JVM is multi-threaded, multiple concurrent calls to agent
+	 * may be possible, so protect file writes
+	 */
+	flockfile(fp);
+
+	/*
+	 * get code index inside lock to avoid race condition
+	 */
+	rec.code_index = code_generation++;
+
+	/* stop at the first failed write and report it to the caller */
+	if (fwrite_unlocked(&rec, sizeof(rec), 1, fp) != 1)
+		goto out;
+
+	if (fwrite_unlocked(sym, sym_len, 1, fp) != 1)
+		goto out;
+
+	if (code && fwrite_unlocked(code, size, 1, fp) != 1)
+		goto out;
+
+	ret = 0;
+out:
+	funlockfile(fp);
+
+	return ret;
+}
+
+/*
+ * Append a JIT_CODE_DEBUG_INFO record describing the source lines of the code
+ * at address @code.
+ *
+ * @agent:      handle returned by jvmti_open()
+ * @code:       address of the code the entries belong to
+ * @nr_lines:   number of entries in @li and in @file_names
+ * @li:         line entries (pc, line number, discriminator)
+ * @file_names: one NUL-terminated file name per entry
+ *
+ * Returns 0 on success or when @nr_lines is 0, -1 if @agent is NULL or a
+ * write fails.
+ */
+int
+jvmti_write_debug_info(void *agent, uint64_t code,
+    int nr_lines, jvmti_line_info_t *li,
+    const char * const * file_names)
+{
+	struct jr_code_debug_info rec;
+	size_t names_len = 0;
+	FILE *fp = agent;
+	int ret = -1;
+	int i;
+
+	/* no entry to write */
+	if (!nr_lines)
+		return 0;
+
+	if (!fp) {
+		warnx("jvmti: invalid fd in write_debug_info");
+		return -1;
+	}
+
+	/* every file name is stored together with its terminating NUL */
+	for (i = 0; i < nr_lines; i++)
+		names_len += strlen(file_names[i]) + 1;
+
+	rec.p.id        = JIT_CODE_DEBUG_INFO;
+	rec.p.timestamp = perf_get_timestamp();
+	rec.code_addr   = (uint64_t)(uintptr_t)code;
+	rec.nr_entry    = nr_lines;
+
+	/*
+	 * on disk source line info layout, repeated nr_lines times:
+	 * uint64_t : addr
+	 * int      : line number
+	 * int      : column discriminator
+	 * file[]   : source file name
+	 */
+	rec.p.total_size = sizeof(rec) + nr_lines * sizeof(struct debug_entry) + names_len;
+
+	/*
+	 * The JVM may call the agent from several threads at once, so the
+	 * whole record is written under the stream lock.
+	 */
+	flockfile(fp);
+
+	if (fwrite_unlocked(&rec, sizeof(rec), 1, fp) != 1)
+		goto unlock;
+
+	for (i = 0; i < nr_lines; i++) {
+		const char *name = file_names[i];
+		uint64_t addr = (uint64_t)li[i].pc;
+
+		/* || short-circuits, so the fields go out in order until one fails */
+		if (fwrite_unlocked(&addr, sizeof(addr), 1, fp) != 1 ||
+		    fwrite_unlocked(&li[i].line_number, sizeof(li[i].line_number), 1, fp) != 1 ||
+		    fwrite_unlocked(&li[i].discrim, sizeof(li[i].discrim), 1, fp) != 1 ||
+		    fwrite_unlocked(name, strlen(name) + 1, 1, fp) != 1)
+			goto unlock;
+	}
+	ret = 0;
+unlock:
+	funlockfile(fp);
+	return ret;
+}
diff --git a/jvmti/jvmti_agent.h b/jvmti/jvmti_agent.h
new file mode 100644
index 0000000..6ed82f6
--- /dev/null
+++ b/jvmti/jvmti_agent.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __JVMTI_AGENT_H__
+#define __JVMTI_AGENT_H__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <jvmti.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * One source-line entry for a piece of compiled code, passed as an array to
+ * jvmti_write_debug_info().
+ */
+typedef struct {
+	unsigned long	pc;		/* native code address the entry is attached to */
+	int		line_number;	/* source line number */
+	int		discrim; /* discriminator -- 0 for now */
+	jmethodID	methodID;	/* method the line number was taken from */
+} jvmti_line_info_t;
+
+/*
+ * Create the jitdump file and return an opaque agent handle for the calls
+ * below, or NULL on failure.
+ */
+void *jvmti_open(void);
+
+/*
+ * Write the closing record and close the dump file behind @agent.
+ * Returns 0 on success, -1 on failure (including a NULL @agent).
+ */
+int   jvmti_close(void *agent);
+
+/*
+ * Record @code_size bytes of generated code located at @vma under the name
+ * @symbol_name. @code points at the bytes to store, or is NULL to store none.
+ * A @code_size of 0 is ignored and reported as success; a NULL @agent
+ * returns -1.
+ */
+int   jvmti_write_code(void *agent, char const *symbol_name,
+		       uint64_t vma, void const *code,
+		       const unsigned int code_size);
+
+/*
+ * Record @nr_lines source-line entries for the code at address @code.
+ * @li and @file_names are parallel arrays of @nr_lines elements.
+ * Returns 0 on success or when @nr_lines is 0, -1 on failure.
+ */
+int   jvmti_write_debug_info(void *agent, uint64_t code, int nr_lines,
+			     jvmti_line_info_t *li,
+			     const char * const * file_names);
+
+#if defined(__cplusplus)
+}
+
+#endif
+#endif /* __JVMTI_AGENT_H__ */
diff --git a/jvmti/libjvmti.c b/jvmti/libjvmti.c
new file mode 100644
index 0000000..6add3e9
--- /dev/null
+++ b/jvmti/libjvmti.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <err.h>
+#include <jvmti.h>
+#include <jvmticmlr.h>
+#include <limits.h>
+
+#include "jvmti_agent.h"
+
+/* set to 1 in Agent_OnLoad() once the line-number/source-file capabilities were added */
+static int has_line_numbers;
+/* handle returned by jvmti_open(), handed to every jvmti_write_*() / jvmti_close() call */
+void *jvmti_agent;
+
+/*
+ * Warn that the JVMTI call named @msg failed with error @ret, using the
+ * symbolic error name when the JVM can provide it and the numeric code
+ * otherwise.
+ */
+static void print_error(jvmtiEnv *jvmti, const char *msg, jvmtiError ret)
+{
+	char *name = NULL;
+
+	if ((*jvmti)->GetErrorName(jvmti, ret, &name) != JVMTI_ERROR_NONE) {
+		warnx("%s failed with an unknown error %d", msg, ret);
+		return;
+	}
+
+	warnx("%s failed with %s", msg, name);
+	/* the name was allocated by the JVM and has to be handed back to it */
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)name);
+}
+
+/*
+ * Fill @tab with the line-number entries of method @m that start before
+ * bytecode index @bci, all tagged with the native address @pc.
+ *
+ * Only the leading run of the method's line table is taken: the scan stops at
+ * the first entry whose start_location is not below @bci. The number of
+ * entries written is stored in *@nr; @tab must have room for the method's
+ * whole line table.
+ *
+ * Returns JVMTI_ERROR_NONE, or the GetLineNumberTable() error (after
+ * reporting it), in which case *@nr is left untouched.
+ */
+static jvmtiError
+do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
+		    jvmti_line_info_t *tab, jint *nr)
+{
+	jvmtiLineNumberEntry *entries = NULL;
+	jint nr_entries = 0;
+	jint filled = 0;
+	jvmtiError err;
+
+	err = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_entries, &entries);
+	if (err != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "GetLineNumberTable", err);
+		return err;
+	}
+
+	while (filled < nr_entries && entries[filled].start_location < bci) {
+		jvmti_line_info_t *out = &tab[filled];
+
+		out->pc = (unsigned long)pc;
+		out->line_number = entries[filled].line_number;
+		out->discrim = 0; /* not yet used */
+		out->methodID = m;
+		filled++;
+	}
+
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)entries);
+	*nr = filled;
+	return JVMTI_ERROR_NONE;
+}
+
+/*
+ * Build the line table for a compiled method from the inline records in
+ * @compile_info.
+ *
+ * On success *@tab points to a malloc()ed array (the caller frees it) and
+ * *@nr_lines holds the number of entries actually filled in.
+ *
+ * Returns:
+ *   JVMTI_ERROR_NONE           on success
+ *   JVMTI_ERROR_NULL_POINTER   if @tab or @nr_lines is NULL
+ *   JVMTI_ERROR_NOT_FOUND      if no line-number entry was counted in phase 1
+ *   JVMTI_ERROR_OUT_OF_MEMORY  if the table cannot be allocated
+ */
+static jvmtiError
+get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **tab, int *nr_lines)
+{
+	const jvmtiCompiledMethodLoadRecordHeader *hdr;
+	jvmtiCompiledMethodLoadInlineRecord *rec;
+	jvmtiLineNumberEntry *lne = NULL;
+	PCStackInfo *c;
+	jint nr, ret;
+	int nr_total = 0;
+	int i, lines_total = 0;
+
+	if (!(tab && nr_lines))
+		return JVMTI_ERROR_NULL_POINTER;
+
+	/*
+	 * Phase 1 -- get the number of lines necessary
+	 *
+	 * For every pc of every inline record, add the full size of the line
+	 * table of c->methods[0]. Phase 2 writes at most that many entries per
+	 * pc, so nr_total is an upper bound on what will be stored.
+	 */
+	for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
+		if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
+			rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
+			for (i = 0; i < rec->numpcs; i++) {
+				c = rec->pcinfo + i;
+				nr = 0;
+				/*
+				 * unfortunately, need a tab to get the number of lines!
+				 */
+				ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne);
+				if (ret == JVMTI_ERROR_NONE) {
+					/* free what was allocated for nothing */
+					(*jvmti)->Deallocate(jvmti, (unsigned char *)lne);
+					nr_total += (int)nr;
+				} else {
+					/* reported, but the pc is simply not counted */
+					print_error(jvmti, "GetLineNumberTable", ret);
+				}
+			}
+		}
+	}
+
+	if (nr_total == 0)
+		return JVMTI_ERROR_NOT_FOUND;
+
+	/*
+	 * Phase 2 -- allocate big enough line table
+	 */
+	*tab = malloc(nr_total * sizeof(**tab));
+	if (!*tab)
+		return JVMTI_ERROR_OUT_OF_MEMORY;
+
+	/*
+	 * Walk the same records again and append the entries of each pc right
+	 * after the ones already stored; a pc whose lookup fails adds nothing.
+	 */
+	for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
+		if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
+			rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
+			for (i = 0; i < rec->numpcs; i++) {
+				c = rec->pcinfo + i;
+				nr = 0;
+				ret = do_get_line_numbers(jvmti, c->pc,
+							  c->methods[0],
+							  c->bcis[0],
+							  *tab + lines_total,
+							  &nr);
+				if (ret == JVMTI_ERROR_NONE)
+					lines_total += nr;
+			}
+		}
+	}
+	*nr_lines = lines_total;
+	return JVMTI_ERROR_NONE;
+}
+
+/*
+ * Build a source path for a class into @result.
+ *
+ * @class_sign: JVM class signature, e.g. "Ljava/lang/String;"
+ * @file_name:  source file name, e.g. "String.java" (may be NULL)
+ * @result:     output buffer
+ * @max_length: size of @result in bytes
+ *
+ * For an 'L' signature the package directory (everything between the leading
+ * 'L' and the last '/', slash included) is used as a prefix, assuming the path
+ * mirrors the class hierarchy as is common practice for Java programs. For any
+ * other signature only @file_name is copied.
+ *
+ * The output is truncated to fit and is always NUL-terminated, unless
+ * @max_length is 0, in which case nothing is written. @class_sign is never
+ * modified; it is used later for the symbol name.
+ */
+static void
+copy_class_filename(const char * class_sign, const char * file_name, char * result, size_t max_length)
+{
+	size_t i = 0, j, limit;
+
+	if (!result || max_length == 0)
+		return;
+
+	/* last index that may hold a character; the one after it gets the NUL */
+	limit = max_length - 1;
+
+	if (class_sign && *class_sign == 'L') {
+		const char *p = strrchr(class_sign, '/');
+
+		if (p) {
+			size_t dir_len = (size_t)(p - class_sign);
+
+			/* drop the 'L' prefix and copy up to and including the final '/' */
+			for (i = 0; i < dir_len && i < limit; i++)
+				result[i] = class_sign[i + 1];
+		}
+	}
+
+	if (file_name) {
+		size_t name_len = strlen(file_name);
+
+		for (j = 0; j < name_len && i < limit; j++, i++)
+			result[i] = file_name[j];
+	}
+
+	result[i] = '\0';
+}
+
+/*
+ * Look up the source file path of the class declaring @methodID.
+ *
+ * On success *@buffer points to a malloc()ed, NUL-terminated copy of the path
+ * built by copy_class_filename(); the caller frees it.
+ *
+ * Returns JVMTI_ERROR_NONE on success, JVMTI_ERROR_OUT_OF_MEMORY if the copy
+ * cannot be allocated, or the error of the failing JVMTI call. Every failure
+ * is reported through print_error().
+ */
+static jvmtiError
+get_source_filename(jvmtiEnv *jvmti, jmethodID methodID, char ** buffer)
+{
+	jvmtiError ret;
+	jclass decl_class;
+	char *file_name = NULL;
+	char *class_sign = NULL;
+	char fn[PATH_MAX] = "";
+	size_t len;
+
+	ret = (*jvmti)->GetMethodDeclaringClass(jvmti, methodID, &decl_class);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "GetMethodDeclaringClass", ret);
+		return ret;
+	}
+
+	ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "GetSourceFileName", ret);
+		return ret;
+	}
+
+	ret = (*jvmti)->GetClassSignature(jvmti, decl_class, &class_sign, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "GetClassSignature", ret);
+		goto free_file_name_error;
+	}
+
+	copy_class_filename(class_sign, file_name, fn, PATH_MAX);
+	/* make sure strlen() below can never run off the end of fn */
+	fn[PATH_MAX - 1] = '\0';
+
+	len = strlen(fn);
+	*buffer = malloc(len + 1);
+	if (!*buffer) {
+		/* report the allocation, not the JVMTI call that succeeded before it */
+		ret = JVMTI_ERROR_OUT_OF_MEMORY;
+		print_error(jvmti, "malloc", ret);
+		goto free_class_sign_error;
+	}
+	memcpy(*buffer, fn, len + 1);
+	ret = JVMTI_ERROR_NONE;
+
+free_class_sign_error:
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
+free_file_name_error:
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
+
+	return ret;
+}
+
+/*
+ * Resolve the source file name of each of the @nr_lines entries of @line_tab
+ * into the matching slot of @file_names.
+ *
+ * Stops at the first entry that cannot be resolved and returns its error;
+ * slots filled before that keep their strings. Returns JVMTI_ERROR_NONE when
+ * every entry was resolved.
+ */
+static jvmtiError
+fill_source_filenames(jvmtiEnv *jvmti, int nr_lines,
+		      const jvmti_line_info_t * line_tab,
+		      char ** file_names)
+{
+	jvmtiError ret = JVMTI_ERROR_NONE;
+	int i = 0;
+
+	while (i < nr_lines && ret == JVMTI_ERROR_NONE) {
+		ret = get_source_filename(jvmti, line_tab[i].methodID, &file_names[i]);
+		i++;
+	}
+
+	return ret;
+}
+
+/*
+ * Callback installed as CompiledMethodLoad in Agent_OnLoad().
+ *
+ * Writes one code record for the compiled method (symbol name is the class
+ * signature, method name and method signature concatenated) and, when line
+ * numbers are available, a debug-info record before it.
+ *
+ * Failures are reported with warnings; nothing is returned to the JVM.
+ */
+static void JNICALL
+compiled_method_load_cb(jvmtiEnv *jvmti,
+			jmethodID method,
+			jint code_size,
+			void const *code_addr,
+			jint map_length,
+			jvmtiAddrLocationMap const *map,
+			const void *compile_info)
+{
+	jvmti_line_info_t *line_tab = NULL;
+	char ** line_file_names = NULL;
+	jclass decl_class;
+	char *class_sign = NULL;
+	char *func_name = NULL;
+	char *func_sign = NULL;
+	char *file_name = NULL;
+	char fn[PATH_MAX];
+	uint64_t addr = (uint64_t)(uintptr_t)code_addr;
+	jvmtiError ret;
+	int nr_lines = 0; /* in line_tab[] */
+	size_t len;
+	int output_debug_info = 0;
+
+	ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method,
+						&decl_class);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "GetMethodDeclaringClass", ret);
+		return;
+	}
+
+	/*
+	 * Line information is optional: any failure below only disables the
+	 * debug-info record, the code record is still written.
+	 * map/map_length are only checked for presence here.
+	 */
+	if (has_line_numbers && map && map_length) {
+		ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines);
+		if (ret != JVMTI_ERROR_NONE) {
+			warnx("jvmti: cannot get line table for method");
+			nr_lines = 0;
+		} else if (nr_lines > 0) {
+			line_file_names = malloc(sizeof(char*) * nr_lines);
+			if (!line_file_names) {
+				warnx("jvmti: cannot allocate space for line table method names");
+			} else {
+				/* zeroed so the cleanup loop can tell which slots were filled */
+				memset(line_file_names, 0, sizeof(char*) * nr_lines);
+				ret = fill_source_filenames(jvmti, nr_lines, line_tab, line_file_names);
+				if (ret != JVMTI_ERROR_NONE) {
+					warnx("jvmti: fill_source_filenames failed");
+				} else {
+					output_debug_info = 1;
+				}
+			}
+		}
+	}
+
+	ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "GetSourceFileName", ret);
+		goto error;
+	}
+
+	ret = (*jvmti)->GetClassSignature(jvmti, decl_class,
+					  &class_sign, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "GetClassSignature", ret);
+		goto error;
+	}
+
+	ret = (*jvmti)->GetMethodName(jvmti, method, &func_name,
+				      &func_sign, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "GetMethodName", ret);
+		goto error;
+	}
+
+	/* NOTE(review): fn is filled in here but not read afterwards in this function */
+	copy_class_filename(class_sign, file_name, fn, PATH_MAX);
+
+	/*
+	 * write source line info record if we have it
+	 */
+	if (output_debug_info)
+		if (jvmti_write_debug_info(jvmti_agent, addr, nr_lines, line_tab, (const char * const *) line_file_names))
+			warnx("jvmti: write_debug_info() failed");
+
+	/* room for the three strings plus the terminating NUL (one spare byte) */
+	len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2;
+	{
+		char str[len];
+		snprintf(str, len, "%s%s%s", class_sign, func_name, func_sign);
+
+		if (jvmti_write_code(jvmti_agent, str, addr, code_addr, code_size))
+			warnx("jvmti: write_code() failed");
+	}
+error:
+	/* common exit: pointers never obtained are still NULL at this point */
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)func_name);
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign);
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
+	free(line_tab);
+	/* free the file names from the last slot down to the first */
+	while (line_file_names && (nr_lines > 0)) {
+	    if (line_file_names[nr_lines - 1]) {
+	        free(line_file_names[nr_lines - 1]);
+	    }
+	    nr_lines -= 1;
+	}
+	free(line_file_names);
+}
+
+/*
+ * Callback installed as DynamicCodeGenerated in Agent_OnLoad(): records the
+ * generated code under the name supplied by the JVM and warns if the record
+ * cannot be written.
+ */
+static void JNICALL
+code_generated_cb(jvmtiEnv *jvmti,
+		  char const *name,
+		  void const *code_addr,
+		  jint code_size)
+{
+	uint64_t addr = (uint64_t)(unsigned long)code_addr;
+
+	if (jvmti_write_code(jvmti_agent, name, addr, code_addr, code_size))
+		warnx("jvmti: write_code() failed for code_generated");
+}
+
+/*
+ * Agent entry point.
+ *
+ * Opens the jitdump file, obtains a JVMTI version 1 environment, requests the
+ * compiled-method-load capability (required) and the line-number/source-file
+ * capabilities (optional), then registers and enables the two code callbacks.
+ *
+ * @options is not referenced in this function.
+ *
+ * Returns 0 on success, -1 on any failure of a required step.
+ */
+JNIEXPORT jint JNICALL
+Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __maybe_unused)
+{
+	jvmtiEventCallbacks cb;
+	jvmtiCapabilities caps1;
+	jvmtiJlocationFormat format;
+	jvmtiEnv *jvmti = NULL;
+	jint ret;
+
+	jvmti_agent = jvmti_open();
+	if (!jvmti_agent) {
+		warnx("jvmti: open_agent failed");
+		return -1;
+	}
+
+	/*
+	 * Request a JVMTI interface version 1 environment
+	 */
+	ret = (*jvm)->GetEnv(jvm, (void *)&jvmti, JVMTI_VERSION_1);
+	if (ret != JNI_OK) {
+		warnx("jvmti: jvmti version 1 not supported");
+		return -1;
+	}
+
+	/*
+	 * acquire method_load capability, we require it
+	 * request line numbers (optional)
+	 */
+	memset(&caps1, 0, sizeof(caps1));
+	caps1.can_generate_compiled_method_load_events = 1;
+
+	ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "AddCapabilities", ret);
+		return -1;
+	}
+	/*
+	 * Line numbers are only requested when jlocations are bytecode
+	 * indices; failing to get them is not an error, has_line_numbers
+	 * simply stays 0.
+	 */
+	ret = (*jvmti)->GetJLocationFormat(jvmti, &format);
+        if (ret == JVMTI_ERROR_NONE && format == JVMTI_JLOCATION_JVMBCI) {
+                memset(&caps1, 0, sizeof(caps1));
+                caps1.can_get_line_numbers = 1;
+                caps1.can_get_source_file_name = 1;
+		ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
+                if (ret == JVMTI_ERROR_NONE)
+                        has_line_numbers = 1;
+        } else if (ret != JVMTI_ERROR_NONE)
+		print_error(jvmti, "GetJLocationFormat", ret);
+
+
+	memset(&cb, 0, sizeof(cb));
+
+	cb.CompiledMethodLoad   = compiled_method_load_cb;
+	cb.DynamicCodeGenerated = code_generated_cb;
+
+	ret = (*jvmti)->SetEventCallbacks(jvmti, &cb, sizeof(cb));
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "SetEventCallbacks", ret);
+		return -1;
+	}
+
+	/* the callbacks are registered above; now turn both event kinds on */
+	ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
+			JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "SetEventNotificationMode(METHOD_LOAD)", ret);
+		return -1;
+	}
+
+	ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
+			JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		print_error(jvmti, "SetEventNotificationMode(CODE_GENERATED)", ret);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Agent exit point: close the jitdump file; a failure to do so terminates the
+ * process with exit status 1.
+ */
+JNIEXPORT void JNICALL
+Agent_OnUnload(JavaVM *jvm __maybe_unused)
+{
+	if (jvmti_close(jvmti_agent))
+		errx(1, "Error: op_close_agent()");
+}
diff --git a/perf-archive.sh b/perf-archive.sh
new file mode 100644
index 0000000..0cfb3e2
--- /dev/null
+++ b/perf-archive.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# perf archive
+# Arnaldo Carvalho de Melo <acme@redhat.com>
+
+# Input file: first argument, or perf.data in the current directory.
+PERF_DATA=perf.data
+if [ $# -ne 0 ] ; then
+	PERF_DATA=$1
+fi
+
+#
+# PERF_BUILDID_DIR environment variable set by perf
+# path to buildid directory, default to $HOME/.debug
+#
+if [ -z "$PERF_BUILDID_DIR" ]; then
+	PERF_BUILDID_DIR=~/.debug/
+else
+	# append / to make substitutions work
+	PERF_BUILDID_DIR=$PERF_BUILDID_DIR/
+fi
+
+BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX) || exit 1
+NOBUILDID=0000000000000000000000000000000000000000
+
+# Keep only the objects that were hit and that carry a real build-id.
+perf buildid-list -i "$PERF_DATA" --with-hits | grep -v "^$NOBUILDID " > "$BUILDIDS"
+if [ ! -s "$BUILDIDS" ] ; then
+	echo "perf archive: no build-ids found"
+	rm "$BUILDIDS" || true
+	exit 1
+fi
+
+MANIFEST=$(mktemp /tmp/perf-archive-manifest.XXXXXX) || { rm "$BUILDIDS" || true; exit 1; }
+PERF_BUILDID_LINKDIR=$(readlink -f "$PERF_BUILDID_DIR")/
+
+# For every build-id list both the .build-id link and the file it resolves to,
+# each relative to the build-id directory so that tar -C can find them.
+cut -d ' ' -f 1 "$BUILDIDS" | \
+while read -r build_id ; do
+	linkname=$PERF_BUILDID_DIR.build-id/${build_id:0:2}/${build_id:2}
+	filename=$(readlink -f "$linkname")
+	echo "${linkname#"$PERF_BUILDID_DIR"}" >> "$MANIFEST"
+	echo "${filename#"$PERF_BUILDID_LINKDIR"}" >> "$MANIFEST"
+done
+
+tar cjf "$PERF_DATA.tar.bz2" -C "$PERF_BUILDID_DIR" -T "$MANIFEST"
+rm "$MANIFEST" "$BUILDIDS" || true
+echo -e "Now please run:\n"
+echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n"
+echo "wherever you need to run 'perf report' on."
+exit 0
diff --git a/perf-completion.sh b/perf-completion.sh
new file mode 100644
index 0000000..fdf75d4
--- /dev/null
+++ b/perf-completion.sh
@@ -0,0 +1,298 @@
+# perf bash and zsh completion
+# SPDX-License-Identifier: GPL-2.0
+
+# Taken from git.git's completion script.
+# Rebuild the completion word list with some word-break characters disabled.
+#
+# $1 lists characters that should NOT split words. Only those that are
+# actually in COMP_WORDBREAKS matter. The result is left in words_ (the
+# re-joined words) and cword_ (index of the word under the cursor); both are
+# expected to be declared by the caller.
+__my_reassemble_comp_words_by_ref()
+{
+	local exclude i j first
+	# Which word separators to exclude?
+	exclude="${1//[^$COMP_WORDBREAKS]}"
+	cword_=$COMP_CWORD
+	# Nothing to exclude: the shell's own split is already what we want.
+	if [ -z "$exclude" ]; then
+		words_=("${COMP_WORDS[@]}")
+		return
+	fi
+	# List of word completion separators has shrunk;
+	# re-assemble words to complete.
+	# i walks COMP_WORDS, j is the index of the word being built in words_.
+	for ((i=0, j=0; i < ${#COMP_WORDS[@]}; i++, j++)); do
+		# Append each nonempty word consisting of just
+		# word separator characters to the current word.
+		first=t
+		while
+			[ $i -gt 0 ] &&
+			[ -n "${COMP_WORDS[$i]}" ] &&
+			# word consists of excluded word separators
+			[ "${COMP_WORDS[$i]//[^$exclude]}" = "${COMP_WORDS[$i]}" ]
+		do
+			# Attach to the previous token,
+			# unless the previous token is the command name.
+			if [ $j -ge 2 ] && [ -n "$first" ]; then
+				((j--))
+			fi
+			first=
+			words_[$j]=${words_[j]}${COMP_WORDS[i]}
+			if [ $i = $COMP_CWORD ]; then
+				cword_=$j
+			fi
+			if (($i < ${#COMP_WORDS[@]} - 1)); then
+				((i++))
+			else
+				# Done.
+				return
+			fi
+		done
+		# Ordinary word: append it to the word being built.
+		words_[$j]=${words_[j]}${COMP_WORDS[i]}
+		if [ $i = $COMP_CWORD ]; then
+			cword_=$j
+		fi
+	done
+}
+
+# Define preload_get_comp_words_by_ref="false", if the function
+# __perf_get_comp_words_by_ref() is required instead.
+preload_get_comp_words_by_ref="true"
+
+# Fall back to the local implementation when _get_comp_words_by_ref is not
+# already defined in this shell.
+if [ $preload_get_comp_words_by_ref = "true" ]; then
+	type _get_comp_words_by_ref &>/dev/null ||
+	preload_get_comp_words_by_ref="false"
+fi
+# Local fallback, defined only when the preloaded function is unavailable.
+# Usage: __perf_get_comp_words_by_ref [-n EXCLUDE] [cur] [prev] [words] [cword]
+# Sets the named variables (cur, prev, words, cword) in the caller's scope
+# from the word list re-assembled with the EXCLUDE characters not splitting.
+[ $preload_get_comp_words_by_ref = "true" ] ||
+__perf_get_comp_words_by_ref()
+{
+	local exclude cur_ words_ cword_
+	if [ "$1" = "-n" ]; then
+		exclude=$2
+		shift 2
+	fi
+	__my_reassemble_comp_words_by_ref "$exclude"
+	cur_=${words_[cword_]}
+	# Each remaining argument names one variable to fill in.
+	while [ $# -gt 0 ]; do
+		case "$1" in
+		cur)
+			cur=$cur_
+			;;
+		prev)
+			prev=${words_[$cword_-1]}
+			;;
+		words)
+			words=("${words_[@]}")
+			;;
+		cword)
+			cword=$cword_
+			;;
+		esac
+		shift
+	done
+}
+
+# Define preload__ltrim_colon_completions="false", if the function
+# __perf__ltrim_colon_completions() is required instead.
+preload__ltrim_colon_completions="true"
+
+# Fall back to the local implementation when __ltrim_colon_completions is not
+# already defined in this shell.
+if [ $preload__ltrim_colon_completions = "true" ]; then
+	type __ltrim_colon_completions &>/dev/null ||
+	preload__ltrim_colon_completions="false"
+fi
+# Local fallback, defined only when the preloaded function is unavailable.
+# $1 is the word being completed. When it contains a colon and ':' is a word
+# break character, the prefix up to and including the last colon is stripped
+# from every COMPREPLY entry.
+[ $preload__ltrim_colon_completions = "true" ] ||
+__perf__ltrim_colon_completions()
+{
+	if [[ "$1" == *:* && "$COMP_WORDBREAKS" == *:* ]]; then
+		# Remove colon-word prefix from COMPREPLY items
+		local colon_word=${1%"${1##*:}"}
+		local i=${#COMPREPLY[*]}
+		while [[ $((--i)) -ge 0 ]]; do
+			COMPREPLY[$i]=${COMPREPLY[$i]#"$colon_word"}
+		done
+	fi
+}
+
+# Set COMPREPLY to the words of $1 that complete the prefix $2.
+__perfcomp ()
+{
+	COMPREPLY=( $( compgen -W "$1" -- "$2" ) )
+}
+
+# Like __perfcomp, then strip the colon-terminated prefix of the current word
+# from the candidates, using whichever ltrim helper was selected at load time.
+# Note that the helper is given $cur (unquoted), not $2.
+__perfcomp_colon ()
+{
+	__perfcomp "$1" "$2"
+	if [ $preload__ltrim_colon_completions = "true" ]; then
+		__ltrim_colon_completions $cur
+	else
+		__perf__ltrim_colon_completions $cur
+	fi
+}
+
+# Find the closest command word before the cursor, skipping options.
+#
+# $1 (optional) is a perf command whose own sub-commands should be looked for.
+# Walks the words backwards from the one before the cursor and sets
+# prev_skip_opts to the first word found in "$cmd $1 --list-cmds". The search
+# stops, leaving prev_skip_opts empty, when a word matching $1 is reached or
+# the words are exhausted. Reads cmd, words and cword from the caller.
+__perf_prev_skip_opts ()
+{
+	local i cmd_ cmds_
+
+	let i=cword-1
+	cmds_=$($cmd $1 --list-cmds)
+	prev_skip_opts=()
+	while [ $i -ge 0 ]; do
+		if [[ ${words[i]} == $1 ]]; then
+			return
+		fi
+		for cmd_ in $cmds_; do
+			if [[ ${words[i]} == $cmd_ ]]; then
+				prev_skip_opts=${words[i]}
+				return
+			fi
+		done
+		((i--))
+	done
+}
+
+# Completion driver shared by the bash and zsh front ends.
+#
+# Expects cur, prev, words and cword to be set by the caller and fills in the
+# completion candidates through __perfcomp / __perfcomp_colon:
+#   - no perf command typed yet: perf commands, or perf's long options
+#   - after -e/--event of record, stat or top: event names
+#   - otherwise: sub-commands of selected commands and/or their long options
+__perf_main ()
+{
+	local cmd
+
+	cmd=${words[0]}
+	COMPREPLY=()
+
+	# Skip options backward and find the last perf command
+	__perf_prev_skip_opts
+	# List perf subcommands or long options
+	if [ -z $prev_skip_opts ]; then
+		if [[ $cur == --* ]]; then
+			cmds=$($cmd --list-opts)
+		else
+			cmds=$($cmd --list-cmds)
+		fi
+		__perfcomp "$cmds" "$cur"
+	# List possible events for -e option
+	elif [[ $prev == @("-e"|"--event") &&
+		$prev_skip_opts == @(record|stat|top) ]]; then
+
+		local cur1=${COMP_WORDS[COMP_CWORD]}
+		local raw_evts=$($cmd list --raw-dump)
+		local arr s tmp result
+
+		# The part after the '/' starts with a capital letter: offer the
+		# cpu/... events with that part upper-cased as well.
+		if [[ "$cur1" == */* && ${cur1#*/} =~ ^[A-Z] ]]; then
+			OLD_IFS="$IFS"
+			IFS=" "
+			arr=($raw_evts)
+			IFS="$OLD_IFS"
+
+			for s in ${arr[@]}
+			do
+				if [[ "$s" == *cpu/* ]]; then
+					tmp=${s#*cpu/}
+					result=$result" ""cpu/"${tmp^^}
+				else
+					result=$result" "$s
+				fi
+			done
+
+			evts=${result}" "$(ls /sys/bus/event_source/devices/cpu/events)
+		else
+			evts=${raw_evts}" "$(ls /sys/bus/event_source/devices/cpu/events)
+		fi
+
+		# A lone ',' means a new event of a list is being started.
+		if [[ "$cur1" == , ]]; then
+			__perfcomp_colon "$evts" ""
+		else
+			__perfcomp_colon "$evts" "$cur1"
+		fi
+	else
+		# List subcommands for perf commands
+		if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched|
+			|data|help|script|test|timechart|trace) ]]; then
+			subcmds=$($cmd $prev_skip_opts --list-cmds)
+			__perfcomp_colon "$subcmds" "$cur"
+		fi
+		# List long option names
+		if [[ $cur == --* ]];  then
+			subcmd=$prev_skip_opts
+			__perf_prev_skip_opts $subcmd
+			subcmd=$subcmd" "$prev_skip_opts
+			opts=$($cmd $subcmd --list-opts)
+			__perfcomp "$opts" "$cur"
+		fi
+	fi
+}
+
+# zsh front end: redefine the two candidate helpers on top of compadd, hook
+# _perf into zsh's completion system and stop sourcing, so the bash-only
+# section that follows is never reached under zsh.
+if [[ -n ${ZSH_VERSION-} ]]; then
+	autoload -U +X compinit && compinit
+
+	# zsh version of __perfcomp: offer the words of $1 through compadd.
+	__perfcomp ()
+	{
+		emulate -L zsh
+
+		local c IFS=$' \t\n'
+		local -a array
+
+		for c in ${=1}; do
+			# every candidate gets a trailing space, except --opt=... forms
+			# and words ending in '.'
+			case $c in
+			--*=*|*.) ;;
+			*) c="$c " ;;
+			esac
+			array[${#array[@]}+1]="$c"
+		done
+
+		compset -P '*[=:]'
+		compadd -Q -S '' -a -- array && _ret=0
+	}
+
+	# zsh version of __perfcomp_colon: same as above, but the prefix of the
+	# current word up to its last ':' is removed from each candidate.
+	__perfcomp_colon ()
+	{
+		emulate -L zsh
+
+		local cur_="${2-$cur}"
+		local c IFS=$' \t\n'
+		local -a array
+
+		if [[ "$cur_" == *:* ]]; then
+			local colon_word=${cur_%"${cur_##*:}"}
+		fi
+
+		for c in ${=1}; do
+			case $c in
+			--*=*|*.) ;;
+			*) c="$c " ;;
+			esac
+			array[$#array+1]=${c#"$colon_word"}
+		done
+
+		compset -P '*[=:]'
+		compadd -Q -S '' -a -- array && _ret=0
+	}
+
+	# Completion function registered with compdef: sets up cur/prev/cword from
+	# zsh's words/CURRENT, runs __perf_main under ksh emulation, and falls
+	# back to _default when no candidate was added (_ret still non-zero).
+	_perf ()
+	{
+		local _ret=1 cur cword prev
+		cur=${words[CURRENT]}
+		prev=${words[CURRENT-1]}
+		let cword=CURRENT-1
+		emulate ksh -c __perf_main
+		let _ret && _default && _ret=0
+		return _ret
+	}
+
+	compdef _perf perf
+	return
+fi
+
+# bash front end, defined and registered only when a perf command is found.
+# Makes ',' a word break and removes ':' from the word breaks (the change to
+# COMP_WORDBREAKS is exported), fetches cur/words/cword/prev with '=', ':' and
+# ',' not splitting words, then runs __perf_main.
+type perf &>/dev/null &&
+_perf()
+{
+	if [[ "$COMP_WORDBREAKS" != *,* ]]; then
+		COMP_WORDBREAKS="${COMP_WORDBREAKS},"
+		export COMP_WORDBREAKS
+	fi
+
+	if [[ "$COMP_WORDBREAKS" == *:* ]]; then
+		COMP_WORDBREAKS="${COMP_WORDBREAKS/:/}"
+		export COMP_WORDBREAKS
+	fi
+
+	local cur words cword prev
+	if [ $preload_get_comp_words_by_ref = "true" ]; then
+		_get_comp_words_by_ref -n =:, cur words cword prev
+	else
+		__perf_get_comp_words_by_ref -n =:, cur words cword prev
+	fi
+	__perf_main
+} &&
+
+# Retry without -o bashdefault if the first form is rejected.
+complete -o bashdefault -o default -o nospace -F _perf perf 2>/dev/null \
+	|| complete -o default -o nospace -F _perf perf
diff --git a/perf-read-vdso.c b/perf-read-vdso.c
new file mode 100644
index 0000000..8c0ca0c
--- /dev/null
+++ b/perf-read-vdso.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#define VDSO__MAP_NAME "[vdso]"
+
+/*
+ * Include definition of find_vdso_map() also used in util/vdso.c for
+ * building perf.
+ */
+#include "util/find-vdso-map.c"
+
+/*
+ * Write the bytes of the region located by find_vdso_map() to stdout.
+ *
+ * Exit status is 0 on success, 1 if the region cannot be found or if writing
+ * or flushing stdout fails.
+ */
+int main(void)
+{
+	void *start, *end;
+	const char *cursor;
+	size_t size, written;
+
+	if (find_vdso_map(&start, &end))
+		return 1;
+
+	/* byte arithmetic is done on char pointers: ISO C defines none on void * */
+	cursor = start;
+	size = (size_t)((const char *)end - cursor);
+
+	/* fwrite() may write less than asked, so loop until everything is out */
+	while (size) {
+		written = fwrite(cursor, 1, size, stdout);
+		if (!written)
+			return 1;
+		cursor += written;
+		size -= written;
+	}
+
+	if (fflush(stdout))
+		return 1;
+
+	return 0;
+}
diff --git a/perf-sys.h b/perf-sys.h
new file mode 100644
index 0000000..3eb7a39
--- /dev/null
+++ b/perf-sys.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_SYS_H
+#define _PERF_SYS_H
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/perf_event.h>
+#include <asm/barrier.h>
+
+/*
+ * CPUINFO_PROC: per-architecture brace-enclosed list of strings, with
+ * { "model name", } as the default when none of the architectures below
+ * matches.
+ * NOTE(review): presumably these are the /proc/cpuinfo field names that
+ * identify the CPU on each architecture -- confirm against the users of
+ * this macro.
+ */
+#ifdef __powerpc__
+#define CPUINFO_PROC	{"cpu"}
+#endif
+
+#ifdef __s390__
+#define CPUINFO_PROC	{"vendor_id"}
+#endif
+
+#ifdef __sh__
+#define CPUINFO_PROC	{"cpu type"}
+#endif
+
+#ifdef __hppa__
+#define CPUINFO_PROC	{"cpu"}
+#endif
+
+#ifdef __sparc__
+#define CPUINFO_PROC	{"cpu"}
+#endif
+
+#ifdef __alpha__
+#define CPUINFO_PROC	{"cpu model"}
+#endif
+
+/* the only entry with two candidate strings */
+#ifdef __arm__
+#define CPUINFO_PROC	{"model name", "Processor"}
+#endif
+
+#ifdef __mips__
+#define CPUINFO_PROC	{"cpu model"}
+#endif
+
+#ifdef __arc__
+#define CPUINFO_PROC	{"Processor"}
+#endif
+
+#ifdef __xtensa__
+#define CPUINFO_PROC	{"core ID"}
+#endif
+
+/* default for every architecture not listed above */
+#ifndef CPUINFO_PROC
+#define CPUINFO_PROC	{ "model name", }
+#endif
+
+/*
+ * Thin wrapper around the perf_event_open system call.
+ *
+ * Passes the arguments through unchanged and returns the syscall's result.
+ * When built with HAVE_ATTR_TEST and test_attr__enabled is set, the call and
+ * its result are also handed to test_attr__open().
+ */
+static inline int
+sys_perf_event_open(struct perf_event_attr *attr,
+		      pid_t pid, int cpu, int group_fd,
+		      unsigned long flags)
+{
+	const int event_fd = syscall(__NR_perf_event_open, attr, pid, cpu,
+				     group_fd, flags);
+
+#ifdef HAVE_ATTR_TEST
+	if (unlikely(test_attr__enabled))
+		test_attr__open(attr, pid, cpu, event_fd, group_fd, flags);
+#endif
+	return event_fd;
+}
+
+#endif /* _PERF_SYS_H */
diff --git a/perf-with-kcore.sh b/perf-with-kcore.sh
new file mode 100644
index 0000000..7e47a7c
--- /dev/null
+++ b/perf-with-kcore.sh
@@ -0,0 +1,259 @@
+#!/bin/bash
+# perf-with-kcore: use perf with a copy of kcore
+# Copyright (c) 2014, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+
+set -e
+
+# Print the command-line synopsis on stderr and terminate with status 1.
+usage()
+{
+	printf '%s\n' \
+		"Usage: perf-with-kcore <perf sub-command> <perf.data directory> [<sub-command options> [ -- <workload>]]" \
+		"       <perf sub-command> can be record, script, report or inject" \
+		"   or: perf-with-kcore fix_buildid_cache_permissions" >&2
+	exit 1
+}
+
+# Resolve the perf binary into $PERF unless the caller already provided
+# one, then report which binary is used and print its version.
+# Exits with status 1 when no executable perf can be found.
+find_perf()
+{
+	if [ -n "$PERF" ] ; then
+		return 0
+	fi
+
+	# "|| true" keeps a failed lookup from tripping "set -e"; the empty
+	# result is diagnosed just below.
+	PERF=$(which perf || true)
+	if [ -z "$PERF" ] || [ ! -x "$PERF" ] ; then
+		echo "Failed to find perf" >&2
+		exit 1
+	fi
+
+	echo "Using $PERF"
+	"$PERF" version
+}
+
+# Copy the current kcore (as stored by "perf buildid-cache -k /proc/kcore")
+# into $PERF_DATA_DIR, hand ownership to the invoking user and create the
+# "$PERF_DATA_DIR/kcore_dir" symlink pointing at the copy.
+# Uses sudo for the privileged steps unless already running as root.
+# Exits with status 1 if perf does not report where it stored kcore.
+copy_kcore()
+{
+	echo "Copying kcore"
+
+	if [ $EUID -eq 0 ] ; then
+		SUDO=""
+	else
+		SUDO="sudo"
+	fi
+
+	# Keep a throw-away "perf record" session running in the background
+	# while kcore is being copied.
+	rm -f perf.data.junk
+	("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null &
+	PERF_PID=$!
+
+	# Need to make sure that perf has started
+	sleep 1
+
+	# Capture only stderr of buildid-cache: its stdout is discarded inside
+	# the inner subshell, then stderr is redirected into the substitution.
+	KCORE=$(($SUDO "$PERF" buildid-cache -v -f -k /proc/kcore >/dev/null) 2>&1)
+	case "$KCORE" in
+	"kcore added to build-id cache directory "*)
+		# Strip the fixed message prefix; the remainder is the directory.
+		KCORE_DIR=${KCORE#"kcore added to build-id cache directory "}
+	;;
+	*)
+		# Unexpected output: stop the background session, clean up, fail.
+		kill $PERF_PID
+		wait >/dev/null 2>/dev/null || true
+		rm perf.data.junk
+		echo "$KCORE"
+		echo "Failed to find kcore" >&2
+		exit 1
+	;;
+	esac
+
+	kill $PERF_PID
+	wait >/dev/null 2>/dev/null || true
+	rm perf.data.junk
+
+	# Copy the cache directory next to the perf data, then remove the
+	# original from the build-id cache.
+	$SUDO cp -a "$KCORE_DIR" "$(pwd)/$PERF_DATA_DIR"
+	$SUDO rm -f "$KCORE_DIR/kcore"
+	$SUDO rm -f "$KCORE_DIR/kallsyms"
+	$SUDO rm -f "$KCORE_DIR/modules"
+	$SUDO rmdir "$KCORE_DIR"
+
+	# From here on KCORE_DIR refers to the copy inside $PERF_DATA_DIR.
+	KCORE_DIR_BASENAME=$(basename "$KCORE_DIR")
+	KCORE_DIR="$(pwd)/$PERF_DATA_DIR/$KCORE_DIR_BASENAME"
+
+	$SUDO chown $UID "$KCORE_DIR"
+	$SUDO chown $UID "$KCORE_DIR/kcore"
+	$SUDO chown $UID "$KCORE_DIR/kallsyms"
+	$SUDO chown $UID "$KCORE_DIR/modules"
+
+	$SUDO chgrp $GROUPS "$KCORE_DIR"
+	$SUDO chgrp $GROUPS "$KCORE_DIR/kcore"
+	$SUDO chgrp $GROUPS "$KCORE_DIR/kallsyms"
+	$SUDO chgrp $GROUPS "$KCORE_DIR/modules"
+
+	# Relative symlink, so the data directory can be moved as a whole.
+	ln -s "$KCORE_DIR_BASENAME" "$PERF_DATA_DIR/kcore_dir"
+}
+
+# Give ownership of everything under the sudo-invoking user's ~/.debug
+# back to that user (and to $SUDO_GID when it is set).
+# Must be run as root through sudo; exits with status 1 otherwise, and
+# also when $HOME differs from the invoking user's home directory.
+fix_buildid_cache_permissions()
+{
+	if [ $EUID -ne 0 ] ; then
+		echo "This script must be run as root via sudo " >&2
+		exit 1
+	fi
+
+	if [ -z "$SUDO_USER" ] ; then
+		echo "This script must be run via sudo" >&2
+		exit 1
+	fi
+
+	# Let a child bash perform tilde expansion to get that user's home.
+	USER_HOME=$(bash <<< "echo ~$SUDO_USER")
+
+	if [ "$HOME" != "$USER_HOME" ] ; then
+		echo "Fix unnecessary because root has a home: $HOME" >&2
+		exit 1
+	fi
+
+	echo "Fixing buildid cache permissions"
+
+	# Directories, regular files with a single hard link, and symlinks
+	# (chown -h changes the link itself) that are not owned by the user.
+	find "$USER_HOME/.debug" -xdev -type d          ! -user "$SUDO_USER" -ls -exec chown    "$SUDO_USER" \{\} \;
+	find "$USER_HOME/.debug" -xdev -type f -links 1 ! -user "$SUDO_USER" -ls -exec chown    "$SUDO_USER" \{\} \;
+	find "$USER_HOME/.debug" -xdev -type l          ! -user "$SUDO_USER" -ls -exec chown -h "$SUDO_USER" \{\} \;
+
+	# Same three passes for group ownership, only when sudo exported a gid.
+	if [ -n "$SUDO_GID" ] ; then
+		find "$USER_HOME/.debug" -xdev -type d          ! -group "$SUDO_GID" -ls -exec chgrp    "$SUDO_GID" \{\} \;
+		find "$USER_HOME/.debug" -xdev -type f -links 1 ! -group "$SUDO_GID" -ls -exec chgrp    "$SUDO_GID" \{\} \;
+		find "$USER_HOME/.debug" -xdev -type l          ! -group "$SUDO_GID" -ls -exec chgrp -h "$SUDO_GID" \{\} \;
+	fi
+
+	echo "Done"
+}
+
+# Warn on stderr when anything under $HOME/.debug is not owned by the
+# current user or group.  Skipped when running as root.
+# The check is advisory only: it never aborts the script.
+check_buildid_cache_permissions()
+{
+	if [ $EUID -eq 0 ] ; then
+		return
+	fi
+
+	# No cache directory means nothing can have the wrong owner.  Without
+	# this guard find fails and, because the script runs under "set -e",
+	# the failed command substitution would silently end the script.
+	if [ ! -d "$HOME/.debug" ] ; then
+		return 0
+	fi
+
+	# Start empty instead of inheriting a value from the environment.
+	# Despite the name, a non-empty value means "something is wrong".
+	PERMISSIONS_OK=""
+
+	# Each find prints at most one offending path (-print -quit).
+	# "|| true" keeps a find error (e.g. an unreadable sub-directory)
+	# from terminating the script via "set -e".
+	PERMISSIONS_OK+=$(find "$HOME/.debug" -xdev -type d          ! -user "$USER" -print -quit || true)
+	PERMISSIONS_OK+=$(find "$HOME/.debug" -xdev -type f -links 1 ! -user "$USER" -print -quit || true)
+	PERMISSIONS_OK+=$(find "$HOME/.debug" -xdev -type l          ! -user "$USER" -print -quit || true)
+
+	PERMISSIONS_OK+=$(find "$HOME/.debug" -xdev -type d          ! -group "$GROUPS" -print -quit || true)
+	PERMISSIONS_OK+=$(find "$HOME/.debug" -xdev -type f -links 1 ! -group "$GROUPS" -print -quit || true)
+	PERMISSIONS_OK+=$(find "$HOME/.debug" -xdev -type l          ! -group "$GROUPS" -print -quit || true)
+
+	if [ -n "$PERMISSIONS_OK" ] ; then
+		echo "*** WARNING *** buildid cache permissions may need fixing" >&2
+	fi
+}
+
+# Record a workload into a fresh $PERF_DATA_DIR and store a copy of kcore
+# next to the data.
+#   $@  the workload command line (required)
+# Uses the globals PERF_OPTIONS (array of perf record options) and
+# PERF_DATA_DIR.  Non-root users get a series of warnings about system
+# settings first; none of them is fatal.
+record()
+{
+	echo "Recording"
+
+	if [ $EUID -ne 0 ] ; then
+
+		if [ "$(cat /proc/sys/kernel/kptr_restrict)" -ne 0 ] ; then
+			echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2
+		fi
+
+		# Look for -a / --all-cpus as a whole word in the option list.
+		if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
+			echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2
+		fi
+
+		# Extra checks when intel_pt, intel_bts or a -I option is requested.
+		if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
+			if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then
+				echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2
+			fi
+
+			# The debugfs warning is skipped for --per-thread and -t.
+			if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
+				true
+			elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
+				true
+			elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then
+				echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2
+			fi
+		fi
+	fi
+
+	if [ -z "$1" ] ; then
+		echo "Workload is required for recording" >&2
+		usage
+	fi
+
+	# Never overwrite an existing data directory.
+	if [ -e "$PERF_DATA_DIR" ] ; then
+		echo "'$PERF_DATA_DIR' exists" >&2
+		exit 1
+	fi
+
+	find_perf
+
+	mkdir "$PERF_DATA_DIR"
+
+	# "|| true": a failing perf record must not trip "set -e" here; the
+	# rmdir test below decides whether anything was recorded.
+	echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@"
+	"$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true
+
+	# rmdir only succeeds on an empty directory, i.e. when perf wrote
+	# nothing; treat that as failure.
+	if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then
+		exit 1
+	fi
+
+	copy_kcore
+
+	echo "Done"
+}
+
+# Run "perf $PERF_SUB_COMMAND" on the data in $PERF_DATA_DIR, pointing
+# perf at the kallsyms copy kept under kcore_dir.  Any arguments are
+# passed through to perf unchanged.
+subcommand()
+{
+	local data_file="$PERF_DATA_DIR/perf.data"
+	local kallsyms_opt="--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms"
+
+	find_perf
+	check_buildid_cache_permissions
+
+	# Show the command line before running it.
+	echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@"
+	"$PERF" $PERF_SUB_COMMAND -i "$data_file" "$kallsyms_opt" "$@"
+}
+
+# "fix_buildid_cache_permissions" is a stand-alone mode without further
+# arguments.
+if [ "$1" = "fix_buildid_cache_permissions" ] ; then
+	fix_buildid_cache_permissions
+	exit 0
+fi
+
+# Every other mode needs a perf sub-command and a data directory.
+PERF_SUB_COMMAND=$1
+PERF_DATA_DIR=$2
+shift || true
+shift || true
+
+if [ -z "$PERF_SUB_COMMAND" ] || [ -z "$PERF_DATA_DIR" ] ; then
+	usage
+fi
+
+case "$PERF_SUB_COMMAND" in
+record)
+	# Everything up to "--" is an option for perf record; what follows
+	# is the workload.
+	until [ "$1" = "--" ] ; do
+		PERF_OPTIONS+=("$1")
+		shift || break
+	done
+	if [ "$1" != "--" ] ; then
+		echo "Options and workload are required for recording" >&2
+		usage
+	fi
+	shift
+	record "$@"
+;;
+script|report|inject)
+	subcommand "$@"
+;;
+*)
+	usage
+;;
+esac
diff --git a/perf.c b/perf.c
new file mode 100644
index 0000000..20a08cb
--- /dev/null
+++ b/perf.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * perf.c
+ *
+ * Performance analysis utility.
+ *
+ * This is the main hub from which the sub-commands (perf stat,
+ * perf top, perf record, perf report, etc.) are started.
+ */
+#include "builtin.h"
+
+#include "util/env.h"
+#include <subcmd/exec-cmd.h>
+#include "util/config.h"
+#include "util/quote.h"
+#include <subcmd/run-command.h>
+#include "util/parse-events.h"
+#include <subcmd/parse-options.h>
+#include "util/bpf-loader.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include <api/fs/fs.h>
+#include <api/fs/tracing_path.h>
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/kernel.h>
+
+/* Synopsis line passed to usage() and printed when no command is given. */
+const char perf_usage_string[] =
+	"perf [--version] [--help] [OPTIONS] COMMAND [ARGS]";
+
+/* Trailer printed after the command list when no command is given. */
+const char perf_more_info_string[] =
+	"See 'perf help COMMAND' for more information on a specific command.";
+
+/* Pager choice: -1 = not decided yet, 0 = no pager, 1 = use pager. */
+static int use_pager = -1;
+const char *input_name;
+
+/* One built-in sub-command. */
+struct cmd_struct {
+	const char *cmd;		/* name compared against the command line */
+	int (*fn)(int, const char **);	/* entry point, called with (argc, argv) */
+	int option;			/* bitmask of RUN_SETUP / USE_PAGER */
+};
+
+/*
+ * Table of built-in sub-commands.  It is searched linearly by name in
+ * handle_internal_command() and printed by the --list-cmds option.
+ * Some entries depend on build-time feature macros.
+ */
+static struct cmd_struct commands[] = {
+	{ "buildid-cache", cmd_buildid_cache, 0 },
+	{ "buildid-list", cmd_buildid_list, 0 },
+	{ "config",	cmd_config,	0 },
+	{ "c2c",	cmd_c2c,	0 },
+	{ "diff",	cmd_diff,	0 },
+	{ "evlist",	cmd_evlist,	0 },
+	{ "help",	cmd_help,	0 },
+	{ "kallsyms",	cmd_kallsyms,	0 },
+	{ "list",	cmd_list,	0 },
+	{ "record",	cmd_record,	0 },
+	{ "report",	cmd_report,	0 },
+	{ "bench",	cmd_bench,	0 },
+	{ "stat",	cmd_stat,	0 },
+	{ "timechart",	cmd_timechart,	0 },
+	{ "top",	cmd_top,	0 },
+	{ "annotate",	cmd_annotate,	0 },
+	{ "version",	cmd_version,	0 },
+	{ "script",	cmd_script,	0 },
+	{ "sched",	cmd_sched,	0 },
+#ifdef HAVE_LIBELF_SUPPORT
+	{ "probe",	cmd_probe,	0 },
+#endif
+	{ "kmem",	cmd_kmem,	0 },
+	{ "lock",	cmd_lock,	0 },
+	{ "kvm",	cmd_kvm,	0 },
+	{ "test",	cmd_test,	0 },
+#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
+	{ "trace",	cmd_trace,	0 },
+#endif
+	{ "inject",	cmd_inject,	0 },
+	{ "mem",	cmd_mem,	0 },
+	{ "data",	cmd_data,	0 },
+	{ "ftrace",	cmd_ftrace,	0 },
+};
+
+/*
+ * Cookie passed to the pager/browser perf_config() callbacks: the
+ * command being looked up and the value found for it (-1 = none).
+ */
+struct pager_config {
+	const char *cmd;
+	int val;
+};
+
+/*
+ * perf_config() callback.  When @var is "pager.<cmd>" for the command
+ * held in @data (a struct pager_config), store the boolean value of the
+ * setting in ->val.  Always returns 0.
+ */
+static int pager_command_config(const char *var, const char *value, void *data)
+{
+	static const char prefix[] = "pager.";
+	struct pager_config *cfg = data;
+
+	if (!strstarts(var, prefix))
+		return 0;
+
+	/* Compare what follows the prefix with the command name. */
+	if (strcmp(var + sizeof(prefix) - 1, cfg->cmd) == 0)
+		cfg->val = perf_config_bool(var, value);
+
+	return 0;
+}
+
+/*
+ * Look up the "pager.<cmd>" setting in the perf configuration.
+ *
+ * Returns 0 for "no pager", 1 for "use pager" and -1 for "not
+ * specified".  A non-zero perf_config() result is returned as-is.
+ */
+static int check_pager_config(const char *cmd)
+{
+	struct pager_config c = { .cmd = cmd, .val = -1 };
+	int err = perf_config(pager_command_config, &c);
+
+	if (err)
+		return err;
+	return c.val;
+}
+
+/*
+ * perf_config() callback for the "tui.<cmd>" and "gtk.<cmd>" settings of
+ * the command held in @data (a struct pager_config).
+ *
+ * "tui.<cmd>" stores the boolean value itself; "gtk.<cmd>" stores 2 when
+ * true and 0 when false.  Always returns 0.
+ */
+static int browser_command_config(const char *var, const char *value, void *data)
+{
+	struct pager_config *cfg = data;
+
+	/* The two prefixes are mutually exclusive, so at most one applies. */
+	if (strstarts(var, "tui.")) {
+		if (strcmp(var + 4, cfg->cmd) == 0)
+			cfg->val = perf_config_bool(var, value);
+	} else if (strstarts(var, "gtk.")) {
+		if (strcmp(var + 4, cfg->cmd) == 0)
+			cfg->val = perf_config_bool(var, value) ? 2 : 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the "tui.<cmd>" / "gtk.<cmd>" settings in the perf
+ * configuration.
+ *
+ * Returns 0 for "no tui", 1 for "use tui", 2 for "use gtk" and -1 for
+ * "not specified".  A non-zero perf_config() result is returned as-is.
+ */
+static int check_browser_config(const char *cmd)
+{
+	struct pager_config c = { .cmd = cmd, .val = -1 };
+	int err = perf_config(browser_command_config, &c);
+
+	if (err)
+		return err;
+	return c.val;
+}
+
+/*
+ * Apply the current use_pager decision.
+ *
+ * Only an explicit "no pager" (0) has an effect: the pager environment
+ * variable is forced to "cat".  For 1 the pager would be started, but
+ * that call (setup_pager()) is disabled here; any other value is left
+ * alone.
+ */
+static void commit_pager_choice(void)
+{
+	if (use_pager == 0)
+		setenv(PERF_PAGER_ENVIRONMENT, "cat", 1);
+}
+
+/*
+ * Names of the global options.  In this file the table is only walked by
+ * the --list-opts handler, which prints the long name of every entry
+ * except the terminating OPT_END().
+ */
+struct option options[] = {
+	OPT_ARGUMENT("help", "help"),
+	OPT_ARGUMENT("version", "version"),
+	OPT_ARGUMENT("exec-path", "exec-path"),
+	OPT_ARGUMENT("html-path", "html-path"),
+	OPT_ARGUMENT("paginate", "paginate"),
+	OPT_ARGUMENT("no-pager", "no-pager"),
+	OPT_ARGUMENT("debugfs-dir", "debugfs-dir"),
+	OPT_ARGUMENT("buildid-dir", "buildid-dir"),
+	OPT_ARGUMENT("list-cmds", "list-cmds"),
+	OPT_ARGUMENT("list-opts", "list-opts"),
+	OPT_ARGUMENT("debug", "debug"),
+	OPT_END()
+};
+
+/*
+ * Consume the global options that precede the sub-command.
+ *
+ * @argv, @argc: in/out cursor over the remaining arguments.  On return it
+ *               has been advanced past every option (and option argument)
+ *               handled here.
+ * @envchanged:  optional; set to 1 when --no-pager, --debugfs-dir (either
+ *               form) or --buildid-dir was seen.
+ *
+ * Parsing stops at the first argument that does not start with '-', and
+ * at --help / --version.  -h, -v and -vv are rewritten in place to the
+ * corresponding command and also stop parsing.  Several options print
+ * something and exit(0) directly; unknown options end in usage().
+ *
+ * Returns the number of options handled (an option and its separate
+ * argument count as one).
+ */
+static int handle_options(const char ***argv, int *argc, int *envchanged)
+{
+	int handled = 0;
+
+	while (*argc > 0) {
+		const char *cmd = (*argv)[0];
+		if (cmd[0] != '-')
+			break;
+
+		/*
+		 * For legacy reasons, the "version" and "help"
+		 * commands can be written with "--" prepended
+		 * to make them look like flags.
+		 */
+		if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version"))
+			break;
+
+		/*
+		 * Shortcut for '-h' and '-v' options to invoke help
+		 * and version command.
+		 */
+		if (!strcmp(cmd, "-h")) {
+			(*argv)[0] = "--help";
+			break;
+		}
+
+		if (!strcmp(cmd, "-v")) {
+			(*argv)[0] = "--version";
+			break;
+		}
+
+		/* -vv additionally turns on verbose version output. */
+		if (!strcmp(cmd, "-vv")) {
+			(*argv)[0] = "version";
+			version_verbose = 1;
+			break;
+		}
+
+		/*
+		 * Check remaining flags.
+		 */
+		if (strstarts(cmd, CMD_EXEC_PATH)) {
+			/* "=<path>" sets the exec path; otherwise print it and exit. */
+			cmd += strlen(CMD_EXEC_PATH);
+			if (*cmd == '=')
+				set_argv_exec_path(cmd + 1);
+			else {
+				puts(get_argv_exec_path());
+				exit(0);
+			}
+		} else if (!strcmp(cmd, "--html-path")) {
+			puts(system_path(PERF_HTML_PATH));
+			exit(0);
+		} else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) {
+			use_pager = 1;
+		} else if (!strcmp(cmd, "--no-pager")) {
+			use_pager = 0;
+			if (envchanged)
+				*envchanged = 1;
+		} else if (!strcmp(cmd, "--debugfs-dir")) {
+			if (*argc < 2) {
+				fprintf(stderr, "No directory given for --debugfs-dir.\n");
+				usage(perf_usage_string);
+			}
+			tracing_path_set((*argv)[1]);
+			if (envchanged)
+				*envchanged = 1;
+			/* Skip the directory argument; the option itself is skipped below. */
+			(*argv)++;
+			(*argc)--;
+		} else if (!strcmp(cmd, "--buildid-dir")) {
+			if (*argc < 2) {
+				fprintf(stderr, "No directory given for --buildid-dir.\n");
+				usage(perf_usage_string);
+			}
+			set_buildid_dir((*argv)[1]);
+			if (envchanged)
+				*envchanged = 1;
+			(*argv)++;
+			(*argc)--;
+		} else if (strstarts(cmd, CMD_DEBUGFS_DIR)) {
+			/* Form with the directory attached to the option itself. */
+			tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR));
+			fprintf(stderr, "dir: %s\n", tracing_path);
+			if (envchanged)
+				*envchanged = 1;
+		} else if (!strcmp(cmd, "--list-cmds")) {
+			unsigned int i;
+
+			for (i = 0; i < ARRAY_SIZE(commands); i++) {
+				struct cmd_struct *p = commands+i;
+				printf("%s ", p->cmd);
+			}
+			putchar('\n');
+			exit(0);
+		} else if (!strcmp(cmd, "--list-opts")) {
+			unsigned int i;
+
+			/* "-1" leaves out the OPT_END() terminator. */
+			for (i = 0; i < ARRAY_SIZE(options)-1; i++) {
+				struct option *p = options+i;
+				printf("--%s ", p->long_name);
+			}
+			putchar('\n');
+			exit(0);
+		} else if (!strcmp(cmd, "--debug")) {
+			if (*argc < 2) {
+				fprintf(stderr, "No variable specified for --debug.\n");
+				usage(perf_usage_string);
+			}
+			if (perf_debug_option((*argv)[1]))
+				usage(perf_usage_string);
+
+			(*argv)++;
+			(*argc)--;
+		} else {
+			fprintf(stderr, "Unknown option: %s\n", cmd);
+			usage(perf_usage_string);
+		}
+
+		/* Step over the option that was just handled. */
+		(*argv)++;
+		(*argc)--;
+		handled++;
+	}
+	return handled;
+}
+
+#define RUN_SETUP	(1<<0)
+#define USE_PAGER	(1<<1)
+
+/*
+ * Run built-in command @p with @argc/@argv and return the exit status for
+ * the process.
+ *
+ * Before the command runs, the browser and pager choices that are still
+ * undecided (-1) are resolved from the configuration and from the
+ * command's option flags.  After it returns, global state is torn down.
+ *
+ * A non-zero command status is returned masked to its low 8 bits.  On
+ * success stdout is flushed and closed so that write errors are noticed:
+ * the result is 0 when stdout cannot be stat'ed, is a pipe or socket, or
+ * closes cleanly, and 1 when flushing or closing fails.
+ */
+static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
+{
+	int status;
+	struct stat st;
+	char sbuf[STRERR_BUFSIZE];
+
+	if (use_browser == -1)
+		use_browser = check_browser_config(p->cmd);
+
+	if (use_pager == -1 && p->option & RUN_SETUP)
+		use_pager = check_pager_config(p->cmd);
+	if (use_pager == -1 && p->option & USE_PAGER)
+		use_pager = 1;
+	commit_pager_choice();
+
+	perf_env__set_cmdline(&perf_env, argc, argv);
+	status = p->fn(argc, argv);
+	perf_config__exit();
+	exit_browser(status);
+	perf_env__exit(&perf_env);
+	bpf__clear();
+
+	if (status)
+		return status & 0xff;
+
+	/* Somebody closed stdout? */
+	if (fstat(fileno(stdout), &st))
+		return 0;
+	/* Ignore write errors for pipes and sockets.. */
+	if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode))
+		return 0;
+
+	status = 1;
+	/*
+	 * Check for ENOSPC and EIO errors..  Each diagnostic ends in a
+	 * newline so the message is not run together with whatever is
+	 * printed next on the terminal.
+	 */
+	if (fflush(stdout)) {
+		fprintf(stderr, "write failure on standard output: %s\n",
+			str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out;
+	}
+	if (ferror(stdout)) {
+		fprintf(stderr, "unknown write failure on standard output\n");
+		goto out;
+	}
+	if (fclose(stdout)) {
+		fprintf(stderr, "close failed on standard output: %s\n",
+			str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out;
+	}
+	status = 0;
+out:
+	return status;
+}
+
+/*
+ * Run argv[0] as a built-in command if one with that name exists.
+ *
+ * On a match the process exits with the status from run_builtin(), so
+ * this function only returns when no built-in matched.  A "--help" in
+ * argv[1] is rewritten in place so that the help command is run instead.
+ */
+static void handle_internal_command(int argc, const char **argv)
+{
+	const char *cmd = argv[0];
+	struct cmd_struct *p;
+
+	/* Turn "perf cmd --help" into "perf help cmd" */
+	if (argc > 1 && strcmp(argv[1], "--help") == 0) {
+		argv[1] = argv[0];
+		cmd = "help";
+		argv[0] = cmd;
+	}
+
+	for (p = commands; p < commands + ARRAY_SIZE(commands); p++) {
+		if (strcmp(p->cmd, cmd) == 0)
+			exit(run_builtin(p, argc, argv));
+	}
+}
+
+/*
+ * Try to run the external program "perf-<argv[0]>" with the given
+ * arguments.
+ *
+ * Returns only when run_command_v_opt() reports -ERR_RUN_COMMAND_EXEC;
+ * in that case argv[0] is restored and errno is set to ENOENT.  In every
+ * other case the process exits: with the negated status of the command,
+ * or with 128 after a run-command error or a failed asprintf().
+ */
+static void execv_dashed_external(const char **argv)
+{
+	char *cmd;
+	const char *tmp;
+	int status;
+
+	/* On failure argv[0] is still the original name used by do_die. */
+	if (asprintf(&cmd, "perf-%s", argv[0]) < 0)
+		goto do_die;
+
+	/*
+	 * argv[0] must be the perf command, but the argv array
+	 * belongs to the caller, and may be reused in
+	 * subsequent loop iterations. Save argv[0] and
+	 * restore it on error.
+	 */
+	tmp = argv[0];
+	argv[0] = cmd;
+
+	/*
+	 * if we fail because the command is not found, it is
+	 * OK to return. Otherwise, we just pass along the status code.
+	 */
+	status = run_command_v_opt(argv, 0);
+	if (status != -ERR_RUN_COMMAND_EXEC) {
+		if (IS_RUN_COMMAND_ERR(status)) {
+do_die:
+			/*
+			 * NOTE(review): the message has no trailing newline;
+			 * confirm whether pr_err() appends one.
+			 */
+			pr_err("FATAL: unable to run '%s'", argv[0]);
+			status = -128;
+		}
+		exit(-status);
+	}
+	errno = ENOENT; /* as if we called execvp */
+
+	argv[0] = tmp;
+	zfree(&cmd);
+}
+
+/*
+ * Dispatch the command in (*argv)[0]: built-ins first, then an external
+ * "perf-<cmd>" program.  Always returns 0 when it returns at all.
+ */
+static int run_argv(int *argcp, const char ***argv)
+{
+	const char **args = *argv;
+
+	/* Only comes back from here when no built-in matched. */
+	handle_internal_command(*argcp, args);
+
+	/* .. so fall back to an external command. */
+	execv_dashed_external(args);
+	return 0;
+}
+
+/* Add SIGWINCH to the calling thread's blocked-signal mask. */
+static void pthread__block_sigwinch(void)
+{
+	sigset_t winch_only;
+
+	sigemptyset(&winch_only);
+	sigaddset(&winch_only, SIGWINCH);
+
+	pthread_sigmask(SIG_BLOCK, &winch_only, NULL);
+}
+
+/* Remove SIGWINCH from the calling thread's blocked-signal mask. */
+void pthread__unblock_sigwinch(void)
+{
+	sigset_t winch_only;
+
+	sigemptyset(&winch_only);
+	sigaddset(&winch_only, SIGWINCH);
+
+	pthread_sigmask(SIG_UNBLOCK, &winch_only, NULL);
+}
+
+/*
+ * cache_line_size(): store the cache line size in *cacheline_sizep.
+ *
+ * With _SC_LEVEL1_DCACHE_LINESIZE available the value comes straight
+ * from sysconf().  Otherwise it is read from cpu0's index0
+ * coherency_line_size file in sysfs; a failed read is only logged and
+ * *cacheline_sizep is not assigned by this function.
+ *
+ * The macro form parenthesises both its argument and its expansion so
+ * that any pointer expression (e.g. "p + 1") can be passed safely.
+ */
+#ifdef _SC_LEVEL1_DCACHE_LINESIZE
+#define cache_line_size(cacheline_sizep) \
+	(*(cacheline_sizep) = sysconf(_SC_LEVEL1_DCACHE_LINESIZE))
+#else
+static void cache_line_size(int *cacheline_sizep)
+{
+	if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
+		pr_debug("cannot determine cache line size");
+}
+#endif
+
+/*
+ * Entry point of the perf command.
+ *
+ * Performs global initialisation, handles the "perf-<cmd>" and "trace"
+ * invocation names, parses the global options and then dispatches to a
+ * built-in or external sub-command.
+ *
+ * A matching built-in ends the process through exit() inside
+ * handle_internal_command().  Apart from an early configuration error
+ * (returned as-is) and the direct cmd_trace() call, every return from
+ * this function yields 1.
+ */
+int main(int argc, const char **argv)
+{
+	int err;
+	const char *cmd;
+	char sbuf[STRERR_BUFSIZE];
+	int value;
+
+	/* libsubcmd init */
+	exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
+	pager_init(PERF_PAGER_ENVIRONMENT);
+
+	/* The page_size is placed in util object. */
+	page_size = sysconf(_SC_PAGE_SIZE);
+	cache_line_size(&cacheline_size);
+
+	/* Override the built-in limits with the kernel's values when readable. */
+	if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+		sysctl_perf_event_max_stack = value;
+
+	if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
+		sysctl_perf_event_max_contexts_per_stack = value;
+
+	cmd = extract_argv0_path(argv[0]);
+	if (!cmd)
+		cmd = "perf-help";
+
+	srandom(time(NULL));
+
+	perf_config__init();
+	err = perf_config(perf_default_config, NULL);
+	if (err)
+		return err;
+	set_buildid_dir(NULL);
+
+	/* get debugfs/tracefs mount point from /proc/mounts */
+	tracing_path_mount();
+
+	/*
+	 * "perf-xxxx" is the same as "perf xxxx", but we obviously:
+	 *
+	 *  - cannot take flags in between the "perf" and the "xxxx".
+	 *  - cannot execute it externally (since it would just do
+	 *    the same thing over again)
+	 *
+	 * So we just directly call the internal command handler. If that one
+	 * fails to handle this, then maybe we just run a renamed perf binary
+	 * that contains a dash in its name. To handle this scenario, we just
+	 * fall through and ignore the "xxxx" part of the command string.
+	 */
+	if (strstarts(cmd, "perf-")) {
+		cmd += 5;
+		argv[0] = cmd;
+		handle_internal_command(argc, argv);
+		/*
+		 * If the command is handled, the above function does not
+		 * return.  If it does return, undo the changes and fall
+		 * through.
+		 */
+		cmd -= 5;
+		argv[0] = cmd;
+	}
+	/* A binary whose name starts with "trace" runs the trace command. */
+	if (strstarts(cmd, "trace")) {
+#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
+		setup_path();
+		argv[0] = "trace";
+		return cmd_trace(argc, argv);
+#else
+		fprintf(stderr,
+			"trace command not available: missing audit-libs devel package at build time.\n");
+		goto out;
+#endif
+	}
+	/* Look for flags.. */
+	argv++;
+	argc--;
+	handle_options(&argv, &argc, NULL);
+	commit_pager_choice();
+
+	if (argc > 0) {
+		/* Accept "--help" / "--version" style spellings of a command. */
+		if (strstarts(argv[0], "--"))
+			argv[0] += 2;
+	} else {
+		/* The user didn't specify a command; give them help */
+		printf("\n usage: %s\n\n", perf_usage_string);
+		list_common_cmds_help();
+		printf("\n %s\n\n", perf_more_info_string);
+		goto out;
+	}
+	cmd = argv[0];
+
+	test_attr__init();
+
+	/*
+	 * We use PATH to find perf commands, but we prepend some higher
+	 * precedence paths: the "--exec-path" option, the PERF_EXEC_PATH
+	 * environment, and the $(perfexecdir) from the Makefile at build
+	 * time.
+	 */
+	setup_path();
+	/*
+	 * Block SIGWINCH notifications so that the thread that wants it can
+	 * unblock and get syscalls like select interrupted instead of waiting
+	 * forever while the signal goes to some other non interested thread.
+	 */
+	pthread__block_sigwinch();
+
+	perf_debug_setup();
+
+	/*
+	 * Try the command; when it is unknown (ENOENT), ask
+	 * help_unknown_cmd() for a replacement and retry exactly once.
+	 */
+	while (1) {
+		static int done_help;
+
+		run_argv(&argc, &argv);
+
+		if (errno != ENOENT)
+			break;
+
+		if (!done_help) {
+			cmd = argv[0] = help_unknown_cmd(cmd);
+			done_help = 1;
+		} else
+			break;
+	}
+
+	fprintf(stderr, "Failed to run command '%s': %s\n",
+		cmd, str_error_r(errno, sbuf, sizeof(sbuf)));
+out:
+	return 1;
+}
diff --git a/perf.h b/perf.h
new file mode 100644
index 0000000..a1a9795
--- /dev/null
+++ b/perf.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PERF_H
+#define _PERF_PERF_H
+
+#include <time.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+
+extern bool test_attr__enabled;
+void test_attr__ready(void);
+void test_attr__init(void);
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+		     int fd, int group_fd, unsigned long flags);
+
+#define HAVE_ATTR_TEST
+#include "perf-sys.h"
+
+/* Current CLOCK_MONOTONIC reading, expressed in nanoseconds. */
+static inline unsigned long long rdclock(void)
+{
+	struct timespec now;
+	unsigned long long nsecs;
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	nsecs = now.tv_sec * 1000000000ULL;
+	nsecs += now.tv_nsec;
+	return nsecs;
+}
+
+#define MAX_NR_CPUS			1024
+
+extern const char *input_name;
+extern bool perf_host, perf_guest;
+extern const char perf_version_string[];
+
+void pthread__unblock_sigwinch(void);
+
+#include "util/target.h"
+
+/*
+ * Settings for a recording session: the target plus a set of flags,
+ * sizes and sampling parameters.
+ * NOTE(review): the meaning of the individual fields is not visible in
+ * this header; presumably they mirror the record command-line options --
+ * confirm against the code that fills them in.
+ */
+struct record_opts {
+	struct target target;
+	bool	     group;
+	bool	     inherit_stat;
+	bool	     no_buffering;
+	bool	     no_inherit;
+	bool	     no_inherit_set;
+	bool	     no_samples;
+	bool	     raw_samples;
+	bool	     sample_address;
+	bool	     sample_phys_addr;
+	bool	     sample_weight;
+	bool	     sample_time;
+	bool	     sample_time_set;
+	bool	     sample_cpu;
+	bool	     period;
+	bool	     period_set;
+	bool	     running_time;
+	bool	     full_auxtrace;
+	bool	     auxtrace_snapshot_mode;
+	bool	     record_namespaces;
+	bool	     record_switch_events;
+	bool	     all_kernel;
+	bool	     all_user;
+	bool	     tail_synthesize;
+	bool	     overwrite;
+	bool	     ignore_missing_thread;
+	bool	     strict_freq;
+	bool	     sample_id;
+	unsigned int freq;
+	unsigned int mmap_pages;
+	unsigned int auxtrace_mmap_pages;
+	unsigned int user_freq;
+	u64          branch_stack;
+	u64	     sample_intr_regs;
+	u64	     sample_user_regs;
+	u64	     default_interval;
+	u64	     user_interval;
+	size_t	     auxtrace_snapshot_size;
+	const char   *auxtrace_snapshot_opts;
+	bool	     sample_transaction;
+	unsigned     initial_delay;
+	bool         use_clockid;
+	clockid_t    clockid;
+	unsigned int proc_map_timeout;
+};
+
+struct option;
+extern const char * const *record_usage;
+extern struct option *record_options;
+extern int version_verbose;
+
+int record__parse_freq(const struct option *opt, const char *str, int unset);
+#endif
diff --git a/pmu-events/Build b/pmu-events/Build
new file mode 100644
index 0000000..1778391
--- /dev/null
+++ b/pmu-events/Build
@@ -0,0 +1,15 @@
+# jevents is listed as a host program; it generates pmu-events.c below.
+hostprogs := jevents
+
+jevents-y	+= json.o jsmn.o jevents.o
+CHOSTFLAGS_jevents.o	= -I$(srctree)/tools/include
+pmu-events-y	+= pmu-events.o
+# Per-architecture input directory; JSON is empty when it does not exist.
+JDIR		=  pmu-events/arch/$(SRCARCH)
+JSON		=  $(shell [ -d $(JDIR) ] &&				\
+			find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
+
+#
+# Locate/process JSON files in pmu-events/arch/
+# directory and create tables in pmu-events.c.
+#
+# Regenerated whenever any JSON/mapfile input or jevents itself changes.
+$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS)
+	$(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
diff --git a/pmu-events/README b/pmu-events/README
new file mode 100644
index 0000000..e62b09b
--- /dev/null
+++ b/pmu-events/README
@@ -0,0 +1,152 @@
+
+The contents of this directory allow users to specify PMU events in their
+CPUs by their symbolic names rather than raw event codes (see example below).
+
+The main program in this directory is 'jevents', which is built and
+executed _BEFORE_ the perf binary itself is built.
+
+The 'jevents' program tries to locate and process JSON files in the directory
+tree tools/perf/pmu-events/arch/foo.
+
+	- Regular files with '.json' extension in the name are assumed to be
+	  JSON files, each of which describes a set of PMU events.
+
+	- The CSV file that maps a specific CPU to its set of PMU events is to
+	  be named 'mapfile.csv' (see below for mapfile format).
+
+	- Directories are traversed, but all other files are ignored.
+
+	- To reduce JSON event duplication per architecture, platform JSONs may
+	  use the "ArchStdEvent" keyword to reference an "Architecture standard
+	  event", defined in architecture standard JSONs.
+	  Architecture standard JSONs must be located in the architecture root
+	  folder. Matching is based on the "EventName" field.
+
+The PMU events supported by a CPU model are expected to be grouped into topics
+such as Pipelining, Cache, Memory, Floating-point etc. All events for a topic
+should be placed in a separate JSON file - where the file name identifies
+the topic. Eg: "Floating-point.json".
+
+All the topic JSON files for a CPU model/family should be in a separate
+sub directory. Thus for the Silvermont X86 CPU:
+
+	$ ls tools/perf/pmu-events/arch/x86/Silvermont_core
+	Cache.json 	Memory.json 	Virtual-Memory.json
+	Frontend.json 	Pipeline.json
+
+The JSONs folder for a CPU model/family may be placed in the root arch
+folder, or may be placed in a vendor sub-folder under the arch folder
+for instances where the arch and vendor are not the same.
+
+Using the JSON files and the mapfile, 'jevents' generates the C source file,
+'pmu-events.c', which encodes the two sets of tables:
+
+	- Set of 'PMU events tables' for all known CPUs in the architecture,
+	  (one table like the following, per JSON file; table name 'pme_power8'
+	  is derived from JSON file name, 'power8.json').
+
+		struct pmu_event pme_power8[] = {
+
+			...
+
+			{
+				.name = "pm_1plus_ppc_cmpl",
+				.event = "event=0x100f2",
+				.desc = "1 or more ppc insts finished,",
+			},
+
+			...
+		}
+
+	- A 'mapping table' that maps each CPU of the architecture, to its
+	  'PMU events table'
+
+		struct pmu_events_map pmu_events_map[] = {
+		{
+			.cpuid = "004b0000",
+			.version = "1",
+			.type = "core",
+			.table = pme_power8
+		},
+			...
+
+		};
+
+After the 'pmu-events.c' is generated, it is compiled and the resulting
+'pmu-events.o' is added to 'libperf.a' which is then used to build perf.
+
+NOTES:
+	1. Several CPUs can support same set of events and hence use a common
+	   JSON file. Hence several entries in the pmu_events_map[] could map
+	   to a single 'PMU events table'.
+
+	2. The 'pmu-events.h' has an extern declaration for the mapping table
+	   and the generated 'pmu-events.c' defines this table.
+
+	3. _All_ known CPU tables for architecture are included in the perf
+	   binary.
+
+At run time, perf determines the actual CPU it is running on, finds the
+matching events table and builds aliases for those events. This allows
+users to specify events by their name:
+
+	$ perf stat -e pm_1plus_ppc_cmpl sleep 1
+
+where 'pm_1plus_ppc_cmpl' is a Power8 PMU event.
+
+However some errors in processing may cause the perf build to fail.
+
+Mapfile format
+===============
+
+The mapfile enables multiple CPU models to share a single set of PMU events.
+It is required even if such mapping is 1:1.
+
+The mapfile.csv format is expected to be:
+
+	Header line
+	CPUID,Version,Dir/path/name,Type
+
+where:
+
+	Comma:
+		is the required field delimiter (i.e other fields cannot
+		have commas within them).
+
+	Comments:
+		Lines in which the first character is either '\n' or '#'
+		are ignored.
+
+	Header line
+		The header line is the first line in the file, which is
+		always _IGNORED_. It can be empty.
+
+	CPUID:
+		CPUID is an arch-specific char string, that can be used
+		to identify CPU (and associate it with a set of PMU events
+		it supports). Multiple CPUIDs can point to the same
+		Dir/path/name.
+
+		Example:
+			CPUID == 'GenuineIntel-6-2E' (on x86).
+			CPUID == '004b0100' (PVR value in Powerpc)
+	Version:
+		is the Version of the mapfile.
+
+	Dir/path/name:
+		is the pathname to the directory containing the CPU's JSON
+		files, relative to the directory containing the mapfile.csv
+
+	Type:
+		indicates whether the events are "core" or "uncore" events.
+
+
+	Eg:
+
+	$ grep Silvermont tools/perf/pmu-events/arch/x86/mapfile.csv
+	GenuineIntel-6-37,V13,Silvermont_core,core
+	GenuineIntel-6-4D,V13,Silvermont_core,core
+	GenuineIntel-6-4C,V13,Silvermont_core,core
+
+	i.e the three CPU models use the JSON files (i.e PMU events) listed
+	in the directory 'tools/perf/pmu-events/arch/x86/Silvermont_core'.
diff --git a/pmu-events/arch/arm64/arm/cortex-a53/branch.json b/pmu-events/arch/arm64/arm/cortex-a53/branch.json
new file mode 100644
index 0000000..0b0e6b2
--- /dev/null
+++ b/pmu-events/arch/arm64/arm/cortex-a53/branch.json
@@ -0,0 +1,25 @@
+[
+  {
+    "ArchStdEvent":  "BR_INDIRECT_SPEC"
+  },
+  {
+    "EventCode": "0xC9",
+    "EventName": "BR_COND",
+    "BriefDescription": "Conditional branch executed"
+  },
+  {
+    "EventCode": "0xCA",
+    "EventName": "BR_INDIRECT_MISPRED",
+    "BriefDescription": "Indirect branch mispredicted"
+  },
+  {
+    "EventCode": "0xCB",
+    "EventName": "BR_INDIRECT_MISPRED_ADDR",
+    "BriefDescription": "Indirect branch mispredicted because of address miscompare"
+  },
+  {
+    "EventCode": "0xCC",
+    "EventName": "BR_COND_MISPRED",
+    "BriefDescription": "Conditional branch mispredicted"
+  }
+]
diff --git a/pmu-events/arch/arm64/arm/cortex-a53/bus.json b/pmu-events/arch/arm64/arm/cortex-a53/bus.json
new file mode 100644
index 0000000..ce33b25
--- /dev/null
+++ b/pmu-events/arch/arm64/arm/cortex-a53/bus.json
@@ -0,0 +1,8 @@
+[
+  {
+        "ArchStdEvent": "BUS_ACCESS_RD"
+  },
+  {
+        "ArchStdEvent": "BUS_ACCESS_WR"
+  }
+]
diff --git a/pmu-events/arch/arm64/arm/cortex-a53/cache.json b/pmu-events/arch/arm64/arm/cortex-a53/cache.json
new file mode 100644
index 0000000..5dfbec4
--- /dev/null
+++ b/pmu-events/arch/arm64/arm/cortex-a53/cache.json
@@ -0,0 +1,27 @@
+[
+  {
+        "EventCode": "0xC2",
+        "EventName": "PREFETCH_LINEFILL",
+        "BriefDescription": "Linefill because of prefetch"
+  },
+  {
+        "EventCode": "0xC3",
+        "EventName": "PREFETCH_LINEFILL_DROP",
+        "BriefDescription": "Instruction Cache Throttle occurred"
+  },
+  {
+        "EventCode": "0xC4",
+        "EventName": "READ_ALLOC_ENTER",
+        "BriefDescription": "Entering read allocate mode"
+  },
+  {
+        "EventCode": "0xC5",
+        "EventName": "READ_ALLOC",
+        "BriefDescription": "Read allocate mode"
+  },
+  {
+        "EventCode": "0xC8",
+        "EventName": "EXT_SNOOP",
+        "BriefDescription": "SCU Snooped data from another CPU for this CPU"
+  }
+]
diff --git a/pmu-events/arch/arm64/arm/cortex-a53/memory.json b/pmu-events/arch/arm64/arm/cortex-a53/memory.json
new file mode 100644
index 0000000..25ae642
--- /dev/null
+++ b/pmu-events/arch/arm64/arm/cortex-a53/memory.json
@@ -0,0 +1,12 @@
+[
+  {
+    "EventCode": "0xC0",
+    "EventName": "EXT_MEM_REQ",
+    "BriefDescription": "External memory request"
+  },
+  {
+    "EventCode": "0xC1",
+    "EventName": "EXT_MEM_REQ_NC",
+    "BriefDescription": "Non-cacheable external memory request"
+  }
+]
diff --git a/pmu-events/arch/arm64/arm/cortex-a53/other.json b/pmu-events/arch/arm64/arm/cortex-a53/other.json
new file mode 100644
index 0000000..6cc6cbd
--- /dev/null
+++ b/pmu-events/arch/arm64/arm/cortex-a53/other.json
@@ -0,0 +1,28 @@
+[
+  {
+        "ArchStdEvent": "EXC_IRQ",
+  },
+  {
+        "ArchStdEvent": "EXC_FIQ",
+  },
+  {
+        "EventCode": "0xC6",
+        "EventName": "PRE_DECODE_ERR",
+        "BriefDescription": "Pre-decode error"
+  },
+  {
+        "EventCode": "0xD0",
+        "EventName": "L1I_CACHE_ERR",
+        "BriefDescription": "L1 Instruction Cache (data or tag) memory error"
+  },
+  {
+        "EventCode": "0xD1",
+        "EventName": "L1D_CACHE_ERR",
+        "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable"
+  },
+  {
+        "EventCode": "0xD2",
+        "EventName": "TLB_ERR",
+        "BriefDescription": "TLB memory error"
+  }
+]
diff --git a/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json b/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json
new file mode 100644
index 0000000..f45a6b5
--- /dev/null
+++ b/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json
@@ -0,0 +1,52 @@
+[
+  {
+    "EventCode": "0xC7",
+    "EventName": "STALL_SB_FULL",
+    "BriefDescription": "Data Write operation that stalls the pipeline because the store buffer is full"
+  },
+  {
+    "EventCode": "0xE0",
+    "EventName": "OTHER_IQ_DEP_STALL",
+    "BriefDescription": "Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error"
+  },
+  {
+    "EventCode": "0xE1",
+    "EventName": "IC_DEP_STALL",
+    "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction cache miss being processed"
+  },
+  {
+    "EventCode": "0xE2",
+    "EventName": "IUTLB_DEP_STALL",
+    "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed"
+  },
+  {
+    "EventCode": "0xE3",
+    "EventName": "DECODE_DEP_STALL",
+    "BriefDescription": "Cycles the DPU IQ is empty and there is a pre-decode error being processed"
+  },
+  {
+    "EventCode": "0xE4",
+    "EventName": "OTHER_INTERLOCK_STALL",
+    "BriefDescription": "Cycles there is an interlock other than Advanced SIMD/Floating-point instructions or load/store instruction"
+  },
+  {
+    "EventCode": "0xE5",
+    "EventName": "AGU_DEP_STALL",
+    "BriefDescription": "Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU"
+  },
+  {
+    "EventCode": "0xE6",
+    "EventName": "SIMD_DEP_STALL",
+    "BriefDescription": "Cycles there is an interlock for an Advanced SIMD/Floating-point operation."
+  },
+  {
+    "EventCode": "0xE7",
+    "EventName": "LD_DEP_STALL",
+    "BriefDescription": "Cycles there is a stall in the Wr stage because of a load miss"
+  },
+  {
+    "EventCode": "0xE8",
+    "EventName": "ST_DEP_STALL",
+    "BriefDescription": "Cycles there is a stall in the Wr stage because of a store"
+  }
+]
diff --git a/pmu-events/arch/arm64/armv8-recommended.json b/pmu-events/arch/arm64/armv8-recommended.json
new file mode 100644
index 0000000..6328828
--- /dev/null
+++ b/pmu-events/arch/arm64/armv8-recommended.json
@@ -0,0 +1,452 @@
+[
+    {
+        "PublicDescription": "Attributable Level 1 data cache access, read",
+        "EventCode": "0x40",
+        "EventName": "L1D_CACHE_RD",
+        "BriefDescription": "L1D cache access, read"
+    },
+    {
+        "PublicDescription": "Attributable Level 1 data cache access, write",
+        "EventCode": "0x41",
+        "EventName": "L1D_CACHE_WR",
+        "BriefDescription": "L1D cache access, write"
+    },
+    {
+        "PublicDescription": "Attributable Level 1 data cache refill, read",
+        "EventCode": "0x42",
+        "EventName": "L1D_CACHE_REFILL_RD",
+        "BriefDescription": "L1D cache refill, read"
+    },
+    {
+        "PublicDescription": "Attributable Level 1 data cache refill, write",
+        "EventCode": "0x43",
+        "EventName": "L1D_CACHE_REFILL_WR",
+        "BriefDescription": "L1D cache refill, write"
+    },
+    {
+        "PublicDescription": "Attributable Level 1 data cache refill, inner",
+        "EventCode": "0x44",
+        "EventName": "L1D_CACHE_REFILL_INNER",
+        "BriefDescription": "L1D cache refill, inner"
+    },
+    {
+        "PublicDescription": "Attributable Level 1 data cache refill, outer",
+        "EventCode": "0x45",
+        "EventName": "L1D_CACHE_REFILL_OUTER",
+        "BriefDescription": "L1D cache refill, outer"
+    },
+    {
+        "PublicDescription": "Attributable Level 1 data cache Write-Back, victim",
+        "EventCode": "0x46",
+        "EventName": "L1D_CACHE_WB_VICTIM",
+        "BriefDescription": "L1D cache Write-Back, victim"
+    },
+    {
+        "PublicDescription": "Level 1 data cache Write-Back, cleaning and coherency",
+        "EventCode": "0x47",
+        "EventName": "L1D_CACHE_WB_CLEAN",
+        "BriefDescription": "L1D cache Write-Back, cleaning and coherency"
+    },
+    {
+        "PublicDescription": "Attributable Level 1 data cache invalidate",
+        "EventCode": "0x48",
+        "EventName": "L1D_CACHE_INVAL",
+        "BriefDescription": "L1D cache invalidate"
+    },
+    {
+        "PublicDescription": "Attributable Level 1 data TLB refill, read",
+        "EventCode": "0x4C",
+        "EventName": "L1D_TLB_REFILL_RD",
+        "BriefDescription": "L1D tlb refill, read"
+    },
+    {
+        "PublicDescription": "Attributable Level 1 data TLB refill, write",
+        "EventCode": "0x4D",
+        "EventName": "L1D_TLB_REFILL_WR",
+        "BriefDescription": "L1D tlb refill, write"
+    },
+    {
+        "PublicDescription": "Attributable Level 1 data or unified TLB access, read",
+        "EventCode": "0x4E",
+        "EventName": "L1D_TLB_RD",
+        "BriefDescription": "L1D tlb access, read"
+    },
+    {
+        "PublicDescription": "Attributable Level 1 data or unified TLB access, write",
+        "EventCode": "0x4F",
+        "EventName": "L1D_TLB_WR",
+        "BriefDescription": "L1D tlb access, write"
+    },
+    {
+        "PublicDescription": "Attributable Level 2 data cache access, read",
+        "EventCode": "0x50",
+        "EventName": "L2D_CACHE_RD",
+        "BriefDescription": "L2D cache access, read"
+    },
+    {
+        "PublicDescription": "Attributable Level 2 data cache access, write",
+        "EventCode": "0x51",
+        "EventName": "L2D_CACHE_WR",
+        "BriefDescription": "L2D cache access, write"
+    },
+    {
+        "PublicDescription": "Attributable Level 2 data cache refill, read",
+        "EventCode": "0x52",
+        "EventName": "L2D_CACHE_REFILL_RD",
+        "BriefDescription": "L2D cache refill, read"
+    },
+    {
+        "PublicDescription": "Attributable Level 2 data cache refill, write",
+        "EventCode": "0x53",
+        "EventName": "L2D_CACHE_REFILL_WR",
+        "BriefDescription": "L2D cache refill, write"
+    },
+    {
+        "PublicDescription": "Attributable Level 2 data cache Write-Back, victim",
+        "EventCode": "0x56",
+        "EventName": "L2D_CACHE_WB_VICTIM",
+        "BriefDescription": "L2D cache Write-Back, victim"
+    },
+    {
+        "PublicDescription": "Level 2 data cache Write-Back, cleaning and coherency",
+        "EventCode": "0x57",
+        "EventName": "L2D_CACHE_WB_CLEAN",
+        "BriefDescription": "L2D cache Write-Back, cleaning and coherency"
+    },
+    {
+        "PublicDescription": "Attributable Level 2 data cache invalidate",
+        "EventCode": "0x58",
+        "EventName": "L2D_CACHE_INVAL",
+        "BriefDescription": "L2D cache invalidate"
+    },
+    {
+        "PublicDescription": "Attributable Level 2 data or unified TLB refill, read",
+        "EventCode": "0x5c",
+        "EventName": "L2D_TLB_REFILL_RD",
+        "BriefDescription": "L2D tlb refill, read"
+    },
+    {
+        "PublicDescription": "Attributable Level 2 data or unified TLB refill, write",
+        "EventCode": "0x5d",
+        "EventName": "L2D_TLB_REFILL_WR",
+        "BriefDescription": "L2D tlb refill, write"
+    },
+    {
+        "PublicDescription": "Attributable Level 2 data or unified TLB access, read",
+        "EventCode": "0x5e",
+        "EventName": "L2D_TLB_RD",
+        "BriefDescription": "L2D tlb access, read"
+    },
+    {
+        "PublicDescription": "Attributable Level 2 data or unified TLB access, write",
+        "EventCode": "0x5f",
+        "EventName": "L2D_TLB_WR",
+        "BriefDescription": "L2D tlb access, write"
+    },
+    {
+        "PublicDescription": "Bus access read",
+        "EventCode": "0x60",
+        "EventName": "BUS_ACCESS_RD",
+        "BriefDescription": "Bus access read"
+   },
+   {
+        "PublicDescription": "Bus access write",
+        "EventCode": "0x61",
+        "EventName": "BUS_ACCESS_WR",
+        "BriefDescription": "Bus access write"
+   }
+   {
+        "PublicDescription": "Bus access, Normal, Cacheable, Shareable",
+        "EventCode": "0x62",
+        "EventName": "BUS_ACCESS_SHARED",
+        "BriefDescription": "Bus access, Normal, Cacheable, Shareable"
+   }
+   {
+        "PublicDescription": "Bus access, not Normal, Cacheable, Shareable",
+        "EventCode": "0x63",
+        "EventName": "BUS_ACCESS_NOT_SHARED",
+        "BriefDescription": "Bus access, not Normal, Cacheable, Shareable"
+   }
+   {
+        "PublicDescription": "Bus access, Normal",
+        "EventCode": "0x64",
+        "EventName": "BUS_ACCESS_NORMAL",
+        "BriefDescription": "Bus access, Normal"
+   }
+   {
+        "PublicDescription": "Bus access, peripheral",
+        "EventCode": "0x65",
+        "EventName": "BUS_ACCESS_PERIPH",
+        "BriefDescription": "Bus access, peripheral"
+   }
+   {
+        "PublicDescription": "Data memory access, read",
+        "EventCode": "0x66",
+        "EventName": "MEM_ACCESS_RD",
+        "BriefDescription": "Data memory access, read"
+   }
+   {
+        "PublicDescription": "Data memory access, write",
+        "EventCode": "0x67",
+        "EventName": "MEM_ACCESS_WR",
+        "BriefDescription": "Data memory access, write"
+   }
+   {
+        "PublicDescription": "Unaligned access, read",
+        "EventCode": "0x68",
+        "EventName": "UNALIGNED_LD_SPEC",
+        "BriefDescription": "Unaligned access, read"
+   }
+   {
+        "PublicDescription": "Unaligned access, write",
+        "EventCode": "0x69",
+        "EventName": "UNALIGNED_ST_SPEC",
+        "BriefDescription": "Unaligned access, write"
+   }
+   {
+        "PublicDescription": "Unaligned access",
+        "EventCode": "0x6a",
+        "EventName": "UNALIGNED_LDST_SPEC",
+        "BriefDescription": "Unaligned access"
+   }
+   {
+        "PublicDescription": "Exclusive operation speculatively executed, LDREX or LDX",
+        "EventCode": "0x6c",
+        "EventName": "LDREX_SPEC",
+        "BriefDescription": "Exclusive operation speculatively executed, LDREX or LDX"
+   }
+   {
+        "PublicDescription": "Exclusive operation speculatively executed, STREX or STX pass",
+        "EventCode": "0x6d",
+        "EventName": "STREX_PASS_SPEC",
+        "BriefDescription": "Exclusive operation speculatively executed, STREX or STX pass"
+   }
+   {
+        "PublicDescription": "Exclusive operation speculatively executed, STREX or STX fail",
+        "EventCode": "0x6e",
+        "EventName": "STREX_FAIL_SPEC",
+        "BriefDescription": "Exclusive operation speculatively executed, STREX or STX fail"
+   }
+   {
+        "PublicDescription": "Exclusive operation speculatively executed, STREX or STX",
+        "EventCode": "0x6f",
+        "EventName": "STREX_SPEC",
+        "BriefDescription": "Exclusive operation speculatively executed, STREX or STX"
+   }
+   {
+        "PublicDescription": "Operation speculatively executed, load",
+        "EventCode": "0x70",
+        "EventName": "LD_SPEC",
+        "BriefDescription": "Operation speculatively executed, load"
+   }
+   {
+        "PublicDescription": "Operation speculatively executed, store"
+        "EventCode": "0x71",
+        "EventName": "ST_SPEC",
+        "BriefDescription": "Operation speculatively executed, store"
+   }
+   {
+        "PublicDescription": "Operation speculatively executed, load or store",
+        "EventCode": "0x72",
+        "EventName": "LDST_SPEC",
+        "BriefDescription": "Operation speculatively executed, load or store"
+   }
+   {
+        "PublicDescription": "Operation speculatively executed, integer data processing",
+        "EventCode": "0x73",
+        "EventName": "DP_SPEC",
+        "BriefDescription": "Operation speculatively executed, integer data processing"
+   }
+   {
+        "PublicDescription": "Operation speculatively executed, Advanced SIMD instruction",
+        "EventCode": "0x74",
+        "EventName": "ASE_SPEC",
+        "BriefDescription": "Operation speculatively executed, Advanced SIMD instruction",
+   }
+   {
+        "PublicDescription": "Operation speculatively executed, floating-point instruction",
+        "EventCode": "0x75",
+        "EventName": "VFP_SPEC",
+        "BriefDescription": "Operation speculatively executed, floating-point instruction"
+   }
+   {
+        "PublicDescription": "Operation speculatively executed, software change of the PC",
+        "EventCode": "0x76",
+        "EventName": "PC_WRITE_SPEC",
+        "BriefDescription": "Operation speculatively executed, software change of the PC"
+   }
+   {
+        "PublicDescription": "Operation speculatively executed, Cryptographic instruction",
+        "EventCode": "0x77",
+        "EventName": "CRYPTO_SPEC",
+        "BriefDescription": "Operation speculatively executed, Cryptographic instruction"
+   }
+   {
+        "PublicDescription": "Branch speculatively executed, immediate branch"
+        "EventCode": "0x78",
+        "EventName": "BR_IMMED_SPEC",
+        "BriefDescription": "Branch speculatively executed, immediate branch"
+   }
+   {
+        "PublicDescription": "Branch speculatively executed, procedure return"
+        "EventCode": "0x79",
+        "EventName": "BR_RETURN_SPEC",
+        "BriefDescription": "Branch speculatively executed, procedure return"
+   }
+   {
+        "PublicDescription": "Branch speculatively executed, indirect branch"
+        "EventCode": "0x7a",
+        "EventName": "BR_INDIRECT_SPEC",
+        "BriefDescription": "Branch speculatively executed, indirect branch"
+   }
+   {
+        "PublicDescription": "Barrier speculatively executed, ISB"
+        "EventCode": "0x7c",
+        "EventName": "ISB_SPEC",
+        "BriefDescription": "Barrier speculatively executed, ISB"
+   }
+   {
+        "PublicDescription": "Barrier speculatively executed, DSB"
+        "EventCode": "0x7d",
+        "EventName": "DSB_SPEC",
+        "BriefDescription": "Barrier speculatively executed, DSB"
+   }
+   {
+        "PublicDescription": "Barrier speculatively executed, DMB"
+        "EventCode": "0x7e",
+        "EventName": "DMB_SPEC",
+        "BriefDescription": "Barrier speculatively executed, DMB"
+   }
+   {
+        "PublicDescription": "Exception taken, Other synchronous"
+        "EventCode": "0x81",
+        "EventName": "EXC_UNDEF",
+        "BriefDescription": "Exception taken, Other synchronous"
+   }
+   {
+        "PublicDescription": "Exception taken, Supervisor Call"
+        "EventCode": "0x82",
+        "EventName": "EXC_SVC",
+        "BriefDescription": "Exception taken, Supervisor Call"
+   }
+   {
+        "PublicDescription": "Exception taken, Instruction Abort"
+        "EventCode": "0x83",
+        "EventName": "EXC_PABORT",
+        "BriefDescription": "Exception taken, Instruction Abort"
+   }
+   {
+        "PublicDescription": "Exception taken, Data Abort and SError"
+        "EventCode": "0x84",
+        "EventName": "EXC_DABORT",
+        "BriefDescription": "Exception taken, Data Abort and SError"
+   }
+   {
+        "PublicDescription": "Exception taken, IRQ"
+        "EventCode": "0x86",
+        "EventName": "EXC_IRQ",
+        "BriefDescription": "Exception taken, IRQ"
+   }
+   {
+        "PublicDescription": "Exception taken, FIQ"
+        "EventCode": "0x87",
+        "EventName": "EXC_FIQ",
+        "BriefDescription": "Exception taken, FIQ"
+   }
+   {
+        "PublicDescription": "Exception taken, Secure Monitor Call"
+        "EventCode": "0x88",
+        "EventName": "EXC_SMC",
+        "BriefDescription": "Exception taken, Secure Monitor Call"
+   }
+   {
+        "PublicDescription": "Exception taken, Hypervisor Call"
+        "EventCode": "0x8a",
+        "EventName": "EXC_HVC",
+        "BriefDescription": "Exception taken, Hypervisor Call"
+   }
+   {
+        "PublicDescription": "Exception taken, Instruction Abort not taken locally"
+        "EventCode": "0x8b",
+        "EventName": "EXC_TRAP_PABORT",
+        "BriefDescription": "Exception taken, Instruction Abort not taken locally"
+   }
+   {
+        "PublicDescription": "Exception taken, Data Abort or SError not taken locally"
+        "EventCode": "0x8c",
+        "EventName": "EXC_TRAP_DABORT",
+        "BriefDescription": "Exception taken, Data Abort or SError not taken locally"
+   }
+   {
+        "PublicDescription": "Exception taken, Other traps not taken locally"
+        "EventCode": "0x8d",
+        "EventName": "EXC_TRAP_OTHER",
+        "BriefDescription": "Exception taken, Other traps not taken locally"
+   }
+   {
+        "PublicDescription": "Exception taken, IRQ not taken locally"
+        "EventCode": "0x8e",
+        "EventName": "EXC_TRAP_IRQ",
+        "BriefDescription": "Exception taken, IRQ not taken locally"
+   }
+   {
+        "PublicDescription": "Exception taken, FIQ not taken locally"
+        "EventCode": "0x8f",
+        "EventName": "EXC_TRAP_FIQ",
+        "BriefDescription": "Exception taken, FIQ not taken locally"
+   }
+   {
+        "PublicDescription": "Release consistency operation speculatively executed, Load-Acquire"
+        "EventCode": "0x90",
+        "EventName": "RC_LD_SPEC",
+        "BriefDescription": "Release consistency operation speculatively executed, Load-Acquire"
+   }
+   {
+        "PublicDescription": "Release consistency operation speculatively executed, Store-Release"
+        "EventCode": "0x91",
+        "EventName": "RC_ST_SPEC",
+        "BriefDescription": "Release consistency operation speculatively executed, Store-Release"
+   }
+   {
+        "PublicDescription": "Attributable Level 3 data or unified cache access, read"
+        "EventCode": "0xa0",
+        "EventName": "L3D_CACHE_RD",
+        "BriefDescription": "Attributable Level 3 data or unified cache access, read"
+   }
+   {
+        "PublicDescription": "Attributable Level 3 data or unified cache access, write"
+        "EventCode": "0xa1",
+        "EventName": "L3D_CACHE_WR",
+        "BriefDescription": "Attributable Level 3 data or unified cache access, write"
+   }
+   {
+        "PublicDescription": "Attributable Level 3 data or unified cache refill, read"
+        "EventCode": "0xa2",
+        "EventName": "L3D_CACHE_REFILL_RD",
+        "BriefDescription": "Attributable Level 3 data or unified cache refill, read"
+   }
+   {
+        "PublicDescription": "Attributable Level 3 data or unified cache refill, write"
+        "EventCode": "0xa3",
+        "EventName": "L3D_CACHE_REFILL_WR",
+        "BriefDescription": "Attributable Level 3 data or unified cache refill, write"
+   }
+   {
+        "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, victim"
+        "EventCode": "0xa6",
+        "EventName": "L3D_CACHE_WB_VICTIM",
+        "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, victim"
+   }
+   {
+        "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean"
+        "EventCode": "0xa7",
+        "EventName": "L3D_CACHE_WB_CLEAN",
+        "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean"
+   }
+   {
+        "PublicDescription": "Attributable Level 3 data or unified cache access, invalidate"
+        "EventCode": "0xa8",
+        "EventName": "L3D_CACHE_INVAL",
+        "BriefDescription": "Attributable Level 3 data or unified cache access, invalidate"
+   }
+]
diff --git a/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json b/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json
new file mode 100644
index 0000000..bc03c06
--- /dev/null
+++ b/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json
@@ -0,0 +1,32 @@
+[
+    {
+        "ArchStdEvent": "L1D_CACHE_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WR",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_WR",
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_WR",
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_WR",
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_RD",
+   },
+   {
+        "ArchStdEvent": "BUS_ACCESS_WR",
+   }
+]
diff --git a/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json b/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json
new file mode 100644
index 0000000..9f0f15d
--- /dev/null
+++ b/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json
@@ -0,0 +1,122 @@
+[
+    {
+        "ArchStdEvent": "L1D_CACHE_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WR",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_WR",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WB_VICTIM",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WB_CLEAN",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_INVAL",
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_WR",
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_WR",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_RD",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WR",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_RD",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_WR",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_VICTIM",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_CLEAN",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_INVAL",
+    },
+    {
+        "PublicDescription": "Level 1 instruction cache prefetch access count",
+        "EventCode": "0x102e",
+        "EventName": "L1I_CACHE_PRF",
+        "BriefDescription": "L1I cache prefetch access count",
+    },
+    {
+        "PublicDescription": "Level 1 instruction cache miss due to prefetch access count",
+        "EventCode": "0x102f",
+        "EventName": "L1I_CACHE_PRF_REFILL",
+        "BriefDescription": "L1I cache miss due to prefetch access count",
+    },
+    {
+        "PublicDescription": "Instruction queue is empty",
+        "EventCode": "0x1043",
+        "EventName": "IQ_IS_EMPTY",
+        "BriefDescription": "Instruction queue is empty",
+    },
+    {
+        "PublicDescription": "Instruction fetch stall cycles",
+        "EventCode": "0x1044",
+        "EventName": "IF_IS_STALL",
+        "BriefDescription": "Instruction fetch stall cycles",
+    },
+    {
+        "PublicDescription": "Instructions can receive, but not send",
+        "EventCode": "0x2014",
+        "EventName": "FETCH_BUBBLE",
+        "BriefDescription": "Instructions can receive, but not send",
+    },
+    {
+        "PublicDescription": "Prefetch request from LSU",
+        "EventCode": "0x6013",
+        "EventName": "PRF_REQ",
+        "BriefDescription": "Prefetch request from LSU",
+    },
+    {
+        "PublicDescription": "Hit on prefetched data",
+        "EventCode": "0x6014",
+        "EventName": "HIT_ON_PRF",
+        "BriefDescription": "Hit on prefetched data",
+    },
+    {
+        "PublicDescription": "Cycles of that the number of issuing micro operations are less than 4",
+        "EventCode": "0x7001",
+        "EventName": "EXE_STALL_CYCLE",
+        "BriefDescription": "Cycles of that the number of issue ups are less than 4",
+    },
+    {
+        "PublicDescription": "No any micro operation is issued and meanwhile any load operation is not resolved",
+        "EventCode": "0x7004",
+        "EventName": "MEM_STALL_ANYLOAD",
+        "BriefDescription": "No any micro operation is issued and meanwhile any load operation is not resolved",
+    },
+    {
+        "PublicDescription": "No any micro operation is issued and meanwhile there is any load operation missing L1 cache and pending data refill",
+        "EventCode": "0x7006",
+        "EventName": "MEM_STALL_L1MISS",
+        "BriefDescription": "No any micro operation is issued and meanwhile there is any load operation missing L1 cache and pending data refill",
+    },
+    {
+        "PublicDescription": "No any micro operation is issued and meanwhile there is any load operation missing both L1 and L2 cache and pending data refill from L3 cache",
+        "EventCode": "0x7007",
+        "EventName": "MEM_STALL_L2MISS",
+        "BriefDescription": "No any micro operation is issued and meanwhile there is any load operation missing both L1 and L2 cache and pending data refill from L3 cache",
+    },
+]
diff --git a/pmu-events/arch/arm64/mapfile.csv b/pmu-events/arch/arm64/mapfile.csv
new file mode 100644
index 0000000..f03e26e
--- /dev/null
+++ b/pmu-events/arch/arm64/mapfile.csv
@@ -0,0 +1,18 @@
+# Format:
+#	MIDR,Version,JSON/file/pathname,Type
+#
+# where
+#	MIDR	Processor version
+#		Variant[23:20] and Revision [3:0] should be zero.
+#	Version could be used to track version of of JSON file
+#		but currently unused.
+#	JSON/file/pathname is the path to JSON file, relative
+#		to tools/perf/pmu-events/arch/arm64/.
+#	Type is core, uncore etc
+#
+#
+#Family-model,Version,Filename,EventType
+0x00000000410fd03[[:xdigit:]],v1,arm/cortex-a53,core
+0x00000000420f5160,v1,cavium/thunderx2,core
+0x00000000430f0af0,v1,cavium/thunderx2,core
+0x00000000480fd010,v1,hisilicon/hip08,core
diff --git a/pmu-events/arch/powerpc/mapfile.csv b/pmu-events/arch/powerpc/mapfile.csv
new file mode 100644
index 0000000..229150e
--- /dev/null
+++ b/pmu-events/arch/powerpc/mapfile.csv
@@ -0,0 +1,17 @@
+# Format:
+# 	PVR,Version,JSON/file/pathname,Type
+#
+# where
+# 	PVR	Processor version
+# 	Version could be used to track version of JSON file
+# 		but currently unused.
+# 	JSON/file/pathname is the path to JSON file, relative
+# 		to tools/perf/pmu-events/arch/powerpc/.
+# 	Type is core, uncore etc
+#
+# Multiple PVRs could map to a single JSON file.
+#
+
+# Power8 and Power9 entries
+004[bcd][[:xdigit:]]{4},1,power8,core
+004e[[:xdigit:]]{4},1,power9,core
diff --git a/pmu-events/arch/powerpc/power8/cache.json b/pmu-events/arch/powerpc/power8/cache.json
new file mode 100644
index 0000000..4a3daa6
--- /dev/null
+++ b/pmu-events/arch/powerpc/power8/cache.json
@@ -0,0 +1,176 @@
+[
+  {,
+    "EventCode": "0x4c048",
+    "EventName": "PM_DATA_FROM_DL2L3_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x3c048",
+    "EventName": "PM_DATA_FROM_DL2L3_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x3c04c",
+    "EventName": "PM_DATA_FROM_DL4",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x1c042",
+    "EventName": "PM_DATA_FROM_L2",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x200fe",
+    "EventName": "PM_DATA_FROM_L2MISS",
+    "BriefDescription": "Demand LD - L2 Miss (not L2 hit)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1c04e",
+    "EventName": "PM_DATA_FROM_L2MISS_MOD",
+    "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x3c040",
+    "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x4c040",
+    "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x2c040",
+    "EventName": "PM_DATA_FROM_L2_MEPF",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x1c040",
+    "EventName": "PM_DATA_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x4c042",
+    "EventName": "PM_DATA_FROM_L3",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x300fe",
+    "EventName": "PM_DATA_FROM_L3MISS",
+    "BriefDescription": "Demand LD - L3 Miss (not L2 hit and not L3 hit)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c04e",
+    "EventName": "PM_DATA_FROM_L3MISS_MOD",
+    "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x3c042",
+    "EventName": "PM_DATA_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x2c042",
+    "EventName": "PM_DATA_FROM_L3_MEPF",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x1c044",
+    "EventName": "PM_DATA_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x1c04c",
+    "EventName": "PM_DATA_FROM_LL4",
+    "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x4c04a",
+    "EventName": "PM_DATA_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x1c048",
+    "EventName": "PM_DATA_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x2c046",
+    "EventName": "PM_DATA_FROM_RL2L3_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x1c04a",
+    "EventName": "PM_DATA_FROM_RL2L3_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x3001a",
+    "EventName": "PM_DATA_TABLEWALK_CYC",
+    "BriefDescription": "Tablewalk Cycles (could be 1 or 2 active)",
+    "PublicDescription": "Data Tablewalk Active"
+  },
+  {,
+    "EventCode": "0x4e04e",
+    "EventName": "PM_DPTEG_FROM_L3MISS",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xd094",
+    "EventName": "PM_DSLB_MISS",
+    "BriefDescription": "Data SLB Miss - Total of all segment sizes",
+    "PublicDescription": "Data SLB Miss - Total of all segment sizes. Data SLB misses"
+  },
+  {,
+    "EventCode": "0x1002c",
+    "EventName": "PM_L1_DCACHE_RELOADED_ALL",
+    "BriefDescription": "L1 data cache reloaded for demand or prefetch",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x300f6",
+    "EventName": "PM_L1_DCACHE_RELOAD_VALID",
+    "BriefDescription": "DL1 reloaded due to Demand Load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3e054",
+    "EventName": "PM_LD_MISS_L1",
+    "BriefDescription": "Load Missed L1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x100ee",
+    "EventName": "PM_LD_REF_L1",
+    "BriefDescription": "All L1 D cache load references counted at finish, gated by reject",
+    "PublicDescription": "Load Ref count combined for all units"
+  },
+  {,
+    "EventCode": "0x300f0",
+    "EventName": "PM_ST_MISS_L1",
+    "BriefDescription": "Store Missed L1",
+    "PublicDescription": ""
+  },
+]
diff --git a/pmu-events/arch/powerpc/power8/floating-point.json b/pmu-events/arch/powerpc/power8/floating-point.json
new file mode 100644
index 0000000..5f1bb9f
--- /dev/null
+++ b/pmu-events/arch/powerpc/power8/floating-point.json
@@ -0,0 +1,14 @@
+[
+  {,
+    "EventCode": "0x2000e",
+    "EventName": "PM_FXU_BUSY",
+    "BriefDescription": "fxu0 busy and fxu1 busy",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1000e",
+    "EventName": "PM_FXU_IDLE",
+    "BriefDescription": "fxu0 idle and fxu1 idle",
+    "PublicDescription": ""
+  },
+]
diff --git a/pmu-events/arch/powerpc/power8/frontend.json b/pmu-events/arch/powerpc/power8/frontend.json
new file mode 100644
index 0000000..04c5f1b
--- /dev/null
+++ b/pmu-events/arch/powerpc/power8/frontend.json
@@ -0,0 +1,470 @@
+[
+  {,
+    "EventCode": "0x2505e",
+    "EventName": "PM_BACK_BR_CMPL",
+    "BriefDescription": "Branch instruction completed with a target address less than current instruction address",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10068",
+    "EventName": "PM_BRU_FIN",
+    "BriefDescription": "Branch Instruction Finished",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20036",
+    "EventName": "PM_BR_2PATH",
+    "BriefDescription": "two path branch",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40060",
+    "EventName": "PM_BR_CMPL",
+    "BriefDescription": "Branch Instruction completed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x400f6",
+    "EventName": "PM_BR_MPRED_CMPL",
+    "BriefDescription": "Number of Branch Mispredicts",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x200fa",
+    "EventName": "PM_BR_TAKEN_CMPL",
+    "BriefDescription": "New event for Branch Taken",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10018",
+    "EventName": "PM_IC_DEMAND_CYC",
+    "BriefDescription": "Cycles when a demand ifetch was pending",
+    "PublicDescription": "Demand ifetch pending"
+  },
+  {,
+    "EventCode": "0x100f6",
+    "EventName": "PM_IERAT_RELOAD",
+    "BriefDescription": "Number of I-ERAT reloads",
+    "PublicDescription": "IERAT Reloaded (Miss)"
+  },
+  {,
+    "EventCode": "0x4006a",
+    "EventName": "PM_IERAT_RELOAD_16M",
+    "BriefDescription": "IERAT Reloaded (Miss) for a 16M page",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20064",
+    "EventName": "PM_IERAT_RELOAD_4K",
+    "BriefDescription": "IERAT Miss (Not implemented as DI on POWER6)",
+    "PublicDescription": "IERAT Reloaded (Miss) for a 4k page"
+  },
+  {,
+    "EventCode": "0x3006a",
+    "EventName": "PM_IERAT_RELOAD_64K",
+    "BriefDescription": "IERAT Reloaded (Miss) for a 64k page",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x14050",
+    "EventName": "PM_INST_CHIP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for an instruction fetch",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x2",
+    "EventName": "PM_INST_CMPL",
+    "BriefDescription": "Number of PowerPC Instructions that completed",
+    "PublicDescription": "PPC Instructions Finished (completed)"
+  },
+  {,
+    "EventCode": "0x200f2",
+    "EventName": "PM_INST_DISP",
+    "BriefDescription": "PPC Dispatched",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x44048",
+    "EventName": "PM_INST_FROM_DL2L3_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x34048",
+    "EventName": "PM_INST_FROM_DL2L3_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x3404c",
+    "EventName": "PM_INST_FROM_DL4",
+    "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x4404c",
+    "EventName": "PM_INST_FROM_DMEM",
+    "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on a different Node or Group (Distant) due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from another chip's memory on a different Node or Group (Distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x14042",
+    "EventName": "PM_INST_FROM_L2",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x1404e",
+    "EventName": "PM_INST_FROM_L2MISS",
+    "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x34040",
+    "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_LDHITST",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x44040",
+    "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_OTHER",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x24040",
+    "EventName": "PM_INST_FROM_L2_MEPF",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x14040",
+    "EventName": "PM_INST_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x44042",
+    "EventName": "PM_INST_FROM_L3",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x300fa",
+    "EventName": "PM_INST_FROM_L3MISS",
+    "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet",
+    "PublicDescription": "Inst from L3 miss"
+  },
+  {,
+    "EventCode": "0x4404e",
+    "EventName": "PM_INST_FROM_L3MISS_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to an instruction fetch",
+    "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x34042",
+    "EventName": "PM_INST_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x24042",
+    "EventName": "PM_INST_FROM_L3_MEPF",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x14044",
+    "EventName": "PM_INST_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x1404c",
+    "EventName": "PM_INST_FROM_LL4",
+    "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x24048",
+    "EventName": "PM_INST_FROM_LMEM",
+    "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x2404c",
+    "EventName": "PM_INST_FROM_MEMORY",
+    "BriefDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x4404a",
+    "EventName": "PM_INST_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x14048",
+    "EventName": "PM_INST_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x24046",
+    "EventName": "PM_INST_FROM_RL2L3_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x1404a",
+    "EventName": "PM_INST_FROM_RL2L3_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x2404a",
+    "EventName": "PM_INST_FROM_RL4",
+    "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group (Remote) due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group (Remote) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x3404a",
+    "EventName": "PM_INST_FROM_RMEM",
+    "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Remote) due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Remote) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x24050",
+    "EventName": "PM_INST_GRP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for an instruction fetch",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x24052",
+    "EventName": "PM_INST_GRP_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for an instruction fetch",
+    "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro"
+  },
+  {,
+    "EventCode": "0x14052",
+    "EventName": "PM_INST_GRP_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch",
+    "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pump for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x1003a",
+    "EventName": "PM_INST_IMC_MATCH_CMPL",
+    "BriefDescription": "IMC Match Count ( Not architected in P8)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x14054",
+    "EventName": "PM_INST_PUMP_CPRED",
+    "BriefDescription": "Pump prediction correct. Counts across all types of pumps for an instruction fetch",
+    "PublicDescription": "Pump prediction correct. Counts across all types of pumps for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x44052",
+    "EventName": "PM_INST_PUMP_MPRED",
+    "BriefDescription": "Pump misprediction. Counts across all types of pumps for an instruction fetch",
+    "PublicDescription": "Pump misprediction. Counts across all types of pumps for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x34050",
+    "EventName": "PM_INST_SYS_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for an instruction fetch",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was system pump for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x34052",
+    "EventName": "PM_INST_SYS_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for an instruction fetch",
+    "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or"
+  },
+  {,
+    "EventCode": "0x44050",
+    "EventName": "PM_INST_SYS_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for an instruction fetch",
+    "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x45048",
+    "EventName": "PM_IPTEG_FROM_DL2L3_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x35048",
+    "EventName": "PM_IPTEG_FROM_DL2L3_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3504c",
+    "EventName": "PM_IPTEG_FROM_DL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to an instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4504c",
+    "EventName": "PM_IPTEG_FROM_DMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on a different Node or Group (Distant) due to an instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x15042",
+    "EventName": "PM_IPTEG_FROM_L2",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to an instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1504e",
+    "EventName": "PM_IPTEG_FROM_L2MISS",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to an instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x25040",
+    "EventName": "PM_IPTEG_FROM_L2_MEPF",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state due to an instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x15040",
+    "EventName": "PM_IPTEG_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to an instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x45042",
+    "EventName": "PM_IPTEG_FROM_L3",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to an instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4504e",
+    "EventName": "PM_IPTEG_FROM_L3MISS",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to an instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x35042",
+    "EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to an instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x25042",
+    "EventName": "PM_IPTEG_FROM_L3_MEPF",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x15044",
+    "EventName": "PM_IPTEG_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1504c",
+    "EventName": "PM_IPTEG_FROM_LL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x25048",
+    "EventName": "PM_IPTEG_FROM_LMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2504c",
+    "EventName": "PM_IPTEG_FROM_MEMORY",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4504a",
+    "EventName": "PM_IPTEG_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x15048",
+    "EventName": "PM_IPTEG_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x25046",
+    "EventName": "PM_IPTEG_FROM_RL2L3_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1504a",
+    "EventName": "PM_IPTEG_FROM_RL2L3_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2504a",
+    "EventName": "PM_IPTEG_FROM_RL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3504a",
+    "EventName": "PM_IPTEG_FROM_RMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xd096",
+    "EventName": "PM_ISLB_MISS",
+    "BriefDescription": "I SLB Miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x400fc",
+    "EventName": "PM_ITLB_MISS",
+    "BriefDescription": "ITLB Reloaded (always zero on POWER6)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x200fd",
+    "EventName": "PM_L1_ICACHE_MISS",
+    "BriefDescription": "Demand iCache Miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40012",
+    "EventName": "PM_L1_ICACHE_RELOADED_ALL",
+    "BriefDescription": "Counts all Icache reloads, including demand, prefetch, prefetch turned into demand and demand turned into prefetch",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30068",
+    "EventName": "PM_L1_ICACHE_RELOADED_PREF",
+    "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x300f4",
+    "EventName": "PM_THRD_CONC_RUN_INST",
+    "BriefDescription": "PPC Instructions Finished when both threads in run_cycles",
+    "PublicDescription": "Concurrent Run Instructions"
+  },
+  {,
+    "EventCode": "0x30060",
+    "EventName": "PM_TM_TRANS_RUN_INST",
+    "BriefDescription": "Instructions completed in transactional state",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4e014",
+    "EventName": "PM_TM_TX_PASS_RUN_INST",
+    "BriefDescription": "run instructions spent in successful transactions",
+    "PublicDescription": ""
+  },
+]
diff --git a/pmu-events/arch/powerpc/power8/marked.json b/pmu-events/arch/powerpc/power8/marked.json
new file mode 100644
index 0000000..dcdcede
--- /dev/null
+++ b/pmu-events/arch/powerpc/power8/marked.json
@@ -0,0 +1,794 @@
+[
+  {,
+    "EventCode": "0x3515e",
+    "EventName": "PM_MRK_BACK_BR_CMPL",
+    "BriefDescription": "Marked branch instruction completed with a target address less than current instruction address",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2013a",
+    "EventName": "PM_MRK_BRU_FIN",
+    "BriefDescription": "bru marked instr finish",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1016e",
+    "EventName": "PM_MRK_BR_CMPL",
+    "BriefDescription": "Branch Instruction completed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x301e4",
+    "EventName": "PM_MRK_BR_MPRED_CMPL",
+    "BriefDescription": "Marked Branch Mispredicted",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x101e2",
+    "EventName": "PM_MRK_BR_TAKEN_CMPL",
+    "BriefDescription": "Marked Branch Taken completed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d148",
+    "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d128",
+    "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC",
+    "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3d148",
+    "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c128",
+    "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC",
+    "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3d14c",
+    "EventName": "PM_MRK_DATA_FROM_DL4",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c12c",
+    "EventName": "PM_MRK_DATA_FROM_DL4_CYC",
+    "BriefDescription": "Duration in cycles to reload from another chip's L4 on a different Node or Group (Distant) due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d14c",
+    "EventName": "PM_MRK_DATA_FROM_DMEM",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d12c",
+    "EventName": "PM_MRK_DATA_FROM_DMEM_CYC",
+    "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d142",
+    "EventName": "PM_MRK_DATA_FROM_L2",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d14e",
+    "EventName": "PM_MRK_DATA_FROM_L2MISS",
+    "BriefDescription": "Data cache reload L2 miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c12e",
+    "EventName": "PM_MRK_DATA_FROM_L2MISS_CYC",
+    "BriefDescription": "Duration in cycles to reload from a location other than the local core's L2 due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c122",
+    "EventName": "PM_MRK_DATA_FROM_L2_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L2 due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3d140",
+    "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c120",
+    "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L2 with load hit store conflict due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d140",
+    "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d120",
+    "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L2 with dispatch conflict due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d140",
+    "EventName": "PM_MRK_DATA_FROM_L2_MEPF",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d120",
+    "EventName": "PM_MRK_DATA_FROM_L2_MEPF_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d140",
+    "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c120",
+    "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L2 without conflict due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d142",
+    "EventName": "PM_MRK_DATA_FROM_L3",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x201e4",
+    "EventName": "PM_MRK_DATA_FROM_L3MISS",
+    "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d12e",
+    "EventName": "PM_MRK_DATA_FROM_L3MISS_CYC",
+    "BriefDescription": "Duration in cycles to reload from a location other than the local core's L3 due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d122",
+    "EventName": "PM_MRK_DATA_FROM_L3_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L3 due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3d142",
+    "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c122",
+    "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d142",
+    "EventName": "PM_MRK_DATA_FROM_L3_MEPF",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d122",
+    "EventName": "PM_MRK_DATA_FROM_L3_MEPF_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d144",
+    "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c124",
+    "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L3 without conflict due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d14c",
+    "EventName": "PM_MRK_DATA_FROM_LL4",
+    "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c12c",
+    "EventName": "PM_MRK_DATA_FROM_LL4_CYC",
+    "BriefDescription": "Duration in cycles to reload from the local chip's L4 cache due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d148",
+    "EventName": "PM_MRK_DATA_FROM_LMEM",
+    "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d128",
+    "EventName": "PM_MRK_DATA_FROM_LMEM_CYC",
+    "BriefDescription": "Duration in cycles to reload from the local chip's Memory due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d14c",
+    "EventName": "PM_MRK_DATA_FROM_MEMORY",
+    "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d12c",
+    "EventName": "PM_MRK_DATA_FROM_MEMORY_CYC",
+    "BriefDescription": "Duration in cycles to reload from a memory location including L4 from local remote or distant due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d14a",
+    "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d12a",
+    "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC",
+    "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d148",
+    "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c128",
+    "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC",
+    "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d146",
+    "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d126",
+    "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC",
+    "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d14a",
+    "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c12a",
+    "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC",
+    "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d14a",
+    "EventName": "PM_MRK_DATA_FROM_RL4",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d12a",
+    "EventName": "PM_MRK_DATA_FROM_RL4_CYC",
+    "BriefDescription": "Duration in cycles to reload from another chip's L4 on the same Node or Group ( Remote) due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3d14a",
+    "EventName": "PM_MRK_DATA_FROM_RMEM",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c12a",
+    "EventName": "PM_MRK_DATA_FROM_RMEM_CYC",
+    "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group ( Remote) due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40118",
+    "EventName": "PM_MRK_DCACHE_RELOAD_INTV",
+    "BriefDescription": "Combined Intervention event",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x301e6",
+    "EventName": "PM_MRK_DERAT_MISS",
+    "BriefDescription": "Erat Miss (TLB Access) All page sizes",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d154",
+    "EventName": "PM_MRK_DERAT_MISS_16G",
+    "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16G",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3d154",
+    "EventName": "PM_MRK_DERAT_MISS_16M",
+    "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16M",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d156",
+    "EventName": "PM_MRK_DERAT_MISS_4K",
+    "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 4K",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d154",
+    "EventName": "PM_MRK_DERAT_MISS_64K",
+    "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 64K",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20132",
+    "EventName": "PM_MRK_DFU_FIN",
+    "BriefDescription": "Decimal Unit marked Instruction Finish",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4f148",
+    "EventName": "PM_MRK_DPTEG_FROM_DL2L3_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3f148",
+    "EventName": "PM_MRK_DPTEG_FROM_DL2L3_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3f14c",
+    "EventName": "PM_MRK_DPTEG_FROM_DL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4f14c",
+    "EventName": "PM_MRK_DPTEG_FROM_DMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1f142",
+    "EventName": "PM_MRK_DPTEG_FROM_L2",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1f14e",
+    "EventName": "PM_MRK_DPTEG_FROM_L2MISS",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2f140",
+    "EventName": "PM_MRK_DPTEG_FROM_L2_MEPF",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1f140",
+    "EventName": "PM_MRK_DPTEG_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4f142",
+    "EventName": "PM_MRK_DPTEG_FROM_L3",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4f14e",
+    "EventName": "PM_MRK_DPTEG_FROM_L3MISS",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3f142",
+    "EventName": "PM_MRK_DPTEG_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2f142",
+    "EventName": "PM_MRK_DPTEG_FROM_L3_MEPF",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1f144",
+    "EventName": "PM_MRK_DPTEG_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1f14c",
+    "EventName": "PM_MRK_DPTEG_FROM_LL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2f148",
+    "EventName": "PM_MRK_DPTEG_FROM_LMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2f14c",
+    "EventName": "PM_MRK_DPTEG_FROM_MEMORY",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4f14a",
+    "EventName": "PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1f148",
+    "EventName": "PM_MRK_DPTEG_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2f146",
+    "EventName": "PM_MRK_DPTEG_FROM_RL2L3_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1f14a",
+    "EventName": "PM_MRK_DPTEG_FROM_RL2L3_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2f14a",
+    "EventName": "PM_MRK_DPTEG_FROM_RL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3f14a",
+    "EventName": "PM_MRK_DPTEG_FROM_RMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x401e4",
+    "EventName": "PM_MRK_DTLB_MISS",
+    "BriefDescription": "Marked dtlb miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d158",
+    "EventName": "PM_MRK_DTLB_MISS_16G",
+    "BriefDescription": "Marked Data TLB Miss page size 16G",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d156",
+    "EventName": "PM_MRK_DTLB_MISS_16M",
+    "BriefDescription": "Marked Data TLB Miss page size 16M",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d156",
+    "EventName": "PM_MRK_DTLB_MISS_4K",
+    "BriefDescription": "Marked Data TLB Miss page size 4k",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3d156",
+    "EventName": "PM_MRK_DTLB_MISS_64K",
+    "BriefDescription": "Marked Data TLB Miss page size 64K",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40154",
+    "EventName": "PM_MRK_FAB_RSP_BKILL",
+    "BriefDescription": "Marked store had to do a bkill",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2f150",
+    "EventName": "PM_MRK_FAB_RSP_BKILL_CYC",
+    "BriefDescription": "cycles L2 RC took for a bkill",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3015e",
+    "EventName": "PM_MRK_FAB_RSP_CLAIM_RTY",
+    "BriefDescription": "Sampled store did a rwitm and got a rty",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30154",
+    "EventName": "PM_MRK_FAB_RSP_DCLAIM",
+    "BriefDescription": "Marked store had to do a dclaim",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2f152",
+    "EventName": "PM_MRK_FAB_RSP_DCLAIM_CYC",
+    "BriefDescription": "cycles L2 RC took for a dclaim",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4015e",
+    "EventName": "PM_MRK_FAB_RSP_RD_RTY",
+    "BriefDescription": "Sampled L2 reads retry count",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1015e",
+    "EventName": "PM_MRK_FAB_RSP_RD_T_INTV",
+    "BriefDescription": "Sampled Read got a T intervention",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4f150",
+    "EventName": "PM_MRK_FAB_RSP_RWITM_CYC",
+    "BriefDescription": "cycles L2 RC took for a rwitm",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2015e",
+    "EventName": "PM_MRK_FAB_RSP_RWITM_RTY",
+    "BriefDescription": "Sampled store did a rwitm and got a rty",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20134",
+    "EventName": "PM_MRK_FXU_FIN",
+    "BriefDescription": "fxu marked instr finish",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x401e0",
+    "EventName": "PM_MRK_INST_CMPL",
+    "BriefDescription": "marked instruction completed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20130",
+    "EventName": "PM_MRK_INST_DECODED",
+    "BriefDescription": "marked instruction decoded",
+    "PublicDescription": "marked instruction decoded. Name from ISU?"
+  },
+  {,
+    "EventCode": "0x101e0",
+    "EventName": "PM_MRK_INST_DISP",
+    "BriefDescription": "The thread has dispatched a randomly sampled marked instruction",
+    "PublicDescription": "Marked Instruction dispatched"
+  },
+  {,
+    "EventCode": "0x30130",
+    "EventName": "PM_MRK_INST_FIN",
+    "BriefDescription": "marked instruction finished",
+    "PublicDescription": "marked instr finish any unit"
+  },
+  {,
+    "EventCode": "0x401e6",
+    "EventName": "PM_MRK_INST_FROM_L3MISS",
+    "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet",
+    "PublicDescription": "n/a"
+  },
+  {,
+    "EventCode": "0x10132",
+    "EventName": "PM_MRK_INST_ISSUED",
+    "BriefDescription": "Marked instruction issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40134",
+    "EventName": "PM_MRK_INST_TIMEO",
+    "BriefDescription": "marked Instruction finish timeout (instruction lost)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x101e4",
+    "EventName": "PM_MRK_L1_ICACHE_MISS",
+    "BriefDescription": "sampled Instruction suffered an icache Miss",
+    "PublicDescription": "Marked L1 Icache Miss"
+  },
+  {,
+    "EventCode": "0x101ea",
+    "EventName": "PM_MRK_L1_RELOAD_VALID",
+    "BriefDescription": "Marked demand reload",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20114",
+    "EventName": "PM_MRK_L2_RC_DISP",
+    "BriefDescription": "Marked Instruction RC dispatched in L2",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3012a",
+    "EventName": "PM_MRK_L2_RC_DONE",
+    "BriefDescription": "Marked RC done",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40116",
+    "EventName": "PM_MRK_LARX_FIN",
+    "BriefDescription": "Larx finished",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1013e",
+    "EventName": "PM_MRK_LD_MISS_EXPOSED_CYC",
+    "BriefDescription": "Marked Load exposed Miss cycles",
+    "PublicDescription": "Marked Load exposed Miss (use edge detect to count #)"
+  },
+  {,
+    "EventCode": "0x201e2",
+    "EventName": "PM_MRK_LD_MISS_L1",
+    "BriefDescription": "Marked DL1 Demand Miss counted at exec time",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4013e",
+    "EventName": "PM_MRK_LD_MISS_L1_CYC",
+    "BriefDescription": "Marked ld latency",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40132",
+    "EventName": "PM_MRK_LSU_FIN",
+    "BriefDescription": "lsu marked instr finish",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20112",
+    "EventName": "PM_MRK_NTF_FIN",
+    "BriefDescription": "Marked next to finish instruction finished",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d15e",
+    "EventName": "PM_MRK_RUN_CYC",
+    "BriefDescription": "Marked run cycles",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3013e",
+    "EventName": "PM_MRK_STALL_CMPLU_CYC",
+    "BriefDescription": "Marked Group completion Stall",
+    "PublicDescription": "Marked Group Completion Stall cycles (use edge detect to count #)"
+  },
+  {,
+    "EventCode": "0x3e158",
+    "EventName": "PM_MRK_STCX_FAIL",
+    "BriefDescription": "marked stcx failed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10134",
+    "EventName": "PM_MRK_ST_CMPL",
+    "BriefDescription": "marked store completed and sent to nest",
+    "PublicDescription": "Marked store completed"
+  },
+  {,
+    "EventCode": "0x30134",
+    "EventName": "PM_MRK_ST_CMPL_INT",
+    "BriefDescription": "marked store finished with intervention",
+    "PublicDescription": "marked store complete (data home) with intervention"
+  },
+  {,
+    "EventCode": "0x3f150",
+    "EventName": "PM_MRK_ST_DRAIN_TO_L2DISP_CYC",
+    "BriefDescription": "cycles to drain st from core to L2",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3012c",
+    "EventName": "PM_MRK_ST_FWD",
+    "BriefDescription": "Marked st forwards",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1f150",
+    "EventName": "PM_MRK_ST_L2DISP_TO_CMPL_CYC",
+    "BriefDescription": "cycles from L2 rc disp to l2 rc completion",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20138",
+    "EventName": "PM_MRK_ST_NEST",
+    "BriefDescription": "Marked store sent to nest",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30132",
+    "EventName": "PM_MRK_VSU_FIN",
+    "BriefDescription": "VSU marked instr finish",
+    "PublicDescription": "vsu (fpu) marked instr finish"
+  },
+  {,
+    "EventCode": "0x3d15e",
+    "EventName": "PM_MULT_MRK",
+    "BriefDescription": "mult marked instr",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x15152",
+    "EventName": "PM_SYNC_MRK_BR_LINK",
+    "BriefDescription": "Marked Branch and link branch that can cause a synchronous interrupt",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1515c",
+    "EventName": "PM_SYNC_MRK_BR_MPRED",
+    "BriefDescription": "Marked Branch mispredict that can cause a synchronous interrupt",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x15156",
+    "EventName": "PM_SYNC_MRK_FX_DIVIDE",
+    "BriefDescription": "Marked fixed point divide that can cause a synchronous interrupt",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x15158",
+    "EventName": "PM_SYNC_MRK_L2HIT",
+    "BriefDescription": "Marked L2 Hits that can throw a synchronous interrupt",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1515a",
+    "EventName": "PM_SYNC_MRK_L2MISS",
+    "BriefDescription": "Marked L2 Miss that can throw a synchronous interrupt",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x15154",
+    "EventName": "PM_SYNC_MRK_L3MISS",
+    "BriefDescription": "Marked L3 misses that can throw a synchronous interrupt",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x15150",
+    "EventName": "PM_SYNC_MRK_PROBE_NOP",
+    "BriefDescription": "Marked probeNops which can cause synchronous interrupts",
+    "PublicDescription": ""
+  },
+]
diff --git a/pmu-events/arch/powerpc/power8/memory.json b/pmu-events/arch/powerpc/power8/memory.json
new file mode 100644
index 0000000..87cdaad
--- /dev/null
+++ b/pmu-events/arch/powerpc/power8/memory.json
@@ -0,0 +1,212 @@
+[
+  {,
+    "EventCode": "0x10050",
+    "EventName": "PM_CHIP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for all data types (demand load,data,inst prefetch,inst fetch,xlate (I or d))"
+  },
+  {,
+    "EventCode": "0x1c050",
+    "EventName": "PM_DATA_CHIP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for a demand load",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for a demand load"
+  },
+  {,
+    "EventCode": "0x4c04c",
+    "EventName": "PM_DATA_FROM_DMEM",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x2c048",
+    "EventName": "PM_DATA_FROM_LMEM",
+    "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from the local chip's Memory due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x2c04c",
+    "EventName": "PM_DATA_FROM_MEMORY",
+    "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x2c04a",
+    "EventName": "PM_DATA_FROM_RL4",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x3c04a",
+    "EventName": "PM_DATA_FROM_RMEM",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x2c050",
+    "EventName": "PM_DATA_GRP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for a demand load",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for a demand load"
+  },
+  {,
+    "EventCode": "0x2c052",
+    "EventName": "PM_DATA_GRP_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for a demand load",
+    "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro"
+  },
+  {,
+    "EventCode": "0x1c052",
+    "EventName": "PM_DATA_GRP_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for a demand load",
+    "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pump for a demand load"
+  },
+  {,
+    "EventCode": "0x1c054",
+    "EventName": "PM_DATA_PUMP_CPRED",
+    "BriefDescription": "Pump prediction correct. Counts across all types of pumps for a demand load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c052",
+    "EventName": "PM_DATA_PUMP_MPRED",
+    "BriefDescription": "Pump misprediction. Counts across all types of pumps for a demand load",
+    "PublicDescription": "Pump misprediction. Counts across all types of pumps for a demand load"
+  },
+  {,
+    "EventCode": "0x3c050",
+    "EventName": "PM_DATA_SYS_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for a demand load",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was system pump for a demand load"
+  },
+  {,
+    "EventCode": "0x3c052",
+    "EventName": "PM_DATA_SYS_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load",
+    "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or"
+  },
+  {,
+    "EventCode": "0x4c050",
+    "EventName": "PM_DATA_SYS_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for a demand load",
+    "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for a demand load"
+  },
+  {,
+    "EventCode": "0x3e04c",
+    "EventName": "PM_DPTEG_FROM_DL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4e04c",
+    "EventName": "PM_DPTEG_FROM_DMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3e04a",
+    "EventName": "PM_DPTEG_FROM_RMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20050",
+    "EventName": "PM_GRP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20052",
+    "EventName": "PM_GRP_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro"
+  },
+  {,
+    "EventCode": "0x10052",
+    "EventName": "PM_GRP_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x18082",
+    "EventName": "PM_L3_CO_MEPF",
+    "BriefDescription": "L3 CO of line in Mep state (includes casthrough)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c058",
+    "EventName": "PM_MEM_CO",
+    "BriefDescription": "Memory castouts from this lpar",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10058",
+    "EventName": "PM_MEM_LOC_THRESH_IFU",
+    "BriefDescription": "Local Memory above threshold for IFU speculation control",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40056",
+    "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH",
+    "BriefDescription": "Local memory above threshold for LSU medium",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1c05e",
+    "EventName": "PM_MEM_LOC_THRESH_LSU_MED",
+    "BriefDescription": "Local memory above threshold for data prefetch",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c058",
+    "EventName": "PM_MEM_PREF",
+    "BriefDescription": "Memory prefetch for this lpar. Includes L4",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10056",
+    "EventName": "PM_MEM_READ",
+    "BriefDescription": "Reads from Memory from this lpar (includes data/inst/xlate/l1prefetch/inst prefetch). Includes L4",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3c05e",
+    "EventName": "PM_MEM_RWITM",
+    "BriefDescription": "Memory rwitm for this lpar",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3006e",
+    "EventName": "PM_NEST_REF_CLK",
+    "BriefDescription": "Multiply by 4 to obtain the number of PB cycles",
+    "PublicDescription": "Nest reference clocks"
+  },
+  {,
+    "EventCode": "0x10054",
+    "EventName": "PM_PUMP_CPRED",
+    "BriefDescription": "Pump prediction correct. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Pump prediction correct. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x40052",
+    "EventName": "PM_PUMP_MPRED",
+    "BriefDescription": "Pump misprediction. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Pump misprediction. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x30050",
+    "EventName": "PM_SYS_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x30052",
+    "EventName": "PM_SYS_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or"
+  },
+  {,
+    "EventCode": "0x40050",
+    "EventName": "PM_SYS_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+]
diff --git a/pmu-events/arch/powerpc/power8/other.json b/pmu-events/arch/powerpc/power8/other.json
new file mode 100644
index 0000000..704302c
--- /dev/null
+++ b/pmu-events/arch/powerpc/power8/other.json
@@ -0,0 +1,4064 @@
+[
+  {,
+    "EventCode": "0x1f05e",
+    "EventName": "PM_1LPAR_CYC",
+    "BriefDescription": "Number of cycles in single lpar mode. All threads in the core are assigned to the same lpar",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2006e",
+    "EventName": "PM_2LPAR_CYC",
+    "BriefDescription": "Cycles in 2-lpar mode. Threads 0-3 belong to Lpar0 and threads 4-7 belong to Lpar1",
+    "PublicDescription": "Number of cycles in 2 lpar mode"
+  },
+  {,
+    "EventCode": "0x4e05e",
+    "EventName": "PM_4LPAR_CYC",
+    "BriefDescription": "Number of cycles in 4 LPAR mode. Threads 0-1 belong to lpar0, threads 2-3 belong to lpar1, threads 4-5 belong to lpar2, and threads 6-7 belong to lpar3",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x610050",
+    "EventName": "PM_ALL_CHIP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for all data types (demand load,data,inst prefetch,inst fetch,xlate (I or d))"
+  },
+  {,
+    "EventCode": "0x520050",
+    "EventName": "PM_ALL_GRP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x620052",
+    "EventName": "PM_ALL_GRP_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro"
+  },
+  {,
+    "EventCode": "0x610052",
+    "EventName": "PM_ALL_GRP_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x610054",
+    "EventName": "PM_ALL_PUMP_CPRED",
+    "BriefDescription": "Pump prediction correct. Counts across all types of pumps for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Pump prediction correct. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x640052",
+    "EventName": "PM_ALL_PUMP_MPRED",
+    "BriefDescription": "Pump misprediction. Counts across all types of pumps for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Pump misprediction. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x630050",
+    "EventName": "PM_ALL_SYS_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was system pump for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x630052",
+    "EventName": "PM_ALL_SYS_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or"
+  },
+  {,
+    "EventCode": "0x640050",
+    "EventName": "PM_ALL_SYS_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+    "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x4082",
+    "EventName": "PM_BANK_CONFLICT",
+    "BriefDescription": "Read blocked due to interleave conflict. The ifar logic will detect an interleave conflict and kill the data that was read that cycle",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x5086",
+    "EventName": "PM_BR_BC_8",
+    "BriefDescription": "Pairable BC+8 branch that has not been converted to a Resolve Finished in the BRU pipeline",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x5084",
+    "EventName": "PM_BR_BC_8_CONV",
+    "BriefDescription": "Pairable BC+8 branch that was converted to a Resolve Finished in the BRU pipeline",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40ac",
+    "EventName": "PM_BR_MPRED_CCACHE",
+    "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Count Cache Target Prediction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40b8",
+    "EventName": "PM_BR_MPRED_CR",
+    "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the BHT Direction Prediction (taken/not taken)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40ae",
+    "EventName": "PM_BR_MPRED_LSTACK",
+    "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Link Stack Target Prediction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40ba",
+    "EventName": "PM_BR_MPRED_TA",
+    "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Target Address Prediction from the Count Cache or Link Stack. Only XL-form branches that resolved Taken set this event",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10138",
+    "EventName": "PM_BR_MRK_2PATH",
+    "BriefDescription": "marked two path branch",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x409c",
+    "EventName": "PM_BR_PRED_BR0",
+    "BriefDescription": "Conditional Branch Completed on BR0 (1st branch in group) in which the HW predicted the Direction or Target",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x409e",
+    "EventName": "PM_BR_PRED_BR1",
+    "BriefDescription": "Conditional Branch Completed on BR1 (2nd branch in group) in which the HW predicted the Direction or Target. Note: BR1 can only be used in Single Thread Mode. In all of the SMT modes, only one branch can complete, thus BR1 is unused",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x489c",
+    "EventName": "PM_BR_PRED_BR_CMPL",
+    "BriefDescription": "Completion Time Event. This event can also be calculated from the direct bus as follows: if_pc_br0_br_pred(0) OR if_pc_br0_br_pred(1)",
+    "PublicDescription": "IFU"
+  },
+  {,
+    "EventCode": "0x40a4",
+    "EventName": "PM_BR_PRED_CCACHE_BR0",
+    "BriefDescription": "Conditional Branch Completed on BR0 that used the Count Cache for Target Prediction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40a6",
+    "EventName": "PM_BR_PRED_CCACHE_BR1",
+    "BriefDescription": "Conditional Branch Completed on BR1 that used the Count Cache for Target Prediction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x48a4",
+    "EventName": "PM_BR_PRED_CCACHE_CMPL",
+    "BriefDescription": "Completion Time Event. This event can also be calculated from the direct bus as follows: if_pc_br0_br_pred(0) AND if_pc_br0_pred_type",
+    "PublicDescription": "IFU"
+  },
+  {,
+    "EventCode": "0x40b0",
+    "EventName": "PM_BR_PRED_CR_BR0",
+    "BriefDescription": "Conditional Branch Completed on BR0 that had its direction predicted. I-form branches do not set this event. In addition, B-form branches which do not use the BHT do not set this event - these are branches with BO-field set to 'always taken' and branches",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40b2",
+    "EventName": "PM_BR_PRED_CR_BR1",
+    "BriefDescription": "Conditional Branch Completed on BR1 that had its direction predicted. I-form branches do not set this event. In addition, B-form branches which do not use the BHT do not set this event - these are branches with BO-field set to 'always taken' and branches",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x48b0",
+    "EventName": "PM_BR_PRED_CR_CMPL",
+    "BriefDescription": "Completion Time Event. This event can also be calculated from the direct bus as follows: if_pc_br0_br_pred(1)='1'",
+    "PublicDescription": "IFU"
+  },
+  {,
+    "EventCode": "0x40a8",
+    "EventName": "PM_BR_PRED_LSTACK_BR0",
+    "BriefDescription": "Conditional Branch Completed on BR0 that used the Link Stack for Target Prediction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40aa",
+    "EventName": "PM_BR_PRED_LSTACK_BR1",
+    "BriefDescription": "Conditional Branch Completed on BR1 that used the Link Stack for Target Prediction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x48a8",
+    "EventName": "PM_BR_PRED_LSTACK_CMPL",
+    "BriefDescription": "Completion Time Event. This event can also be calculated from the direct bus as follows: if_pc_br0_br_pred(0) AND (not if_pc_br0_pred_type)",
+    "PublicDescription": "IFU"
+  },
+  {,
+    "EventCode": "0x40b4",
+    "EventName": "PM_BR_PRED_TA_BR0",
+    "BriefDescription": "Conditional Branch Completed on BR0 that had its target address predicted. Only XL-form branches set this event",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40b6",
+    "EventName": "PM_BR_PRED_TA_BR1",
+    "BriefDescription": "Conditional Branch Completed on BR1 that had its target address predicted. Only XL-form branches set this event",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x48b4",
+    "EventName": "PM_BR_PRED_TA_CMPL",
+    "BriefDescription": "Completion Time Event. This event can also be calculated from the direct bus as follows: if_pc_br0_br_pred(0)='1'",
+    "PublicDescription": "IFU"
+  },
+  {,
+    "EventCode": "0x40a0",
+    "EventName": "PM_BR_UNCOND_BR0",
+    "BriefDescription": "Unconditional Branch Completed on BR0. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was converted to a Resolve",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40a2",
+    "EventName": "PM_BR_UNCOND_BR1",
+    "BriefDescription": "Unconditional Branch Completed on BR1. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was converted to a Resolve",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x48a0",
+    "EventName": "PM_BR_UNCOND_CMPL",
+    "BriefDescription": "Completion Time Event. This event can also be calculated from the direct bus as follows: if_pc_br0_br_pred=00 AND if_pc_br0_completed",
+    "PublicDescription": "IFU"
+  },
+  {,
+    "EventCode": "0x3094",
+    "EventName": "PM_CASTOUT_ISSUED",
+    "BriefDescription": "Castouts issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3096",
+    "EventName": "PM_CASTOUT_ISSUED_GPR",
+    "BriefDescription": "Castouts issued GPR",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2090",
+    "EventName": "PM_CLB_HELD",
+    "BriefDescription": "CLB Hold: Any Reason",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d018",
+    "EventName": "PM_CMPLU_STALL_BRU_CRU",
+    "BriefDescription": "Completion stall due to IFU",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30026",
+    "EventName": "PM_CMPLU_STALL_COQ_FULL",
+    "BriefDescription": "Completion stall due to CO q full",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30038",
+    "EventName": "PM_CMPLU_STALL_FLUSH",
+    "BriefDescription": "completion stall due to flush by own thread",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30028",
+    "EventName": "PM_CMPLU_STALL_MEM_ECC_DELAY",
+    "BriefDescription": "Completion stall due to mem ECC delay",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e01c",
+    "EventName": "PM_CMPLU_STALL_NO_NTF",
+    "BriefDescription": "Completion stall due to nop",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e01e",
+    "EventName": "PM_CMPLU_STALL_NTCG_FLUSH",
+    "BriefDescription": "Completion stall due to ntcg flush",
+    "PublicDescription": "Completion stall due to reject (load hit store)"
+  },
+  {,
+    "EventCode": "0x4c010",
+    "EventName": "PM_CMPLU_STALL_REJECT",
+    "BriefDescription": "Completion stall due to LSU reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c01a",
+    "EventName": "PM_CMPLU_STALL_REJECT_LHS",
+    "BriefDescription": "Completion stall due to reject (load hit store)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c014",
+    "EventName": "PM_CMPLU_STALL_REJ_LMQ_FULL",
+    "BriefDescription": "Completion stall due to LSU reject LMQ full",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d010",
+    "EventName": "PM_CMPLU_STALL_SCALAR",
+    "BriefDescription": "Completion stall due to VSU scalar instruction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d010",
+    "EventName": "PM_CMPLU_STALL_SCALAR_LONG",
+    "BriefDescription": "Completion stall due to VSU scalar long latency instruction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c014",
+    "EventName": "PM_CMPLU_STALL_STORE",
+    "BriefDescription": "Completion stall by stores this includes store agen finishes in pipe LS0/LS1 and store data finishes in LS2/LS3",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d014",
+    "EventName": "PM_CMPLU_STALL_VECTOR",
+    "BriefDescription": "Completion stall due to VSU vector instruction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d012",
+    "EventName": "PM_CMPLU_STALL_VECTOR_LONG",
+    "BriefDescription": "Completion stall due to VSU vector long instruction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d012",
+    "EventName": "PM_CMPLU_STALL_VSU",
+    "BriefDescription": "Completion stall due to VSU instruction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x16083",
+    "EventName": "PM_CO0_ALLOC",
+    "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0x16082",
+    "EventName": "PM_CO0_BUSY",
+    "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x517082",
+    "EventName": "PM_CO_DISP_FAIL",
+    "BriefDescription": "CO dispatch failed due to all CO machines being busy",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x527084",
+    "EventName": "PM_CO_TM_SC_FOOTPRINT",
+    "BriefDescription": "L2 did a cleanifdirty CO to the L3 (ie created an SC line in the L3)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3608a",
+    "EventName": "PM_CO_USAGE",
+    "BriefDescription": "Continuous 16 cycle(2to1) window where this signals rotates thru sampling each L2 CO machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40066",
+    "EventName": "PM_CRU_FIN",
+    "BriefDescription": "IFU Finished a (non-branch) instruction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x61c050",
+    "EventName": "PM_DATA_ALL_CHIP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for either demand loads or data prefetch",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for a demand load"
+  },
+  {,
+    "EventCode": "0x64c048",
+    "EventName": "PM_DATA_ALL_FROM_DL2L3_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x63c048",
+    "EventName": "PM_DATA_ALL_FROM_DL2L3_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x63c04c",
+    "EventName": "PM_DATA_ALL_FROM_DL4",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x64c04c",
+    "EventName": "PM_DATA_ALL_FROM_DMEM",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x61c042",
+    "EventName": "PM_DATA_ALL_FROM_L2",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x64c046",
+    "EventName": "PM_DATA_ALL_FROM_L21_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x63c046",
+    "EventName": "PM_DATA_ALL_FROM_L21_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x61c04e",
+    "EventName": "PM_DATA_ALL_FROM_L2MISS_MOD",
+    "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x63c040",
+    "EventName": "PM_DATA_ALL_FROM_L2_DISP_CONFLICT_LDHITST",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x64c040",
+    "EventName": "PM_DATA_ALL_FROM_L2_DISP_CONFLICT_OTHER",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x62c040",
+    "EventName": "PM_DATA_ALL_FROM_L2_MEPF",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x61c040",
+    "EventName": "PM_DATA_ALL_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x64c042",
+    "EventName": "PM_DATA_ALL_FROM_L3",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x64c044",
+    "EventName": "PM_DATA_ALL_FROM_L31_ECO_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x63c044",
+    "EventName": "PM_DATA_ALL_FROM_L31_ECO_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x62c044",
+    "EventName": "PM_DATA_ALL_FROM_L31_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x61c046",
+    "EventName": "PM_DATA_ALL_FROM_L31_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x64c04e",
+    "EventName": "PM_DATA_ALL_FROM_L3MISS_MOD",
+    "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x63c042",
+    "EventName": "PM_DATA_ALL_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x62c042",
+    "EventName": "PM_DATA_ALL_FROM_L3_MEPF",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x61c044",
+    "EventName": "PM_DATA_ALL_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x61c04c",
+    "EventName": "PM_DATA_ALL_FROM_LL4",
+    "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x62c048",
+    "EventName": "PM_DATA_ALL_FROM_LMEM",
+    "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from the local chip's Memory due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x62c04c",
+    "EventName": "PM_DATA_ALL_FROM_MEMORY",
+    "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x64c04a",
+    "EventName": "PM_DATA_ALL_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x61c048",
+    "EventName": "PM_DATA_ALL_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x62c046",
+    "EventName": "PM_DATA_ALL_FROM_RL2L3_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x61c04a",
+    "EventName": "PM_DATA_ALL_FROM_RL2L3_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x62c04a",
+    "EventName": "PM_DATA_ALL_FROM_RL4",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group (Remote) due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group (Remote) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x63c04a",
+    "EventName": "PM_DATA_ALL_FROM_RMEM",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Remote) due to either demand loads or data prefetch",
+    "PublicDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Remote) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x62c050",
+    "EventName": "PM_DATA_ALL_GRP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for either demand loads or data prefetch",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for a demand load"
+  },
+  {,
+    "EventCode": "0x62c052",
+    "EventName": "PM_DATA_ALL_GRP_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for either demand loads or data prefetch",
+    "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro"
+  },
+  {,
+    "EventCode": "0x61c052",
+    "EventName": "PM_DATA_ALL_GRP_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for either demand loads or data prefetch",
+    "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pump for a demand load"
+  },
+  {,
+    "EventCode": "0x61c054",
+    "EventName": "PM_DATA_ALL_PUMP_CPRED",
+    "BriefDescription": "Pump prediction correct. Counts across all types of pumps for either demand loads or data prefetch",
+    "PublicDescription": "Pump prediction correct. Counts across all types of pumps for a demand load"
+  },
+  {,
+    "EventCode": "0x64c052",
+    "EventName": "PM_DATA_ALL_PUMP_MPRED",
+    "BriefDescription": "Pump misprediction. Counts across all types of pumps for either demand loads or data prefetch",
+    "PublicDescription": "Pump misprediction. Counts across all types of pumps for a demand load"
+  },
+  {,
+    "EventCode": "0x63c050",
+    "EventName": "PM_DATA_ALL_SYS_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for either demand loads or data prefetch",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was system pump for a demand load"
+  },
+  {,
+    "EventCode": "0x63c052",
+    "EventName": "PM_DATA_ALL_SYS_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for either demand loads or data prefetch",
+    "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or"
+  },
+  {,
+    "EventCode": "0x64c050",
+    "EventName": "PM_DATA_ALL_SYS_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for either demand loads or data prefetch",
+    "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for a demand load"
+  },
+  {,
+    "EventCode": "0x4c046",
+    "EventName": "PM_DATA_FROM_L21_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x3c046",
+    "EventName": "PM_DATA_FROM_L21_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x4c044",
+    "EventName": "PM_DATA_FROM_L31_ECO_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x3c044",
+    "EventName": "PM_DATA_FROM_L31_ECO_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x2c044",
+    "EventName": "PM_DATA_FROM_L31_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x1c046",
+    "EventName": "PM_DATA_FROM_L31_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a demand load",
+    "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+  },
+  {,
+    "EventCode": "0x400fe",
+    "EventName": "PM_DATA_FROM_MEM",
+    "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a demand load",
+    "PublicDescription": "Data cache reload from memory (including L4)"
+  },
+  {,
+    "EventCode": "0xe0bc",
+    "EventName": "PM_DC_COLLISIONS",
+    "BriefDescription": "DATA Cache collisions",
+    "PublicDescription": "DATA Cache collisions"
+  },
+  {,
+    "EventCode": "0x1e050",
+    "EventName": "PM_DC_PREF_STREAM_ALLOC",
+    "BriefDescription": "Stream marked valid. The stream could have been allocated through the hardware prefetch mechanism or through software. This is combined ls0 and ls1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e050",
+    "EventName": "PM_DC_PREF_STREAM_CONF",
+    "BriefDescription": "A demand load referenced a line in an active prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software. Combine up + down",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4e050",
+    "EventName": "PM_DC_PREF_STREAM_FUZZY_CONF",
+    "BriefDescription": "A demand load referenced a line in an active fuzzy prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software. Fuzzy stream confirm (out of order effects, or pf can't keep up)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3e050",
+    "EventName": "PM_DC_PREF_STREAM_STRIDED_CONF",
+    "BriefDescription": "A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0ba",
+    "EventName": "PM_DFU",
+    "BriefDescription": "Finish DFU (all finish)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0be",
+    "EventName": "PM_DFU_DCFFIX",
+    "BriefDescription": "Convert from fixed opcode finish (dcffix,dcffixq)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0bc",
+    "EventName": "PM_DFU_DENBCD",
+    "BriefDescription": "BCD->DPD opcode finish (denbcd, denbcdq)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0b8",
+    "EventName": "PM_DFU_MC",
+    "BriefDescription": "Finish DFU multicycle",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2092",
+    "EventName": "PM_DISP_CLB_HELD_BAL",
+    "BriefDescription": "Dispatch/CLB Hold: Balance",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2094",
+    "EventName": "PM_DISP_CLB_HELD_RES",
+    "BriefDescription": "Dispatch/CLB Hold: Resource",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20a8",
+    "EventName": "PM_DISP_CLB_HELD_SB",
+    "BriefDescription": "Dispatch/CLB Hold: Scoreboard",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2098",
+    "EventName": "PM_DISP_CLB_HELD_SYNC",
+    "BriefDescription": "Dispatch/CLB Hold: Sync type instruction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2096",
+    "EventName": "PM_DISP_CLB_HELD_TLBIE",
+    "BriefDescription": "Dispatch Hold: Due to TLBIE",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20006",
+    "EventName": "PM_DISP_HELD_IQ_FULL",
+    "BriefDescription": "Dispatch held due to Issue q full",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1002a",
+    "EventName": "PM_DISP_HELD_MAP_FULL",
+    "BriefDescription": "Dispatch for this thread was held because the Mappers were full",
+    "PublicDescription": "Dispatch held due to Mapper full"
+  },
+  {,
+    "EventCode": "0x30018",
+    "EventName": "PM_DISP_HELD_SRQ_FULL",
+    "BriefDescription": "Dispatch held due to SRQ no room",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30a6",
+    "EventName": "PM_DISP_HOLD_GCT_FULL",
+    "BriefDescription": "Dispatch Hold Due to no space in the GCT",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30008",
+    "EventName": "PM_DISP_WT",
+    "BriefDescription": "Dispatched Starved",
+    "PublicDescription": "Dispatched Starved (not held, nothing to dispatch)"
+  },
+  {,
+    "EventCode": "0x4e046",
+    "EventName": "PM_DPTEG_FROM_L21_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3e046",
+    "EventName": "PM_DPTEG_FROM_L21_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3e040",
+    "EventName": "PM_DPTEG_FROM_L2_DISP_CONFLICT_LDHITST",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 with load hit store conflict due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4e040",
+    "EventName": "PM_DPTEG_FROM_L2_DISP_CONFLICT_OTHER",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 with dispatch conflict due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4e044",
+    "EventName": "PM_DPTEG_FROM_L31_ECO_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3e044",
+    "EventName": "PM_DPTEG_FROM_L31_ECO_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e044",
+    "EventName": "PM_DPTEG_FROM_L31_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1e046",
+    "EventName": "PM_DPTEG_FROM_L31_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50a8",
+    "EventName": "PM_EAT_FORCE_MISPRED",
+    "BriefDescription": "XL-form branch was mispredicted due to the predicted target address missing from EAT. The EAT forces a mispredict in this case since there is no predicted target to validate. This is a rare case that may occur when the EAT is full and a branch is issue",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4084",
+    "EventName": "PM_EAT_FULL_CYC",
+    "BriefDescription": "Cycles No room in EAT",
+    "PublicDescription": "Cycles No room in EAT. Set on bank conflict and case where no ibuffers available"
+  },
+  {,
+    "EventCode": "0x2080",
+    "EventName": "PM_EE_OFF_EXT_INT",
+    "BriefDescription": "Ee off and external interrupt",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20b4",
+    "EventName": "PM_FAV_TBEGIN",
+    "BriefDescription": "Dispatch time Favored tbegin",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x100f4",
+    "EventName": "PM_FLOP",
+    "BriefDescription": "Floating Point Operation Finished",
+    "PublicDescription": "Floating Point Operations Finished"
+  },
+  {,
+    "EventCode": "0xa0ae",
+    "EventName": "PM_FLOP_SUM_SCALAR",
+    "BriefDescription": "flops summary scalar instructions",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0ac",
+    "EventName": "PM_FLOP_SUM_VEC",
+    "BriefDescription": "flops summary vector instructions",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2084",
+    "EventName": "PM_FLUSH_BR_MPRED",
+    "BriefDescription": "Flush caused by branch mispredict",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2082",
+    "EventName": "PM_FLUSH_DISP",
+    "BriefDescription": "Dispatch flush",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x208c",
+    "EventName": "PM_FLUSH_DISP_SB",
+    "BriefDescription": "Dispatch Flush: Scoreboard",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2088",
+    "EventName": "PM_FLUSH_DISP_SYNC",
+    "BriefDescription": "Dispatch Flush: Sync",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x208a",
+    "EventName": "PM_FLUSH_DISP_TLBIE",
+    "BriefDescription": "Dispatch Flush: TLBIE",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x208e",
+    "EventName": "PM_FLUSH_LSU",
+    "BriefDescription": "Flush initiated by LSU",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2086",
+    "EventName": "PM_FLUSH_PARTIAL",
+    "BriefDescription": "Partial flush",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0b0",
+    "EventName": "PM_FPU0_FCONV",
+    "BriefDescription": "Convert instruction executed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0b8",
+    "EventName": "PM_FPU0_FEST",
+    "BriefDescription": "Estimate instruction executed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0b4",
+    "EventName": "PM_FPU0_FRSP",
+    "BriefDescription": "Round to single precision instruction executed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0b2",
+    "EventName": "PM_FPU1_FCONV",
+    "BriefDescription": "Convert instruction executed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0ba",
+    "EventName": "PM_FPU1_FEST",
+    "BriefDescription": "Estimate instruction executed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0b6",
+    "EventName": "PM_FPU1_FRSP",
+    "BriefDescription": "Round to single precision instruction executed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50b0",
+    "EventName": "PM_FUSION_TOC_GRP0_1",
+    "BriefDescription": "One pair of instructions fused with TOC in Group0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50ae",
+    "EventName": "PM_FUSION_TOC_GRP0_2",
+    "BriefDescription": "Two pairs of instructions fused with TOC in Group0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50ac",
+    "EventName": "PM_FUSION_TOC_GRP0_3",
+    "BriefDescription": "Three pairs of instructions fused with TOC in Group0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50b2",
+    "EventName": "PM_FUSION_TOC_GRP1_1",
+    "BriefDescription": "One pair of instructions fused with TOC in Group1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50b8",
+    "EventName": "PM_FUSION_VSX_GRP0_1",
+    "BriefDescription": "One pair of instructions fused with VSX in Group0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50b6",
+    "EventName": "PM_FUSION_VSX_GRP0_2",
+    "BriefDescription": "Two pairs of instructions fused with VSX in Group0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50b4",
+    "EventName": "PM_FUSION_VSX_GRP0_3",
+    "BriefDescription": "Three pairs of instructions fused with VSX in Group0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50ba",
+    "EventName": "PM_FUSION_VSX_GRP1_1",
+    "BriefDescription": "One pair of instructions fused with VSX in Group1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3000e",
+    "EventName": "PM_FXU0_BUSY_FXU1_IDLE",
+    "BriefDescription": "fxu0 busy and fxu1 idle",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10004",
+    "EventName": "PM_FXU0_FIN",
+    "BriefDescription": "The fixed point unit Unit 0 finished an instruction. Instructions that finish may not necessarily complete",
+    "PublicDescription": "FXU0 Finished"
+  },
+  {,
+    "EventCode": "0x4000e",
+    "EventName": "PM_FXU1_BUSY_FXU0_IDLE",
+    "BriefDescription": "fxu0 idle and fxu1 busy",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40004",
+    "EventName": "PM_FXU1_FIN",
+    "BriefDescription": "FXU1 Finished",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20008",
+    "EventName": "PM_GCT_EMPTY_CYC",
+    "BriefDescription": "No itags assigned either thread (GCT Empty)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30a4",
+    "EventName": "PM_GCT_MERGE",
+    "BriefDescription": "Group dispatched on a merged GCT empty. GCT entries can be merged only within the same thread",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d01e",
+    "EventName": "PM_GCT_NOSLOT_BR_MPRED",
+    "BriefDescription": "Gct empty for this thread due to branch mispred",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d01a",
+    "EventName": "PM_GCT_NOSLOT_BR_MPRED_ICMISS",
+    "BriefDescription": "Gct empty for this thread due to Icache Miss and branch mispred",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x100f8",
+    "EventName": "PM_GCT_NOSLOT_CYC",
+    "BriefDescription": "No itags assigned",
+    "PublicDescription": "Pipeline empty (No itags assigned , no GCT slots used)"
+  },
+  {,
+    "EventCode": "0x2d01e",
+    "EventName": "PM_GCT_NOSLOT_DISP_HELD_ISSQ",
+    "BriefDescription": "Gct empty for this thread due to dispatch hold on this thread due to Issue q full",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d01c",
+    "EventName": "PM_GCT_NOSLOT_DISP_HELD_MAP",
+    "BriefDescription": "Gct empty for this thread due to dispatch hold on this thread due to Mapper full",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e010",
+    "EventName": "PM_GCT_NOSLOT_DISP_HELD_OTHER",
+    "BriefDescription": "Gct empty for this thread due to dispatch hold on this thread due to sync",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d01c",
+    "EventName": "PM_GCT_NOSLOT_DISP_HELD_SRQ",
+    "BriefDescription": "Gct empty for this thread due to dispatch hold on this thread due to SRQ full",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4e010",
+    "EventName": "PM_GCT_NOSLOT_IC_L3MISS",
+    "BriefDescription": "Gct empty for this thread due to icache l3 miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d01a",
+    "EventName": "PM_GCT_NOSLOT_IC_MISS",
+    "BriefDescription": "Gct empty for this thread due to Icache Miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20a2",
+    "EventName": "PM_GCT_UTIL_11_14_ENTRIES",
+    "BriefDescription": "GCT Utilization 11-14 entries",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20a4",
+    "EventName": "PM_GCT_UTIL_15_17_ENTRIES",
+    "BriefDescription": "GCT Utilization 15-17 entries",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20a6",
+    "EventName": "PM_GCT_UTIL_18_ENTRIES",
+    "BriefDescription": "GCT Utilization 18+ entries",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x209c",
+    "EventName": "PM_GCT_UTIL_1_2_ENTRIES",
+    "BriefDescription": "GCT Utilization 1-2 entries",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x209e",
+    "EventName": "PM_GCT_UTIL_3_6_ENTRIES",
+    "BriefDescription": "GCT Utilization 3-6 entries",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20a0",
+    "EventName": "PM_GCT_UTIL_7_10_ENTRIES",
+    "BriefDescription": "GCT Utilization 7-10 entries",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1000a",
+    "EventName": "PM_GRP_BR_MPRED_NONSPEC",
+    "BriefDescription": "Group experienced non-speculative branch redirect",
+    "PublicDescription": "Group experienced Non-speculative br mispredict"
+  },
+  {,
+    "EventCode": "0x30004",
+    "EventName": "PM_GRP_CMPL",
+    "BriefDescription": "group completed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3000a",
+    "EventName": "PM_GRP_DISP",
+    "BriefDescription": "group dispatch",
+    "PublicDescription": "dispatch_success (Group Dispatched)"
+  },
+  {,
+    "EventCode": "0x1000c",
+    "EventName": "PM_GRP_IC_MISS_NONSPEC",
+    "BriefDescription": "Group experienced non-speculative I cache miss",
+    "PublicDescription": "Group experienced Non-speculative I cache miss"
+  },
+  {,
+    "EventCode": "0x10130",
+    "EventName": "PM_GRP_MRK",
+    "BriefDescription": "Instruction Marked",
+    "PublicDescription": "Instruction marked in idu"
+  },
+  {,
+    "EventCode": "0x509c",
+    "EventName": "PM_GRP_NON_FULL_GROUP",
+    "BriefDescription": "GROUPs where we did not have 6 non branch instructions in the group(ST mode), in SMT mode 3 non branches",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50a4",
+    "EventName": "PM_GRP_TERM_2ND_BRANCH",
+    "BriefDescription": "There were enough instructions in the Ibuffer, but 2nd branch ends group",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50a6",
+    "EventName": "PM_GRP_TERM_FPU_AFTER_BR",
+    "BriefDescription": "There were enough instructions in the Ibuffer, but FPU OP IN same group after a branch terminates a group, cant do partial flushes",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x509e",
+    "EventName": "PM_GRP_TERM_NOINST",
+    "BriefDescription": "Do not fill every slot in the group, Not enough instructions in the Ibuffer. This includes cases where the group started with enough instructions, but some got knocked out by a cache miss or branch redirect (which would also empty the Ibuffer)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50a0",
+    "EventName": "PM_GRP_TERM_OTHER",
+    "BriefDescription": "There were enough instructions in the Ibuffer, but the group terminated early for some other reason, most likely due to a First or Last",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x50a2",
+    "EventName": "PM_GRP_TERM_SLOT_LIMIT",
+    "BriefDescription": "There were enough instructions in the Ibuffer, but 3 src RA/RB/RC , 2 way crack caused a group termination",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4086",
+    "EventName": "PM_IBUF_FULL_CYC",
+    "BriefDescription": "Cycles No room in ibuff",
+    "PublicDescription": "Cycles No room in ibuff fully qualified transfer (if5 valid)"
+  },
+  {,
+    "EventCode": "0x4098",
+    "EventName": "PM_IC_DEMAND_L2_BHT_REDIRECT",
+    "BriefDescription": "L2 I cache demand request due to BHT redirect, branch redirect ( 2 bubbles 3 cycles)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x409a",
+    "EventName": "PM_IC_DEMAND_L2_BR_REDIRECT",
+    "BriefDescription": "L2 I cache demand request due to branch Mispredict ( 15 cycle path)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4088",
+    "EventName": "PM_IC_DEMAND_REQ",
+    "BriefDescription": "Demand Instruction fetch request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x508a",
+    "EventName": "PM_IC_INVALIDATE",
+    "BriefDescription": "Ic line invalidated",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4092",
+    "EventName": "PM_IC_PREF_CANCEL_HIT",
+    "BriefDescription": "Prefetch Canceled due to icache hit",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4094",
+    "EventName": "PM_IC_PREF_CANCEL_L2",
+    "BriefDescription": "L2 Squashed request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4090",
+    "EventName": "PM_IC_PREF_CANCEL_PAGE",
+    "BriefDescription": "Prefetch Canceled due to page boundary",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x408a",
+    "EventName": "PM_IC_PREF_REQ",
+    "BriefDescription": "Instruction prefetch requests",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x408e",
+    "EventName": "PM_IC_PREF_WRITE",
+    "BriefDescription": "Instruction prefetch written into IL1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4096",
+    "EventName": "PM_IC_RELOAD_PRIVATE",
+    "BriefDescription": "Reloading line was brought in private for a specific thread. Most lines are brought in shared for all eight threads. If RA does not match then invalidates and then brings it shared to other thread. In P7 line brought in private , then line was invalidat",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x5088",
+    "EventName": "PM_IFU_L2_TOUCH",
+    "BriefDescription": "L2 touch to update MRU on a line",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x514050",
+    "EventName": "PM_INST_ALL_CHIP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for instruction fetches and prefetches",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x544048",
+    "EventName": "PM_INST_ALL_FROM_DL2L3_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x534048",
+    "EventName": "PM_INST_ALL_FROM_DL2L3_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x53404c",
+    "EventName": "PM_INST_ALL_FROM_DL4",
+    "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x54404c",
+    "EventName": "PM_INST_ALL_FROM_DMEM",
+    "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x514042",
+    "EventName": "PM_INST_ALL_FROM_L2",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x544046",
+    "EventName": "PM_INST_ALL_FROM_L21_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x534046",
+    "EventName": "PM_INST_ALL_FROM_L21_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x51404e",
+    "EventName": "PM_INST_ALL_FROM_L2MISS",
+    "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x534040",
+    "EventName": "PM_INST_ALL_FROM_L2_DISP_CONFLICT_LDHITST",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x544040",
+    "EventName": "PM_INST_ALL_FROM_L2_DISP_CONFLICT_OTHER",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x524040",
+    "EventName": "PM_INST_ALL_FROM_L2_MEPF",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x514040",
+    "EventName": "PM_INST_ALL_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x544042",
+    "EventName": "PM_INST_ALL_FROM_L3",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x544044",
+    "EventName": "PM_INST_ALL_FROM_L31_ECO_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x534044",
+    "EventName": "PM_INST_ALL_FROM_L31_ECO_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x524044",
+    "EventName": "PM_INST_ALL_FROM_L31_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x514046",
+    "EventName": "PM_INST_ALL_FROM_L31_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x54404e",
+    "EventName": "PM_INST_ALL_FROM_L3MISS_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch",
+    "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x534042",
+    "EventName": "PM_INST_ALL_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x524042",
+    "EventName": "PM_INST_ALL_FROM_L3_MEPF",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x514044",
+    "EventName": "PM_INST_ALL_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x51404c",
+    "EventName": "PM_INST_ALL_FROM_LL4",
+    "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x524048",
+    "EventName": "PM_INST_ALL_FROM_LMEM",
+    "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x52404c",
+    "EventName": "PM_INST_ALL_FROM_MEMORY",
+    "BriefDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x54404a",
+    "EventName": "PM_INST_ALL_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x514048",
+    "EventName": "PM_INST_ALL_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x524046",
+    "EventName": "PM_INST_ALL_FROM_RL2L3_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x51404a",
+    "EventName": "PM_INST_ALL_FROM_RL2L3_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x52404a",
+    "EventName": "PM_INST_ALL_FROM_RL4",
+    "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x53404a",
+    "EventName": "PM_INST_ALL_FROM_RMEM",
+    "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to instruction fetches and prefetches",
+    "PublicDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x524050",
+    "EventName": "PM_INST_ALL_GRP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for instruction fetches and prefetches",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x524052",
+    "EventName": "PM_INST_ALL_GRP_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for instruction fetches and prefetches",
+    "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro"
+  },
+  {,
+    "EventCode": "0x514052",
+    "EventName": "PM_INST_ALL_GRP_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for instruction fetches and prefetches",
+    "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pump for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x514054",
+    "EventName": "PM_INST_ALL_PUMP_CPRED",
+    "BriefDescription": "Pump prediction correct. Counts across all types of pumps for instruction fetches and prefetches",
+    "PublicDescription": "Pump prediction correct. Counts across all types of pumps for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x544052",
+    "EventName": "PM_INST_ALL_PUMP_MPRED",
+    "BriefDescription": "Pump misprediction. Counts across all types of pumps for instruction fetches and prefetches",
+    "PublicDescription": "Pump misprediction. Counts across all types of pumps for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x534050",
+    "EventName": "PM_INST_ALL_SYS_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for instruction fetches and prefetches",
+    "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was system pump for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x534052",
+    "EventName": "PM_INST_ALL_SYS_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for instruction fetches and prefetches",
+    "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or"
+  },
+  {,
+    "EventCode": "0x544050",
+    "EventName": "PM_INST_ALL_SYS_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for instruction fetches and prefetches",
+    "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x4080",
+    "EventName": "PM_INST_FROM_L1",
+    "BriefDescription": "Instruction fetches from L1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x44046",
+    "EventName": "PM_INST_FROM_L21_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x34046",
+    "EventName": "PM_INST_FROM_L21_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x44044",
+    "EventName": "PM_INST_FROM_L31_ECO_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x34044",
+    "EventName": "PM_INST_FROM_L31_ECO_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x24044",
+    "EventName": "PM_INST_FROM_L31_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x14046",
+    "EventName": "PM_INST_FROM_L31_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)",
+    "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+  },
+  {,
+    "EventCode": "0x30016",
+    "EventName": "PM_INST_IMC_MATCH_DISP",
+    "BriefDescription": "Matched Instructions Dispatched",
+    "PublicDescription": "IMC Matches dispatched"
+  },
+  {,
+    "EventCode": "0x30014",
+    "EventName": "PM_IOPS_DISP",
+    "BriefDescription": "Internal Operations dispatched",
+    "PublicDescription": "IOPS dispatched"
+  },
+  {,
+    "EventCode": "0x45046",
+    "EventName": "PM_IPTEG_FROM_L21_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x35046",
+    "EventName": "PM_IPTEG_FROM_L21_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x35040",
+    "EventName": "PM_IPTEG_FROM_L2_DISP_CONFLICT_LDHITST",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 with load hit store conflict due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x45040",
+    "EventName": "PM_IPTEG_FROM_L2_DISP_CONFLICT_OTHER",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 with dispatch conflict due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x45044",
+    "EventName": "PM_IPTEG_FROM_L31_ECO_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x35044",
+    "EventName": "PM_IPTEG_FROM_L31_ECO_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x25044",
+    "EventName": "PM_IPTEG_FROM_L31_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x15046",
+    "EventName": "PM_IPTEG_FROM_L31_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a instruction side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x617082",
+    "EventName": "PM_ISIDE_DISP",
+    "BriefDescription": "All i-side dispatch attempts",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x627084",
+    "EventName": "PM_ISIDE_DISP_FAIL",
+    "BriefDescription": "All i-side dispatch attempts that failed due to a addr collision with another machine",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x627086",
+    "EventName": "PM_ISIDE_DISP_FAIL_OTHER",
+    "BriefDescription": "All i-side dispatch attempts that failed due to a reason other than addrs collision",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4608e",
+    "EventName": "PM_ISIDE_L2MEMACC",
+    "BriefDescription": "valid when first beat of data comes in for an i-side fetch where data came from mem(or L4)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x44608e",
+    "EventName": "PM_ISIDE_MRU_TOUCH",
+    "BriefDescription": "Iside L2 MRU touch",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30ac",
+    "EventName": "PM_ISU_REF_FX0",
+    "BriefDescription": "FX0 ISU reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30ae",
+    "EventName": "PM_ISU_REF_FX1",
+    "BriefDescription": "FX1 ISU reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x38ac",
+    "EventName": "PM_ISU_REF_FXU",
+    "BriefDescription": "FXU ISU reject from either pipe",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30b0",
+    "EventName": "PM_ISU_REF_LS0",
+    "BriefDescription": "LS0 ISU reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30b2",
+    "EventName": "PM_ISU_REF_LS1",
+    "BriefDescription": "LS1 ISU reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30b4",
+    "EventName": "PM_ISU_REF_LS2",
+    "BriefDescription": "LS2 ISU reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30b6",
+    "EventName": "PM_ISU_REF_LS3",
+    "BriefDescription": "LS3 ISU reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x309c",
+    "EventName": "PM_ISU_REJECTS_ALL",
+    "BriefDescription": "All isu rejects could be more than 1 per cycle",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30a2",
+    "EventName": "PM_ISU_REJECT_RES_NA",
+    "BriefDescription": "ISU reject due to resource not available",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x309e",
+    "EventName": "PM_ISU_REJECT_SAR_BYPASS",
+    "BriefDescription": "Reject because of SAR bypass",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30a0",
+    "EventName": "PM_ISU_REJECT_SRC_NA",
+    "BriefDescription": "ISU reject due to source not available",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30a8",
+    "EventName": "PM_ISU_REJ_VS0",
+    "BriefDescription": "VS0 ISU reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30aa",
+    "EventName": "PM_ISU_REJ_VS1",
+    "BriefDescription": "VS1 ISU reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x38a8",
+    "EventName": "PM_ISU_REJ_VSU",
+    "BriefDescription": "VSU ISU reject from either pipe",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30b8",
+    "EventName": "PM_ISYNC",
+    "BriefDescription": "Isync count per thread",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x200301ea",
+    "EventName": "PM_L1MISS_LAT_EXC_1024",
+    "BriefDescription": "L1 misses that took longer than 1024 cycles to resolve (miss to reload)",
+    "PublicDescription": "Reload latency exceeded 1024 cyc"
+  },
+  {,
+    "EventCode": "0x200401ec",
+    "EventName": "PM_L1MISS_LAT_EXC_2048",
+    "BriefDescription": "L1 misses that took longer than 2048 cycles to resolve (miss to reload)",
+    "PublicDescription": "Reload latency exceeded 2048 cyc"
+  },
+  {,
+    "EventCode": "0x200101e8",
+    "EventName": "PM_L1MISS_LAT_EXC_256",
+    "BriefDescription": "L1 misses that took longer than 256 cycles to resolve (miss to reload)",
+    "PublicDescription": "Reload latency exceeded 256 cyc"
+  },
+  {,
+    "EventCode": "0x200201e6",
+    "EventName": "PM_L1MISS_LAT_EXC_32",
+    "BriefDescription": "L1 misses that took longer than 32 cycles to resolve (miss to reload)",
+    "PublicDescription": "Reload latency exceeded 32 cyc"
+  },
+  {,
+    "EventCode": "0x26086",
+    "EventName": "PM_L1PF_L2MEMACC",
+    "BriefDescription": "valid when first beat of data comes in for an L1pref where data came from mem(or L4)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x408c",
+    "EventName": "PM_L1_DEMAND_WRITE",
+    "BriefDescription": "Instruction Demand sectors written into IL1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x417080",
+    "EventName": "PM_L2_CASTOUT_MOD",
+    "BriefDescription": "L2 Castouts - Modified (M, Mu, Me)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x417082",
+    "EventName": "PM_L2_CASTOUT_SHR",
+    "BriefDescription": "L2 Castouts - Shared (T, Te, Si, S)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x27084",
+    "EventName": "PM_L2_CHIP_PUMP",
+    "BriefDescription": "RC requests that were local on chip pump attempts",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x427086",
+    "EventName": "PM_L2_DC_INV",
+    "BriefDescription": "Dcache invalidates from L2",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x44608c",
+    "EventName": "PM_L2_DISP_ALL_L2MISS",
+    "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x27086",
+    "EventName": "PM_L2_GROUP_PUMP",
+    "BriefDescription": "RC requests that were on Node Pump attempts",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x626084",
+    "EventName": "PM_L2_GRP_GUESS_CORRECT",
+    "BriefDescription": "L2 guess grp and guess was correct (data intra-6chip AND ^on-chip)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x626086",
+    "EventName": "PM_L2_GRP_GUESS_WRONG",
+    "BriefDescription": "L2 guess grp and guess was not correct (ie data on-chip OR beyond-6chip)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x427084",
+    "EventName": "PM_L2_IC_INV",
+    "BriefDescription": "Icache Invalidates from L2",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x436088",
+    "EventName": "PM_L2_INST",
+    "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x43608a",
+    "EventName": "PM_L2_INST_MISS",
+    "BriefDescription": "All successful i-side dispatches that were an L2miss for this thread (excludes i_l2mru_tch reqs)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x416080",
+    "EventName": "PM_L2_LD",
+    "BriefDescription": "All successful D-side Load dispatches for this thread",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x437088",
+    "EventName": "PM_L2_LD_DISP",
+    "BriefDescription": "All successful load dispatches",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x43708a",
+    "EventName": "PM_L2_LD_HIT",
+    "BriefDescription": "All successful load dispatches that were L2 hits",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x426084",
+    "EventName": "PM_L2_LD_MISS",
+    "BriefDescription": "All successful D-Side Load dispatches that were an L2miss for this thread",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x616080",
+    "EventName": "PM_L2_LOC_GUESS_CORRECT",
+    "BriefDescription": "L2 guess loc and guess was correct (ie data local)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x616082",
+    "EventName": "PM_L2_LOC_GUESS_WRONG",
+    "BriefDescription": "L2 guess loc and guess was not correct (ie data not on chip)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x516080",
+    "EventName": "PM_L2_RCLD_DISP",
+    "BriefDescription": "L2 RC load dispatch attempt",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x516082",
+    "EventName": "PM_L2_RCLD_DISP_FAIL_ADDR",
+    "BriefDescription": "L2 RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x526084",
+    "EventName": "PM_L2_RCLD_DISP_FAIL_OTHER",
+    "BriefDescription": "L2 RC load dispatch attempt failed due to other reasons",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x536088",
+    "EventName": "PM_L2_RCST_DISP",
+    "BriefDescription": "L2 RC store dispatch attempt",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x53608a",
+    "EventName": "PM_L2_RCST_DISP_FAIL_ADDR",
+    "BriefDescription": "L2 RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x54608c",
+    "EventName": "PM_L2_RCST_DISP_FAIL_OTHER",
+    "BriefDescription": "L2 RC store dispatch attempt failed due to other reasons",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x537088",
+    "EventName": "PM_L2_RC_ST_DONE",
+    "BriefDescription": "RC did st to line that was Tx or Sx",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x63708a",
+    "EventName": "PM_L2_RTY_LD",
+    "BriefDescription": "RC retries on PB for any load from core",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3708a",
+    "EventName": "PM_L2_RTY_ST",
+    "BriefDescription": "RC retries on PB for any store from core",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x54708c",
+    "EventName": "PM_L2_SN_M_RD_DONE",
+    "BriefDescription": "SNP dispatched for a read and was M",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x54708e",
+    "EventName": "PM_L2_SN_M_WR_DONE",
+    "BriefDescription": "SNP dispatched for a write and was M",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x53708a",
+    "EventName": "PM_L2_SN_SX_I_DONE",
+    "BriefDescription": "SNP dispatched and went from Sx or Tx to Ix",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x17080",
+    "EventName": "PM_L2_ST",
+    "BriefDescription": "All successful D-side store dispatches for this thread",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x44708c",
+    "EventName": "PM_L2_ST_DISP",
+    "BriefDescription": "All successful store dispatches",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x44708e",
+    "EventName": "PM_L2_ST_HIT",
+    "BriefDescription": "All successful store dispatches that were L2Hits",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x17082",
+    "EventName": "PM_L2_ST_MISS",
+    "BriefDescription": "All successful D-side store dispatches for this thread that were L2 Miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x636088",
+    "EventName": "PM_L2_SYS_GUESS_CORRECT",
+    "BriefDescription": "L2 guess sys and guess was correct (ie data beyond-6chip)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x63608a",
+    "EventName": "PM_L2_SYS_GUESS_WRONG",
+    "BriefDescription": "L2 guess sys and guess was not correct (ie data ^beyond-6chip)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x617080",
+    "EventName": "PM_L2_SYS_PUMP",
+    "BriefDescription": "RC requests that were system pump attempts",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1e05e",
+    "EventName": "PM_L2_TM_REQ_ABORT",
+    "BriefDescription": "TM abort",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3e05c",
+    "EventName": "PM_L2_TM_ST_ABORT_SISTER",
+    "BriefDescription": "TM marked store abort",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x23808a",
+    "EventName": "PM_L3_CINJ",
+    "BriefDescription": "l3 ci of cache inject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x128084",
+    "EventName": "PM_L3_CI_HIT",
+    "BriefDescription": "L3 Castins Hit (total count)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x128086",
+    "EventName": "PM_L3_CI_MISS",
+    "BriefDescription": "L3 castins miss (total count)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x819082",
+    "EventName": "PM_L3_CI_USAGE",
+    "BriefDescription": "rotating sample of 16 CI or CO actives",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x438088",
+    "EventName": "PM_L3_CO",
+    "BriefDescription": "l3 castout occurring (does not include casthrough or log writes (cinj/dmaw))",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x83908b",
+    "EventName": "PM_L3_CO0_ALLOC",
+    "BriefDescription": "lifetime, sample of CO machine 0 valid",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0x83908a",
+    "EventName": "PM_L3_CO0_BUSY",
+    "BriefDescription": "lifetime, sample of CO machine 0 valid",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x28086",
+    "EventName": "PM_L3_CO_L31",
+    "BriefDescription": "L3 CO to L3.1 OR of port 0 and 1 ( lossy)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x238088",
+    "EventName": "PM_L3_CO_LCO",
+    "BriefDescription": "Total L3 castouts occurred on LCO",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x28084",
+    "EventName": "PM_L3_CO_MEM",
+    "BriefDescription": "L3 CO to memory OR of port 0 and 1 ( lossy)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb19082",
+    "EventName": "PM_L3_GRP_GUESS_CORRECT",
+    "BriefDescription": "Initial scope=group and data from same group (near) (pred successful)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb3908a",
+    "EventName": "PM_L3_GRP_GUESS_WRONG_HIGH",
+    "BriefDescription": "Initial scope=group but data from local node. Prediction too high",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb39088",
+    "EventName": "PM_L3_GRP_GUESS_WRONG_LOW",
+    "BriefDescription": "Initial scope=group but data from outside group (far or rem). Prediction too Low",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x218080",
+    "EventName": "PM_L3_HIT",
+    "BriefDescription": "L3 Hits",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x138088",
+    "EventName": "PM_L3_L2_CO_HIT",
+    "BriefDescription": "L2 castout hits",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x13808a",
+    "EventName": "PM_L3_L2_CO_MISS",
+    "BriefDescription": "L2 castout miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x14808c",
+    "EventName": "PM_L3_LAT_CI_HIT",
+    "BriefDescription": "L3 Lateral Castins Hit",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x14808e",
+    "EventName": "PM_L3_LAT_CI_MISS",
+    "BriefDescription": "L3 Lateral Castins Miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x228084",
+    "EventName": "PM_L3_LD_HIT",
+    "BriefDescription": "L3 demand LD Hits",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x228086",
+    "EventName": "PM_L3_LD_MISS",
+    "BriefDescription": "L3 demand LD Miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1e052",
+    "EventName": "PM_L3_LD_PREF",
+    "BriefDescription": "L3 Load Prefetches",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb19080",
+    "EventName": "PM_L3_LOC_GUESS_CORRECT",
+    "BriefDescription": "initial scope=node/chip and data from local node (local) (pred successful)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb29086",
+    "EventName": "PM_L3_LOC_GUESS_WRONG",
+    "BriefDescription": "Initial scope=node but data from out side local node (near or far or rem). Prediction too Low",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x218082",
+    "EventName": "PM_L3_MISS",
+    "BriefDescription": "L3 Misses",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x54808c",
+    "EventName": "PM_L3_P0_CO_L31",
+    "BriefDescription": "l3 CO to L3.1 (lco) port 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x538088",
+    "EventName": "PM_L3_P0_CO_MEM",
+    "BriefDescription": "l3 CO to memory port 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x929084",
+    "EventName": "PM_L3_P0_CO_RTY",
+    "BriefDescription": "L3 CO received retry port 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa29084",
+    "EventName": "PM_L3_P0_GRP_PUMP",
+    "BriefDescription": "L3 pf sent with grp scope port 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x528084",
+    "EventName": "PM_L3_P0_LCO_DATA",
+    "BriefDescription": "lco sent with data port 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x518080",
+    "EventName": "PM_L3_P0_LCO_NO_DATA",
+    "BriefDescription": "dataless l3 lco sent port 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa4908c",
+    "EventName": "PM_L3_P0_LCO_RTY",
+    "BriefDescription": "L3 LCO received retry port 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa19080",
+    "EventName": "PM_L3_P0_NODE_PUMP",
+    "BriefDescription": "L3 pf sent with nodal scope port 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x919080",
+    "EventName": "PM_L3_P0_PF_RTY",
+    "BriefDescription": "L3 PF received retry port 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x939088",
+    "EventName": "PM_L3_P0_SN_HIT",
+    "BriefDescription": "L3 snoop hit port 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x118080",
+    "EventName": "PM_L3_P0_SN_INV",
+    "BriefDescription": "Port0 snooper detects someone doing a store to a line thats Sx",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x94908c",
+    "EventName": "PM_L3_P0_SN_MISS",
+    "BriefDescription": "L3 snoop miss port 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa39088",
+    "EventName": "PM_L3_P0_SYS_PUMP",
+    "BriefDescription": "L3 pf sent with sys scope port 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x54808e",
+    "EventName": "PM_L3_P1_CO_L31",
+    "BriefDescription": "l3 CO to L3.1 (lco) port 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x53808a",
+    "EventName": "PM_L3_P1_CO_MEM",
+    "BriefDescription": "l3 CO to memory port 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x929086",
+    "EventName": "PM_L3_P1_CO_RTY",
+    "BriefDescription": "L3 CO received retry port 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa29086",
+    "EventName": "PM_L3_P1_GRP_PUMP",
+    "BriefDescription": "L3 pf sent with grp scope port 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x528086",
+    "EventName": "PM_L3_P1_LCO_DATA",
+    "BriefDescription": "lco sent with data port 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x518082",
+    "EventName": "PM_L3_P1_LCO_NO_DATA",
+    "BriefDescription": "dataless l3 lco sent port 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa4908e",
+    "EventName": "PM_L3_P1_LCO_RTY",
+    "BriefDescription": "L3 LCO received retry port 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa19082",
+    "EventName": "PM_L3_P1_NODE_PUMP",
+    "BriefDescription": "L3 pf sent with nodal scope port 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x919082",
+    "EventName": "PM_L3_P1_PF_RTY",
+    "BriefDescription": "L3 PF received retry port 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x93908a",
+    "EventName": "PM_L3_P1_SN_HIT",
+    "BriefDescription": "L3 snoop hit port 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x118082",
+    "EventName": "PM_L3_P1_SN_INV",
+    "BriefDescription": "Port1 snooper detects someone doing a store to a line thats Sx",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x94908e",
+    "EventName": "PM_L3_P1_SN_MISS",
+    "BriefDescription": "L3 snoop miss port 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa3908a",
+    "EventName": "PM_L3_P1_SYS_PUMP",
+    "BriefDescription": "L3 pf sent with sys scope port 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x84908d",
+    "EventName": "PM_L3_PF0_ALLOC",
+    "BriefDescription": "lifetime, sample of PF machine 0 valid",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0x84908c",
+    "EventName": "PM_L3_PF0_BUSY",
+    "BriefDescription": "lifetime, sample of PF machine 0 valid",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x428084",
+    "EventName": "PM_L3_PF_HIT_L3",
+    "BriefDescription": "l3 pf hit in l3",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x18080",
+    "EventName": "PM_L3_PF_MISS_L3",
+    "BriefDescription": "L3 Prefetch missed in L3",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3808a",
+    "EventName": "PM_L3_PF_OFF_CHIP_CACHE",
+    "BriefDescription": "L3 Prefetch from Off chip cache",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4808e",
+    "EventName": "PM_L3_PF_OFF_CHIP_MEM",
+    "BriefDescription": "L3 Prefetch from Off chip memory",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x38088",
+    "EventName": "PM_L3_PF_ON_CHIP_CACHE",
+    "BriefDescription": "L3 Prefetch from On chip cache",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4808c",
+    "EventName": "PM_L3_PF_ON_CHIP_MEM",
+    "BriefDescription": "L3 Prefetch from On chip memory",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x829084",
+    "EventName": "PM_L3_PF_USAGE",
+    "BriefDescription": "rotating sample of 32 PF actives",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4e052",
+    "EventName": "PM_L3_PREF_ALL",
+    "BriefDescription": "Total HW L3 prefetches(Load+store)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x84908f",
+    "EventName": "PM_L3_RD0_ALLOC",
+    "BriefDescription": "lifetime, sample of RD machine 0 valid",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0x84908e",
+    "EventName": "PM_L3_RD0_BUSY",
+    "BriefDescription": "lifetime, sample of RD machine 0 valid",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x829086",
+    "EventName": "PM_L3_RD_USAGE",
+    "BriefDescription": "rotating sample of 16 RD actives",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x839089",
+    "EventName": "PM_L3_SN0_ALLOC",
+    "BriefDescription": "lifetime, sample of snooper machine 0 valid",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0x839088",
+    "EventName": "PM_L3_SN0_BUSY",
+    "BriefDescription": "lifetime, sample of snooper machine 0 valid",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x819080",
+    "EventName": "PM_L3_SN_USAGE",
+    "BriefDescription": "rotating sample of 8 snoop valids",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e052",
+    "EventName": "PM_L3_ST_PREF",
+    "BriefDescription": "L3 store Prefetches",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3e052",
+    "EventName": "PM_L3_SW_PREF",
+    "BriefDescription": "Data stream touch to L3",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb29084",
+    "EventName": "PM_L3_SYS_GUESS_CORRECT",
+    "BriefDescription": "Initial scope=system and data from outside group (far or rem)(pred successful)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb4908c",
+    "EventName": "PM_L3_SYS_GUESS_WRONG",
+    "BriefDescription": "Initial scope=system but data from local or near. Prediction too high",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x24808e",
+    "EventName": "PM_L3_TRANS_PF",
+    "BriefDescription": "L3 Transient prefetch",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x18081",
+    "EventName": "PM_L3_WI0_ALLOC",
+    "BriefDescription": "lifetime, sample of Write Inject machine 0 valid",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0x418080",
+    "EventName": "PM_L3_WI0_BUSY",
+    "BriefDescription": "lifetime, sample of Write Inject machine 0 valid",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x418082",
+    "EventName": "PM_L3_WI_USAGE",
+    "BriefDescription": "rotating sample of 8 WI actives",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xc080",
+    "EventName": "PM_LD_REF_L1_LSU0",
+    "BriefDescription": "LS0 L1 D cache load references counted at finish, gated by reject",
+    "PublicDescription": "LS0 L1 D cache load references counted at finish, gated by rejectLSU0 L1 D cache load references"
+  },
+  {,
+    "EventCode": "0xc082",
+    "EventName": "PM_LD_REF_L1_LSU1",
+    "BriefDescription": "LS1 L1 D cache load references counted at finish, gated by reject",
+    "PublicDescription": "LS1 L1 D cache load references counted at finish, gated by rejectLSU1 L1 D cache load references"
+  },
+  {,
+    "EventCode": "0xc094",
+    "EventName": "PM_LD_REF_L1_LSU2",
+    "BriefDescription": "LS2 L1 D cache load references counted at finish, gated by reject",
+    "PublicDescription": "LS2 L1 D cache load references counted at finish, gated by reject42"
+  },
+  {,
+    "EventCode": "0xc096",
+    "EventName": "PM_LD_REF_L1_LSU3",
+    "BriefDescription": "LS3 L1 D cache load references counted at finish, gated by reject",
+    "PublicDescription": "LS3 L1 D cache load references counted at finish, gated by reject42"
+  },
+  {,
+    "EventCode": "0x509a",
+    "EventName": "PM_LINK_STACK_INVALID_PTR",
+    "BriefDescription": "A flush where LS ptr is invalid, results in a pop, A lot of interrupts between push and pops",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x5098",
+    "EventName": "PM_LINK_STACK_WRONG_ADD_PRED",
+    "BriefDescription": "Link stack predicts wrong address, because of link stack design limitation",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xe080",
+    "EventName": "PM_LS0_ERAT_MISS_PREF",
+    "BriefDescription": "LS0 Erat miss due to prefetch",
+    "PublicDescription": "LS0 Erat miss due to prefetch42"
+  },
+  {,
+    "EventCode": "0xd0b8",
+    "EventName": "PM_LS0_L1_PREF",
+    "BriefDescription": "LS0 L1 cache data prefetches",
+    "PublicDescription": "LS0 L1 cache data prefetches42"
+  },
+  {,
+    "EventCode": "0xc098",
+    "EventName": "PM_LS0_L1_SW_PREF",
+    "BriefDescription": "Software L1 Prefetches, including SW Transient Prefetches",
+    "PublicDescription": "Software L1 Prefetches, including SW Transient Prefetches42"
+  },
+  {,
+    "EventCode": "0xe082",
+    "EventName": "PM_LS1_ERAT_MISS_PREF",
+    "BriefDescription": "LS1 Erat miss due to prefetch",
+    "PublicDescription": "LS1 Erat miss due to prefetch42"
+  },
+  {,
+    "EventCode": "0xd0ba",
+    "EventName": "PM_LS1_L1_PREF",
+    "BriefDescription": "LS1 L1 cache data prefetches",
+    "PublicDescription": "LS1 L1 cache data prefetches42"
+  },
+  {,
+    "EventCode": "0xc09a",
+    "EventName": "PM_LS1_L1_SW_PREF",
+    "BriefDescription": "Software L1 Prefetches, including SW Transient Prefetches",
+    "PublicDescription": "Software L1 Prefetches, including SW Transient Prefetches42"
+  },
+  {,
+    "EventCode": "0xc0b0",
+    "EventName": "PM_LSU0_FLUSH_LRQ",
+    "BriefDescription": "LS0 Flush: LRQ",
+    "PublicDescription": "LS0 Flush: LRQLSU0 LRQ flushes"
+  },
+  {,
+    "EventCode": "0xc0b8",
+    "EventName": "PM_LSU0_FLUSH_SRQ",
+    "BriefDescription": "LS0 Flush: SRQ",
+    "PublicDescription": "LS0 Flush: SRQLSU0 SRQ lhs flushes"
+  },
+  {,
+    "EventCode": "0xc0a4",
+    "EventName": "PM_LSU0_FLUSH_ULD",
+    "BriefDescription": "LS0 Flush: Unaligned Load",
+    "PublicDescription": "LS0 Flush: Unaligned LoadLSU0 unaligned load flushes"
+  },
+  {,
+    "EventCode": "0xc0ac",
+    "EventName": "PM_LSU0_FLUSH_UST",
+    "BriefDescription": "LS0 Flush: Unaligned Store",
+    "PublicDescription": "LS0 Flush: Unaligned StoreLSU0 unaligned store flushes"
+  },
+  {,
+    "EventCode": "0xf088",
+    "EventName": "PM_LSU0_L1_CAM_CANCEL",
+    "BriefDescription": "ls0 l1 tm cam cancel",
+    "PublicDescription": "ls0 l1 tm cam cancel42"
+  },
+  {,
+    "EventCode": "0x1e056",
+    "EventName": "PM_LSU0_LARX_FIN",
+    "BriefDescription": "Larx finished in LSU pipe0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xd08c",
+    "EventName": "PM_LSU0_LMQ_LHR_MERGE",
+    "BriefDescription": "LS0 Load Merged with another cacheline request",
+    "PublicDescription": "LS0 Load Merged with another cacheline request42"
+  },
+  {,
+    "EventCode": "0xc08c",
+    "EventName": "PM_LSU0_NCLD",
+    "BriefDescription": "LS0 Non-cachable Loads counted at finish",
+    "PublicDescription": "LS0 Non-cachable Loads counted at finishLSU0 non-cacheable loads"
+  },
+  {,
+    "EventCode": "0xe090",
+    "EventName": "PM_LSU0_PRIMARY_ERAT_HIT",
+    "BriefDescription": "Primary ERAT hit",
+    "PublicDescription": "Primary ERAT hit42"
+  },
+  {,
+    "EventCode": "0x1e05a",
+    "EventName": "PM_LSU0_REJECT",
+    "BriefDescription": "LSU0 reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xc09c",
+    "EventName": "PM_LSU0_SRQ_STFWD",
+    "BriefDescription": "LS0 SRQ forwarded data to a load",
+    "PublicDescription": "LS0 SRQ forwarded data to a loadLSU0 SRQ store forwarded"
+  },
+  {,
+    "EventCode": "0xf084",
+    "EventName": "PM_LSU0_STORE_REJECT",
+    "BriefDescription": "ls0 store reject",
+    "PublicDescription": "ls0 store reject42"
+  },
+  {,
+    "EventCode": "0xe0a8",
+    "EventName": "PM_LSU0_TMA_REQ_L2",
+    "BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding",
+    "PublicDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding42"
+  },
+  {,
+    "EventCode": "0xe098",
+    "EventName": "PM_LSU0_TM_L1_HIT",
+    "BriefDescription": "Load tm hit in L1",
+    "PublicDescription": "Load tm hit in L142"
+  },
+  {,
+    "EventCode": "0xe0a0",
+    "EventName": "PM_LSU0_TM_L1_MISS",
+    "BriefDescription": "Load tm L1 miss",
+    "PublicDescription": "Load tm L1 miss42"
+  },
+  {,
+    "EventCode": "0xc0b2",
+    "EventName": "PM_LSU1_FLUSH_LRQ",
+    "BriefDescription": "LS1 Flush: LRQ",
+    "PublicDescription": "LS1 Flush: LRQLSU1 LRQ flushes"
+  },
+  {,
+    "EventCode": "0xc0ba",
+    "EventName": "PM_LSU1_FLUSH_SRQ",
+    "BriefDescription": "LS1 Flush: SRQ",
+    "PublicDescription": "LS1 Flush: SRQLSU1 SRQ lhs flushes"
+  },
+  {,
+    "EventCode": "0xc0a6",
+    "EventName": "PM_LSU1_FLUSH_ULD",
+    "BriefDescription": "LS 1 Flush: Unaligned Load",
+    "PublicDescription": "LS 1 Flush: Unaligned LoadLSU1 unaligned load flushes"
+  },
+  {,
+    "EventCode": "0xc0ae",
+    "EventName": "PM_LSU1_FLUSH_UST",
+    "BriefDescription": "LS1 Flush: Unaligned Store",
+    "PublicDescription": "LS1 Flush: Unaligned StoreLSU1 unaligned store flushes"
+  },
+  {,
+    "EventCode": "0xf08a",
+    "EventName": "PM_LSU1_L1_CAM_CANCEL",
+    "BriefDescription": "ls1 l1 tm cam cancel",
+    "PublicDescription": "ls1 l1 tm cam cancel42"
+  },
+  {,
+    "EventCode": "0x2e056",
+    "EventName": "PM_LSU1_LARX_FIN",
+    "BriefDescription": "Larx finished in LSU pipe1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xd08e",
+    "EventName": "PM_LSU1_LMQ_LHR_MERGE",
+    "BriefDescription": "LS1 Load Merge with another cacheline request",
+    "PublicDescription": "LS1 Load Merge with another cacheline request42"
+  },
+  {,
+    "EventCode": "0xc08e",
+    "EventName": "PM_LSU1_NCLD",
+    "BriefDescription": "LS1 Non-cachable Loads counted at finish",
+    "PublicDescription": "LS1 Non-cachable Loads counted at finishLSU1 non-cacheable loads"
+  },
+  {,
+    "EventCode": "0xe092",
+    "EventName": "PM_LSU1_PRIMARY_ERAT_HIT",
+    "BriefDescription": "Primary ERAT hit",
+    "PublicDescription": "Primary ERAT hit42"
+  },
+  {,
+    "EventCode": "0x2e05a",
+    "EventName": "PM_LSU1_REJECT",
+    "BriefDescription": "LSU1 reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xc09e",
+    "EventName": "PM_LSU1_SRQ_STFWD",
+    "BriefDescription": "LS1 SRQ forwarded data to a load",
+    "PublicDescription": "LS1 SRQ forwarded data to a loadLSU1 SRQ store forwarded"
+  },
+  {,
+    "EventCode": "0xf086",
+    "EventName": "PM_LSU1_STORE_REJECT",
+    "BriefDescription": "ls1 store reject",
+    "PublicDescription": "ls1 store reject42"
+  },
+  {,
+    "EventCode": "0xe0aa",
+    "EventName": "PM_LSU1_TMA_REQ_L2",
+    "BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding",
+    "PublicDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding42"
+  },
+  {,
+    "EventCode": "0xe09a",
+    "EventName": "PM_LSU1_TM_L1_HIT",
+    "BriefDescription": "Load tm hit in L1",
+    "PublicDescription": "Load tm hit in L142"
+  },
+  {,
+    "EventCode": "0xe0a2",
+    "EventName": "PM_LSU1_TM_L1_MISS",
+    "BriefDescription": "Load tm L1 miss",
+    "PublicDescription": "Load tm L1 miss42"
+  },
+  {,
+    "EventCode": "0xc0b4",
+    "EventName": "PM_LSU2_FLUSH_LRQ",
+    "BriefDescription": "LS02Flush: LRQ",
+    "PublicDescription": "LS02Flush: LRQ42"
+  },
+  {,
+    "EventCode": "0xc0bc",
+    "EventName": "PM_LSU2_FLUSH_SRQ",
+    "BriefDescription": "LS2 Flush: SRQ",
+    "PublicDescription": "LS2 Flush: SRQ42"
+  },
+  {,
+    "EventCode": "0xc0a8",
+    "EventName": "PM_LSU2_FLUSH_ULD",
+    "BriefDescription": "LS3 Flush: Unaligned Load",
+    "PublicDescription": "LS3 Flush: Unaligned Load42"
+  },
+  {,
+    "EventCode": "0xf08c",
+    "EventName": "PM_LSU2_L1_CAM_CANCEL",
+    "BriefDescription": "ls2 l1 tm cam cancel",
+    "PublicDescription": "ls2 l1 tm cam cancel42"
+  },
+  {,
+    "EventCode": "0x3e056",
+    "EventName": "PM_LSU2_LARX_FIN",
+    "BriefDescription": "Larx finished in LSU pipe2",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xc084",
+    "EventName": "PM_LSU2_LDF",
+    "BriefDescription": "LS2 Scalar Loads",
+    "PublicDescription": "LS2 Scalar Loads42"
+  },
+  {,
+    "EventCode": "0xc088",
+    "EventName": "PM_LSU2_LDX",
+    "BriefDescription": "LS0 Vector Loads",
+    "PublicDescription": "LS0 Vector Loads42"
+  },
+  {,
+    "EventCode": "0xd090",
+    "EventName": "PM_LSU2_LMQ_LHR_MERGE",
+    "BriefDescription": "LS0 Load Merged with another cacheline request",
+    "PublicDescription": "LS0 Load Merged with another cacheline request42"
+  },
+  {,
+    "EventCode": "0xe094",
+    "EventName": "PM_LSU2_PRIMARY_ERAT_HIT",
+    "BriefDescription": "Primary ERAT hit",
+    "PublicDescription": "Primary ERAT hit42"
+  },
+  {,
+    "EventCode": "0x3e05a",
+    "EventName": "PM_LSU2_REJECT",
+    "BriefDescription": "LSU2 reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xc0a0",
+    "EventName": "PM_LSU2_SRQ_STFWD",
+    "BriefDescription": "LS2 SRQ forwarded data to a load",
+    "PublicDescription": "LS2 SRQ forwarded data to a load42"
+  },
+  {,
+    "EventCode": "0xe0ac",
+    "EventName": "PM_LSU2_TMA_REQ_L2",
+    "BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding",
+    "PublicDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding42"
+  },
+  {,
+    "EventCode": "0xe09c",
+    "EventName": "PM_LSU2_TM_L1_HIT",
+    "BriefDescription": "Load tm hit in L1",
+    "PublicDescription": "Load tm hit in L142"
+  },
+  {,
+    "EventCode": "0xe0a4",
+    "EventName": "PM_LSU2_TM_L1_MISS",
+    "BriefDescription": "Load tm L1 miss",
+    "PublicDescription": "Load tm L1 miss42"
+  },
+  {,
+    "EventCode": "0xc0b6",
+    "EventName": "PM_LSU3_FLUSH_LRQ",
+    "BriefDescription": "LS3 Flush: LRQ",
+    "PublicDescription": "LS3 Flush: LRQ42"
+  },
+  {,
+    "EventCode": "0xc0be",
+    "EventName": "PM_LSU3_FLUSH_SRQ",
+    "BriefDescription": "LS13 Flush: SRQ",
+    "PublicDescription": "LS13 Flush: SRQ42"
+  },
+  {,
+    "EventCode": "0xc0aa",
+    "EventName": "PM_LSU3_FLUSH_ULD",
+    "BriefDescription": "LS 14Flush: Unaligned Load",
+    "PublicDescription": "LS 14Flush: Unaligned Load42"
+  },
+  {,
+    "EventCode": "0xf08e",
+    "EventName": "PM_LSU3_L1_CAM_CANCEL",
+    "BriefDescription": "ls3 l1 tm cam cancel",
+    "PublicDescription": "ls3 l1 tm cam cancel42"
+  },
+  {,
+    "EventCode": "0x4e056",
+    "EventName": "PM_LSU3_LARX_FIN",
+    "BriefDescription": "Larx finished in LSU pipe3",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xc086",
+    "EventName": "PM_LSU3_LDF",
+    "BriefDescription": "LS3 Scalar Loads",
+    "PublicDescription": "LS3 Scalar Loads 42"
+  },
+  {,
+    "EventCode": "0xc08a",
+    "EventName": "PM_LSU3_LDX",
+    "BriefDescription": "LS1 Vector Loads",
+    "PublicDescription": "LS1 Vector Loads42"
+  },
+  {,
+    "EventCode": "0xd092",
+    "EventName": "PM_LSU3_LMQ_LHR_MERGE",
+    "BriefDescription": "LS1 Load Merge with another cacheline request",
+    "PublicDescription": "LS1 Load Merge with another cacheline request42"
+  },
+  {,
+    "EventCode": "0xe096",
+    "EventName": "PM_LSU3_PRIMARY_ERAT_HIT",
+    "BriefDescription": "Primary ERAT hit",
+    "PublicDescription": "Primary ERAT hit42"
+  },
+  {,
+    "EventCode": "0x4e05a",
+    "EventName": "PM_LSU3_REJECT",
+    "BriefDescription": "LSU3 reject",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xc0a2",
+    "EventName": "PM_LSU3_SRQ_STFWD",
+    "BriefDescription": "LS3 SRQ forwarded data to a load",
+    "PublicDescription": "LS3 SRQ forwarded data to a load42"
+  },
+  {,
+    "EventCode": "0xe0ae",
+    "EventName": "PM_LSU3_TMA_REQ_L2",
+    "BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding",
+    "PublicDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding42"
+  },
+  {,
+    "EventCode": "0xe09e",
+    "EventName": "PM_LSU3_TM_L1_HIT",
+    "BriefDescription": "Load tm hit in L1",
+    "PublicDescription": "Load tm hit in L142"
+  },
+  {,
+    "EventCode": "0xe0a6",
+    "EventName": "PM_LSU3_TM_L1_MISS",
+    "BriefDescription": "Load tm L1 miss",
+    "PublicDescription": "Load tm L1 miss42"
+  },
+  {,
+    "EventCode": "0xe880",
+    "EventName": "PM_LSU_ERAT_MISS_PREF",
+    "BriefDescription": "Erat miss due to prefetch, on either pipe",
+    "PublicDescription": "LSU"
+  },
+  {,
+    "EventCode": "0xc8ac",
+    "EventName": "PM_LSU_FLUSH_UST",
+    "BriefDescription": "Unaligned Store Flush on either pipe",
+    "PublicDescription": "LSU"
+  },
+  {,
+    "EventCode": "0xd0a4",
+    "EventName": "PM_LSU_FOUR_TABLEWALK_CYC",
+    "BriefDescription": "Cycles when four tablewalks pending on this thread",
+    "PublicDescription": "Cycles when four tablewalks pending on this thread42"
+  },
+  {,
+    "EventCode": "0x10066",
+    "EventName": "PM_LSU_FX_FIN",
+    "BriefDescription": "LSU Finished a FX operation (up to 2 per cycle)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xd8b8",
+    "EventName": "PM_LSU_L1_PREF",
+    "BriefDescription": "hw initiated , include sw streaming forms as well , include sw streams as a separate event",
+    "PublicDescription": "LSU"
+  },
+  {,
+    "EventCode": "0xc898",
+    "EventName": "PM_LSU_L1_SW_PREF",
+    "BriefDescription": "Software L1 Prefetches, including SW Transient Prefetches, on both pipes",
+    "PublicDescription": "LSU"
+  },
+  {,
+    "EventCode": "0xc884",
+    "EventName": "PM_LSU_LDF",
+    "BriefDescription": "FPU loads only on LS2/LS3 ie LU0/LU1",
+    "PublicDescription": "LSU"
+  },
+  {,
+    "EventCode": "0xc888",
+    "EventName": "PM_LSU_LDX",
+    "BriefDescription": "Vector loads can issue only on LS2/LS3",
+    "PublicDescription": "LSU"
+  },
+  {,
+    "EventCode": "0xd0a2",
+    "EventName": "PM_LSU_LMQ_FULL_CYC",
+    "BriefDescription": "LMQ full",
+    "PublicDescription": "LMQ full. Cycles LMQ full"
+  },
+  {,
+    "EventCode": "0xd0a1",
+    "EventName": "PM_LSU_LMQ_S0_ALLOC",
+    "BriefDescription": "Per thread - use edge detect to count allocates On a per thread basis, level signal indicating Slot 0 is valid. By instrumenting a single slot we can calculate service time for that slot. Previous machines required a separate signal indicating the slot was allocated. Because any signal can be routed to any counter in P8, we can count level in one PMC and edge detect in another PMC using the same signal",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0xd0a0",
+    "EventName": "PM_LSU_LMQ_S0_VALID",
+    "BriefDescription": "Slot 0 of LMQ valid",
+    "PublicDescription": "Slot 0 of LMQ valid. LMQ slot 0 valid"
+  },
+  {,
+    "EventCode": "0x3001c",
+    "EventName": "PM_LSU_LMQ_SRQ_EMPTY_ALL_CYC",
+    "BriefDescription": "ALL threads lsu empty (lmq and srq empty)",
+    "PublicDescription": "ALL threads lsu empty (lmq and srq empty). Issue HW016541"
+  },
+  {,
+    "EventCode": "0xd09f",
+    "EventName": "PM_LSU_LRQ_S0_ALLOC",
+    "BriefDescription": "Per thread - use edge detect to count allocates On a per thread basis, level signal indicating Slot 0 is valid. By instrumenting a single slot we can calculate service time for that slot. Previous machines required a separate signal indicating the slot was allocated. Because any signal can be routed to any counter in P8, we can count level in one PMC and edge detect in another PMC using the same signal",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0xd09e",
+    "EventName": "PM_LSU_LRQ_S0_VALID",
+    "BriefDescription": "Slot 0 of LRQ valid",
+    "PublicDescription": "Slot 0 of LRQ valid. LRQ slot 0 valid"
+  },
+  {,
+    "EventCode": "0xf091",
+    "EventName": "PM_LSU_LRQ_S43_ALLOC",
+    "BriefDescription": "LRQ slot 43 was released",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0xf090",
+    "EventName": "PM_LSU_LRQ_S43_VALID",
+    "BriefDescription": "LRQ slot 43 was busy",
+    "PublicDescription": "LRQ slot 43 was busy42"
+  },
+  {,
+    "EventCode": "0x30162",
+    "EventName": "PM_LSU_MRK_DERAT_MISS",
+    "BriefDescription": "DERAT Reloaded (Miss)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xc88c",
+    "EventName": "PM_LSU_NCLD",
+    "BriefDescription": "count at finish so can return only on ls0 or ls1",
+    "PublicDescription": "LSU"
+  },
+  {,
+    "EventCode": "0xc092",
+    "EventName": "PM_LSU_NCST",
+    "BriefDescription": "Non-cachable Stores sent to nest",
+    "PublicDescription": "Non-cachable Stores sent to nest42"
+  },
+  {,
+    "EventCode": "0x10064",
+    "EventName": "PM_LSU_REJECT",
+    "BriefDescription": "LSU Reject (up to 4 per cycle)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xd082",
+    "EventName": "PM_LSU_SET_MPRED",
+    "BriefDescription": "Line already in cache at reload time",
+    "PublicDescription": "Line already in cache at reload time42"
+  },
+  {,
+    "EventCode": "0x40008",
+    "EventName": "PM_LSU_SRQ_EMPTY_CYC",
+    "BriefDescription": "ALL threads srq empty",
+    "PublicDescription": "All threads srq empty"
+  },
+  {,
+    "EventCode": "0xd09d",
+    "EventName": "PM_LSU_SRQ_S0_ALLOC",
+    "BriefDescription": "Per thread - use edge detect to count allocates On a per thread basis, level signal indicating Slot 0 is valid. By instrumenting a single slot we can calculate service time for that slot. Previous machines required a separate signal indicating the slot was allocated. Because any signal can be routed to any counter in P8, we can count level in one PMC and edge detect in another PMC using the same signal",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0xd09c",
+    "EventName": "PM_LSU_SRQ_S0_VALID",
+    "BriefDescription": "Slot 0 of SRQ valid",
+    "PublicDescription": "Slot 0 of SRQ valid. SRQ slot 0 valid"
+  },
+  {,
+    "EventCode": "0xf093",
+    "EventName": "PM_LSU_SRQ_S39_ALLOC",
+    "BriefDescription": "SRQ slot 39 was released",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0xf092",
+    "EventName": "PM_LSU_SRQ_S39_VALID",
+    "BriefDescription": "SRQ slot 39 was busy",
+    "PublicDescription": "SRQ slot 39 was busy42"
+  },
+  {,
+    "EventCode": "0xd09b",
+    "EventName": "PM_LSU_SRQ_SYNC",
+    "BriefDescription": "A sync in the SRQ ended",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0xd09a",
+    "EventName": "PM_LSU_SRQ_SYNC_CYC",
+    "BriefDescription": "A sync is in the SRQ (edge detect to count)",
+    "PublicDescription": "A sync is in the SRQ (edge detect to count). SRQ sync duration"
+  },
+  {,
+    "EventCode": "0xf084",
+    "EventName": "PM_LSU_STORE_REJECT",
+    "BriefDescription": "Store reject on either pipe",
+    "PublicDescription": "LSU"
+  },
+  {,
+    "EventCode": "0xd0a6",
+    "EventName": "PM_LSU_TWO_TABLEWALK_CYC",
+    "BriefDescription": "Cycles when two tablewalks pending on this thread",
+    "PublicDescription": "Cycles when two tablewalks pending on this thread42"
+  },
+  {,
+    "EventCode": "0x5094",
+    "EventName": "PM_LWSYNC",
+    "BriefDescription": "threaded version, IC Misses where we got EA dir hit but no sector valids were on. ICBI took line out",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x209a",
+    "EventName": "PM_LWSYNC_HELD",
+    "BriefDescription": "LWSYNC held at dispatch",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3013a",
+    "EventName": "PM_MRK_CRU_FIN",
+    "BriefDescription": "IFU non-branch finished",
+    "PublicDescription": "IFU non-branch marked instruction finished"
+  },
+  {,
+    "EventCode": "0x4d146",
+    "EventName": "PM_MRK_DATA_FROM_L21_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d126",
+    "EventName": "PM_MRK_DATA_FROM_L21_MOD_CYC",
+    "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L2 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3d146",
+    "EventName": "PM_MRK_DATA_FROM_L21_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c126",
+    "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC",
+    "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d144",
+    "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d124",
+    "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD_CYC",
+    "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3d144",
+    "EventName": "PM_MRK_DATA_FROM_L31_ECO_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c124",
+    "EventName": "PM_MRK_DATA_FROM_L31_ECO_SHR_CYC",
+    "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's ECO L3 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d144",
+    "EventName": "PM_MRK_DATA_FROM_L31_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d124",
+    "EventName": "PM_MRK_DATA_FROM_L31_MOD_CYC",
+    "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L3 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d146",
+    "EventName": "PM_MRK_DATA_FROM_L31_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c126",
+    "EventName": "PM_MRK_DATA_FROM_L31_SHR_CYC",
+    "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L3 on the same chip due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x201e0",
+    "EventName": "PM_MRK_DATA_FROM_MEM",
+    "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a marked load",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4f146",
+    "EventName": "PM_MRK_DPTEG_FROM_L21_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3f146",
+    "EventName": "PM_MRK_DPTEG_FROM_L21_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3f140",
+    "EventName": "PM_MRK_DPTEG_FROM_L2_DISP_CONFLICT_LDHITST",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 with load hit store conflict due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4f140",
+    "EventName": "PM_MRK_DPTEG_FROM_L2_DISP_CONFLICT_OTHER",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 with dispatch conflict due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4f144",
+    "EventName": "PM_MRK_DPTEG_FROM_L31_ECO_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3f144",
+    "EventName": "PM_MRK_DPTEG_FROM_L31_ECO_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2f144",
+    "EventName": "PM_MRK_DPTEG_FROM_L31_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1f146",
+    "EventName": "PM_MRK_DPTEG_FROM_L31_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a marked data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30156",
+    "EventName": "PM_MRK_FAB_RSP_MATCH",
+    "BriefDescription": "ttype and cresp matched as specified in MMCR1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4f152",
+    "EventName": "PM_MRK_FAB_RSP_MATCH_CYC",
+    "BriefDescription": "cresp/ttype match cycles",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2013c",
+    "EventName": "PM_MRK_FILT_MATCH",
+    "BriefDescription": "Marked filter Match",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1013c",
+    "EventName": "PM_MRK_FIN_STALL_CYC",
+    "BriefDescription": "Marked instruction Finish Stall cycles (marked finish after NTC) (use edge detect to count )",
+    "PublicDescription": "Marked instruction Finish Stall cycles (marked finish after NTC) (use edge detect to count #)"
+  },
+  {,
+    "EventCode": "0x40130",
+    "EventName": "PM_MRK_GRP_CMPL",
+    "BriefDescription": "marked instruction finished (completed)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4013a",
+    "EventName": "PM_MRK_GRP_IC_MISS",
+    "BriefDescription": "Marked Group experienced I cache miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3013c",
+    "EventName": "PM_MRK_GRP_NTC",
+    "BriefDescription": "Marked group ntc cycles",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1013f",
+    "EventName": "PM_MRK_LD_MISS_EXPOSED",
+    "BriefDescription": "Marked Load exposed Miss (exposed period ended)",
+    "PublicDescription": "Marked Load exposed Miss (use edge detect to count #)"
+  },
+  {,
+    "EventCode": "0xd180",
+    "EventName": "PM_MRK_LSU_FLUSH",
+    "BriefDescription": "Flush: (marked) : All Cases",
+    "PublicDescription": "Flush: (marked) : All Cases42"
+  },
+  {,
+    "EventCode": "0xd188",
+    "EventName": "PM_MRK_LSU_FLUSH_LRQ",
+    "BriefDescription": "Flush: (marked) LRQ",
+    "PublicDescription": "Flush: (marked) LRQ. Marked LRQ flushes"
+  },
+  {,
+    "EventCode": "0xd18a",
+    "EventName": "PM_MRK_LSU_FLUSH_SRQ",
+    "BriefDescription": "Flush: (marked) SRQ",
+    "PublicDescription": "Flush: (marked) SRQ. Marked SRQ lhs flushes"
+  },
+  {,
+    "EventCode": "0xd184",
+    "EventName": "PM_MRK_LSU_FLUSH_ULD",
+    "BriefDescription": "Flush: (marked) Unaligned Load",
+    "PublicDescription": "Flush: (marked) Unaligned Load. Marked unaligned load flushes"
+  },
+  {,
+    "EventCode": "0xd186",
+    "EventName": "PM_MRK_LSU_FLUSH_UST",
+    "BriefDescription": "Flush: (marked) Unaligned Store",
+    "PublicDescription": "Flush: (marked) Unaligned Store. Marked unaligned store flushes"
+  },
+  {,
+    "EventCode": "0x40164",
+    "EventName": "PM_MRK_LSU_REJECT",
+    "BriefDescription": "LSU marked reject (up to 2 per cycle)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30164",
+    "EventName": "PM_MRK_LSU_REJECT_ERAT_MISS",
+    "BriefDescription": "LSU marked reject due to ERAT (up to 2 per cycle)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d15a",
+    "EventName": "PM_MRK_SRC_PREF_TRACK_EFF",
+    "BriefDescription": "Marked src pref track was effective",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3d15a",
+    "EventName": "PM_MRK_SRC_PREF_TRACK_INEFF",
+    "BriefDescription": "Prefetch tracked was ineffective for marked src",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d15c",
+    "EventName": "PM_MRK_SRC_PREF_TRACK_MOD",
+    "BriefDescription": "Prefetch tracked was moderate for marked src",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1d15c",
+    "EventName": "PM_MRK_SRC_PREF_TRACK_MOD_L2",
+    "BriefDescription": "Marked src Prefetch Tracked was moderate (source L2)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3d15c",
+    "EventName": "PM_MRK_SRC_PREF_TRACK_MOD_L3",
+    "BriefDescription": "Prefetch tracked was moderate (L3 hit) for marked src",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1c15a",
+    "EventName": "PM_MRK_TGT_PREF_TRACK_EFF",
+    "BriefDescription": "Marked target pref track was effective",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3c15a",
+    "EventName": "PM_MRK_TGT_PREF_TRACK_INEFF",
+    "BriefDescription": "Prefetch tracked was ineffective for marked target",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c15c",
+    "EventName": "PM_MRK_TGT_PREF_TRACK_MOD",
+    "BriefDescription": "Prefetch tracked was moderate for marked target",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1c15c",
+    "EventName": "PM_MRK_TGT_PREF_TRACK_MOD_L2",
+    "BriefDescription": "Marked target Prefetch Tracked was moderate (source L2)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3c15c",
+    "EventName": "PM_MRK_TGT_PREF_TRACK_MOD_L3",
+    "BriefDescription": "Prefetch tracked was moderate (L3 hit) for marked target",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20b0",
+    "EventName": "PM_NESTED_TEND",
+    "BriefDescription": "Completion time nested tend",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20b6",
+    "EventName": "PM_NON_FAV_TBEGIN",
+    "BriefDescription": "Dispatch time non favored tbegin",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x328084",
+    "EventName": "PM_NON_TM_RST_SC",
+    "BriefDescription": "non tm snp rst tm sc",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2001a",
+    "EventName": "PM_NTCG_ALL_FIN",
+    "BriefDescription": "Cycles after all instructions have finished to group completed",
+    "PublicDescription": "Cycles after all instructions have finished to group completed"
+  },
+  {,
+    "EventCode": "0x20ac",
+    "EventName": "PM_OUTER_TBEGIN",
+    "BriefDescription": "Completion time outer tbegin",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20ae",
+    "EventName": "PM_OUTER_TEND",
+    "BriefDescription": "Completion time outer tend",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2005a",
+    "EventName": "PM_PREF_TRACKED",
+    "BriefDescription": "Total number of Prefetch Operations that were tracked",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1005a",
+    "EventName": "PM_PREF_TRACK_EFF",
+    "BriefDescription": "Prefetch Tracked was effective",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3005a",
+    "EventName": "PM_PREF_TRACK_INEFF",
+    "BriefDescription": "Prefetch tracked was ineffective",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4005a",
+    "EventName": "PM_PREF_TRACK_MOD",
+    "BriefDescription": "Prefetch tracked was moderate",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1005c",
+    "EventName": "PM_PREF_TRACK_MOD_L2",
+    "BriefDescription": "Prefetch Tracked was moderate (source L2)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3005c",
+    "EventName": "PM_PREF_TRACK_MOD_L3",
+    "BriefDescription": "Prefetch tracked was moderate (L3)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xe084",
+    "EventName": "PM_PTE_PREFETCH",
+    "BriefDescription": "PTE prefetches",
+    "PublicDescription": "PTE prefetches42"
+  },
+  {,
+    "EventCode": "0x16081",
+    "EventName": "PM_RC0_ALLOC",
+    "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0x16080",
+    "EventName": "PM_RC0_BUSY",
+    "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x200301ea",
+    "EventName": "PM_RC_LIFETIME_EXC_1024",
+    "BriefDescription": "Number of times the RC machine for a sampled instruction was active for more than 1024 cycles",
+    "PublicDescription": "Reload latency exceeded 1024 cyc"
+  },
+  {,
+    "EventCode": "0x200401ec",
+    "EventName": "PM_RC_LIFETIME_EXC_2048",
+    "BriefDescription": "Number of times the RC machine for a sampled instruction was active for more than 2048 cycles",
+    "PublicDescription": "Threshold counter exceeded a value of 2048"
+  },
+  {,
+    "EventCode": "0x200101e8",
+    "EventName": "PM_RC_LIFETIME_EXC_256",
+    "BriefDescription": "Number of times the RC machine for a sampled instruction was active for more than 256 cycles",
+    "PublicDescription": "Threshold counter exceed a count of 256"
+  },
+  {,
+    "EventCode": "0x200201e6",
+    "EventName": "PM_RC_LIFETIME_EXC_32",
+    "BriefDescription": "Number of times the RC machine for a sampled instruction was active for more than 32 cycles",
+    "PublicDescription": "Reload latency exceeded 32 cyc"
+  },
+  {,
+    "EventCode": "0x36088",
+    "EventName": "PM_RC_USAGE",
+    "BriefDescription": "Continuous 16 cycle(2to1) window where this signals rotates thru sampling each L2 RC machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x34808e",
+    "EventName": "PM_RD_CLEARING_SC",
+    "BriefDescription": "rd clearing sc",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x34808c",
+    "EventName": "PM_RD_FORMING_SC",
+    "BriefDescription": "rd forming sc",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x428086",
+    "EventName": "PM_RD_HIT_PF",
+    "BriefDescription": "rd machine hit l3 pf machine",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20004",
+    "EventName": "PM_REAL_SRQ_FULL",
+    "BriefDescription": "Out of real srq entries",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2006a",
+    "EventName": "PM_RUN_CYC_SMT2_SHRD_MODE",
+    "BriefDescription": "cycles this threads run latch is set and the core is in SMT2 shared mode",
+    "PublicDescription": "Cycles run latch is set and core is in SMT2-shared mode"
+  },
+  {,
+    "EventCode": "0x1006a",
+    "EventName": "PM_RUN_CYC_SMT2_SPLIT_MODE",
+    "BriefDescription": "Cycles run latch is set and core is in SMT2-split mode",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4006c",
+    "EventName": "PM_RUN_CYC_SMT8_MODE",
+    "BriefDescription": "Cycles run latch is set and core is in SMT8 mode",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xf082",
+    "EventName": "PM_SEC_ERAT_HIT",
+    "BriefDescription": "secondary ERAT Hit",
+    "PublicDescription": "secondary ERAT Hit42"
+  },
+  {,
+    "EventCode": "0x508c",
+    "EventName": "PM_SHL_CREATED",
+    "BriefDescription": "Store-Hit-Load Table Entry Created",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x508e",
+    "EventName": "PM_SHL_ST_CONVERT",
+    "BriefDescription": "Store-Hit-Load Table Read Hit with entry Enabled",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x5090",
+    "EventName": "PM_SHL_ST_DISABLE",
+    "BriefDescription": "Store-Hit-Load Table Read Hit with entry Disabled (entry was disabled due to the entry shown to not prevent the flush)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x26085",
+    "EventName": "PM_SN0_ALLOC",
+    "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)",
+    "PublicDescription": "0.0"
+  },
+  {,
+    "EventCode": "0x26084",
+    "EventName": "PM_SN0_BUSY",
+    "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xd0b2",
+    "EventName": "PM_SNOOP_TLBIE",
+    "BriefDescription": "TLBIE snoop",
+    "PublicDescription": "TLBIE snoop. Snoop TLBIE"
+  },
+  {,
+    "EventCode": "0x338088",
+    "EventName": "PM_SNP_TM_HIT_M",
+    "BriefDescription": "snp tm st hit m mu",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x33808a",
+    "EventName": "PM_SNP_TM_HIT_T",
+    "BriefDescription": "snp tm_st_hit t tn te",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4608c",
+    "EventName": "PM_SN_USAGE",
+    "BriefDescription": "Continuous 16 cycle(2to1) window where this signals rotates thru sampling each L2 SN machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10028",
+    "EventName": "PM_STALL_END_GCT_EMPTY",
+    "BriefDescription": "Count ended because GCT went empty",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xc090",
+    "EventName": "PM_STCX_LSU",
+    "BriefDescription": "STCX executed reported at sent to nest",
+    "PublicDescription": "STCX executed reported at sent to nest42"
+  },
+  {,
+    "EventCode": "0x717080",
+    "EventName": "PM_ST_CAUSED_FAIL",
+    "BriefDescription": "Non TM St caused any thread to fail",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3090",
+    "EventName": "PM_SWAP_CANCEL",
+    "BriefDescription": "SWAP cancel , rtag not available",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3092",
+    "EventName": "PM_SWAP_CANCEL_GPR",
+    "BriefDescription": "SWAP cancel , rtag not available for gpr",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x308c",
+    "EventName": "PM_SWAP_COMPLETE",
+    "BriefDescription": "swap cast in completed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x308e",
+    "EventName": "PM_SWAP_COMPLETE_GPR",
+    "BriefDescription": "swap cast in completed fpr gpr",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xe086",
+    "EventName": "PM_TABLEWALK_CYC_PREF",
+    "BriefDescription": "tablewalk qualified for pte prefetches",
+    "PublicDescription": "tablewalk qualified for pte prefetches42"
+  },
+  {,
+    "EventCode": "0x20b2",
+    "EventName": "PM_TABORT_TRECLAIM",
+    "BriefDescription": "Completion time tabortnoncd, tabortcd, treclaim",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xe0ba",
+    "EventName": "PM_TEND_PEND_CYC",
+    "BriefDescription": "TEND latency per thread",
+    "PublicDescription": "TEND latency per thread42"
+  },
+  {,
+    "EventCode": "0x10012",
+    "EventName": "PM_THRD_GRP_CMPL_BOTH_CYC",
+    "BriefDescription": "Cycles group completed on both completion slots by any thread",
+    "PublicDescription": "Two threads finished same cycle (gated by run latch)"
+  },
+  {,
+    "EventCode": "0x40bc",
+    "EventName": "PM_THRD_PRIO_0_1_CYC",
+    "BriefDescription": "Cycles thread running at priority level 0 or 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x40be",
+    "EventName": "PM_THRD_PRIO_2_3_CYC",
+    "BriefDescription": "Cycles thread running at priority level 2 or 3",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x5080",
+    "EventName": "PM_THRD_PRIO_4_5_CYC",
+    "BriefDescription": "Cycles thread running at priority level 4 or 5",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x5082",
+    "EventName": "PM_THRD_PRIO_6_7_CYC",
+    "BriefDescription": "Cycles thread running at priority level 6 or 7",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3098",
+    "EventName": "PM_THRD_REBAL_CYC",
+    "BriefDescription": "cycles rebalance was active",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20b8",
+    "EventName": "PM_TM_BEGIN_ALL",
+    "BriefDescription": "Tm any tbegin",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x318082",
+    "EventName": "PM_TM_CAM_OVERFLOW",
+    "BriefDescription": "l3 tm cam overflow during L2 co of SC",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x74708c",
+    "EventName": "PM_TM_CAP_OVERFLOW",
+    "BriefDescription": "TM Footprint Capacity Overflow",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20ba",
+    "EventName": "PM_TM_END_ALL",
+    "BriefDescription": "Tm any tend",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3086",
+    "EventName": "PM_TM_FAIL_CONF_NON_TM",
+    "BriefDescription": "TEXAS fail reason @ completion",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3088",
+    "EventName": "PM_TM_FAIL_CON_TM",
+    "BriefDescription": "TEXAS fail reason @ completion",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xe0b2",
+    "EventName": "PM_TM_FAIL_DISALLOW",
+    "BriefDescription": "TM fail disallow",
+    "PublicDescription": "TM fail disallow42"
+  },
+  {,
+    "EventCode": "0x3084",
+    "EventName": "PM_TM_FAIL_FOOTPRINT_OVERFLOW",
+    "BriefDescription": "TEXAS fail reason @ completion",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xe0b8",
+    "EventName": "PM_TM_FAIL_NON_TX_CONFLICT",
+    "BriefDescription": "Non transactional conflict from LSU whatever gets reported to texas",
+    "PublicDescription": "Non transactional conflict from LSU whatever gets reported to texas42"
+  },
+  {,
+    "EventCode": "0x308a",
+    "EventName": "PM_TM_FAIL_SELF",
+    "BriefDescription": "TEXAS fail reason @ completion",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xe0b4",
+    "EventName": "PM_TM_FAIL_TLBIE",
+    "BriefDescription": "TLBIE hit bloom filter",
+    "PublicDescription": "TLBIE hit bloom filter42"
+  },
+  {,
+    "EventCode": "0xe0b6",
+    "EventName": "PM_TM_FAIL_TX_CONFLICT",
+    "BriefDescription": "Transactional conflict from LSU, whatever gets reported to texas",
+    "PublicDescription": "Transactional conflict from LSU, whatever gets reported to texas 42"
+  },
+  {,
+    "EventCode": "0x727086",
+    "EventName": "PM_TM_FAV_CAUSED_FAIL",
+    "BriefDescription": "TM Load (fav) caused another thread to fail",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x717082",
+    "EventName": "PM_TM_LD_CAUSED_FAIL",
+    "BriefDescription": "Non TM Ld caused any thread to fail",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x727084",
+    "EventName": "PM_TM_LD_CONF",
+    "BriefDescription": "TM Load (fav or non-fav) ran into conflict (failed)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x328086",
+    "EventName": "PM_TM_RST_SC",
+    "BriefDescription": "tm snp rst tm sc",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x318080",
+    "EventName": "PM_TM_SC_CO",
+    "BriefDescription": "l3 castout tm Sc line",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x73708a",
+    "EventName": "PM_TM_ST_CAUSED_FAIL",
+    "BriefDescription": "TM Store (fav or non-fav) caused another thread to fail",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x737088",
+    "EventName": "PM_TM_ST_CONF",
+    "BriefDescription": "TM Store (fav or non-fav) ran into conflict (failed)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20bc",
+    "EventName": "PM_TM_TBEGIN",
+    "BriefDescription": "Tm nested tbegin",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3080",
+    "EventName": "PM_TM_TRESUME",
+    "BriefDescription": "Tm resume",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20be",
+    "EventName": "PM_TM_TSUSPEND",
+    "BriefDescription": "Tm suspend",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xe08c",
+    "EventName": "PM_UP_PREF_L3",
+    "BriefDescription": "Micropartition prefetch",
+    "PublicDescription": "Micropartition prefetch42"
+  },
+  {,
+    "EventCode": "0xe08e",
+    "EventName": "PM_UP_PREF_POINTER",
+    "BriefDescription": "Micropartition pointer prefetches",
+    "PublicDescription": "Micropartition pointer prefetches42"
+  },
+  {,
+    "EventCode": "0xa0a4",
+    "EventName": "PM_VSU0_16FLOP",
+    "BriefDescription": "Sixteen flops operation (SP vector versions of fdiv,fsqrt)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa080",
+    "EventName": "PM_VSU0_1FLOP",
+    "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation finished",
+    "PublicDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation finishedDecode into 1,2,4 FLOP according to instr IOP, multiplied by #vector elements according to route( eg x1, x2, x4) Only if instr sends finish to ISU"
+  },
+  {,
+    "EventCode": "0xa098",
+    "EventName": "PM_VSU0_2FLOP",
+    "BriefDescription": "two flops operation (scalar fmadd, fnmadd, fmsub, fnmsub and DP vector versions of single flop instructions)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa09c",
+    "EventName": "PM_VSU0_4FLOP",
+    "BriefDescription": "four flops operation (scalar fdiv, fsqrt, DP vector version of fmadd, fnmadd, fmsub, fnmsub, SP vector versions of single flop instructions)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0a0",
+    "EventName": "PM_VSU0_8FLOP",
+    "BriefDescription": "eight flops operation (DP vector versions of fdiv,fsqrt and SP vector versions of fmadd,fnmadd,fmsub,fnmsub)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0a4",
+    "EventName": "PM_VSU0_COMPLEX_ISSUED",
+    "BriefDescription": "Complex VMX instruction issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0b4",
+    "EventName": "PM_VSU0_CY_ISSUED",
+    "BriefDescription": "Cryptographic instruction RFC02196 Issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0a8",
+    "EventName": "PM_VSU0_DD_ISSUED",
+    "BriefDescription": "64BIT Decimal Issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa08c",
+    "EventName": "PM_VSU0_DP_2FLOP",
+    "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa090",
+    "EventName": "PM_VSU0_DP_FMA",
+    "BriefDescription": "DP vector version of fmadd,fnmadd,fmsub,fnmsub",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa094",
+    "EventName": "PM_VSU0_DP_FSQRT_FDIV",
+    "BriefDescription": "DP vector versions of fdiv,fsqrt",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0ac",
+    "EventName": "PM_VSU0_DQ_ISSUED",
+    "BriefDescription": "128BIT Decimal Issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0b0",
+    "EventName": "PM_VSU0_EX_ISSUED",
+    "BriefDescription": "Direct move 32/64b VRFtoGPR RFC02206 Issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0bc",
+    "EventName": "PM_VSU0_FIN",
+    "BriefDescription": "VSU0 Finished an instruction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa084",
+    "EventName": "PM_VSU0_FMA",
+    "BriefDescription": "two flops operation (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only!",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb098",
+    "EventName": "PM_VSU0_FPSCR",
+    "BriefDescription": "Move to/from FPSCR type instruction issued on Pipe 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa088",
+    "EventName": "PM_VSU0_FSQRT_FDIV",
+    "BriefDescription": "four flops operation (fdiv,fsqrt) Scalar Instructions only!",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb090",
+    "EventName": "PM_VSU0_PERMUTE_ISSUED",
+    "BriefDescription": "Permute VMX Instruction Issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb088",
+    "EventName": "PM_VSU0_SCALAR_DP_ISSUED",
+    "BriefDescription": "Double Precision scalar instruction issued on Pipe0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb094",
+    "EventName": "PM_VSU0_SIMPLE_ISSUED",
+    "BriefDescription": "Simple VMX instruction issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0a8",
+    "EventName": "PM_VSU0_SINGLE",
+    "BriefDescription": "FPU single precision",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb09c",
+    "EventName": "PM_VSU0_SQ",
+    "BriefDescription": "Store Vector Issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb08c",
+    "EventName": "PM_VSU0_STF",
+    "BriefDescription": "FPU store (SP or DP) issued on Pipe0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb080",
+    "EventName": "PM_VSU0_VECTOR_DP_ISSUED",
+    "BriefDescription": "Double Precision vector instruction issued on Pipe0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb084",
+    "EventName": "PM_VSU0_VECTOR_SP_ISSUED",
+    "BriefDescription": "Single Precision vector instruction issued (executed)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0a6",
+    "EventName": "PM_VSU1_16FLOP",
+    "BriefDescription": "Sixteen flops operation (SP vector versions of fdiv,fsqrt)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa082",
+    "EventName": "PM_VSU1_1FLOP",
+    "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation finished",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa09a",
+    "EventName": "PM_VSU1_2FLOP",
+    "BriefDescription": "two flops operation (scalar fmadd, fnmadd, fmsub, fnmsub and DP vector versions of single flop instructions)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa09e",
+    "EventName": "PM_VSU1_4FLOP",
+    "BriefDescription": "four flops operation (scalar fdiv, fsqrt, DP vector version of fmadd, fnmadd, fmsub, fnmsub, SP vector versions of single flop instructions)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0a2",
+    "EventName": "PM_VSU1_8FLOP",
+    "BriefDescription": "eight flops operation (DP vector versions of fdiv,fsqrt and SP vector versions of fmadd,fnmadd,fmsub,fnmsub)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0a6",
+    "EventName": "PM_VSU1_COMPLEX_ISSUED",
+    "BriefDescription": "Complex VMX instruction issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0b6",
+    "EventName": "PM_VSU1_CY_ISSUED",
+    "BriefDescription": "Cryptographic instruction RFC02196 Issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0aa",
+    "EventName": "PM_VSU1_DD_ISSUED",
+    "BriefDescription": "64BIT Decimal Issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa08e",
+    "EventName": "PM_VSU1_DP_2FLOP",
+    "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa092",
+    "EventName": "PM_VSU1_DP_FMA",
+    "BriefDescription": "DP vector version of fmadd,fnmadd,fmsub,fnmsub",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa096",
+    "EventName": "PM_VSU1_DP_FSQRT_FDIV",
+    "BriefDescription": "DP vector versions of fdiv,fsqrt",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0ae",
+    "EventName": "PM_VSU1_DQ_ISSUED",
+    "BriefDescription": "128BIT Decimal Issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb0b2",
+    "EventName": "PM_VSU1_EX_ISSUED",
+    "BriefDescription": "Direct move 32/64b VRFtoGPR RFC02206 Issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0be",
+    "EventName": "PM_VSU1_FIN",
+    "BriefDescription": "VSU1 Finished an instruction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa086",
+    "EventName": "PM_VSU1_FMA",
+    "BriefDescription": "two flops operation (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only!",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb09a",
+    "EventName": "PM_VSU1_FPSCR",
+    "BriefDescription": "Move to/from FPSCR type instruction issued on Pipe 0",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa08a",
+    "EventName": "PM_VSU1_FSQRT_FDIV",
+    "BriefDescription": "four flops operation (fdiv,fsqrt) Scalar Instructions only!",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb092",
+    "EventName": "PM_VSU1_PERMUTE_ISSUED",
+    "BriefDescription": "Permute VMX Instruction Issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb08a",
+    "EventName": "PM_VSU1_SCALAR_DP_ISSUED",
+    "BriefDescription": "Double Precision scalar instruction issued on Pipe1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb096",
+    "EventName": "PM_VSU1_SIMPLE_ISSUED",
+    "BriefDescription": "Simple VMX instruction issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xa0aa",
+    "EventName": "PM_VSU1_SINGLE",
+    "BriefDescription": "FPU single precision",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb09e",
+    "EventName": "PM_VSU1_SQ",
+    "BriefDescription": "Store Vector Issued",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb08e",
+    "EventName": "PM_VSU1_STF",
+    "BriefDescription": "FPU store (SP or DP) issued on Pipe1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb082",
+    "EventName": "PM_VSU1_VECTOR_DP_ISSUED",
+    "BriefDescription": "Double Precision vector instruction issued on Pipe1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0xb086",
+    "EventName": "PM_VSU1_VECTOR_SP_ISSUED",
+    "BriefDescription": "Single Precision vector instruction issued (executed)",
+    "PublicDescription": ""
+  },
+]
diff --git a/pmu-events/arch/powerpc/power8/pipeline.json b/pmu-events/arch/powerpc/power8/pipeline.json
new file mode 100644
index 0000000..293f3a4
--- /dev/null
+++ b/pmu-events/arch/powerpc/power8/pipeline.json
@@ -0,0 +1,350 @@
+[
+  {,
+    "EventCode": "0x100f2",
+    "EventName": "PM_1PLUS_PPC_CMPL",
+    "BriefDescription": "1 or more ppc insts finished",
+    "PublicDescription": "1 or more ppc insts finished (completed)"
+  },
+  {,
+    "EventCode": "0x400f2",
+    "EventName": "PM_1PLUS_PPC_DISP",
+    "BriefDescription": "Cycles at least one Instr Dispatched",
+    "PublicDescription": "Cycles at least one Instr Dispatched. Could be a group with only microcode. Issue HW016521"
+  },
+  {,
+    "EventCode": "0x100fa",
+    "EventName": "PM_ANY_THRD_RUN_CYC",
+    "BriefDescription": "One of threads in run_cycles",
+    "PublicDescription": "Any thread in run_cycles (was one thread in run_cycles)"
+  },
+  {,
+    "EventCode": "0x4000a",
+    "EventName": "PM_CMPLU_STALL",
+    "BriefDescription": "Completion stall",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d018",
+    "EventName": "PM_CMPLU_STALL_BRU",
+    "BriefDescription": "Completion stall due to a Branch Unit",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c012",
+    "EventName": "PM_CMPLU_STALL_DCACHE_MISS",
+    "BriefDescription": "Completion stall by Dcache miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c018",
+    "EventName": "PM_CMPLU_STALL_DMISS_L21_L31",
+    "BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c016",
+    "EventName": "PM_CMPLU_STALL_DMISS_L2L3",
+    "BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c016",
+    "EventName": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT",
+    "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict",
+    "PublicDescription": "Completion stall due to cache miss resolving in core's L2/L3 with a conflict"
+  },
+  {,
+    "EventCode": "0x4c01a",
+    "EventName": "PM_CMPLU_STALL_DMISS_L3MISS",
+    "BriefDescription": "Completion stall due to cache miss resolving missed the L3",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c018",
+    "EventName": "PM_CMPLU_STALL_DMISS_LMEM",
+    "BriefDescription": "Completion stall due to cache miss that resolves in local memory",
+    "PublicDescription": "Completion stall due to cache miss resolving in core's Local Memory"
+  },
+  {,
+    "EventCode": "0x2c01c",
+    "EventName": "PM_CMPLU_STALL_DMISS_REMOTE",
+    "BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)",
+    "PublicDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)"
+  },
+  {,
+    "EventCode": "0x4c012",
+    "EventName": "PM_CMPLU_STALL_ERAT_MISS",
+    "BriefDescription": "Completion stall due to LSU reject ERAT miss",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d016",
+    "EventName": "PM_CMPLU_STALL_FXLONG",
+    "BriefDescription": "Completion stall due to a long latency fixed point instruction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2d016",
+    "EventName": "PM_CMPLU_STALL_FXU",
+    "BriefDescription": "Completion stall due to FXU",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30036",
+    "EventName": "PM_CMPLU_STALL_HWSYNC",
+    "BriefDescription": "completion stall due to hwsync",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4d014",
+    "EventName": "PM_CMPLU_STALL_LOAD_FINISH",
+    "BriefDescription": "Completion stall due to a Load finish",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c010",
+    "EventName": "PM_CMPLU_STALL_LSU",
+    "BriefDescription": "Completion stall by LSU instruction",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10036",
+    "EventName": "PM_CMPLU_STALL_LWSYNC",
+    "BriefDescription": "completion stall due to isync/lwsync",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30006",
+    "EventName": "PM_CMPLU_STALL_OTHER_CMPL",
+    "BriefDescription": "Instructions core completed while this thread was stalled",
+    "PublicDescription": "Instructions core completed while this thread was stalled"
+  },
+  {,
+    "EventCode": "0x4c01c",
+    "EventName": "PM_CMPLU_STALL_ST_FWD",
+    "BriefDescription": "Completion stall due to store forward",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1001c",
+    "EventName": "PM_CMPLU_STALL_THRD",
+    "BriefDescription": "Completion Stalled due to thread conflict. Group ready to complete but it was another thread's turn",
+    "PublicDescription": "Completion stall due to thread conflict"
+  },
+  {,
+    "EventCode": "0x1e",
+    "EventName": "PM_CYC",
+    "BriefDescription": "Cycles",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10006",
+    "EventName": "PM_DISP_HELD",
+    "BriefDescription": "Dispatch Held",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4003c",
+    "EventName": "PM_DISP_HELD_SYNC_HOLD",
+    "BriefDescription": "Dispatch held due to SYNC hold",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x200f8",
+    "EventName": "PM_EXT_INT",
+    "BriefDescription": "external interrupt",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x400f8",
+    "EventName": "PM_FLUSH",
+    "BriefDescription": "Flush (any type)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30012",
+    "EventName": "PM_FLUSH_COMPLETION",
+    "BriefDescription": "Completion Flush",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3000c",
+    "EventName": "PM_FREQ_DOWN",
+    "BriefDescription": "Power Management: Below Threshold B",
+    "PublicDescription": "Frequency is being slewed down due to Power Management"
+  },
+  {,
+    "EventCode": "0x4000c",
+    "EventName": "PM_FREQ_UP",
+    "BriefDescription": "Power Management: Above Threshold A",
+    "PublicDescription": "Frequency is being slewed up due to Power Management"
+  },
+  {,
+    "EventCode": "0x2000a",
+    "EventName": "PM_HV_CYC",
+    "BriefDescription": "Cycles in which msr_hv is high. Note that this event does not take msr_pr into consideration",
+    "PublicDescription": "cycles in hypervisor mode"
+  },
+  {,
+    "EventCode": "0x3405e",
+    "EventName": "PM_IFETCH_THROTTLE",
+    "BriefDescription": "Cycles in which Instruction fetch throttle was active",
+    "PublicDescription": "Cycles instruction fetch was throttled in IFU"
+  },
+  {,
+    "EventCode": "0x10014",
+    "EventName": "PM_IOPS_CMPL",
+    "BriefDescription": "Internal Operations completed",
+    "PublicDescription": "IOPS Completed"
+  },
+  {,
+    "EventCode": "0x3c058",
+    "EventName": "PM_LARX_FIN",
+    "BriefDescription": "Larx finished",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1002e",
+    "EventName": "PM_LD_CMPL",
+    "BriefDescription": "count of Loads completed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10062",
+    "EventName": "PM_LD_L3MISS_PEND_CYC",
+    "BriefDescription": "Cycles L3 miss was pending for this thread",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30066",
+    "EventName": "PM_LSU_FIN",
+    "BriefDescription": "LSU Finished an instruction (up to 2 per cycle)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2003e",
+    "EventName": "PM_LSU_LMQ_SRQ_EMPTY_CYC",
+    "BriefDescription": "LSU empty (lmq and srq empty)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e05c",
+    "EventName": "PM_LSU_REJECT_ERAT_MISS",
+    "BriefDescription": "LSU Reject due to ERAT (up to 4 per cycles)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4e05c",
+    "EventName": "PM_LSU_REJECT_LHS",
+    "BriefDescription": "LSU Reject due to LHS (up to 4 per cycle)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1e05c",
+    "EventName": "PM_LSU_REJECT_LMQ_FULL",
+    "BriefDescription": "LSU reject due to LMQ full ( 4 per cycle)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1001a",
+    "EventName": "PM_LSU_SRQ_FULL_CYC",
+    "BriefDescription": "Storage Queue is full and is blocking dispatch",
+    "PublicDescription": "SRQ is Full"
+  },
+  {,
+    "EventCode": "0x40014",
+    "EventName": "PM_PROBE_NOP_DISP",
+    "BriefDescription": "ProbeNops dispatched",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x600f4",
+    "EventName": "PM_RUN_CYC",
+    "BriefDescription": "Run_cycles",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3006c",
+    "EventName": "PM_RUN_CYC_SMT2_MODE",
+    "BriefDescription": "Cycles run latch is set and core is in SMT2 mode",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2006c",
+    "EventName": "PM_RUN_CYC_SMT4_MODE",
+    "BriefDescription": "cycles this threads run latch is set and the core is in SMT4 mode",
+    "PublicDescription": "Cycles run latch is set and core is in SMT4 mode"
+  },
+  {,
+    "EventCode": "0x1006c",
+    "EventName": "PM_RUN_CYC_ST_MODE",
+    "BriefDescription": "Cycles run latch is set and core is in ST mode",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x500fa",
+    "EventName": "PM_RUN_INST_CMPL",
+    "BriefDescription": "Run_Instructions",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1e058",
+    "EventName": "PM_STCX_FAIL",
+    "BriefDescription": "stcx failed",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x20016",
+    "EventName": "PM_ST_CMPL",
+    "BriefDescription": "Store completion count",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x200f0",
+    "EventName": "PM_ST_FIN",
+    "BriefDescription": "Store Instructions Finished",
+    "PublicDescription": "Store Instructions Finished (store sent to nest)"
+  },
+  {,
+    "EventCode": "0x20018",
+    "EventName": "PM_ST_FWD",
+    "BriefDescription": "Store forwards that finished",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10026",
+    "EventName": "PM_TABLEWALK_CYC",
+    "BriefDescription": "Cycles when a tablewalk (I or D) is active",
+    "PublicDescription": "Tablewalk Active"
+  },
+  {,
+    "EventCode": "0x300f8",
+    "EventName": "PM_TB_BIT_TRANS",
+    "BriefDescription": "timebase event",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2000c",
+    "EventName": "PM_THRD_ALL_RUN_CYC",
+    "BriefDescription": "All Threads in Run_cycles (was both threads in run_cycles)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30058",
+    "EventName": "PM_TLBIE_FIN",
+    "BriefDescription": "tlbie finished",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10060",
+    "EventName": "PM_TM_TRANS_RUN_CYC",
+    "BriefDescription": "run cycles in transactional state",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e012",
+    "EventName": "PM_TM_TX_PASS_RUN_CYC",
+    "BriefDescription": "cycles spent in successful transactions",
+    "PublicDescription": "run cycles spent in successful transactions"
+  },
+]
diff --git a/pmu-events/arch/powerpc/power8/pmc.json b/pmu-events/arch/powerpc/power8/pmc.json
new file mode 100644
index 0000000..583e4d9
--- /dev/null
+++ b/pmu-events/arch/powerpc/power8/pmc.json
@@ -0,0 +1,140 @@
+[
+  {,
+    "EventCode": "0x20010",
+    "EventName": "PM_PMC1_OVERFLOW",
+    "BriefDescription": "Overflow from counter 1",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30010",
+    "EventName": "PM_PMC2_OVERFLOW",
+    "BriefDescription": "Overflow from counter 2",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30020",
+    "EventName": "PM_PMC2_REWIND",
+    "BriefDescription": "PMC2 Rewind Event (did not match condition)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10022",
+    "EventName": "PM_PMC2_SAVED",
+    "BriefDescription": "PMC2 Rewind Value saved",
+    "PublicDescription": "PMC2 Rewind Value saved (matched condition)"
+  },
+  {,
+    "EventCode": "0x40010",
+    "EventName": "PM_PMC3_OVERFLOW",
+    "BriefDescription": "Overflow from counter 3",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10010",
+    "EventName": "PM_PMC4_OVERFLOW",
+    "BriefDescription": "Overflow from counter 4",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10020",
+    "EventName": "PM_PMC4_REWIND",
+    "BriefDescription": "PMC4 Rewind Event",
+    "PublicDescription": "PMC4 Rewind Event (did not match condition)"
+  },
+  {,
+    "EventCode": "0x30022",
+    "EventName": "PM_PMC4_SAVED",
+    "BriefDescription": "PMC4 Rewind Value saved (matched condition)",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10024",
+    "EventName": "PM_PMC5_OVERFLOW",
+    "BriefDescription": "Overflow from counter 5",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x30024",
+    "EventName": "PM_PMC6_OVERFLOW",
+    "BriefDescription": "Overflow from counter 6",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x400f4",
+    "EventName": "PM_RUN_PURR",
+    "BriefDescription": "Run_PURR",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x10008",
+    "EventName": "PM_RUN_SPURR",
+    "BriefDescription": "Run SPURR",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x0",
+    "EventName": "PM_SUSPENDED",
+    "BriefDescription": "Counter OFF",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x301ea",
+    "EventName": "PM_THRESH_EXC_1024",
+    "BriefDescription": "Threshold counter exceeded a value of 1024",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x401ea",
+    "EventName": "PM_THRESH_EXC_128",
+    "BriefDescription": "Threshold counter exceeded a value of 128",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x401ec",
+    "EventName": "PM_THRESH_EXC_2048",
+    "BriefDescription": "Threshold counter exceeded a value of 2048",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x101e8",
+    "EventName": "PM_THRESH_EXC_256",
+    "BriefDescription": "Threshold counter exceed a count of 256",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x201e6",
+    "EventName": "PM_THRESH_EXC_32",
+    "BriefDescription": "Threshold counter exceeded a value of 32",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x101e6",
+    "EventName": "PM_THRESH_EXC_4096",
+    "BriefDescription": "Threshold counter exceed a count of 4096",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x201e8",
+    "EventName": "PM_THRESH_EXC_512",
+    "BriefDescription": "Threshold counter exceeded a value of 512",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x301e8",
+    "EventName": "PM_THRESH_EXC_64",
+    "BriefDescription": "Threshold counter exceeded a value of 64",
+    "PublicDescription": "Threshold counter exceeded a value of 64"
+  },
+  {,
+    "EventCode": "0x101ec",
+    "EventName": "PM_THRESH_MET",
+    "BriefDescription": "threshold exceeded",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4016e",
+    "EventName": "PM_THRESH_NOT_MET",
+    "BriefDescription": "Threshold counter did not meet threshold",
+    "PublicDescription": ""
+  },
+]
diff --git a/pmu-events/arch/powerpc/power8/translation.json b/pmu-events/arch/powerpc/power8/translation.json
new file mode 100644
index 0000000..e47a554
--- /dev/null
+++ b/pmu-events/arch/powerpc/power8/translation.json
@@ -0,0 +1,176 @@
+[
+  {,
+    "EventCode": "0x4c054",
+    "EventName": "PM_DERAT_MISS_16G",
+    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16G",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3c054",
+    "EventName": "PM_DERAT_MISS_16M",
+    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1c056",
+    "EventName": "PM_DERAT_MISS_4K",
+    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c054",
+    "EventName": "PM_DERAT_MISS_64K",
+    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4e048",
+    "EventName": "PM_DPTEG_FROM_DL2L3_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3e048",
+    "EventName": "PM_DPTEG_FROM_DL2L3_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1e042",
+    "EventName": "PM_DPTEG_FROM_L2",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1e04e",
+    "EventName": "PM_DPTEG_FROM_L2MISS",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e040",
+    "EventName": "PM_DPTEG_FROM_L2_MEPF",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1e040",
+    "EventName": "PM_DPTEG_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4e042",
+    "EventName": "PM_DPTEG_FROM_L3",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3e042",
+    "EventName": "PM_DPTEG_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e042",
+    "EventName": "PM_DPTEG_FROM_L3_MEPF",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1e044",
+    "EventName": "PM_DPTEG_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1e04c",
+    "EventName": "PM_DPTEG_FROM_LL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e048",
+    "EventName": "PM_DPTEG_FROM_LMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e04c",
+    "EventName": "PM_DPTEG_FROM_MEMORY",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local, remote or distant due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4e04a",
+    "EventName": "PM_DPTEG_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1e048",
+    "EventName": "PM_DPTEG_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with either shared or modified data from another core's L2/L3 on the same chip due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e046",
+    "EventName": "PM_DPTEG_FROM_RL2L3_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x1e04a",
+    "EventName": "PM_DPTEG_FROM_RL2L3_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2e04a",
+    "EventName": "PM_DPTEG_FROM_RL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group (Remote) due to a data side request",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x300fc",
+    "EventName": "PM_DTLB_MISS",
+    "BriefDescription": "Data PTEG reload",
+    "PublicDescription": "Data PTEG Reloaded (DTLB Miss)"
+  },
+  {,
+    "EventCode": "0x1c058",
+    "EventName": "PM_DTLB_MISS_16G",
+    "BriefDescription": "Data TLB Miss page size 16G",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x4c056",
+    "EventName": "PM_DTLB_MISS_16M",
+    "BriefDescription": "Data TLB Miss page size 16M",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x2c056",
+    "EventName": "PM_DTLB_MISS_4K",
+    "BriefDescription": "Data TLB Miss page size 4K",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x3c056",
+    "EventName": "PM_DTLB_MISS_64K",
+    "BriefDescription": "Data TLB Miss page size 64K",
+    "PublicDescription": ""
+  },
+  {,
+    "EventCode": "0x200f6",
+    "EventName": "PM_LSU_DERAT_MISS",
+    "BriefDescription": "DERAT Reloaded due to a DERAT miss",
+    "PublicDescription": "DERAT Reloaded (Miss)"
+  },
+  {,
+    "EventCode": "0x20066",
+    "EventName": "PM_TLB_MISS",
+    "BriefDescription": "TLB Miss (I + D)",
+    "PublicDescription": ""
+  },
+]
diff --git a/pmu-events/arch/powerpc/power9/cache.json b/pmu-events/arch/powerpc/power9/cache.json
new file mode 100644
index 0000000..8510721
--- /dev/null
+++ b/pmu-events/arch/powerpc/power9/cache.json
@@ -0,0 +1,107 @@
+[
+  {,
+    "EventCode": "0x300F4",
+    "EventName": "PM_THRD_CONC_RUN_INST",
+    "BriefDescription": "PPC Instructions Finished by this thread when all threads in the core had the run-latch set"
+  },
+  {,
+    "EventCode": "0x1E056",
+    "EventName": "PM_CMPLU_STALL_FLUSH_ANY_THREAD",
+    "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion"
+  },
+  {,
+    "EventCode": "0x4D016",
+    "EventName": "PM_CMPLU_STALL_FXLONG",
+    "BriefDescription": "Completion stall due to a long latency scalar fixed point instruction (division, square root)"
+  },
+  {,
+    "EventCode": "0x2D016",
+    "EventName": "PM_CMPLU_STALL_FXU",
+    "BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes"
+  },
+  {,
+    "EventCode": "0x4D12A",
+    "EventName": "PM_MRK_DATA_FROM_RL4_CYC",
+    "BriefDescription": "Duration in cycles to reload from another chip's L4 on the same Node or Group (Remote) due to a marked load"
+  },
+  {,
+    "EventCode": "0x1003C",
+    "EventName": "PM_CMPLU_STALL_DMISS_L2L3",
+    "BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3"
+  },
+  {,
+    "EventCode": "0x4C014",
+    "EventName": "PM_CMPLU_STALL_LMQ_FULL",
+    "BriefDescription": "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full"
+  },
+  {,
+    "EventCode": "0x14048",
+    "EventName": "PM_INST_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "The processor's Instruction cache was reloaded with either shared or modified data from another core's L2/L3 on the same chip due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x4D014",
+    "EventName": "PM_CMPLU_STALL_LOAD_FINISH",
+    "BriefDescription": "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish"
+  },
+  {,
+    "EventCode": "0x2404A",
+    "EventName": "PM_INST_FROM_RL4",
+    "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group (Remote) due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x1404A",
+    "EventName": "PM_INST_FROM_RL2L3_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x401EA",
+    "EventName": "PM_THRESH_EXC_128",
+    "BriefDescription": "Threshold counter exceeded a value of 128"
+  },
+  {,
+    "EventCode": "0x400F6",
+    "EventName": "PM_BR_MPRED_CMPL",
+    "BriefDescription": "Number of Branch Mispredicts"
+  },
+  {,
+    "EventCode": "0x2F140",
+    "EventName": "PM_MRK_DPTEG_FROM_L2_MEPF",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x101E6",
+    "EventName": "PM_THRESH_EXC_4096",
+    "BriefDescription": "Threshold counter exceeded a count of 4096"
+  },
+  {,
+    "EventCode": "0x3F14A",
+    "EventName": "PM_MRK_DPTEG_FROM_RMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4C016",
+    "EventName": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT",
+    "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict"
+  },
+  {,
+    "EventCode": "0x2C01A",
+    "EventName": "PM_CMPLU_STALL_LHS",
+    "BriefDescription": "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data"
+  },
+  {,
+    "EventCode": "0x401E4",
+    "EventName": "PM_MRK_DTLB_MISS",
+    "BriefDescription": "Marked dtlb miss"
+  },
+  {,
+    "EventCode": "0x24046",
+    "EventName": "PM_INST_FROM_RL2L3_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x1002A",
+    "EventName": "PM_CMPLU_STALL_LARX",
+    "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/powerpc/power9/floating-point.json b/pmu-events/arch/powerpc/power9/floating-point.json
new file mode 100644
index 0000000..8a83bca
--- /dev/null
+++ b/pmu-events/arch/powerpc/power9/floating-point.json
@@ -0,0 +1,32 @@
+[
+  {,
+    "EventCode": "0x1415A",
+    "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L2 with load hit store conflict due to a marked load"
+  },
+  {,
+    "EventCode": "0x10058",
+    "EventName": "PM_MEM_LOC_THRESH_IFU",
+    "BriefDescription": "Local Memory above threshold for IFU speculation control"
+  },
+  {,
+    "EventCode": "0x2D028",
+    "EventName": "PM_RADIX_PWC_L2_PDE_FROM_L2",
+    "BriefDescription": "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L2 data cache"
+  },
+  {,
+    "EventCode": "0x30012",
+    "EventName": "PM_FLUSH_COMPLETION",
+    "BriefDescription": "The instruction that was next to complete did not complete because it suffered a flush"
+  },
+  {,
+    "EventCode": "0x2D154",
+    "EventName": "PM_MRK_DERAT_MISS_64K",
+    "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 64K"
+  },
+  {,
+    "EventCode": "0x4016E",
+    "EventName": "PM_THRESH_NOT_MET",
+    "BriefDescription": "Threshold counter did not meet threshold"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/powerpc/power9/frontend.json b/pmu-events/arch/powerpc/power9/frontend.json
new file mode 100644
index 0000000..f9fa84b
--- /dev/null
+++ b/pmu-events/arch/powerpc/power9/frontend.json
@@ -0,0 +1,357 @@
+[
+  {,
+    "EventCode": "0x25044",
+    "EventName": "PM_IPTEG_FROM_L31_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x101E8",
+    "EventName": "PM_THRESH_EXC_256",
+    "BriefDescription": "Threshold counter exceeded a count of 256"
+  },
+  {,
+    "EventCode": "0x4504E",
+    "EventName": "PM_IPTEG_FROM_L3MISS",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x1006A",
+    "EventName": "PM_NTC_ISSUE_HELD_DARQ_FULL",
+    "BriefDescription": "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it"
+  },
+  {,
+    "EventCode": "0x4E016",
+    "EventName": "PM_CMPLU_STALL_LSAQ_ARB",
+    "BriefDescription": "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch"
+  },
+  {,
+    "EventCode": "0x1001A",
+    "EventName": "PM_LSU_SRQ_FULL_CYC",
+    "BriefDescription": "Cycles in which the Store Queue is full on all 4 slices. This event is not per thread. All the threads will see the same count for this core resource"
+  },
+  {,
+    "EventCode": "0x1E15E",
+    "EventName": "PM_MRK_L2_TM_REQ_ABORT",
+    "BriefDescription": "TM abort"
+  },
+  {,
+    "EventCode": "0x34052",
+    "EventName": "PM_INST_SYS_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x20114",
+    "EventName": "PM_MRK_L2_RC_DISP",
+    "BriefDescription": "Marked Instruction RC dispatched in L2"
+  },
+  {,
+    "EventCode": "0x4C044",
+    "EventName": "PM_DATA_FROM_L31_ECO_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a demand load"
+  },
+  {,
+    "EventCode": "0x1C044",
+    "EventName": "PM_DATA_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a demand load"
+  },
+  {,
+    "EventCode": "0x44050",
+    "EventName": "PM_INST_SYS_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x30154",
+    "EventName": "PM_MRK_FAB_RSP_DCLAIM",
+    "BriefDescription": "Marked store had to do a dclaim"
+  },
+  {,
+    "EventCode": "0x30014",
+    "EventName": "PM_CMPLU_STALL_STORE_FIN_ARB",
+    "BriefDescription": "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe"
+  },
+  {,
+    "EventCode": "0x3E054",
+    "EventName": "PM_LD_MISS_L1",
+    "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count (i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
+  },
+  {,
+    "EventCode": "0x2E01A",
+    "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT",
+    "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete"
+  },
+  {,
+    "EventCode": "0x2D01C",
+    "EventName": "PM_CMPLU_STALL_STCX",
+    "BriefDescription": "Finish stall because the NTF instruction was a stcx waiting for response from L2"
+  },
+  {,
+    "EventCode": "0x2C010",
+    "EventName": "PM_CMPLU_STALL_LSU",
+    "BriefDescription": "Completion stall by LSU instruction"
+  },
+  {,
+    "EventCode": "0x2C042",
+    "EventName": "PM_DATA_FROM_L3_MEPF",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to a demand load"
+  },
+  {,
+    "EventCode": "0x4E012",
+    "EventName": "PM_CMPLU_STALL_MTFPSCR",
+    "BriefDescription": "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)"
+  },
+  {,
+    "EventCode": "0x100F2",
+    "EventName": "PM_1PLUS_PPC_CMPL",
+    "BriefDescription": "1 or more ppc insts finished"
+  },
+  {,
+    "EventCode": "0x3001C",
+    "EventName": "PM_LSU_REJECT_LMQ_FULL",
+    "BriefDescription": "LSU Reject due to LMQ full (up to 4 per cycle)"
+  },
+  {,
+    "EventCode": "0x15046",
+    "EventName": "PM_IPTEG_FROM_L31_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x1015E",
+    "EventName": "PM_MRK_FAB_RSP_RD_T_INTV",
+    "BriefDescription": "Sampled Read got a T intervention"
+  },
+  {,
+    "EventCode": "0x101EC",
+    "EventName": "PM_THRESH_MET",
+    "BriefDescription": "threshold exceeded"
+  },
+  {,
+    "EventCode": "0x10020",
+    "EventName": "PM_PMC4_REWIND",
+    "BriefDescription": "PMC4 Rewind Event"
+  },
+  {,
+    "EventCode": "0x301EA",
+    "EventName": "PM_THRESH_EXC_1024",
+    "BriefDescription": "Threshold counter exceeded a value of 1024"
+  },
+  {,
+    "EventCode": "0x34056",
+    "EventName": "PM_CMPLU_STALL_LSU_MFSPR",
+    "BriefDescription": "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned"
+  },
+  {,
+    "EventCode": "0x44056",
+    "EventName": "PM_VECTOR_ST_CMPL",
+    "BriefDescription": "Number of vector store instructions completed"
+  },
+  {,
+    "EventCode": "0x2C124",
+    "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a marked load"
+  },
+  {,
+    "EventCode": "0x4C12A",
+    "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC",
+    "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x30060",
+    "EventName": "PM_TM_TRANS_RUN_INST",
+    "BriefDescription": "Run instructions completed in transactional state (gated by the run latch)"
+  },
+  {,
+    "EventCode": "0x2C014",
+    "EventName": "PM_CMPLU_STALL_STORE_FINISH",
+    "BriefDescription": "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish"
+  },
+  {,
+    "EventCode": "0x3515A",
+    "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC",
+    "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x34050",
+    "EventName": "PM_INST_SYS_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x3015E",
+    "EventName": "PM_MRK_FAB_RSP_CLAIM_RTY",
+    "BriefDescription": "Sampled store did a rwitm and got a rty"
+  },
+  {,
+    "EventCode": "0x0",
+    "EventName": "PM_SUSPENDED",
+    "BriefDescription": "Counter OFF"
+  },
+  {,
+    "EventCode": "0x10010",
+    "EventName": "PM_PMC4_OVERFLOW",
+    "BriefDescription": "Overflow from counter 4"
+  },
+  {,
+    "EventCode": "0x3E04A",
+    "EventName": "PM_DPTEG_FROM_RMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Remote) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x2F152",
+    "EventName": "PM_MRK_FAB_RSP_DCLAIM_CYC",
+    "BriefDescription": "cycles L2 RC took for a dclaim"
+  },
+  {,
+    "EventCode": "0x10004",
+    "EventName": "PM_CMPLU_STALL_LRQ_OTHER",
+    "BriefDescription": "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others"
+  },
+  {,
+    "EventCode": "0x4F150",
+    "EventName": "PM_MRK_FAB_RSP_RWITM_CYC",
+    "BriefDescription": "cycles L2 RC took for a rwitm"
+  },
+  {,
+    "EventCode": "0x4E042",
+    "EventName": "PM_DPTEG_FROM_L3",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x1F054",
+    "EventName": "PM_TLB_HIT",
+    "BriefDescription": "Number of times the TLB had the data required by the instruction. Applies to both HPT and RPT"
+  },
+  {,
+    "EventCode": "0x2C01E",
+    "EventName": "PM_CMPLU_STALL_SYNC_PMU_INT",
+    "BriefDescription": "Cycles in which the NTC instruction is waiting for a synchronous PMU interrupt"
+  },
+  {,
+    "EventCode": "0x24050",
+    "EventName": "PM_IOPS_CMPL",
+    "BriefDescription": "Internal Operations completed"
+  },
+  {,
+    "EventCode": "0x1515C",
+    "EventName": "PM_SYNC_MRK_BR_MPRED",
+    "BriefDescription": "Marked Branch mispredict that can cause a synchronous interrupt"
+  },
+  {,
+    "EventCode": "0x300FA",
+    "EventName": "PM_INST_FROM_L3MISS",
+    "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet"
+  },
+  {,
+    "EventCode": "0x15044",
+    "EventName": "PM_IPTEG_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x15152",
+    "EventName": "PM_SYNC_MRK_BR_LINK",
+    "BriefDescription": "Marked Branch and link branch that can cause a synchronous interrupt"
+  },
+  {,
+    "EventCode": "0x1E050",
+    "EventName": "PM_CMPLU_STALL_TEND",
+    "BriefDescription": "Finish stall because the NTF instruction was a tend instruction awaiting response from L2"
+  },
+  {,
+    "EventCode": "0x1013E",
+    "EventName": "PM_MRK_LD_MISS_EXPOSED_CYC",
+    "BriefDescription": "Marked Load exposed Miss (use edge detect to count #)"
+  },
+  {,
+    "EventCode": "0x25042",
+    "EventName": "PM_IPTEG_FROM_L3_MEPF",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x14054",
+    "EventName": "PM_INST_PUMP_CPRED",
+    "BriefDescription": "Pump prediction correct. Counts across all types of pumps for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x4015E",
+    "EventName": "PM_MRK_FAB_RSP_RD_RTY",
+    "BriefDescription": "Sampled L2 reads retry count"
+  },
+  {,
+    "EventCode": "0x45048",
+    "EventName": "PM_IPTEG_FROM_DL2L3_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x44052",
+    "EventName": "PM_INST_PUMP_MPRED",
+    "BriefDescription": "Pump misprediction. Counts across all types of pumps for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x30026",
+    "EventName": "PM_CMPLU_STALL_STORE_DATA",
+    "BriefDescription": "Finish stall because the next to finish instruction was a store waiting on data"
+  },
+  {,
+    "EventCode": "0x301E6",
+    "EventName": "PM_MRK_DERAT_MISS",
+    "BriefDescription": "Erat Miss (TLB Access) All page sizes"
+  },
+  {,
+    "EventCode": "0x24154",
+    "EventName": "PM_THRESH_ACC",
+    "BriefDescription": "This event increments every time the threshold event counter ticks. Thresholding must be enabled (via MMCRA) and the thresholding start event must occur for this counter to increment. It will stop incrementing when the thresholding stop event occurs or when thresholding is disabled, until the next time a configured thresholding start event occurs."
+  },
+  {,
+    "EventCode": "0x2015E",
+    "EventName": "PM_MRK_FAB_RSP_RWITM_RTY",
+    "BriefDescription": "Sampled store did a rwitm and got a rty"
+  },
+  {,
+    "EventCode": "0x200FA",
+    "EventName": "PM_BR_TAKEN_CMPL",
+    "BriefDescription": "New event for Branch Taken"
+  },
+  {,
+    "EventCode": "0x35044",
+    "EventName": "PM_IPTEG_FROM_L31_ECO_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x4C010",
+    "EventName": "PM_CMPLU_STALL_STORE_PIPE_ARB",
+    "BriefDescription": "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject. This means the instruction is ready to relaunch and tried once but lost arbitration"
+  },
+  {,
+    "EventCode": "0x4C01C",
+    "EventName": "PM_CMPLU_STALL_ST_FWD",
+    "BriefDescription": "Completion stall due to store forward"
+  },
+  {,
+    "EventCode": "0x3515C",
+    "EventName": "PM_MRK_DATA_FROM_RL4",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group (Remote) due to a marked load"
+  },
+  {,
+    "EventCode": "0x2D14C",
+    "EventName": "PM_MRK_DATA_FROM_L31_ECO_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x40116",
+    "EventName": "PM_MRK_LARX_FIN",
+    "BriefDescription": "Larx finished"
+  },
+  {,
+    "EventCode": "0x1003A",
+    "EventName": "PM_CMPLU_STALL_LSU_FIN",
+    "BriefDescription": "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish"
+  },
+  {,
+    "EventCode": "0x3012A",
+    "EventName": "PM_MRK_L2_RC_DONE",
+    "BriefDescription": "Marked RC done"
+  },
+  {,
+    "EventCode": "0x45044",
+    "EventName": "PM_IPTEG_FROM_L31_ECO_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to an instruction side request"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/powerpc/power9/marked.json b/pmu-events/arch/powerpc/power9/marked.json
new file mode 100644
index 0000000..b1954c3
--- /dev/null
+++ b/pmu-events/arch/powerpc/power9/marked.json
@@ -0,0 +1,627 @@
+[
+  {,
+    "EventCode": "0x3013E",
+    "EventName": "PM_MRK_STALL_CMPLU_CYC",
+    "BriefDescription": "Number of cycles the marked instruction is experiencing a stall while it is next to complete (NTC)"
+  },
+  {,
+    "EventCode": "0x4F056",
+    "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L3MISS",
+    "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from beyond the core's L3 data cache. The source could be local/remote/distant memory or another core's cache"
+  },
+  {,
+    "EventCode": "0x24158",
+    "EventName": "PM_MRK_INST",
+    "BriefDescription": "An instruction was marked. Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens"
+  },
+  {,
+    "EventCode": "0x1E046",
+    "EventName": "PM_DPTEG_FROM_L31_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x3C04A",
+    "EventName": "PM_DATA_FROM_RMEM",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Remote) due to a demand load"
+  },
+  {,
+    "EventCode": "0x2C01C",
+    "EventName": "PM_CMPLU_STALL_DMISS_REMOTE",
+    "BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)"
+  },
+  {,
+    "EventCode": "0x44040",
+    "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_OTHER",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x2E050",
+    "EventName": "PM_DARQ0_7_9_ENTRIES",
+    "BriefDescription": "Cycles in which 7, 8, or 9 DARQ entries (out of 12) are in use"
+  },
+  {,
+    "EventCode": "0x2D02E",
+    "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L2",
+    "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L2 data cache. This implies that a level 4 PWC access was not necessary for this translation"
+  },
+  {,
+    "EventCode": "0x3F05E",
+    "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L3",
+    "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L3 data cache. This implies that a level 4 PWC access was not necessary for this translation"
+  },
+  {,
+    "EventCode": "0x2E01E",
+    "EventName": "PM_CMPLU_STALL_NTC_FLUSH",
+    "BriefDescription": "Completion stall due to ntc flush"
+  },
+  {,
+    "EventCode": "0x1F14C",
+    "EventName": "PM_MRK_DPTEG_FROM_LL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x20130",
+    "EventName": "PM_MRK_INST_DECODED",
+    "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only"
+  },
+  {,
+    "EventCode": "0x3F144",
+    "EventName": "PM_MRK_DPTEG_FROM_L31_ECO_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4D058",
+    "EventName": "PM_VECTOR_FLOP_CMPL",
+    "BriefDescription": "Vector FP instruction completed"
+  },
+  {,
+    "EventCode": "0x14040",
+    "EventName": "PM_INST_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x4404E",
+    "EventName": "PM_INST_FROM_L3MISS_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to an instruction fetch"
+  },
+  {,
+    "EventCode": "0x3003A",
+    "EventName": "PM_CMPLU_STALL_EXCEPTION",
+    "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete"
+  },
+  {,
+    "EventCode": "0x4F144",
+    "EventName": "PM_MRK_DPTEG_FROM_L31_ECO_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x3E044",
+    "EventName": "PM_DPTEG_FROM_L31_ECO_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x300F6",
+    "EventName": "PM_L1_DCACHE_RELOAD_VALID",
+    "BriefDescription": "DL1 reloaded due to Demand Load"
+  },
+  {,
+    "EventCode": "0x1415E",
+    "EventName": "PM_MRK_DATA_FROM_L3MISS_CYC",
+    "BriefDescription": "Duration in cycles to reload from a location other than the local core's L3 due to a marked load"
+  },
+  {,
+    "EventCode": "0x1E052",
+    "EventName": "PM_CMPLU_STALL_SLB",
+    "BriefDescription": "Finish stall because the NTF instruction was awaiting L2 response for an SLB"
+  },
+  {,
+    "EventCode": "0x4404C",
+    "EventName": "PM_INST_FROM_DMEM",
+    "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x3000E",
+    "EventName": "PM_FXU_1PLUS_BUSY",
+    "BriefDescription": "At least one of the 4 FXU units is busy"
+  },
+  {,
+    "EventCode": "0x2C048",
+    "EventName": "PM_DATA_FROM_LMEM",
+    "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a demand load"
+  },
+  {,
+    "EventCode": "0x3000A",
+    "EventName": "PM_CMPLU_STALL_PM",
+    "BriefDescription": "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish. Includes permute and decimal fixed point instructions (128 bit BCD arithmetic) + a few 128 bit fixpoint add/subtract instructions with carry. Not qualified by vector or multicycle"
+  },
+  {,
+    "EventCode": "0x1504E",
+    "EventName": "PM_IPTEG_FROM_L2MISS",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x1C052",
+    "EventName": "PM_DATA_GRP_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for a demand load"
+  },
+  {,
+    "EventCode": "0x30008",
+    "EventName": "PM_DISP_STARVED",
+    "BriefDescription": "Dispatched Starved"
+  },
+  {,
+    "EventCode": "0x14042",
+    "EventName": "PM_INST_FROM_L2",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x4000C",
+    "EventName": "PM_FREQ_UP",
+    "BriefDescription": "Power Management: Above Threshold A"
+  },
+  {,
+    "EventCode": "0x3C050",
+    "EventName": "PM_DATA_SYS_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for a demand load"
+  },
+  {,
+    "EventCode": "0x25040",
+    "EventName": "PM_IPTEG_FROM_L2_MEPF",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x10132",
+    "EventName": "PM_MRK_INST_ISSUED",
+    "BriefDescription": "Marked instruction issued"
+  },
+  {,
+    "EventCode": "0x1C046",
+    "EventName": "PM_DATA_FROM_L31_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a demand load"
+  },
+  {,
+    "EventCode": "0x2C044",
+    "EventName": "PM_DATA_FROM_L31_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a demand load"
+  },
+  {,
+    "EventCode": "0x2C04A",
+    "EventName": "PM_DATA_FROM_RL4",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group (Remote) due to a demand load"
+  },
+  {,
+    "EventCode": "0x24044",
+    "EventName": "PM_INST_FROM_L31_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x4C050",
+    "EventName": "PM_DATA_SYS_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for a demand load"
+  },
+  {,
+    "EventCode": "0x2C052",
+    "EventName": "PM_DATA_GRP_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for a demand load"
+  },
+  {,
+    "EventCode": "0x2F148",
+    "EventName": "PM_MRK_DPTEG_FROM_LMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4D01A",
+    "EventName": "PM_CMPLU_STALL_EIEIO",
+    "BriefDescription": "Finish stall because the NTF instruction is an EIEIO waiting for response from L2"
+  },
+  {,
+    "EventCode": "0x4F14E",
+    "EventName": "PM_MRK_DPTEG_FROM_L3MISS",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4F05A",
+    "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L3",
+    "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L3 data cache. This is the deepest level of PWC possible for a translation"
+  },
+  {,
+    "EventCode": "0x1F05A",
+    "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L2",
+    "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L2 data cache. This is the deepest level of PWC possible for a translation"
+  },
+  {,
+    "EventCode": "0x30068",
+    "EventName": "PM_L1_ICACHE_RELOADED_PREF",
+    "BriefDescription": "Counts all Icache prefetch reloads (includes demand turned into prefetch)"
+  },
+  {,
+    "EventCode": "0x4C04A",
+    "EventName": "PM_DATA_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "The processor's data cache was reloaded with either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a demand load"
+  },
+  {,
+    "EventCode": "0x400FE",
+    "EventName": "PM_DATA_FROM_MEMORY",
+    "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a demand load"
+  },
+  {,
+    "EventCode": "0x3F058",
+    "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L3",
+    "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L3 data cache"
+  },
+  {,
+    "EventCode": "0x3C052",
+    "EventName": "PM_DATA_SYS_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load"
+  },
+  {,
+    "EventCode": "0x4D142",
+    "EventName": "PM_MRK_DATA_FROM_L3",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a marked load"
+  },
+  {,
+    "EventCode": "0x30050",
+    "EventName": "PM_SYS_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x30028",
+    "EventName": "PM_CMPLU_STALL_SPEC_FINISH",
+    "BriefDescription": "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC"
+  },
+  {,
+    "EventCode": "0x400F4",
+    "EventName": "PM_RUN_PURR",
+    "BriefDescription": "Run_PURR"
+  },
+  {,
+    "EventCode": "0x3404C",
+    "EventName": "PM_INST_FROM_DL4",
+    "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x3D05A",
+    "EventName": "PM_NTC_ISSUE_HELD_OTHER",
+    "BriefDescription": "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU"
+  },
+  {,
+    "EventCode": "0x2E048",
+    "EventName": "PM_DPTEG_FROM_LMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x2D02A",
+    "EventName": "PM_RADIX_PWC_L3_PDE_FROM_L2",
+    "BriefDescription": "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L2 data cache"
+  },
+  {,
+    "EventCode": "0x1F05C",
+    "EventName": "PM_RADIX_PWC_L3_PDE_FROM_L3",
+    "BriefDescription": "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L3 data cache"
+  },
+  {,
+    "EventCode": "0x4D04A",
+    "EventName": "PM_DARQ0_0_3_ENTRIES",
+    "BriefDescription": "Cycles in which 3 or fewer DARQ entries (out of 12) are in use"
+  },
+  {,
+    "EventCode": "0x1404C",
+    "EventName": "PM_INST_FROM_LL4",
+    "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x200FD",
+    "EventName": "PM_L1_ICACHE_MISS",
+    "BriefDescription": "Demand iCache Miss"
+  },
+  {,
+    "EventCode": "0x34040",
+    "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_LDHITST",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x20138",
+    "EventName": "PM_MRK_ST_NEST",
+    "BriefDescription": "Marked store sent to nest"
+  },
+  {,
+    "EventCode": "0x44048",
+    "EventName": "PM_INST_FROM_DL2L3_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x35046",
+    "EventName": "PM_IPTEG_FROM_L21_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x4C04E",
+    "EventName": "PM_DATA_FROM_L3MISS_MOD",
+    "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a demand load"
+  },
+  {,
+    "EventCode": "0x401E0",
+    "EventName": "PM_MRK_INST_CMPL",
+    "BriefDescription": "marked instruction completed"
+  },
+  {,
+    "EventCode": "0x2C128",
+    "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC",
+    "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x34044",
+    "EventName": "PM_INST_FROM_L31_ECO_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x4E018",
+    "EventName": "PM_CMPLU_STALL_NTC_DISP_FIN",
+    "BriefDescription": "Finish stall because the NTF instruction was one that must finish at dispatch."
+  },
+  {,
+    "EventCode": "0x2E05E",
+    "EventName": "PM_LMQ_EMPTY_CYC",
+    "BriefDescription": "Cycles in which the LMQ has no pending load misses for this thread"
+  },
+  {,
+    "EventCode": "0x4C122",
+    "EventName": "PM_DARQ1_0_3_ENTRIES",
+    "BriefDescription": "Cycles in which 3 or fewer DARQ1 entries (out of 12) are in use"
+  },
+  {,
+    "EventCode": "0x4F058",
+    "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L3",
+    "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L3 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation"
+  },
+  {,
+    "EventCode": "0x14046",
+    "EventName": "PM_INST_FROM_L31_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x3012C",
+    "EventName": "PM_MRK_ST_FWD",
+    "BriefDescription": "Marked st forwards"
+  },
+  {,
+    "EventCode": "0x101E0",
+    "EventName": "PM_MRK_INST_DISP",
+    "BriefDescription": "The thread has dispatched a randomly sampled marked instruction"
+  },
+  {,
+    "EventCode": "0x1D058",
+    "EventName": "PM_DARQ0_10_12_ENTRIES",
+    "BriefDescription": "Cycles in which 10 or more DARQ entries (out of 12) are in use"
+  },
+  {,
+    "EventCode": "0x300FE",
+    "EventName": "PM_DATA_FROM_L3MISS",
+    "BriefDescription": "Demand LD - L3 Miss (not L2 hit and not L3 hit)"
+  },
+  {,
+    "EventCode": "0x30006",
+    "EventName": "PM_CMPLU_STALL_OTHER_CMPL",
+    "BriefDescription": "Instructions the core completed while this thread was stalled"
+  },
+  {,
+    "EventCode": "0x1005C",
+    "EventName": "PM_CMPLU_STALL_DP",
+    "BriefDescription": "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Not qualified multicycle. Qualified by NOT vector"
+  },
+  {,
+    "EventCode": "0x1E042",
+    "EventName": "PM_DPTEG_FROM_L2",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x1016E",
+    "EventName": "PM_MRK_BR_CMPL",
+    "BriefDescription": "Branch Instruction completed"
+  },
+  {,
+    "EventCode": "0x2013A",
+    "EventName": "PM_MRK_BRU_FIN",
+    "BriefDescription": "bru marked instr finish"
+  },
+  {,
+    "EventCode": "0x4F05E",
+    "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L3MISS",
+    "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from beyond the core's L3 data cache. This implies that a level 4 PWC access was not necessary for this translation. The source could be local/remote/distant memory or another core's cache"
+  },
+  {,
+    "EventCode": "0x400FC",
+    "EventName": "PM_ITLB_MISS",
+    "BriefDescription": "ITLB Reloaded. Counts 1 per ITLB miss for HPT but multiple for radix depending on number of levels traversed"
+  },
+  {,
+    "EventCode": "0x1E044",
+    "EventName": "PM_DPTEG_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4D05A",
+    "EventName": "PM_NON_MATH_FLOP_CMPL",
+    "BriefDescription": "Non FLOP operation completed"
+  },
+  {,
+    "EventCode": "0x101E2",
+    "EventName": "PM_MRK_BR_TAKEN_CMPL",
+    "BriefDescription": "Marked Branch Taken completed"
+  },
+  {,
+    "EventCode": "0x3E158",
+    "EventName": "PM_MRK_STCX_FAIL",
+    "BriefDescription": "marked stcx failed"
+  },
+  {,
+    "EventCode": "0x1C048",
+    "EventName": "PM_DATA_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "The processor's data cache was reloaded with either shared or modified data from another core's L2/L3 on the same chip due to a demand load"
+  },
+  {,
+    "EventCode": "0x1C054",
+    "EventName": "PM_DATA_PUMP_CPRED",
+    "BriefDescription": "Pump prediction correct. Counts across all types of pumps for a demand load"
+  },
+  {,
+    "EventCode": "0x4405E",
+    "EventName": "PM_DARQ_STORE_REJECT",
+    "BriefDescription": "The DARQ attempted to transmit a store into an LSAQ or SRQ entry but It was rejected. Divide by PM_DARQ_STORE_XMIT to get reject ratio"
+  },
+  {,
+    "EventCode": "0x1C042",
+    "EventName": "PM_DATA_FROM_L2",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a demand load"
+  },
+  {,
+    "EventCode": "0x1D14C",
+    "EventName": "PM_MRK_DATA_FROM_LL4",
+    "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a marked load"
+  },
+  {,
+    "EventCode": "0x1006C",
+    "EventName": "PM_RUN_CYC_ST_MODE",
+    "BriefDescription": "Cycles run latch is set and core is in ST mode"
+  },
+  {,
+    "EventCode": "0x3C044",
+    "EventName": "PM_DATA_FROM_L31_ECO_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a demand load"
+  },
+  {,
+    "EventCode": "0x4C052",
+    "EventName": "PM_DATA_PUMP_MPRED",
+    "BriefDescription": "Pump misprediction. Counts across all types of pumps for a demand load"
+  },
+  {,
+    "EventCode": "0x20050",
+    "EventName": "PM_GRP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x1F150",
+    "EventName": "PM_MRK_ST_L2DISP_TO_CMPL_CYC",
+    "BriefDescription": "cycles from L2 rc disp to l2 rc completion"
+  },
+  {,
+    "EventCode": "0x4505A",
+    "EventName": "PM_SP_FLOP_CMPL",
+    "BriefDescription": "SP instruction completed"
+  },
+  {,
+    "EventCode": "0x4000A",
+    "EventName": "PM_ISQ_36_44_ENTRIES",
+    "BriefDescription": "Cycles in which 36 or more Issue Queue entries are in use. This is a shared event, not per thread. There are 44 issue queue entries across 4 slices in the whole core"
+  },
+  {,
+    "EventCode": "0x2C12E",
+    "EventName": "PM_MRK_DATA_FROM_LL4_CYC",
+    "BriefDescription": "Duration in cycles to reload from the local chip's L4 cache due to a marked load"
+  },
+  {,
+    "EventCode": "0x2C058",
+    "EventName": "PM_MEM_PREF",
+    "BriefDescription": "Memory prefetch for this thread. Includes L4"
+  },
+  {,
+    "EventCode": "0x40012",
+    "EventName": "PM_L1_ICACHE_RELOADED_ALL",
+    "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch"
+  },
+  {,
+    "EventCode": "0x3003C",
+    "EventName": "PM_CMPLU_STALL_NESTED_TEND",
+    "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay"
+  },
+  {,
+    "EventCode": "0x3D05C",
+    "EventName": "PM_DISP_HELD_HB_FULL",
+    "BriefDescription": "Dispatch held due to History Buffer full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)"
+  },
+  {,
+    "EventCode": "0x30052",
+    "EventName": "PM_SYS_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x2E044",
+    "EventName": "PM_DPTEG_FROM_L31_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x34048",
+    "EventName": "PM_INST_FROM_DL2L3_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x45042",
+    "EventName": "PM_IPTEG_FROM_L3",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x15042",
+    "EventName": "PM_IPTEG_FROM_L2",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x1C05E",
+    "EventName": "PM_MEM_LOC_THRESH_LSU_MED",
+    "BriefDescription": "Local memory above threshold for data prefetch"
+  },
+  {,
+    "EventCode": "0x40134",
+    "EventName": "PM_MRK_INST_TIMEO",
+    "BriefDescription": "marked Instruction finish timeout (instruction lost)"
+  },
+  {,
+    "EventCode": "0x1002C",
+    "EventName": "PM_L1_DCACHE_RELOADED_ALL",
+    "BriefDescription": "L1 data cache reloaded for demand. If MMCR1[16] is 1, prefetches will be included as well"
+  },
+  {,
+    "EventCode": "0x30130",
+    "EventName": "PM_MRK_INST_FIN",
+    "BriefDescription": "marked instruction finished"
+  },
+  {,
+    "EventCode": "0x1F14A",
+    "EventName": "PM_MRK_DPTEG_FROM_RL2L3_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x3504E",
+    "EventName": "PM_DARQ0_4_6_ENTRIES",
+    "BriefDescription": "Cycles in which 4, 5, or 6 DARQ entries (out of 12) are in use"
+  },
+  {,
+    "EventCode": "0x30064",
+    "EventName": "PM_DARQ_STORE_XMIT",
+    "BriefDescription": "The DARQ attempted to transmit a store into an LSAQ or SRQ entry. Includes rejects. Not qualified by thread, so it includes counts for the whole core"
+  },
+  {,
+    "EventCode": "0x45046",
+    "EventName": "PM_IPTEG_FROM_L21_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x2C016",
+    "EventName": "PM_CMPLU_STALL_PASTE",
+    "BriefDescription": "Finish stall because the NTF instruction was a paste waiting for response from L2"
+  },
+  {,
+    "EventCode": "0x24156",
+    "EventName": "PM_MRK_STCX_FIN",
+    "BriefDescription": "Number of marked stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed"
+  },
+  {,
+    "EventCode": "0x15150",
+    "EventName": "PM_SYNC_MRK_PROBE_NOP",
+    "BriefDescription": "Marked probeNops which can cause synchronous interrupts"
+  },
+  {,
+    "EventCode": "0x301E4",
+    "EventName": "PM_MRK_BR_MPRED_CMPL",
+    "BriefDescription": "Marked Branch Mispredicted"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/powerpc/power9/memory.json b/pmu-events/arch/powerpc/power9/memory.json
new file mode 100644
index 0000000..2e2ebc7
--- /dev/null
+++ b/pmu-events/arch/powerpc/power9/memory.json
@@ -0,0 +1,127 @@
+[
+  {,
+    "EventCode": "0x3006E",
+    "EventName": "PM_NEST_REF_CLK",
+    "BriefDescription": "Multiply by 4 to obtain the number of PB cycles"
+  },
+  {,
+    "EventCode": "0x20010",
+    "EventName": "PM_PMC1_OVERFLOW",
+    "BriefDescription": "Overflow from counter 1"
+  },
+  {,
+    "EventCode": "0x2005A",
+    "EventName": "PM_DARQ1_7_9_ENTRIES",
+    "BriefDescription": "Cycles in which 7 to 9 DARQ1 entries (out of 12) are in use"
+  },
+  {,
+    "EventCode": "0x3C048",
+    "EventName": "PM_DATA_FROM_DL2L3_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load"
+  },
+  {,
+    "EventCode": "0x10008",
+    "EventName": "PM_RUN_SPURR",
+    "BriefDescription": "Run SPURR"
+  },
+  {,
+    "EventCode": "0x200F6",
+    "EventName": "PM_LSU_DERAT_MISS",
+    "BriefDescription": "DERAT Reloaded due to a DERAT miss"
+  },
+  {,
+    "EventCode": "0x4C048",
+    "EventName": "PM_DATA_FROM_DL2L3_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load"
+  },
+  {,
+    "EventCode": "0x1D15E",
+    "EventName": "PM_MRK_RUN_CYC",
+    "BriefDescription": "Run cycles in which a marked instruction is in the pipeline"
+  },
+  {,
+    "EventCode": "0x4003E",
+    "EventName": "PM_LD_CMPL",
+    "BriefDescription": "count of Loads completed"
+  },
+  {,
+    "EventCode": "0x4C042",
+    "EventName": "PM_DATA_FROM_L3",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a demand load"
+  },
+  {,
+    "EventCode": "0x4D02C",
+    "EventName": "PM_PMC1_REWIND",
+    "BriefDescription": ""
+  },
+  {,
+    "EventCode": "0x15158",
+    "EventName": "PM_SYNC_MRK_L2HIT",
+    "BriefDescription": "Marked L2 Hits that can throw a synchronous interrupt"
+  },
+  {,
+    "EventCode": "0x3404A",
+    "EventName": "PM_INST_FROM_RMEM",
+    "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Remote) due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x301E2",
+    "EventName": "PM_MRK_ST_CMPL",
+    "BriefDescription": "Marked store completed and sent to nest"
+  },
+  {,
+    "EventCode": "0x1C050",
+    "EventName": "PM_DATA_CHIP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for a demand load"
+  },
+  {,
+    "EventCode": "0x4C040",
+    "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a demand load"
+  },
+  {,
+    "EventCode": "0x2E05C",
+    "EventName": "PM_LSU_REJECT_ERAT_MISS",
+    "BriefDescription": "LSU Reject due to ERAT (up to 4 per cycle)"
+  },
+  {,
+    "EventCode": "0x1000A",
+    "EventName": "PM_PMC3_REWIND",
+    "BriefDescription": "PMC3 rewind event. A rewind happens when a speculative event (such as latency or CPI stack) is selected on PMC3 and the stall reason or reload source did not match the one programmed in PMC3. When this occurs, the count in PMC3 will not change."
+  },
+  {,
+    "EventCode": "0x3C058",
+    "EventName": "PM_LARX_FIN",
+    "BriefDescription": "Larx finished"
+  },
+  {,
+    "EventCode": "0x1C040",
+    "EventName": "PM_DATA_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a demand load"
+  },
+  {,
+    "EventCode": "0x2C040",
+    "EventName": "PM_DATA_FROM_L2_MEPF",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to a demand load"
+  },
+  {,
+    "EventCode": "0x2E05A",
+    "EventName": "PM_LRQ_REJECT",
+    "BriefDescription": "Internal LSU reject from LRQ. Rejects cause the load to go back to LRQ, but it stays contained within the LSU once it gets issued. This event counts the number of times the LRQ attempts to relaunch an instruction after a reject. Any load can suffer multiple rejects"
+  },
+  {,
+    "EventCode": "0x2C05C",
+    "EventName": "PM_INST_GRP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for an instruction fetch (demand only)"
+  },
+  {,
+    "EventCode": "0x4D056",
+    "EventName": "PM_NON_FMA_FLOP_CMPL",
+    "BriefDescription": "Non FMA instruction completed"
+  },
+  {,
+    "EventCode": "0x3E050",
+    "EventName": "PM_DARQ1_4_6_ENTRIES",
+    "BriefDescription": "Cycles in which 4, 5, or 6 DARQ1 entries (out of 12) are in use"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/powerpc/power9/other.json b/pmu-events/arch/powerpc/power9/other.json
new file mode 100644
index 0000000..48cf4f9
--- /dev/null
+++ b/pmu-events/arch/powerpc/power9/other.json
@@ -0,0 +1,2337 @@
+[
+  {,
+    "EventCode": "0x3084",
+    "EventName": "PM_ISU1_ISS_HOLD_ALL",
+    "BriefDescription": "All ISU rejects"
+  },
+  {,
+    "EventCode": "0xF880",
+    "EventName": "PM_SNOOP_TLBIE",
+    "BriefDescription": "TLBIE snoop"
+  },
+  {,
+    "EventCode": "0x4088",
+    "EventName": "PM_IC_DEMAND_REQ",
+    "BriefDescription": "Demand Instruction fetch request"
+  },
+  {,
+    "EventCode": "0x20A4",
+    "EventName": "PM_TM_TRESUME",
+    "BriefDescription": "TM resume instruction completed"
+  },
+  {,
+    "EventCode": "0x40008",
+    "EventName": "PM_SRQ_EMPTY_CYC",
+    "BriefDescription": "Cycles in which the SRQ has at least one (out of four) empty slice"
+  },
+  {,
+    "EventCode": "0x20064",
+    "EventName": "PM_IERAT_RELOAD_4K",
+    "BriefDescription": "IERAT reloaded (after a miss) for 4K pages"
+  },
+  {,
+    "EventCode": "0x260B4",
+    "EventName": "PM_L3_P2_LCO_RTY",
+    "BriefDescription": "L3 initiated LCO received retry on port 2 (can try 4 times)"
+  },
+  {,
+    "EventCode": "0x20006",
+    "EventName": "PM_DISP_HELD_ISSQ_FULL",
+    "BriefDescription": "Dispatch held due to Issue q full. Includes issue queue and branch queue"
+  },
+  {,
+    "EventCode": "0x201E4",
+    "EventName": "PM_MRK_DATA_FROM_L3MISS",
+    "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load"
+  },
+  {,
+    "EventCode": "0x4E044",
+    "EventName": "PM_DPTEG_FROM_L31_ECO_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x40B8",
+    "EventName": "PM_BR_MPRED_TAKEN_CR",
+    "BriefDescription": "A Conditional Branch that resolved to taken was mispredicted as not taken (due to the BHT Direction Prediction)."
+  },
+  {,
+    "EventCode": "0xF8AC",
+    "EventName": "PM_DC_DEALLOC_NO_CONF",
+    "BriefDescription": "A demand load referenced a line in an active fuzzy prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software. Fuzzy stream confirm (out of order effects, or pf can't keep up)"
+  },
+  {,
+    "EventCode": "0xD090",
+    "EventName": "PM_LS0_DC_COLLISIONS",
+    "BriefDescription": "Read-write data cache collisions"
+  },
+  {,
+    "EventCode": "0x40BC",
+    "EventName": "PM_THRD_PRIO_0_1_CYC",
+    "BriefDescription": "Cycles thread running at priority level 0 or 1"
+  },
+  {,
+    "EventCode": "0x4C054",
+    "EventName": "PM_DERAT_MISS_16G_1G",
+    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16G (hpt mode) or 1G (radix mode)"
+  },
+  {,
+    "EventCode": "0x2084",
+    "EventName": "PM_FLUSH_HB_RESTORE_CYC",
+    "BriefDescription": "Cycles in which no new instructions can be dispatched to the ICT after a flush.  History buffer recovery"
+  },
+  {,
+    "EventCode": "0x4F054",
+    "EventName": "PM_RADIX_PWC_MISS",
+    "BriefDescription": "A radix translation attempt missed in the TLB and all levels of page walk cache."
+  },
+  {,
+    "EventCode": "0x26882",
+    "EventName": "PM_L2_DC_INV",
+    "BriefDescription": "D-cache invalidates sent over the reload bus to the core"
+  },
+  {,
+    "EventCode": "0x24048",
+    "EventName": "PM_INST_FROM_LMEM",
+    "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0xD8B4",
+    "EventName": "PM_LSU0_LRQ_S0_VALID_CYC",
+    "BriefDescription": "Slot 0 of LRQ valid"
+  },
+  {,
+    "EventCode": "0x2E052",
+    "EventName": "PM_TM_PASSED",
+    "BriefDescription": "Number of TM transactions that passed"
+  },
+  {,
+    "EventCode": "0xF088",
+    "EventName": "PM_LSU0_STORE_REJECT",
+    "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met"
+  },
+  {,
+    "EventCode": "0x360B2",
+    "EventName": "PM_L3_GRP_GUESS_WRONG_LOW",
+    "BriefDescription": "Prefetch scope predictor selected GS or NNS, but was wrong because scope was LNS"
+  },
+  {,
+    "EventCode": "0x168A6",
+    "EventName": "PM_TM_CAM_OVERFLOW",
+    "BriefDescription": "L3 TM CAM is full when a L2 castout of TM_SC line occurs.  Line is pushed to memory"
+  },
+  {,
+    "EventCode": "0xE8B0",
+    "EventName": "PM_TEND_PEND_CYC",
+    "BriefDescription": "TEND latency per thread"
+  },
+  {,
+    "EventCode": "0x4884",
+    "EventName": "PM_IBUF_FULL_CYC",
+    "BriefDescription": "Cycles No room in ibuff"
+  },
+  {,
+    "EventCode": "0xD08C",
+    "EventName": "PM_LSU2_LDMX_FIN",
+    "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491):  The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region.  This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])."
+  },
+  {,
+    "EventCode": "0x300F8",
+    "EventName": "PM_TB_BIT_TRANS",
+    "BriefDescription": "timebase event"
+  },
+  {,
+    "EventCode": "0x3C040",
+    "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a demand load"
+  },
+  {,
+    "EventCode": "0xE0BC",
+    "EventName": "PM_LS0_PTE_TABLEWALK_CYC",
+    "BriefDescription": "Cycles when a tablewalk is pending on this thread on table 0"
+  },
+  {,
+    "EventCode": "0x3884",
+    "EventName": "PM_ISU3_ISS_HOLD_ALL",
+    "BriefDescription": "All ISU rejects"
+  },
+  {,
+    "EventCode": "0x468A0",
+    "EventName": "PM_L3_PF_OFF_CHIP_MEM",
+    "BriefDescription": "L3 PF from Off chip memory"
+  },
+  {,
+    "EventCode": "0x268AA",
+    "EventName": "PM_L3_P1_LCO_DATA",
+    "BriefDescription": "LCO sent with data port 1"
+  },
+  {,
+    "EventCode": "0xE894",
+    "EventName": "PM_LSU1_TM_L1_HIT",
+    "BriefDescription": "Load tm hit in L1"
+  },
+  {,
+    "EventCode": "0x5888",
+    "EventName": "PM_IC_INVALIDATE",
+    "BriefDescription": "Ic line invalidated"
+  },
+  {,
+    "EventCode": "0x2890",
+    "EventName": "PM_DISP_CLB_HELD_TLBIE",
+    "BriefDescription": "Dispatch Hold: Due to TLBIE"
+  },
+  {,
+    "EventCode": "0x1001C",
+    "EventName": "PM_CMPLU_STALL_THRD",
+    "BriefDescription": "Completion Stalled because the thread was blocked"
+  },
+  {,
+    "EventCode": "0x368A6",
+    "EventName": "PM_SNP_TM_HIT_T",
+    "BriefDescription": "TM snoop that is a store hits line in L3 in T, Tn or Te state (shared modified)"
+  },
+  {,
+    "EventCode": "0x3001A",
+    "EventName": "PM_DATA_TABLEWALK_CYC",
+    "BriefDescription": "Data Tablewalk Cycles.  Could be 1 or 2 active tablewalks. Includes data prefetches."
+  },
+  {,
+    "EventCode": "0xD894",
+    "EventName": "PM_LS3_DC_COLLISIONS",
+    "BriefDescription": "Read-write data cache collisions"
+  },
+  {,
+    "EventCode": "0x35158",
+    "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD_CYC",
+    "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0xF0B4",
+    "EventName": "PM_DC_PREF_CONS_ALLOC",
+    "BriefDescription": "Prefetch stream allocated in the conservative phase by either the hardware prefetch mechanism or software prefetch. The sum of this pair subtracted from the total number of allocs will give the total allocs in normal phase"
+  },
+  {,
+    "EventCode": "0xF894",
+    "EventName": "PM_LSU3_L1_CAM_CANCEL",
+    "BriefDescription": "ls3 l1 tm cam cancel"
+  },
+  {,
+    "EventCode": "0x2888",
+    "EventName": "PM_FLUSH_DISP_TLBIE",
+    "BriefDescription": "Dispatch Flush: TLBIE"
+  },
+  {,
+    "EventCode": "0x4E11E",
+    "EventName": "PM_MRK_DATA_FROM_DMEM_CYC",
+    "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load"
+  },
+  {,
+    "EventCode": "0x14156",
+    "EventName": "PM_MRK_DATA_FROM_L2_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L2 due to a marked load"
+  },
+  {,
+    "EventCode": "0x468A6",
+    "EventName": "PM_RD_CLEARING_SC",
+    "BriefDescription": "Core TM load hits line in L3 in TM_SC state and causes it to be invalidated"
+  },
+  {,
+    "EventCode": "0xD0B0",
+    "EventName": "PM_HWSYNC",
+    "BriefDescription": ""
+  },
+  {,
+    "EventCode": "0x168B0",
+    "EventName": "PM_L3_P1_NODE_PUMP",
+    "BriefDescription": "L3 PF sent with nodal scope port 1, counts even retried requests"
+  },
+  {,
+    "EventCode": "0xD0BC",
+    "EventName": "PM_LSU0_1_LRQF_FULL_CYC",
+    "BriefDescription": "Counts the number of cycles the LRQF is full.  LRQF is the queue that holds loads between finish and completion.  If it fills up, instructions stay in LRQ until completion, potentially backing up the LRQ"
+  },
+  {,
+    "EventCode": "0x2D148",
+    "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load"
+  },
+  {,
+    "EventCode": "0x468AE",
+    "EventName": "PM_L3_P3_CO_RTY",
+    "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted"
+  },
+  {,
+    "EventCode": "0x460A8",
+    "EventName": "PM_SN_HIT",
+    "BriefDescription": "Any port snooper hit L3.  Up to 4 can happen in a cycle but we only count 1"
+  },
+  {,
+    "EventCode": "0x360AA",
+    "EventName": "PM_L3_P0_CO_MEM",
+    "BriefDescription": "L3 CO to memory port 0 with or without data"
+  },
+  {,
+    "EventCode": "0xF0A4",
+    "EventName": "PM_DC_PREF_HW_ALLOC",
+    "BriefDescription": "Prefetch stream allocated by the hardware prefetch mechanism"
+  },
+  {,
+    "EventCode": "0xF0BC",
+    "EventName": "PM_LS2_UNALIGNED_ST",
+    "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than what normally would be required of the Store of that size.  If the Store wraps from slice 3 to slice 0, there is an additional 3-cycle penalty"
+  },
+  {,
+    "EventCode": "0xD0AC",
+    "EventName": "PM_SRQ_SYNC_CYC",
+    "BriefDescription": "A sync is in the S2Q (edge detect to count)"
+  },
+  {,
+    "EventCode": "0x401E6",
+    "EventName": "PM_MRK_INST_FROM_L3MISS",
+    "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet"
+  },
+  {,
+    "EventCode": "0x58A8",
+    "EventName": "PM_DECODE_HOLD_ICT_FULL",
+    "BriefDescription": "Counts the number of cycles in which the IFU was not able to decode and transmit one or more instructions because all itags were in use.  This means the ICT is full for this thread"
+  },
+  {,
+    "EventCode": "0x26082",
+    "EventName": "PM_L2_IC_INV",
+    "BriefDescription": "I-cache Invalidates sent over the reload bus to the core"
+  },
+  {,
+    "EventCode": "0xC8AC",
+    "EventName": "PM_LSU_FLUSH_RELAUNCH_MISS",
+    "BriefDescription": "If a load that has already returned data and has to relaunch for any reason then gets a miss (erat, setp, data cache), it will often be flushed at relaunch time because the data might be inconsistent"
+  },
+  {,
+    "EventCode": "0x260A4",
+    "EventName": "PM_L3_LD_HIT",
+    "BriefDescription": "L3 Hits for demand LDs"
+  },
+  {,
+    "EventCode": "0xF0A0",
+    "EventName": "PM_DATA_STORE",
+    "BriefDescription": "All ops that drain from s2q to L2 containing data"
+  },
+  {,
+    "EventCode": "0x1D148",
+    "EventName": "PM_MRK_DATA_FROM_RMEM",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Remote) due to a marked load"
+  },
+  {,
+    "EventCode": "0x16088",
+    "EventName": "PM_L2_LOC_GUESS_CORRECT",
+    "BriefDescription": "L2 guess local (LNS) and guess was correct (ie data local)"
+  },
+  {,
+    "EventCode": "0x160A4",
+    "EventName": "PM_L3_HIT",
+    "BriefDescription": "L3 Hits (L2 miss hitting L3, including data/instrn/xlate)"
+  },
+  {,
+    "EventCode": "0xE09C",
+    "EventName": "PM_LSU0_TM_L1_MISS",
+    "BriefDescription": "Load tm L1 miss"
+  },
+  {,
+    "EventCode": "0x168B4",
+    "EventName": "PM_L3_P1_LCO_RTY",
+    "BriefDescription": "L3 initiated LCO received retry on port 1 (can try 4 times)"
+  },
+  {,
+    "EventCode": "0x268AC",
+    "EventName": "PM_L3_RD_USAGE",
+    "BriefDescription": "Rotating sample of 16 RD actives"
+  },
+  {,
+    "EventCode": "0x1415C",
+    "EventName": "PM_MRK_DATA_FROM_L3_MEPF_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L3 without dispatch conflicts hit on Mepf state due to a marked load"
+  },
+  {,
+    "EventCode": "0xE880",
+    "EventName": "PM_L1_SW_PREF",
+    "BriefDescription": "Software L1 Prefetches, including SW Transient Prefetches"
+  },
+  {,
+    "EventCode": "0x288C",
+    "EventName": "PM_DISP_CLB_HELD_BAL",
+    "BriefDescription": "Dispatch/CLB Hold: Balance Flush"
+  },
+  {,
+    "EventCode": "0x101EA",
+    "EventName": "PM_MRK_L1_RELOAD_VALID",
+    "BriefDescription": "Marked demand reload"
+  },
+  {,
+    "EventCode": "0x1D156",
+    "EventName": "PM_MRK_LD_MISS_L1_CYC",
+    "BriefDescription": "Marked ld latency"
+  },
+  {,
+    "EventCode": "0x4C01A",
+    "EventName": "PM_CMPLU_STALL_DMISS_L3MISS",
+    "BriefDescription": "Completion stall due to cache miss resolving missed the L3"
+  },
+  {,
+    "EventCode": "0x2006C",
+    "EventName": "PM_RUN_CYC_SMT4_MODE",
+    "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT4 mode"
+  },
+  {,
+    "EventCode": "0x1D14E",
+    "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC",
+    "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load"
+  },
+  {,
+    "EventCode": "0xF888",
+    "EventName": "PM_LSU1_STORE_REJECT",
+    "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met"
+  },
+  {,
+    "EventCode": "0xC098",
+    "EventName": "PM_LS2_UNALIGNED_LD",
+    "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than what normally would be required of the load of that size.  If the load wraps from slice 3 to slice 0, there is an additional 3-cycle penalty"
+  },
+  {,
+    "EventCode": "0x20058",
+    "EventName": "PM_DARQ1_10_12_ENTRIES",
+    "BriefDescription": "Cycles in which 10 or  more DARQ1 entries (out of 12) are in use"
+  },
+  {,
+    "EventCode": "0x360A6",
+    "EventName": "PM_SNP_TM_HIT_M",
+    "BriefDescription": "TM snoop that is a store hits line in L3 in M or Mu state (exclusive modified)"
+  },
+  {,
+    "EventCode": "0x5898",
+    "EventName": "PM_LINK_STACK_INVALID_PTR",
+    "BriefDescription": "It is most often caused by certain types of flush where the pointer is not available. Can result in the data in the link stack becoming unusable."
+  },
+  {,
+    "EventCode": "0x46088",
+    "EventName": "PM_L2_CHIP_PUMP",
+    "BriefDescription": "RC requests that were local (aka chip) pump attempts"
+  },
+  {,
+    "EventCode": "0x28A0",
+    "EventName": "PM_TM_TSUSPEND",
+    "BriefDescription": "TM suspend instruction completed"
+  },
+  {,
+    "EventCode": "0x20054",
+    "EventName": "PM_L1_PREF",
+    "BriefDescription": "A data line was written to the L1 due to a hardware or software prefetch"
+  },
+  {,
+    "EventCode": "0x2608E",
+    "EventName": "PM_TM_LD_CONF",
+    "BriefDescription": "TM Load (fav or non-fav) ran into conflict (failed)"
+  },
+  {,
+    "EventCode": "0x1D144",
+    "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a marked load"
+  },
+  {,
+    "EventCode": "0x400FA",
+    "EventName": "PM_RUN_INST_CMPL",
+    "BriefDescription": "Run_Instructions"
+  },
+  {,
+    "EventCode": "0x15154",
+    "EventName": "PM_SYNC_MRK_L3MISS",
+    "BriefDescription": "Marked L3 misses that can throw a synchronous interrupt"
+  },
+  {,
+    "EventCode": "0xE0B4",
+    "EventName": "PM_LS0_TM_DISALLOW",
+    "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it"
+  },
+  {,
+    "EventCode": "0x26884",
+    "EventName": "PM_DSIDE_MRU_TOUCH",
+    "BriefDescription": "D-side L2 MRU touch commands sent to the L2"
+  },
+  {,
+    "EventCode": "0x30134",
+    "EventName": "PM_MRK_ST_CMPL_INT",
+    "BriefDescription": "marked store finished with intervention"
+  },
+  {,
+    "EventCode": "0xC0B8",
+    "EventName": "PM_LSU_FLUSH_SAO",
+    "BriefDescription": "A load-hit-load condition with Strong Address Ordering will have address compare disabled and flush"
+  },
+  {,
+    "EventCode": "0x50A8",
+    "EventName": "PM_EAT_FORCE_MISPRED",
+    "BriefDescription": "XL-form branch was mispredicted due to the predicted target address missing from EAT.  The EAT forces a mispredict in this case since there is no predicated target to validate.  This is a rare case that may occur when the EAT is full and a branch is issued"
+  },
+  {,
+    "EventCode": "0xC094",
+    "EventName": "PM_LS0_UNALIGNED_LD",
+    "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than what normally would be required of the load of that size.  If the load wraps from slice 3 to slice 0, there is an additional 3-cycle penalty"
+  },
+  {,
+    "EventCode": "0xF8BC",
+    "EventName": "PM_LS3_UNALIGNED_ST",
+    "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than what normally would be required of the Store of that size.  If the Store wraps from slice 3 to slice 0, there is an additional 3-cycle penalty"
+  },
+  {,
+    "EventCode": "0x460AE",
+    "EventName": "PM_L3_P2_CO_RTY",
+    "BriefDescription": "L3 CO received retry port 2 (memory only), every retry counted"
+  },
+  {,
+    "EventCode": "0x58B0",
+    "EventName": "PM_BTAC_GOOD_RESULT",
+    "BriefDescription": "BTAC predicts a taken branch and the BHT agrees, and the target address is correct"
+  },
+  {,
+    "EventCode": "0x1C04C",
+    "EventName": "PM_DATA_FROM_LL4",
+    "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a demand load"
+  },
+  {,
+    "EventCode": "0x3608E",
+    "EventName": "PM_TM_ST_CONF",
+    "BriefDescription": "TM Store (fav or non-fav) ran into conflict (failed)"
+  },
+  {,
+    "EventCode": "0xF8A0",
+    "EventName": "PM_NON_DATA_STORE",
+    "BriefDescription": "All ops that drain from s2q to L2 and contain no data"
+  },
+  {,
+    "EventCode": "0x3F146",
+    "EventName": "PM_MRK_DPTEG_FROM_L21_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x40A0",
+    "EventName": "PM_BR_UNCOND",
+    "BriefDescription": "Unconditional Branch Completed. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was converted to a Resolve."
+  },
+  {,
+    "EventCode": "0xF8A8",
+    "EventName": "PM_DC_PREF_FUZZY_CONF",
+    "BriefDescription": "A demand load referenced a line in an active fuzzy prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software. Fuzzy stream confirm (out of order effects, or pf can't keep up)"
+  },
+  {,
+    "EventCode": "0xF8A4",
+    "EventName": "PM_DC_PREF_SW_ALLOC",
+    "BriefDescription": "Prefetch stream allocated by software prefetching"
+  },
+  {,
+    "EventCode": "0xE0A0",
+    "EventName": "PM_LSU2_TM_L1_MISS",
+    "BriefDescription": "Load tm L1 miss"
+  },
+  {,
+    "EventCode": "0xC880",
+    "EventName": "PM_LS1_LD_VECTOR_FIN",
+    "BriefDescription": "LS1 finished load vector op"
+  },
+  {,
+    "EventCode": "0x2894",
+    "EventName": "PM_TM_OUTER_TEND",
+    "BriefDescription": "Completion time outer tend"
+  },
+  {,
+    "EventCode": "0xF098",
+    "EventName": "PM_XLATE_HPT_MODE",
+    "BriefDescription": "LSU reports every cycle the thread is in HPT translation mode (as opposed to radix mode)"
+  },
+  {,
+    "EventCode": "0x2C04E",
+    "EventName": "PM_LD_MISS_L1_FIN",
+    "BriefDescription": "Number of load instructions that finished with an L1 miss. Note that even if a load spans multiple slices this event will increment only once per load op."
+  },
+  {,
+    "EventCode": "0x30162",
+    "EventName": "PM_MRK_LSU_DERAT_MISS",
+    "BriefDescription": "Marked derat reload (miss) for any page size"
+  },
+  {,
+    "EventCode": "0x160A0",
+    "EventName": "PM_L3_PF_MISS_L3",
+    "BriefDescription": "L3 PF missed in L3"
+  },
+  {,
+    "EventCode": "0x1C04A",
+    "EventName": "PM_DATA_FROM_RL2L3_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load"
+  },
+  {,
+    "EventCode": "0x268B0",
+    "EventName": "PM_L3_P1_GRP_PUMP",
+    "BriefDescription": "L3 PF sent with grp scope port 1, counts even retried requests"
+  },
+  {,
+    "EventCode": "0x30016",
+    "EventName": "PM_CMPLU_STALL_SRQ_FULL",
+    "BriefDescription": "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full"
+  },
+  {,
+    "EventCode": "0x40B4",
+    "EventName": "PM_BR_PRED_TA",
+    "BriefDescription": "Conditional Branch Completed that had its target address predicted. Only XL-form branches set this event.  This equal the sum of CCACHE, LSTACK, and PCACHE"
+  },
+  {,
+    "EventCode": "0x40AC",
+    "EventName": "PM_BR_MPRED_CCACHE",
+    "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Count Cache Target Prediction"
+  },
+  {,
+    "EventCode": "0x3688A",
+    "EventName": "PM_L2_RTY_LD",
+    "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
+  },
+  {,
+    "EventCode": "0xE08C",
+    "EventName": "PM_LSU0_ERAT_HIT",
+    "BriefDescription": "Primary ERAT hit.  There is no secondary ERAT"
+  },
+  {,
+    "EventCode": "0xE088",
+    "EventName": "PM_LS2_ERAT_MISS_PREF",
+    "BriefDescription": "LS0 Erat miss due to prefetch"
+  },
+  {,
+    "EventCode": "0xF0A8",
+    "EventName": "PM_DC_PREF_CONF",
+    "BriefDescription": "A demand load referenced a line in an active prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software. Includes forwards and backwards streams"
+  },
+  {,
+    "EventCode": "0x16888",
+    "EventName": "PM_L2_LOC_GUESS_WRONG",
+    "BriefDescription": "L2 guess local (LNS) and guess was not correct (ie data not on chip)"
+  },
+  {,
+    "EventCode": "0xC888",
+    "EventName": "PM_LSU_DTLB_MISS_64K",
+    "BriefDescription": "Data TLB Miss page size 64K"
+  },
+  {,
+    "EventCode": "0xE0A4",
+    "EventName": "PM_TMA_REQ_L2",
+    "BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding"
+  },
+  {,
+    "EventCode": "0xC088",
+    "EventName": "PM_LSU_DTLB_MISS_4K",
+    "BriefDescription": "Data TLB Miss page size 4K"
+  },
+  {,
+    "EventCode": "0x3C042",
+    "EventName": "PM_DATA_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a demand load"
+  },
+  {,
+    "EventCode": "0x168AA",
+    "EventName": "PM_L3_P1_LCO_NO_DATA",
+    "BriefDescription": "Dataless L3 LCO sent port 1"
+  },
+  {,
+    "EventCode": "0x3D140",
+    "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L2 with dispatch conflict due to a marked load"
+  },
+  {,
+    "EventCode": "0xC89C",
+    "EventName": "PM_LS1_LAUNCH_HELD_PREF",
+    "BriefDescription": "Number of times a load or store instruction was unable to launch/relaunch because a high priority prefetch used that relaunch cycle"
+  },
+  {,
+    "EventCode": "0x4894",
+    "EventName": "PM_IC_RELOAD_PRIVATE",
+    "BriefDescription": "Reloading line was brought in private for a specific thread.  Most lines are brought in shared for all eight threads.  If RA does not match then invalidates and then brings it shared to other thread. In P7 line brought in private, then line was invalidated"
+  },
+  {,
+    "EventCode": "0x1688E",
+    "EventName": "PM_TM_LD_CAUSED_FAIL",
+    "BriefDescription": "Non-TM Load caused any thread to fail"
+  },
+  {,
+    "EventCode": "0x26084",
+    "EventName": "PM_L2_RCLD_DISP_FAIL_OTHER",
+    "BriefDescription": "All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread that failed due to reasons other than an address collision conflicts with an L2 machines (e.g. Read-Claim/Snoop machine not available)"
+  },
+  {,
+    "EventCode": "0x101E4",
+    "EventName": "PM_MRK_L1_ICACHE_MISS",
+    "BriefDescription": "sampled Instruction suffered an icache Miss"
+  },
+  {,
+    "EventCode": "0x20A0",
+    "EventName": "PM_TM_NESTED_TBEGIN",
+    "BriefDescription": "Completion Tm nested tbegin"
+  },
+  {,
+    "EventCode": "0x368AA",
+    "EventName": "PM_L3_P1_CO_MEM",
+    "BriefDescription": "L3 CO to memory port 1 with or without data"
+  },
+  {,
+    "EventCode": "0xC8A4",
+    "EventName": "PM_LSU3_FALSE_LHS",
+    "BriefDescription": "False LHS match detected"
+  },
+  {,
+    "EventCode": "0xF0B0",
+    "EventName": "PM_L3_LD_PREF",
+    "BriefDescription": "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest"
+  },
+  {,
+    "EventCode": "0x4D012",
+    "EventName": "PM_PMC3_SAVED",
+    "BriefDescription": "PMC3 Rewind Value saved"
+  },
+  {,
+    "EventCode": "0xE888",
+    "EventName": "PM_LS3_ERAT_MISS_PREF",
+    "BriefDescription": "LS1 Erat miss due to prefetch"
+  },
+  {,
+    "EventCode": "0x368B4",
+    "EventName": "PM_L3_RD0_BUSY",
+    "BriefDescription": "Lifetime, sample of RD machine 0 valid"
+  },
+  {,
+    "EventCode": "0x46080",
+    "EventName": "PM_L2_DISP_ALL_L2MISS",
+    "BriefDescription": "All successful D-side-Ld/St or I-side-instruction-fetch dispatches for this thread that were an L2 miss"
+  },
+  {,
+    "EventCode": "0xF8B8",
+    "EventName": "PM_LS1_UNALIGNED_ST",
+    "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than what normally would be required of the Store of that size.  If the Store wraps from slice 3 to slice 0, there is an additional 3-cycle penalty"
+  },
+  {,
+    "EventCode": "0x408C",
+    "EventName": "PM_L1_DEMAND_WRITE",
+    "BriefDescription": "Instruction Demand sectors written into IL1"
+  },
+  {,
+    "EventCode": "0x368A8",
+    "EventName": "PM_SN_INVL",
+    "BriefDescription": "Any port snooper detects a store to a line in the Sx state and invalidates the line.  Up to 4 can happen in a cycle but we only count 1"
+  },
+  {,
+    "EventCode": "0x160B2",
+    "EventName": "PM_L3_LOC_GUESS_CORRECT",
+    "BriefDescription": "Prefetch scope predictor selected LNS and was correct"
+  },
+  {,
+    "EventCode": "0x48B4",
+    "EventName": "PM_DECODE_FUSION_CONST_GEN",
+    "BriefDescription": "32-bit constant generation"
+  },
+  {,
+    "EventCode": "0x4D146",
+    "EventName": "PM_MRK_DATA_FROM_L21_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0xE080",
+    "EventName": "PM_S2Q_FULL",
+    "BriefDescription": "Cycles during which the S2Q is full"
+  },
+  {,
+    "EventCode": "0x268B4",
+    "EventName": "PM_L3_P3_LCO_RTY",
+    "BriefDescription": "L3 initiated LCO received retry on port 3 (can try 4 times)"
+  },
+  {,
+    "EventCode": "0xD8B8",
+    "EventName": "PM_LSU0_LMQ_S0_VALID",
+    "BriefDescription": "Slot 0 of LMQ valid"
+  },
+  {,
+    "EventCode": "0x2098",
+    "EventName": "PM_TM_NESTED_TEND",
+    "BriefDescription": "Completion time nested tend"
+  },
+  {,
+    "EventCode": "0x368A0",
+    "EventName": "PM_L3_PF_OFF_CHIP_CACHE",
+    "BriefDescription": "L3 PF from Off chip cache"
+  },
+  {,
+    "EventCode": "0x20056",
+    "EventName": "PM_TAKEN_BR_MPRED_CMPL",
+    "BriefDescription": "Total number of taken branches that were incorrectly predicted as not-taken. This event counts branches completed and does not include speculative instructions"
+  },
+  {,
+    "EventCode": "0x4688A",
+    "EventName": "PM_L2_SYS_PUMP",
+    "BriefDescription": "RC requests that were system pump attempts"
+  },
+  {,
+    "EventCode": "0xE090",
+    "EventName": "PM_LSU2_ERAT_HIT",
+    "BriefDescription": "Primary ERAT hit.  There is no secondary ERAT"
+  },
+  {,
+    "EventCode": "0x4001C",
+    "EventName": "PM_INST_IMC_MATCH_CMPL",
+    "BriefDescription": "IMC Match Count"
+  },
+  {,
+    "EventCode": "0x40A8",
+    "EventName": "PM_BR_PRED_LSTACK",
+    "BriefDescription": "Conditional Branch Completed  that used the Link Stack for Target Prediction"
+  },
+  {,
+    "EventCode": "0x268A2",
+    "EventName": "PM_L3_CI_MISS",
+    "BriefDescription": "L3 castins miss (total count)"
+  },
+  {,
+    "EventCode": "0x289C",
+    "EventName": "PM_TM_NON_FAV_TBEGIN",
+    "BriefDescription": "Dispatch time non favored tbegin"
+  },
+  {,
+    "EventCode": "0xF08C",
+    "EventName": "PM_LSU2_STORE_REJECT",
+    "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met"
+  },
+  {,
+    "EventCode": "0x360A0",
+    "EventName": "PM_L3_PF_ON_CHIP_CACHE",
+    "BriefDescription": "L3 PF from On chip cache"
+  },
+  {,
+    "EventCode": "0x35152",
+    "EventName": "PM_MRK_DATA_FROM_L2MISS_CYC",
+    "BriefDescription": "Duration in cycles to reload from a location other than the local core's L2 due to a marked load"
+  },
+  {,
+    "EventCode": "0x160AC",
+    "EventName": "PM_L3_SN_USAGE",
+    "BriefDescription": "Rotating sample of 16 snoop valids"
+  },
+  {,
+    "EventCode": "0x1608C",
+    "EventName": "PM_RC0_BUSY",
+    "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
+  },
+  {,
+    "EventCode": "0x36082",
+    "EventName": "PM_L2_LD_DISP",
+    "BriefDescription": "All successful D-side-Ld or I-side-instruction-fetch dispatches for this thread"
+  },
+  {,
+    "EventCode": "0xF8B0",
+    "EventName": "PM_L3_SW_PREF",
+    "BriefDescription": "L3 load prefetch, sourced from a software prefetch stream, was sent to the nest"
+  },
+  {,
+    "EventCode": "0xF884",
+    "EventName": "PM_TABLEWALK_CYC_PREF",
+    "BriefDescription": "tablewalk qualified for pte  prefetches"
+  },
+  {,
+    "EventCode": "0x4D144",
+    "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x16884",
+    "EventName": "PM_L2_RCLD_DISP_FAIL_ADDR",
+    "BriefDescription": "All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread that failed due to an address collision conflict with an L2 machine already working on this line (e.g. ld-hit-stq or Read-claim/Castout/Snoop machines)"
+  },
+  {,
+    "EventCode": "0x460A0",
+    "EventName": "PM_L3_PF_ON_CHIP_MEM",
+    "BriefDescription": "L3 PF from On chip memory"
+  },
+  {,
+    "EventCode": "0xF084",
+    "EventName": "PM_PTE_PREFETCH",
+    "BriefDescription": "PTE prefetches"
+  },
+  {,
+    "EventCode": "0x2D026",
+    "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L2",
+    "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L2 data cache"
+  },
+  {,
+    "EventCode": "0x48B0",
+    "EventName": "PM_BR_MPRED_PCACHE",
+    "BriefDescription": "Conditional Branch Completed that was Mispredicted due to pattern cache prediction"
+  },
+  {,
+    "EventCode": "0x2C126",
+    "EventName": "PM_MRK_DATA_FROM_L2",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a marked load"
+  },
+  {,
+    "EventCode": "0xE0AC",
+    "EventName": "PM_TM_FAIL_TLBIE",
+    "BriefDescription": "Transaction failed because there was a TLBIE hit in the bloom filter"
+  },
+  {,
+    "EventCode": "0x260AA",
+    "EventName": "PM_L3_P0_LCO_DATA",
+    "BriefDescription": "LCO sent with data port 0"
+  },
+  {,
+    "EventCode": "0x4888",
+    "EventName": "PM_IC_PREF_REQ",
+    "BriefDescription": "Instruction prefetch requests"
+  },
+  {,
+    "EventCode": "0xC898",
+    "EventName": "PM_LS3_UNALIGNED_LD",
+    "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than what normally would be required of the load of that size.  If the load wraps from slice 3 to slice 0, there is an additional 3-cycle penalty"
+  },
+  {,
+    "EventCode": "0x488C",
+    "EventName": "PM_IC_PREF_WRITE",
+    "BriefDescription": "Instruction prefetch written into IL1"
+  },
+  {,
+    "EventCode": "0xF89C",
+    "EventName": "PM_XLATE_MISS",
+    "BriefDescription": "The LSU requested a line from L2 for translation.  It may be satisfied from any source beyond L2.  Includes speculative instructions. Includes instruction, prefetch and demand"
+  },
+  {,
+    "EventCode": "0x14158",
+    "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L2 without conflict due to a marked load"
+  },
+  {,
+    "EventCode": "0x35156",
+    "EventName": "PM_MRK_DATA_FROM_L31_SHR_CYC",
+    "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L3 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0xC88C",
+    "EventName": "PM_LSU_DTLB_MISS_16G_1G",
+    "BriefDescription": "Data TLB Miss page size 16G (HPT) or 1G (Radix)"
+  },
+  {,
+    "EventCode": "0x268A6",
+    "EventName": "PM_TM_RST_SC",
+    "BriefDescription": "TM snoop hits line in L3 that is TM_SC state and causes it to be invalidated"
+  },
+  {,
+    "EventCode": "0x468A4",
+    "EventName": "PM_L3_TRANS_PF",
+    "BriefDescription": "L3 Transient prefetch received from L2"
+  },
+  {,
+    "EventCode": "0x4094",
+    "EventName": "PM_IC_PREF_CANCEL_L2",
+    "BriefDescription": "L2 Squashed a demand or prefetch request"
+  },
+  {,
+    "EventCode": "0x48AC",
+    "EventName": "PM_BR_MPRED_LSTACK",
+    "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Link Stack Target Prediction"
+  },
+  {,
+    "EventCode": "0xE88C",
+    "EventName": "PM_LSU1_ERAT_HIT",
+    "BriefDescription": "Primary ERAT hit.  There is no secondary ERAT"
+  },
+  {,
+    "EventCode": "0xC0B4",
+    "EventName": "PM_LSU_FLUSH_WRK_ARND",
+    "BriefDescription": "LSU workaround flush.  These flushes are setup with programmable scan only latches to perform various actions when the flush macro receives a trigger from the dbg macros. These actions include things like flushing the next op encountered for a particular thread or flushing the next op that is NTC op that is encountered on a particular slice. The kind of flush that the workaround is setup to perform is highly variable."
+  },
+  {,
+    "EventCode": "0x34054",
+    "EventName": "PM_PARTIAL_ST_FIN",
+    "BriefDescription": "Any store finished by an LSU slice"
+  },
+  {,
+    "EventCode": "0x5880",
+    "EventName": "PM_THRD_PRIO_6_7_CYC",
+    "BriefDescription": "Cycles thread running at priority level 6 or 7"
+  },
+  {,
+    "EventCode": "0x4898",
+    "EventName": "PM_IC_DEMAND_L2_BR_REDIRECT",
+    "BriefDescription": "L2 I cache demand request due to branch Mispredict (15 cycle path)"
+  },
+  {,
+    "EventCode": "0x4880",
+    "EventName": "PM_BANK_CONFLICT",
+    "BriefDescription": "Read blocked due to interleave conflict.  The ifar logic will detect an interleave conflict and kill the data that was read that cycle."
+  },
+  {,
+    "EventCode": "0x360B0",
+    "EventName": "PM_L3_P0_SYS_PUMP",
+    "BriefDescription": "L3 PF sent with sys scope port 0, counts even retried requests"
+  },
+  {,
+    "EventCode": "0x3006A",
+    "EventName": "PM_IERAT_RELOAD_64K",
+    "BriefDescription": "IERAT Reloaded (Miss) for a 64k page"
+  },
+  {,
+    "EventCode": "0xD8BC",
+    "EventName": "PM_LSU2_3_LRQF_FULL_CYC",
+    "BriefDescription": "Counts the number of cycles the LRQF is full.  LRQF is the queue that holds loads between finish and completion.  If it fills up, instructions stay in LRQ until completion, potentially backing up the LRQ"
+  },
+  {,
+    "EventCode": "0x46086",
+    "EventName": "PM_L2_SN_M_RD_DONE",
+    "BriefDescription": "Snoop dispatched for a read and was M (true M)"
+  },
+  {,
+    "EventCode": "0x40154",
+    "EventName": "PM_MRK_FAB_RSP_BKILL",
+    "BriefDescription": "Marked store had to do a bkill"
+  },
+  {,
+    "EventCode": "0xF094",
+    "EventName": "PM_LSU2_L1_CAM_CANCEL",
+    "BriefDescription": "ls2 l1 tm cam cancel"
+  },
+  {,
+    "EventCode": "0x2D014",
+    "EventName": "PM_CMPLU_STALL_LRQ_FULL",
+    "BriefDescription": "Finish stall because the NTF instruction was a load that was held in LSAQ (load-store address queue) because the LRQ (load-reorder queue) was full"
+  },
+  {,
+    "EventCode": "0x3E05E",
+    "EventName": "PM_L3_CO_MEPF",
+    "BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory).  The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request"
+  },
+  {,
+    "EventCode": "0x460A2",
+    "EventName": "PM_L3_LAT_CI_HIT",
+    "BriefDescription": "L3 Lateral Castins Hit"
+  },
+  {,
+    "EventCode": "0x3D14E",
+    "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x3D15E",
+    "EventName": "PM_MULT_MRK",
+    "BriefDescription": "mult marked instr"
+  },
+  {,
+    "EventCode": "0x4084",
+    "EventName": "PM_EAT_FULL_CYC",
+    "BriefDescription": "Cycles No room in EAT"
+  },
+  {,
+    "EventCode": "0x5098",
+    "EventName": "PM_LINK_STACK_WRONG_ADD_PRED",
+    "BriefDescription": "Link stack predicts wrong address, because of link stack design limitation or software violating the coding conventions"
+  },
+  {,
+    "EventCode": "0x2C050",
+    "EventName": "PM_DATA_GRP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for a demand load"
+  },
+  {,
+    "EventCode": "0xC0A4",
+    "EventName": "PM_LSU2_FALSE_LHS",
+    "BriefDescription": "False LHS match detected"
+  },
+  {,
+    "EventCode": "0x58A0",
+    "EventName": "PM_LINK_STACK_CORRECT",
+    "BriefDescription": "Link stack predicts right address"
+  },
+  {,
+    "EventCode": "0x36886",
+    "EventName": "PM_L2_SN_SX_I_DONE",
+    "BriefDescription": "Snoop dispatched and went from Sx to Ix"
+  },
+  {,
+    "EventCode": "0x4E04A",
+    "EventName": "PM_DPTEG_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x2C12C",
+    "EventName": "PM_MRK_DATA_FROM_DL4_CYC",
+    "BriefDescription": "Duration in cycles to reload from another chip's L4 on a different Node or Group (Distant) due to a marked load"
+  },
+  {,
+    "EventCode": "0x4080",
+    "EventName": "PM_INST_FROM_L1",
+    "BriefDescription": "Instruction fetches from L1.  L1 instruction hit"
+  },
+  {,
+    "EventCode": "0xE898",
+    "EventName": "PM_LSU3_TM_L1_HIT",
+    "BriefDescription": "Load tm hit in L1"
+  },
+  {,
+    "EventCode": "0x260A0",
+    "EventName": "PM_L3_CO_MEM",
+    "BriefDescription": "L3 CO to memory OR of port 0 and 1 (lossy = may undercount if two cresp come in the same cyc)"
+  },
+  {,
+    "EventCode": "0x16082",
+    "EventName": "PM_L2_CASTOUT_MOD",
+    "BriefDescription": "L2 Castouts - Modified (M,Mu,Me)"
+  },
+  {,
+    "EventCode": "0xC09C",
+    "EventName": "PM_LS0_LAUNCH_HELD_PREF",
+    "BriefDescription": "Number of times a load or store instruction was unable to launch/relaunch because a high priority prefetch used that relaunch cycle"
+  },
+  {,
+    "EventCode": "0xC8B8",
+    "EventName": "PM_LSU_FLUSH_LARX_STCX",
+    "BriefDescription": "A larx is flushed because an older larx has an LMQ reservation for the same thread.  A stcx is flushed because an older stcx is in the LMQ.  The flush happens when the older larx/stcx relaunches"
+  },
+  {,
+    "EventCode": "0x260A6",
+    "EventName": "PM_NON_TM_RST_SC",
+    "BriefDescription": "Non-TM snoop hits line in L3 that is TM_SC state and causes it to be invalidated"
+  },
+  {,
+    "EventCode": "0x3608A",
+    "EventName": "PM_L2_RTY_ST",
+    "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
+  },
+  {,
+    "EventCode": "0x24040",
+    "EventName": "PM_INST_FROM_L2_MEPF",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x209C",
+    "EventName": "PM_TM_FAV_TBEGIN",
+    "BriefDescription": "Dispatch time Favored tbegin"
+  },
+  {,
+    "EventCode": "0x2D01E",
+    "EventName": "PM_ICT_NOSLOT_DISP_HELD_ISSQ",
+    "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full"
+  },
+  {,
+    "EventCode": "0x50A4",
+    "EventName": "PM_FLUSH_MPRED",
+    "BriefDescription": "Branch mispredict flushes.  Includes target and address misprediction"
+  },
+  {,
+    "EventCode": "0x1504C",
+    "EventName": "PM_IPTEG_FROM_LL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x268A4",
+    "EventName": "PM_L3_LD_MISS",
+    "BriefDescription": "L3 Misses for demand LDs"
+  },
+  {,
+    "EventCode": "0x26088",
+    "EventName": "PM_L2_GRP_GUESS_CORRECT",
+    "BriefDescription": "L2 guess grp (GS or NNS) and guess was correct (data intra-group AND ^on-chip)"
+  },
+  {,
+    "EventCode": "0xD088",
+    "EventName": "PM_LSU0_LDMX_FIN",
+    "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491):  The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region.  This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])."
+  },
+  {,
+    "EventCode": "0xE8B4",
+    "EventName": "PM_LS1_TM_DISALLOW",
+    "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it"
+  },
+  {,
+    "EventCode": "0x1688C",
+    "EventName": "PM_RC_USAGE",
+    "BriefDescription": "Continuous 16 cycle (2to1) window where this signal rotates thru sampling each RC machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running"
+  },
+  {,
+    "EventCode": "0x3F054",
+    "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L3MISS",
+    "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from beyond the core's L3 data cache. This is the deepest level of PWC possible for a translation. The source could be local/remote/distant memory or another core's cache"
+  },
+  {,
+    "EventCode": "0x2608A",
+    "EventName": "PM_ISIDE_DISP_FAIL_ADDR",
+    "BriefDescription": "All I-side-instruction-fetch dispatch attempts for this thread that failed due to an address collision conflict with an L2 machine already working on this line (e.g. ld-hit-stq or RC/CO/SN machines)"
+  },
+  {,
+    "EventCode": "0x50B4",
+    "EventName": "PM_TAGE_CORRECT_TAKEN_CMPL",
+    "BriefDescription": "The TAGE overrode BHT direction prediction and it was correct.  Counted at completion for taken branches only"
+  },
+  {,
+    "EventCode": "0x2090",
+    "EventName": "PM_DISP_CLB_HELD_SB",
+    "BriefDescription": "Dispatch/CLB Hold: Scoreboard"
+  },
+  {,
+    "EventCode": "0xE0B0",
+    "EventName": "PM_TM_FAIL_NON_TX_CONFLICT",
+    "BriefDescription": "Non transactional conflict from LSU, gets reported to TEXASR"
+  },
+  {,
+    "EventCode": "0x201E0",
+    "EventName": "PM_MRK_DATA_FROM_MEMORY",
+    "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a marked load"
+  },
+  {,
+    "EventCode": "0x368A2",
+    "EventName": "PM_L3_L2_CO_MISS",
+    "BriefDescription": "L2 CO miss"
+  },
+  {,
+    "EventCode": "0x3608C",
+    "EventName": "PM_CO0_BUSY",
+    "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
+  },
+  {,
+    "EventCode": "0x2C122",
+    "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load"
+  },
+  {,
+    "EventCode": "0x35154",
+    "EventName": "PM_MRK_DATA_FROM_L3_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L3 due to a marked load"
+  },
+  {,
+    "EventCode": "0x1D140",
+    "EventName": "PM_MRK_DATA_FROM_L31_MOD_CYC",
+    "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L3 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x4404A",
+    "EventName": "PM_INST_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x28AC",
+    "EventName": "PM_TM_FAIL_SELF",
+    "BriefDescription": "TM aborted because a self-induced conflict occurred in Suspended state, due to one of the following: a store to a storage location that was previously accessed transactionally; a dcbf, dcbi, or icbi specifying a block that was previously accessed transactionally; a dcbst specifying a block that was previously written transactionally; or a tlbie that specifies a translation that was previously used transactionally"
+  },
+  {,
+    "EventCode": "0x45056",
+    "EventName": "PM_SCALAR_FLOP_CMPL",
+    "BriefDescription": "Scalar flop operation completed"
+  },
+  {,
+    "EventCode": "0x16092",
+    "EventName": "PM_L2_LD_MISS_128B",
+    "BriefDescription": "All successful D-side load dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 128B (i.e., M=0)"
+  },
+  {,
+    "EventCode": "0x2E014",
+    "EventName": "PM_STCX_FIN",
+    "BriefDescription": "Number of stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed"
+  },
+  {,
+    "EventCode": "0xD8AC",
+    "EventName": "PM_LWSYNC",
+    "BriefDescription": ""
+  },
+  {,
+    "EventCode": "0x2094",
+    "EventName": "PM_TM_OUTER_TBEGIN",
+    "BriefDescription": "Completion time outer tbegin"
+  },
+  {,
+    "EventCode": "0x160B4",
+    "EventName": "PM_L3_P0_LCO_RTY",
+    "BriefDescription": "L3 initiated LCO received retry on port 0 (can try 4 times)"
+  },
+  {,
+    "EventCode": "0x36892",
+    "EventName": "PM_DSIDE_OTHER_64B_L2MEMACC",
+    "BriefDescription": "Valid when first beat of data comes in for a D-side fetch where data came EXCLUSIVELY from memory that was for hpc_read64, (RC had to fetch other 64B of a line from MC) i.e., number of times RC had to go to memory to get 'missing' 64B"
+  },
+  {,
+    "EventCode": "0x20A8",
+    "EventName": "PM_TM_FAIL_FOOTPRINT_OVERFLOW",
+    "BriefDescription": "TM aborted because the tracking limit for transactional storage accesses was exceeded. Asynchronous"
+  },
+  {,
+    "EventCode": "0x30018",
+    "EventName": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL",
+    "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)"
+  },
+  {,
+    "EventCode": "0xC894",
+    "EventName": "PM_LS1_UNALIGNED_LD",
+    "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than what normally would be required of the load of that size.  If the load wraps from slice 3 to slice 0, there is an additional 3-cycle penalty"
+  },
+  {,
+    "EventCode": "0x360A2",
+    "EventName": "PM_L3_L2_CO_HIT",
+    "BriefDescription": "L2 CO hits"
+  },
+  {,
+    "EventCode": "0x36092",
+    "EventName": "PM_DSIDE_L2MEMACC",
+    "BriefDescription": "Valid when first beat of data comes in for a D-side fetch where data came EXCLUSIVELY from memory (excluding hpcread64 accesses), i.e., total memory accesses by RCs"
+  },
+  {,
+    "EventCode": "0x10138",
+    "EventName": "PM_MRK_BR_2PATH",
+    "BriefDescription": "marked branches which are not strongly biased"
+  },
+  {,
+    "EventCode": "0x2884",
+    "EventName": "PM_ISYNC",
+    "BriefDescription": "Isync completion count per thread"
+  },
+  {,
+    "EventCode": "0x16882",
+    "EventName": "PM_L2_CASTOUT_SHR",
+    "BriefDescription": "L2 Castouts - Shared (Tx,Sx)"
+  },
+  {,
+    "EventCode": "0x26092",
+    "EventName": "PM_L2_LD_MISS_64B",
+    "BriefDescription": "All successful D-side load dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 64B(i.e., M=1)"
+  },
+  {,
+    "EventCode": "0x26080",
+    "EventName": "PM_L2_LD_MISS",
+    "BriefDescription": "All successful D-Side Load dispatches that were an L2 miss for this thread"
+  },
+  {,
+    "EventCode": "0x3D14C",
+    "EventName": "PM_MRK_DATA_FROM_DMEM",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's memory on a different Node or Group (Distant) due to a marked load"
+  },
+  {,
+    "EventCode": "0x100FA",
+    "EventName": "PM_ANY_THRD_RUN_CYC",
+    "BriefDescription": "Cycles in which at least one thread has the run latch set"
+  },
+  {,
+    "EventCode": "0x2C12A",
+    "EventName": "PM_MRK_DATA_FROM_RMEM_CYC",
+    "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group (Remote) due to a marked load"
+  },
+  {,
+    "EventCode": "0x25048",
+    "EventName": "PM_IPTEG_FROM_LMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to an instruction side request"
+  },
+  {,
+    "EventCode": "0xD8A8",
+    "EventName": "PM_ISLB_MISS",
+    "BriefDescription": "Instruction SLB Miss - Total of all segment sizes"
+  },
+  {,
+    "EventCode": "0x368AE",
+    "EventName": "PM_L3_P1_CO_RTY",
+    "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted"
+  },
+  {,
+    "EventCode": "0x260A2",
+    "EventName": "PM_L3_CI_HIT",
+    "BriefDescription": "L3 Castins Hit (total count)"
+  },
+  {,
+    "EventCode": "0x44054",
+    "EventName": "PM_VECTOR_LD_CMPL",
+    "BriefDescription": "Number of vector load instructions completed"
+  },
+  {,
+    "EventCode": "0x1E05C",
+    "EventName": "PM_CMPLU_STALL_NESTED_TBEGIN",
+    "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT"
+  },
+  {,
+    "EventCode": "0xC084",
+    "EventName": "PM_LS2_LD_VECTOR_FIN",
+    "BriefDescription": "LS2 finished load vector op"
+  },
+  {,
+    "EventCode": "0x1608E",
+    "EventName": "PM_ST_CAUSED_FAIL",
+    "BriefDescription": "Non-TM Store caused any thread to fail"
+  },
+  {,
+    "EventCode": "0x3080",
+    "EventName": "PM_ISU0_ISS_HOLD_ALL",
+    "BriefDescription": "All ISU rejects"
+  },
+  {,
+    "EventCode": "0x1515A",
+    "EventName": "PM_SYNC_MRK_L2MISS",
+    "BriefDescription": "Marked L2 Miss that can throw a synchronous interrupt"
+  },
+  {,
+    "EventCode": "0x26892",
+    "EventName": "PM_L2_ST_MISS_64B",
+    "BriefDescription": "All successful D-side store dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 64B (i.e., M=1)"
+  },
+  {,
+    "EventCode": "0x2688C",
+    "EventName": "PM_CO_USAGE",
+    "BriefDescription": "Continuous 16 cycle (2to1) window where this signal rotates thru sampling each CO machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running"
+  },
+  {,
+    "EventCode": "0x48B8",
+    "EventName": "PM_BR_MPRED_TAKEN_TA",
+    "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Target Address Prediction from the Count Cache or Link Stack.  Only XL-form branches that resolved Taken set this event."
+  },
+  {,
+    "EventCode": "0x50B0",
+    "EventName": "PM_BTAC_BAD_RESULT",
+    "BriefDescription": "BTAC thinks branch will be taken but it is either predicted not-taken by the BHT, or the target address is wrong (less common).  In both cases, a redirect will happen"
+  },
+  {,
+    "EventCode": "0xD888",
+    "EventName": "PM_LSU1_LDMX_FIN",
+    "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491):  The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region.  This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])."
+  },
+  {,
+    "EventCode": "0x58B4",
+    "EventName": "PM_TAGE_CORRECT",
+    "BriefDescription": "The TAGE overrode BHT direction prediction and it was correct.   Includes taken and not taken and is counted at execution time"
+  },
+  {,
+    "EventCode": "0x3688C",
+    "EventName": "PM_SN_USAGE",
+    "BriefDescription": "Continuous 16 cycle (2to1) window where this signal rotates thru sampling each SN machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running"
+  },
+  {,
+    "EventCode": "0x36084",
+    "EventName": "PM_L2_RCST_DISP",
+    "BriefDescription": "All D-side store dispatch attempts for this thread"
+  },
+  {,
+    "EventCode": "0x46084",
+    "EventName": "PM_L2_RCST_DISP_FAIL_OTHER",
+    "BriefDescription": "All D-side store dispatch attempts for this thread that failed due to reason other than address collision"
+  },
+  {,
+    "EventCode": "0xF0AC",
+    "EventName": "PM_DC_PREF_STRIDED_CONF",
+    "BriefDescription": "A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software."
+  },
+  {,
+    "EventCode": "0x45054",
+    "EventName": "PM_FMA_CMPL",
+    "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. "
+  },
+  {,
+    "EventCode": "0x201E8",
+    "EventName": "PM_THRESH_EXC_512",
+    "BriefDescription": "Threshold counter exceeded a value of 512"
+  },
+  {,
+    "EventCode": "0x36080",
+    "EventName": "PM_L2_INST",
+    "BriefDescription": "All successful I-side-instruction-fetch (e.g. i-demand, i-prefetch) dispatches for this thread"
+  },
+  {,
+    "EventCode": "0x3504C",
+    "EventName": "PM_IPTEG_FROM_DL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to an instruction side request"
+  },
+  {,
+    "EventCode": "0xD890",
+    "EventName": "PM_LS1_DC_COLLISIONS",
+    "BriefDescription": "Read-write data cache collisions"
+  },
+  {,
+    "EventCode": "0x1688A",
+    "EventName": "PM_ISIDE_DISP",
+    "BriefDescription": "All I-side-instruction-fetch dispatch attempts for this thread"
+  },
+  {,
+    "EventCode": "0x468AA",
+    "EventName": "PM_L3_P1_CO_L31",
+    "BriefDescription": "L3 CO to L3.1 (LCO) port 1 with or without data"
+  },
+  {,
+    "EventCode": "0x28B0",
+    "EventName": "PM_DISP_HELD_TBEGIN",
+    "BriefDescription": "This outer tbegin transaction cannot be dispatched until the previous tend instruction completes"
+  },
+  {,
+    "EventCode": "0xE8A0",
+    "EventName": "PM_LSU3_TM_L1_MISS",
+    "BriefDescription": "Load tm L1 miss"
+  },
+  {,
+    "EventCode": "0x2C05E",
+    "EventName": "PM_INST_GRP_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for an instruction fetch (demand only)"
+  },
+  {,
+    "EventCode": "0xC8BC",
+    "EventName": "PM_STCX_SUCCESS_CMPL",
+    "BriefDescription": "Number of stcx instructions that completed successfully"
+  },
+  {,
+    "EventCode": "0xE098",
+    "EventName": "PM_LSU2_TM_L1_HIT",
+    "BriefDescription": "Load tm hit in L1"
+  },
+  {,
+    "EventCode": "0xE0B8",
+    "EventName": "PM_LS2_TM_DISALLOW",
+    "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it"
+  },
+  {,
+    "EventCode": "0x44044",
+    "EventName": "PM_INST_FROM_L31_ECO_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x16886",
+    "EventName": "PM_CO_DISP_FAIL",
+    "BriefDescription": "CO dispatch failed due to all CO machines being busy"
+  },
+  {,
+    "EventCode": "0x3D146",
+    "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a marked load"
+  },
+  {,
+    "EventCode": "0x16892",
+    "EventName": "PM_L2_ST_MISS_128B",
+    "BriefDescription": "All successful D-side store dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 128B (i.e., M=0)"
+  },
+  {,
+    "EventCode": "0x26890",
+    "EventName": "PM_ISIDE_L2MEMACC",
+    "BriefDescription": "Valid when first beat of data comes in for an I-side fetch where data came from memory"
+  },
+  {,
+    "EventCode": "0xD094",
+    "EventName": "PM_LS2_DC_COLLISIONS",
+    "BriefDescription": "Read-write data cache collisions"
+  },
+  {,
+    "EventCode": "0x3C05E",
+    "EventName": "PM_MEM_RWITM",
+    "BriefDescription": "Memory Read With Intent to Modify for this thread"
+  },
+  {,
+    "EventCode": "0xC090",
+    "EventName": "PM_LSU_STCX",
+    "BriefDescription": "STCX sent to nest, i.e. total"
+  },
+  {,
+    "EventCode": "0x2C120",
+    "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a marked load"
+  },
+  {,
+    "EventCode": "0x36086",
+    "EventName": "PM_L2_RC_ST_DONE",
+    "BriefDescription": "Read-claim machine did store to line that was in Tx or Sx (Tagged or Shared state)"
+  },
+  {,
+    "EventCode": "0xE8AC",
+    "EventName": "PM_TM_FAIL_TX_CONFLICT",
+    "BriefDescription": "Transactional conflict from LSU, gets reported to TEXASR"
+  },
+  {,
+    "EventCode": "0x48A8",
+    "EventName": "PM_DECODE_FUSION_LD_ST_DISP",
+    "BriefDescription": "32-bit displacement D-form and 16-bit displacement X-form"
+  },
+  {,
+    "EventCode": "0x3D144",
+    "EventName": "PM_MRK_DATA_FROM_L2_MEPF_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L2 hit without dispatch conflicts on Mepf state due to a marked load"
+  },
+  {,
+    "EventCode": "0x44046",
+    "EventName": "PM_INST_FROM_L21_MOD",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x40B0",
+    "EventName": "PM_BR_PRED_TAKEN_CR",
+    "BriefDescription": "Conditional Branch that had its direction predicted. I-form branches do not set this event.  In addition, B-form branches which do not use the BHT do not set this event - these are branches with BO-field set to 'always taken' and branches"
+  },
+  {,
+    "EventCode": "0x15040",
+    "EventName": "PM_IPTEG_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x460A6",
+    "EventName": "PM_RD_FORMING_SC",
+    "BriefDescription": "Doesn't occur"
+  },
+  {,
+    "EventCode": "0x35042",
+    "EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to an instruction side request"
+  },
+  {,
+    "EventCode": "0xF898",
+    "EventName": "PM_XLATE_RADIX_MODE",
+    "BriefDescription": "LSU reports every cycle the thread is in radix translation mode (as opposed to HPT mode)"
+  },
+  {,
+    "EventCode": "0x2D142",
+    "EventName": "PM_MRK_DATA_FROM_L3_MEPF",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to a marked load"
+  },
+  {,
+    "EventCode": "0x160B0",
+    "EventName": "PM_L3_P0_NODE_PUMP",
+    "BriefDescription": "L3 PF sent with nodal scope port 0, counts even retried requests"
+  },
+  {,
+    "EventCode": "0xD88C",
+    "EventName": "PM_LSU3_LDMX_FIN",
+    "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491):  The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region.  This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])."
+  },
+  {,
+    "EventCode": "0x36882",
+    "EventName": "PM_L2_LD_HIT",
+    "BriefDescription": "All successful D-side-Ld or I-side-instruction-fetch dispatches for this thread that were L2 hits"
+  },
+  {,
+    "EventCode": "0x168AC",
+    "EventName": "PM_L3_CI_USAGE",
+    "BriefDescription": "Rotating sample of 16 CI or CO actives"
+  },
+  {,
+    "EventCode": "0x20134",
+    "EventName": "PM_MRK_FXU_FIN",
+    "BriefDescription": "fxu marked instr finish"
+  },
+  {,
+    "EventCode": "0x4608E",
+    "EventName": "PM_TM_CAP_OVERFLOW",
+    "BriefDescription": "TM Footprint Capacity Overflow"
+  },
+  {,
+    "EventCode": "0x4F05C",
+    "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L3MISS",
+    "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from beyond the core's L3 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation. The source could be local/remote/distant memory or another core's cache"
+  },
+  {,
+    "EventCode": "0x40014",
+    "EventName": "PM_PROBE_NOP_DISP",
+    "BriefDescription": "ProbeNops dispatched"
+  },
+  {,
+    "EventCode": "0x10052",
+    "EventName": "PM_GRP_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x2505E",
+    "EventName": "PM_BACK_BR_CMPL",
+    "BriefDescription": "Branch instruction completed with a target address less than current instruction address"
+  },
+  {,
+    "EventCode": "0x2688A",
+    "EventName": "PM_ISIDE_DISP_FAIL_OTHER",
+    "BriefDescription": "All I-side-instruction-fetch dispatch attempts for this thread that failed due to reasons other than an address collision conflict with an L2 machine (e.g. no available RC/CO machines)"
+  },
+  {,
+    "EventCode": "0x2001A",
+    "EventName": "PM_NTC_ALL_FIN",
+    "BriefDescription": "Cycles after instruction finished to instruction completed."
+  },
+  {,
+    "EventCode": "0x3005A",
+    "EventName": "PM_ISQ_0_8_ENTRIES",
+    "BriefDescription": "Cycles in which 8 or less Issue Queue entries are in use. This is a shared event, not per thread"
+  },
+  {,
+    "EventCode": "0x3515E",
+    "EventName": "PM_MRK_BACK_BR_CMPL",
+    "BriefDescription": "Marked branch instruction completed with a target address less than current instruction address"
+  },
+  {,
+    "EventCode": "0xF890",
+    "EventName": "PM_LSU1_L1_CAM_CANCEL",
+    "BriefDescription": "ls1 l1 tm cam cancel"
+  },
+  {,
+    "EventCode": "0x268AE",
+    "EventName": "PM_L3_P3_PF_RTY",
+    "BriefDescription": "L3 PF received retry port 3, every retry counted"
+  },
+  {,
+    "EventCode": "0xE884",
+    "EventName": "PM_LS1_ERAT_MISS_PREF",
+    "BriefDescription": "LS1 Erat miss due to prefetch"
+  },
+  {,
+    "EventCode": "0xE89C",
+    "EventName": "PM_LSU1_TM_L1_MISS",
+    "BriefDescription": "Load tm L1 miss"
+  },
+  {,
+    "EventCode": "0x28A8",
+    "EventName": "PM_TM_FAIL_CONF_NON_TM",
+    "BriefDescription": "TM aborted because a conflict occurred with a non-transactional access by another processor"
+  },
+  {,
+    "EventCode": "0x16890",
+    "EventName": "PM_L1PF_L2MEMACC",
+    "BriefDescription": "Valid when first beat of data comes in for an L1PF where data came from memory"
+  },
+  {,
+    "EventCode": "0x4504C",
+    "EventName": "PM_IPTEG_FROM_DMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x1002E",
+    "EventName": "PM_LMQ_MERGE",
+    "BriefDescription": "A demand miss collides with a prefetch for the same line"
+  },
+  {,
+    "EventCode": "0x160B6",
+    "EventName": "PM_L3_WI0_BUSY",
+    "BriefDescription": "Rotating sample of 8 WI valid (duplicate)"
+  },
+  {,
+    "EventCode": "0x368AC",
+    "EventName": "PM_L3_CO0_BUSY",
+    "BriefDescription": "Lifetime, sample of CO machine 0 valid"
+  },
+  {,
+    "EventCode": "0x2E040",
+    "EventName": "PM_DPTEG_FROM_L2_MEPF",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x1D152",
+    "EventName": "PM_MRK_DATA_FROM_DL4",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a marked load"
+  },
+  {,
+    "EventCode": "0x46880",
+    "EventName": "PM_ISIDE_MRU_TOUCH",
+    "BriefDescription": "I-side L2 MRU touch sent to L2 for this thread I-side L2 MRU touch commands sent to the L2 for this thread"
+  },
+  {,
+    "EventCode": "0x508C",
+    "EventName": "PM_SHL_CREATED",
+    "BriefDescription": "Store-Hit-Load Table Entry Created"
+  },
+  {,
+    "EventCode": "0x50B8",
+    "EventName": "PM_TAGE_OVERRIDE_WRONG",
+    "BriefDescription": "The TAGE overrode BHT direction prediction but it was incorrect.  Counted at completion for taken branches only"
+  },
+  {,
+    "EventCode": "0x160AE",
+    "EventName": "PM_L3_P0_PF_RTY",
+    "BriefDescription": "L3 PF received retry port 0, every retry counted"
+  },
+  {,
+    "EventCode": "0x268B2",
+    "EventName": "PM_L3_LOC_GUESS_WRONG",
+    "BriefDescription": "Prefetch scope predictor selected LNS, but was wrong"
+  },
+  {,
+    "EventCode": "0x36088",
+    "EventName": "PM_L2_SYS_GUESS_CORRECT",
+    "BriefDescription": "L2 guess system (VGS or RNS) and guess was correct (ie data beyond-group)"
+  },
+  {,
+    "EventCode": "0x260AE",
+    "EventName": "PM_L3_P2_PF_RTY",
+    "BriefDescription": "L3 PF received retry port 2, every retry counted"
+  },
+  {,
+    "EventCode": "0xD8B0",
+    "EventName": "PM_PTESYNC",
+    "BriefDescription": ""
+  },
+  {,
+    "EventCode": "0x26086",
+    "EventName": "PM_CO_TM_SC_FOOTPRINT",
+    "BriefDescription": "L2 did a cleanifdirty CO to the L3 (ie created an SC line in the L3) OR L2 TM_store hit dirty HPC line and L3 indicated SC line formed in L3 on RDR bus"
+  },
+  {,
+    "EventCode": "0x1E05A",
+    "EventName": "PM_CMPLU_STALL_ANY_SYNC",
+    "BriefDescription": "Cycles in which the NTC sync instruction (isync, lwsync or hwsync) is not allowed to complete"
+  },
+  {,
+    "EventCode": "0xF090",
+    "EventName": "PM_LSU0_L1_CAM_CANCEL",
+    "BriefDescription": "ls0 l1 tm cam cancel"
+  },
+  {,
+    "EventCode": "0xC0A8",
+    "EventName": "PM_LSU_FLUSH_CI",
+    "BriefDescription": "Load was not issued to LSU as a cache inhibited (non-cacheable) load but it was later determined to be cache inhibited"
+  },
+  {,
+    "EventCode": "0x20AC",
+    "EventName": "PM_TM_FAIL_CONF_TM",
+    "BriefDescription": "TM aborted because a conflict occurred with another transaction."
+  },
+  {,
+    "EventCode": "0x588C",
+    "EventName": "PM_SHL_ST_DEP_CREATED",
+    "BriefDescription": "Store-Hit-Load Table Read Hit with entry Enabled"
+  },
+  {,
+    "EventCode": "0x46882",
+    "EventName": "PM_L2_ST_HIT",
+    "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits"
+  },
+  {,
+    "EventCode": "0x360AC",
+    "EventName": "PM_L3_SN0_BUSY",
+    "BriefDescription": "Lifetime, sample of snooper machine 0 valid"
+  },
+  {,
+    "EventCode": "0x3005C",
+    "EventName": "PM_BFU_BUSY",
+    "BriefDescription": "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity"
+  },
+  {,
+    "EventCode": "0x48A0",
+    "EventName": "PM_BR_PRED_PCACHE",
+    "BriefDescription": "Conditional branch completed that used pattern cache prediction"
+  },
+  {,
+    "EventCode": "0x26880",
+    "EventName": "PM_L2_ST_MISS",
+    "BriefDescription": "All successful D-Side Store dispatches that were an L2 miss for this thread"
+  },
+  {,
+    "EventCode": "0xF8B4",
+    "EventName": "PM_DC_PREF_XCONS_ALLOC",
+    "BriefDescription": "Prefetch stream allocated in the Ultra conservative phase by either the hardware prefetch mechanism or software prefetch"
+  },
+  {,
+    "EventCode": "0x35048",
+    "EventName": "PM_IPTEG_FROM_DL2L3_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction side request"
+  },
+  {,
+    "EventCode": "0x260A8",
+    "EventName": "PM_L3_PF_HIT_L3",
+    "BriefDescription": "L3 PF hit in L3 (abandoned)"
+  },
+  {,
+    "EventCode": "0x360B4",
+    "EventName": "PM_L3_PF0_BUSY",
+    "BriefDescription": "Lifetime, sample of PF machine 0 valid"
+  },
+  {,
+    "EventCode": "0xC0B0",
+    "EventName": "PM_LSU_FLUSH_UE",
+    "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time"
+  },
+  {,
+    "EventCode": "0x4013A",
+    "EventName": "PM_MRK_IC_MISS",
+    "BriefDescription": "Marked instruction experienced I cache miss"
+  },
+  {,
+    "EventCode": "0x2088",
+    "EventName": "PM_FLUSH_DISP_SB",
+    "BriefDescription": "Dispatch Flush: Scoreboard"
+  },
+  {,
+    "EventCode": "0x401E8",
+    "EventName": "PM_MRK_DATA_FROM_L2MISS",
+    "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a marked load"
+  },
+  {,
+    "EventCode": "0x3688E",
+    "EventName": "PM_TM_ST_CAUSED_FAIL",
+    "BriefDescription": "TM Store (fav or non-fav) caused another thread to fail"
+  },
+  {,
+    "EventCode": "0x460B2",
+    "EventName": "PM_L3_SYS_GUESS_WRONG",
+    "BriefDescription": "Prefetch scope predictor selected VGS or RNS, but was wrong"
+  },
+  {,
+    "EventCode": "0x58B8",
+    "EventName": "PM_TAGE_OVERRIDE_WRONG_SPEC",
+    "BriefDescription": "The TAGE overrode BHT direction prediction and it was correct.   Includes taken and not taken and is counted at execution time"
+  },
+  {,
+    "EventCode": "0xE890",
+    "EventName": "PM_LSU3_ERAT_HIT",
+    "BriefDescription": "Primary ERAT hit.  There is no secondary ERAT"
+  },
+  {,
+    "EventCode": "0x2898",
+    "EventName": "PM_TM_TABORT_TRECLAIM",
+    "BriefDescription": "Completion time tabortnoncd, tabortcd, treclaim"
+  },
+  {,
+    "EventCode": "0x268A0",
+    "EventName": "PM_L3_CO_L31",
+    "BriefDescription": "L3 CO to L3.1 OR of port 0 and 1 (lossy = may undercount if two cresps come in the same cyc)"
+  },
+  {,
+    "EventCode": "0x5080",
+    "EventName": "PM_THRD_PRIO_4_5_CYC",
+    "BriefDescription": "Cycles thread running at priority level 4 or 5"
+  },
+  {,
+    "EventCode": "0x2505C",
+    "EventName": "PM_VSU_FIN",
+    "BriefDescription": "VSU instruction finished. Up to 4 per cycle"
+  },
+  {,
+    "EventCode": "0x40A4",
+    "EventName": "PM_BR_PRED_CCACHE",
+    "BriefDescription": "Conditional Branch Completed that used the Count Cache for Target Prediction"
+  },
+  {,
+    "EventCode": "0x2E04A",
+    "EventName": "PM_DPTEG_FROM_RL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group (Remote) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4D12E",
+    "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC",
+    "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load"
+  },
+  {,
+    "EventCode": "0xC8B4",
+    "EventName": "PM_LSU_FLUSH_LHL_SHL",
+    "BriefDescription": "The instruction was flushed because of a sequential load/store consistency.  If a load or store hits on an older load that has either been snooped (for loads) or has stale data (for stores)."
+  },
+  {,
+    "EventCode": "0x58A4",
+    "EventName": "PM_FLUSH_LSU",
+    "BriefDescription": "LSU flushes.  Includes all lsu flushes"
+  },
+  {,
+    "EventCode": "0x1D150",
+    "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load"
+  },
+  {,
+    "EventCode": "0xC8A0",
+    "EventName": "PM_LSU1_FALSE_LHS",
+    "BriefDescription": "False LHS match detected"
+  },
+  {,
+    "EventCode": "0x48BC",
+    "EventName": "PM_THRD_PRIO_2_3_CYC",
+    "BriefDescription": "Cycles thread running at priority level 2 or 3"
+  },
+  {,
+    "EventCode": "0x368B2",
+    "EventName": "PM_L3_GRP_GUESS_WRONG_HIGH",
+    "BriefDescription": "Prefetch scope predictor selected GS or NNS, but was wrong because scope was VGS or RNS"
+  },
+  {,
+    "EventCode": "0xE8BC",
+    "EventName": "PM_LS1_PTE_TABLEWALK_CYC",
+    "BriefDescription": "Cycles when a tablewalk is pending on this thread on table 1"
+  },
+  {,
+    "EventCode": "0x1F152",
+    "EventName": "PM_MRK_FAB_RSP_BKILL_CYC",
+    "BriefDescription": "cycles L2 RC took for a bkill"
+  },
+  {,
+    "EventCode": "0x4C124",
+    "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC",
+    "BriefDescription": "Duration in cycles to reload from local core's L3 without conflict due to a marked load"
+  },
+  {,
+    "EventCode": "0x2F14A",
+    "EventName": "PM_MRK_DPTEG_FROM_RL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group (Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x26888",
+    "EventName": "PM_L2_GRP_GUESS_WRONG",
+    "BriefDescription": "L2 guess grp (GS or NNS) and guess was not correct (ie data on-chip OR beyond-group)"
+  },
+  {,
+    "EventCode": "0xC0AC",
+    "EventName": "PM_LSU_FLUSH_EMSH",
+    "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address"
+  },
+  {,
+    "EventCode": "0x260B2",
+    "EventName": "PM_L3_SYS_GUESS_CORRECT",
+    "BriefDescription": "Prefetch scope predictor selected VGS or RNS and was correct"
+  },
+  {,
+    "EventCode": "0x1D146",
+    "EventName": "PM_MRK_DATA_FROM_MEMORY_CYC",
+    "BriefDescription": "Duration in cycles to reload from a memory location including L4 from local remote or distant due to a marked load"
+  },
+  {,
+    "EventCode": "0xE094",
+    "EventName": "PM_LSU0_TM_L1_HIT",
+    "BriefDescription": "Load tm hit in L1"
+  },
+  {,
+    "EventCode": "0x46888",
+    "EventName": "PM_L2_GROUP_PUMP",
+    "BriefDescription": "RC requests that were on group (aka nodal) pump attempts"
+  },
+  {,
+    "EventCode": "0xC08C",
+    "EventName": "PM_LSU_DTLB_MISS_16M_2M",
+    "BriefDescription": "Data TLB Miss page size 16M (HPT) or 2M (Radix)"
+  },
+  {,
+    "EventCode": "0x16080",
+    "EventName": "PM_L2_LD",
+    "BriefDescription": "All successful D-side Load dispatches for this thread (L2 miss + L2 hits)"
+  },
+  {,
+    "EventCode": "0x4505C",
+    "EventName": "PM_MATH_FLOP_CMPL",
+    "BriefDescription": "Math flop instruction completed"
+  },
+  {,
+    "EventCode": "0xC080",
+    "EventName": "PM_LS0_LD_VECTOR_FIN",
+    "BriefDescription": "LS0 finished load vector op"
+  },
+  {,
+    "EventCode": "0x368B0",
+    "EventName": "PM_L3_P1_SYS_PUMP",
+    "BriefDescription": "L3 PF sent with sys scope port 1, counts even retried requests"
+  },
+  {,
+    "EventCode": "0x1F146",
+    "EventName": "PM_MRK_DPTEG_FROM_L31_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x2000C",
+    "EventName": "PM_THRD_ALL_RUN_CYC",
+    "BriefDescription": "Cycles in which all the threads have the run latch set"
+  },
+  {,
+    "EventCode": "0xC0BC",
+    "EventName": "PM_LSU_FLUSH_OTHER",
+    "BriefDescription": "Other LSU flushes including: Sync (sync ack from L2 caused search of LRQ for oldest snooped load, This will either signal a Precise Flush of the oldest snooped load or a Flush Next PPC); Data Valid Flush Next (several cases of this, one example is store and reload are lined up such that a store-hit-reload scenario exists and the CDF has already launched and has gotten bad/stale data); Bad Data Valid Flush Next (might be a few cases of this, one example is a larxa (D$ hit) return data and dval but can't allocate to LMQ (LMQ full or other reason). Already gave dval but can't watch it for snoop_hit_larx. Need to take the “bad dval” back and flush all younger ops)"
+  },
+  {,
+    "EventCode": "0x5094",
+    "EventName": "PM_IC_MISS_ICBI",
+    "BriefDescription": "threaded version, IC Misses where we got EA dir hit but no sector valids were on. ICBI took line out"
+  },
+  {,
+    "EventCode": "0xC8A8",
+    "EventName": "PM_LSU_FLUSH_ATOMIC",
+    "BriefDescription": "Quad-word loads (lq) are considered atomic because they always span at least 2 slices.  If a snoop or store from another thread changes the data the load is accessing between the 2 or 3 pieces of the lq instruction, the lq will be flushed"
+  },
+  {,
+    "EventCode": "0x1E04E",
+    "EventName": "PM_DPTEG_FROM_L2MISS",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4D05E",
+    "EventName": "PM_BR_CMPL",
+    "BriefDescription": "Any Branch instruction completed"
+  },
+  {,
+    "EventCode": "0x260B0",
+    "EventName": "PM_L3_P0_GRP_PUMP",
+    "BriefDescription": "L3 PF sent with grp scope port 0, counts even retried requests"
+  },
+  {,
+    "EventCode": "0x30132",
+    "EventName": "PM_MRK_VSU_FIN",
+    "BriefDescription": "VSU marked instr finish"
+  },
+  {,
+    "EventCode": "0x2D120",
+    "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "The processor's data cache was reloaded with either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load"
+  },
+  {,
+    "EventCode": "0x1E048",
+    "EventName": "PM_DPTEG_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with either shared or modified data from another core's L2/L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x16086",
+    "EventName": "PM_L2_SN_M_WR_DONE",
+    "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
+  },
+  {,
+    "EventCode": "0x489C",
+    "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL",
+    "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken.  Counted at completion time"
+  },
+  {,
+    "EventCode": "0xF0B8",
+    "EventName": "PM_LS0_UNALIGNED_ST",
+    "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than what normally would be required of the Store of that size.  If the Store wraps from slice 3 to slice 0, there is an additional 3-cycle penalty"
+  },
+  {,
+    "EventCode": "0x20132",
+    "EventName": "PM_MRK_DFU_FIN",
+    "BriefDescription": "Decimal Unit marked Instruction Finish"
+  },
+  {,
+    "EventCode": "0x160A6",
+    "EventName": "PM_TM_SC_CO",
+    "BriefDescription": "L3 castout of line that was StoreCopy (original value of speculatively written line) in a Transaction"
+  },
+  {,
+    "EventCode": "0xC8B0",
+    "EventName": "PM_LSU_FLUSH_LHS",
+    "BriefDescription": "Effective Address alias flush : no EA match but Real Address match.  If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed"
+  },
+  {,
+    "EventCode": "0x16084",
+    "EventName": "PM_L2_RCLD_DISP",
+    "BriefDescription": "All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread"
+  },
+  {,
+    "EventCode": "0x3F150",
+    "EventName": "PM_MRK_ST_DRAIN_TO_L2DISP_CYC",
+    "BriefDescription": "cycles to drain st from core to L2"
+  },
+  {,
+    "EventCode": "0x168A4",
+    "EventName": "PM_L3_MISS",
+    "BriefDescription": "L3 Misses (L2 miss also missing L3, including data/instrn/xlate)"
+  },
+  {,
+    "EventCode": "0xF080",
+    "EventName": "PM_LSU_STCX_FAIL",
+    "BriefDescription": ""
+  },
+  {,
+    "EventCode": "0x30038",
+    "EventName": "PM_CMPLU_STALL_DMISS_LMEM",
+    "BriefDescription": "Completion stall due to cache miss that resolves in local memory"
+  },
+  {,
+    "EventCode": "0x28A4",
+    "EventName": "PM_MRK_TEND_FAIL",
+    "BriefDescription": "Nested or not nested tend failed for a marked tend instruction"
+  },
+  {,
+    "EventCode": "0x100FC",
+    "EventName": "PM_LD_REF_L1",
+    "BriefDescription": "All L1 D cache load references counted at finish, gated by reject"
+  },
+  {,
+    "EventCode": "0xC0A0",
+    "EventName": "PM_LSU0_FALSE_LHS",
+    "BriefDescription": "False LHS match detected"
+  },
+  {,
+    "EventCode": "0x468A8",
+    "EventName": "PM_SN_MISS",
+    "BriefDescription": "Any port snooper L3 miss or collision.  Up to 4 can happen in a cycle but we only count 1"
+  },
+  {,
+    "EventCode": "0x36888",
+    "EventName": "PM_L2_SYS_GUESS_WRONG",
+    "BriefDescription": "L2 guess system (VGS or RNS) and guess was not correct (ie data ^beyond-group)"
+  },
+  {,
+    "EventCode": "0x2080",
+    "EventName": "PM_EE_OFF_EXT_INT",
+    "BriefDescription": "Cycles MSR[EE] is off and external interrupts are active"
+  },
+  {,
+    "EventCode": "0xE8B8",
+    "EventName": "PM_LS3_TM_DISALLOW",
+    "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it"
+  },
+  {,
+    "EventCode": "0x2688E",
+    "EventName": "PM_TM_FAV_CAUSED_FAIL",
+    "BriefDescription": "TM Load (fav) caused another thread to fail"
+  },
+  {,
+    "EventCode": "0x16090",
+    "EventName": "PM_SN0_BUSY",
+    "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
+  },
+  {,
+    "EventCode": "0x360AE",
+    "EventName": "PM_L3_P0_CO_RTY",
+    "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
+  },
+  {,
+    "EventCode": "0x168A8",
+    "EventName": "PM_L3_WI_USAGE",
+    "BriefDescription": "Lifetime, sample of Write Inject machine 0 valid"
+  },
+  {,
+    "EventCode": "0x468A2",
+    "EventName": "PM_L3_LAT_CI_MISS",
+    "BriefDescription": "L3 Lateral Castins Miss"
+  },
+  {,
+    "EventCode": "0x4090",
+    "EventName": "PM_IC_PREF_CANCEL_PAGE",
+    "BriefDescription": "Prefetch Canceled due to page boundary"
+  },
+  {,
+    "EventCode": "0x460AA",
+    "EventName": "PM_L3_P0_CO_L31",
+    "BriefDescription": "L3 CO to L3.1 (LCO) port 0 with or without data"
+  },
+  {,
+    "EventCode": "0x2880",
+    "EventName": "PM_FLUSH_DISP",
+    "BriefDescription": "Dispatch flush"
+  },
+  {,
+    "EventCode": "0x168AE",
+    "EventName": "PM_L3_P1_PF_RTY",
+    "BriefDescription": "L3 PF received retry port 1, every retry counted"
+  },
+  {,
+    "EventCode": "0x46082",
+    "EventName": "PM_L2_ST_DISP",
+    "BriefDescription": "All successful D-side store dispatches for this thread"
+  },
+  {,
+    "EventCode": "0x36880",
+    "EventName": "PM_L2_INST_MISS",
+    "BriefDescription": "All successful I-side-instruction-fetch (e.g. i-demand, i-prefetch) dispatches for this thread that were an L2 miss"
+  },
+  {,
+    "EventCode": "0xE084",
+    "EventName": "PM_LS0_ERAT_MISS_PREF",
+    "BriefDescription": "LS0 Erat miss due to prefetch"
+  },
+  {,
+    "EventCode": "0x409C",
+    "EventName": "PM_BR_PRED",
+    "BriefDescription": "Conditional Branch Executed in which the HW predicted the Direction or Target.  Includes taken and not taken and is counted at execution time"
+  },
+  {,
+    "EventCode": "0x2D144",
+    "EventName": "PM_MRK_DATA_FROM_L31_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x360A4",
+    "EventName": "PM_L3_CO_LCO",
+    "BriefDescription": "Total L3 COs occurred on LCO L3.1 (good cresp, may end up in mem on a retry)"
+  },
+  {,
+    "EventCode": "0x4890",
+    "EventName": "PM_IC_PREF_CANCEL_HIT",
+    "BriefDescription": "Prefetch Canceled due to icache hit"
+  },
+  {,
+    "EventCode": "0x268A8",
+    "EventName": "PM_RD_HIT_PF",
+    "BriefDescription": "RD machine hit L3 PF machine"
+  },
+  {,
+    "EventCode": "0x16880",
+    "EventName": "PM_L2_ST",
+    "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)"
+  },
+  {,
+    "EventCode": "0x4098",
+    "EventName": "PM_IC_DEMAND_L2_BHT_REDIRECT",
+    "BriefDescription": "L2 I cache demand request due to BHT redirect, branch redirect (2 bubbles 3 cycles)"
+  },
+  {,
+    "EventCode": "0xD0B4",
+    "EventName": "PM_LSU0_SRQ_S0_VALID_CYC",
+    "BriefDescription": "Slot 0 of SRQ valid"
+  },
+  {,
+    "EventCode": "0x160AA",
+    "EventName": "PM_L3_P0_LCO_NO_DATA",
+    "BriefDescription": "Dataless L3 LCO sent port 0"
+  },
+  {,
+    "EventCode": "0x208C",
+    "EventName": "PM_CLB_HELD",
+    "BriefDescription": "CLB (control logic block - indicates quadword fetch block) Hold: Any Reason"
+  },
+  {,
+    "EventCode": "0xF88C",
+    "EventName": "PM_LSU3_STORE_REJECT",
+    "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met"
+  },
+  {,
+    "EventCode": "0x200F2",
+    "EventName": "PM_INST_DISP",
+    "BriefDescription": "# PPC Dispatched"
+  },
+  {,
+    "EventCode": "0x4E05E",
+    "EventName": "PM_TM_OUTER_TBEGIN_DISP",
+    "BriefDescription": "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. This is a speculative count, which includes flushed instructions"
+  },
+  {,
+    "EventCode": "0x2D018",
+    "EventName": "PM_CMPLU_STALL_EXEC_UNIT",
+    "BriefDescription": "Completion stall due to execution units (FXU/VSU/CRU)"
+  },
+  {,
+    "EventCode": "0x20B0",
+    "EventName": "PM_LSU_FLUSH_NEXT",
+    "BriefDescription": "LSU flush next reported at flush time.  Sometimes these also come with an exception"
+  },
+  {,
+    "EventCode": "0x3880",
+    "EventName": "PM_ISU2_ISS_HOLD_ALL",
+    "BriefDescription": "All ISU rejects"
+  },
+  {,
+    "EventCode": "0xC884",
+    "EventName": "PM_LS3_LD_VECTOR_FIN",
+    "BriefDescription": "LS3 finished load vector op"
+  },
+  {,
+    "EventCode": "0x360A8",
+    "EventName": "PM_L3_CO",
+    "BriefDescription": "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))"
+  },
+  {,
+    "EventCode": "0x368A4",
+    "EventName": "PM_L3_CINJ",
+    "BriefDescription": "L3 castin of cache inject"
+  },
+  {,
+    "EventCode": "0xC890",
+    "EventName": "PM_LSU_NCST",
+    "BriefDescription": "Asserts when an i=1 store op is sent to the nest. No record of issue pipe (LS0/LS1) is maintained so this is for both pipes. Probably don't need separate LS0 and LS1"
+  },
+  {,
+    "EventCode": "0xD0B8",
+    "EventName": "PM_LSU_LMQ_FULL_CYC",
+    "BriefDescription": "Counts the number of cycles the LMQ is full"
+  },
+  {,
+    "EventCode": "0x168B2",
+    "EventName": "PM_L3_GRP_GUESS_CORRECT",
+    "BriefDescription": "Prefetch scope predictor selected GS or NNS and was correct"
+  },
+  {,
+    "EventCode": "0x48A4",
+    "EventName": "PM_STOP_FETCH_PENDING_CYC",
+    "BriefDescription": "Fetching is stopped due to an incoming instruction that will result in a flush"
+  },
+  {,
+    "EventCode": "0x36884",
+    "EventName": "PM_L2_RCST_DISP_FAIL_ADDR",
+    "BriefDescription": "All D-side store dispatch attempts for this thread that failed due to address collision with RC/CO/SN/SQ"
+  },
+  {,
+    "EventCode": "0x260AC",
+    "EventName": "PM_L3_PF_USAGE",
+    "BriefDescription": "Rotating sample of 32 PF actives"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/powerpc/power9/pipeline.json b/pmu-events/arch/powerpc/power9/pipeline.json
new file mode 100644
index 0000000..b4772f5
--- /dev/null
+++ b/pmu-events/arch/powerpc/power9/pipeline.json
@@ -0,0 +1,532 @@
+[
+  {,
+    "EventCode": "0x4D04C",
+    "EventName": "PM_DFU_BUSY",
+    "BriefDescription": "Cycles in which all 4 Decimal Floating Point units are busy. The DFU is running at capacity"
+  },
+  {,
+    "EventCode": "0x100F6",
+    "EventName": "PM_IERAT_RELOAD",
+    "BriefDescription": "Number of I-ERAT reloads"
+  },
+  {,
+    "EventCode": "0x201E2",
+    "EventName": "PM_MRK_LD_MISS_L1",
+    "BriefDescription": "Marked DL1 Demand Miss counted at exec time. Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
+  },
+  {,
+    "EventCode": "0x40010",
+    "EventName": "PM_PMC3_OVERFLOW",
+    "BriefDescription": "Overflow from counter 3"
+  },
+  {,
+    "EventCode": "0x1005A",
+    "EventName": "PM_CMPLU_STALL_DFLONG",
+    "BriefDescription": "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Qualified by multicycle"
+  },
+  {,
+    "EventCode": "0x4D140",
+    "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x3F14C",
+    "EventName": "PM_MRK_DPTEG_FROM_DL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x1E040",
+    "EventName": "PM_DPTEG_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x24052",
+    "EventName": "PM_FXU_IDLE",
+    "BriefDescription": "Cycles in which FXU0, FXU1, FXU2, and FXU3 are all idle"
+  },
+  {,
+    "EventCode": "0x1E054",
+    "EventName": "PM_CMPLU_STALL",
+    "BriefDescription": "Nothing completed and ICT not empty"
+  },
+  {,
+    "EventCode": "0x2",
+    "EventName": "PM_INST_CMPL",
+    "BriefDescription": "Number of PowerPC Instructions that completed."
+  },
+  {,
+    "EventCode": "0x3D058",
+    "EventName": "PM_VSU_DP_FSQRT_FDIV",
+    "BriefDescription": "vector versions of fdiv,fsqrt"
+  },
+  {,
+    "EventCode": "0x10006",
+    "EventName": "PM_DISP_HELD",
+    "BriefDescription": "Dispatch Held"
+  },
+  {,
+    "EventCode": "0x200F8",
+    "EventName": "PM_EXT_INT",
+    "BriefDescription": "external interrupt"
+  },
+  {,
+    "EventCode": "0x20008",
+    "EventName": "PM_ICT_EMPTY_CYC",
+    "BriefDescription": "Cycles in which the ICT is completely empty. No itags are assigned to any thread"
+  },
+  {,
+    "EventCode": "0x4F146",
+    "EventName": "PM_MRK_DPTEG_FROM_L21_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x10056",
+    "EventName": "PM_MEM_READ",
+    "BriefDescription": "Reads from Memory from this thread (includes data/inst/xlate/l1prefetch/inst prefetch). Includes L4"
+  },
+  {,
+    "EventCode": "0x3C04C",
+    "EventName": "PM_DATA_FROM_DL4",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a demand load"
+  },
+  {,
+    "EventCode": "0x4E046",
+    "EventName": "PM_DPTEG_FROM_L21_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x2E016",
+    "EventName": "PM_NTC_ISSUE_HELD_ARB",
+    "BriefDescription": "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)"
+  },
+  {,
+    "EventCode": "0x15156",
+    "EventName": "PM_SYNC_MRK_FX_DIVIDE",
+    "BriefDescription": "Marked fixed point divide that can cause a synchronous interrupt"
+  },
+  {,
+    "EventCode": "0x1C056",
+    "EventName": "PM_DERAT_MISS_4K",
+    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K"
+  },
+  {,
+    "EventCode": "0x2F142",
+    "EventName": "PM_MRK_DPTEG_FROM_L3_MEPF",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4C15C",
+    "EventName": "PM_MRK_DERAT_MISS_16G_1G",
+    "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16G (hpt mode) and 1G (radix mode)"
+  },
+  {,
+    "EventCode": "0x10024",
+    "EventName": "PM_PMC5_OVERFLOW",
+    "BriefDescription": "Overflow from counter 5"
+  },
+  {,
+    "EventCode": "0x4505E",
+    "EventName": "PM_FLOP_CMPL",
+    "BriefDescription": "Floating Point Operation Finished"
+  },
+  {,
+    "EventCode": "0x2C018",
+    "EventName": "PM_CMPLU_STALL_DMISS_L21_L31",
+    "BriefDescription": "Completion stall by Dcache miss which resolved on chip (excluding local L2/L3)"
+  },
+  {,
+    "EventCode": "0x4006A",
+    "EventName": "PM_IERAT_RELOAD_16M",
+    "BriefDescription": "IERAT Reloaded (Miss) for a 16M page"
+  },
+  {,
+    "EventCode": "0x4E010",
+    "EventName": "PM_ICT_NOSLOT_IC_L3MISS",
+    "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache"
+  },
+  {,
+    "EventCode": "0x4D01C",
+    "EventName": "PM_ICT_NOSLOT_DISP_HELD_SYNC",
+    "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch"
+  },
+  {,
+    "EventCode": "0x2D01A",
+    "EventName": "PM_ICT_NOSLOT_IC_MISS",
+    "BriefDescription": "Ict empty for this thread due to Icache Miss"
+  },
+  {,
+    "EventCode": "0x4F14A",
+    "EventName": "PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x30058",
+    "EventName": "PM_TLBIE_FIN",
+    "BriefDescription": "tlbie finished"
+  },
+  {,
+    "EventCode": "0x100F8",
+    "EventName": "PM_ICT_NOSLOT_CYC",
+    "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread"
+  },
+  {,
+    "EventCode": "0x3E042",
+    "EventName": "PM_DPTEG_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x1F140",
+    "EventName": "PM_MRK_DPTEG_FROM_L2_NO_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x1F058",
+    "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L2",
+    "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L2 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation"
+  },
+  {,
+    "EventCode": "0x1D14A",
+    "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x10050",
+    "EventName": "PM_CHIP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x45058",
+    "EventName": "PM_IC_MISS_CMPL",
+    "BriefDescription": "Non-speculative icache miss, counted at completion"
+  },
+  {,
+    "EventCode": "0x2D150",
+    "EventName": "PM_MRK_DERAT_MISS_4K",
+    "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 4K"
+  },
+  {,
+    "EventCode": "0x34058",
+    "EventName": "PM_ICT_NOSLOT_BR_MPRED_ICMISS",
+    "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred"
+  },
+  {,
+    "EventCode": "0x10022",
+    "EventName": "PM_PMC2_SAVED",
+    "BriefDescription": "PMC2 Rewind Value saved"
+  },
+  {,
+    "EventCode": "0x2000A",
+    "EventName": "PM_HV_CYC",
+    "BriefDescription": "Cycles in which msr_hv is high. Note that this event does not take msr_pr into consideration"
+  },
+  {,
+    "EventCode": "0x1F144",
+    "EventName": "PM_MRK_DPTEG_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x300FC",
+    "EventName": "PM_DTLB_MISS",
+    "BriefDescription": "Data PTEG reload"
+  },
+  {,
+    "EventCode": "0x2C046",
+    "EventName": "PM_DATA_FROM_RL2L3_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load"
+  },
+  {,
+    "EventCode": "0x20052",
+    "EventName": "PM_GRP_PUMP_MPRED",
+    "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x3F05A",
+    "EventName": "PM_RADIX_PWC_L2_PDE_FROM_L3",
+    "BriefDescription": "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L3 data cache"
+  },
+  {,
+    "EventCode": "0x1E04A",
+    "EventName": "PM_DPTEG_FROM_RL2L3_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x10064",
+    "EventName": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN",
+    "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch"
+  },
+  {,
+    "EventCode": "0x2E046",
+    "EventName": "PM_DPTEG_FROM_RL2L3_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4F14C",
+    "EventName": "PM_MRK_DPTEG_FROM_DMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x2E042",
+    "EventName": "PM_DPTEG_FROM_L3_MEPF",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x2D012",
+    "EventName": "PM_CMPLU_STALL_DFU",
+    "BriefDescription": "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Not qualified by multicycle"
+  },
+  {,
+    "EventCode": "0x3C054",
+    "EventName": "PM_DERAT_MISS_16M_2M",
+    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M (HPT mode) or 2M (Radix mode)"
+  },
+  {,
+    "EventCode": "0x4C04C",
+    "EventName": "PM_DATA_FROM_DMEM",
+    "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a demand load"
+  },
+  {,
+    "EventCode": "0x30022",
+    "EventName": "PM_PMC4_SAVED",
+    "BriefDescription": "PMC4 Rewind Value saved (matched condition)"
+  },
+  {,
+    "EventCode": "0x200F4",
+    "EventName": "PM_RUN_CYC",
+    "BriefDescription": "Run_cycles"
+  },
+  {,
+    "EventCode": "0x400F2",
+    "EventName": "PM_1PLUS_PPC_DISP",
+    "BriefDescription": "Cycles at least one Instr Dispatched"
+  },
+  {,
+    "EventCode": "0x3D148",
+    "EventName": "PM_MRK_DATA_FROM_L21_MOD_CYC",
+    "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L2 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x2F146",
+    "EventName": "PM_MRK_DPTEG_FROM_RL2L3_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4E01A",
+    "EventName": "PM_ICT_NOSLOT_DISP_HELD",
+    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason"
+  },
+  {,
+    "EventCode": "0x401EC",
+    "EventName": "PM_THRESH_EXC_2048",
+    "BriefDescription": "Threshold counter exceeded a value of 2048"
+  },
+  {,
+    "EventCode": "0x35150",
+    "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x3E052",
+    "EventName": "PM_ICT_NOSLOT_IC_L3",
+    "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3"
+  },
+  {,
+    "EventCode": "0x2405A",
+    "EventName": "PM_NTC_FIN",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. This event is used to account for cycles in which work is being completed in the CPI stack"
+  },
+  {,
+    "EventCode": "0x40052",
+    "EventName": "PM_PUMP_MPRED",
+    "BriefDescription": "Pump misprediction. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x30056",
+    "EventName": "PM_TM_ABORTS",
+    "BriefDescription": "Number of TM transactions aborted"
+  },
+  {,
+    "EventCode": "0x2404C",
+    "EventName": "PM_INST_FROM_MEMORY",
+    "BriefDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x30024",
+    "EventName": "PM_PMC6_OVERFLOW",
+    "BriefDescription": "Overflow from counter 6"
+  },
+  {,
+    "EventCode": "0x10068",
+    "EventName": "PM_BRU_FIN",
+    "BriefDescription": "Branch Instruction Finished"
+  },
+  {,
+    "EventCode": "0x3D154",
+    "EventName": "PM_MRK_DERAT_MISS_16M_2M",
+    "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16M (hpt mode) or 2M (radix mode)"
+  },
+  {,
+    "EventCode": "0x30020",
+    "EventName": "PM_PMC2_REWIND",
+    "BriefDescription": "PMC2 Rewind Event (did not match condition)"
+  },
+  {,
+    "EventCode": "0x40064",
+    "EventName": "PM_DUMMY2_REMOVE_ME",
+    "BriefDescription": "Space holder for LS_PC_RELOAD_RA"
+  },
+  {,
+    "EventCode": "0x3F148",
+    "EventName": "PM_MRK_DPTEG_FROM_DL2L3_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4D01E",
+    "EventName": "PM_ICT_NOSLOT_BR_MPRED",
+    "BriefDescription": "Ict empty for this thread due to branch mispred"
+  },
+  {,
+    "EventCode": "0x1F148",
+    "EventName": "PM_MRK_DPTEG_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x3E046",
+    "EventName": "PM_DPTEG_FROM_L21_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x2F144",
+    "EventName": "PM_MRK_DPTEG_FROM_L31_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x14052",
+    "EventName": "PM_INST_GRP_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch"
+  },
+  {,
+    "EventCode": "0xD0A8",
+    "EventName": "PM_DSLB_MISS",
+    "BriefDescription": "gate_and(sd_pc_c0_comp_valid AND sd_pc_c0_comp_thread(0:1)=tid,sd_pc_c0_comp_ppc_count(0:3)) + gate_and(sd_pc_c1_comp_valid AND sd_pc_c1_comp_thread(0:1)=tid,sd_pc_c1_comp_ppc_count(0:3))"
+  },
+  {,
+    "EventCode": "0x4C058",
+    "EventName": "PM_MEM_CO",
+    "BriefDescription": "Memory castouts from this thread"
+  },
+  {,
+    "EventCode": "0x40004",
+    "EventName": "PM_FXU_FIN",
+    "BriefDescription": "The fixed point unit finished an instruction. Instructions that finish may not necessarily complete."
+  },
+  {,
+    "EventCode": "0x2C054",
+    "EventName": "PM_DERAT_MISS_64K",
+    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K"
+  },
+  {,
+    "EventCode": "0x10018",
+    "EventName": "PM_IC_DEMAND_CYC",
+    "BriefDescription": "Icache miss demand cycles"
+  },
+  {,
+    "EventCode": "0x2D14E",
+    "EventName": "PM_MRK_DATA_FROM_L21_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x3405C",
+    "EventName": "PM_CMPLU_STALL_DPLONG",
+    "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Qualified by NOT vector AND multicycle"
+  },
+  {,
+    "EventCode": "0x4D052",
+    "EventName": "PM_2FLOP_CMPL",
+    "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg"
+  },
+  {,
+    "EventCode": "0x1F142",
+    "EventName": "PM_MRK_DPTEG_FROM_L2",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x40062",
+    "EventName": "PM_DUMMY1_REMOVE_ME",
+    "BriefDescription": "Space holder for L2_PC_PM_MK_LDST_SCOPE_PRED_STATUS"
+  },
+  {,
+    "EventCode": "0x4C012",
+    "EventName": "PM_CMPLU_STALL_ERAT_MISS",
+    "BriefDescription": "Finish stall because the NTF instruction was a load or store that suffered a translation miss"
+  },
+  {,
+    "EventCode": "0x4D050",
+    "EventName": "PM_VSU_NON_FLOP_CMPL",
+    "BriefDescription": "Non FLOP operation completed"
+  },
+  {,
+    "EventCode": "0x2E012",
+    "EventName": "PM_TM_TX_PASS_RUN_CYC",
+    "BriefDescription": "cycles spent in successful transactions"
+  },
+  {,
+    "EventCode": "0x4D04E",
+    "EventName": "PM_VSU_FSQRT_FDIV",
+    "BriefDescription": "four flops operation (fdiv,fsqrt) Scalar Instructions only"
+  },
+  {,
+    "EventCode": "0x4C120",
+    "EventName": "PM_MRK_DATA_FROM_L2_MEPF",
+    "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to a marked load"
+  },
+  {,
+    "EventCode": "0x10062",
+    "EventName": "PM_LD_L3MISS_PEND_CYC",
+    "BriefDescription": "Cycles L3 miss was pending for this thread"
+  },
+  {,
+    "EventCode": "0x2F14C",
+    "EventName": "PM_MRK_DPTEG_FROM_MEMORY",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x14050",
+    "EventName": "PM_INST_CHIP_PUMP_CPRED",
+    "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for an instruction fetch"
+  },
+  {,
+    "EventCode": "0x2000E",
+    "EventName": "PM_FXU_BUSY",
+    "BriefDescription": "Cycles in which all 4 FXUs are busy. The FXU is running at capacity"
+  },
+  {,
+    "EventCode": "0x20066",
+    "EventName": "PM_TLB_MISS",
+    "BriefDescription": "TLB Miss (I + D)"
+  },
+  {,
+    "EventCode": "0x10054",
+    "EventName": "PM_PUMP_CPRED",
+    "BriefDescription": "Pump prediction correct. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x4D124",
+    "EventName": "PM_MRK_DATA_FROM_L31_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x400F8",
+    "EventName": "PM_FLUSH",
+    "BriefDescription": "Flush (any type)"
+  },
+  {,
+    "EventCode": "0x30004",
+    "EventName": "PM_CMPLU_STALL_EMQ_FULL",
+    "BriefDescription": "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full"
+  },
+  {,
+    "EventCode": "0x1D154",
+    "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC",
+    "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/powerpc/power9/pmc.json b/pmu-events/arch/powerpc/power9/pmc.json
new file mode 100644
index 0000000..8b3b0f3
--- /dev/null
+++ b/pmu-events/arch/powerpc/power9/pmc.json
@@ -0,0 +1,117 @@
+[
+  {,
+    "EventCode": "0x20036",
+    "EventName": "PM_BR_2PATH",
+    "BriefDescription": "Branches that are not strongly biased"
+  },
+  {,
+    "EventCode": "0x40056",
+    "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH",
+    "BriefDescription": "Local memory above threshold for LSU medium"
+  },
+  {,
+    "EventCode": "0x40118",
+    "EventName": "PM_MRK_DCACHE_RELOAD_INTV",
+    "BriefDescription": "Combined Intervention event"
+  },
+  {,
+    "EventCode": "0x4F148",
+    "EventName": "PM_MRK_DPTEG_FROM_DL2L3_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x301E8",
+    "EventName": "PM_THRESH_EXC_64",
+    "BriefDescription": "Threshold counter exceeded a value of 64"
+  },
+  {,
+    "EventCode": "0x4E04E",
+    "EventName": "PM_DPTEG_FROM_L3MISS",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x40050",
+    "EventName": "PM_SYS_PUMP_MPRED_RTY",
+    "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+  },
+  {,
+    "EventCode": "0x1F14E",
+    "EventName": "PM_MRK_DPTEG_FROM_L2MISS",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4D018",
+    "EventName": "PM_CMPLU_STALL_BRU",
+    "BriefDescription": "Completion stall due to a Branch Unit"
+  },
+  {,
+    "EventCode": "0x45052",
+    "EventName": "PM_4FLOP_CMPL",
+    "BriefDescription": "4 FLOP instruction completed"
+  },
+  {,
+    "EventCode": "0x3D142",
+    "EventName": "PM_MRK_DATA_FROM_LMEM",
+    "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a marked load"
+  },
+  {,
+    "EventCode": "0x4C01E",
+    "EventName": "PM_CMPLU_STALL_CRYPTO",
+    "BriefDescription": "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish"
+  },
+  {,
+    "EventCode": "0x3000C",
+    "EventName": "PM_FREQ_DOWN",
+    "BriefDescription": "Power Management: Below Threshold B"
+  },
+  {,
+    "EventCode": "0x4D128",
+    "EventName": "PM_MRK_DATA_FROM_LMEM_CYC",
+    "BriefDescription": "Duration in cycles to reload from the local chip's Memory due to a marked load"
+  },
+  {,
+    "EventCode": "0x4D054",
+    "EventName": "PM_8FLOP_CMPL",
+    "BriefDescription": "8 FLOP instruction completed"
+  },
+  {,
+    "EventCode": "0x10026",
+    "EventName": "PM_TABLEWALK_CYC",
+    "BriefDescription": "Cycles when an instruction tablewalk is active"
+  },
+  {,
+    "EventCode": "0x2C012",
+    "EventName": "PM_CMPLU_STALL_DCACHE_MISS",
+    "BriefDescription": "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest"
+  },
+  {,
+    "EventCode": "0x2E04C",
+    "EventName": "PM_DPTEG_FROM_MEMORY",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x3F142",
+    "EventName": "PM_MRK_DPTEG_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x4F142",
+    "EventName": "PM_MRK_DPTEG_FROM_L3",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x10060",
+    "EventName": "PM_TM_TRANS_RUN_CYC",
+    "BriefDescription": "run cycles in transactional state"
+  },
+  {,
+    "EventCode": "0x1E04C",
+    "EventName": "PM_DPTEG_FROM_LL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x45050",
+    "EventName": "PM_1FLOP_CMPL",
+    "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/powerpc/power9/translation.json b/pmu-events/arch/powerpc/power9/translation.json
new file mode 100644
index 0000000..b276426
--- /dev/null
+++ b/pmu-events/arch/powerpc/power9/translation.json
@@ -0,0 +1,227 @@
+[
+  {,
+    "EventCode": "0x1E",
+    "EventName": "PM_CYC",
+    "BriefDescription": "Processor cycles"
+  },
+  {,
+    "EventCode": "0x30010",
+    "EventName": "PM_PMC2_OVERFLOW",
+    "BriefDescription": "Overflow from counter 2"
+  },
+  {,
+    "EventCode": "0x3C046",
+    "EventName": "PM_DATA_FROM_L21_SHR",
+    "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a demand load"
+  },
+  {,
+    "EventCode": "0x4D05C",
+    "EventName": "PM_DP_QP_FLOP_CMPL",
+    "BriefDescription": "Double-Precision or Quad-Precision instruction completed"
+  },
+  {,
+    "EventCode": "0x4E04C",
+    "EventName": "PM_DPTEG_FROM_DMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x20016",
+    "EventName": "PM_ST_FIN",
+    "BriefDescription": "Store finish count. Includes speculative activity"
+  },
+  {,
+    "EventCode": "0x1504A",
+    "EventName": "PM_IPTEG_FROM_RL2L3_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request"
+  },
+  {,
+    "EventCode": "0x40132",
+    "EventName": "PM_MRK_LSU_FIN",
+    "BriefDescription": "lsu marked instr PPC finish"
+  },
+  {,
+    "EventCode": "0x3C05C",
+    "EventName": "PM_CMPLU_STALL_VFXU",
+    "BriefDescription": "Finish stall due to a vector fixed point instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes"
+  },
+  {,
+    "EventCode": "0x30066",
+    "EventName": "PM_LSU_FIN",
+    "BriefDescription": "LSU Finished a PPC instruction (up to 4 per cycle)"
+  },
+  {,
+    "EventCode": "0x2011C",
+    "EventName": "PM_MRK_NTC_CYC",
+    "BriefDescription": "Cycles during which the marked instruction is next to complete (completion is held up because the marked instruction hasn't completed yet)"
+  },
+  {,
+    "EventCode": "0x3E048",
+    "EventName": "PM_DPTEG_FROM_DL2L3_SHR",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x2E018",
+    "EventName": "PM_CMPLU_STALL_VFXLONG",
+    "BriefDescription": "Completion stall due to a long latency vector fixed point instruction (division, square root)"
+  },
+  {,
+    "EventCode": "0x1C04E",
+    "EventName": "PM_DATA_FROM_L2MISS_MOD",
+    "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a demand load"
+  },
+  {,
+    "EventCode": "0x15048",
+    "EventName": "PM_IPTEG_FROM_ON_CHIP_CACHE",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a instruction side request"
+  },
+  {,
+    "EventCode": "0x34046",
+    "EventName": "PM_INST_FROM_L21_SHR",
+    "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x1E058",
+    "EventName": "PM_STCX_FAIL",
+    "BriefDescription": "stcx failed"
+  },
+  {,
+    "EventCode": "0x300F0",
+    "EventName": "PM_ST_MISS_L1",
+    "BriefDescription": "Store Missed L1"
+  },
+  {,
+    "EventCode": "0x4C046",
+    "EventName": "PM_DATA_FROM_L21_MOD",
+    "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a demand load"
+  },
+  {,
+    "EventCode": "0x2504A",
+    "EventName": "PM_IPTEG_FROM_RL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a instruction side request"
+  },
+  {,
+    "EventCode": "0x2003E",
+    "EventName": "PM_LSU_LMQ_SRQ_EMPTY_CYC",
+    "BriefDescription": "Cycles in which the LSU is empty for all threads (lmq and srq are completely empty)"
+  },
+  {,
+    "EventCode": "0x201E6",
+    "EventName": "PM_THRESH_EXC_32",
+    "BriefDescription": "Threshold counter exceeded a value of 32"
+  },
+  {,
+    "EventCode": "0x4405C",
+    "EventName": "PM_CMPLU_STALL_VDP",
+    "BriefDescription": "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Not qualified multicycle. Qualified by vector"
+  },
+  {,
+    "EventCode": "0x4D010",
+    "EventName": "PM_PMC1_SAVED",
+    "BriefDescription": "PMC1 Rewind Value saved"
+  },
+  {,
+    "EventCode": "0x44042",
+    "EventName": "PM_INST_FROM_L3",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x200FE",
+    "EventName": "PM_DATA_FROM_L2MISS",
+    "BriefDescription": "Demand LD - L2 Miss (not L2 hit)"
+  },
+  {,
+    "EventCode": "0x2D14A",
+    "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC",
+    "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x10028",
+    "EventName": "PM_STALL_END_ICT_EMPTY",
+    "BriefDescription": "The number of times the core transitioned from a stall to ICT-empty for this thread"
+  },
+  {,
+    "EventCode": "0x2504C",
+    "EventName": "PM_IPTEG_FROM_MEMORY",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a instruction side request"
+  },
+  {,
+    "EventCode": "0x4504A",
+    "EventName": "PM_IPTEG_FROM_OFF_CHIP_CACHE",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a instruction side request"
+  },
+  {,
+    "EventCode": "0x1404E",
+    "EventName": "PM_INST_FROM_L2MISS",
+    "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x34042",
+    "EventName": "PM_INST_FROM_L3_DISP_CONFLICT",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x4E048",
+    "EventName": "PM_DPTEG_FROM_DL2L3_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x200F0",
+    "EventName": "PM_ST_CMPL",
+    "BriefDescription": "Stores completed from S2Q (2nd-level store queue)."
+  },
+  {,
+    "EventCode": "0x4E05C",
+    "EventName": "PM_LSU_REJECT_LHS",
+    "BriefDescription": "LSU Reject due to LHS (up to 4 per cycle)"
+  },
+  {,
+    "EventCode": "0x14044",
+    "EventName": "PM_INST_FROM_L3_NO_CONFLICT",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x3E04C",
+    "EventName": "PM_DPTEG_FROM_DL4",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+  },
+  {,
+    "EventCode": "0x1F15E",
+    "EventName": "PM_MRK_PROBE_NOP_CMPL",
+    "BriefDescription": "Marked probeNops completed"
+  },
+  {,
+    "EventCode": "0x20018",
+    "EventName": "PM_ST_FWD",
+    "BriefDescription": "Store forwards that finished"
+  },
+  {,
+    "EventCode": "0x1D142",
+    "EventName": "PM_MRK_DATA_FROM_L31_ECO_SHR_CYC",
+    "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's ECO L3 on the same chip due to a marked load"
+  },
+  {,
+    "EventCode": "0x24042",
+    "EventName": "PM_INST_FROM_L3_MEPF",
+    "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to an instruction fetch (not prefetch)"
+  },
+  {,
+    "EventCode": "0x25046",
+    "EventName": "PM_IPTEG_FROM_RL2L3_MOD",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request"
+  },
+  {,
+    "EventCode": "0x3504A",
+    "EventName": "PM_IPTEG_FROM_RMEM",
+    "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a instruction side request"
+  },
+  {,
+    "EventCode": "0x3C05A",
+    "EventName": "PM_CMPLU_STALL_VDPLONG",
+    "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Qualified by NOT vector AND multicycle"
+  },
+  {,
+    "EventCode": "0x2E01C",
+    "EventName": "PM_CMPLU_STALL_TLBIE",
+    "BriefDescription": "Finish stall because the NTF instruction was a tlbie waiting for response from L2"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/s390/cf_z10/basic.json b/pmu-events/arch/s390/cf_z10/basic.json
new file mode 100644
index 0000000..8bf1675
--- /dev/null
+++ b/pmu-events/arch/s390/cf_z10/basic.json
@@ -0,0 +1,74 @@
+[
+	{
+		"EventCode": "0",
+		"EventName": "CPU_CYCLES",
+		"BriefDescription": "CPU Cycles",
+		"PublicDescription": "Cycle Count"
+	},
+	{
+		"EventCode": "1",
+		"EventName": "INSTRUCTIONS",
+		"BriefDescription": "Instructions",
+		"PublicDescription": "Instruction Count"
+	},
+	{
+		"EventCode": "2",
+		"EventName": "L1I_DIR_WRITES",
+		"BriefDescription": "L1I Directory Writes",
+		"PublicDescription": "Level-1 I-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "3",
+		"EventName": "L1I_PENALTY_CYCLES",
+		"BriefDescription": "L1I Penalty Cycles",
+		"PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "4",
+		"EventName": "L1D_DIR_WRITES",
+		"BriefDescription": "L1D Directory Writes",
+		"PublicDescription": "Level-1 D-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "5",
+		"EventName": "L1D_PENALTY_CYCLES",
+		"BriefDescription": "L1D Penalty Cycles",
+		"PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "32",
+		"EventName": "PROBLEM_STATE_CPU_CYCLES",
+		"BriefDescription": "Problem-State CPU Cycles",
+		"PublicDescription": "Problem-State Cycle Count"
+	},
+	{
+		"EventCode": "33",
+		"EventName": "PROBLEM_STATE_INSTRUCTIONS",
+		"BriefDescription": "Problem-State Instructions",
+		"PublicDescription": "Problem-State Instruction Count"
+	},
+	{
+		"EventCode": "34",
+		"EventName": "PROBLEM_STATE_L1I_DIR_WRITES",
+		"BriefDescription": "Problem-State L1I Directory Writes",
+		"PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "35",
+		"EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES",
+		"BriefDescription": "Problem-State L1I Penalty Cycles",
+		"PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "36",
+		"EventName": "PROBLEM_STATE_L1D_DIR_WRITES",
+		"BriefDescription": "Problem-State L1D Directory Writes",
+		"PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "37",
+		"EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES",
+		"BriefDescription": "Problem-State L1D Penalty Cycles",
+		"PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_z10/crypto.json b/pmu-events/arch/s390/cf_z10/crypto.json
new file mode 100644
index 0000000..7e5b724
--- /dev/null
+++ b/pmu-events/arch/s390/cf_z10/crypto.json
@@ -0,0 +1,98 @@
+[
+	{
+		"EventCode": "64",
+		"EventName": "PRNG_FUNCTIONS",
+		"BriefDescription": "PRNG Functions",
+		"PublicDescription": "Total number of the PRNG functions issued by the CPU"
+	},
+	{
+		"EventCode": "65",
+		"EventName": "PRNG_CYCLES",
+		"BriefDescription": "PRNG Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+	},
+	{
+		"EventCode": "66",
+		"EventName": "PRNG_BLOCKED_FUNCTIONS",
+		"BriefDescription": "PRNG Blocked Functions",
+		"PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "67",
+		"EventName": "PRNG_BLOCKED_CYCLES",
+		"BriefDescription": "PRNG Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "68",
+		"EventName": "SHA_FUNCTIONS",
+		"BriefDescription": "SHA Functions",
+		"PublicDescription": "Total number of SHA functions issued by the CPU"
+	},
+	{
+		"EventCode": "69",
+		"EventName": "SHA_CYCLES",
+		"BriefDescription": "SHA Cycles",
+		"PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+	},
+	{
+		"EventCode": "70",
+		"EventName": "SHA_BLOCKED_FUNCTIONS",
+		"BriefDescription": "SHA Blocked Functions",
+		"PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "71",
+		"EventName": "SHA_BLOCKED_CYCLES",
+		"BriefDescription": "SHA Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "72",
+		"EventName": "DEA_FUNCTIONS",
+		"BriefDescription": "DEA Functions",
+		"PublicDescription": "Total number of the DEA functions issued by the CPU"
+	},
+	{
+		"EventCode": "73",
+		"EventName": "DEA_CYCLES",
+		"BriefDescription": "DEA Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+	},
+	{
+		"EventCode": "74",
+		"EventName": "DEA_BLOCKED_FUNCTIONS",
+		"BriefDescription": "DEA Blocked Functions",
+		"PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "75",
+		"EventName": "DEA_BLOCKED_CYCLES",
+		"BriefDescription": "DEA Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "76",
+		"EventName": "AES_FUNCTIONS",
+		"BriefDescription": "AES Functions",
+		"PublicDescription": "Total number of AES functions issued by the CPU"
+	},
+	{
+		"EventCode": "77",
+		"EventName": "AES_CYCLES",
+		"BriefDescription": "AES Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+	},
+	{
+		"EventCode": "78",
+		"EventName": "AES_BLOCKED_FUNCTIONS",
+		"BriefDescription": "AES Blocked Functions",
+		"PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "79",
+		"EventName": "AES_BLOCKED_CYCLES",
+		"BriefDescription": "AES Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_z10/extended.json b/pmu-events/arch/s390/cf_z10/extended.json
new file mode 100644
index 0000000..0feedb4
--- /dev/null
+++ b/pmu-events/arch/s390/cf_z10/extended.json
@@ -0,0 +1,110 @@
+[
+	{
+		"EventCode": "128",
+		"EventName": "L1I_L2_SOURCED_WRITES",
+		"BriefDescription": "L1I L2 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from the Level-2 (L1.5) cache"
+	},
+	{
+		"EventCode": "129",
+		"EventName": "L1D_L2_SOURCED_WRITES",
+		"BriefDescription": "L1D L2 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from the Level-2 (L1.5) cache"
+	},
+	{
+		"EventCode": "130",
+		"EventName": "L1I_L3_LOCAL_WRITES",
+		"BriefDescription": "L1I L3 Local Writes",
+		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the installed cache line was sourced from the Level-3 cache that is on the same book as the Instruction cache (Local L2 cache)"
+	},
+	{
+		"EventCode": "131",
+		"EventName": "L1D_L3_LOCAL_WRITES",
+		"BriefDescription": "L1D L3 Local Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from the Level-3 cache that is on the same book as the Data cache (Local L2 cache)"
+	},
+	{
+		"EventCode": "132",
+		"EventName": "L1I_L3_REMOTE_WRITES",
+		"BriefDescription": "L1I L3 Remote Writes",
+		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the installed cache line was sourced from a Level-3 cache that is not on the same book as the Instruction cache (Remote L2 cache)"
+	},
+	{
+		"EventCode": "133",
+		"EventName": "L1D_L3_REMOTE_WRITES",
+		"BriefDescription": "L1D L3 Remote Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from a Level-3 cache that is not on the same book as the Data cache (Remote L2 cache)"
+	},
+	{
+		"EventCode": "134",
+		"EventName": "L1D_LMEM_SOURCED_WRITES",
+		"BriefDescription": "L1D Local Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from memory that is attached to the same book as the Data cache (Local Memory)"
+	},
+	{
+		"EventCode": "135",
+		"EventName": "L1I_LMEM_SOURCED_WRITES",
+		"BriefDescription": "L1I Local Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 I-Cache where the installed cache line was sourced from memory that is attached to the same book as the Instruction cache (Local Memory)"
+	},
+	{
+		"EventCode": "136",
+		"EventName": "L1D_RO_EXCL_WRITES",
+		"BriefDescription": "L1D Read-only Exclusive Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
+	},
+	{
+		"EventCode": "137",
+		"EventName": "L1I_CACHELINE_INVALIDATES",
+		"BriefDescription": "L1I Cacheline Invalidates",
+		"PublicDescription": "A cache line in the Level-1 I-Cache has been invalidated by a store on the same CPU as the Level-1 I-Cache"
+	},
+	{
+		"EventCode": "138",
+		"EventName": "ITLB1_WRITES",
+		"BriefDescription": "ITLB1 Writes",
+		"PublicDescription": "A translation entry has been written into the Level-1 Instruction Translation Lookaside Buffer"
+	},
+	{
+		"EventCode": "139",
+		"EventName": "DTLB1_WRITES",
+		"BriefDescription": "DTLB1 Writes",
+		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer"
+	},
+	{
+		"EventCode": "140",
+		"EventName": "TLB2_PTE_WRITES",
+		"BriefDescription": "TLB2 PTE Writes",
+		"PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays"
+	},
+	{
+		"EventCode": "141",
+		"EventName": "TLB2_CRSTE_WRITES",
+		"BriefDescription": "TLB2 CRSTE Writes",
+		"PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays"
+	},
+	{
+		"EventCode": "142",
+		"EventName": "TLB2_CRSTE_HPAGE_WRITES",
+		"BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes",
+		"PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays for a one-megabyte large page translation"
+	},
+	{
+		"EventCode": "145",
+		"EventName": "ITLB1_MISSES",
+		"BriefDescription": "ITLB1 Misses",
+		"PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle an ITLB1 miss is in progress"
+	},
+	{
+		"EventCode": "146",
+		"EventName": "DTLB1_MISSES",
+		"BriefDescription": "DTLB1 Misses",
+		"PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress"
+	},
+	{
+		"EventCode": "147",
+		"EventName": "L2C_STORES_SENT",
+		"BriefDescription": "L2C Stores Sent",
+		"PublicDescription": "Incremented by one for every store sent to Level-2 (L1.5) cache"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_z13/basic.json b/pmu-events/arch/s390/cf_z13/basic.json
new file mode 100644
index 0000000..8bf1675
--- /dev/null
+++ b/pmu-events/arch/s390/cf_z13/basic.json
@@ -0,0 +1,74 @@
+[
+	{
+		"EventCode": "0",
+		"EventName": "CPU_CYCLES",
+		"BriefDescription": "CPU Cycles",
+		"PublicDescription": "Cycle Count"
+	},
+	{
+		"EventCode": "1",
+		"EventName": "INSTRUCTIONS",
+		"BriefDescription": "Instructions",
+		"PublicDescription": "Instruction Count"
+	},
+	{
+		"EventCode": "2",
+		"EventName": "L1I_DIR_WRITES",
+		"BriefDescription": "L1I Directory Writes",
+		"PublicDescription": "Level-1 I-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "3",
+		"EventName": "L1I_PENALTY_CYCLES",
+		"BriefDescription": "L1I Penalty Cycles",
+		"PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "4",
+		"EventName": "L1D_DIR_WRITES",
+		"BriefDescription": "L1D Directory Writes",
+		"PublicDescription": "Level-1 D-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "5",
+		"EventName": "L1D_PENALTY_CYCLES",
+		"BriefDescription": "L1D Penalty Cycles",
+		"PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "32",
+		"EventName": "PROBLEM_STATE_CPU_CYCLES",
+		"BriefDescription": "Problem-State CPU Cycles",
+		"PublicDescription": "Problem-State Cycle Count"
+	},
+	{
+		"EventCode": "33",
+		"EventName": "PROBLEM_STATE_INSTRUCTIONS",
+		"BriefDescription": "Problem-State Instructions",
+		"PublicDescription": "Problem-State Instruction Count"
+	},
+	{
+		"EventCode": "34",
+		"EventName": "PROBLEM_STATE_L1I_DIR_WRITES",
+		"BriefDescription": "Problem-State L1I Directory Writes",
+		"PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "35",
+		"EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES",
+		"BriefDescription": "Problem-State L1I Penalty Cycles",
+		"PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "36",
+		"EventName": "PROBLEM_STATE_L1D_DIR_WRITES",
+		"BriefDescription": "Problem-State L1D Directory Writes",
+		"PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "37",
+		"EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES",
+		"BriefDescription": "Problem-State L1D Penalty Cycles",
+		"PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_z13/crypto.json b/pmu-events/arch/s390/cf_z13/crypto.json
new file mode 100644
index 0000000..7e5b724
--- /dev/null
+++ b/pmu-events/arch/s390/cf_z13/crypto.json
@@ -0,0 +1,98 @@
+[
+	{
+		"EventCode": "64",
+		"EventName": "PRNG_FUNCTIONS",
+		"BriefDescription": "PRNG Functions",
+		"PublicDescription": "Total number of the PRNG functions issued by the CPU"
+	},
+	{
+		"EventCode": "65",
+		"EventName": "PRNG_CYCLES",
+		"BriefDescription": "PRNG Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+	},
+	{
+		"EventCode": "66",
+		"EventName": "PRNG_BLOCKED_FUNCTIONS",
+		"BriefDescription": "PRNG Blocked Functions",
+		"PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "67",
+		"EventName": "PRNG_BLOCKED_CYCLES",
+		"BriefDescription": "PRNG Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "68",
+		"EventName": "SHA_FUNCTIONS",
+		"BriefDescription": "SHA Functions",
+		"PublicDescription": "Total number of SHA functions issued by the CPU"
+	},
+	{
+		"EventCode": "69",
+		"EventName": "SHA_CYCLES",
+		"BriefDescription": "SHA Cycles",
+		"PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+	},
+	{
+		"EventCode": "70",
+		"EventName": "SHA_BLOCKED_FUNCTIONS",
+		"BriefDescription": "SHA Blocked Functions",
+		"PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "71",
+		"EventName": "SHA_BLOCKED_CYCLES",
+		"BriefDescription": "SHA Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "72",
+		"EventName": "DEA_FUNCTIONS",
+		"BriefDescription": "DEA Functions",
+		"PublicDescription": "Total number of the DEA functions issued by the CPU"
+	},
+	{
+		"EventCode": "73",
+		"EventName": "DEA_CYCLES",
+		"BriefDescription": "DEA Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+	},
+	{
+		"EventCode": "74",
+		"EventName": "DEA_BLOCKED_FUNCTIONS",
+		"BriefDescription": "DEA Blocked Functions",
+		"PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "75",
+		"EventName": "DEA_BLOCKED_CYCLES",
+		"BriefDescription": "DEA Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "76",
+		"EventName": "AES_FUNCTIONS",
+		"BriefDescription": "AES Functions",
+		"PublicDescription": "Total number of AES functions issued by the CPU"
+	},
+	{
+		"EventCode": "77",
+		"EventName": "AES_CYCLES",
+		"BriefDescription": "AES Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+	},
+	{
+		"EventCode": "78",
+		"EventName": "AES_BLOCKED_FUNCTIONS",
+		"BriefDescription": "AES Blocked Functions",
+		"PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "79",
+		"EventName": "AES_BLOCKED_CYCLES",
+		"BriefDescription": "AES Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_z13/extended.json b/pmu-events/arch/s390/cf_z13/extended.json
new file mode 100644
index 0000000..9a002b6
--- /dev/null
+++ b/pmu-events/arch/s390/cf_z13/extended.json
@@ -0,0 +1,338 @@
+[
+	{
+		"EventCode": "128",
+		"EventName": "L1D_RO_EXCL_WRITES",
+		"BriefDescription": "L1D Read-only Exclusive Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line."
+	},
+	{
+		"EventCode": "129",
+		"EventName": "DTLB1_WRITES",
+		"BriefDescription": "DTLB1 Writes",
+		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer"
+	},
+	{
+		"EventCode": "130",
+		"EventName": "DTLB1_MISSES",
+		"BriefDescription": "DTLB1 Misses",
+		"PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress."
+	},
+	{
+		"EventCode": "131",
+		"EventName": "DTLB1_HPAGE_WRITES",
+		"BriefDescription": "DTLB1 One-Megabyte Page Writes",
+		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a one-megabyte page"
+	},
+	{
+		"EventCode": "132",
+		"EventName": "DTLB1_GPAGE_WRITES",
+		"BriefDescription": "DTLB1 Two-Gigabyte Page Writes",
+		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a two-gigabyte page."
+	},
+	{
+		"EventCode": "133",
+		"EventName": "L1D_L2D_SOURCED_WRITES",
+		"BriefDescription": "L1D L2D Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
+	},
+	{
+		"EventCode": "134",
+		"EventName": "ITLB1_WRITES",
+		"BriefDescription": "ITLB1 Writes",
+		"PublicDescription": "A translation entry has been written to the Level-1 Instruction Translation Lookaside Buffer"
+	},
+	{
+		"EventCode": "135",
+		"EventName": "ITLB1_MISSES",
+		"BriefDescription": "ITLB1 Misses",
+		"PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle an ITLB1 miss is in progress"
+	},
+	{
+		"EventCode": "136",
+		"EventName": "L1I_L2I_SOURCED_WRITES",
+		"BriefDescription": "L1I L2I Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
+	},
+	{
+		"EventCode": "137",
+		"EventName": "TLB2_PTE_WRITES",
+		"BriefDescription": "TLB2 PTE Writes",
+		"PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays"
+	},
+	{
+		"EventCode": "138",
+		"EventName": "TLB2_CRSTE_HPAGE_WRITES",
+		"BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes",
+		"PublicDescription": "A translation entry has been written to the Level-2 TLB Combined Region Segment Table Entry arrays for a one-megabyte large page translation"
+	},
+	{
+		"EventCode": "139",
+		"EventName": "TLB2_CRSTE_WRITES",
+		"BriefDescription": "TLB2 CRSTE Writes",
+		"PublicDescription": "A translation entry has been written to the Level-2 TLB Combined Region Segment Table Entry arrays"
+	},
+	{
+		"EventCode": "140",
+		"EventName": "TX_C_TEND",
+		"BriefDescription": "Completed TEND instructions in constrained TX mode",
+		"PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
+	},
+	{
+		"EventCode": "141",
+		"EventName": "TX_NC_TEND",
+		"BriefDescription": "Completed TEND instructions in non-constrained TX mode",
+		"PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode"
+	},
+	{
+		"EventCode": "143",
+		"EventName": "L1C_TLB1_MISSES",
+		"BriefDescription": "L1C TLB1 Misses",
+		"PublicDescription": "Increments by one for any cycle where a Level-1 cache or Level-1 TLB miss is in progress."
+	},
+	{
+		"EventCode": "144",
+		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "145",
+		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "146",
+		"EventName": "L1D_ONNODE_L4_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Node L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Node Level-4 cache"
+	},
+	{
+		"EventCode": "147",
+		"EventName": "L1D_ONNODE_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D On-Node L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Node Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "148",
+		"EventName": "L1D_ONNODE_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Node L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Node Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "149",
+		"EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Drawer L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-4 cache"
+	},
+	{
+		"EventCode": "150",
+		"EventName": "L1D_ONDRAWER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D On-Drawer L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "151",
+		"EventName": "L1D_ONDRAWER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Drawer L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "152",
+		"EventName": "L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Drawer Same-Column L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-4 cache"
+	},
+	{
+		"EventCode": "153",
+		"EventName": "L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D Off-Drawer Same-Column L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "154",
+		"EventName": "L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Drawer Same-Column L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "155",
+		"EventName": "L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Drawer Far-Column L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-4 cache"
+	},
+	{
+		"EventCode": "156",
+		"EventName": "L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D Off-Drawer Far-Column L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "157",
+		"EventName": "L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Drawer Far-Column L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "158",
+		"EventName": "L1D_ONNODE_MEM_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Node Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Node memory"
+	},
+	{
+		"EventCode": "159",
+		"EventName": "L1D_ONDRAWER_MEM_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Drawer Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer memory"
+	},
+	{
+		"EventCode": "160",
+		"EventName": "L1D_OFFDRAWER_MEM_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Drawer Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer memory"
+	},
+	{
+		"EventCode": "161",
+		"EventName": "L1D_ONCHIP_MEM_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Chip Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory"
+	},
+	{
+		"EventCode": "162",
+		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "163",
+		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "164",
+		"EventName": "L1I_ONNODE_L4_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Node L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Node Level-4 cache"
+	},
+	{
+		"EventCode": "165",
+		"EventName": "L1I_ONNODE_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I On-Node L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Node Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "166",
+		"EventName": "L1I_ONNODE_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Node L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Node Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "167",
+		"EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Drawer L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-4 cache"
+	},
+	{
+		"EventCode": "168",
+		"EventName": "L1I_ONDRAWER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I On-Drawer L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "169",
+		"EventName": "L1I_ONDRAWER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Drawer L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "170",
+		"EventName": "L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Drawer Same-Column L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-4 cache"
+	},
+	{
+		"EventCode": "171",
+		"EventName": "L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I Off-Drawer Same-Column L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "172",
+		"EventName": "L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Drawer Same-Column L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "173",
+		"EventName": "L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Drawer Far-Column L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-4 cache"
+	},
+	{
+		"EventCode": "174",
+		"EventName": "L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I Off-Drawer Far-Column L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "175",
+		"EventName": "L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Drawer Far-Column L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "176",
+		"EventName": "L1I_ONNODE_MEM_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Node Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Node memory"
+	},
+	{
+		"EventCode": "177",
+		"EventName": "L1I_ONDRAWER_MEM_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Drawer Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer memory"
+	},
+	{
+		"EventCode": "178",
+		"EventName": "L1I_OFFDRAWER_MEM_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Drawer Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer memory"
+	},
+	{
+		"EventCode": "179",
+		"EventName": "L1I_ONCHIP_MEM_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Chip Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Chip memory"
+	},
+	{
+		"EventCode": "218",
+		"EventName": "TX_NC_TABORT",
+		"BriefDescription": "Aborted transactions in non-constrained TX mode",
+		"PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode"
+	},
+	{
+		"EventCode": "219",
+		"EventName": "TX_C_TABORT_NO_SPECIAL",
+		"BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
+		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
+	},
+	{
+		"EventCode": "220",
+		"EventName": "TX_C_TABORT_SPECIAL",
+		"BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
+		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete"
+	},
+	{
+		"EventCode": "448",
+		"EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
+		"BriefDescription": "Cycle count with one thread active",
+		"PublicDescription": "Cycle count with one thread active"
+	},
+	{
+		"EventCode": "449",
+		"EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE",
+		"BriefDescription": "Cycle count with two threads active",
+		"PublicDescription": "Cycle count with two threads active"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_z14/basic.json b/pmu-events/arch/s390/cf_z14/basic.json
new file mode 100644
index 0000000..8f653c9
--- /dev/null
+++ b/pmu-events/arch/s390/cf_z14/basic.json
@@ -0,0 +1,50 @@
+[
+	{
+		"EventCode": "0",
+		"EventName": "CPU_CYCLES",
+		"BriefDescription": "CPU Cycles",
+		"PublicDescription": "Cycle Count"
+	},
+	{
+		"EventCode": "1",
+		"EventName": "INSTRUCTIONS",
+		"BriefDescription": "Instructions",
+		"PublicDescription": "Instruction Count"
+	},
+	{
+		"EventCode": "2",
+		"EventName": "L1I_DIR_WRITES",
+		"BriefDescription": "L1I Directory Writes",
+		"PublicDescription": "Level-1 I-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "3",
+		"EventName": "L1I_PENALTY_CYCLES",
+		"BriefDescription": "L1I Penalty Cycles",
+		"PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "4",
+		"EventName": "L1D_DIR_WRITES",
+		"BriefDescription": "L1D Directory Writes",
+		"PublicDescription": "Level-1 D-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "5",
+		"EventName": "L1D_PENALTY_CYCLES",
+		"BriefDescription": "L1D Penalty Cycles",
+		"PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "32",
+		"EventName": "PROBLEM_STATE_CPU_CYCLES",
+		"BriefDescription": "Problem-State CPU Cycles",
+		"PublicDescription": "Problem-State Cycle Count"
+	},
+	{
+		"EventCode": "33",
+		"EventName": "PROBLEM_STATE_INSTRUCTIONS",
+		"BriefDescription": "Problem-State Instructions",
+		"PublicDescription": "Problem-State Instruction Count"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_z14/crypto.json b/pmu-events/arch/s390/cf_z14/crypto.json
new file mode 100644
index 0000000..7e5b724
--- /dev/null
+++ b/pmu-events/arch/s390/cf_z14/crypto.json
@@ -0,0 +1,98 @@
+[
+	{
+		"EventCode": "64",
+		"EventName": "PRNG_FUNCTIONS",
+		"BriefDescription": "PRNG Functions",
+		"PublicDescription": "Total number of the PRNG functions issued by the CPU"
+	},
+	{
+		"EventCode": "65",
+		"EventName": "PRNG_CYCLES",
+		"BriefDescription": "PRNG Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+	},
+	{
+		"EventCode": "66",
+		"EventName": "PRNG_BLOCKED_FUNCTIONS",
+		"BriefDescription": "PRNG Blocked Functions",
+		"PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "67",
+		"EventName": "PRNG_BLOCKED_CYCLES",
+		"BriefDescription": "PRNG Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "68",
+		"EventName": "SHA_FUNCTIONS",
+		"BriefDescription": "SHA Functions",
+		"PublicDescription": "Total number of SHA functions issued by the CPU"
+	},
+	{
+		"EventCode": "69",
+		"EventName": "SHA_CYCLES",
+		"BriefDescription": "SHA Cycles",
+		"PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+	},
+	{
+		"EventCode": "70",
+		"EventName": "SHA_BLOCKED_FUNCTIONS",
+		"BriefDescription": "SHA Blocked Functions",
+		"PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "71",
+		"EventName": "SHA_BLOCKED_CYCLES",
+		"BriefDescription": "SHA Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "72",
+		"EventName": "DEA_FUNCTIONS",
+		"BriefDescription": "DEA Functions",
+		"PublicDescription": "Total number of the DEA functions issued by the CPU"
+	},
+	{
+		"EventCode": "73",
+		"EventName": "DEA_CYCLES",
+		"BriefDescription": "DEA Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+	},
+	{
+		"EventCode": "74",
+		"EventName": "DEA_BLOCKED_FUNCTIONS",
+		"BriefDescription": "DEA Blocked Functions",
+		"PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "75",
+		"EventName": "DEA_BLOCKED_CYCLES",
+		"BriefDescription": "DEA Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "76",
+		"EventName": "AES_FUNCTIONS",
+		"BriefDescription": "AES Functions",
+		"PublicDescription": "Total number of AES functions issued by the CPU"
+	},
+	{
+		"EventCode": "77",
+		"EventName": "AES_CYCLES",
+		"BriefDescription": "AES Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+	},
+	{
+		"EventCode": "78",
+		"EventName": "AES_BLOCKED_FUNCTIONS",
+		"BriefDescription": "AES Blocked Functions",
+		"PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "79",
+		"EventName": "AES_BLOCKED_CYCLES",
+		"BriefDescription": "AES Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_z14/extended.json b/pmu-events/arch/s390/cf_z14/extended.json
new file mode 100644
index 0000000..aa4dfb4
--- /dev/null
+++ b/pmu-events/arch/s390/cf_z14/extended.json
@@ -0,0 +1,320 @@
+[
+	{
+		"EventCode": "128",
+		"EventName": "L1D_RO_EXCL_WRITES",
+		"BriefDescription": "L1D Read-only Exclusive Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
+	},
+	{
+		"EventCode": "129",
+		"EventName": "DTLB2_WRITES",
+		"BriefDescription": "DTLB2 Writes",
+		"PublicDescription": "A translation has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the data cache"
+	},
+	{
+		"EventCode": "130",
+		"EventName": "DTLB2_MISSES",
+		"BriefDescription": "DTLB2 Misses",
+		"PublicDescription": "A TLB2 miss is in progress for a request made by the data cache. Incremented by one for every TLB2 miss in progress for the Level-1 Data cache on this cycle"
+	},
+	{
+		"EventCode": "131",
+		"EventName": "DTLB2_HPAGE_WRITES",
+		"BriefDescription": "DTLB2 One-Megabyte Page Writes",
+		"PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done"
+	},
+	{
+		"EventCode": "132",
+		"EventName": "DTLB2_GPAGE_WRITES",
+		"BriefDescription": "DTLB2 Two-Gigabyte Page Writes",
+		"PublicDescription": "A translation entry for a two-gigabyte page was written into the Level-2 TLB"
+	},
+	{
+		"EventCode": "133",
+		"EventName": "L1D_L2D_SOURCED_WRITES",
+		"BriefDescription": "L1D L2D Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
+	},
+	{
+		"EventCode": "134",
+		"EventName": "ITLB2_WRITES",
+		"BriefDescription": "ITLB2 Writes",
+		"PublicDescription": "A translation entry has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the instruction cache"
+	},
+	{
+		"EventCode": "135",
+		"EventName": "ITLB2_MISSES",
+		"BriefDescription": "ITLB2 Misses",
+		"PublicDescription": "A TLB2 miss is in progress for a request made by the instruction cache. Incremented by one for every TLB2 miss in progress for the Level-1 Instruction cache in a cycle"
+	},
+	{
+		"EventCode": "136",
+		"EventName": "L1I_L2I_SOURCED_WRITES",
+		"BriefDescription": "L1I L2I Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
+	},
+	{
+		"EventCode": "137",
+		"EventName": "TLB2_PTE_WRITES",
+		"BriefDescription": "TLB2 PTE Writes",
+		"PublicDescription": "A translation entry was written into the Page Table Entry array in the Level-2 TLB"
+	},
+	{
+		"EventCode": "138",
+		"EventName": "TLB2_CRSTE_WRITES",
+		"BriefDescription": "TLB2 CRSTE Writes",
+		"PublicDescription": "Translation entries were written into the Combined Region and Segment Table Entry array and the Page Table Entry array in the Level-2 TLB"
+	},
+	{
+		"EventCode": "139",
+		"EventName": "TLB2_ENGINES_BUSY",
+		"BriefDescription": "TLB2 Engines Busy",
+		"PublicDescription": "The number of Level-2 TLB translation engines busy in a cycle"
+	},
+	{
+		"EventCode": "140",
+		"EventName": "TX_C_TEND",
+		"BriefDescription": "Completed TEND instructions in constrained TX mode",
+		"PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
+	},
+	{
+		"EventCode": "141",
+		"EventName": "TX_NC_TEND",
+		"BriefDescription": "Completed TEND instructions in non-constrained TX mode",
+		"PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode"
+	},
+	{
+		"EventCode": "143",
+		"EventName": "L1C_TLB2_MISSES",
+		"BriefDescription": "L1C TLB2 Misses",
+		"PublicDescription": "Increments by one for any cycle where a level-1 cache or level-2 TLB miss is in progress"
+	},
+	{
+		"EventCode": "144",
+		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "145",
+		"EventName": "L1D_ONCHIP_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Chip Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory"
+	},
+	{
+		"EventCode": "146",
+		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "147",
+		"EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Cluster L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "148",
+		"EventName": "L1D_ONCLUSTER_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Cluster Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Cluster memory"
+	},
+	{
+		"EventCode": "149",
+		"EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D On-Cluster L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "150",
+		"EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Cluster L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "151",
+		"EventName": "L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Cluster Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Cluster memory"
+	},
+	{
+		"EventCode": "152",
+		"EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D Off-Cluster L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "153",
+		"EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Drawer L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "154",
+		"EventName": "L1D_OFFDRAWER_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Drawer Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer memory"
+	},
+	{
+		"EventCode": "155",
+		"EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D Off-Drawer L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "156",
+		"EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Drawer L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
+	},
+	{
+		"EventCode": "157",
+		"EventName": "L1D_OFFDRAWER_L4_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Drawer L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
+	},
+	{
+		"EventCode": "158",
+		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_RO",
+		"BriefDescription": "L1D On-Chip L3 Sourced Writes read-only",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip L3 but a read-only invalidate was done to remove other copies of the cache line"
+	},
+	{
+		"EventCode": "162",
+		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "163",
+		"EventName": "L1I_ONCHIP_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Chip Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Chip memory"
+	},
+	{
+		"EventCode": "164",
+		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "165",
+		"EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Cluster L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "166",
+		"EventName": "L1I_ONCLUSTER_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Cluster Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster memory"
+	},
+	{
+		"EventCode": "167",
+		"EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I On-Cluster L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "168",
+		"EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Cluster L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "169",
+		"EventName": "L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Cluster Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Cluster memory"
+	},
+	{
+		"EventCode": "170",
+		"EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I Off-Cluster L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "171",
+		"EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Drawer L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "172",
+		"EventName": "L1I_OFFDRAWER_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Drawer Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer memory"
+	},
+	{
+		"EventCode": "173",
+		"EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I Off-Drawer L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "174",
+		"EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Drawer L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
+	},
+	{
+		"EventCode": "175",
+		"EventName": "L1I_OFFDRAWER_L4_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Drawer L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
+	},
+	{
+		"EventCode": "224",
+		"EventName": "BCD_DFP_EXECUTION_SLOTS",
+		"BriefDescription": "BCD DFP Execution Slots",
+		"PublicDescription": "Count of floating point execution slots used for finished Binary Coded Decimal to Decimal Floating Point conversions. Instructions: CDZT, CXZT, CZDT, CZXT"
+	},
+	{
+		"EventCode": "225",
+		"EventName": "VX_BCD_EXECUTION_SLOTS",
+		"BriefDescription": "VX BCD Execution Slots",
+		"PublicDescription": "Count of floating point execution slots used for finished vector arithmetic Binary Coded Decimal instructions. Instructions: VAP, VSP, VMP, VMSP, VDP, VSDP, VRP, VLIP, VSRP, VPSOP, VCP, VTP, VPKZ, VUPKZ, VCVB, VCVBG, VCVD, VCVDG"
+	},
+	{
+		"EventCode": "226",
+		"EventName": "DECIMAL_INSTRUCTIONS",
+		"BriefDescription": "Decimal Instructions",
+		"PublicDescription": "Decimal instructions dispatched. Instructions: CVB, CVD, AP, CP, DP, ED, EDMK, MP, SRP, SP, ZAP"
+	},
+	{
+		"EventCode": "232",
+		"EventName": "LAST_HOST_TRANSLATIONS",
+		"BriefDescription": "Last host translation done",
+		"PublicDescription": "Last Host Translation done"
+	},
+	{
+		"EventCode": "243",
+		"EventName": "TX_NC_TABORT",
+		"BriefDescription": "Aborted transactions in non-constrained TX mode",
+		"PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode"
+	},
+	{
+		"EventCode": "244",
+		"EventName": "TX_C_TABORT_NO_SPECIAL",
+		"BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
+		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
+	},
+	{
+		"EventCode": "245",
+		"EventName": "TX_C_TABORT_SPECIAL",
+		"BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
+		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete"
+	},
+	{
+		"EventCode": "448",
+		"EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
+		"BriefDescription": "Cycle count with one thread active",
+		"PublicDescription": "Cycle count with one thread active"
+	},
+	{
+		"EventCode": "449",
+		"EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE",
+		"BriefDescription": "Cycle count with two threads active",
+		"PublicDescription": "Cycle count with two threads active"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_z196/basic.json b/pmu-events/arch/s390/cf_z196/basic.json
new file mode 100644
index 0000000..8bf1675
--- /dev/null
+++ b/pmu-events/arch/s390/cf_z196/basic.json
@@ -0,0 +1,74 @@
+[
+	{
+		"EventCode": "0",
+		"EventName": "CPU_CYCLES",
+		"BriefDescription": "CPU Cycles",
+		"PublicDescription": "Cycle Count"
+	},
+	{
+		"EventCode": "1",
+		"EventName": "INSTRUCTIONS",
+		"BriefDescription": "Instructions",
+		"PublicDescription": "Instruction Count"
+	},
+	{
+		"EventCode": "2",
+		"EventName": "L1I_DIR_WRITES",
+		"BriefDescription": "L1I Directory Writes",
+		"PublicDescription": "Level-1 I-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "3",
+		"EventName": "L1I_PENALTY_CYCLES",
+		"BriefDescription": "L1I Penalty Cycles",
+		"PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "4",
+		"EventName": "L1D_DIR_WRITES",
+		"BriefDescription": "L1D Directory Writes",
+		"PublicDescription": "Level-1 D-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "5",
+		"EventName": "L1D_PENALTY_CYCLES",
+		"BriefDescription": "L1D Penalty Cycles",
+		"PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "32",
+		"EventName": "PROBLEM_STATE_CPU_CYCLES",
+		"BriefDescription": "Problem-State CPU Cycles",
+		"PublicDescription": "Problem-State Cycle Count"
+	},
+	{
+		"EventCode": "33",
+		"EventName": "PROBLEM_STATE_INSTRUCTIONS",
+		"BriefDescription": "Problem-State Instructions",
+		"PublicDescription": "Problem-State Instruction Count"
+	},
+	{
+		"EventCode": "34",
+		"EventName": "PROBLEM_STATE_L1I_DIR_WRITES",
+		"BriefDescription": "Problem-State L1I Directory Writes",
+		"PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "35",
+		"EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES",
+		"BriefDescription": "Problem-State L1I Penalty Cycles",
+		"PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "36",
+		"EventName": "PROBLEM_STATE_L1D_DIR_WRITES",
+		"BriefDescription": "Problem-State L1D Directory Writes",
+		"PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "37",
+		"EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES",
+		"BriefDescription": "Problem-State L1D Penalty Cycles",
+		"PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_z196/crypto.json b/pmu-events/arch/s390/cf_z196/crypto.json
new file mode 100644
index 0000000..7e5b724
--- /dev/null
+++ b/pmu-events/arch/s390/cf_z196/crypto.json
@@ -0,0 +1,98 @@
+[
+	{
+		"EventCode": "64",
+		"EventName": "PRNG_FUNCTIONS",
+		"BriefDescription": "PRNG Functions",
+		"PublicDescription": "Total number of the PRNG functions issued by the CPU"
+	},
+	{
+		"EventCode": "65",
+		"EventName": "PRNG_CYCLES",
+		"BriefDescription": "PRNG Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+	},
+	{
+		"EventCode": "66",
+		"EventName": "PRNG_BLOCKED_FUNCTIONS",
+		"BriefDescription": "PRNG Blocked Functions",
+		"PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "67",
+		"EventName": "PRNG_BLOCKED_CYCLES",
+		"BriefDescription": "PRNG Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "68",
+		"EventName": "SHA_FUNCTIONS",
+		"BriefDescription": "SHA Functions",
+		"PublicDescription": "Total number of SHA functions issued by the CPU"
+	},
+	{
+		"EventCode": "69",
+		"EventName": "SHA_CYCLES",
+		"BriefDescription": "SHA Cycles",
+		"PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+	},
+	{
+		"EventCode": "70",
+		"EventName": "SHA_BLOCKED_FUNCTIONS",
+		"BriefDescription": "SHA Blocked Functions",
+		"PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "71",
+		"EventName": "SHA_BLOCKED_CYCLES",
+		"BriefDescription": "SHA Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "72",
+		"EventName": "DEA_FUNCTIONS",
+		"BriefDescription": "DEA Functions",
+		"PublicDescription": "Total number of the DEA functions issued by the CPU"
+	},
+	{
+		"EventCode": "73",
+		"EventName": "DEA_CYCLES",
+		"BriefDescription": "DEA Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+	},
+	{
+		"EventCode": "74",
+		"EventName": "DEA_BLOCKED_FUNCTIONS",
+		"BriefDescription": "DEA Blocked Functions",
+		"PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "75",
+		"EventName": "DEA_BLOCKED_CYCLES",
+		"BriefDescription": "DEA Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "76",
+		"EventName": "AES_FUNCTIONS",
+		"BriefDescription": "AES Functions",
+		"PublicDescription": "Total number of AES functions issued by the CPU"
+	},
+	{
+		"EventCode": "77",
+		"EventName": "AES_CYCLES",
+		"BriefDescription": "AES Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+	},
+	{
+		"EventCode": "78",
+		"EventName": "AES_BLOCKED_FUNCTIONS",
+		"BriefDescription": "AES Blocked Functions",
+		"PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "79",
+		"EventName": "AES_BLOCKED_CYCLES",
+		"BriefDescription": "AES Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_z196/extended.json b/pmu-events/arch/s390/cf_z196/extended.json
new file mode 100644
index 0000000..b6d7fec
--- /dev/null
+++ b/pmu-events/arch/s390/cf_z196/extended.json
@@ -0,0 +1,146 @@
+[
+	{
+		"EventCode": "128",
+		"EventName": "L1D_L2_SOURCED_WRITES",
+		"BriefDescription": "L1D L2 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from the Level-2 cache"
+	},
+	{
+		"EventCode": "129",
+		"EventName": "L1I_L2_SOURCED_WRITES",
+		"BriefDescription": "L1I L2 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from the Level-2 cache"
+	},
+	{
+		"EventCode": "130",
+		"EventName": "DTLB1_MISSES",
+		"BriefDescription": "DTLB1 Misses",
+		"PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress."
+	},
+	{
+		"EventCode": "131",
+		"EventName": "ITLB1_MISSES",
+		"BriefDescription": "ITLB1 Misses",
+		"PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle an ITLB1 miss is in progress."
+	},
+	{
+		"EventCode": "133",
+		"EventName": "L2C_STORES_SENT",
+		"BriefDescription": "L2C Stores Sent",
+		"PublicDescription": "Incremented by one for every store sent to Level-2 cache"
+	},
+	{
+		"EventCode": "134",
+		"EventName": "L1D_OFFBOOK_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Book L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an Off Book Level-3 cache"
+	},
+	{
+		"EventCode": "135",
+		"EventName": "L1D_ONBOOK_L4_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Book L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an On Book Level-4 cache"
+	},
+	{
+		"EventCode": "136",
+		"EventName": "L1I_ONBOOK_L4_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Book L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an On Book Level-4 cache"
+	},
+	{
+		"EventCode": "137",
+		"EventName": "L1D_RO_EXCL_WRITES",
+		"BriefDescription": "L1D Read-only Exclusive Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
+	},
+	{
+		"EventCode": "138",
+		"EventName": "L1D_OFFBOOK_L4_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Book L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an Off Book Level-4 cache"
+	},
+	{
+		"EventCode": "139",
+		"EventName": "L1I_OFFBOOK_L4_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Book L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Book Level-4 cache"
+	},
+	{
+		"EventCode": "140",
+		"EventName": "DTLB1_HPAGE_WRITES",
+		"BriefDescription": "DTLB1 One-Megabyte Page Writes",
+		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a one-megabyte page"
+	},
+	{
+		"EventCode": "141",
+		"EventName": "L1D_LMEM_SOURCED_WRITES",
+		"BriefDescription": "L1D Local Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache where the installed cache line was sourced from memory that is attached to the same book as the Data cache (Local Memory)"
+	},
+	{
+		"EventCode": "142",
+		"EventName": "L1I_LMEM_SOURCED_WRITES",
+		"BriefDescription": "L1I Local Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 I-Cache where the installed cache line was sourced from memory that is attached to the same book as the Instruction cache (Local Memory)"
+	},
+	{
+		"EventCode": "143",
+		"EventName": "L1I_OFFBOOK_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Book L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Book Level-3 cache"
+	},
+	{
+		"EventCode": "144",
+		"EventName": "DTLB1_WRITES",
+		"BriefDescription": "DTLB1 Writes",
+		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer"
+	},
+	{
+		"EventCode": "145",
+		"EventName": "ITLB1_WRITES",
+		"BriefDescription": "ITLB1 Writes",
+		"PublicDescription": "A translation entry has been written to the Level-1 Instruction Translation Lookaside Buffer"
+	},
+	{
+		"EventCode": "146",
+		"EventName": "TLB2_PTE_WRITES",
+		"BriefDescription": "TLB2 PTE Writes",
+		"PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays"
+	},
+	{
+		"EventCode": "147",
+		"EventName": "TLB2_CRSTE_HPAGE_WRITES",
+		"BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes",
+		"PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays for a one-megabyte large page translation"
+	},
+	{
+		"EventCode": "148",
+		"EventName": "TLB2_CRSTE_WRITES",
+		"BriefDescription": "TLB2 CRSTE Writes",
+		"PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays"
+	},
+	{
+		"EventCode": "150",
+		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an On Chip Level-3 cache"
+	},
+	{
+		"EventCode": "152",
+		"EventName": "L1D_OFFCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache"
+	},
+	{
+		"EventCode": "153",
+		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an On Chip Level-3 cache"
+	},
+	{
+		"EventCode": "155",
+		"EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_zec12/basic.json b/pmu-events/arch/s390/cf_zec12/basic.json
new file mode 100644
index 0000000..8bf1675
--- /dev/null
+++ b/pmu-events/arch/s390/cf_zec12/basic.json
@@ -0,0 +1,74 @@
+[
+	{
+		"EventCode": "0",
+		"EventName": "CPU_CYCLES",
+		"BriefDescription": "CPU Cycles",
+		"PublicDescription": "Cycle Count"
+	},
+	{
+		"EventCode": "1",
+		"EventName": "INSTRUCTIONS",
+		"BriefDescription": "Instructions",
+		"PublicDescription": "Instruction Count"
+	},
+	{
+		"EventCode": "2",
+		"EventName": "L1I_DIR_WRITES",
+		"BriefDescription": "L1I Directory Writes",
+		"PublicDescription": "Level-1 I-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "3",
+		"EventName": "L1I_PENALTY_CYCLES",
+		"BriefDescription": "L1I Penalty Cycles",
+		"PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "4",
+		"EventName": "L1D_DIR_WRITES",
+		"BriefDescription": "L1D Directory Writes",
+		"PublicDescription": "Level-1 D-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "5",
+		"EventName": "L1D_PENALTY_CYCLES",
+		"BriefDescription": "L1D Penalty Cycles",
+		"PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "32",
+		"EventName": "PROBLEM_STATE_CPU_CYCLES",
+		"BriefDescription": "Problem-State CPU Cycles",
+		"PublicDescription": "Problem-State Cycle Count"
+	},
+	{
+		"EventCode": "33",
+		"EventName": "PROBLEM_STATE_INSTRUCTIONS",
+		"BriefDescription": "Problem-State Instructions",
+		"PublicDescription": "Problem-State Instruction Count"
+	},
+	{
+		"EventCode": "34",
+		"EventName": "PROBLEM_STATE_L1I_DIR_WRITES",
+		"BriefDescription": "Problem-State L1I Directory Writes",
+		"PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "35",
+		"EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES",
+		"BriefDescription": "Problem-State L1I Penalty Cycles",
+		"PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count"
+	},
+	{
+		"EventCode": "36",
+		"EventName": "PROBLEM_STATE_L1D_DIR_WRITES",
+		"BriefDescription": "Problem-State L1D Directory Writes",
+		"PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count"
+	},
+	{
+		"EventCode": "37",
+		"EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES",
+		"BriefDescription": "Problem-State L1D Penalty Cycles",
+		"PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_zec12/crypto.json b/pmu-events/arch/s390/cf_zec12/crypto.json
new file mode 100644
index 0000000..7e5b724
--- /dev/null
+++ b/pmu-events/arch/s390/cf_zec12/crypto.json
@@ -0,0 +1,98 @@
+[
+	{
+		"EventCode": "64",
+		"EventName": "PRNG_FUNCTIONS",
+		"BriefDescription": "PRNG Functions",
+		"PublicDescription": "Total number of the PRNG functions issued by the CPU"
+	},
+	{
+		"EventCode": "65",
+		"EventName": "PRNG_CYCLES",
+		"BriefDescription": "PRNG Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+	},
+	{
+		"EventCode": "66",
+		"EventName": "PRNG_BLOCKED_FUNCTIONS",
+		"BriefDescription": "PRNG Blocked Functions",
+		"PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "67",
+		"EventName": "PRNG_BLOCKED_CYCLES",
+		"BriefDescription": "PRNG Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "68",
+		"EventName": "SHA_FUNCTIONS",
+		"BriefDescription": "SHA Functions",
+		"PublicDescription": "Total number of SHA functions issued by the CPU"
+	},
+	{
+		"EventCode": "69",
+		"EventName": "SHA_CYCLES",
+		"BriefDescription": "SHA Cycles",
+		"PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+	},
+	{
+		"EventCode": "70",
+		"EventName": "SHA_BLOCKED_FUNCTIONS",
+		"BriefDescription": "SHA Blocked Functions",
+		"PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "71",
+		"EventName": "SHA_BLOCKED_CYCLES",
+		"BriefDescription": "SHA Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "72",
+		"EventName": "DEA_FUNCTIONS",
+		"BriefDescription": "DEA Functions",
+		"PublicDescription": "Total number of the DEA functions issued by the CPU"
+	},
+	{
+		"EventCode": "73",
+		"EventName": "DEA_CYCLES",
+		"BriefDescription": "DEA Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+	},
+	{
+		"EventCode": "74",
+		"EventName": "DEA_BLOCKED_FUNCTIONS",
+		"BriefDescription": "DEA Blocked Functions",
+		"PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "75",
+		"EventName": "DEA_BLOCKED_CYCLES",
+		"BriefDescription": "DEA Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "76",
+		"EventName": "AES_FUNCTIONS",
+		"BriefDescription": "AES Functions",
+		"PublicDescription": "Total number of AES functions issued by the CPU"
+	},
+	{
+		"EventCode": "77",
+		"EventName": "AES_CYCLES",
+		"BriefDescription": "AES Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+	},
+	{
+		"EventCode": "78",
+		"EventName": "AES_BLOCKED_FUNCTIONS",
+		"BriefDescription": "AES Blocked Functions",
+		"PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"EventCode": "79",
+		"EventName": "AES_BLOCKED_CYCLES",
+		"BriefDescription": "AES Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+]
diff --git a/pmu-events/arch/s390/cf_zec12/extended.json b/pmu-events/arch/s390/cf_zec12/extended.json
new file mode 100644
index 0000000..8682126
--- /dev/null
+++ b/pmu-events/arch/s390/cf_zec12/extended.json
@@ -0,0 +1,212 @@
+[
+	{
+		"EventCode": "128",
+		"EventName": "DTLB1_MISSES",
+		"BriefDescription": "DTLB1 Misses",
+		"PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress."
+	},
+	{
+		"EventCode": "129",
+		"EventName": "ITLB1_MISSES",
+		"BriefDescription": "ITLB1 Misses",
+		"PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle an ITLB1 miss is in progress."
+	},
+	{
+		"EventCode": "130",
+		"EventName": "L1D_L2I_SOURCED_WRITES",
+		"BriefDescription": "L1D L2I Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
+	},
+	{
+		"EventCode": "131",
+		"EventName": "L1I_L2I_SOURCED_WRITES",
+		"BriefDescription": "L1I L2I Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
+	},
+	{
+		"EventCode": "132",
+		"EventName": "L1D_L2D_SOURCED_WRITES",
+		"BriefDescription": "L1D L2D Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
+	},
+	{
+		"EventCode": "133",
+		"EventName": "DTLB1_WRITES",
+		"BriefDescription": "DTLB1 Writes",
+		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer"
+	},
+	{
+		"EventCode": "135",
+		"EventName": "L1D_LMEM_SOURCED_WRITES",
+		"BriefDescription": "L1D Local Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache where the installed cache line was sourced from memory that is attached to the same book as the Data cache (Local Memory)"
+	},
+	{
+		"EventCode": "137",
+		"EventName": "L1I_LMEM_SOURCED_WRITES",
+		"BriefDescription": "L1I Local Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache where the installed cache line was sourced from memory that is attached to the same book as the Instruction cache (Local Memory)"
+	},
+	{
+		"EventCode": "138",
+		"EventName": "L1D_RO_EXCL_WRITES",
+		"BriefDescription": "L1D Read-only Exclusive Writes",
+		"PublicDescription": "A directory write to the Level-1 D-Cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
+	},
+	{
+		"EventCode": "139",
+		"EventName": "DTLB1_HPAGE_WRITES",
+		"BriefDescription": "DTLB1 One-Megabyte Page Writes",
+		"PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a one-megabyte page"
+	},
+	{
+		"EventCode": "140",
+		"EventName": "ITLB1_WRITES",
+		"BriefDescription": "ITLB1 Writes",
+		"PublicDescription": "A translation entry has been written to the Level-1 Instruction Translation Lookaside Buffer"
+	},
+	{
+		"EventCode": "141",
+		"EventName": "TLB2_PTE_WRITES",
+		"BriefDescription": "TLB2 PTE Writes",
+		"PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays"
+	},
+	{
+		"EventCode": "142",
+		"EventName": "TLB2_CRSTE_HPAGE_WRITES",
+		"BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes",
+		"PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays for a one-megabyte large page translation"
+	},
+	{
+		"EventCode": "143",
+		"EventName": "TLB2_CRSTE_WRITES",
+		"BriefDescription": "TLB2 CRSTE Writes",
+		"PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays"
+	},
+	{
+		"EventCode": "144",
+		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On Chip Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "145",
+		"EventName": "L1D_OFFCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "146",
+		"EventName": "L1D_OFFBOOK_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Book L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Book Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "147",
+		"EventName": "L1D_ONBOOK_L4_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Book L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On Book Level-4 cache"
+	},
+	{
+		"EventCode": "148",
+		"EventName": "L1D_OFFBOOK_L4_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Book L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Book Level-4 cache"
+	},
+	{
+		"EventCode": "149",
+		"EventName": "TX_NC_TEND",
+		"BriefDescription": "Completed TEND instructions in non-constrained TX mode",
+		"PublicDescription": "A TEND instruction has completed in a nonconstrained transactional-execution mode"
+	},
+	{
+		"EventCode": "150",
+		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On Chip Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "151",
+		"EventName": "L1D_OFFCHIP_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D Off-Chip L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "152",
+		"EventName": "L1D_OFFBOOK_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D Off-Book L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Book Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "153",
+		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Chip Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "154",
+		"EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "155",
+		"EventName": "L1I_OFFBOOK_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Book L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Book Level-3 cache without intervention"
+	},
+	{
+		"EventCode": "156",
+		"EventName": "L1I_ONBOOK_L4_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Book L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Book Level-4 cache"
+	},
+	{
+		"EventCode": "157",
+		"EventName": "L1I_OFFBOOK_L4_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Book L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Book Level-4 cache"
+	},
+	{
+		"EventCode": "158",
+		"EventName": "TX_C_TEND",
+		"BriefDescription": "Completed TEND instructions in constrained TX mode",
+		"PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
+	},
+	{
+		"EventCode": "159",
+		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Chip Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "160",
+		"EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I Off-Chip L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "161",
+		"EventName": "L1I_OFFBOOK_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I Off-Book L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Book Level-3 cache with intervention"
+	},
+	{
+		"EventCode": "177",
+		"EventName": "TX_NC_TABORT",
+		"BriefDescription": "Aborted transactions in non-constrained TX mode",
+		"PublicDescription": "A transaction abort has occurred in a nonconstrained transactional-execution mode"
+	},
+	{
+		"EventCode": "178",
+		"EventName": "TX_C_TABORT_NO_SPECIAL",
+		"BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
+		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
+	},
+	{
+		"EventCode": "179",
+		"EventName": "TX_C_TABORT_SPECIAL",
+		"BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
+		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete"
+	},
+]
diff --git a/pmu-events/arch/s390/mapfile.csv b/pmu-events/arch/s390/mapfile.csv
new file mode 100644
index 0000000..78bcf7f
--- /dev/null
+++ b/pmu-events/arch/s390/mapfile.csv
@@ -0,0 +1,6 @@
+Family-model,Version,Filename,EventType
+^IBM.209[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z10,core
+^IBM.281[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z196,core
+^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core
+^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core
+^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core
diff --git a/pmu-events/arch/x86/bonnell/cache.json b/pmu-events/arch/x86/bonnell/cache.json
new file mode 100644
index 0000000..ffab90c
--- /dev/null
+++ b/pmu-events/arch/x86/bonnell/cache.json
@@ -0,0 +1,746 @@
+[
+    {
+        "EventCode": "0x21",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "L2_ADS.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Cycles L2 address bus is in use."
+    },
+    {
+        "EventCode": "0x22",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "L2_DBUS_BUSY.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Cycles the L2 cache data bus is busy."
+    },
+    {
+        "EventCode": "0x23",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "L2_DBUS_BUSY_RD.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Cycles the L2 transfers data to the core."
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1",
+        "UMask": "0x70",
+        "EventName": "L2_LINES_IN.SELF.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache misses."
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "L2_LINES_IN.SELF.DEMAND",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache misses."
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1",
+        "UMask": "0x50",
+        "EventName": "L2_LINES_IN.SELF.PREFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache misses."
+    },
+    {
+        "EventCode": "0x25",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "L2_M_LINES_IN.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache line modifications."
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1",
+        "UMask": "0x70",
+        "EventName": "L2_LINES_OUT.SELF.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache lines evicted."
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "L2_LINES_OUT.SELF.DEMAND",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache lines evicted."
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1",
+        "UMask": "0x50",
+        "EventName": "L2_LINES_OUT.SELF.PREFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache lines evicted."
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1",
+        "UMask": "0x70",
+        "EventName": "L2_M_LINES_OUT.SELF.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Modified lines evicted from the L2 cache"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "L2_M_LINES_OUT.SELF.DEMAND",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Modified lines evicted from the L2 cache"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1",
+        "UMask": "0x50",
+        "EventName": "L2_M_LINES_OUT.SELF.PREFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Modified lines evicted from the L2 cache"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1",
+        "UMask": "0x44",
+        "EventName": "L2_IFETCH.SELF.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cacheable instruction fetch requests"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1",
+        "UMask": "0x41",
+        "EventName": "L2_IFETCH.SELF.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cacheable instruction fetch requests"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1",
+        "UMask": "0x48",
+        "EventName": "L2_IFETCH.SELF.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cacheable instruction fetch requests"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1",
+        "UMask": "0x42",
+        "EventName": "L2_IFETCH.SELF.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cacheable instruction fetch requests"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1",
+        "UMask": "0x4f",
+        "EventName": "L2_IFETCH.SELF.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cacheable instruction fetch requests"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x74",
+        "EventName": "L2_LD.SELF.ANY.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x71",
+        "EventName": "L2_LD.SELF.ANY.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x78",
+        "EventName": "L2_LD.SELF.ANY.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x72",
+        "EventName": "L2_LD.SELF.ANY.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x7f",
+        "EventName": "L2_LD.SELF.ANY.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x44",
+        "EventName": "L2_LD.SELF.DEMAND.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x41",
+        "EventName": "L2_LD.SELF.DEMAND.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x48",
+        "EventName": "L2_LD.SELF.DEMAND.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x42",
+        "EventName": "L2_LD.SELF.DEMAND.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x4f",
+        "EventName": "L2_LD.SELF.DEMAND.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x54",
+        "EventName": "L2_LD.SELF.PREFETCH.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x51",
+        "EventName": "L2_LD.SELF.PREFETCH.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x58",
+        "EventName": "L2_LD.SELF.PREFETCH.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x52",
+        "EventName": "L2_LD.SELF.PREFETCH.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x29",
+        "Counter": "0,1",
+        "UMask": "0x5f",
+        "EventName": "L2_LD.SELF.PREFETCH.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache reads"
+    },
+    {
+        "EventCode": "0x2A",
+        "Counter": "0,1",
+        "UMask": "0x44",
+        "EventName": "L2_ST.SELF.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 store requests"
+    },
+    {
+        "EventCode": "0x2A",
+        "Counter": "0,1",
+        "UMask": "0x41",
+        "EventName": "L2_ST.SELF.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 store requests"
+    },
+    {
+        "EventCode": "0x2A",
+        "Counter": "0,1",
+        "UMask": "0x48",
+        "EventName": "L2_ST.SELF.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 store requests"
+    },
+    {
+        "EventCode": "0x2A",
+        "Counter": "0,1",
+        "UMask": "0x42",
+        "EventName": "L2_ST.SELF.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 store requests"
+    },
+    {
+        "EventCode": "0x2A",
+        "Counter": "0,1",
+        "UMask": "0x4f",
+        "EventName": "L2_ST.SELF.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 store requests"
+    },
+    {
+        "EventCode": "0x2B",
+        "Counter": "0,1",
+        "UMask": "0x44",
+        "EventName": "L2_LOCK.SELF.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 locked accesses"
+    },
+    {
+        "EventCode": "0x2B",
+        "Counter": "0,1",
+        "UMask": "0x41",
+        "EventName": "L2_LOCK.SELF.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 locked accesses"
+    },
+    {
+        "EventCode": "0x2B",
+        "Counter": "0,1",
+        "UMask": "0x48",
+        "EventName": "L2_LOCK.SELF.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 locked accesses"
+    },
+    {
+        "EventCode": "0x2B",
+        "Counter": "0,1",
+        "UMask": "0x42",
+        "EventName": "L2_LOCK.SELF.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 locked accesses"
+    },
+    {
+        "EventCode": "0x2B",
+        "Counter": "0,1",
+        "UMask": "0x4f",
+        "EventName": "L2_LOCK.SELF.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 locked accesses"
+    },
+    {
+        "EventCode": "0x2C",
+        "Counter": "0,1",
+        "UMask": "0x44",
+        "EventName": "L2_DATA_RQSTS.SELF.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All data requests from the L1 data cache"
+    },
+    {
+        "EventCode": "0x2C",
+        "Counter": "0,1",
+        "UMask": "0x41",
+        "EventName": "L2_DATA_RQSTS.SELF.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All data requests from the L1 data cache"
+    },
+    {
+        "EventCode": "0x2C",
+        "Counter": "0,1",
+        "UMask": "0x48",
+        "EventName": "L2_DATA_RQSTS.SELF.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All data requests from the L1 data cache"
+    },
+    {
+        "EventCode": "0x2C",
+        "Counter": "0,1",
+        "UMask": "0x42",
+        "EventName": "L2_DATA_RQSTS.SELF.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All data requests from the L1 data cache"
+    },
+    {
+        "EventCode": "0x2C",
+        "Counter": "0,1",
+        "UMask": "0x4f",
+        "EventName": "L2_DATA_RQSTS.SELF.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All data requests from the L1 data cache"
+    },
+    {
+        "EventCode": "0x2D",
+        "Counter": "0,1",
+        "UMask": "0x44",
+        "EventName": "L2_LD_IFETCH.SELF.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All read requests from L1 instruction and data caches"
+    },
+    {
+        "EventCode": "0x2D",
+        "Counter": "0,1",
+        "UMask": "0x41",
+        "EventName": "L2_LD_IFETCH.SELF.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All read requests from L1 instruction and data caches"
+    },
+    {
+        "EventCode": "0x2D",
+        "Counter": "0,1",
+        "UMask": "0x48",
+        "EventName": "L2_LD_IFETCH.SELF.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All read requests from L1 instruction and data caches"
+    },
+    {
+        "EventCode": "0x2D",
+        "Counter": "0,1",
+        "UMask": "0x42",
+        "EventName": "L2_LD_IFETCH.SELF.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All read requests from L1 instruction and data caches"
+    },
+    {
+        "EventCode": "0x2D",
+        "Counter": "0,1",
+        "UMask": "0x4f",
+        "EventName": "L2_LD_IFETCH.SELF.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All read requests from L1 instruction and data caches"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x74",
+        "EventName": "L2_RQSTS.SELF.ANY.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x71",
+        "EventName": "L2_RQSTS.SELF.ANY.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x78",
+        "EventName": "L2_RQSTS.SELF.ANY.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x72",
+        "EventName": "L2_RQSTS.SELF.ANY.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x7f",
+        "EventName": "L2_RQSTS.SELF.ANY.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x44",
+        "EventName": "L2_RQSTS.SELF.DEMAND.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x48",
+        "EventName": "L2_RQSTS.SELF.DEMAND.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x42",
+        "EventName": "L2_RQSTS.SELF.DEMAND.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x54",
+        "EventName": "L2_RQSTS.SELF.PREFETCH.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x51",
+        "EventName": "L2_RQSTS.SELF.PREFETCH.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x58",
+        "EventName": "L2_RQSTS.SELF.PREFETCH.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x52",
+        "EventName": "L2_RQSTS.SELF.PREFETCH.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x5f",
+        "EventName": "L2_RQSTS.SELF.PREFETCH.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x41",
+        "EventName": "L2_RQSTS.SELF.DEMAND.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache demand requests from this core that missed the L2"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x4f",
+        "EventName": "L2_RQSTS.SELF.DEMAND.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 cache demand requests from this core"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x74",
+        "EventName": "L2_REJECT_BUSQ.SELF.ANY.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x71",
+        "EventName": "L2_REJECT_BUSQ.SELF.ANY.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x78",
+        "EventName": "L2_REJECT_BUSQ.SELF.ANY.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x72",
+        "EventName": "L2_REJECT_BUSQ.SELF.ANY.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x7f",
+        "EventName": "L2_REJECT_BUSQ.SELF.ANY.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x44",
+        "EventName": "L2_REJECT_BUSQ.SELF.DEMAND.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x41",
+        "EventName": "L2_REJECT_BUSQ.SELF.DEMAND.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x48",
+        "EventName": "L2_REJECT_BUSQ.SELF.DEMAND.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x42",
+        "EventName": "L2_REJECT_BUSQ.SELF.DEMAND.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x4f",
+        "EventName": "L2_REJECT_BUSQ.SELF.DEMAND.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x54",
+        "EventName": "L2_REJECT_BUSQ.SELF.PREFETCH.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x51",
+        "EventName": "L2_REJECT_BUSQ.SELF.PREFETCH.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x58",
+        "EventName": "L2_REJECT_BUSQ.SELF.PREFETCH.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x52",
+        "EventName": "L2_REJECT_BUSQ.SELF.PREFETCH.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x5f",
+        "EventName": "L2_REJECT_BUSQ.SELF.PREFETCH.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Rejected L2 cache requests"
+    },
+    {
+        "EventCode": "0x32",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "L2_NO_REQ.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Cycles no L2 cache requests are pending"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0xa1",
+        "EventName": "L1D_CACHE.LD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 Cacheable Data Reads"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0xa2",
+        "EventName": "L1D_CACHE.ST",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 Cacheable Data Writes"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0x83",
+        "EventName": "L1D_CACHE.ALL_REF",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 Data reads and writes"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0xa3",
+        "EventName": "L1D_CACHE.ALL_CACHE_REF",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 Data Cacheable reads and writes"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "L1D_CACHE.REPL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1 Data line replacements"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0x48",
+        "EventName": "L1D_CACHE.REPLM",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Modified cache lines allocated in the L1 data cache"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "L1D_CACHE.EVICT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Modified cache lines evicted from the L1 data cache"
+    },
+    {
+        "EventCode": "0xCB",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that hit the L2 cache (precise event)."
+    },
+    {
+        "EventCode": "0xCB",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Retired loads that miss the L2 cache"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/bonnell/floating-point.json b/pmu-events/arch/x86/bonnell/floating-point.json
new file mode 100644
index 0000000..f0e090c
--- /dev/null
+++ b/pmu-events/arch/x86/bonnell/floating-point.json
@@ -0,0 +1,261 @@
+[
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "X87_COMP_OPS_EXE.ANY.S",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Floating point computational micro-ops executed."
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0x10",
+        "Counter": "0,1",
+        "UMask": "0x81",
+        "EventName": "X87_COMP_OPS_EXE.ANY.AR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Floating point computational micro-ops retired."
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "X87_COMP_OPS_EXE.FXCH.S",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "FXCH uops executed."
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0x10",
+        "Counter": "0,1",
+        "UMask": "0x82",
+        "EventName": "X87_COMP_OPS_EXE.FXCH.AR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "FXCH uops retired."
+    },
+    {
+        "EventCode": "0x11",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "FP_ASSIST.S",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Floating point assists."
+    },
+    {
+        "EventCode": "0x11",
+        "Counter": "0,1",
+        "UMask": "0x81",
+        "EventName": "FP_ASSIST.AR",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Floating point assists for retired operations."
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "SIMD_UOPS_EXEC.S",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD micro-ops executed (excluding stores)."
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB0",
+        "Counter": "0,1",
+        "UMask": "0x80",
+        "EventName": "SIMD_UOPS_EXEC.AR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD micro-ops retired (excluding stores)."
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "SIMD_SAT_UOP_EXEC.S",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD saturated arithmetic micro-ops executed."
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1",
+        "UMask": "0x80",
+        "EventName": "SIMD_SAT_UOP_EXEC.AR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD saturated arithmetic micro-ops retired."
+    },
+    {
+        "EventCode": "0xB3",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "SIMD_UOP_TYPE_EXEC.MUL.S",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD packed multiply micro-ops executed"
+    },
+    {
+        "EventCode": "0xB3",
+        "Counter": "0,1",
+        "UMask": "0x81",
+        "EventName": "SIMD_UOP_TYPE_EXEC.MUL.AR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD packed multiply micro-ops retired"
+    },
+    {
+        "EventCode": "0xB3",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "SIMD_UOP_TYPE_EXEC.SHIFT.S",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD packed shift micro-ops executed"
+    },
+    {
+        "EventCode": "0xB3",
+        "Counter": "0,1",
+        "UMask": "0x82",
+        "EventName": "SIMD_UOP_TYPE_EXEC.SHIFT.AR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD packed shift micro-ops retired"
+    },
+    {
+        "EventCode": "0xB3",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "SIMD_UOP_TYPE_EXEC.PACK.S",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD packed micro-ops executed"
+    },
+    {
+        "EventCode": "0xB3",
+        "Counter": "0,1",
+        "UMask": "0x84",
+        "EventName": "SIMD_UOP_TYPE_EXEC.PACK.AR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD packed micro-ops retired"
+    },
+    {
+        "EventCode": "0xB3",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "SIMD_UOP_TYPE_EXEC.UNPACK.S",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD unpacked micro-ops executed"
+    },
+    {
+        "EventCode": "0xB3",
+        "Counter": "0,1",
+        "UMask": "0x88",
+        "EventName": "SIMD_UOP_TYPE_EXEC.UNPACK.AR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD unpacked micro-ops retired"
+    },
+    {
+        "EventCode": "0xB3",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "SIMD_UOP_TYPE_EXEC.LOGICAL.S",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD packed logical micro-ops executed"
+    },
+    {
+        "EventCode": "0xB3",
+        "Counter": "0,1",
+        "UMask": "0x90",
+        "EventName": "SIMD_UOP_TYPE_EXEC.LOGICAL.AR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD packed logical micro-ops retired"
+    },
+    {
+        "EventCode": "0xB3",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "SIMD_UOP_TYPE_EXEC.ARITHMETIC.S",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD packed arithmetic micro-ops executed"
+    },
+    {
+        "EventCode": "0xB3",
+        "Counter": "0,1",
+        "UMask": "0xa0",
+        "EventName": "SIMD_UOP_TYPE_EXEC.ARITHMETIC.AR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD packed arithmetic micro-ops retired"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "SIMD_INST_RETIRED.PACKED_SINGLE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired Streaming SIMD Extensions (SSE) packed-single instructions."
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "SIMD_INST_RETIRED.SCALAR_SINGLE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired Streaming SIMD Extensions (SSE) scalar-single instructions."
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "SIMD_INST_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired Streaming SIMD Extensions 2 (SSE2) scalar-double instructions."
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "SIMD_INST_RETIRED.VECTOR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired Streaming SIMD Extensions 2 (SSE2) vector instructions."
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "SIMD_COMP_INST_RETIRED.PACKED_SINGLE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired computational Streaming SIMD Extensions (SSE) packed-single instructions."
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "SIMD_COMP_INST_RETIRED.SCALAR_SINGLE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired computational Streaming SIMD Extensions (SSE) scalar-single instructions."
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "SIMD_COMP_INST_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired computational Streaming SIMD Extensions 2 (SSE2) scalar-double instructions."
+    },
+    {
+        "EventCode": "0xCD",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "SIMD_ASSIST",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "SIMD assists invoked."
+    },
+    {
+        "EventCode": "0xCE",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "SIMD_INSTR_RETIRED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SIMD Instructions retired."
+    },
+    {
+        "EventCode": "0xCF",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "SIMD_SAT_INSTR_RETIRED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Saturated arithmetic instructions retired."
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/bonnell/frontend.json b/pmu-events/arch/x86/bonnell/frontend.json
new file mode 100644
index 0000000..935b7dc
--- /dev/null
+++ b/pmu-events/arch/x86/bonnell/frontend.json
@@ -0,0 +1,83 @@
+[
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1",
+        "UMask": "0x3",
+        "EventName": "ICACHE.ACCESSES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Instruction fetches."
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "ICACHE.HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Icache hit"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "ICACHE.MISSES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Icache miss"
+    },
+    {
+        "EventCode": "0x86",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CYCLES_ICACHE_MEM_STALLED.ICACHE_MEM_STALLED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles during which instruction fetches are stalled."
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "DECODE_STALL.PFB_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Decode stall due to PFB empty"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "DECODE_STALL.IQ_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Decode stall due to IQ full"
+    },
+    {
+        "EventCode": "0xAA",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "MACRO_INSTS.NON_CISC_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Non-CISC macro instructions decoded"
+    },
+    {
+        "EventCode": "0xAA",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "MACRO_INSTS.CISC_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "CISC macro instructions decoded"
+    },
+    {
+        "EventCode": "0xAA",
+        "Counter": "0,1",
+        "UMask": "0x3",
+        "EventName": "MACRO_INSTS.ALL_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All Instructions decoded"
+    },
+    {
+        "EventCode": "0xA9",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "UOPS.MS_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "This event counts the cycles where 1 or more uops are issued by the micro-sequencer (MS), including microcode assists and inserted flows, and written to the IQ.",
+        "CounterMask": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/bonnell/memory.json b/pmu-events/arch/x86/bonnell/memory.json
new file mode 100644
index 0000000..3ae843b
--- /dev/null
+++ b/pmu-events/arch/x86/bonnell/memory.json
@@ -0,0 +1,154 @@
+[
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1",
+        "UMask": "0xf",
+        "EventName": "MISALIGN_MEM_REF.SPLIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Memory references that cross an 8-byte boundary."
+    },
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1",
+        "UMask": "0x9",
+        "EventName": "MISALIGN_MEM_REF.LD_SPLIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Load splits"
+    },
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1",
+        "UMask": "0xa",
+        "EventName": "MISALIGN_MEM_REF.ST_SPLIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Store splits"
+    },
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1",
+        "UMask": "0x8f",
+        "EventName": "MISALIGN_MEM_REF.SPLIT.AR",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Memory references that cross an 8-byte boundary (At Retirement)"
+    },
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1",
+        "UMask": "0x89",
+        "EventName": "MISALIGN_MEM_REF.LD_SPLIT.AR",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Load splits (At Retirement)"
+    },
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1",
+        "UMask": "0x8a",
+        "EventName": "MISALIGN_MEM_REF.ST_SPLIT.AR",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Store splits (At Retirement)"
+    },
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1",
+        "UMask": "0x8c",
+        "EventName": "MISALIGN_MEM_REF.RMW_SPLIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ld-op-st splits"
+    },
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1",
+        "UMask": "0x97",
+        "EventName": "MISALIGN_MEM_REF.BUBBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Nonzero segbase 1 bubble"
+    },
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1",
+        "UMask": "0x91",
+        "EventName": "MISALIGN_MEM_REF.LD_BUBBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Nonzero segbase load 1 bubble"
+    },
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1",
+        "UMask": "0x92",
+        "EventName": "MISALIGN_MEM_REF.ST_BUBBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Nonzero segbase store 1 bubble"
+    },
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1",
+        "UMask": "0x94",
+        "EventName": "MISALIGN_MEM_REF.RMW_BUBBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Nonzero segbase ld-op-st 1 bubble"
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1",
+        "UMask": "0x81",
+        "EventName": "PREFETCH.PREFETCHT0",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Streaming SIMD Extensions (SSE) PrefetchT0 instructions executed."
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1",
+        "UMask": "0x82",
+        "EventName": "PREFETCH.PREFETCHT1",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Streaming SIMD Extensions (SSE) PrefetchT1 instructions executed."
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1",
+        "UMask": "0x84",
+        "EventName": "PREFETCH.PREFETCHT2",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Streaming SIMD Extensions (SSE) PrefetchT2 instructions executed."
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1",
+        "UMask": "0x86",
+        "EventName": "PREFETCH.SW_L2",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Streaming SIMD Extensions (SSE) PrefetchT1 and PrefetchT2 instructions executed"
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1",
+        "UMask": "0x88",
+        "EventName": "PREFETCH.PREFETCHNTA",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Streaming SIMD Extensions (SSE) Prefetch NTA instructions executed"
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "PREFETCH.HW_PREFETCH",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 hardware prefetch request"
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1",
+        "UMask": "0xf",
+        "EventName": "PREFETCH.SOFTWARE_PREFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Any Software prefetch"
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1",
+        "UMask": "0x8f",
+        "EventName": "PREFETCH.SOFTWARE_PREFETCH.AR",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Any Software prefetch (At Retirement)"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/bonnell/other.json b/pmu-events/arch/x86/bonnell/other.json
new file mode 100644
index 0000000..4bc1c58
--- /dev/null
+++ b/pmu-events/arch/x86/bonnell/other.json
@@ -0,0 +1,450 @@
+[
+    {
+        "EventCode": "0x6",
+        "Counter": "0,1",
+        "UMask": "0x80",
+        "EventName": "SEGMENT_REG_LOADS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Number of segment register loads."
+    },
+    {
+        "EventCode": "0x9",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "DISPATCH_BLOCKED.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Memory cluster signals to block micro-op dispatch for any reason"
+    },
+    {
+        "EventCode": "0x3A",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "EIST_TRANS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions"
+    },
+    {
+        "EventCode": "0x3B",
+        "Counter": "0,1",
+        "UMask": "0xc0",
+        "EventName": "THERMAL_TRIP",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Number of thermal trips"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_REQUEST_OUTSTANDING.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Outstanding cacheable data read bus requests duration."
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_REQUEST_OUTSTANDING.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Outstanding cacheable data read bus requests duration."
+    },
+    {
+        "EventCode": "0x61",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "BUS_BNR_DRV.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Number of Bus Not Ready signals asserted."
+    },
+    {
+        "EventCode": "0x61",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "BUS_BNR_DRV.THIS_AGENT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Number of Bus Not Ready signals asserted."
+    },
+    {
+        "EventCode": "0x62",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "BUS_DRDY_CLOCKS.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Bus cycles when data is sent on the bus."
+    },
+    {
+        "EventCode": "0x62",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "BUS_DRDY_CLOCKS.THIS_AGENT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Bus cycles when data is sent on the bus."
+    },
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_LOCK_CLOCKS.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Bus cycles when a LOCK signal is asserted."
+    },
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_LOCK_CLOCKS.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Bus cycles when a LOCK signal is asserted."
+    },
+    {
+        "EventCode": "0x64",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_DATA_RCV.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Bus cycles while processor receives data."
+    },
+    {
+        "EventCode": "0x65",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_TRANS_BRD.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Burst read bus transactions."
+    },
+    {
+        "EventCode": "0x65",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_TRANS_BRD.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Burst read bus transactions."
+    },
+    {
+        "EventCode": "0x66",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_TRANS_RFO.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "RFO bus transactions."
+    },
+    {
+        "EventCode": "0x66",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_TRANS_RFO.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "RFO bus transactions."
+    },
+    {
+        "EventCode": "0x67",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_TRANS_WB.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Explicit writeback bus transactions."
+    },
+    {
+        "EventCode": "0x67",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_TRANS_WB.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Explicit writeback bus transactions."
+    },
+    {
+        "EventCode": "0x68",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_TRANS_IFETCH.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Instruction-fetch bus transactions."
+    },
+    {
+        "EventCode": "0x68",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_TRANS_IFETCH.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Instruction-fetch bus transactions."
+    },
+    {
+        "EventCode": "0x69",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_TRANS_INVAL.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Invalidate bus transactions."
+    },
+    {
+        "EventCode": "0x69",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_TRANS_INVAL.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Invalidate bus transactions."
+    },
+    {
+        "EventCode": "0x6A",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_TRANS_PWR.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Partial write bus transaction."
+    },
+    {
+        "EventCode": "0x6A",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_TRANS_PWR.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Partial write bus transaction."
+    },
+    {
+        "EventCode": "0x6B",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_TRANS_P.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Partial bus transactions."
+    },
+    {
+        "EventCode": "0x6B",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_TRANS_P.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Partial bus transactions."
+    },
+    {
+        "EventCode": "0x6C",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_TRANS_IO.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "IO bus transactions."
+    },
+    {
+        "EventCode": "0x6C",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_TRANS_IO.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "IO bus transactions."
+    },
+    {
+        "EventCode": "0x6D",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_TRANS_DEF.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Deferred bus transactions."
+    },
+    {
+        "EventCode": "0x6D",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_TRANS_DEF.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Deferred bus transactions."
+    },
+    {
+        "EventCode": "0x6E",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_TRANS_BURST.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Burst (full cache-line) bus transactions."
+    },
+    {
+        "EventCode": "0x6E",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_TRANS_BURST.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Burst (full cache-line) bus transactions."
+    },
+    {
+        "EventCode": "0x6F",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_TRANS_MEM.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Memory bus transactions."
+    },
+    {
+        "EventCode": "0x6F",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_TRANS_MEM.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Memory bus transactions."
+    },
+    {
+        "EventCode": "0x70",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "BUS_TRANS_ANY.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All bus transactions."
+    },
+    {
+        "EventCode": "0x70",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_TRANS_ANY.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All bus transactions."
+    },
+    {
+        "EventCode": "0x77",
+        "Counter": "0,1",
+        "UMask": "0xb",
+        "EventName": "EXT_SNOOP.THIS_AGENT.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "External snoops."
+    },
+    {
+        "EventCode": "0x77",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "EXT_SNOOP.THIS_AGENT.CLEAN",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "External snoops."
+    },
+    {
+        "EventCode": "0x77",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "EXT_SNOOP.THIS_AGENT.HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "External snoops."
+    },
+    {
+        "EventCode": "0x77",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "EXT_SNOOP.THIS_AGENT.HITM",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "External snoops."
+    },
+    {
+        "EventCode": "0x77",
+        "Counter": "0,1",
+        "UMask": "0x2b",
+        "EventName": "EXT_SNOOP.ALL_AGENTS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "External snoops."
+    },
+    {
+        "EventCode": "0x77",
+        "Counter": "0,1",
+        "UMask": "0x21",
+        "EventName": "EXT_SNOOP.ALL_AGENTS.CLEAN",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "External snoops."
+    },
+    {
+        "EventCode": "0x77",
+        "Counter": "0,1",
+        "UMask": "0x22",
+        "EventName": "EXT_SNOOP.ALL_AGENTS.HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "External snoops."
+    },
+    {
+        "EventCode": "0x77",
+        "Counter": "0,1",
+        "UMask": "0x28",
+        "EventName": "EXT_SNOOP.ALL_AGENTS.HITM",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "External snoops."
+    },
+    {
+        "EventCode": "0x7A",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "BUS_HIT_DRV.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "HIT signal asserted."
+    },
+    {
+        "EventCode": "0x7A",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "BUS_HIT_DRV.THIS_AGENT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "HIT signal asserted."
+    },
+    {
+        "EventCode": "0x7B",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "BUS_HITM_DRV.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "HITM signal asserted."
+    },
+    {
+        "EventCode": "0x7B",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "BUS_HITM_DRV.THIS_AGENT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "HITM signal asserted."
+    },
+    {
+        "EventCode": "0x7D",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUSQ_EMPTY.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Bus queue is empty."
+    },
+    {
+        "EventCode": "0x7E",
+        "Counter": "0,1",
+        "UMask": "0xe0",
+        "EventName": "SNOOP_STALL_DRV.ALL_AGENTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Bus stalled for snoops."
+    },
+    {
+        "EventCode": "0x7E",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "SNOOP_STALL_DRV.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Bus stalled for snoops."
+    },
+    {
+        "EventCode": "0x7F",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "BUS_IO_WAIT.SELF",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "IO requests waiting in the bus queue."
+    },
+    {
+        "EventCode": "0xC6",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CYCLES_INT_MASKED.CYCLES_INT_MASKED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles during which interrupts are disabled."
+    },
+    {
+        "EventCode": "0xC6",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "CYCLES_INT_MASKED.CYCLES_INT_PENDING_AND_MASKED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles during which interrupts are pending and disabled."
+    },
+    {
+        "EventCode": "0xC8",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "HW_INT_RCV",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Hardware interrupts received."
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/bonnell/pipeline.json b/pmu-events/arch/x86/bonnell/pipeline.json
new file mode 100644
index 0000000..b2e681c
--- /dev/null
+++ b/pmu-events/arch/x86/bonnell/pipeline.json
@@ -0,0 +1,364 @@
+[
+    {
+        "EventCode": "0x2",
+        "Counter": "0,1",
+        "UMask": "0x83",
+        "EventName": "STORE_FORWARDS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All store forwards"
+    },
+    {
+        "EventCode": "0x2",
+        "Counter": "0,1",
+        "UMask": "0x81",
+        "EventName": "STORE_FORWARDS.GOOD",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Good store forwards"
+    },
+    {
+        "EventCode": "0x3",
+        "Counter": "0,1",
+        "UMask": "0x7f",
+        "EventName": "REISSUE.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Micro-op reissues for any cause"
+    },
+    {
+        "EventCode": "0x3",
+        "Counter": "0,1",
+        "UMask": "0xff",
+        "EventName": "REISSUE.ANY.AR",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Micro-op reissues for any cause (At Retirement)"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "MUL.S",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Multiply operations executed."
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1",
+        "UMask": "0x81",
+        "EventName": "MUL.AR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Multiply operations retired"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "DIV.S",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Divide operations executed."
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1",
+        "UMask": "0x81",
+        "EventName": "DIV.AR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Divide operations retired"
+    },
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CYCLES_DIV_BUSY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles the divider is busy."
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.CORE_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Core cycles when core is not halted"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.BUS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Bus cycles when core is not halted"
+    },
+    {
+        "EventCode": "0xA",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Core cycles when core is not halted"
+    },
+    {
+        "EventCode": "0xA",
+        "Counter": "Fixed counter 3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.REF",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Reference cycles when core is not halted."
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "BR_INST_TYPE_RETIRED.COND",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All macro conditional branch instructions."
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "BR_INST_TYPE_RETIRED.UNCOND",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All macro unconditional branch instructions, excluding calls and indirects"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "BR_INST_TYPE_RETIRED.IND",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All indirect branches that are not calls."
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "BR_INST_TYPE_RETIRED.RET",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All indirect branches that have a return mnemonic"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "BR_INST_TYPE_RETIRED.DIR_CALL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All non-indirect calls"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "BR_INST_TYPE_RETIRED.IND_CALL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All indirect calls, including both register and memory indirect."
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1",
+        "UMask": "0x41",
+        "EventName": "BR_INST_TYPE_RETIRED.COND_TAKEN",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Only taken macro conditional branch instructions"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "BR_MISSP_TYPE_RETIRED.COND",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Mispredicted cond branch instructions retired"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "BR_MISSP_TYPE_RETIRED.IND",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Mispredicted ind branches that are not calls"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "BR_MISSP_TYPE_RETIRED.RETURN",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Mispredicted return branches"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "BR_MISSP_TYPE_RETIRED.IND_CALL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Mispredicted indirect calls, including both register and memory indirect."
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1",
+        "UMask": "0x11",
+        "EventName": "BR_MISSP_TYPE_RETIRED.COND_TAKEN",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Mispredicted and taken cond branch instructions retired"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC0",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired (precise event)."
+    },
+    {
+        "EventCode": "0xA",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired."
+    },
+    {
+        "EventCode": "0xC2",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "UOPS_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Micro-ops retired."
+    },
+    {
+        "EventCode": "0xC2",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "UOPS_RETIRED.STALLED_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no micro-ops retired."
+    },
+    {
+        "EventCode": "0xC2",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "UOPS_RETIRED.STALLS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Periods no micro-ops retired."
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Self-Modifying Code detected."
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "BR_INST_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired branch instructions."
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "BR_INST_RETIRED.PRED_NOT_TAKEN",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired branch instructions that were predicted not-taken."
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.MISPRED_NOT_TAKEN",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired branch instructions that were mispredicted not-taken."
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "BR_INST_RETIRED.PRED_TAKEN",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired branch instructions that were predicted taken."
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "BR_INST_RETIRED.MISPRED_TAKEN",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired branch instructions that were mispredicted taken."
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xc",
+        "EventName": "BR_INST_RETIRED.TAKEN",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired taken branch instructions."
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xf",
+        "EventName": "BR_INST_RETIRED.ANY1",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired branch instructions."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "BR_INST_RETIRED.MISPRED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired mispredicted branch instructions (precise event)."
+    },
+    {
+        "EventCode": "0xDC",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "RESOURCE_STALLS.DIV_BUSY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles issue is stalled due to div busy."
+    },
+    {
+        "EventCode": "0xE0",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "BR_INST_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Branch instructions decoded"
+    },
+    {
+        "EventCode": "0xE4",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "BOGUS_BR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Bogus branches"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "BACLEARS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "BACLEARS asserted."
+    },
+    {
+        "EventCode": "0x3",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "REISSUE.OVERLAP_STORE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Micro-op reissues on a store-load collision"
+    },
+    {
+        "EventCode": "0x3",
+        "Counter": "0,1",
+        "UMask": "0x81",
+        "EventName": "REISSUE.OVERLAP_STORE.AR",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Micro-op reissues on a store-load collision (At Retirement)"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/bonnell/virtual-memory.json b/pmu-events/arch/x86/bonnell/virtual-memory.json
new file mode 100644
index 0000000..7bb8175
--- /dev/null
+++ b/pmu-events/arch/x86/bonnell/virtual-memory.json
@@ -0,0 +1,124 @@
+[
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1",
+        "UMask": "0x7",
+        "EventName": "DATA_TLB_MISSES.DTLB_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Memory accesses that missed the DTLB."
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1",
+        "UMask": "0x5",
+        "EventName": "DATA_TLB_MISSES.DTLB_MISS_LD",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB misses due to load operations."
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1",
+        "UMask": "0x9",
+        "EventName": "DATA_TLB_MISSES.L0_DTLB_MISS_LD",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L0 DTLB misses due to load operations."
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1",
+        "UMask": "0x6",
+        "EventName": "DATA_TLB_MISSES.DTLB_MISS_ST",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB misses due to store operations."
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1",
+        "UMask": "0xa",
+        "EventName": "DATA_TLB_MISSES.L0_DTLB_MISS_ST",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L0 DTLB misses due to store operations"
+    },
+    {
+        "EventCode": "0xC",
+        "Counter": "0,1",
+        "UMask": "0x3",
+        "EventName": "PAGE_WALKS.WALKS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Number of page-walks executed."
+    },
+    {
+        "EventCode": "0xC",
+        "Counter": "0,1",
+        "UMask": "0x3",
+        "EventName": "PAGE_WALKS.CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Duration of page-walks in core cycles"
+    },
+    {
+        "EventCode": "0xC",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "PAGE_WALKS.D_SIDE_WALKS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Number of D-side only page walks"
+    },
+    {
+        "EventCode": "0xC",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "PAGE_WALKS.D_SIDE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Duration of D-side only page walks"
+    },
+    {
+        "EventCode": "0xC",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "PAGE_WALKS.I_SIDE_WALKS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Number of I-Side page walks"
+    },
+    {
+        "EventCode": "0xC",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "PAGE_WALKS.I_SIDE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Duration of I-Side page walks"
+    },
+    {
+        "EventCode": "0x82",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "ITLB.HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB hits."
+    },
+    {
+        "EventCode": "0x82",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "ITLB.FLUSH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB flushes."
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0x82",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "ITLB.MISSES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB misses."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that miss the DTLB (precise event)."
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwell/bdw-metrics.json b/pmu-events/arch/x86/broadwell/bdw-metrics.json
new file mode 100644
index 0000000..00bfdb5
--- /dev/null
+++ b/pmu-events/arch/x86/broadwell/bdw-metrics.json
@@ -0,0 +1,164 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "MetricGroup": "Pipeline",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Rough estimation of the fraction of fetched line bytes that were likely consumed by program instructions",
+        "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+        "MetricGroup": "Frontend",
+        "MetricName": "IFetch_Line_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (threaded)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "MetricGroup": "Pipeline;Summary",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Total issue-pipeline slots",
+        "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "SLOTS"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "SMT",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+        "MetricGroup": "Pipeline;Ports_Utilization",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+        "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL  - (( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)",
+        "MetricGroup": "Unknown_Branches",
+        "MetricName": "BAClear_Cost"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+        "MetricGroup": "Memory_Bound;Memory_Lat",
+        "MetricName": "Load_Miss_Real_Latency"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand loads when there is at least 1 such miss)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+        "MetricGroup": "Memory_Bound;Memory_BW",
+        "MetricName": "MLP"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TLB",
+        "MetricName": "Page_Walks_Utilization"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Giga Floating Point Operations Per Second",
+        "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
+        "MetricGroup": "FLOPS;Summary",
+        "MetricName": "GFLOPs"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware threads were active",
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT;Summary",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Summary",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "C3 residency percent per core",
+        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    }
+]
diff --git a/pmu-events/arch/x86/broadwell/cache.json b/pmu-events/arch/x86/broadwell/cache.json
new file mode 100644
index 0000000..bba3152
--- /dev/null
+++ b/pmu-events/arch/x86/broadwell/cache.json
@@ -0,0 +1,3399 @@
+[
+    {
+        "PublicDescription": "This event counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read miss L2, no rejects",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x22",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that miss L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache misses when fetching instructions.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x27",
+        "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand requests that miss L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that miss L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "L2_RQSTS.L2_PF_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 prefetch requests that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "EventName": "L2_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "All requests that miss L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that hit L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x44",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that hit L2 cache. L3 prefetch new types.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x50",
+        "EventName": "L2_RQSTS.L2_PF_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 prefetch requests that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe1",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe2",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests to L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the total number of L2 code requests.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe4",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 code requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe7",
+        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand requests to L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the total number of requests from the L2 hardware prefetchers.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf8",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from L2 hardware prefetchers",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "All L2 requests.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of WB requests that hit L2 cache.",
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x50",
+        "EventName": "L2_DEMAND_RQSTS.WB_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts core-originated cacheable demand requests that miss the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests missed L3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts core-originated cacheable demand requests that refer to the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand; from the demand Hit FB, if it is allocated by hardware or software prefetch.\nNote: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D miss outstandings duration in cycles",
+        "CounterHTOff": "2"
+    },
+    {
+        "PublicDescription": "This event counts duration of L1D miss outstanding in cycles.",
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "CounterMask": "1",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+        "CounterMask": "1",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers unavailability.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D.REPLACEMENT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D data line replacements",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.\nNote: A prefetch promoted to Demand is counted from the promotion point.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "BDM76",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "BDM76",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "BDM76",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "BDM76",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "BDM76",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "BDM76",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "BDM76",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "BDM76",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).",
+        "EventCode": "0x63",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when L1D is locked",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts both cacheable and noncacheable code read requests.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cacheable and noncacheable code read requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable Demands and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand and prefetch data reads",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.\nNote: Writeback pending FIFO has six entries.",
+        "EventCode": "0xb2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x11",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x12",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "L1_Hit_Indication": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "Errata": "BDM35",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "L1_Hit_Indication": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired load uops. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired store uops. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "L1_Hit_Indication": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data source were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit  even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "BDM35",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "BDM100",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops misses in L1 cache as data sources. Uses PEBS.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "Errata": "BDM100, BDE70",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS).",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit  even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "BDM100",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "BDM100",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "BDM100",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "BDM100",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.",
+        "EventCode": "0xD3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "BDE70, BDM100",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PublicDescription": "This event counts Demand Data Read requests that access L2 cache, including rejects.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_TRANS.DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts Read for Ownership (RFO) requests that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_TRANS.RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of L2 cache accesses when fetching instructions.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_TRANS.CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache accesses when fetching instructions",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts L2 or L3 HW prefetches that access L2 cache including rejects.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_TRANS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 or L3 HW prefetches that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts L1D writebacks that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_TRANS.L1D_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L1D writebacks that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts L2 fill requests that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_TRANS.L2_FILL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 fill requests that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts L2 writebacks that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_TRANS.L2_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 writebacks that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts transactions that access the L2 pipe including snoops, pagewalks, and so on.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_TRANS.ALL_REQUESTS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Transactions accessing L2 pipe",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of L2 cache lines in the Invalidate state filling the L2. Counting does not cover rejects.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_IN.I",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in I state filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of L2 cache lines in the Shared state filling the L2. Counting does not cover rejects.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_IN.S",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in S state filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of L2 cache lines in the Exclusive state filling the L2. Counting does not cover rejects.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_IN.E",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in E state filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "L2_LINES_IN.ALL",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Clean L2 cache lines evicted by demand.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of split locks in the super queue.",
+        "EventCode": "0xf4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Split locks in SQ",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NONE. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_MISS. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HITM. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_DATA_RD & L3_HIT & SNOOP_HITM. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_RFO & L3_HIT & SNOOP_HITM. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NONE. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_MISS. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HITM. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_CODE_RD & SUPPLIER_NONE & ANY_SNOOP. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching DEMAND_CODE_RD & L3_HIT & SNOOP_HITM. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts writebacks (modified to exclusive) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts writebacks (modified to exclusive) that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts offcore responses matching COREWB & SUPPLIER_NONE & SNOOP_NONE. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_HITM. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & ANY_SNOOP. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L2_CODE_RD & L3_HIT & SNOOP_HITM. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NONE. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_MISS. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HITM. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & ANY_SNOOP. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_HITM. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NONE. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_MISS. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HITM. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_RFO & SUPPLIER_NONE & ANY_SNOOP. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_RFO & L3_HIT & SNOOP_HITM. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NONE. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_MISS. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_HITM. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts any other requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000018000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts any other requests that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080028000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100028000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200028000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400028000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000028000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80028000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts any other requests that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c8000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts any other requests that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c8000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts any other requests that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c8000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts any other requests that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c8000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c8000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts any other requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c8000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts any other requests that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
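+        "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",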
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch code reads that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that have no supplier information available, with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
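+        "PublicDescription": "Counts all prefetch code reads that have no supplier information available and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",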
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that have no supplier information available, with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
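+        "PublicDescription": "Counts all prefetch code reads that have no supplier information available and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",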
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
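+        "PublicDescription": "Counts all prefetch code reads that have no supplier information available and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",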
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that have no supplier information available. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch code reads that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch code reads that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
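+        "PublicDescription": "Counts all prefetch code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",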
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch code reads that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
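+        "PublicDescription": "Counts all demand & prefetch data reads that have no supplier information available, with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",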
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
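+        "PublicDescription": "Counts all demand & prefetch data reads that have no supplier information available and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",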
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that have no supplier information available, with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
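+        "PublicDescription": "Counts all demand & prefetch data reads that have no supplier information available and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",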
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
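+        "PublicDescription": "Counts all demand & prefetch data reads that have no supplier information available and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",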
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that have no supplier information available. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
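+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",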
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that have no supplier information available, with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
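+        "PublicDescription": "Counts all demand & prefetch RFOs that have no supplier information available and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",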
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that have no supplier information available, with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
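+        "PublicDescription": "Counts all demand & prefetch RFOs that have no supplier information available and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",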
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
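+        "PublicDescription": "Counts all demand & prefetch RFOs that have no supplier information available and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",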
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that have no supplier information available. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f80020122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00803c0122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01003c0122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02003c0122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
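+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",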
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwell/floating-point.json b/pmu-events/arch/x86/broadwell/floating-point.json
new file mode 100644
index 0000000..689d478
--- /dev/null
+++ b/pmu-events/arch/x86/broadwell/floating-point.json
@@ -0,0 +1,165 @@
+[
+    {
+        "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.",
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "BDM30",
+        "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.",
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "Errata": "BDM30",
+        "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired.  Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x15",
+        "EventName": "FP_ARITH_INST_RETIRED.DOUBLE",
+        "SampleAfterValue": "2000006",
+        "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX* scalar and packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired.  Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2a",
+        "EventName": "FP_ARITH_INST_RETIRED.SINGLE",
+        "SampleAfterValue": "2000005",
+        "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX* scalar and packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3c",
+        "EventName": "FP_ARITH_INST_RETIRED.PACKED",
+        "SampleAfterValue": "2000004",
+        "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ASSIST.X87_OUTPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of X87 assists due to output value.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ASSIST.X87_INPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of X87 assists due to input value.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_ASSIST.SIMD_OUTPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of SIMD FP assists due to Output values",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_ASSIST.SIMD_INPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of SIMD FP assists due to input values",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1e",
+        "EventName": "FP_ASSIST.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwell/frontend.json b/pmu-events/arch/x86/broadwell/frontend.json
new file mode 100644
index 0000000..7142c76
--- /dev/null
+++ b/pmu-events/arch/x86/broadwell/frontend.json
@@ -0,0 +1,286 @@
+[
+    {
+        "PublicDescription": "This counts the number of cycles that the instruction decoder queue is empty and can indicate that the application may be bound in the front end.  It does not determine whether there are uops being delivered to the Alloc stage since uops can be delivered by bypass skipping the Instruction Decode Queue (IDQ) when it is empty.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "IDQ.EMPTY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_DSB_OCCUR",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering any Uop",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_SWITCHES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3c",
+        "EventName": "IDQ.MITE_ALL_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of both cacheable and noncacheable Instruction Cache, Streaming Buffer and Victim Cache Reads including UC fetches.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ICACHE.HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads, both cacheable and noncacheable, including UC fetches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. Counting includes UC accesses.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ICACHE.MISSES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes Uncacheable accesses.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which the demand fetch waits for data (wfdM104H) from L2 or iSB (opportunistic hit).",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ICACHE.IFDATA_STALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 - x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles when at most 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >=3.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with 2 or fewer uops delivered by the front end.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with 3 or fewer uops delivered by the front end.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. DSB-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0-2 cycles.",
+        "EventCode": "0xAB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwell/memory.json b/pmu-events/arch/x86/broadwell/memory.json
new file mode 100644
index 0000000..c9154ce
--- /dev/null
+++ b/pmu-events/arch/x86/broadwell/memory.json
@@ -0,0 +1,3045 @@
+[
+    {
+        "PublicDescription": "This event counts speculative cache-line split load uops dispatched to the L1 cache.",
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MISALIGN_MEM_REF.LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts speculative cache line split store-address (STA) uops dispatched to the L1 cache.",
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MISALIGN_MEM_REF.STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times a TSX line had a cache conflict.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TX_MEM.ABORT_CONFLICT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a TSX line had a cache conflict",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times we could not allocate Lock Buffer",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TX_EXEC.MISC1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "TX_EXEC.MISC2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "TX_EXEC.MISC3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RTM region detected inside HLE.",
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "TX_EXEC.MISC4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "TX_EXEC.MISC5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:\n1. memory disambiguation,\n2. external snoop, or\n3. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times we entered an HLE region; does not count nested transactions.",
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "HLE_RETIRED.START",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times we entered an HLE region; does not count nested transactions",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times HLE commit succeeded.",
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "HLE_RETIRED.COMMIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times HLE commit succeeded",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Number of times HLE abort was triggered (PEBS).",
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "HLE_RETIRED.ABORTED",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times HLE abort was triggered (PEBS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times an HLE abort was attributed to a Memory condition (See TSX_Memory event for additional details).",
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "HLE_RETIRED.ABORTED_MISC1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times the TSX watchdog signaled an HLE abort.",
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "HLE_RETIRED.ABORTED_MISC2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to uncommon conditions",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times a disallowed operation caused an HLE abort.",
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "HLE_RETIRED.ABORTED_MISC3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times HLE caused a fault.",
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "HLE_RETIRED.ABORTED_MISC4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times HLE aborted and was not due to the abort conditions in subevents 3-6.",
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "HLE_RETIRED.ABORTED_MISC5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times we entered an RTM region; does not count nested transactions.",
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RTM_RETIRED.START",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times we entered an RTM region; does not count nested transactions",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of times RTM commit succeeded.",
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RTM_RETIRED.COMMIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times RTM commit succeeded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Number of times RTM abort was triggered (PEBS).",
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RTM_RETIRED.ABORTED",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times RTM abort was triggered (PEBS)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of times an RTM abort was attributed to a Memory condition (See TSX_Memory event for additional details).",
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RTM_RETIRED.ABORTED_MISC1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of times the TSX watchdog signaled an RTM abort.",
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RTM_RETIRED.ABORTED_MISC2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of times a disallowed operation caused an RTM abort.",
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "RTM_RETIRED.ABORTED_MISC3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of times a RTM caused a fault.",
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "RTM_RETIRED.ABORTED_MISC4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of times RTM aborted and was not due to the abort conditions in subevents 3-6.",
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "RTM_RETIRED.ABORTED_MISC5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This event counts loads with latency value being above four.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "BDM100, BDM35",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Loads with latency value being above 4",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This event counts loads with latency value being above eight.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "BDM100, BDM35",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Loads with latency value being above 8",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This event counts loads with latency value being above 16.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "BDM100, BDM35",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Loads with latency value being above 16",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This event counts loads with latency value being above 32.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "BDM100, BDM35",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Loads with latency value being above 32",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This event counts loads with latency value being above 64.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "BDM100, BDM35",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2003",
+        "BriefDescription": "Loads with latency value being above 64",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This event counts loads with latency value being above 128.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "BDM100, BDM35",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "1009",
+        "BriefDescription": "Loads with latency value being above 128",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This event counts loads with latency value being above 256.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "BDM100, BDM35",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "503",
+        "BriefDescription": "Loads with latency value being above 256",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This event counts loads with latency value being above 512.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "BDM100, BDM35",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "101",
+        "BriefDescription": "Loads with latency value being above 512",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts writebacks (modified to exclusive) that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts writebacks (modified to exclusive) that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts writebacks (modified to exclusive) that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts writebacks (modified to exclusive) that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_RFO & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000040 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L2_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "PF_L3_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000028000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts any other requests that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c8000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts any other requests that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts any other requests that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts any other requests that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts any other requests that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts any other requests that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "OTHER & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000090 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 with data returned from local DRAM, with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 with data returned from local DRAM, where no snoop was needed. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 with data returned from local DRAM, with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
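+        "PublicDescription": "Counts prefetch RFOs that miss the L3 with data returned from local DRAM, with a snoop hit response where the data was not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",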
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 with data returned from local DRAM, with a snoop hit-modified (HITM) response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 with data returned from local DRAM, and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 with data returned from local DRAM, with any snoop response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 where no snoop was needed. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 with a snoop hit response where the data was not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000120 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads with no supplier information available and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch code reads that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
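+        "PublicDescription": "Counts all prefetch code reads that miss the L3 with data returned from local DRAM, with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",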
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that miss the L3 with data returned from local DRAM, where no snoop was needed. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that miss the L3 with data returned from local DRAM, with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
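+        "PublicDescription": "Counts all prefetch code reads that miss the L3 with data returned from local DRAM, with a snoop hit response where the data was not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",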
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that miss the L3 with data returned from local DRAM, with a snoop hit-modified (HITM) response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that miss the L3 with data returned from local DRAM, and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that miss the L3 with data returned from local DRAM, with any snoop response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch code reads that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that miss the L3 where no snoop was needed. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch code reads that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch code reads that miss the L3 with a snoop hit response where the data was not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000240 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_PF_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads with no supplier information available and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
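+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with data returned from local DRAM, with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",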
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with data returned from local DRAM, where no snoop was needed. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with data returned from local DRAM, with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
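+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with data returned from local DRAM, with a snoop hit response where the data was not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",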
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
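+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with data returned from local DRAM, with a snoop hit-modified (HITM) response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",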
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
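+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with data returned from local DRAM, and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",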
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with data returned from local DRAM, with any snoop response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 where no snoop was needed. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with a snoop hit response where the data was not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000091 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs with no supplier information available and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2000020122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20003c0122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the target was non-DRAM system address.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
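+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with data returned from local DRAM, with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",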
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with data returned from local DRAM, where no snoop was needed. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with data returned from local DRAM, with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
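+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with data returned from local DRAM, with a snoop hit response where the data was not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",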
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004000122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f84000122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 with no details on snoop-related information.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 with a snoop miss response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000122 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwell/other.json b/pmu-events/arch/x86/broadwell/other.json
new file mode 100644
index 0000000..4f829c5
--- /dev/null
+++ b/pmu-events/arch/x86/broadwell/other.json
@@ -0,0 +1,44 @@
+[
+    {
+        "PublicDescription": "This event counts the unhalted core cycles during which the thread is in the ring 0 privileged mode.",
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPL_CYCLES.RING0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts when there is a transition from ring 1,2 or 3 to ring0.",
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "CPL_CYCLES.RING0_TRANS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Number of intervals between processor halts while thread is in ring 0",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.",
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPL_CYCLES.RING123",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles in which the L1 and L2 are locked due to a UC lock or split lock. A lock is asserted in case of locked memory access, due to noncacheable memory, locked operation that spans two cache lines, or a page walk from the noncacheable page table. L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such access.",
+        "EventCode": "0x63",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwell/pipeline.json b/pmu-events/arch/x86/broadwell/pipeline.json
new file mode 100644
index 0000000..97c5d07
--- /dev/null
+++ b/pmu-events/arch/x86/broadwell/pipeline.json
@@ -0,0 +1,1427 @@
+[
+    {
+        "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 0",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired from execution.",
+        "CounterHTOff": "Fixed counter 0"
+    },
+    {
+        "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when the thread is not in halt state",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  This event is clocked by base clock (100 MHz) on Sandy Bridge. The counter update is done at a lower clock rate than the core clock, so the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x3",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "PublicDescription": "This event counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:\n - preceding store conflicts with the load (incomplete overlap);\n - store forwarding is impossible due to u-arch limitations;\n - preceding lock RMW operations are not forwarded;\n - store has the no-forward bit set (uncacheable/page-split/masked stores);\n - all-blocking stores are used (mostly, fences and port I/O);\nand others.\nThe most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events.\nSee the table of not supported store forwards in the Optimization Guide.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+        "EventCode": "0x07",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "False dependencies in MOB due to partial compare",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.",
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "AnyThread": "1",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.",
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "INT_MISC.RAT_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS).",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+        "EventCode": "0x0E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_ISSUED.SLOW_LEA",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_ISSUED.SINGLE_MUL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of the divide operations executed. Uses edge-detect and a cmask value of 1 on ARITH.FPU_DIV_ACTIVE to get the number of the divide operations executed.",
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.FPU_DIV_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when divider is busy executing divide operations",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This is a fixed-frequency event programmed to general counters. It counts when the core is unhalted at 100 MHz.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3c",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by asm inspection of the nearby instructions.",
+        "EventCode": "0x4c",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the hardware prefetch.",
+        "EventCode": "0x4C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_HIT_PRE.HW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which the reservation station (RS) is empty for the thread.\nNote: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+        "EventCode": "0x5E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts stalls occurred due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.",
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ILD_STALL.LCP",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts not taken macro-conditional branch instructions.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not taken macro-conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts taken speculative and retired macro-conditional branch instructions.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired macro-conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts taken speculative and retired macro-unconditional branch instructions excluding calls and indirect branches.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired macro-unconditional branch instructions excluding calls and indirects",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts taken speculative and retired indirect branches excluding calls and return branches.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts taken speculative and retired indirect branches that have a return mnemonic.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts taken speculative and retired direct near calls.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x90",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired direct near calls",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts taken speculative and retired indirect calls including both register and memory indirect.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa0",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect calls",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts both taken and not taken speculative and retired macro-conditional branch instructions.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc1",
+        "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired macro-conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts both taken and not taken speculative and retired macro-unconditional branch instructions, excluding calls and indirects.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc2",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts both taken and not taken speculative and retired indirect branches excluding calls and return branches.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc4",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired indirect branches excluding calls and returns",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts both taken and not taken speculative and retired indirect branches that have a return mnemonic.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc8",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired indirect return branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts both taken and not taken speculative and retired direct near calls.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xd0",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired direct near calls",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts both taken and not taken speculative and retired branch instructions.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts not taken speculative and retired mispredicted macro conditional branch instructions.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts taken speculative and retired mispredicted macro conditional branch instructions.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts taken speculative and retired mispredicted indirect branches that have a return mnemonic.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa0",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted macro conditional branch instructions.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc1",
+        "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts both taken and not taken mispredicted indirect branches excluding calls and returns.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc4",
+        "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Mispredicted indirect branches excluding calls and returns",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted branch instructions.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file.  The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VFMSUBADD*, VFNMADD*, VFNMSUB*.  See the Broadwell Optimization Guide for more information.",
+        "EventCode": "0xA0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are executed in port 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are executed in port 1.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 3.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are executed in port 4.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are executed in port 5.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are executed in port 6.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 7.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_7",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource-related stall cycles",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RESOURCE_STALLS.RS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RESOURCE_STALLS.SB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RESOURCE_STALLS.ROB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to re-order buffer full.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts number of cycles nothing is executed on any execution port.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Total execution stalls.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "CounterMask": "5",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "CounterMask": "5",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x6",
+        "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x6",
+        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.",
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0x8",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "CounterMask": "8",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0x8",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "CounterMask": "8",
+        "CounterHTOff": "2"
+    },
+    {
+        "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.",
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0xc",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "CounterMask": "12",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0xc",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "CounterMask": "12",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread.",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of uops executed from any thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of uops executed on the core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "Errata": "BDM61",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.",
+        "EventCode": "0xC0",
+        "Counter": "1",
+        "UMask": "0x1",
+        "Errata": "BDM11, BDM55",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+        "CounterHTOff": "1"
+    },
+    {
+        "PublicDescription": "This event counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "INST_RETIRED.X87",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Actually retired uops. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Data_LA": "1"
+    },
+    {
+        "PublicDescription": "This event counts cycles without actually retired uops.",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "CounterMask": "10",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retirement slots used. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACHINE_CLEARS.CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Maskmov false fault - counts the number of times ucode passes through the Maskmov flow due to the instruction's mask being 0 while the flow was completed without raising a fault.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MACHINE_CLEARS.MASKMOV",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts all (macro) branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "BDW98",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Return instructions retired. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts not taken branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Not taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts far branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "Errata": "BDW98",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Far branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts all mispredicted macro branch instructions retired.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_MISP_RETIRED.RET",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "This event counts the number of mispredicted ret instructions retired. (Precise Event)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.",
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count cases of saving new LBR",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xe6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "EventName": "BACLEARS.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwell/uncore.json b/pmu-events/arch/x86/broadwell/uncore.json
new file mode 100644
index 0000000..28e1e15
--- /dev/null
+++ b/pmu-events/arch/x86/broadwell/uncore.json
@@ -0,0 +1,278 @@
+[
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x41",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE",
+    "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+    "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x81",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION",
+    "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+    "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x44",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE",
+    "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+    "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x48",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE",
+    "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+    "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x11",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_M",
+    "BriefDescription": "L3 Lookup read request that access cache and found line in M-state",
+    "PublicDescription": "L3 Lookup read request that access cache and found line in M-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x21",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M",
+    "BriefDescription": "L3 Lookup write request that access cache and found line in M-state",
+    "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x81",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M",
+    "BriefDescription": "L3 Lookup any request that access cache and found line in M-state",
+    "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x18",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I",
+    "BriefDescription": "L3 Lookup read request that access cache and found line in I-state",
+    "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x88",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I",
+    "BriefDescription": "L3 Lookup any request that access cache and found line in I-state",
+    "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x1f",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI",
+    "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state",
+    "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x2f",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI",
+    "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state",
+    "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x8f",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI",
+    "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state",
+    "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x86",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES",
+    "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state",
+    "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x16",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES",
+    "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state",
+    "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x26",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES",
+    "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state",
+    "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x80",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+    "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+    "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+    "Counter": "0,",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x80",
+    "UMask": "0x02",
+    "EventName": "UNC_ARB_TRK_OCCUPANCY.DRD_DIRECT",
+    "BriefDescription": "Each cycle count number of 'valid' coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case.",
+    "PublicDescription": "Each cycle count number of 'valid' coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case.",
+    "Counter": "0,",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x81",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+    "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+    "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x81",
+    "UMask": "0x02",
+    "EventName": "UNC_ARB_TRK_REQUESTS.DRD_DIRECT",
+    "BriefDescription": "Number of Core coherent Data Read entries allocated in DirectData mode",
+    "PublicDescription": "Number of Core coherent Data Read entries allocated in DirectData mode.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x81",
+    "UMask": "0x20",
+    "EventName": "UNC_ARB_TRK_REQUESTS.WRITES",
+    "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
+    "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x84",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+    "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+    "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x80",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
+    "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+    "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+    "Counter": "0,",
+    "CounterMask": "1",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "NCU",
+    "EventCode": "0x0",
+    "UMask": "0x01",
+    "EventName": "UNC_CLOCK.SOCKET",
+    "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles",
+    "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+    "Counter": "FIXED",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwell/virtual-memory.json b/pmu-events/arch/x86/broadwell/virtual-memory.json
new file mode 100644
index 0000000..2a015e4
--- /dev/null
+++ b/pmu-events/arch/x86/broadwell/virtual-memory.json
@@ -0,0 +1,388 @@
+[
+    {
+        "PublicDescription": "This event counts load misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "BDM69",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "BDM69",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "BDM69",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (1G  page size). The page walk can end with or without a fault.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "BDM69",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "Errata": "BDM69",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "Errata": "BDM69",
+        "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT_4K",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Load misses that miss the  DTLB and hit the STLB (4K).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT_2M",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Load misses that miss the  DTLB and hit the STLB (2M).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x60",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "BDM69",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "BDM69",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "BDM69",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G  page size). The page walk can end with or without a fault.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "BDM69",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "Errata": "BDM69",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "Errata": "BDM69",
+        "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT_4K",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses that miss the  DTLB and hit the STLB (4K).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT_2M",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses that miss the  DTLB and hit the STLB (2M).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x60",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles for an extended page table walk. The Extended Page directory cache differs from standard TLB caches by the operating system that use it. Virtual machine operating systems use the extended page directory cache, while guest operating systems use the standard TLB caches.",
+        "EventCode": "0x4F",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "EPT.WALK_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycle count for an Extended Page table walk.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts code misses in all ITLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "BDM69",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts code misses in all ITLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "BDM69",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts code misses in all ITLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "BDM69",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts code misses in all ITLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "BDM69",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "Errata": "BDM69",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses in all ITLB levels that cause completed page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "Errata": "BDM69",
+        "EventName": "ITLB_MISSES.WALK_DURATION",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ITLB_MISSES.STLB_HIT_4K",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code misses that miss the ITLB and hit the STLB (4K).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "ITLB_MISSES.STLB_HIT_2M",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code misses that miss the ITLB and hit the STLB (2M).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x60",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of flushes of the big or small ITLB pages. Counting includes both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+        "EventCode": "0xAE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x11",
+        "Errata": "BDM69, BDM98",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of DTLB page walker hits in the L1+FB.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x12",
+        "Errata": "BDM69, BDM98",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of DTLB page walker hits in the L2.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x14",
+        "Errata": "BDM69, BDM98",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "Errata": "BDM69, BDM98",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of DTLB page walker hits in Memory.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "Errata": "BDM69, BDM98",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of ITLB page walker hits in the L1+FB.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x22",
+        "Errata": "BDM69, BDM98",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of ITLB page walker hits in the L2.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "Errata": "BDM69, BDM98",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts the number of DTLB flush attempts of the thread-specific entries.",
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, and so on).",
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "STLB flush attempts",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
new file mode 100644
index 0000000..49c5f12
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
@@ -0,0 +1,164 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "MetricGroup": "Pipeline",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+        "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+        "MetricGroup": "Frontend",
+        "MetricName": "IFetch_Line_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (threaded)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "MetricGroup": "Pipeline;Summary",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Total issue-pipeline slots",
+        "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "SLOTS"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "SMT",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+        "MetricGroup": "Pipeline;Ports_Utilization",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+	"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL  - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END",
+        "MetricGroup": "Unknown_Branches",
+        "MetricName": "BAClear_Cost"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+        "MetricGroup": "Memory_Bound;Memory_Lat",
+        "MetricName": "Load_Miss_Real_Latency"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+        "MetricGroup": "Memory_Bound;Memory_BW",
+        "MetricName": "MLP"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TLB",
+        "MetricName": "Page_Walks_Utilization"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Giga Floating Point Operations Per Second",
+        "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time",
+        "MetricGroup": "FLOPS;Summary",
+        "MetricName": "GFLOPs"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware threads were active",
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT;Summary",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Summary",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "C3 residency percent per core",
+        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    }
+]
diff --git a/pmu-events/arch/x86/broadwellde/cache.json b/pmu-events/arch/x86/broadwellde/cache.json
new file mode 100644
index 0000000..bf243fe
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellde/cache.json
@@ -0,0 +1,809 @@
+[
+    {
+        "EventCode": "0x24",
+        "UMask": "0x21",
+        "BriefDescription": "Demand Data Read miss L2, no rejects",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+        "PublicDescription": "This event counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x22",
+        "BriefDescription": "RFO requests that miss L2 cache.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x24",
+        "BriefDescription": "L2 cache misses when fetching instructions.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x27",
+        "BriefDescription": "Demand requests that miss L2 cache.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x30",
+        "BriefDescription": "L2 prefetch requests that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.L2_PF_MISS",
+        "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x3f",
+        "BriefDescription": "All requests that miss L2 cache.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x41",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x42",
+        "BriefDescription": "RFO requests that hit L2 cache.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x44",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x50",
+        "BriefDescription": "L2 prefetch requests that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.L2_PF_HIT",
+        "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that hit L2 cache. L3 prefetch new types.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe1",
+        "BriefDescription": "Demand Data Read requests",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "PublicDescription": "This event counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe2",
+        "BriefDescription": "RFO requests to L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "PublicDescription": "This event counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe4",
+        "BriefDescription": "L2 code requests",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "PublicDescription": "This event counts the total number of L2 code requests.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe7",
+        "BriefDescription": "Demand requests to L2 cache.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xf8",
+        "BriefDescription": "Requests from L2 hardware prefetchers",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "PublicDescription": "This event counts the total number of requests from the L2 hardware prefetchers.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xff",
+        "BriefDescription": "All L2 requests.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x27",
+        "UMask": "0x50",
+        "BriefDescription": "Not rejected writebacks that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_DEMAND_RQSTS.WB_HIT",
+        "PublicDescription": "This event counts the number of WB requests that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x2E",
+        "UMask": "0x41",
+        "BriefDescription": "Core-originated cacheable demand requests missed L3",
+        "Counter": "0,1,2,3",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "PublicDescription": "This event counts core-originated cacheable demand requests that miss the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x2E",
+        "UMask": "0x4f",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+        "Counter": "0,1,2,3",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PublicDescription": "This event counts core-originated cacheable demand requests that refer to the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x1",
+        "BriefDescription": "L1D miss outstanding duration in cycles",
+        "Counter": "2",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "PublicDescription": "This event counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand; from the demand Hit FB, if it is allocated by hardware or software prefetch.\nNote: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "Counter": "2",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts duration of L1D miss outstanding in cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+        "Counter": "2",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "AnyThread": "1",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers unavailability.",
+        "Counter": "0,1,2,3",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x51",
+        "UMask": "0x1",
+        "BriefDescription": "L1D data line replacements",
+        "Counter": "0,1,2,3",
+        "EventName": "L1D.REPLACEMENT",
+        "PublicDescription": "This event counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.\nNote: A prefetch promoted to Demand is counted from the promotion point.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "CounterMask": "1",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "CounterMask": "6",
+        "Errata": "BDM76",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x2",
+        "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "CounterMask": "1",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "CounterMask": "1",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x63",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles when L1D is locked",
+        "Counter": "0,1,2,3",
+        "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+        "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x1",
+        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "PublicDescription": "This event counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x2",
+        "BriefDescription": "Cacheable and noncacheable code read requests",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "PublicDescription": "This event counts both cacheable and noncacheable code read requests.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x4",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "PublicDescription": "This event counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x8",
+        "BriefDescription": "Demand and prefetch data reads",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable Demands and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb2",
+        "UMask": "0x1",
+        "BriefDescription": "Offcore requests buffer cannot take more entries for this core.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "PublicDescription": "This event counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.\nNote: Writeback pending FIFO has six entries.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x11",
+        "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x12",
+        "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.",
+        "SampleAfterValue": "100003",
+        "L1_Hit_Indication": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x21",
+        "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "Errata": "BDM35",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x41",
+        "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x42",
+        "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+        "SampleAfterValue": "100003",
+        "L1_Hit_Indication": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x81",
+        "BriefDescription": "All retired load uops. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x82",
+        "BriefDescription": "All retired store uops. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.",
+        "SampleAfterValue": "2000003",
+        "L1_Hit_Indication": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x1",
+        "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data source were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit  even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x2",
+        "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "Errata": "BDM35",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x4",
+        "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "Errata": "BDM100",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.",
+        "SampleAfterValue": "50021",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x8",
+        "BriefDescription": "Retired load uops misses in L1 cache as data sources. Uses PEBS.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x10",
+        "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.",
+        "SampleAfterValue": "50021",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x20",
+        "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS).",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS",
+        "Errata": "BDM100, BDE70",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x40",
+        "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit  even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x1",
+        "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS",
+        "Errata": "BDM100",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x2",
+        "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT",
+        "Errata": "BDM100",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x4",
+        "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM",
+        "Errata": "BDM100",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x8",
+        "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE",
+        "Errata": "BDM100",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x1",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+        "Errata": "BDE70, BDM100",
+        "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x4",
+        "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM",
+        "Errata": "BDE70",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x10",
+        "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM",
+        "Errata": "BDE70",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x20",
+        "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD",
+        "Errata": "BDE70",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x1",
+        "BriefDescription": "Demand Data Read requests that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.DEMAND_DATA_RD",
+        "PublicDescription": "This event counts Demand Data Read requests that access L2 cache, including rejects.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x2",
+        "BriefDescription": "RFO requests that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.RFO",
+        "PublicDescription": "This event counts Read for Ownership (RFO) requests that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x4",
+        "BriefDescription": "L2 cache accesses when fetching instructions",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.CODE_RD",
+        "PublicDescription": "This event counts the number of L2 cache accesses when fetching instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x8",
+        "BriefDescription": "L2 or L3 HW prefetches that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.ALL_PF",
+        "PublicDescription": "This event counts L2 or L3 HW prefetches that access L2 cache including rejects.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x10",
+        "BriefDescription": "L1D writebacks that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.L1D_WB",
+        "PublicDescription": "This event counts L1D writebacks that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x20",
+        "BriefDescription": "L2 fill requests that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.L2_FILL",
+        "PublicDescription": "This event counts L2 fill requests that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x40",
+        "BriefDescription": "L2 writebacks that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.L2_WB",
+        "PublicDescription": "This event counts L2 writebacks that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x80",
+        "BriefDescription": "Transactions accessing L2 pipe",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.ALL_REQUESTS",
+        "PublicDescription": "This event counts transactions that access the L2 pipe including snoops, pagewalks, and so on.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x1",
+        "BriefDescription": "L2 cache lines in I state filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.I",
+        "PublicDescription": "This event counts the number of L2 cache lines in the Invalidate state filling the L2. Counting does not cover rejects.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x2",
+        "BriefDescription": "L2 cache lines in S state filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.S",
+        "PublicDescription": "This event counts the number of L2 cache lines in the Shared state filling the L2. Counting does not cover rejects.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x4",
+        "BriefDescription": "L2 cache lines in E state filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.E",
+        "PublicDescription": "This event counts the number of L2 cache lines in the Exclusive state filling the L2. Counting does not cover rejects.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x7",
+        "BriefDescription": "L2 cache lines filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.ALL",
+        "PublicDescription": "This event counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "UMask": "0x5",
+        "BriefDescription": "Clean L2 cache lines evicted by demand.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xf4",
+        "UMask": "0x10",
+        "BriefDescription": "Split locks in SQ",
+        "Counter": "0,1,2,3",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "PublicDescription": "This event counts the number of split locks in the super queue.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellde/floating-point.json b/pmu-events/arch/x86/broadwellde/floating-point.json
new file mode 100644
index 0000000..d7b9d9c
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellde/floating-point.json
@@ -0,0 +1,165 @@
+[
+    {
+        "EventCode": "0xC1",
+        "UMask": "0x8",
+        "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+        "Counter": "0,1,2,3",
+        "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+        "Errata": "BDM30",
+        "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "UMask": "0x10",
+        "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+        "Counter": "0,1,2,3",
+        "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+        "Errata": "BDM30",
+        "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x1",
+        "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x2",
+        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x3",
+        "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x4",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired.  Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x8",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x10",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x15",
+        "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX* scalar and packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.DOUBLE",
+        "SampleAfterValue": "2000006",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc7",
+        "UMask": "0x20",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired.  Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x2a",
+        "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX* scalar and packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.SINGLE",
+        "SampleAfterValue": "2000005",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x3c",
+        "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.PACKED",
+        "SampleAfterValue": "2000004",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x2",
+        "BriefDescription": "Number of X87 assists due to output value.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.X87_OUTPUT",
+        "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x4",
+        "BriefDescription": "Number of X87 assists due to input value.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.X87_INPUT",
+        "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x8",
+        "BriefDescription": "Number of SIMD FP assists due to Output values",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.SIMD_OUTPUT",
+        "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x10",
+        "BriefDescription": "Number of SIMD FP assists due to input values",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.SIMD_INPUT",
+        "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x1e",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.ANY",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellde/frontend.json b/pmu-events/arch/x86/broadwellde/frontend.json
new file mode 100644
index 0000000..72781e1
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellde/frontend.json
@@ -0,0 +1,286 @@
+[
+    {
+        "EventCode": "0x79",
+        "UMask": "0x2",
+        "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.EMPTY",
+        "PublicDescription": "This counts the number of cycles that the instruction decoder queue is empty and can indicate that the application may be bound in the front end.  It does not determine whether there are uops being delivered to the Alloc stage since uops can be delivered by bypass skipping the Instruction Decode Queue (IDQ) when it is empty.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x4",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MITE_UOPS",
+        "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MITE_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x8",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.DSB_UOPS",
+        "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.DSB_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x10",
+        "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_DSB_UOPS",
+        "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0x79",
+        "UMask": "0x10",
+        "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_DSB_OCCUR",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x18",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "CounterMask": "4",
+        "PublicDescription": "This event counts the number of cycles 4  uops were  delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x18",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts the number of cycles  uops were  delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x20",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x24",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "CounterMask": "4",
+        "PublicDescription": "This event counts the number of cycles 4  uops were  delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x24",
+        "BriefDescription": "Cycles MITE is delivering any Uop",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts the number of cycles  uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x30",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_UOPS",
+        "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x30",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0x79",
+        "UMask": "0x30",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_SWITCHES",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x3c",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MITE_ALL_UOPS",
+        "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "UMask": "0x1",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads, both cacheable and noncacheable, including UC fetches",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE.HIT",
+        "PublicDescription": "This event counts the number of both cacheable and noncacheable Instruction Cache, Streaming Buffer and Victim Cache Reads including UC fetches.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "UMask": "0x2",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes Uncacheable accesses.",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE.MISSES",
+        "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. Counting includes UC accesses.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE.IFDATA_STALL",
+        "PublicDescription": "This event counts cycles during which the demand fetch waits for data (wfdM104H) from L2 or iSB (opportunistic hit).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 - x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "CounterMask": "4",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "CounterMask": "3",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles when 1 or fewer uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >=3.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "CounterMask": "2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xAB",
+        "UMask": "0x2",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+        "Counter": "0,1,2,3",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. DSB-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0-2 cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellde/memory.json b/pmu-events/arch/x86/broadwellde/memory.json
new file mode 100644
index 0000000..e44f73c
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellde/memory.json
@@ -0,0 +1,432 @@
+[
+    {
+        "EventCode": "0x05",
+        "UMask": "0x1",
+        "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "MISALIGN_MEM_REF.LOADS",
+        "PublicDescription": "This event counts speculative cache-line split load uops dispatched to the L1 cache.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x05",
+        "UMask": "0x2",
+        "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "MISALIGN_MEM_REF.STORES",
+        "PublicDescription": "This event counts speculative cache line split store-address (STA) uops dispatched to the L1 cache.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x1",
+        "BriefDescription": "Number of times a TSX line had a cache conflict",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_CONFLICT",
+        "PublicDescription": "Number of times a TSX line had a cache conflict.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x4",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x8",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x10",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x20",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times we could not allocate Lock Buffer",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x1",
+        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x2",
+        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC2",
+        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x4",
+        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC3",
+        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x8",
+        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC4",
+        "PublicDescription": "RTM region detected inside HLE.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x10",
+        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC5",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x2",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:\n1. memory disambiguation,\n2. external snoop, or\n3. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x1",
+        "BriefDescription": "Number of times we entered an HLE region; does not count nested transactions",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.START",
+        "PublicDescription": "Number of times we entered an HLE region; does not count nested transactions.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times HLE commit succeeded",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.COMMIT",
+        "PublicDescription": "Number of times HLE commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x4",
+        "BriefDescription": "Number of times HLE abort was triggered (PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED",
+        "PublicDescription": "Number of times HLE abort was triggered (PEBS).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x8",
+        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC1",
+        "PublicDescription": "Number of times an HLE abort was attributed to a Memory condition (See TSX_Memory event for additional details).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x10",
+        "BriefDescription": "Number of times an HLE execution aborted due to uncommon conditions",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC2",
+        "PublicDescription": "Number of times the TSX watchdog signaled an HLE abort.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x20",
+        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC3",
+        "PublicDescription": "Number of times a disallowed operation caused an HLE abort.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC4",
+        "PublicDescription": "Number of times HLE caused a fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x80",
+        "BriefDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts)",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC5",
+        "PublicDescription": "Number of times HLE aborted and was not due to the abort conditions in subevents 3-6.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x1",
+        "BriefDescription": "Number of times we entered an RTM region; does not count nested transactions",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.START",
+        "PublicDescription": "Number of times we entered an RTM region; does not count nested transactions.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times RTM commit succeeded",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.COMMIT",
+        "PublicDescription": "Number of times RTM commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x4",
+        "BriefDescription": "Number of times RTM abort was triggered (PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED",
+        "PublicDescription": "Number of times RTM abort was triggered (PEBS).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x8",
+        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC1",
+        "PublicDescription": "Number of times an RTM abort was attributed to a Memory condition (See TSX_Memory event for additional details).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x10",
+        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC2",
+        "PublicDescription": "Number of times the TSX watchdog signaled an RTM abort.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x20",
+        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC3",
+        "PublicDescription": "Number of times a disallowed operation caused an RTM abort.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC4",
+        "PublicDescription": "Number of times an RTM caused a fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x80",
+        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC5",
+        "PublicDescription": "Number of times RTM aborted and was not due to the abort conditions in subevents 3-6.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 4",
+        "PEBS": "2",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above four.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 8",
+        "PEBS": "2",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above eight.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "50021",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 16",
+        "PEBS": "2",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above 16.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 32",
+        "PEBS": "2",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above 32.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 64",
+        "PEBS": "2",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above 64.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "2003",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 128",
+        "PEBS": "2",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above 128.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "1009",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 256",
+        "PEBS": "2",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above 256.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "503",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 512",
+        "PEBS": "2",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above 512.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "101",
+        "CounterHTOff": "3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellde/other.json b/pmu-events/arch/x86/broadwellde/other.json
new file mode 100644
index 0000000..4475249
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellde/other.json
@@ -0,0 +1,44 @@
+[
+    {
+        "EventCode": "0x5C",
+        "UMask": "0x1",
+        "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+        "Counter": "0,1,2,3",
+        "EventName": "CPL_CYCLES.RING0",
+        "PublicDescription": "This event counts the unhalted core cycles during which the thread is in the ring 0 privileged mode.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0x5C",
+        "UMask": "0x1",
+        "BriefDescription": "Number of intervals between processor halts while thread is in ring 0",
+        "Counter": "0,1,2,3",
+        "EventName": "CPL_CYCLES.RING0_TRANS",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts when there is a transition from ring 1, 2, or 3 to ring 0.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5C",
+        "UMask": "0x2",
+        "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+        "Counter": "0,1,2,3",
+        "EventName": "CPL_CYCLES.RING123",
+        "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x63",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+        "Counter": "0,1,2,3",
+        "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+        "PublicDescription": "This event counts cycles in which the L1 and L2 are locked due to a UC lock or split lock. A lock is asserted in case of locked memory access, due to noncacheable memory, locked operation that spans two cache lines, or a page walk from the noncacheable page table. L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such access.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellde/pipeline.json b/pmu-events/arch/x86/broadwellde/pipeline.json
new file mode 100644
index 0000000..920c89d
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellde/pipeline.json
@@ -0,0 +1,1427 @@
+[
+    {
+        "EventCode": "0x00",
+        "UMask": "0x1",
+        "BriefDescription": "Instructions retired from execution.",
+        "Counter": "Fixed counter 0",
+        "EventName": "INST_RETIRED.ANY",
+        "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 0"
+    },
+    {
+        "EventCode": "0x00",
+        "UMask": "0x2",
+        "BriefDescription": "Core cycles when the thread is not in halt state",
+        "Counter": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "UMask": "0x2",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "Counter": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "UMask": "0x3",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "Counter": "Fixed counter 2",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate than the core clock, so the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "EventCode": "0x03",
+        "UMask": "0x2",
+        "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding",
+        "Counter": "0,1,2,3",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "PublicDescription": "This event counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:\n - preceding store conflicts with the load (incomplete overlap);\n - store forwarding is impossible due to u-arch limitations;\n - preceding lock RMW operations are not forwarded;\n - store has the no-forward bit set (uncacheable/page-split/masked stores);\n - all-blocking stores are used (mostly, fences and port I/O);\nand others.\nThe most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events.\nSee the table of not supported store forwards in the Optimization Guide.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x03",
+        "UMask": "0x8",
+        "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "Counter": "0,1,2,3",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x07",
+        "UMask": "0x1",
+        "BriefDescription": "False dependencies in MOB due to partial compare",
+        "Counter": "0,1,2,3",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "PublicDescription": "This event counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "UMask": "0x3",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+        "Counter": "0,1,2,3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "UMask": "0x3",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+        "Counter": "0,1,2,3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "AnyThread": "1",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread",
+        "Counter": "0,1,2,3",
+        "EventName": "INT_MISC.RAT_STALL_CYCLES",
+        "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x1",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.ANY",
+        "PublicDescription": "This event counts the number of Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0x0E",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x10",
+        "BriefDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+        "PublicDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x20",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.SLOW_LEA",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x40",
+        "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.SINGLE_MUL",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x14",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when divider is busy executing divide operations",
+        "Counter": "0,1,2,3",
+        "EventName": "ARITH.FPU_DIV_ACTIVE",
+        "PublicDescription": "This event counts the number of the divide operations executed. Uses edge-detect and a cmask value of 1 on ARITH.FPU_DIV_ACTIVE to get the number of the divide operations executed.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x0",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x0",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "PublicDescription": "This is a fixed-frequency event programmed to general counters. It counts when the core is unhalted at 100 Mhz.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3c",
+        "UMask": "0x2",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x2",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4c",
+        "UMask": "0x1",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+        "Counter": "0,1,2,3",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by asm inspection of the nearby instructions.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4C",
+        "UMask": "0x2",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+        "Counter": "0,1,2,3",
+        "EventName": "LOAD_HIT_PRE.HW_PF",
+        "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the hardware prefetch.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "UMask": "0x1",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+        "Counter": "0,1,2,3",
+        "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "UMask": "0x2",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+        "Counter": "0,1,2,3",
+        "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "UMask": "0x4",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+        "Counter": "0,1,2,3",
+        "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "UMask": "0x8",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+        "Counter": "0,1,2,3",
+        "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5E",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+        "Counter": "0,1,2,3",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "PublicDescription": "This event counts cycles during which the reservation station (RS) is empty for the thread.\nNote: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "Invert": "1",
+        "EventCode": "0x5E",
+        "UMask": "0x1",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "Counter": "0,1,2,3",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "CounterMask": "1",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x87",
+        "UMask": "0x1",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "Counter": "0,1,2,3",
+        "EventName": "ILD_STALL.LCP",
+        "PublicDescription": "This event counts stalls that occurred due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x41",
+        "BriefDescription": "Not taken macro-conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+        "PublicDescription": "This event counts not taken macro-conditional branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x81",
+        "BriefDescription": "Taken speculative and retired macro-conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+        "PublicDescription": "This event counts taken speculative and retired macro-conditional branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x82",
+        "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+        "PublicDescription": "This event counts taken speculative and retired macro-conditional branch instructions excluding calls and indirect branches.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x84",
+        "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "PublicDescription": "This event counts taken speculative and retired indirect branches excluding calls and return branches.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x88",
+        "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+        "PublicDescription": "This event counts taken speculative and retired indirect branches that have a return mnemonic.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x90",
+        "BriefDescription": "Taken speculative and retired direct near calls",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+        "PublicDescription": "This event counts taken speculative and retired direct near calls.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xa0",
+        "BriefDescription": "Taken speculative and retired indirect calls",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "PublicDescription": "This event counts taken speculative and retired indirect calls including both register and memory indirect.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xc1",
+        "BriefDescription": "Speculative and retired macro-conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired macro-conditional branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xc2",
+        "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired macro-unconditional branch instructions, excluding calls and indirects.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xc4",
+        "BriefDescription": "Speculative and retired indirect branches excluding calls and returns",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired indirect branches excluding calls and return branches.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xc8",
+        "BriefDescription": "Speculative and retired indirect return branches.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired indirect branches that have a return mnemonic.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xd0",
+        "BriefDescription": "Speculative and retired direct near calls",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired direct near calls.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xff",
+        "BriefDescription": "Speculative and retired branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0x41",
+        "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+        "PublicDescription": "This event counts not taken speculative and retired mispredicted macro conditional branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0x81",
+        "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+        "PublicDescription": "This event counts taken speculative and retired mispredicted macro conditional branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0x84",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "PublicDescription": "This event counts taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0x88",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+        "PublicDescription": "This event counts taken speculative and retired mispredicted indirect branches that have a return mnemonic.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0xa0",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0xc1",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted macro conditional branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0xc4",
+        "BriefDescription": "Mispredicted indirect branches excluding calls and returns",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "PublicDescription": "This event counts both taken and not taken mispredicted indirect branches excluding calls and returns.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0xff",
+        "BriefDescription": "Speculative and retired mispredicted branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA0",
+        "UMask": "0x3",
+        "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports",
+        "Counter": "0,1,2,3",
+        "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF",
+        "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file.  The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*.  See the Broadwell Optimization Guide for more information.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per core when uops are executed in port 0.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_0",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles per core when uops are executed in port 1.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_1",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 2.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_2",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 3.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_3",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles per core when uops are executed in port 4.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_4",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x20",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x20",
+        "BriefDescription": "Cycles per core when uops are executed in port 5.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x20",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_5",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x40",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x40",
+        "BriefDescription": "Cycles per core when uops are executed in port 6.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x40",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_6",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x80",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x80",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 7.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x80",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_7",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x1",
+        "BriefDescription": "Resource-related stall cycles",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.RS",
+        "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.SB",
+        "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles stalled due to re-order buffer full.",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.ROB",
+        "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+        "CounterMask": "1",
+        "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+        "CounterMask": "2",
+        "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+        "CounterMask": "2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x4",
+        "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+        "CounterMask": "4",
+        "PublicDescription": "Counts number of cycles nothing is executed on any execution port.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x4",
+        "BriefDescription": "Total execution stalls.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+        "CounterMask": "4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x5",
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+        "CounterMask": "5",
+        "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x5",
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+        "CounterMask": "5",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x6",
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+        "CounterMask": "6",
+        "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x6",
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+        "CounterMask": "6",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "Counter": "2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+        "CounterMask": "8",
+        "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "Counter": "2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+        "CounterMask": "8",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0xc",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "Counter": "2",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+        "CounterMask": "12",
+        "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0xc",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "Counter": "2",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "CounterMask": "12",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA8",
+        "UMask": "0x1",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "Counter": "0,1,2,3",
+        "EventName": "LSD.UOPS",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "Counter": "0,1,2,3",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "CounterMask": "4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "Counter": "0,1,2,3",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.THREAD",
+        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "CounterMask": "2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "CounterMask": "3",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "CounterMask": "4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x2",
+        "BriefDescription": "Number of uops executed on the core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "PublicDescription": "Number of uops executed from any thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "CounterMask": "2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "CounterMask": "3",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "CounterMask": "4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC0",
+        "UMask": "0x0",
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "Counter": "0,1,2,3",
+        "EventName": "INST_RETIRED.ANY_P",
+        "Errata": "BDM61",
+        "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC0",
+        "UMask": "0x1",
+        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+        "PEBS": "2",
+        "Counter": "1",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "Errata": "BDM11, BDM55",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "1"
+    },
+    {
+        "EventCode": "0xC0",
+        "UMask": "0x2",
+        "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions.",
+        "Counter": "0,1,2,3",
+        "EventName": "INST_RETIRED.X87",
+        "PublicDescription": "This event counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+        "Counter": "0,1,2,3",
+        "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC2",
+        "UMask": "0x1",
+        "BriefDescription": "Actually retired uops. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.ALL",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xC2",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles without actually retired uops.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xC2",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "CounterMask": "10",
+        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC2",
+        "UMask": "0x2",
+        "BriefDescription": "Retirement slots used. (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.CYCLES",
+        "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0xC3",
+        "UMask": "0x1",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "CounterMask": "1",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x4",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x20",
+        "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.MASKMOV",
+        "PublicDescription": "Maskmov false fault - counts number of times ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x0",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "This event counts all (macro) branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x1",
+        "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x2",
+        "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x2",
+        "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x4",
+        "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS)",
+        "PEBS": "2",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "Errata": "BDW98",
+        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x8",
+        "BriefDescription": "Return instructions retired. (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x10",
+        "BriefDescription": "Not taken branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "PublicDescription": "This event counts not taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x20",
+        "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x40",
+        "BriefDescription": "Far branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "Errata": "BDW98",
+        "PublicDescription": "This event counts far branch instructions retired.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x0",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "This event counts all mispredicted macro branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x1",
+        "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x4",
+        "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)",
+        "PEBS": "2",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x8",
+        "BriefDescription": "This event counts the number of mispredicted ret instructions retired. (Precise Event)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.RET",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x20",
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCC",
+        "UMask": "0x20",
+        "BriefDescription": "Count cases of saving new LBR",
+        "Counter": "0,1,2,3",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xe6",
+        "UMask": "0x1f",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "Counter": "0,1,2,3",
+        "EventName": "BACLEARS.ANY",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellde/uncore-cache.json b/pmu-events/arch/x86/broadwellde/uncore-cache.json
new file mode 100644
index 0000000..58ed6d3
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellde/uncore-cache.json
@@ -0,0 +1,317 @@
+[
+    {
+        "BriefDescription": "Uncore cache clock ticks",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_C_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "All LLC Misses (code + data rd + data wr - including demand and prefetch)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "UNC_C_LLC_LOOKUP.ANY",
+        "Filter": "filter_state=0x1",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x11",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "M line evictions from LLC (writebacks to memory)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x37",
+        "EventName": "UNC_C_LLC_VICTIMS.M_STATE",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.DATA_READ",
+        "Filter": "filter_opc=0x182",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses - Uncacheable reads (from cpu). Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.UNCACHEABLE",
+        "Filter": "filter_opc=0x187",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "MMIO reads. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.MMIO_READ",
+        "Filter": "filter_opc=0x187,filter_nc=1",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "MMIO writes. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.MMIO_WRITE",
+        "Filter": "filter_opc=0x18f,filter_nc=1",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC prefetch misses for RFO. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.RFO_LLC_PREFETCH",
+        "Filter": "filter_opc=0x190",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC prefetch misses for code reads. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.CODE_LLC_PREFETCH",
+        "Filter": "filter_opc=0x191",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC prefetch misses for data reads. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.DATA_LLC_PREFETCH",
+        "Filter": "filter_opc=0x192",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses for PCIe read current. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_READ",
+        "Filter": "filter_opc=0x19e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "ItoM write misses (as part of fast string memcpy stores) + PCIe full line writes. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_WRITE",
+        "Filter": "filter_opc=0x1c8",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe write misses (full cache line). Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_NON_SNOOP_WRITE",
+        "Filter": "filter_opc=0x1c8,filter_tid=0x3e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe writes (partial cache line). Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_NS_PARTIAL_WRITE",
+        "Filter": "filter_opc=0x180,filter_tid=0x3e",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "L2 demand and L2 prefetch code references to LLC. Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.CODE_LLC_PREFETCH",
+        "Filter": "filter_opc=0x181",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Streaming stores (full cache line). Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.STREAMING_FULL",
+        "Filter": "filter_opc=0x18c",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Streaming stores (partial cache line). Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
+        "Filter": "filter_opc=0x18d",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe read current. Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_READ",
+        "Filter": "filter_opc=0x19e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe write references (full cache line). Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_WRITE",
+        "Filter": "filter_opc=0x1c8,filter_tid=0x3e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Occupancy counter for LLC data reads (demand and L2 prefetch). Derived from unc_c_tor_occupancy.miss_opcode",
+        "EventCode": "0x36",
+        "EventName": "UNC_C_TOR_OCCUPANCY.LLC_DATA_READ",
+        "Filter": "filter_opc=0x182",
+        "PerPkg": "1",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "read requests to home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.READS",
+        "PerPkg": "1",
+        "UMask": "0x3",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "read requests to local home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.READS_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "read requests to remote home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.READS_REMOTE",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "write requests to home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.WRITES",
+        "PerPkg": "1",
+        "UMask": "0xC",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "write requests to local home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.WRITES_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "write requests to remote home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.WRITES_REMOTE",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT",
+        "PerPkg": "1",
+        "UMask": "0x40",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "M line forwarded from remote cache along with writeback to memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x20",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "M line forwarded from remote cache with no writeback to memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPIFWD",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x4",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "Shared line response from remote cache",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPS",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x2",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "Shared line forwarded from remote cache",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPSFWD",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x8",
+        "Unit": "HA"
+    }
+]
diff --git a/pmu-events/arch/x86/broadwellde/uncore-memory.json b/pmu-events/arch/x86/broadwellde/uncore-memory.json
new file mode 100644
index 0000000..f4b0745
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellde/uncore-memory.json
@@ -0,0 +1,86 @@
+[
+    {
+        "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_READ",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_WRITE",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0xC",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Memory controller clock ticks",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_M_DCLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "UNC_M_POWER_CHANNEL_PPD",
+        "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_DCLOCKTICKS) * 100.",
+        "MetricName": "power_channel_ppd %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles all ranks are in critical thermal throttle",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x86",
+        "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES",
+        "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_DCLOCKTICKS) * 100.",
+        "MetricName": "power_critical_throttle_cycles %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles Memory is in self refresh power mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x43",
+        "EventName": "UNC_M_POWER_SELF_REFRESH",
+        "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_DCLOCKTICKS) * 100.",
+        "MetricName": "power_self_refresh %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Pre-charges due to page misses",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Pre-charge for reads",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.RD",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Pre-charge for writes",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.WR",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "iMC"
+    }
+]
diff --git a/pmu-events/arch/x86/broadwellde/uncore-power.json b/pmu-events/arch/x86/broadwellde/uncore-power.json
new file mode 100644
index 0000000..dd1b956
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellde/uncore-power.json
@@ -0,0 +1,92 @@
+[
+    {
+        "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_P_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0.  It can be used by itself to get the average number of cores in C0, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+        "Filter": "occ_sel=1",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c0 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3.  It can be used by itself to get the average number of cores in C3, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+        "Filter": "occ_sel=2",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c3 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6.  It can be used by itself to get the average number of cores in C6, with thresholding to generate histograms, or with other PCU events",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+        "Filter": "occ_sel=3",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c6 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode.  This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xA",
+        "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
+        "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "prochot_external_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_limit_thermal_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x6",
+        "EventName": "UNC_P_FREQ_MAX_OS_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_os_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5",
+        "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_power_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when the system is changing frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x74",
+        "EventName": "UNC_P_FREQ_TRANS_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_trans_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    }
+]
diff --git a/pmu-events/arch/x86/broadwellde/virtual-memory.json b/pmu-events/arch/x86/broadwellde/virtual-memory.json
new file mode 100644
index 0000000..7d79c70
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellde/virtual-memory.json
@@ -0,0 +1,388 @@
+[
+    {
+        "EventCode": "0x08",
+        "UMask": "0x1",
+        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts load misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x2",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x4",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x8",
+        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (1G  page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0xe",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "Errata": "BDM69",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x20",
+        "BriefDescription": "Load misses that miss the  DTLB and hit the STLB (4K).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT_4K",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x40",
+        "BriefDescription": "Load misses that miss the  DTLB and hit the STLB (2M).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT_2M",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x60",
+        "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x1",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x2",
+        "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x4",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x8",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G  page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0xe",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "Errata": "BDM69",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x20",
+        "BriefDescription": "Store misses that miss the  DTLB and hit the STLB (4K).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT_4K",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x40",
+        "BriefDescription": "Store misses that miss the  DTLB and hit the STLB (2M).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT_2M",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x60",
+        "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4F",
+        "UMask": "0x10",
+        "BriefDescription": "Cycle count for an Extended Page table walk.",
+        "Counter": "0,1,2,3",
+        "EventName": "EPT.WALK_CYCLES",
+        "PublicDescription": "This event counts cycles for an extended page table walk. The Extended Page directory cache differs from standard TLB caches by the operating system that use it. Virtual machine operating systems use the extended page directory cache, while guest operating systems use the standard TLB caches.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x1",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts misses in all ITLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x2",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts code misses in all ITLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x4",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts code misses in all ITLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x8",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts code misses in all ITLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0xe",
+        "BriefDescription": "Misses in all ITLB levels that cause completed page walks.",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "Errata": "BDM69",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_DURATION",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x20",
+        "BriefDescription": "Code misses that miss the ITLB and hit the STLB (4K).",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.STLB_HIT_4K",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x40",
+        "BriefDescription": "Code misses that miss the ITLB and hit the STLB (2M).",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.STLB_HIT_2M",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x60",
+        "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xAE",
+        "UMask": "0x1",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "PublicDescription": "This event counts the number of flushes of the big or small ITLB pages. Counting includes both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x11",
+        "BriefDescription": "Number of DTLB page walker hits in the L1+FB.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L1",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x12",
+        "BriefDescription": "Number of DTLB page walker hits in the L2.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L2",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x14",
+        "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L3",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x18",
+        "BriefDescription": "Number of DTLB page walker hits in Memory.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x21",
+        "BriefDescription": "Number of ITLB page walker hits in the L1+FB.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L1",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x22",
+        "BriefDescription": "Number of ITLB page walker hits in the L2.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L2",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x24",
+        "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L3",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBD",
+        "UMask": "0x1",
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+        "Counter": "0,1,2,3",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "PublicDescription": "This event counts the number of DTLB flush attempts of the thread-specific entries.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBD",
+        "UMask": "0x20",
+        "BriefDescription": "STLB flush attempts",
+        "Counter": "0,1,2,3",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "PublicDescription": "This event counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, and so on).",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/pmu-events/arch/x86/broadwellx/bdx-metrics.json
new file mode 100644
index 0000000..5a7f1ec
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellx/bdx-metrics.json
@@ -0,0 +1,164 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "MetricGroup": "Pipeline",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+        "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+        "MetricGroup": "Frontend",
+        "MetricName": "IFetch_Line_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (threaded)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "MetricGroup": "Pipeline;Summary",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Total issue-pipeline slots",
+        "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "SLOTS"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "SMT",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+        "MetricGroup": "Pipeline;Ports_Utilization",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+        "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL  - (( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)",
+        "MetricGroup": "Unknown_Branches",
+        "MetricName": "BAClear_Cost"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+        "MetricGroup": "Memory_Bound;Memory_Lat",
+        "MetricName": "Load_Miss_Real_Latency"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+        "MetricGroup": "Memory_Bound;Memory_BW",
+        "MetricName": "MLP"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles))",
+        "MetricGroup": "TLB",
+        "MetricName": "Page_Walks_Utilization"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Giga Floating Point Operations Per Second",
+        "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
+        "MetricGroup": "FLOPS;Summary",
+        "MetricName": "GFLOPs"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware threads were active",
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT;Summary",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Summary",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "C3 residency percent per core",
+        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    }
+]
diff --git a/pmu-events/arch/x86/broadwellx/cache.json b/pmu-events/arch/x86/broadwellx/cache.json
new file mode 100644
index 0000000..bf0c512
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellx/cache.json
@@ -0,0 +1,965 @@
+[
+    {
+        "EventCode": "0x24",
+        "UMask": "0x21",
+        "BriefDescription": "Demand Data Read miss L2, no rejects",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+        "PublicDescription": "This event counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x22",
+        "BriefDescription": "RFO requests that miss L2 cache.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x24",
+        "BriefDescription": "L2 cache misses when fetching instructions.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x27",
+        "BriefDescription": "Demand requests that miss L2 cache.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x30",
+        "BriefDescription": "L2 prefetch requests that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.L2_PF_MISS",
+        "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x3f",
+        "BriefDescription": "All requests that miss L2 cache.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x41",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x42",
+        "BriefDescription": "RFO requests that hit L2 cache.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x44",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x50",
+        "BriefDescription": "L2 prefetch requests that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.L2_PF_HIT",
+        "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that hit L2 cache. L3 prefetch new types.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe1",
+        "BriefDescription": "Demand Data Read requests",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "PublicDescription": "This event counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe2",
+        "BriefDescription": "RFO requests to L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "PublicDescription": "This event counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe4",
+        "BriefDescription": "L2 code requests",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "PublicDescription": "This event counts the total number of L2 code requests.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe7",
+        "BriefDescription": "Demand requests to L2 cache.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xf8",
+        "BriefDescription": "Requests from L2 hardware prefetchers",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "PublicDescription": "This event counts the total number of requests from the L2 hardware prefetchers.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xff",
+        "BriefDescription": "All L2 requests.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x27",
+        "UMask": "0x50",
+        "BriefDescription": "Not rejected writebacks that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_DEMAND_RQSTS.WB_HIT",
+        "PublicDescription": "This event counts the number of WB requests that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x2E",
+        "UMask": "0x41",
+        "BriefDescription": "Core-originated cacheable demand requests missed L3",
+        "Counter": "0,1,2,3",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "PublicDescription": "This event counts core-originated cacheable demand requests that miss the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x2E",
+        "UMask": "0x4f",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+        "Counter": "0,1,2,3",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PublicDescription": "This event counts core-originated cacheable demand requests that refer to the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x1",
+        "BriefDescription": "L1D miss outstanding duration in cycles",
+        "Counter": "2",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "PublicDescription": "This event counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand; from the demand Hit FB, if it is allocated by hardware or software prefetch.\nNote: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "Counter": "2",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts duration of L1D miss outstanding in cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+        "Counter": "2",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "AnyThread": "1",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers unavailability.",
+        "Counter": "0,1,2,3",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x51",
+        "UMask": "0x1",
+        "BriefDescription": "L1D data line replacements",
+        "Counter": "0,1,2,3",
+        "EventName": "L1D.REPLACEMENT",
+        "PublicDescription": "This event counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.\nNote: A prefetch promoted to Demand is counted from the promotion point.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "CounterMask": "1",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "CounterMask": "6",
+        "Errata": "BDM76",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x2",
+        "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "CounterMask": "1",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "CounterMask": "1",
+        "Errata": "BDM76",
+        "PublicDescription": "This event counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x63",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles when L1D is locked",
+        "Counter": "0,1,2,3",
+        "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+        "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x1",
+        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "PublicDescription": "This event counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x2",
+        "BriefDescription": "Cacheable and noncacheable code read requests",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "PublicDescription": "This event counts both cacheable and noncacheable code read requests.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x4",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "PublicDescription": "This event counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x8",
+        "BriefDescription": "Demand and prefetch data reads",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable Demands and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb2",
+        "UMask": "0x1",
+        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "PublicDescription": "This event counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.\nNote: Writeback pending FIFO has six entries.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x11",
+        "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x12",
+        "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.",
+        "SampleAfterValue": "100003",
+        "L1_Hit_Indication": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x21",
+        "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "Errata": "BDM35",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x41",
+        "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x42",
+        "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+        "SampleAfterValue": "100003",
+        "L1_Hit_Indication": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x81",
+        "BriefDescription": "All retired load uops. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x82",
+        "BriefDescription": "All retired store uops. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.",
+        "SampleAfterValue": "2000003",
+        "L1_Hit_Indication": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x1",
+        "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x2",
+        "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "Errata": "BDM35",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x4",
+        "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "Errata": "BDM100",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.",
+        "SampleAfterValue": "50021",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x8",
+        "BriefDescription": "Retired load uops misses in L1 cache as data sources. Uses PEBS.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x10",
+        "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.",
+        "SampleAfterValue": "50021",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x20",
+        "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS).",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS",
+        "Errata": "BDM100, BDE70",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x40",
+        "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x1",
+        "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS",
+        "Errata": "BDM100",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x2",
+        "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT",
+        "Errata": "BDM100",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x4",
+        "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM",
+        "Errata": "BDM100",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x8",
+        "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE",
+        "Errata": "BDM100",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x1",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+        "Errata": "BDE70, BDM100",
+        "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x4",
+        "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM",
+        "Errata": "BDE70",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x10",
+        "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM",
+        "Errata": "BDE70",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x20",
+        "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD",
+        "Errata": "BDE70",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x1",
+        "BriefDescription": "Demand Data Read requests that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.DEMAND_DATA_RD",
+        "PublicDescription": "This event counts Demand Data Read requests that access L2 cache, including rejects.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x2",
+        "BriefDescription": "RFO requests that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.RFO",
+        "PublicDescription": "This event counts Read for Ownership (RFO) requests that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x4",
+        "BriefDescription": "L2 cache accesses when fetching instructions",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.CODE_RD",
+        "PublicDescription": "This event counts the number of L2 cache accesses when fetching instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x8",
+        "BriefDescription": "L2 or L3 HW prefetches that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.ALL_PF",
+        "PublicDescription": "This event counts L2 or L3 HW prefetches that access L2 cache including rejects.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x10",
+        "BriefDescription": "L1D writebacks that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.L1D_WB",
+        "PublicDescription": "This event counts L1D writebacks that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x20",
+        "BriefDescription": "L2 fill requests that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.L2_FILL",
+        "PublicDescription": "This event counts L2 fill requests that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x40",
+        "BriefDescription": "L2 writebacks that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.L2_WB",
+        "PublicDescription": "This event counts L2 writebacks that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x80",
+        "BriefDescription": "Transactions accessing L2 pipe",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.ALL_REQUESTS",
+        "PublicDescription": "This event counts transactions that access the L2 pipe including snoops, pagewalks, and so on.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x1",
+        "BriefDescription": "L2 cache lines in I state filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.I",
+        "PublicDescription": "This event counts the number of L2 cache lines in the Invalidate state filling the L2. Counting does not cover rejects.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x2",
+        "BriefDescription": "L2 cache lines in S state filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.S",
+        "PublicDescription": "This event counts the number of L2 cache lines in the Shared state filling the L2. Counting does not cover rejects.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x4",
+        "BriefDescription": "L2 cache lines in E state filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.E",
+        "PublicDescription": "This event counts the number of L2 cache lines in the Exclusive state filling the L2. Counting does not cover rejects.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x7",
+        "BriefDescription": "L2 cache lines filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.ALL",
+        "PublicDescription": "This event counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "UMask": "0x5",
+        "BriefDescription": "Clean L2 cache lines evicted by demand.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xf4",
+        "UMask": "0x10",
+        "BriefDescription": "Split locks in SQ",
+        "Counter": "0,1,2,3",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "PublicDescription": "This event counts the number of split locks in the super queue.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all requests that hit in the L3",
+        "MSRValue": "0x3f803c8fff",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "MSRValue": "0x10003c07f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "MSRValue": "0x04003c07f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "MSRValue": "0x04003c0244",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "MSRValue": "0x10003c0122",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "MSRValue": "0x04003c0122",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "MSRValue": "0x10003c0091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "MSRValue": "0x04003c0091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3",
+        "MSRValue": "0x3f803c0200",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3",
+        "MSRValue": "0x3f803c0100",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "MSRValue": "0x10003c0002",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3",
+        "MSRValue": "0x3f803c0002",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellx/floating-point.json b/pmu-events/arch/x86/broadwellx/floating-point.json
new file mode 100644
index 0000000..d7b9d9c
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellx/floating-point.json
@@ -0,0 +1,165 @@
+[
+    {
+        "EventCode": "0xC1",
+        "UMask": "0x8",
+        "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+        "Counter": "0,1,2,3",
+        "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+        "Errata": "BDM30",
+        "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "UMask": "0x10",
+        "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+        "Counter": "0,1,2,3",
+        "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+        "Errata": "BDM30",
+        "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x1",
+        "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x2",
+        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x3",
+        "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x4",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired.  Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x8",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x10",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x15",
+        "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX* scalar and packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.DOUBLE",
+        "SampleAfterValue": "2000006",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc7",
+        "UMask": "0x20",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired.  Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x2a",
+        "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX* scalar and packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.SINGLE",
+        "SampleAfterValue": "2000005",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x3c",
+        "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.PACKED",
+        "SampleAfterValue": "2000004",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x2",
+        "BriefDescription": "Number of X87 assists due to output value.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.X87_OUTPUT",
+        "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x4",
+        "BriefDescription": "Number of X87 assists due to input value.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.X87_INPUT",
+        "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x8",
+        "BriefDescription": "Number of SIMD FP assists due to Output values",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.SIMD_OUTPUT",
+        "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x10",
+        "BriefDescription": "Number of SIMD FP assists due to input values",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.SIMD_INPUT",
+        "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x1e",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.ANY",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellx/frontend.json b/pmu-events/arch/x86/broadwellx/frontend.json
new file mode 100644
index 0000000..72781e1
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellx/frontend.json
@@ -0,0 +1,286 @@
+[
+    {
+        "EventCode": "0x79",
+        "UMask": "0x2",
+        "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.EMPTY",
+        "PublicDescription": "This counts the number of cycles that the instruction decoder queue is empty and can indicate that the application may be bound in the front end.  It does not determine whether there are uops being delivered to the Alloc stage since uops can be delivered by bypass skipping the Instruction Decode Queue (IDQ) when it is empty.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x4",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MITE_UOPS",
+        "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MITE_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x8",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.DSB_UOPS",
+        "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.DSB_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x10",
+        "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_DSB_UOPS",
+        "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0x79",
+        "UMask": "0x10",
+        "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_DSB_OCCUR",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x18",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "CounterMask": "4",
+        "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x18",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x20",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x24",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "CounterMask": "4",
+        "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x24",
+        "BriefDescription": "Cycles MITE is delivering any Uop",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x30",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_UOPS",
+        "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x30",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0x79",
+        "UMask": "0x30",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_SWITCHES",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x3c",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MITE_ALL_UOPS",
+        "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "UMask": "0x1",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads, both cacheable and noncacheable, including UC fetches",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE.HIT",
+        "PublicDescription": "This event counts the number of both cacheable and noncacheable Instruction Cache, Streaming Buffer and Victim Cache Reads including UC fetches.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "UMask": "0x2",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes Uncacheable accesses.",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE.MISSES",
+        "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. Counting includes UC accesses.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE.IFDATA_STALL",
+        "PublicDescription": "This event counts cycles during which the demand fetch waits for data (wfdM104H) from L2 or iSB (opportunistic hit).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 - x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "CounterMask": "4",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "CounterMask": "3",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles when 1 or fewer uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >=3.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with 2 or fewer uops delivered by the front end.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "CounterMask": "2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with 3 or fewer uops delivered by the front end.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xAB",
+        "UMask": "0x2",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+        "Counter": "0,1,2,3",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. DSB-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0-2 cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellx/memory.json b/pmu-events/arch/x86/broadwellx/memory.json
new file mode 100644
index 0000000..d79a5cf
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellx/memory.json
@@ -0,0 +1,679 @@
+[
+    {
+        "EventCode": "0x05",
+        "UMask": "0x1",
+        "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "MISALIGN_MEM_REF.LOADS",
+        "PublicDescription": "This event counts speculative cache-line split load uops dispatched to the L1 cache.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x05",
+        "UMask": "0x2",
+        "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "MISALIGN_MEM_REF.STORES",
+        "PublicDescription": "This event counts speculative cache line split store-address (STA) uops dispatched to the L1 cache.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x1",
+        "BriefDescription": "Number of times a TSX line had a cache conflict",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_CONFLICT",
+        "PublicDescription": "Number of times a TSX line had a cache conflict.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x4",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x8",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x10",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x20",
+        "BriefDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times we could not allocate Lock Buffer",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x1",
+        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x2",
+        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC2",
+        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x4",
+        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC3",
+        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x8",
+        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC4",
+        "PublicDescription": "RTM region detected inside HLE.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x10",
+        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC5",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x2",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:\n1. memory disambiguation,\n2. external snoop, or\n3. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x1",
+        "BriefDescription": "Number of times we entered an HLE region; does not count nested transactions",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.START",
+        "PublicDescription": "Number of times we entered an HLE region; does not count nested transactions.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times HLE commit succeeded",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.COMMIT",
+        "PublicDescription": "Number of times HLE commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x4",
+        "BriefDescription": "Number of times HLE abort was triggered (PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED",
+        "PublicDescription": "Number of times HLE abort was triggered (PEBS).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x8",
+        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC1",
+        "PublicDescription": "Number of times an HLE abort was attributed to a Memory condition (See TSX_Memory event for additional details).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x10",
+        "BriefDescription": "Number of times an HLE execution aborted due to uncommon conditions",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC2",
+        "PublicDescription": "Number of times the TSX watchdog signaled an HLE abort.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x20",
+        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC3",
+        "PublicDescription": "Number of times a disallowed operation caused an HLE abort.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC4",
+        "PublicDescription": "Number of times HLE caused a fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x80",
+        "BriefDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts)",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC5",
+        "PublicDescription": "Number of times HLE aborted and was not due to the abort conditions in subevents 3-6.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x1",
+        "BriefDescription": "Number of times we entered an RTM region; does not count nested transactions",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.START",
+        "PublicDescription": "Number of times we entered an RTM region; does not count nested transactions.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times RTM commit succeeded",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.COMMIT",
+        "PublicDescription": "Number of times RTM commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x4",
+        "BriefDescription": "Number of times RTM abort was triggered (PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED",
+        "PublicDescription": "Number of times RTM abort was triggered (PEBS).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x8",
+        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC1",
+        "PublicDescription": "Number of times an RTM abort was attributed to a Memory condition (See TSX_Memory event for additional details).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x10",
+        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC2",
+        "PublicDescription": "Number of times the TSX watchdog signaled an RTM abort.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x20",
+        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC3",
+        "PublicDescription": "Number of times a disallowed operation caused an RTM abort.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC4",
+        "PublicDescription": "Number of times an RTM caused a fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x80",
+        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC5",
+        "PublicDescription": "Number of times RTM aborted and was not due to the abort conditions in subevents 3-6.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 4",
+        "PEBS": "2",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above four.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 8",
+        "PEBS": "2",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above eight.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "50021",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 16",
+        "PEBS": "2",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above 16.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 32",
+        "PEBS": "2",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above 32.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 64",
+        "PEBS": "2",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above 64.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "2003",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 128",
+        "PEBS": "2",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above 128.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "1009",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 256",
+        "PEBS": "2",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above 256.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "503",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 512",
+        "PEBS": "2",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "Errata": "BDM100, BDM35",
+        "PublicDescription": "This event counts loads with latency value being above 512.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "101",
+        "CounterHTOff": "3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all requests that miss in the L3",
+        "MSRValue": "0x3fbfc08fff",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all requests that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache",
+        "MSRValue": "0x087fc007f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache",
+        "MSRValue": "0x103fc007f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram",
+        "MSRValue": "0x063bc007f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram",
+        "MSRValue": "0x06040007f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3",
+        "MSRValue": "0x3fbfc007f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram",
+        "MSRValue": "0x0604000244",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch code reads that miss in the L3",
+        "MSRValue": "0x3fbfc00244",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram",
+        "MSRValue": "0x0604000122",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3",
+        "MSRValue": "0x3fbfc00122",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache",
+        "MSRValue": "0x087fc00091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache",
+        "MSRValue": "0x103fc00091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram",
+        "MSRValue": "0x063bc00091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram",
+        "MSRValue": "0x0604000091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3",
+        "MSRValue": "0x3fbfc00091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3",
+        "MSRValue": "0x3fbfc00200",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3",
+        "MSRValue": "0x3fbfc00100",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache",
+        "MSRValue": "0x103fc00002",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3",
+        "MSRValue": "0x3fbfc00002",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellx/other.json b/pmu-events/arch/x86/broadwellx/other.json
new file mode 100644
index 0000000..4475249
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellx/other.json
@@ -0,0 +1,44 @@
+[
+    {
+        "EventCode": "0x5C",
+        "UMask": "0x1",
+        "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+        "Counter": "0,1,2,3",
+        "EventName": "CPL_CYCLES.RING0",
+        "PublicDescription": "This event counts the unhalted core cycles during which the thread is in the ring 0 privileged mode.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0x5C",
+        "UMask": "0x1",
+        "BriefDescription": "Number of intervals between processor halts while thread is in ring 0",
+        "Counter": "0,1,2,3",
+        "EventName": "CPL_CYCLES.RING0_TRANS",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts when there is a transition from ring 1,2 or 3 to ring0.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5C",
+        "UMask": "0x2",
+        "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+        "Counter": "0,1,2,3",
+        "EventName": "CPL_CYCLES.RING123",
+        "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x63",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+        "Counter": "0,1,2,3",
+        "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+        "PublicDescription": "This event counts cycles in which the L1 and L2 are locked due to a UC lock or split lock. A lock is asserted in case of locked memory access, due to noncacheable memory, locked operation that spans two cache lines, or a page walk from the noncacheable page table. L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such access.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellx/pipeline.json b/pmu-events/arch/x86/broadwellx/pipeline.json
new file mode 100644
index 0000000..920c89d
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellx/pipeline.json
@@ -0,0 +1,1427 @@
+[
+    {
+        "EventCode": "0x00",
+        "UMask": "0x1",
+        "BriefDescription": "Instructions retired from execution.",
+        "Counter": "Fixed counter 0",
+        "EventName": "INST_RETIRED.ANY",
+        "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 0"
+    },
+    {
+        "EventCode": "0x00",
+        "UMask": "0x2",
+        "BriefDescription": "Core cycles when the thread is not in halt state",
+        "Counter": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "UMask": "0x2",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "Counter": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "UMask": "0x3",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "Counter": "Fixed counter 2",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate than the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "EventCode": "0x03",
+        "UMask": "0x2",
+        "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding",
+        "Counter": "0,1,2,3",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "PublicDescription": "This event counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:\n - preceding store conflicts with the load (incomplete overlap);\n - store forwarding is impossible due to u-arch limitations;\n - preceding lock RMW operations are not forwarded;\n - store has the no-forward bit set (uncacheable/page-split/masked stores);\n - all-blocking stores are used (mostly, fences and port I/O);\nand others.\nThe most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events.\nSee the table of not supported store forwards in the Optimization Guide.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x03",
+        "UMask": "0x8",
+        "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "Counter": "0,1,2,3",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x07",
+        "UMask": "0x1",
+        "BriefDescription": "False dependencies in MOB due to partial compare",
+        "Counter": "0,1,2,3",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "PublicDescription": "This event counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "UMask": "0x3",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+        "Counter": "0,1,2,3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "UMask": "0x3",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+        "Counter": "0,1,2,3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "AnyThread": "1",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread",
+        "Counter": "0,1,2,3",
+        "EventName": "INT_MISC.RAT_STALL_CYCLES",
+        "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x1",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.ANY",
+        "PublicDescription": "This event counts the number of Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0x0E",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x10",
+        "BriefDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+        "PublicDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x20",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.SLOW_LEA",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x40",
+        "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.SINGLE_MUL",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x14",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when divider is busy executing divide operations",
+        "Counter": "0,1,2,3",
+        "EventName": "ARITH.FPU_DIV_ACTIVE",
+        "PublicDescription": "This event counts the number of the divide operations executed. Uses edge-detect and a cmask value of 1 on ARITH.FPU_DIV_ACTIVE to get the number of the divide operations executed.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x0",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x0",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "PublicDescription": "This is a fixed-frequency event programmed to general counters. It counts when the core is unhalted at 100 Mhz.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3c",
+        "UMask": "0x2",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x2",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4c",
+        "UMask": "0x1",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+        "Counter": "0,1,2,3",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by asm inspection of the nearby instructions.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4C",
+        "UMask": "0x2",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+        "Counter": "0,1,2,3",
+        "EventName": "LOAD_HIT_PRE.HW_PF",
+        "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the hardware prefetch.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "UMask": "0x1",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+        "Counter": "0,1,2,3",
+        "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "UMask": "0x2",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+        "Counter": "0,1,2,3",
+        "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "UMask": "0x4",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+        "Counter": "0,1,2,3",
+        "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "UMask": "0x8",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+        "Counter": "0,1,2,3",
+        "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5E",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+        "Counter": "0,1,2,3",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "PublicDescription": "This event counts cycles during which the reservation station (RS) is empty for the thread.\nNote: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "Invert": "1",
+        "EventCode": "0x5E",
+        "UMask": "0x1",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "Counter": "0,1,2,3",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "CounterMask": "1",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x87",
+        "UMask": "0x1",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "Counter": "0,1,2,3",
+        "EventName": "ILD_STALL.LCP",
+        "PublicDescription": "This event counts stalls occurred due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x41",
+        "BriefDescription": "Not taken macro-conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+        "PublicDescription": "This event counts not taken macro-conditional branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x81",
+        "BriefDescription": "Taken speculative and retired macro-conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+        "PublicDescription": "This event counts taken speculative and retired macro-conditional branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x82",
+        "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+        "PublicDescription": "This event counts taken speculative and retired macro-conditional branch instructions excluding calls and indirect branches.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x84",
+        "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "PublicDescription": "This event counts taken speculative and retired indirect branches excluding calls and return branches.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x88",
+        "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+        "PublicDescription": "This event counts taken speculative and retired indirect branches that have a return mnemonic.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x90",
+        "BriefDescription": "Taken speculative and retired direct near calls",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+        "PublicDescription": "This event counts taken speculative and retired direct near calls.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xa0",
+        "BriefDescription": "Taken speculative and retired indirect calls",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "PublicDescription": "This event counts taken speculative and retired indirect calls including both register and memory indirect.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xc1",
+        "BriefDescription": "Speculative and retired macro-conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired macro-conditional branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xc2",
+        "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired macro-unconditional branch instructions, excluding calls and indirects.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xc4",
+        "BriefDescription": "Speculative and retired indirect branches excluding calls and returns",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired indirect branches excluding calls and return branches.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xc8",
+        "BriefDescription": "Speculative and retired indirect return branches.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired indirect branches that have a return mnemonic.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xd0",
+        "BriefDescription": "Speculative and retired direct near calls",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired direct near calls.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xff",
+        "BriefDescription": "Speculative and retired branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0x41",
+        "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+        "PublicDescription": "This event counts not taken speculative and retired mispredicted macro conditional branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0x81",
+        "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+        "PublicDescription": "This event counts taken speculative and retired mispredicted macro conditional branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0x84",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "PublicDescription": "This event counts taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0x88",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+        "PublicDescription": "This event counts taken speculative and retired mispredicted indirect branches that have a return mnemonic.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0xa0",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0xc1",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted macro conditional branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0xc4",
+        "BriefDescription": "Mispredicted indirect branches excluding calls and returns",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "PublicDescription": "This event counts both taken and not taken mispredicted indirect branches excluding calls and returns.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0xff",
+        "BriefDescription": "Speculative and retired mispredicted branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+        "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted branch instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA0",
+        "UMask": "0x3",
+        "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports",
+        "Counter": "0,1,2,3",
+        "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF",
+        "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file.  The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*.  See the Broadwell Optimization Guide for more information.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per core when uops are executed in port 0.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_0",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles per core when uops are executed in port 1.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_1",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 2.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_2",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 3.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_3",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles per core when uops are executed in port 4.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_4",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x20",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x20",
+        "BriefDescription": "Cycles per core when uops are executed in port 5.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x20",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_5",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x40",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x40",
+        "BriefDescription": "Cycles per core when uops are executed in port 6.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x40",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_6",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x80",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x80",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 7.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x80",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_7",
+        "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x1",
+        "BriefDescription": "Resource-related stall cycles",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.RS",
+        "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.SB",
+        "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles stalled due to re-order buffer full.",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.ROB",
+        "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+        "CounterMask": "1",
+        "PublicDescription": "Counts number of cycles the CPU has at least one pending  demand* load request missing the L2 cache.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+        "CounterMask": "2",
+        "PublicDescription": "Counts number of cycles the CPU has at least one pending  demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+        "CounterMask": "2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x4",
+        "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+        "CounterMask": "4",
+        "PublicDescription": "Counts number of cycles nothing is executed on any execution port.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x4",
+        "BriefDescription": "Total execution stalls.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+        "CounterMask": "4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x5",
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+        "CounterMask": "5",
+        "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache (as a footprint). * Includes also L1 HW prefetch requests that may or may not be required by demands.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x5",
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+        "CounterMask": "5",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x6",
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+        "CounterMask": "6",
+        "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x6",
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+        "CounterMask": "6",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "Counter": "2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+        "CounterMask": "8",
+        "PublicDescription": "Counts number of cycles the CPU has at least one pending  demand load request missing the L1 data cache.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "Counter": "2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+        "CounterMask": "8",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0xc",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "Counter": "2",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+        "CounterMask": "12",
+        "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0xc",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "Counter": "2",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "CounterMask": "12",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA8",
+        "UMask": "0x1",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "Counter": "0,1,2,3",
+        "EventName": "LSD.UOPS",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "Counter": "0,1,2,3",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "CounterMask": "4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "Counter": "0,1,2,3",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.THREAD",
+        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "CounterMask": "2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "CounterMask": "3",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "CounterMask": "4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x2",
+        "BriefDescription": "Number of uops executed on the core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "PublicDescription": "Number of uops executed from any thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "CounterMask": "2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "CounterMask": "3",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "CounterMask": "4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC0",
+        "UMask": "0x0",
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "Counter": "0,1,2,3",
+        "EventName": "INST_RETIRED.ANY_P",
+        "Errata": "BDM61",
+        "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC0",
+        "UMask": "0x1",
+        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+        "PEBS": "2",
+        "Counter": "1",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "Errata": "BDM11, BDM55",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "1"
+    },
+    {
+        "EventCode": "0xC0",
+        "UMask": "0x2",
+        "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions.",
+        "Counter": "0,1,2,3",
+        "EventName": "INST_RETIRED.X87",
+        "PublicDescription": "This event counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+        "Counter": "0,1,2,3",
+        "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC2",
+        "UMask": "0x1",
+        "BriefDescription": "Actually retired uops. (Precise Event - PEBS)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.ALL",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xC2",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles without actually retired uops.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xC2",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "CounterMask": "10",
+        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC2",
+        "UMask": "0x2",
+        "BriefDescription": "Retirement slots used. (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.CYCLES",
+        "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0xC3",
+        "UMask": "0x1",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "CounterMask": "1",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x4",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x20",
+        "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.MASKMOV",
+        "PublicDescription": "Maskmov false fault - counts number of times ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x0",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "This event counts all (macro) branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x1",
+        "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x2",
+        "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x2",
+        "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x4",
+        "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS)",
+        "PEBS": "2",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "Errata": "BDW98",
+        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x8",
+        "BriefDescription": "Return instructions retired. (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x10",
+        "BriefDescription": "Not taken branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "PublicDescription": "This event counts not taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x20",
+        "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x40",
+        "BriefDescription": "Far branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "Errata": "BDW98",
+        "PublicDescription": "This event counts far branch instructions retired.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x0",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "This event counts all mispredicted macro branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x1",
+        "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x4",
+        "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)",
+        "PEBS": "2",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x8",
+        "BriefDescription": "This event counts the number of mispredicted ret instructions retired. (Precise Event)",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.RET",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x20",
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCC",
+        "UMask": "0x20",
+        "BriefDescription": "Count cases of saving new LBR",
+        "Counter": "0,1,2,3",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xe6",
+        "UMask": "0x1f",
+        "BriefDescription": "Counts the total number of times the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "Counter": "0,1,2,3",
+        "EventName": "BACLEARS.ANY",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/broadwellx/uncore-cache.json b/pmu-events/arch/x86/broadwellx/uncore-cache.json
new file mode 100644
index 0000000..58ed6d3
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellx/uncore-cache.json
@@ -0,0 +1,317 @@
+[
+    {
+        "BriefDescription": "Uncore cache clock ticks",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_C_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "UNC_C_LLC_LOOKUP.ANY",
+        "Filter": "filter_state=0x1",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x11",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "M line evictions from LLC (writebacks to memory)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x37",
+        "EventName": "UNC_C_LLC_VICTIMS.M_STATE",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.DATA_READ",
+        "Filter": "filter_opc=0x182",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses - Uncacheable reads (from cpu). Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.UNCACHEABLE",
+        "Filter": "filter_opc=0x187",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "MMIO reads. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.MMIO_READ",
+        "Filter": "filter_opc=0x187,filter_nc=1",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "MMIO writes. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.MMIO_WRITE",
+        "Filter": "filter_opc=0x18f,filter_nc=1",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC prefetch misses for RFO. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.RFO_LLC_PREFETCH",
+        "Filter": "filter_opc=0x190",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC prefetch misses for code reads. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.CODE_LLC_PREFETCH",
+        "Filter": "filter_opc=0x191",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC prefetch misses for data reads. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.DATA_LLC_PREFETCH",
+        "Filter": "filter_opc=0x192",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses for PCIe read current. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_READ",
+        "Filter": "filter_opc=0x19e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "ItoM write misses (as part of fast string memcpy stores) + PCIe full line writes. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_WRITE",
+        "Filter": "filter_opc=0x1c8",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe write misses (full cache line). Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_NON_SNOOP_WRITE",
+        "Filter": "filter_opc=0x1c8,filter_tid=0x3e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe writes (partial cache line). Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_NS_PARTIAL_WRITE",
+        "Filter": "filter_opc=0x180,filter_tid=0x3e",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "L2 demand and L2 prefetch code references to LLC. Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.CODE_LLC_PREFETCH",
+        "Filter": "filter_opc=0x181",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Streaming stores (full cache line). Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.STREAMING_FULL",
+        "Filter": "filter_opc=0x18c",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Streaming stores (partial cache line). Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
+        "Filter": "filter_opc=0x18d",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe read current. Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_READ",
+        "Filter": "filter_opc=0x19e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe write references (full cache line). Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_WRITE",
+        "Filter": "filter_opc=0x1c8,filter_tid=0x3e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Occupancy counter for LLC data reads (demand and L2 prefetch). Derived from unc_c_tor_occupancy.miss_opcode",
+        "EventCode": "0x36",
+        "EventName": "UNC_C_TOR_OCCUPANCY.LLC_DATA_READ",
+        "Filter": "filter_opc=0x182",
+        "PerPkg": "1",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "read requests to home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.READS",
+        "PerPkg": "1",
+        "UMask": "0x3",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "read requests to local home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.READS_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "read requests to remote home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.READS_REMOTE",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "write requests to home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.WRITES",
+        "PerPkg": "1",
+        "UMask": "0xC",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "write requests to local home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.WRITES_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "write requests to remote home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.WRITES_REMOTE",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT",
+        "PerPkg": "1",
+        "UMask": "0x40",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "M line forwarded from remote cache along with writeback to memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x20",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "M line forwarded from remote cache with no writeback to memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPIFWD",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x4",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "Shared line response from remote cache",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPS",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x2",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "Shared line forwarded from remote cache",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPSFWD",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x8",
+        "Unit": "HA"
+    }
+]
diff --git a/pmu-events/arch/x86/broadwellx/uncore-interconnect.json b/pmu-events/arch/x86/broadwellx/uncore-interconnect.json
new file mode 100644
index 0000000..8249613
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellx/uncore-interconnect.json
@@ -0,0 +1,28 @@
+[
+    {
+        "BriefDescription": "QPI clock ticks",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x14",
+        "EventName": "UNC_Q_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "QPI LL"
+    },
+    {
+        "BriefDescription": "Number of data flits transmitted. Derived from unc_q_txl_flits_g0.data",
+        "Counter": "0,1,2,3",
+        "EventName": "QPI_DATA_BANDWIDTH_TX",
+        "PerPkg": "1",
+        "ScaleUnit": "8Bytes",
+        "UMask": "0x2",
+        "Unit": "QPI LL"
+    },
+    {
+        "BriefDescription": "Number of non data (control) flits transmitted. Derived from unc_q_txl_flits_g0.non_data",
+        "Counter": "0,1,2,3",
+        "EventName": "QPI_CTL_BANDWIDTH_TX",
+        "PerPkg": "1",
+        "ScaleUnit": "8Bytes",
+        "UMask": "0x4",
+        "Unit": "QPI LL"
+    }
+]
diff --git a/pmu-events/arch/x86/broadwellx/uncore-memory.json b/pmu-events/arch/x86/broadwellx/uncore-memory.json
new file mode 100644
index 0000000..66eed39
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellx/uncore-memory.json
@@ -0,0 +1,86 @@
+[
+    {
+        "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_READ",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_WRITE",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0xC",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Memory controller clock ticks",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_M_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "UNC_M_POWER_CHANNEL_PPD",
+        "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_channel_ppd %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles all ranks are in critical thermal throttle",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x86",
+        "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES",
+        "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_critical_throttle_cycles %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles Memory is in self refresh power mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x43",
+        "EventName": "UNC_M_POWER_SELF_REFRESH",
+        "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_self_refresh %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Pre-charges due to page misses",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Pre-charge for reads",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.RD",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Pre-charge for writes",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.WR",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "iMC"
+    }
+]
diff --git a/pmu-events/arch/x86/broadwellx/uncore-power.json b/pmu-events/arch/x86/broadwellx/uncore-power.json
new file mode 100644
index 0000000..dd1b956
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellx/uncore-power.json
@@ -0,0 +1,92 @@
+[
+    {
+        "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_P_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0.  It can be used by itself to get the average number of cores in C0, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+        "Filter": "occ_sel=1",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c0 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3.  It can be used by itself to get the average number of cores in C3, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+        "Filter": "occ_sel=2",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c3 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6.  It can be used by itself to get the average number of cores in C6, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+        "Filter": "occ_sel=3",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c6 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode.  This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xA",
+        "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
+        "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "prochot_external_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_limit_thermal_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x6",
+        "EventName": "UNC_P_FREQ_MAX_OS_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_os_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5",
+        "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_power_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when the system is changing frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x74",
+        "EventName": "UNC_P_FREQ_TRANS_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_trans_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    }
+]
diff --git a/pmu-events/arch/x86/broadwellx/virtual-memory.json b/pmu-events/arch/x86/broadwellx/virtual-memory.json
new file mode 100644
index 0000000..7d79c70
--- /dev/null
+++ b/pmu-events/arch/x86/broadwellx/virtual-memory.json
@@ -0,0 +1,388 @@
+[
+    {
+        "EventCode": "0x08",
+        "UMask": "0x1",
+        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts load misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x2",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x4",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x8",
+        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (1G  page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0xe",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "Errata": "BDM69",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x20",
+        "BriefDescription": "Load misses that miss the  DTLB and hit the STLB (4K).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT_4K",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x40",
+        "BriefDescription": "Load misses that miss the  DTLB and hit the STLB (2M).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT_2M",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x60",
+        "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x1",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x2",
+        "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x4",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x8",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G  page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0xe",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "Errata": "BDM69",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x20",
+        "BriefDescription": "Store misses that miss the  DTLB and hit the STLB (4K).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT_4K",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x40",
+        "BriefDescription": "Store misses that miss the  DTLB and hit the STLB (2M).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT_2M",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x60",
+        "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4F",
+        "UMask": "0x10",
+        "BriefDescription": "Cycle count for an Extended Page table walk.",
+        "Counter": "0,1,2,3",
+        "EventName": "EPT.WALK_CYCLES",
+        "PublicDescription": "This event counts cycles for an extended page table walk. The Extended Page directory cache differs from standard TLB caches by the operating systems that use it. Virtual machine operating systems use the extended page directory cache, while guest operating systems use the standard TLB caches.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x1",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts code misses in all ITLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x2",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts code misses in all ITLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x4",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts code misses in all ITLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x8",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts code misses in all ITLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0xe",
+        "BriefDescription": "Misses in all ITLB levels that cause completed page walks.",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "Errata": "BDM69",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_DURATION",
+        "Errata": "BDM69",
+        "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x20",
+        "BriefDescription": "Code misses that miss the ITLB and hit the STLB (4K).",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.STLB_HIT_4K",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x40",
+        "BriefDescription": "Code misses that miss the ITLB and hit the STLB (2M).",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.STLB_HIT_2M",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x60",
+        "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xAE",
+        "UMask": "0x1",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "PublicDescription": "This event counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x11",
+        "BriefDescription": "Number of DTLB page walker hits in the L1+FB.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L1",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x12",
+        "BriefDescription": "Number of DTLB page walker hits in the L2.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L2",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x14",
+        "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L3",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x18",
+        "BriefDescription": "Number of DTLB page walker hits in Memory.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x21",
+        "BriefDescription": "Number of ITLB page walker hits in the L1+FB.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L1",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x22",
+        "BriefDescription": "Number of ITLB page walker hits in the L2.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L2",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x24",
+        "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L3",
+        "Errata": "BDM69, BDM98",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBD",
+        "UMask": "0x1",
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+        "Counter": "0,1,2,3",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "PublicDescription": "This event counts the number of DTLB flush attempts of the thread-specific entries.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBD",
+        "UMask": "0x20",
+        "BriefDescription": "STLB flush attempts",
+        "Counter": "0,1,2,3",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "PublicDescription": "This event counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, and so on).",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/goldmont/cache.json b/pmu-events/arch/x86/goldmont/cache.json
new file mode 100644
index 0000000..f8bbe08
--- /dev/null
+++ b/pmu-events/arch/x86/goldmont/cache.json
@@ -0,0 +1,2045 @@
+[
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts memory requests originating from the core that miss in the L2 cache.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache request misses"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts memory requests originating from the core that reference a cache line in the L2 cache.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the intra-die interconnect (IDI) fabric. The XQ may reject transactions from the L2Q (non-cacheable requests), L2 misses and L2 write-back victims.",
+        "EventCode": "0x30",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "L2_REJECT_XQ.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests rejected by the XQ"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of demand and L1 prefetcher requests rejected by the L2Q due to a full or nearly full condition which likely indicates back pressure from L2Q. It also counts requests that would have gone directly to the XQ, but are rejected due to a full or nearly full condition, indicating back pressure from the IDI link. The L2Q may also reject transactions from a core to ensure fairness between cores, or to delay a core's dirty eviction when the address conflicts with incoming external snoops.",
+        "EventCode": "0x31",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CORE_REJECT_L2Q.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests rejected by the L2Q"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts when a modified (dirty) cache line is evicted from the data L1 cache and needs to be written back to memory.  No count will occur if the evicted line is clean, and hence does not require a writeback.",
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DL1.DIRTY_EVICTION",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L1 Cache evictions for dirty data"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss.  Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.",
+        "EventCode": "0x86",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles code-fetch stalled due to an outstanding ICache miss."
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "EventCode": "0xB7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts locked memory uops retired.  This includes regular locks and bus locks. (To specifically count bus locks only, see the Offcore response event.)  A locked access is one with a lock prefix, or an exchange to memory.  See the SDM for a complete description of which memory load accesses are locks.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Locked load uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired where the data requested spans a 64 byte cache line boundary.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts store uops retired where the data requested spans a 64 byte cache line boundary.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Store uops retired that split a cache-line (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts memory uops retired where the data requested spans a 64 byte cache line boundary.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x43",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts the number of load uops retired.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts the number of store uops retired.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Store uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts the number of memory uops retired that are either loads or stores or both.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x83",
+        "EventName": "MEM_UOPS_RETIRED.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Memory uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired that hit the L1 data cache.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired that hit in the L2 cache.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired that hit L2 (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired that miss the L1 data cache.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired that miss in the L2 cache.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired that missed L2 (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired where the cache line containing the data was in the modified state of another core or module's cache (HITM).  More specifically, this means that when the load address was checked by other caching agents (typically another processor) in the system, one of those caching agents indicated that they had a dirty copy of the data.  Loads that obtain a HITM response incur greater latency than is typical for a load.  In addition, since HITM indicates that some other processor had this data in its cache, it implies that the data was shared between processors, or potentially was a lock or semaphore value.  This event is useful for locating sharing, false sharing, and contended locks.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.HITM",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts memory load uops retired where the data is retrieved from the WCB (or fill buffer), indicating that the load found its data while that data was in the process of being brought into the L1 cache.  Typically a load will receive this indication when some other load or prefetch missed the L1 cache and was in the process of retrieving the cache line containing the data, but that process had not yet finished (and written the data back to the cache). For example, consider load X and Y, both referencing the same cache line that is not in the L1 cache.  If load X misses cache first, it obtains a WCB (or fill buffer) and begins the process of requesting the data.  When load Y requests the data, it will either hit the WCB, or the L1 cache, depending on exactly what time the request to Y occurs.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads retired that hit WCB (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts memory load uops retired where the data is retrieved from DRAM.  Event is counted at retirement, so the speculative loads are ignored.  A memory load can hit (or miss) the L1 cache, hit (or miss) the L2 cache, hit DRAM, hit in the WCB or receive a HITM response.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads retired that came from DRAM (Precise event capable)"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x40000032b7 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x36000032b7 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x10000032b7 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x04000032b7 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.  Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x02000032b7 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x00000432b7 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x00000132b7 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000022 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600000022 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000022 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000022 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000022 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040022 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010022 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000003091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600003091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000003091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400003091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200003091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000043091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000013091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000003010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600003010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000003010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400003010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200003010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000043010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000013010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000048000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000018000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000004800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600004800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000004800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400004800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200004800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000044800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000014800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000004000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600004000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000004000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400004000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200004000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are true misses for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000044000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000014000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000002000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600002000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000002000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400002000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are true misses for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200002000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are true misses for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000042000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000012000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line requests by software prefetch instructions that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000001000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line requests by software prefetch instructions that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line requests by software prefetch instructions that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600001000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line requests by software prefetch instructions that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000001000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400001000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line requests by software prefetch instructions that are true misses for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200001000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line requests by software prefetch instructions that are true misses for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line requests by software prefetch instructions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000041000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line requests by software prefetch instructions that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line requests by software prefetch instructions that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000011000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line requests by software prefetch instructions that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600000800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are true misses for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are true misses for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000400 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600000400 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000400 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000400 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests that are true misses for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000400 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests that are true misses for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040400 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010400 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts code reads in uncacheable (UC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts code reads in uncacheable (UC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts code reads in uncacheable (UC) memory region that are true misses for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts code reads in uncacheable (UC) memory region that are true misses for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts code reads in uncacheable (UC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts code reads in uncacheable (UC) memory region that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts code reads in uncacheable (UC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts code reads in uncacheable (UC) memory region that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are a true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are a true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are a true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are a true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are a true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are a true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are a true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are a true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.ANY",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are a true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are a true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L2_HIT",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache and are a true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache and are a true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600000002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that are true misses for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that are true misses for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x3600000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines that are true misses for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines that are true misses for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines that hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines that have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/goldmont/frontend.json b/pmu-events/arch/x86/goldmont/frontend.json
new file mode 100644
index 0000000..9ba0851
--- /dev/null
+++ b/pmu-events/arch/x86/goldmont/frontend.json
@@ -0,0 +1,52 @@
+[
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is in the ICache (hit).  The event strives to count on a cache line basis, so that multiple accesses which hit in a single cache line count as one ICACHE.HIT.  Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ICACHE.HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "References per ICache line that are available in the ICache (hit). This event counts differently than Intel processors based on Silvermont microarchitecture"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is not in the ICache (miss).  The event strives to count on a cache line basis, so that multiple accesses which miss in a single cache line count as one ICACHE.MISS.  Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is not in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ICACHE.MISSES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "References per ICache line that are not available in the ICache (miss). This event counts differently than Intel processors based on Silvermont microarchitecture"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line.  The event strives to count on a cache line basis, so that multiple fetches to a single cache line count as one ICACHE.ACCESS.  Specifically, the event counts when accesses from straight line code crosses the cache line boundary, or when a branch target is to a new line. This event counts differently than Intel processors based on Silvermont microarchitecture.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "ICACHE.ACCESSES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "References per ICache line. This event counts differently than Intel processors based on Silvermont microarchitecture"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of times the Microcode Sequencer (MS) starts a flow of uops from the MSROM. It does not count every time a uop is read from the MSROM.  The most common case that this counts is when a micro-coded instruction is encountered by the front end of the machine.  Other cases include when an instruction encounters a fault, trap, or microcode assist of any sort that initiates a flow of uops.  The event will count MS startups for uops that are speculative, and subsequently cleared by branch mispredict or a machine clear.",
+        "EventCode": "0xE7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MS_DECODED.MS_ENTRY",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "MS decode starts"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of times the prediction (from the predecode cache) for instruction length is incorrect.",
+        "EventCode": "0xE9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DECODE_RESTRICTION.PREDECODE_WRONG",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Decode restrictions due to predicting wrong instruction length"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/goldmont/memory.json b/pmu-events/arch/x86/goldmont/memory.json
new file mode 100644
index 0000000..690cebd
--- /dev/null
+++ b/pmu-events/arch/x86/goldmont/memory.json
@@ -0,0 +1,294 @@
+[
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts when a memory load uop that spans a page boundary (a split) is retired.",
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MISALIGN_MEM_REF.LOAD_PAGE_SPLIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops that split a page (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts when a memory store uop that spans a page boundary (a split) is retired.",
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Store uops that split a page (Precise event capable)"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts machine clears due to memory ordering issues.  This occurs when a snoop request happens and the machine is uncertain if memory ordering will be preserved as another core is in the process of modifying the data.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Machine clears due to memory ordering issue"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x20000032b7 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000000022 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000003091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000003010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000008000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region  that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000004800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region  that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region  that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000004000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region  that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000002000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000001000 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000000800 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000000400 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000000200 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000000100 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000000080 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000000020 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000000010 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000000008 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000000004 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000000002 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache and targets non-DRAM system address.",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/goldmont/other.json b/pmu-events/arch/x86/goldmont/other.json
new file mode 100644
index 0000000..959cadd
--- /dev/null
+++ b/pmu-events/arch/x86/goldmont/other.json
@@ -0,0 +1,82 @@
+[
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes.  This will include cycles due to an ITLB miss, ICache miss and other events.",
+        "EventCode": "0x86",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "FETCH_STALL.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles code-fetch stalled due to any reason."
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ITLB miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ITLB miss.  Note: this event is not the same as page walk cycles to retrieve an instruction translation.",
+        "EventCode": "0x86",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FETCH_STALL.ITLB_FILL_PENDING_CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles code-fetch stalled due to an outstanding ITLB miss."
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend due to either a full resource  in the backend (RESOURCE_FULL) or due to the processor recovering from some event (RECOVERY).",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "ISSUE_SLOTS_NOT_CONSUMED.ANY",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Unfilled issue slots per cycle"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed because of a full resource in the backend.  Including but not limited to resources such as the Re-order Buffer (ROB), reservation stations (RS), load/store buffers, physical registers, or any other needed machine resource that is currently unavailable.   Note that uops must be available for consumption in order for this event to fire.  If a uop is not available (Instruction Queue is empty), this event will not count.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Unfilled issue slots per cycle because of a full resource in the backend"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend because allocation is stalled waiting for a mispredicted jump to retire or other branch-like conditions (e.g. the event is relevant during certain microcode flows).   Counts all issue slots blocked while within this window including slots where uops were not available in the Instruction Queue.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RECOVERY",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Unfilled issue slots per cycle to recover"
+    },
+    {
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts hardware interrupts received by the processor.",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "HW_INTERRUPTS.RECEIVED",
+        "SampleAfterValue": "203",
+        "BriefDescription": "Hardware interrupts received"
+    },
+    {
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts the number of core cycles during which interrupts are masked (disabled). Increments by 1 each core cycle that EFLAGS.IF is 0, regardless of whether interrupts are pending or not.",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "HW_INTERRUPTS.MASKED",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles hardware interrupts are masked"
+    },
+    {
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts core cycles during which there are pending interrupts, but interrupts are masked (EFLAGS.IF = 0).",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles pending interrupts are masked"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/goldmont/pipeline.json b/pmu-events/arch/x86/goldmont/pipeline.json
new file mode 100644
index 0000000..254788a
--- /dev/null
+++ b/pmu-events/arch/x86/goldmont/pipeline.json
@@ -0,0 +1,455 @@
+[
+    {
+        "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers.  This event uses fixed counter 0.  You cannot collect a PEBS record for this event.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 0",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired (Fixed event)"
+    },
+    {
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state.  The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time.  This event uses fixed counter 1.  You cannot collect a PEBS record for this event.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when core is not halted  (Fixed event)"
+    },
+    {
+        "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction.  In mobile systems the core frequency may change from time to time.  This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time.  This event uses fixed counter 2.  You cannot collect a PEBS record for this event.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x3",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when core is not halted  (Fixed event)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts a load blocked from using a store forward, but did not occur because the store data was not available at the right time.  The forward might occur subsequently when the data is available.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads blocked due to store data not ready (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts a load blocked from using a store forward because of an address/size mismatch, only one of the loads blocked from each store will be counted.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads blocked due to store forward restriction (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts loads that block because their address modulo 4K matches a pending store.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "LD_BLOCKS.4K_ALIAS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads blocked because address has 4k partial address false dependence (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts loads blocked because they are unable to find their physical address in the micro TLB (UTLB).",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "LD_BLOCKS.UTLB_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads blocked because address is not in the UTLB (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts anytime a load that retires is blocked for any reason.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "LD_BLOCKS.ALL_BLOCK",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads blocked (Precise event capable)"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts uops issued by the front end and allocated into the back end of the machine.  This event counts uops that retire as well as uops that were speculatively executed but didn't retire. The sort of speculative uops that might be counted includes, but is not limited to those uops issued in the shadow of a mispredicted branch, those uops that are inserted during an assist (such as for a denormal floating point result), and (previously allocated) uops that might be canceled during a machine clear.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Uops issued to the back end per cycle"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Core cycles when core is not halted.  This event uses a (_P)rogrammable general purpose performance counter.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.CORE_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when core is not halted"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Reference cycles when core is not halted.  This event uses a programmable general purpose performance counter.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when core is not halted"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "This event is used to measure front-end inefficiencies. I.e. when front-end of the machine is not delivering uops to the back-end and the back-end is not stalled. This event can be used to identify if the machine is truly front-end bound.  When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance. Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into uops in machine understandable format and putting them into a uop queue to be consumed by back end. The back-end then takes these uops, allocates the required resources.  When all resources are ready, uops are executed. If the back-end is not ready to accept uops from the front-end, then we do not want to count these as front-end bottlenecks.  However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls and eventually forcing the front-end to wait until the back-end is ready to receive more uops. This event counts only when back-end is requesting more uops and front-end is not able to provide them. When 3 uops are requested and no uops are delivered, the event counts 3. When 3 are requested, and only 1 is delivered, the event counts 2. When only 2 are delivered, the event counts 1. Alternatively stated, the event will not count if 3 uops are delivered, or if the back end is stalled and not requesting any uops at all.  Counts indicate missed opportunities for the front-end to deliver a uop to the back end. Some examples of conditions that cause front-end inefficiencies are: ICache misses, ITLB misses, and decoder restrictions that limit the front-end bandwidth. Known Issues: Some uops require multiple allocation slots.  These uops will not be charged as a front end 'not delivered' opportunity, and will be regarded as a back end problem. For example, the INC instruction has one uop that requires 2 issue slots.  A stream of INC instructions will not count as UOPS_NOT_DELIVERED, even though only one instruction can be issued per clock.  The low uop issue rate for a stream of INC instructions is considered to be a back end issue.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "UOPS_NOT_DELIVERED.ANY",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Uops requested but not-delivered to the back-end per cycle"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The event continues counting during hardware interrupts, traps, and inside interrupt handlers.  This is an architectural performance event.  This event uses a (_P)rogrammable general purpose performance counter. *This event is Precise Event capable:  The EventingRIP field in the PEBS record is precise to the address of the instruction which caused the event.  Note: Because PEBS records can be collected only on IA32_PMC0, only one event can use the PEBS facility at a time.",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts uops which retired.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "UOPS_RETIRED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts uops retired that are from the complex flows issued by the micro-sequencer (MS).  Counts both the uops from a micro-coded instruction, and the uops that might be generated from a micro-coded assist.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.MS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "MS uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of floating point divide uops retired.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "UOPS_RETIRED.FPDIV",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Floating point divide uops retired. (Precise Event Capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of integer divide uops retired.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "UOPS_RETIRED.IDIV",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Integer divide uops retired. (Precise Event Capable)"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts machine clears for any reason.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "MACHINE_CLEARS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "All machine clears"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification.  Self-modifying code (SMC) causes a severe penalty in all Intel architecture processors.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Self-Modifying Code detected"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts machine clears due to floating point (FP) operations needing assists.  For instance, if the result was a floating point denormal, the hardware clears the pipeline and reissues uops to produce the correct IEEE compliant denormal result.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.FP_ASSIST",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Machine clears due to FP assists"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts machine clears due to memory disambiguation.  Memory disambiguation happens when a load which has been issued conflicts with a previous unretired store in the pipeline whose address was not known at issue time, but is later resolved to be the same as the load address.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Machine clears due to memory disambiguation"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts branch instructions retired for all branch types.  This is an architectural performance event.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired branch instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was taken and when it was not taken.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7e",
+        "EventName": "BR_INST_RETIRED.JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired conditional branch instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of taken branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "BR_INST_RETIRED.ALL_TAKEN_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired taken branch instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts far branch instructions retired.  This includes far jump, far call and return, and Interrupt call and return.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xbf",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired far branch instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts near indirect call or near indirect jmp branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xeb",
+        "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired instructions of near indirect Jmp or call (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts near return branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf7",
+        "EventName": "BR_INST_RETIRED.RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired near return instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts near CALL branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf9",
+        "EventName": "BR_INST_RETIRED.CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired near call instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts near indirect CALL branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xfb",
+        "EventName": "BR_INST_RETIRED.IND_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired near indirect call instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts near relative CALL branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xfd",
+        "EventName": "BR_INST_RETIRED.REL_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired near relative call instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were taken and does not count when the Jcc branch instruction was not taken.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xfe",
+        "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired conditional branch instructions that were taken (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts mispredicted branch instructions retired including all branch types.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired mispredicted branch instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was supposed to be taken and when it was not supposed to be taken (but the processor predicted the opposite condition).",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7e",
+        "EventName": "BR_MISP_RETIRED.JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired mispredicted conditional branch instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts mispredicted branch instructions retired that were near indirect call or near indirect jmp, where the target address taken was not what the processor predicted.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0xeb",
+        "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired mispredicted instructions of near indirect Jmp or near indirect call. (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts mispredicted near RET branch instructions retired, where the return address taken was not what the processor predicted.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf7",
+        "EventName": "BR_MISP_RETIRED.RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired mispredicted near return instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts mispredicted near indirect CALL branch instructions retired, where the target address taken was not what the processor predicted.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0xfb",
+        "EventName": "BR_MISP_RETIRED.IND_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired mispredicted near indirect call instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were supposed to be taken but the processor predicted that it would not be taken.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0xfe",
+        "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired mispredicted conditional branch instructions that were taken (Precise event capable)"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts core cycles if either divide unit is busy.",
+        "EventCode": "0xCD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CYCLES_DIV_BUSY.ALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles a divider is busy"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts core cycles the integer divide unit is busy.",
+        "EventCode": "0xCD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CYCLES_DIV_BUSY.IDIV",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles the integer divide unit is busy"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts core cycles the floating point divide unit is busy.",
+        "EventCode": "0xCD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CYCLES_DIV_BUSY.FPDIV",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles the FP divide unit is busy"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of times a BACLEAR is signaled for any reason, including, but not limited to indirect branch/call,  Jcc (Jump on Conditional Code/Jump if Condition is Met) branch, unconditional branch/call, and returns.",
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BACLEARS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "BACLEARs asserted for any branch type"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts BACLEARS on return instructions.",
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BACLEARS.RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "BACLEARs asserted for return branch"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts BACLEARS on Jcc (Jump on Conditional Code/Jump if Condition is Met) branches.",
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BACLEARS.COND",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "BACLEARs asserted for conditional branch"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/goldmont/virtual-memory.json b/pmu-events/arch/x86/goldmont/virtual-memory.json
new file mode 100644
index 0000000..9805198
--- /dev/null
+++ b/pmu-events/arch/x86/goldmont/virtual-memory.json
@@ -0,0 +1,75 @@
+[
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts every core cycle when a Data-side (walks due to a data operation) page walk is in progress.",
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "PAGE_WALKS.D_SIDE_CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Duration of D-side page-walks in cycles"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts every core cycle when an Instruction-side (walks due to an instruction fetch) page walk is in progress.",
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "PAGE_WALKS.I_SIDE_CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Duration of I-side pagewalks in cycles"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts every core cycle a page-walk is in progress due to either a data memory operation or an instruction fetch.",
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "PAGE_WALKS.CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Duration of page-walks in cycles"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of times the machine was unable to find a translation in the Instruction Translation Lookaside Buffer (ITLB) for a linear address of an instruction fetch.  It counts when new translations are filled into the ITLB.  The event is speculative in nature, but will not count translations (page walks) that are begun and not finished, or translations that are finished but not filled into the ITLB.",
+        "EventCode": "0x81",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ITLB.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "ITLB misses"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired that caused a DTLB miss.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x11",
+        "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts store uops retired that caused a DTLB miss.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x12",
+        "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts uops retired that had a DTLB miss on load, store or either.  Note that when two distinct memory operations to the same page miss the DTLB, only one of them will be recorded as a DTLB miss.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x13",
+        "EventName": "MEM_UOPS_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/goldmontplus/cache.json b/pmu-events/arch/x86/goldmontplus/cache.json
new file mode 100644
index 0000000..b4791b4
--- /dev/null
+++ b/pmu-events/arch/x86/goldmontplus/cache.json
@@ -0,0 +1,1453 @@
+[
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts memory requests originating from the core that miss in the L2 cache.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache request misses"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts memory requests originating from the core that reference a cache line in the L2 cache.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache requests"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the intra-die interconnect (IDI) fabric. The XQ may reject transactions from the L2Q (non-cacheable requests), L2 misses and L2 write-back victims.",
+        "EventCode": "0x30",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "L2_REJECT_XQ.ALL",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests rejected by the XQ"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of demand and L1 prefetcher requests rejected by the L2Q due to a full or nearly full condition which likely indicates back pressure from L2Q. It also counts requests that would have gone directly to the XQ, but are rejected due to a full or nearly full condition, indicating back pressure from the IDI link. The L2Q may also reject transactions from a core to ensure fairness between cores, or to delay a core's dirty eviction when the address conflicts with incoming external snoops.",
+        "EventCode": "0x31",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "CORE_REJECT_L2Q.ALL",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests rejected by the L2Q"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts when a modified (dirty) cache line is evicted from the data L1 cache and needs to be written back to memory.  No count will occur if the evicted line is clean, and hence does not require a writeback.",
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "DL1.REPLACEMENT",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L1 Cache evictions for dirty data"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss.  Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.",
+        "EventCode": "0x86",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles code-fetch stalled due to an outstanding ICache miss."
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "EventCode": "0xB7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts locked memory uops retired.  This includes regular locks and bus locks. (To specifically count bus locks only, see the Offcore response event.)  A locked access is one with a lock prefix, or an exchange to memory.  See the SDM for a complete description of which memory load accesses are locks.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Locked load uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired where the data requested spans a 64 byte cache line boundary.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts store uops retired where the data requested spans a 64 byte cache line boundary.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Store uops retired that split a cache-line (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts memory uops retired where the data requested spans a 64 byte cache line boundary.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x43",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts the number of load uops retired.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts the number of store uops retired.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Store uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts the number of memory uops retired that are either a load or a store or both.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x83",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Memory uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired that hit the L1 data cache.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired that hit in the L2 cache.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired that hit L2 (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired that miss the L1 data cache.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired that miss in the L2 cache.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired that missed L2 (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired where the cache line containing the data was in the modified state of another core's or module's cache (HITM).  More specifically, this means that when the load address was checked by other caching agents (typically another processor) in the system, one of those caching agents indicated that they had a dirty copy of the data.  Loads that obtain a HITM response incur greater latency than is typical for a load.  In addition, since HITM indicates that some other processor had this data in its cache, it implies that the data was shared between processors, or potentially was a lock or semaphore value.  This event is useful for locating sharing, false sharing, and contended locks.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.HITM",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts memory load uops retired where the data is retrieved from the WCB (or fill buffer), indicating that the load found its data while that data was in the process of being brought into the L1 cache.  Typically a load will receive this indication when some other load or prefetch missed the L1 cache and was in the process of retrieving the cache line containing the data, but that process had not yet finished (and written the data back to the cache). For example, consider loads X and Y, both referencing the same cache line that is not in the L1 cache.  If load X misses cache first, it obtains a WCB (or fill buffer) and begins the process of requesting the data.  When load Y requests the data, it will either hit the WCB, or the L1 cache, depending on exactly what time the request to Y occurs.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads retired that hit WCB (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts memory load uops retired where the data is retrieved from DRAM.  Event is counted at retirement, so the speculative loads are ignored.  A memory load can hit (or miss) the L1 cache, hit (or miss) the L2 cache, hit DRAM, hit in the WCB or receive a HITM response.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads retired that came from DRAM (Precise event capable)"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines true miss for the L2 cache with a snoop miss in the other processor module.  Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines true miss for the L2 cache with a snoop miss in the other processor module. ",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand cacheable data reads of full cache lines outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data reads of full cache lines outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line true miss for the L2 cache with a snoop miss in the other processor module.  Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line true miss for the L2 cache with a snoop miss in the other processor module. ",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache true miss for the L2 cache with a snoop miss in the other processor module.  Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache true miss for the L2 cache with a snoop miss in the other processor module. ",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions true miss for the L2 cache with a snoop miss in the other processor module.  Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions true miss for the L2 cache with a snoop miss in the other processor module. ",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.COREWB.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module.  Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010400",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040400",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000400",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000400",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts bus lock and split lock requests outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000400",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts bus lock and split lock requests outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache lines requests by software prefetch instructions have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000011000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache lines requests by software prefetch instructions have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache lines requests by software prefetch instructions hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000041000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache lines requests by software prefetch instructions hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache lines requests by software prefetch instructions true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200001000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache lines requests by software prefetch instructions true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache lines requests by software prefetch instructions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000001000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache lines requests by software prefetch instructions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache lines requests by software prefetch instructions outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000001000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache lines requests by software prefetch instructions outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000012000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000042000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200002000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000002000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000002000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000014800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000044800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200004800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000004800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000004800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000018000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000048000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200008000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000008000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the uncore subsystem outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000008000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts requests to the uncore subsystem outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000013010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000043010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers true miss for the L2 cache with a snoop miss in the other processor module.  Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200003010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000003010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000003010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000013091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000043091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.  Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200003091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000003091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data reads (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000003091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data reads (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000040022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.  Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x00000132b7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x00000432b7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) hit the L2 cache.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.  Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x02000032b7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x10000032b7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.HITM_OTHER_CORE",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6, 0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+        "Offcore": "1"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+        "EventCode": "0xB7",
+        "MSRValue": "0x40000032b7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING",
+        "PDIR_COUNTER": "na",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/goldmontplus/frontend.json b/pmu-events/arch/x86/goldmontplus/frontend.json
new file mode 100644
index 0000000..a787896
--- /dev/null
+++ b/pmu-events/arch/x86/goldmontplus/frontend.json
@@ -0,0 +1,62 @@
+[
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is in the ICache (hit).  The event strives to count on a cache line basis, so that multiple accesses which hit in a single cache line count as one ICACHE.HIT.  Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "ICACHE.HIT",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "References per ICache line that are available in the ICache (hit). This event counts differently than Intel processors based on Silvermont microarchitecture"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is not in the ICache (miss).  The event strives to count on a cache line basis, so that multiple accesses which miss in a single cache line count as one ICACHE.MISSES.  Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is not in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "ICACHE.MISSES",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "References per ICache line that are not available in the ICache (miss). This event counts differently than Intel processors based on Silvermont microarchitecture"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line.  The event strives to count on a cache line basis, so that multiple fetches to a single cache line count as one ICACHE.ACCESSES.  Specifically, the event counts when accesses from straight line code cross the cache line boundary, or when a branch target is to a new line. This event counts differently than Intel processors based on Silvermont microarchitecture.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "ICACHE.ACCESSES",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "References per ICache line. This event counts differently than Intel processors based on Silvermont microarchitecture"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of times the Microcode Sequencer (MS) starts a flow of uops from the MSROM. It does not count every time a uop is read from the MSROM.  The most common case that this counts is when a micro-coded instruction is encountered by the front end of the machine.  Other cases include when an instruction encounters a fault, trap, or microcode assist of any sort that initiates a flow of uops.  The event will count MS startups for uops that are speculative, and subsequently cleared by branch mispredict or a machine clear.",
+        "EventCode": "0xE7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MS_DECODED.MS_ENTRY",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "MS decode starts"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of times the prediction (from the predecode cache) for instruction length is incorrect.",
+        "EventCode": "0xE9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "DECODE_RESTRICTION.PREDECODE_WRONG",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Decode restrictions due to predicting wrong instruction length"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/goldmontplus/memory.json b/pmu-events/arch/x86/goldmontplus/memory.json
new file mode 100644
index 0000000..91e0815
--- /dev/null
+++ b/pmu-events/arch/x86/goldmontplus/memory.json
@@ -0,0 +1,38 @@
+[
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts when a memory load of a uop that spans a page boundary (a split) is retired.",
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MISALIGN_MEM_REF.LOAD_PAGE_SPLIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops that split a page (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts when a memory store of a uop that spans a page boundary (a split) is retired.",
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Store uops that split a page (Precise event capable)"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts machine clears due to memory ordering issues.  This occurs when a snoop request happens and the machine is uncertain if memory ordering will be preserved - as another core is in the process of modifying the data.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "20003",
+        "BriefDescription": "Machine clears due to memory ordering issue"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/goldmontplus/other.json b/pmu-events/arch/x86/goldmontplus/other.json
new file mode 100644
index 0000000..b860374
--- /dev/null
+++ b/pmu-events/arch/x86/goldmontplus/other.json
@@ -0,0 +1,98 @@
+[
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes.  This will include cycles due to an ITLB miss, ICache miss and other events.",
+        "EventCode": "0x86",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "FETCH_STALL.ALL",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles code-fetch stalled due to any reason."
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ITLB miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ITLB miss.  Note: this event is not the same as page walk cycles to retrieve an instruction translation.",
+        "EventCode": "0x86",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "FETCH_STALL.ITLB_FILL_PENDING_CYCLES",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles the code-fetch stalls and an ITLB miss is outstanding."
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend due to either a full resource  in the backend (RESOURCE_FULL) or due to the processor recovering from some event (RECOVERY).",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "ISSUE_SLOTS_NOT_CONSUMED.ANY",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Unfilled issue slots per cycle"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed because of a full resource in the backend.  Including but not limited to resources such as the Re-order Buffer (ROB), reservation stations (RS), load/store buffers, physical registers, or any other needed machine resource that is currently unavailable.   Note that uops must be available for consumption in order for this event to fire.  If a uop is not available (Instruction Queue is empty), this event will not count.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Unfilled issue slots per cycle because of a full resource in the backend"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend because allocation is stalled waiting for a mispredicted jump to retire or other branch-like conditions (e.g. the event is relevant during certain microcode flows).   Counts all issue slots blocked while within this window including slots where uops were not available in the Instruction Queue.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RECOVERY",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Unfilled issue slots per cycle to recover"
+    },
+    {
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts hardware interrupts received by the processor.",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "HW_INTERRUPTS.RECEIVED",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "203",
+        "BriefDescription": "Hardware interrupts received"
+    },
+    {
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts the number of core cycles during which interrupts are masked (disabled). Increments by 1 each core cycle that EFLAGS.IF is 0, regardless of whether interrupts are pending or not.",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "HW_INTERRUPTS.MASKED",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles hardware interrupts are masked"
+    },
+    {
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts core cycles during which there are pending interrupts, but interrupts are masked (EFLAGS.IF = 0).",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles pending interrupts are masked"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/goldmontplus/pipeline.json b/pmu-events/arch/x86/goldmontplus/pipeline.json
new file mode 100644
index 0000000..ccf1aed
--- /dev/null
+++ b/pmu-events/arch/x86/goldmontplus/pipeline.json
@@ -0,0 +1,544 @@
+[
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers.  This event uses fixed counter 0.  You cannot collect a PEBS record for this event.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 0",
+        "UMask": "0x1",
+        "PEBScounters": "32",
+        "EventName": "INST_RETIRED.ANY",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired (Fixed event)"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state.  The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regard to time.  This event uses fixed counter 1.  You cannot collect a PEBS record for this event.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x2",
+        "PEBScounters": "33",
+        "EventName": "CPU_CLK_UNHALTED.CORE",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when core is not halted  (Fixed event)"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction.  In mobile systems the core frequency may change from time to time.  This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time.  This event uses fixed counter 2.  You cannot collect a PEBS record for this event.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x3",
+        "PEBScounters": "34",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when core is not halted  (Fixed event)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts a load blocked from using a store forward, where the forward did not occur because the store data was not available at the right time.  The forward might occur subsequently when the data is available.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads blocked due to store data not ready (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts a load blocked from using a store forward because of an address/size mismatch, only one of the loads blocked from each store will be counted.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads blocked due to store forward restriction (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts loads that block because their address modulo 4K matches a pending store.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "LD_BLOCKS.4K_ALIAS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads blocked because address has 4k partial address false dependence (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts loads blocked because they are unable to find their physical address in the micro TLB (UTLB).",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "LD_BLOCKS.UTLB_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads blocked because address is not in the UTLB (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts anytime a load that retires is blocked for any reason.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "LD_BLOCKS.ALL_BLOCK",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads blocked (Precise event capable)"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts uops issued by the front end and allocated into the back end of the machine.  This event counts uops that retire as well as uops that were speculatively executed but didn't retire. The sort of speculative uops that might be counted includes, but is not limited to those uops issued in the shadow of a mispredicted branch, those uops that are inserted during an assist (such as for a denormal floating point result), and (previously allocated) uops that might be canceled during a machine clear.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.ANY",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Uops issued to the back end per cycle"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Core cycles when core is not halted.  This event uses a (_P)rogrammable general purpose performance counter.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.CORE_P",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when core is not halted"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Reference cycles when core is not halted.  This event uses a (_P)rogrammable general purpose performance counter.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.REF",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when core is not halted"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "This event is used to measure front-end inefficiencies, i.e. when the front-end of the machine is not delivering uops to the back-end and the back-end is not stalled. This event can be used to identify if the machine is truly front-end bound.  When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance. Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into uops in machine understandable format and putting them into a uop queue to be consumed by back end. The back-end then takes these uops, allocates the required resources.  When all resources are ready, uops are executed. If the back-end is not ready to accept uops from the front-end, then we do not want to count these as front-end bottlenecks.  However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls, eventually forcing the front-end to wait until the back-end is ready to receive more uops. This event counts only when back-end is requesting more uops and front-end is not able to provide them. When 3 uops are requested and no uops are delivered, the event counts 3. When 3 are requested, and only 1 is delivered, the event counts 2. When only 2 are delivered, the event counts 1. Alternatively stated, the event will not count if 3 uops are delivered, or if the back end is stalled and not requesting any uops at all.  Counts indicate missed opportunities for the front-end to deliver a uop to the back end. Some examples of conditions that cause front-end inefficiencies are: ICache misses, ITLB misses, and decoder restrictions that limit the front-end bandwidth. Known Issues: Some uops require multiple allocation slots.  These uops will not be charged as a front end 'not delivered' opportunity, and will be regarded as a back end problem. 
For example, the INC instruction has one uop that requires 2 issue slots.  A stream of INC instructions will not count as UOPS_NOT_DELIVERED, even though only one instruction can be issued per clock.  The low uop issue rate for a stream of INC instructions is considered to be a back end issue.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "UOPS_NOT_DELIVERED.ANY",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Uops requested but not-delivered to the back-end per cycle"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The event continues counting during hardware interrupts, traps, and inside interrupt handlers.  This is an architectural performance event.  This event uses a (_P)rogrammable general purpose performance counter. *This event is Precise Event capable:  The EventingRIP field in the PEBS record is precise to the address of the instruction which caused the event.  Note: Because PEBS records can be collected only on IA32_PMC0, only one event can use the PEBS facility at a time.",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts INST_RETIRED.ANY using the Reduced Skid PEBS feature that reduces the shadow in which events aren't counted allowing for a more unbiased distribution of samples across instructions retired.",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired - using Reduced Skid PEBS feature"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts uops which retired.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.ANY",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts uops retired that are from the complex flows issued by the micro-sequencer (MS).  Counts both the uops from a micro-coded instruction, and the uops that might be generated from a micro-coded assist.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.MS",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "MS uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of floating point divide uops retired.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.FPDIV",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Floating point divide uops retired (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of integer divide uops retired.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.IDIV",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Integer divide uops retired (Precise event capable)"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts machine clears for any reason.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.ALL",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "20003",
+        "BriefDescription": "All machine clears"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification.  Self-modifying code (SMC) causes a severe penalty in all Intel architecture processors.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "20003",
+        "BriefDescription": "Self-Modifying Code detected"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts machine clears due to floating point (FP) operations needing assists.  For instance, if the result was a floating point denormal, the hardware clears the pipeline and reissues uops to produce the correct IEEE compliant denormal result.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.FP_ASSIST",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "20003",
+        "BriefDescription": "Machine clears due to FP assists"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts machine clears due to memory disambiguation.  Memory disambiguation happens when a load which has been issued conflicts with a previous unretired store in the pipeline whose address was not known at issue time, but is later resolved to be the same as the load address.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "20003",
+        "BriefDescription": "Machine clears due to memory disambiguation"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of times that the machine clears due to a page fault. Covers both I-side and D-side (Loads/Stores) page faults. A page fault occurs when either the page is not present or there is an access violation.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.PAGE_FAULT",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "20003",
+        "BriefDescription": "Machine clears due to a page fault"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts branch instructions retired for all branch types.  This is an architectural performance event.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired branch instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was taken and when it was not taken.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7e",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired conditional branch instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts the number of taken branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.ALL_TAKEN_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired taken branch instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts far branch instructions retired.  This includes far jump, far call and return, and Interrupt call and return.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xbf",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired far branch instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts near indirect call or near indirect jmp branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xeb",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired instructions of near indirect Jmp or call (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts near return branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf7",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired near return instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts near CALL branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf9",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired near call instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts near indirect CALL branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xfb",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.IND_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired near indirect call instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts near relative CALL branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xfd",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.REL_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired near relative call instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were taken and does not count when the Jcc branch instruction was not taken.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0xfe",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired conditional branch instructions that were taken (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts mispredicted branch instructions retired including all branch types.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired mispredicted branch instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts mispredicted Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was supposed to be taken and when it was not supposed to be taken (but the processor predicted the opposite condition).",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7e",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired mispredicted conditional branch instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts mispredicted branch instructions retired that were near indirect call or near indirect jmp, where the target address taken was not what the processor predicted.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0xeb",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired mispredicted instructions of near indirect Jmp or near indirect call (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts mispredicted near RET branch instructions retired, where the return address taken was not what the processor predicted.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf7",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired mispredicted near return instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts mispredicted near indirect CALL branch instructions retired, where the target address taken was not what the processor predicted.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0xfb",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.IND_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired mispredicted near indirect call instructions (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts mispredicted Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were supposed to be taken but the processor predicted that they would not be taken.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0xfe",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Retired mispredicted conditional branch instructions that were taken (Precise event capable)"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts core cycles if either divide unit is busy.",
+        "EventCode": "0xCD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "CYCLES_DIV_BUSY.ALL",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles a divider is busy"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts core cycles the integer divide unit is busy.",
+        "EventCode": "0xCD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "CYCLES_DIV_BUSY.IDIV",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles the integer divide unit is busy"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts core cycles the floating point divide unit is busy.",
+        "EventCode": "0xCD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "CYCLES_DIV_BUSY.FPDIV",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles the FP divide unit is busy"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of times a BACLEAR is signaled for any reason, including, but not limited to indirect branch/call,  Jcc (Jump on Conditional Code/Jump if Condition is Met) branch, unconditional branch/call, and returns.",
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BACLEARS.ALL",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "BACLEARs asserted for any branch type"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts BACLEARS on return instructions.",
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BACLEARS.RETURN",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "BACLEARs asserted for return branch"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts BACLEARS on Jcc (Jump on Conditional Code/Jump if Condition is Met) branches.",
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "BACLEARS.COND",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "BACLEARs asserted for conditional branch"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/goldmontplus/virtual-memory.json b/pmu-events/arch/x86/goldmontplus/virtual-memory.json
new file mode 100644
index 0000000..0b53a3b
--- /dev/null
+++ b/pmu-events/arch/x86/goldmontplus/virtual-memory.json
@@ -0,0 +1,218 @@
+[
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts page walks completed due to demand data loads (including SW prefetches) whose address translations missed in all TLB levels and were mapped to 4K pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Page walk completed due to a demand load to a 4K page"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts page walks completed due to demand data loads (including SW prefetches) whose address translations missed in all TLB levels and were mapped to 2M or 4M pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Page walk completed due to a demand load to a 2M or 4M page"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts page walks completed due to demand data loads (including SW prefetches) whose address translations missed in all TLB levels and were mapped to 1GB pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1GB",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Page walk completed due to a demand load to a 1GB page"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts once per cycle for each page walk occurring due to a load (demand data loads or SW prefetches). Includes cycles spent traversing the Extended Page Table (EPT). Average cycles per walk can be calculated by dividing by the number of walks.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Page walks outstanding due to a demand load every cycle."
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Page walk completed due to a demand data store to a 4K page"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M or 4M pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Page walk completed due to a demand data store to a 2M or 4M page"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1GB pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1GB",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Page walk completed due to a demand data store to a 1GB page"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts once per cycle for each page walk occurring due to a demand data store. Includes cycles spent traversing the Extended Page Table (EPT). Average cycles per walk can be calculated by dividing by the number of walks.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Page walks outstanding due to a demand data store every cycle."
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts once per cycle for each page walk only while traversing the Extended Page Table (EPT), and does not count during the rest of the translation.  The EPT is used for translating Guest-Physical Addresses to Physical Addresses for Virtual Machine Monitors (VMMs).  Average cycles per walk can be calculated by dividing the count by number of walks.",
+        "EventCode": "0x4F",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "EPT.WALK_PENDING",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Page walks outstanding due to walking the EPT every cycle"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts the number of times the machine was unable to find a translation in the Instruction Translation Lookaside Buffer (ITLB) for a linear address of an instruction fetch.  It counts when new translations are filled into the ITLB.  The event is speculative in nature, but will not count translations (page walks) that are begun and not finished, or translations that are finished but not filled into the ITLB.",
+        "EventCode": "0x81",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "ITLB.MISS",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "ITLB misses"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts page walks completed due to instruction fetches whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Page walk completed due to an instruction fetch in a 4K page"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts page walks completed due to instruction fetches whose address translations missed in the TLB and were mapped to 2M or 4M pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Page walk completed due to an instruction fetch in a 2M or 4M page"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts page walks completed due to instruction fetches whose address translations missed in the TLB and were mapped to 1GB pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_1GB",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Page walk completed due to an instruction fetch in a 1GB page"
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts once per cycle for each page walk occurring due to an instruction fetch. Includes cycles spent traversing the Extended Page Table (EPT). Average cycles per walk can be calculated by dividing by the number of walks.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_PENDING",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Page walks outstanding due to an instruction fetch every cycle."
+    },
+    {
+        "CollectPEBSRecord": "1",
+        "PublicDescription": "Counts STLB flushes.  The TLBs are flushed on instructions like INVLPG and MOV to CR3.",
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "TLB_FLUSHES.STLB_ANY",
+        "PDIR_COUNTER": "na",
+        "SampleAfterValue": "20003",
+        "BriefDescription": "STLB flushes"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts load uops retired that caused a DTLB miss.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x11",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts store uops retired that caused a DTLB miss.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x12",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)"
+    },
+    {
+        "PEBS": "2",
+        "CollectPEBSRecord": "2",
+        "PublicDescription": "Counts uops retired that had a DTLB miss on load, store or either.  Note that when two distinct memory operations to the same page miss the DTLB, only one of them will be recorded as a DTLB miss.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x13",
+        "PEBScounters": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswell/cache.json b/pmu-events/arch/x86/haswell/cache.json
new file mode 100644
index 0000000..da4d6dd
--- /dev/null
+++ b/pmu-events/arch/x86/haswell/cache.json
@@ -0,0 +1,1064 @@
+[
+    {
+        "PublicDescription": "Demand data read requests that missed L2, no rejects.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "Errata": "HSD78",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read miss L2, no rejects",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x22",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of instruction fetches that missed the L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache misses when fetching instructions",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand requests that miss L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x27",
+        "Errata": "HSD78",
+        "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand requests that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 HW prefetcher requests that missed L2.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "L2_RQSTS.L2_PF_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 prefetch requests that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "All requests that missed L2.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "Errata": "HSD78",
+        "EventName": "L2_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "All requests that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand data read requests that hit L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "Errata": "HSD78",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of instruction fetches that hit the L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x44",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x50",
+        "EventName": "L2_RQSTS.L2_PF_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 prefetch requests that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe1",
+        "Errata": "HSD78",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 store RFO requests.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe2",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests to L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 code requests.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe4",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 code requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand requests to L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe7",
+        "Errata": "HSD78",
+        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand requests to L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 HW prefetcher requests.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf8",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from L2 hardware prefetchers",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "All requests to L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "Errata": "HSD78",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "All L2 requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Not rejected writebacks that hit L2 cache.",
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x50",
+        "EventName": "L2_DEMAND_RQSTS.WB_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts each cache miss condition for references to the last level cache.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests missed L3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts requests originating from the core that reference a cache line in the last level cache.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increments the number of outstanding L1D misses every cycle. Set Cmask = 1 and Edge =1 to count occurrences.",
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D miss outstanding duration in cycles",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "CounterMask": "1",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+        "CounterMask": "1",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_PEND_MISS.REQUEST_FB_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch. HWP are e.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers unavailability.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts when new data lines are brought into the L1 Data cache, which cause other lines to be evicted from the cache.",
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D.REPLACEMENT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D data line replacements",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding demand data read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD78, HSD62, HSD61",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD78, HSD62, HSD61",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD78, HSD62, HSD61",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding Demand code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "HSD62, HSD61",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "HSD62, HSD61",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "HSD62, HSD61",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "HSD62, HSD61",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "HSD62, HSD61",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles in which the L1D is locked.",
+        "EventCode": "0x63",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when L1D is locked",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand data read requests sent to uncore.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD78",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand code read requests sent to uncore.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cacheable and noncacheable code read requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand RFO read requests sent to uncore, including regular RFOs, locks, ItoM.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Data read requests sent to uncore (demand and prefetch).",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand and prefetch data reads",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x11",
+        "Errata": "HSD29, HSM30",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that miss the STLB. (precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x12",
+        "Errata": "HSD29, HSM30",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store uops that miss the STLB. (precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "L1_Hit_Indication": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "Errata": "HSD76, HSD29, HSM30",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops with locked access. (precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts load uops retired which had memory addresses split across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "Errata": "HSD29, HSM30",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that split across a cacheline boundary. (precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts store uops retired which had memory addresses split across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "Errata": "HSD29, HSM30",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store uops that split across a cacheline boundary. (precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "L1_Hit_Indication": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "Errata": "HSD29, HSM30",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired load uops. (precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts all store uops retired. This is a precise event.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "Errata": "HSD29, HSM30",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired store uops. (precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "L1_Hit_Indication": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD29, HSM30",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retired load uops with L1 cache hits as data sources.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "HSD76, HSD29, HSM30",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops with L2 cache hits as data sources.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts retired load uops in which data sources were data hits in the L3 cache without snoops required. This does not include hardware prefetches. This is a precise event.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts retired load uops in which data sources missed in the L1 cache. This does not include hardware prefetches. This is a precise event.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "HSM30",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops misses in L1 cache as data sources.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "Errata": "HSD29, HSM30",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Retired load uops with L2 cache misses as data sources.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "Errata": "HSM30",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD29, HSD25, HSM26, HSM30",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HIT in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "HSD29, HSD25, HSM26, HSM30",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. ",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HITM (hit modified) in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "HSD29, HSD25, HSM26, HSM30",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. ",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.",
+        "EventCode": "0xD3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD74, HSD29, HSD25, HSM30",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PublicDescription": "Demand data read requests that access L2 cache.",
+        "EventCode": "0xf0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_TRANS.DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RFO requests that access L2 cache.",
+        "EventCode": "0xf0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_TRANS.RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache accesses when fetching instructions.",
+        "EventCode": "0xf0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_TRANS.CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache accesses when fetching instructions",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Any MLC or L3 HW prefetch accessing L2, including rejects.",
+        "EventCode": "0xf0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_TRANS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 or L3 HW prefetches that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L1D writebacks that access L2 cache.",
+        "EventCode": "0xf0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_TRANS.L1D_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L1D writebacks that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 fill requests that access L2 cache.",
+        "EventCode": "0xf0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_TRANS.L2_FILL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 fill requests that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 writebacks that access L2 cache.",
+        "EventCode": "0xf0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_TRANS.L2_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 writebacks that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Transactions accessing L2 pipe.",
+        "EventCode": "0xf0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_TRANS.ALL_REQUESTS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Transactions accessing L2 pipe",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache lines in I state filling L2.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_IN.I",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in I state filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache lines in S state filling L2.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_IN.S",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in S state filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache lines in E state filling L2.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_IN.E",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in E state filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of L2 cache lines brought into the L2 cache.  Lines are filled into the L2 cache when there was an L2 miss.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "L2_LINES_IN.ALL",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Clean L2 cache lines evicted by demand.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Clean L2 cache lines evicted by demand",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Dirty L2 cache lines evicted by demand.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x6",
+        "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines evicted by demand",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "",
+        "EventCode": "0xf4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Split locks in SQ",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c8fff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.L3_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all requests that hit in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c07f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c07f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs  that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0100",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs  that hit in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswell/floating-point.json b/pmu-events/arch/x86/haswell/floating-point.json
new file mode 100644
index 0000000..f9843e5
--- /dev/null
+++ b/pmu-events/arch/x86/haswell/floating-point.json
@@ -0,0 +1,83 @@
+[
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "HSD56, HSM57",
+        "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "Errata": "HSD56, HSM57",
+        "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Note that a whole rep string only counts AVX_INSTS.ALL once.",
+        "EventCode": "0xC6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "AVX_INSTS.ALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Approximate counts of AVX & AVX2 256-bit instructions, including non-arithmetic instructions, loads, and stores.  May count non-AVX instructions that employ 256-bit operations, including (but not necessarily limited to) rep string instructions that use 256-bit loads and stores for optimized performance, XSAVE* and XRSTOR*, and operations that transition the x87 FPU data registers between x87 and MMX.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of X87 FP assists due to output values.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ASSIST.X87_OUTPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of X87 assists due to output value.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of X87 FP assists due to input values.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ASSIST.X87_INPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of X87 assists due to input value.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SIMD FP assists due to output values.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_ASSIST.SIMD_OUTPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of SIMD FP assists due to Output values",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SIMD FP assists due to input values.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_ASSIST.SIMD_INPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of SIMD FP assists due to input values",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with any input/output SSE* or FP assists.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1e",
+        "EventName": "FP_ASSIST.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswell/frontend.json b/pmu-events/arch/x86/haswell/frontend.json
new file mode 100644
index 0000000..c0a5bed
--- /dev/null
+++ b/pmu-events/arch/x86/haswell/frontend.json
@@ -0,0 +1,294 @@
+[
+    {
+        "PublicDescription": "Counts cycles the IDQ is empty.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "HSD135",
+        "EventName": "IDQ.EMPTY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MITE path. Set Cmask = 1 to count cycles.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_DSB_OCCUR",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles DSB is delivered four uops. Set Cmask = 4.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles DSB is delivered at least one uop. Set Cmask = 1.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles MITE is delivered at least one uop. Set Cmask = 1.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering any Uop",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts uops delivered by the Front-end with the assistance of the microcode sequencer.  Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder.  Using other instructions, if possible, will usually improve performance.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops.  Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder.  Using other instructions, if possible, will usually improve performance.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_SWITCHES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of uops delivered to IDQ from any path.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3c",
+        "EventName": "IDQ.MITE_ALL_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ICACHE.HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads, both cacheable and noncacheable, including UC fetches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts Instruction Cache (ICACHE) misses.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ICACHE.MISSES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes Uncacheable accesses.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ICACHE.IFETCH_STALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ICACHE.IFDATA_STALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of undelivered (unallocated) uops from the Front-end to the Resource Allocation Table (RAT) while the Back-end of the processor is not stalled. The Front-end can allocate up to 4 uops per cycle so this event can increment 0-4 times per cycle depending on the number of unallocated uops. This event is counted on a per-core basis.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD135",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles during which the Front-end allocated exactly zero uops to the Resource Allocation Table (RAT) while the Back-end of the processor is not stalled.  This event is counted on a per-core basis.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD135",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD135",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD135",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD135",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD135",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xAB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswell/hsw-metrics.json b/pmu-events/arch/x86/haswell/hsw-metrics.json
new file mode 100644
index 0000000..5ab5c78
--- /dev/null
+++ b/pmu-events/arch/x86/haswell/hsw-metrics.json
@@ -0,0 +1,158 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "MetricGroup": "Pipeline",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+        "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+        "MetricGroup": "Frontend",
+        "MetricName": "IFetch_Line_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (threaded)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "MetricGroup": "Pipeline;Summary",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Total issue-pipeline slots",
+        "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "SLOTS"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "SMT",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)",
+        "MetricGroup": "Pipeline;Ports_Utilization",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+        "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL  - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)",
+        "MetricGroup": "Unknown_Branches",
+        "MetricName": "BAClear_Cost"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+        "MetricGroup": "Memory_Bound;Memory_Lat",
+        "MetricName": "Load_Miss_Real_Latency"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+        "MetricGroup": "Memory_Bound;Memory_BW",
+        "MetricName": "MLP"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TLB",
+        "MetricName": "Page_Walks_Utilization"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware threads were active",
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT;Summary",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Summary",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "C3 residency percent per core",
+        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    }
+]
diff --git a/pmu-events/arch/x86/haswell/memory.json b/pmu-events/arch/x86/haswell/memory.json
new file mode 100644
index 0000000..e5f9fa6
--- /dev/null
+++ b/pmu-events/arch/x86/haswell/memory.json
@@ -0,0 +1,676 @@
+[
+    {
+        "PublicDescription": "Speculative cache-line split load uops dispatched to L1D.",
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MISALIGN_MEM_REF.LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative cache-line split store-address uops dispatched to L1D.",
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MISALIGN_MEM_REF.STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TX_MEM.ABORT_CONFLICT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional writes.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TX_EXEC.MISC1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "TX_EXEC.MISC2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "TX_EXEC.MISC3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "TX_EXEC.MISC4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "TX_EXEC.MISC5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of memory ordering machine clears detected. Memory ordering machine clears can result from memory address aliasing or snoops from another hardware thread or core to data inflight in the pipeline.  Machine clears can have a significant performance impact if they are happening frequently.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "HLE_RETIRED.START",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution started.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "HLE_RETIRED.COMMIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution successfully committed.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "HLE_RETIRED.ABORTED",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "HLE_RETIRED.ABORTED_MISC1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "HLE_RETIRED.ABORTED_MISC2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to uncommon conditions.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "HLE_RETIRED.ABORTED_MISC3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "Errata": "HSD65",
+        "EventName": "HLE_RETIRED.ABORTED_MISC4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts).",
+        "EventCode": "0xc8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "HLE_RETIRED.ABORTED_MISC5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RTM_RETIRED.START",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution started.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RTM_RETIRED.COMMIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution successfully committed.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RTM_RETIRED.ABORTED",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RTM_RETIRED.ABORTED_MISC1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RTM_RETIRED.ABORTED_MISC2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "RTM_RETIRED.ABORTED_MISC3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "Errata": "HSD65",
+        "EventName": "RTM_RETIRED.ABORTED_MISC4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+        "EventCode": "0xc9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "RTM_RETIRED.ABORTED_MISC5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "HSD76, HSD25, HSM26",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Loads with latency value being above 4.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "HSD76, HSD25, HSM26",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Loads with latency value being above 8.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "HSD76, HSD25, HSM26",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Loads with latency value being above 16.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "HSD76, HSD25, HSM26",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Loads with latency value being above 32.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "HSD76, HSD25, HSM26",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2003",
+        "BriefDescription": "Loads with latency value being above 64.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "HSD76, HSD25, HSM26",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "1009",
+        "BriefDescription": "Loads with latency value being above 128.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "HSD76, HSD25, HSM26",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "503",
+        "BriefDescription": "Loads with latency value being above 256.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "UMask": "0x1",
+        "Errata": "HSD76, HSD25, HSM26",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "101",
+        "BriefDescription": "Loads with latency value being above 512.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PublicDescription": "Counts all requests that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc08fff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all requests that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01004007f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc007f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100400244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.L3_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch code reads that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100400122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100400091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00100",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) code reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100400004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand code reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100400002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100400001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss in the L3",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswell/other.json b/pmu-events/arch/x86/haswell/other.json
new file mode 100644
index 0000000..8a4d898
--- /dev/null
+++ b/pmu-events/arch/x86/haswell/other.json
@@ -0,0 +1,43 @@
+[
+    {
+        "PublicDescription": "Unhalted core cycles when the thread is in ring 0.",
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPL_CYCLES.RING0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "CPL_CYCLES.RING0_TRANS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of intervals between processor halts while thread is in ring 0.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.",
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPL_CYCLES.RING123",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.",
+        "EventCode": "0x63",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswell/pipeline.json b/pmu-events/arch/x86/haswell/pipeline.json
new file mode 100644
index 0000000..a4dcfce
--- /dev/null
+++ b/pmu-events/arch/x86/haswell/pipeline.json
@@ -0,0 +1,1338 @@
+[
+    {
+        "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. INST_RETIRED.ANY is counted by a designated fixed counter, leaving the programmable counters available for other events. Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 0",
+        "UMask": "0x1",
+        "Errata": "HSD140, HSD143",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired from execution.",
+        "CounterHTOff": "Fixed counter 0"
+    },
+    {
+        "PublicDescription": "This event counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when the thread is not in halt state.",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x3",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load.  The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. The penalty for blocked store forwarding is that the load must wait for the store to write its value to the cache before it can be issued.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "loads blocked by overlapping with store buffer that cannot be forwarded",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Aliasing occurs when a load is issued after a store and their memory addresses are offset by 4K.  This event counts the number of loads that aliased with a preceding store, resulting in an extended address check in the pipeline which can have a performance impact.",
+        "EventCode": "0x07",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles spent waiting for a recovery after an event such as a processor nuke, JEClear, assist, hle/rtm abort etc.",
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "AnyThread": "1",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke)",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of uops issued by the Front-end of the pipeline to the Back-end. This event is counted at the allocation stage and will count both retired and non-retired uops.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x0E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of flags-merge uops allocated. Such uops add delay.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of slow LEA or similar uops allocated. Such uop has 3 sources (for example, 2 sources + immediate) regardless of whether it is a result of LEA instruction or not.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_ISSUED.SLOW_LEA",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of multiply packed/scalar single precision uops allocated.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_ISSUED.SINGLE_MUL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ARITH.DIVIDER_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Any uop executed by the Divider. (This includes all divide uops, sqrt, ...)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increments at the frequency of XCLK (100 MHz) when not halted.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3c",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.",
+        "EventCode": "0x4c",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for H/W prefetch.",
+        "EventCode": "0x4c",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_HIT_PRE.HW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of integer move elimination candidate uops that were eliminated.",
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SIMD move elimination candidate uops that were eliminated.",
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of integer move elimination candidate uops that were not eliminated.",
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SIMD move elimination candidate uops that were not eliminated.",
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles when the Reservation Station ( RS ) is empty for the thread. The RS is a structure that buffers allocated micro-ops from the Front-end. If there are many cycles when the RS is empty, it may represent an underflow of instructions delivered from the Front-end.",
+        "EventCode": "0x5E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles where the decoder is stalled on an instruction with a length changing prefix (LCP).",
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ILD_STALL.LCP",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Stall cycles due to the IQ being full.",
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ILD_STALL.IQ_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Stall cycles because IQ is full",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not taken macro-conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired macro-conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x90",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired direct near calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa0",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc1",
+        "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired macro-conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc2",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc4",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired indirect branches excluding calls and returns.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc8",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired indirect return branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xd0",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired direct near calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa0",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc1",
+        "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc4",
+        "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Mispredicted indirect branches excluding calls and returns.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a uop is dispatched on port 0 in this thread.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are executed in port 0.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are executed in port 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a uop is dispatched on port 1 in this thread.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are executed in port 1.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are executed in port 1.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a uop is dispatched on port 2 in this thread.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a uop is dispatched on port 3 in this thread.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 3.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a uop is dispatched on port 4 in this thread.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are executed in port 4.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are executed in port 4.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a uop is dispatched on port 5 in this thread.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are executed in port 5.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are executed in port 5.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a uop is dispatched on port 6 in this thread.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are executed in port 6.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are executed in port 6.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a uop is dispatched on port 7 in this thread.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_7",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 7.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles allocation is stalled due to resource related reason.",
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD135",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource-related stall cycles",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RESOURCE_STALLS.RS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.",
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RESOURCE_STALLS.SB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RESOURCE_STALLS.ROB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to re-order buffer full.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with pending L2 miss loads. Set Cmask=2 to count cycle.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD78",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with pending L2 cache miss loads.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with pending memory loads. Set Cmask=2 to count cycle.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with pending memory loads.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which no instructions were executed in the execution stage of the pipeline.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of loads missed L2.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls due to L2 cache misses.",
+        "CounterMask": "5",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which no instructions were executed in the execution stage of the pipeline and there were memory instructions pending (waiting for data).",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x6",
+        "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls due to memory subsystem.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Cycles with pending L1 data cache miss loads. Set Cmask=8 to count cycle.",
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0x8",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with pending L1 cache miss loads.",
+        "CounterMask": "8",
+        "CounterHTOff": "2"
+    },
+    {
+        "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.",
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0xc",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls due to L1 data cache misses",
+        "CounterMask": "12",
+        "CounterHTOff": "2"
+    },
+    {
+        "PublicDescription": "Number of uops delivered by the LSD.",
+        "EventCode": "0xa8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD144, HSD30, HSM31",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts the cycles where at least one uop was executed. It is counted per thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD144, HSD30, HSM31",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts the cycles where at least two uops were executed. It is counted per thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD144, HSD30, HSM31",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts the cycles where at least three uops were executed. It is counted per thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD144, HSD30, HSM31",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "HSD144, HSD30, HSM31",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts total number of uops to be executed per-core each cycle.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "HSD30, HSM31",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of uops executed on the core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "HSD30, HSM31",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "HSD30, HSM31",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "HSD30, HSM31",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "HSD30, HSM31",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "HSD30, HSM31",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of instructions at retirement.",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "Errata": "HSD11, HSD140",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.",
+        "EventCode": "0xC0",
+        "Counter": "1",
+        "UMask": "0x1",
+        "Errata": "HSD140",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+        "CounterHTOff": "1"
+    },
+    {
+        "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "INST_RETIRED.X87",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions: Counts also flows that have several X87 or flows that use X87 uops in the exception handling.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.",
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Actually retired uops.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Data_LA": "1"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "CounterMask": "10",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retirement slots used.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACHINE_CLEARS.CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear.  Machine clears can have a significant performance impact if they are happening frequently.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MACHINE_CLEARS.MASKMOV",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Branch instructions at retirement.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Conditional branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Direct and indirect near call instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Return instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of not taken branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Not taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of far branches retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Far branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Mispredicted branch instructions at retirement.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This event counts all mispredicted branch instructions retired. This is a precise event.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted macro branch instructions retired.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Count cases of saving new LBR records by hardware.",
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count cases of saving new LBR",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of front end re-steers due to BPU misprediction.",
+        "EventCode": "0xe6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "EventName": "BACLEARS.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswell/uncore.json b/pmu-events/arch/x86/haswell/uncore.json
new file mode 100644
index 0000000..3ef5c21
--- /dev/null
+++ b/pmu-events/arch/x86/haswell/uncore.json
@@ -0,0 +1,374 @@
+[
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x21",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EXTERNAL",
+    "BriefDescription": "An external snoop misses in some processor core.",
+    "PublicDescription": "An external snoop misses in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x41",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE",
+    "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+    "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x81",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION",
+    "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+    "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x24",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_EXTERNAL",
+    "BriefDescription": "An external snoop hits a non-modified line in some processor core.",
+    "PublicDescription": "An external snoop hits a non-modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x44",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE",
+    "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+    "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x84",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_EVICTION",
+    "BriefDescription": "A cross-core snoop resulted from L3 Eviction which hits a non-modified line in some processor core.",
+    "PublicDescription": "A cross-core snoop resulted from L3 Eviction which hits a non-modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x28",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_EXTERNAL",
+    "BriefDescription": "An external snoop hits a modified line in some processor core.",
+    "PublicDescription": "An external snoop hits a modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x48",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE",
+    "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+    "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x88",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_EVICTION",
+    "BriefDescription": "A cross-core snoop resulted from L3 Eviction which hits a modified line in some processor core.",
+    "PublicDescription": "A cross-core snoop resulted from L3 Eviction which hits a modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x11",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_M",
+    "BriefDescription": "L3 Lookup read request that access cache and found line in M-state.",
+    "PublicDescription": "L3 Lookup read request that access cache and found line in M-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x21",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M",
+    "BriefDescription": "L3 Lookup write request that access cache and found line in M-state.",
+    "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x41",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_M",
+    "BriefDescription": "L3 Lookup external snoop request that access cache and found line in M-state.",
+    "PublicDescription": "L3 Lookup external snoop request that access cache and found line in M-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x81",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M",
+    "BriefDescription": "L3 Lookup any request that access cache and found line in M-state.",
+    "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x18",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I",
+    "BriefDescription": "L3 Lookup read request that access cache and found line in I-state.",
+    "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x28",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_I",
+    "BriefDescription": "L3 Lookup write request that access cache and found line in I-state.",
+    "PublicDescription": "L3 Lookup write request that access cache and found line in I-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x48",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_I",
+    "BriefDescription": "L3 Lookup external snoop request that access cache and found line in I-state.",
+    "PublicDescription": "L3 Lookup external snoop request that access cache and found line in I-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x88",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I",
+    "BriefDescription": "L3 Lookup any request that access cache and found line in I-state.",
+    "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x1f",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI",
+    "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state.",
+    "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x2f",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI",
+    "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state.",
+    "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x4f",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_MESI",
+    "BriefDescription": "L3 Lookup external snoop request that access cache and found line in MESI-state.",
+    "PublicDescription": "L3 Lookup external snoop request that access cache and found line in MESI-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x8f",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI",
+    "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state.",
+    "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x86",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES",
+    "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state.",
+    "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x46",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_ES",
+    "BriefDescription": "L3 Lookup external snoop request that access cache and found line in E or S-state.",
+    "PublicDescription": "L3 Lookup external snoop request that access cache and found line in E or S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x16",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES",
+    "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state.",
+    "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x26",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES",
+    "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state.",
+    "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x80",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+    "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+    "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+    "Counter": "0",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x81",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+    "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+    "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x81",
+    "UMask": "0x20",
+    "EventName": "UNC_ARB_TRK_REQUESTS.WRITES",
+    "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
+    "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x83",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_COH_TRK_OCCUPANCY.All",
+    "BriefDescription": "Each cycle count number of valid entries in Coherency Tracker queue from allocation till deallocation. Aperture requests (snoops) appear as NC decoded internally and become coherent (snoop L3, access memory).",
+    "PublicDescription": "Each cycle count number of valid entries in Coherency Tracker queue from allocation till deallocation. Aperture requests (snoops) appear as NC decoded internally and become coherent (snoop L3, access memory).",
+    "Counter": "0",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x84",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+    "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+    "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "NCU",
+    "EventCode": "0x0",
+    "UMask": "0x01",
+    "EventName": "UNC_CLOCK.SOCKET",
+    "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+    "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+    "Counter": "FIXED",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswell/virtual-memory.json b/pmu-events/arch/x86/haswell/virtual-memory.json
new file mode 100644
index 0000000..777b500
--- /dev/null
+++ b/pmu-events/arch/x86/haswell/virtual-memory.json
@@ -0,0 +1,484 @@
+[
+    {
+        "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Completed page walks due to demand load misses that caused 4K page walks in any TLB levels.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Completed page walks due to demand load misses that caused 2M/4M page walks in any TLB levels.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Completed page walks in any TLB of any page size due to demand load misses.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles when the  page miss handler (PMH) is servicing page walks caused by DTLB load misses.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts load operations from a 4K page that miss the first DTLB level but hit the second and do not cause page walks.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT_4K",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Load misses that miss the  DTLB and hit the STLB (4K)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts load operations from a 2M page that miss the first DTLB level but hit the second and do not cause page walks.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT_2M",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Load misses that miss the  DTLB and hit the STLB (2M)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of cache load STLB hits. No page walk.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x60",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "DTLB demand load misses with low part of linear-to-physical address translation missed.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "DTLB_LOAD_MISSES.PDE_CACHE_MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DTLB demand load misses with low part of linear-to-physical address translation missed",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Miss in all TLB levels causes a page walk of any page size (4K/2M/4M/1G).",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Completed page walks due to store misses in one or more TLB levels of 4K page structure.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Completed page walks due to store misses in one or more TLB levels of 2M/4M page structure.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks. (1G)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Completed page walks due to store miss in any TLB levels of any page size (4K/2M/4M/1G).",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles when the  page miss handler (PMH) is servicing page walks caused by DTLB store misses.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts store operations from a 4K page that miss the first DTLB level but hit the second and do not cause page walks.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT_4K",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses that miss the  DTLB and hit the STLB (4K)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts store operations from a 2M page that miss the first DTLB level but hit the second and do not cause page walks.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT_2M",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses that miss the  DTLB and hit the STLB (2M)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x60",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "DTLB store misses with low part of linear-to-physical address translation missed.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "DTLB_STORE_MISSES.PDE_CACHE_MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DTLB store misses with low part of linear-to-physical address translation missed",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4f",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "EPT.WALK_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycle count for an Extended Page table walk.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Misses in ITLB that causes a page walk of any page size.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Completed page walks due to misses in ITLB 4K page entries.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Completed page walks due to misses in ITLB 2M/4M page entries.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Completed page walks in ITLB of any page size.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses in all ITLB levels that cause completed page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles when the  page miss handler (PMH) is servicing page walks caused by ITLB misses.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "ITLB_MISSES.WALK_DURATION",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "ITLB misses that hit STLB (4K).",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ITLB_MISSES.STLB_HIT_4K",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code misses that miss the ITLB and hit the STLB (4K)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "ITLB misses that hit STLB (2M).",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "ITLB_MISSES.STLB_HIT_2M",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code misses that miss the ITLB and hit the STLB (2M)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "ITLB misses that hit STLB. No page walk.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x60",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of ITLB flushes, includes 4k/2M/4M pages.",
+        "EventCode": "0xae",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of DTLB page walker loads that hit in the L1+FB.",
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x11",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of DTLB page walker hits in the L1+FB",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of DTLB page walker loads that hit in the L2.",
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x12",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of DTLB page walker hits in the L2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of DTLB page walker loads that hit in the L3.",
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x14",
+        "Errata": "HSD25",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of DTLB page walker loads from memory.",
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "Errata": "HSD25",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of DTLB page walker hits in Memory",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of ITLB page walker loads that hit in the L1+FB.",
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of ITLB page walker hits in the L1+FB",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of ITLB page walker loads that hit in the L2.",
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x22",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of ITLB page walker hits in the L2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of ITLB page walker loads that hit in the L3.",
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "Errata": "HSD25",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of ITLB page walker loads from memory.",
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x28",
+        "Errata": "HSD25",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_MEMORY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of ITLB page walker hits in Memory",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L2.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x44",
+        "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L3.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x48",
+        "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_MEMORY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in memory.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L3.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_MEMORY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in memory.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "DTLB flush attempts of the thread-specific entries.",
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Count number of STLB flush attempts.",
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "STLB flush attempts",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswellx/cache.json b/pmu-events/arch/x86/haswellx/cache.json
new file mode 100644
index 0000000..b2fbd61
--- /dev/null
+++ b/pmu-events/arch/x86/haswellx/cache.json
@@ -0,0 +1,1100 @@
+[
+    {
+        "EventCode": "0x24",
+        "UMask": "0x21",
+        "BriefDescription": "Demand Data Read miss L2, no rejects",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+        "Errata": "HSD78",
+        "PublicDescription": "Demand data read requests that missed L2, no rejects.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x22",
+        "BriefDescription": "RFO requests that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x24",
+        "BriefDescription": "L2 cache misses when fetching instructions",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "PublicDescription": "Number of instruction fetches that missed the L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x27",
+        "BriefDescription": "Demand requests that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+        "Errata": "HSD78",
+        "PublicDescription": "Demand requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x30",
+        "BriefDescription": "L2 prefetch requests that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.L2_PF_MISS",
+        "PublicDescription": "Counts all L2 HW prefetcher requests that missed L2.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x3f",
+        "BriefDescription": "All requests that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.MISS",
+        "Errata": "HSD78",
+        "PublicDescription": "All requests that missed L2.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x41",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "Errata": "HSD78",
+        "PublicDescription": "Demand data read requests that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x42",
+        "BriefDescription": "RFO requests that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x44",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "PublicDescription": "Number of instruction fetches that hit the L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x50",
+        "BriefDescription": "L2 prefetch requests that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.L2_PF_HIT",
+        "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe1",
+        "BriefDescription": "Demand Data Read requests",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "Errata": "HSD78",
+        "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe2",
+        "BriefDescription": "RFO requests to L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "PublicDescription": "Counts all L2 store RFO requests.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe4",
+        "BriefDescription": "L2 code requests",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "PublicDescription": "Counts all L2 code requests.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe7",
+        "BriefDescription": "Demand requests to L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+        "Errata": "HSD78",
+        "PublicDescription": "Demand requests to L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xf8",
+        "BriefDescription": "Requests from L2 hardware prefetchers",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "PublicDescription": "Counts all L2 HW prefetcher requests.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xff",
+        "BriefDescription": "All L2 requests",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "Errata": "HSD78",
+        "PublicDescription": "All requests to L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x27",
+        "UMask": "0x50",
+        "BriefDescription": "Not rejected writebacks that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_DEMAND_RQSTS.WB_HIT",
+        "PublicDescription": "Not rejected writebacks that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x2E",
+        "UMask": "0x41",
+        "BriefDescription": "Core-originated cacheable demand requests missed L3",
+        "Counter": "0,1,2,3",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "PublicDescription": "This event counts each cache miss condition for references to the last level cache.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x2E",
+        "UMask": "0x4f",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+        "Counter": "0,1,2,3",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PublicDescription": "This event counts requests originating from the core that reference a cache line in the last level cache.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x1",
+        "BriefDescription": "L1D miss outstanding duration in cycles",
+        "Counter": "2",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "PublicDescription": "Increments the number of outstanding L1D misses every cycle. Set Cmask = 1 and Edge =1 to count occurrences.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "Counter": "2",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+        "Counter": "2",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "AnyThread": "1",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch. HWP are excluded.",
+        "Counter": "0,1,2,3",
+        "EventName": "L1D_PEND_MISS.REQUEST_FB_FULL",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers unavailability.",
+        "Counter": "0,1,2,3",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x51",
+        "UMask": "0x1",
+        "BriefDescription": "L1D data line replacements",
+        "Counter": "0,1,2,3",
+        "EventName": "L1D.REPLACEMENT",
+        "PublicDescription": "This event counts when new data lines are brought into the L1 Data cache, which cause other lines to be evicted from the cache.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "Errata": "HSD78, HSD62, HSD61",
+        "PublicDescription": "Offcore outstanding demand data read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "CounterMask": "1",
+        "Errata": "HSD78, HSD62, HSD61",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "CounterMask": "6",
+        "Errata": "HSD78, HSD62, HSD61",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x2",
+        "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "Errata": "HSD62, HSD61",
+        "PublicDescription": "Offcore outstanding Demand code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "Errata": "HSD62, HSD61",
+        "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "CounterMask": "1",
+        "Errata": "HSD62, HSD61",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "Errata": "HSD62, HSD61",
+        "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "CounterMask": "1",
+        "Errata": "HSD62, HSD61",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x63",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles when L1D is locked",
+        "Counter": "0,1,2,3",
+        "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+        "PublicDescription": "Cycles in which the L1D is locked.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x1",
+        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "Errata": "HSD78",
+        "PublicDescription": "Demand data read requests sent to uncore.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x2",
+        "BriefDescription": "Cacheable and noncacheable code read requests",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "PublicDescription": "Demand code read requests sent to uncore.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x4",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "PublicDescription": "Demand RFO read requests sent to uncore, including regular RFOs, locks, ItoM.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x8",
+        "BriefDescription": "Demand and prefetch data reads",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "PublicDescription": "Data read requests sent to uncore (demand and prefetch).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb2",
+        "UMask": "0x1",
+        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x11",
+        "BriefDescription": "Retired load uops that miss the STLB. (precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+        "Errata": "HSD29, HSM30",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x12",
+        "BriefDescription": "Retired store uops that miss the STLB. (precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+        "Errata": "HSD29, HSM30",
+        "L1_Hit_Indication": "1",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x21",
+        "BriefDescription": "Retired load uops with locked access. (precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "Errata": "HSD76, HSD29, HSM30",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x41",
+        "BriefDescription": "Retired load uops that split across a cacheline boundary. (precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "Errata": "HSD29, HSM30",
+        "PublicDescription": "This event counts load uops retired which had memory addresses split across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x42",
+        "BriefDescription": "Retired store uops that split across a cacheline boundary. (precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "Errata": "HSD29, HSM30",
+        "L1_Hit_Indication": "1",
+        "PublicDescription": "This event counts store uops retired which had memory addresses split across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x81",
+        "BriefDescription": "All retired load uops. (precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "Errata": "HSD29, HSM30",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x82",
+        "BriefDescription": "All retired store uops. (precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "Errata": "HSD29, HSM30",
+        "L1_Hit_Indication": "1",
+        "PublicDescription": "This event counts all store uops retired. This is a precise event.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x1",
+        "BriefDescription": "Retired load uops with L1 cache hits as data sources.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "Errata": "HSD29, HSM30",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x2",
+        "BriefDescription": "Retired load uops with L2 cache hits as data sources.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "Errata": "HSD76, HSD29, HSM30",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x4",
+        "BriefDescription": "Retired load uops with L3 cache hits as data sources.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30",
+        "PublicDescription": "This event counts retired load uops in which data sources were data hits in the L3 cache without snoops required. This does not include hardware prefetches. This is a precise event.",
+        "SampleAfterValue": "50021",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x8",
+        "BriefDescription": "Retired load uops misses in L1 cache as data sources.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+        "Errata": "HSM30",
+        "PublicDescription": "This event counts retired load uops in which data sources missed in the L1 cache. This does not include hardware prefetches. This is a precise event.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x10",
+        "BriefDescription": "Retired load uops with L2 cache misses as data sources.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+        "Errata": "HSD29, HSM30",
+        "SampleAfterValue": "50021",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x20",
+        "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS",
+        "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x40",
+        "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+        "Errata": "HSM30",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x1",
+        "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS",
+        "Errata": "HSD29, HSD25, HSM26, HSM30",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x2",
+        "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. ",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT",
+        "Errata": "HSD29, HSD25, HSM26, HSM30",
+        "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HIT in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x4",
+        "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. ",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM",
+        "Errata": "HSD29, HSD25, HSM26, HSM30",
+        "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HITM (hit modified) in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x8",
+        "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE",
+        "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x1",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+        "Errata": "HSD74, HSD29, HSD25, HSM30",
+        "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x4",
+        "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM",
+        "Errata": "HSD29, HSM30",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x10",
+        "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM",
+        "Errata": "HSM30",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x20",
+        "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD",
+        "Errata": "HSM30",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xf0",
+        "UMask": "0x1",
+        "BriefDescription": "Demand Data Read requests that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.DEMAND_DATA_RD",
+        "PublicDescription": "Demand data read requests that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xf0",
+        "UMask": "0x2",
+        "BriefDescription": "RFO requests that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.RFO",
+        "PublicDescription": "RFO requests that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xf0",
+        "UMask": "0x4",
+        "BriefDescription": "L2 cache accesses when fetching instructions",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.CODE_RD",
+        "PublicDescription": "L2 cache accesses when fetching instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xf0",
+        "UMask": "0x8",
+        "BriefDescription": "L2 or L3 HW prefetches that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.ALL_PF",
+        "PublicDescription": "Any MLC or L3 HW prefetch accessing L2, including rejects.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xf0",
+        "UMask": "0x10",
+        "BriefDescription": "L1D writebacks that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.L1D_WB",
+        "PublicDescription": "L1D writebacks that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xf0",
+        "UMask": "0x20",
+        "BriefDescription": "L2 fill requests that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.L2_FILL",
+        "PublicDescription": "L2 fill requests that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xf0",
+        "UMask": "0x40",
+        "BriefDescription": "L2 writebacks that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.L2_WB",
+        "PublicDescription": "L2 writebacks that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xf0",
+        "UMask": "0x80",
+        "BriefDescription": "Transactions accessing L2 pipe",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.ALL_REQUESTS",
+        "PublicDescription": "Transactions accessing L2 pipe.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x1",
+        "BriefDescription": "L2 cache lines in I state filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.I",
+        "PublicDescription": "L2 cache lines in I state filling L2.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x2",
+        "BriefDescription": "L2 cache lines in S state filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.S",
+        "PublicDescription": "L2 cache lines in S state filling L2.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x4",
+        "BriefDescription": "L2 cache lines in E state filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.E",
+        "PublicDescription": "L2 cache lines in E state filling L2.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x7",
+        "BriefDescription": "L2 cache lines filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.ALL",
+        "PublicDescription": "This event counts the number of L2 cache lines brought into the L2 cache.  Lines are filled into the L2 cache when there was an L2 miss.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "UMask": "0x5",
+        "BriefDescription": "Clean L2 cache lines evicted by demand",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "PublicDescription": "Clean L2 cache lines evicted by demand.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "UMask": "0x6",
+        "BriefDescription": "Dirty L2 cache lines evicted by demand",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+        "PublicDescription": "Dirty L2 cache lines evicted by demand.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xf4",
+        "UMask": "0x10",
+        "BriefDescription": "Split locks in SQ",
+        "Counter": "0,1,2,3",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "PublicDescription": "",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "MSRValue": "0x04003c0001",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "MSRValue": "0x10003c0001",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "MSRValue": "0x04003c0002",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "MSRValue": "0x10003c0002",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "MSRValue": "0x04003c0004",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "MSRValue": "0x10003c0004",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3",
+        "MSRValue": "0x3f803c0010",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3",
+        "MSRValue": "0x3f803c0020",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the L3",
+        "MSRValue": "0x3f803c0040",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3",
+        "MSRValue": "0x3f803c0080",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3",
+        "MSRValue": "0x3f803c0100",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3",
+        "MSRValue": "0x3f803c0200",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "MSRValue": "0x04003c0091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "MSRValue": "0x10003c0091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "MSRValue": "0x04003c0122",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "MSRValue": "0x10003c0122",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "MSRValue": "0x04003c0244",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "MSRValue": "0x04003c07f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "MSRValue": "0x10003c07f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all requests that hit in the L3",
+        "MSRValue": "0x3f803c8fff",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswellx/floating-point.json b/pmu-events/arch/x86/haswellx/floating-point.json
new file mode 100644
index 0000000..bc08cc1
--- /dev/null
+++ b/pmu-events/arch/x86/haswellx/floating-point.json
@@ -0,0 +1,83 @@
+[
+    {
+        "EventCode": "0xC1",
+        "UMask": "0x8",
+        "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+        "Counter": "0,1,2,3",
+        "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+        "Errata": "HSD56, HSM57",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "UMask": "0x10",
+        "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+        "Counter": "0,1,2,3",
+        "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+        "Errata": "HSD56, HSM57",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x7",
+        "BriefDescription": "Approximate counts of AVX & AVX2 256-bit instructions, including non-arithmetic instructions, loads, and stores.  May count non-AVX instructions that employ 256-bit operations, including (but not necessarily limited to) rep string instructions that use 256-bit loads and stores for optimized performance, XSAVE* and XRSTOR*, and operations that transition the x87 FPU data registers between x87 and MMX.",
+        "Counter": "0,1,2,3",
+        "EventName": "AVX_INSTS.ALL",
+        "PublicDescription": "Note that a whole rep string only counts AVX_INSTS.ALL once.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x2",
+        "BriefDescription": "Number of X87 assists due to output value.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.X87_OUTPUT",
+        "PublicDescription": "Number of X87 FP assists due to output values.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x4",
+        "BriefDescription": "Number of X87 assists due to input value.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.X87_INPUT",
+        "PublicDescription": "Number of X87 FP assists due to input values.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x8",
+        "BriefDescription": "Number of SIMD FP assists due to output values",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.SIMD_OUTPUT",
+        "PublicDescription": "Number of SIMD FP assists due to output values.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x10",
+        "BriefDescription": "Number of SIMD FP assists due to input values",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.SIMD_INPUT",
+        "PublicDescription": "Number of SIMD FP assists due to input values.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x1e",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.ANY",
+        "CounterMask": "1",
+        "PublicDescription": "Cycles with any input/output SSE* or FP assists.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswellx/frontend.json b/pmu-events/arch/x86/haswellx/frontend.json
new file mode 100644
index 0000000..a4d9f1f
--- /dev/null
+++ b/pmu-events/arch/x86/haswellx/frontend.json
@@ -0,0 +1,294 @@
+[
+    {
+        "EventCode": "0x79",
+        "UMask": "0x2",
+        "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.EMPTY",
+        "Errata": "HSD135",
+        "PublicDescription": "Counts cycles the IDQ is empty.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x4",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MITE_UOPS",
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MITE path. Set Cmask = 1 to count cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MITE_CYCLES",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x8",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.DSB_UOPS",
+        "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.DSB_CYCLES",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x10",
+        "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_DSB_UOPS",
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0x79",
+        "UMask": "0x10",
+        "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_DSB_OCCUR",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x18",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "CounterMask": "4",
+        "PublicDescription": "Counts cycles DSB is delivered four uops. Set Cmask = 4.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x18",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles DSB is delivered at least one uop. Set Cmask = 1.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x20",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x24",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "CounterMask": "4",
+        "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x24",
+        "BriefDescription": "Cycles MITE is delivering any Uop",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles MITE is delivered at least one uop. Set Cmask = 1.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x30",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_UOPS",
+        "PublicDescription": "This event counts uops delivered by the Front-end with the assistance of the microcode sequencer.  Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder.  Using other instructions, if possible, will usually improve performance.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x30",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops.  Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder.  Using other instructions, if possible, will usually improve performance.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0x79",
+        "UMask": "0x30",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_SWITCHES",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x3c",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from any path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MITE_ALL_UOPS",
+        "PublicDescription": "Number of uops delivered to IDQ from any path.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "UMask": "0x1",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads, both cacheable and noncacheable, including UC fetches.",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE.HIT",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "UMask": "0x2",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes Uncacheable accesses.",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE.MISSES",
+        "PublicDescription": "This event counts Instruction Cache (ICACHE) misses.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE.IFETCH_STALL",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE.IFDATA_STALL",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "Errata": "HSD135",
+        "PublicDescription": "This event counts the number of undelivered (unallocated) uops from the Front-end to the Resource Allocation Table (RAT) while the Back-end of the processor is not stalled. The Front-end can allocate up to 4 uops per cycle so this event can increment 0-4 times per cycle depending on the number of unallocated uops. This event is counted on a per-core basis.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "CounterMask": "4",
+        "Errata": "HSD135",
+        "PublicDescription": "This event counts the number of cycles during which the Front-end allocated exactly zero uops to the Resource Allocation Table (RAT) while the Back-end of the processor is not stalled.  This event is counted on a per-core basis.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "CounterMask": "3",
+        "Errata": "HSD135",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "CounterMask": "2",
+        "Errata": "HSD135",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "CounterMask": "1",
+        "Errata": "HSD135",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "CounterMask": "1",
+        "Errata": "HSD135",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xAB",
+        "UMask": "0x2",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+        "Counter": "0,1,2,3",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswellx/hsx-metrics.json b/pmu-events/arch/x86/haswellx/hsx-metrics.json
new file mode 100644
index 0000000..5ab5c78
--- /dev/null
+++ b/pmu-events/arch/x86/haswellx/hsx-metrics.json
@@ -0,0 +1,158 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "MetricGroup": "Pipeline",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+        "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+        "MetricGroup": "Frontend",
+        "MetricName": "IFetch_Line_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (threaded)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "MetricGroup": "Pipeline;Summary",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Total issue-pipeline slots",
+        "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "SLOTS"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "SMT",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)",
+        "MetricGroup": "Pipeline;Ports_Utilization",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+        "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL  - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)",
+        "MetricGroup": "Unknown_Branches",
+        "MetricName": "BAClear_Cost"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+        "MetricGroup": "Memory_Bound;Memory_Lat",
+        "MetricName": "Load_Miss_Real_Latency"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+        "MetricGroup": "Memory_Bound;Memory_BW",
+        "MetricName": "MLP"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TLB",
+        "MetricName": "Page_Walks_Utilization"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware threads were active",
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT;Summary",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Summary",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "C3 residency percent per core",
+        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    }
+]
diff --git a/pmu-events/arch/x86/haswellx/memory.json b/pmu-events/arch/x86/haswellx/memory.json
new file mode 100644
index 0000000..56b0f24
--- /dev/null
+++ b/pmu-events/arch/x86/haswellx/memory.json
@@ -0,0 +1,767 @@
+[
+    {
+        "EventCode": "0x05",
+        "UMask": "0x1",
+        "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "MISALIGN_MEM_REF.LOADS",
+        "PublicDescription": "Speculative cache-line split load uops dispatched to L1D.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x05",
+        "UMask": "0x2",
+        "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "MISALIGN_MEM_REF.STORES",
+        "PublicDescription": "Speculative cache-line split store-address uops dispatched to L1D.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x1",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_CONFLICT",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional writes.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x4",
+        "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x8",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x10",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x20",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x1",
+        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x2",
+        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x4",
+        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC3",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x8",
+        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x10",
+        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC5",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x2",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "PublicDescription": "This event counts the number of memory ordering machine clears detected. Memory ordering machine clears can result from memory address aliasing or snoops from another hardware thread or core to data inflight in the pipeline.  Machine clears can have a significant performance impact if they are happening frequently.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "UMask": "0x1",
+        "BriefDescription": "Number of times an HLE execution started.",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.START",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times an HLE execution successfully committed.",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.COMMIT",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x4",
+        "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x8",
+        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x10",
+        "BriefDescription": "Number of times an HLE execution aborted due to uncommon conditions.",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x20",
+        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions.",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC3",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC4",
+        "Errata": "HSD65",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc8",
+        "UMask": "0x80",
+        "BriefDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts)",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MISC5",
+        "PublicDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC9",
+        "UMask": "0x1",
+        "BriefDescription": "Number of times an RTM execution started.",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.START",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times an RTM execution successfully committed.",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.COMMIT",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x4",
+        "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x8",
+        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC1",
+        "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x10",
+        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x20",
+        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC3",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC4",
+        "Errata": "HSD65",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xc9",
+        "UMask": "0x80",
+        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MISC5",
+        "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 4.",
+        "PEBS": "2",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "Errata": "HSD76, HSD25, HSM26",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 8.",
+        "PEBS": "2",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "Errata": "HSD76, HSD25, HSM26",
+        "TakenAlone": "1",
+        "SampleAfterValue": "50021",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 16.",
+        "PEBS": "2",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "Errata": "HSD76, HSD25, HSM26",
+        "TakenAlone": "1",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 32.",
+        "PEBS": "2",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "Errata": "HSD76, HSD25, HSM26",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 64.",
+        "PEBS": "2",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "Errata": "HSD76, HSD25, HSM26",
+        "TakenAlone": "1",
+        "SampleAfterValue": "2003",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 128.",
+        "PEBS": "2",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "Errata": "HSD76, HSD25, HSM26",
+        "TakenAlone": "1",
+        "SampleAfterValue": "1009",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 256.",
+        "PEBS": "2",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "Errata": "HSD76, HSD25, HSM26",
+        "TakenAlone": "1",
+        "SampleAfterValue": "503",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Loads with latency value being above 512.",
+        "PEBS": "2",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "Errata": "HSD76, HSD25, HSM26",
+        "TakenAlone": "1",
+        "SampleAfterValue": "101",
+        "CounterHTOff": "3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that miss in the L3",
+        "MSRValue": "0x3fbfc00001",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram",
+        "MSRValue": "0x0600400001",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3",
+        "MSRValue": "0x3fbfc00002",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram",
+        "MSRValue": "0x0600400002",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache",
+        "MSRValue": "0x103fc00002",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that miss in the L3",
+        "MSRValue": "0x3fbfc00004",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram",
+        "MSRValue": "0x0600400004",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3",
+        "MSRValue": "0x3fbfc00010",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3",
+        "MSRValue": "0x3fbfc00020",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that miss in the L3",
+        "MSRValue": "0x3fbfc00040",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3",
+        "MSRValue": "0x3fbfc00080",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3",
+        "MSRValue": "0x3fbfc00100",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3",
+        "MSRValue": "0x3fbfc00200",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3",
+        "MSRValue": "0x3fbfc00091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram",
+        "MSRValue": "0x0600400091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram",
+        "MSRValue": "0x063f800091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache",
+        "MSRValue": "0x103fc00091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache",
+        "MSRValue": "0x083fc00091",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3",
+        "MSRValue": "0x3fbfc00122",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram",
+        "MSRValue": "0x0600400122",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch code reads that miss in the L3",
+        "MSRValue": "0x3fbfc00244",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram",
+        "MSRValue": "0x0600400244",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3",
+        "MSRValue": "0x3fbfc007f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram",
+        "MSRValue": "0x06004007f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram",
+        "MSRValue": "0x063f8007f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache",
+        "MSRValue": "0x103fc007f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache",
+        "MSRValue": "0x083fc007f7",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all requests that miss in the L3",
+        "MSRValue": "0x3fbfc08fff",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswellx/other.json b/pmu-events/arch/x86/haswellx/other.json
new file mode 100644
index 0000000..800e65d
--- /dev/null
+++ b/pmu-events/arch/x86/haswellx/other.json
@@ -0,0 +1,43 @@
+[
+    {
+        "EventCode": "0x5C",
+        "UMask": "0x1",
+        "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+        "Counter": "0,1,2,3",
+        "EventName": "CPL_CYCLES.RING0",
+        "PublicDescription": "Unhalted core cycles when the thread is in ring 0.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0x5C",
+        "UMask": "0x1",
+        "BriefDescription": "Number of intervals between processor halts while thread is in ring 0.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPL_CYCLES.RING0_TRANS",
+        "CounterMask": "1",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5C",
+        "UMask": "0x2",
+        "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+        "Counter": "0,1,2,3",
+        "EventName": "CPL_CYCLES.RING123",
+        "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x63",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+        "Counter": "0,1,2,3",
+        "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+        "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswellx/pipeline.json b/pmu-events/arch/x86/haswellx/pipeline.json
new file mode 100644
index 0000000..8a18bfe
--- /dev/null
+++ b/pmu-events/arch/x86/haswellx/pipeline.json
@@ -0,0 +1,1338 @@
+[
+    {
+        "EventCode": "0x00",
+        "UMask": "0x1",
+        "BriefDescription": "Instructions retired from execution.",
+        "Counter": "Fixed counter 0",
+        "EventName": "INST_RETIRED.ANY",
+        "Errata": "HSD140, HSD143",
+        "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. INST_RETIRED.ANY is counted by a designated fixed counter, leaving the programmable counters available for other events. Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 0"
+    },
+    {
+        "EventCode": "0x00",
+        "UMask": "0x2",
+        "BriefDescription": "Core cycles when the thread is not in halt state.",
+        "Counter": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "PublicDescription": "This event counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "UMask": "0x2",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "Counter": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "UMask": "0x3",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "Counter": "Fixed counter 2",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "EventCode": "0x03",
+        "UMask": "0x2",
+        "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded",
+        "Counter": "0,1,2,3",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load.  The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. The penalty for blocked store forwarding is that the load must wait for the store to write its value to the cache before it can be issued.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x03",
+        "UMask": "0x8",
+        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+        "Counter": "0,1,2,3",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x07",
+        "UMask": "0x1",
+        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+        "Counter": "0,1,2,3",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "PublicDescription": "Aliasing occurs when a load is issued after a store and their memory addresses are offset by 4K.  This event counts the number of loads that aliased with a preceding store, resulting in an extended address check in the pipeline which can have a performance impact.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "UMask": "0x3",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+        "Counter": "0,1,2,3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This event counts the number of cycles spent waiting for a recovery after an event such as a processor nuke, JEClear, assist, hle/rtm abort etc.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "UMask": "0x3",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke)",
+        "Counter": "0,1,2,3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "AnyThread": "1",
+        "CounterMask": "1",
+        "PublicDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x1",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.ANY",
+        "PublicDescription": "This event counts the number of uops issued by the Front-end of the pipeline to the Back-end. This event is counted at the allocation stage and will count both retired and non-retired uops.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0x0E",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0x0E",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+        "AnyThread": "1",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x10",
+        "BriefDescription": "Number of flags-merge uops being allocated. Such uops are considered perf sensitive; added by GSR u-arch.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+        "PublicDescription": "Number of flags-merge uops allocated. Such uops add delay.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x20",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless of whether it is a result of an LEA instruction or not.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.SLOW_LEA",
+        "PublicDescription": "Number of slow LEA or similar uops allocated. Such uop has 3 sources (for example, 2 sources + immediate) regardless of whether it is a result of LEA instruction or not.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x40",
+        "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.SINGLE_MUL",
+        "PublicDescription": "Number of multiply packed/scalar single precision uops allocated.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x14",
+        "UMask": "0x2",
+        "BriefDescription": "Any uop executed by the Divider. (This includes all divide uops, sqrt, ...)",
+        "Counter": "0,1,2,3",
+        "EventName": "ARITH.DIVIDER_UOPS",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x0",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x0",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "PublicDescription": "Increments at the frequency of XCLK (100 MHz) when not halted.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate)",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "AnyThread": "1",
+        "PublicDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate)",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "AnyThread": "1",
+        "PublicDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3c",
+        "UMask": "0x2",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x2",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4c",
+        "UMask": "0x1",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+        "Counter": "0,1,2,3",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4c",
+        "UMask": "0x2",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+        "Counter": "0,1,2,3",
+        "EventName": "LOAD_HIT_PRE.HW_PF",
+        "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for H/W prefetch.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "UMask": "0x1",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+        "Counter": "0,1,2,3",
+        "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+        "PublicDescription": "Number of integer move elimination candidate uops that were eliminated.",
+        "SampleAfterValue": "1000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "UMask": "0x2",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+        "Counter": "0,1,2,3",
+        "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+        "PublicDescription": "Number of SIMD move elimination candidate uops that were eliminated.",
+        "SampleAfterValue": "1000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "UMask": "0x4",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+        "Counter": "0,1,2,3",
+        "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+        "PublicDescription": "Number of integer move elimination candidate uops that were not eliminated.",
+        "SampleAfterValue": "1000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "UMask": "0x8",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+        "Counter": "0,1,2,3",
+        "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+        "PublicDescription": "Number of SIMD move elimination candidate uops that were not eliminated.",
+        "SampleAfterValue": "1000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5E",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+        "Counter": "0,1,2,3",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "PublicDescription": "This event counts cycles when the Reservation Station ( RS ) is empty for the thread. The RS is a structure that buffers allocated micro-ops from the Front-end. If there are many cycles when the RS is empty, it may represent an underflow of instructions delivered from the Front-end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "Invert": "1",
+        "EventCode": "0x5E",
+        "UMask": "0x1",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "Counter": "0,1,2,3",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "CounterMask": "1",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x87",
+        "UMask": "0x1",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "Counter": "0,1,2,3",
+        "EventName": "ILD_STALL.LCP",
+        "PublicDescription": "This event counts cycles where the decoder is stalled on an instruction with a length changing prefix (LCP).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x87",
+        "UMask": "0x4",
+        "BriefDescription": "Stall cycles because IQ is full",
+        "Counter": "0,1,2,3",
+        "EventName": "ILD_STALL.IQ_FULL",
+        "PublicDescription": "Stall cycles due to IQ is full.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x41",
+        "BriefDescription": "Not taken macro-conditional branches.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x81",
+        "BriefDescription": "Taken speculative and retired macro-conditional branches.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x82",
+        "BriefDescription": "Taken speculative and retired macro-unconditional branch instructions excluding calls and indirects.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x84",
+        "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x88",
+        "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0x90",
+        "BriefDescription": "Taken speculative and retired direct near calls.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xa0",
+        "BriefDescription": "Taken speculative and retired indirect calls.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xc1",
+        "BriefDescription": "Speculative and retired macro-conditional branches.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xc2",
+        "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xc4",
+        "BriefDescription": "Speculative and retired indirect branches excluding calls and returns.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xc8",
+        "BriefDescription": "Speculative and retired indirect return branches.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xd0",
+        "BriefDescription": "Speculative and retired direct near calls.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "UMask": "0xff",
+        "BriefDescription": "Speculative and retired branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+        "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0x41",
+        "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0x81",
+        "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0x84",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0x88",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0xa0",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0xc1",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0xc4",
+        "BriefDescription": "Mispredicted indirect branches excluding calls and returns.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "UMask": "0xff",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+        "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_0",
+        "PublicDescription": "Cycles which a uop is dispatched on port 0 in this thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per core when uops are executed in port 0.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE",
+        "AnyThread": "1",
+        "PublicDescription": "Cycles per core when uops are executed in port 0.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_1",
+        "PublicDescription": "Cycles which a uop is dispatched on port 1 in this thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles per core when uops are executed in port 1.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE",
+        "AnyThread": "1",
+        "PublicDescription": "Cycles per core when uops are executed in port 1.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_2",
+        "PublicDescription": "Cycles which a uop is dispatched on port 2 in this thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 2.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_3",
+        "PublicDescription": "Cycles which a uop is dispatched on port 3 in this thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 3.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_4",
+        "PublicDescription": "Cycles which a uop is dispatched on port 4 in this thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles per core when uops are executed in port 4.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE",
+        "AnyThread": "1",
+        "PublicDescription": "Cycles per core when uops are executed in port 4.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x20",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_5",
+        "PublicDescription": "Cycles which a uop is dispatched on port 5 in this thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x20",
+        "BriefDescription": "Cycles per core when uops are executed in port 5.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE",
+        "AnyThread": "1",
+        "PublicDescription": "Cycles per core when uops are executed in port 5.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x20",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x40",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_6",
+        "PublicDescription": "Cycles which a uop is dispatched on port 6 in this thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x40",
+        "BriefDescription": "Cycles per core when uops are executed in port 6.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE",
+        "AnyThread": "1",
+        "PublicDescription": "Cycles per core when uops are executed in port 6.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x40",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x80",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_7",
+        "PublicDescription": "Cycles which a uop is dispatched on port 7 in this thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x80",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 7.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x80",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x1",
+        "BriefDescription": "Resource-related stall cycles",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "Errata": "HSD135",
+        "PublicDescription": "Cycles allocation is stalled due to resource related reason.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.RS",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.SB",
+        "PublicDescription": "This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles stalled due to re-order buffer full.",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.ROB",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with pending L2 cache miss loads.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+        "CounterMask": "1",
+        "Errata": "HSD78",
+        "PublicDescription": "Cycles with pending L2 miss loads. Set Cmask=1 to count cycle.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles with pending memory loads.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+        "CounterMask": "2",
+        "PublicDescription": "Cycles with pending memory loads. Set Cmask=2 to count cycle.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x4",
+        "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+        "CounterMask": "4",
+        "PublicDescription": "This event counts cycles during which no instructions were executed in the execution stage of the pipeline.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x5",
+        "BriefDescription": "Execution stalls due to L2 cache misses.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+        "CounterMask": "5",
+        "PublicDescription": "Number of loads missed L2.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x6",
+        "BriefDescription": "Execution stalls due to memory subsystem.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+        "CounterMask": "6",
+        "PublicDescription": "This event counts cycles during which no instructions were executed in the execution stage of the pipeline and there were memory instructions pending (waiting for data).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles with pending L1 cache miss loads.",
+        "Counter": "2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+        "CounterMask": "8",
+        "PublicDescription": "Cycles with pending L1 data cache miss loads. Set Cmask=8 to count cycle.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0xc",
+        "BriefDescription": "Execution stalls due to L1 data cache misses",
+        "Counter": "2",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+        "CounterMask": "12",
+        "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xa8",
+        "UMask": "0x1",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "Counter": "0,1,2,3",
+        "EventName": "LSD.UOPS",
+        "PublicDescription": "Number of uops delivered by the LSD.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "Counter": "0,1,2,3",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "Counter": "0,1,2,3",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "CounterMask": "4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "CounterMask": "1",
+        "Errata": "HSD144, HSD30, HSM31",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "CounterMask": "1",
+        "Errata": "HSD144, HSD30, HSM31",
+        "PublicDescription": "This event counts the cycles where at least one uop was executed. It is counted per thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "CounterMask": "2",
+        "Errata": "HSD144, HSD30, HSM31",
+        "PublicDescription": "This event counts the cycles where at least two uops were executed. It is counted per thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "CounterMask": "3",
+        "Errata": "HSD144, HSD30, HSM31",
+        "PublicDescription": "This event counts the cycles where at least three uops were executed. It is counted per thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "CounterMask": "4",
+        "Errata": "HSD144, HSD30, HSM31",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x2",
+        "BriefDescription": "Number of uops executed on the core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "Errata": "HSD30, HSM31",
+        "PublicDescription": "Counts total number of uops to be executed per-core each cycle.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "CounterMask": "1",
+        "Errata": "HSD30, HSM31",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "CounterMask": "2",
+        "Errata": "HSD30, HSM31",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "CounterMask": "3",
+        "Errata": "HSD30, HSM31",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "CounterMask": "4",
+        "Errata": "HSD30, HSM31",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xb1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "Errata": "HSD30, HSM31",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC0",
+        "UMask": "0x0",
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "Counter": "0,1,2,3",
+        "EventName": "INST_RETIRED.ANY_P",
+        "Errata": "HSD11, HSD140",
+        "PublicDescription": "Number of instructions at retirement.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC0",
+        "UMask": "0x1",
+        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+        "PEBS": "2",
+        "Counter": "1",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "Errata": "HSD140",
+        "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "1"
+    },
+    {
+        "EventCode": "0xC0",
+        "UMask": "0x2",
+        "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions: Counts also flows that have several X87 or flows that use X87 uops in the exception handling.",
+        "Counter": "0,1,2,3",
+        "EventName": "INST_RETIRED.X87",
+        "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+        "Counter": "0,1,2,3",
+        "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+        "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC2",
+        "UMask": "0x1",
+        "BriefDescription": "Actually retired uops.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.ALL",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xC2",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xC2",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "CounterMask": "10",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xC2",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES",
+        "AnyThread": "1",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC2",
+        "UMask": "0x2",
+        "BriefDescription": "Retirement slots used.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.CYCLES",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0xC3",
+        "UMask": "0x1",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "CounterMask": "1",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x4",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear.  Machine clears can have a significant performance impact if they are happening frequently.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x20",
+        "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.MASKMOV",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x0",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "Branch instructions at retirement.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x1",
+        "BriefDescription": "Conditional branch instructions retired.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x2",
+        "BriefDescription": "Direct and indirect near call instructions retired.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x2",
+        "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x4",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "PEBS": "2",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x8",
+        "BriefDescription": "Return instructions retired.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x10",
+        "BriefDescription": "Not taken branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "PublicDescription": "Counts the number of not taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x20",
+        "BriefDescription": "Taken branch instructions retired.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x40",
+        "BriefDescription": "Far branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "PublicDescription": "Number of far branches retired.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x0",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "Mispredicted branch instructions at retirement.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x1",
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x4",
+        "BriefDescription": "Mispredicted macro branch instructions retired.",
+        "PEBS": "2",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "PublicDescription": "This event counts all mispredicted branch instructions retired. This is a precise event.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x20",
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCC",
+        "UMask": "0x20",
+        "BriefDescription": "Count cases of saving new LBR",
+        "Counter": "0,1,2,3",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "PublicDescription": "Count cases of saving new LBR records by hardware.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xe6",
+        "UMask": "0x1f",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "Counter": "0,1,2,3",
+        "EventName": "BACLEARS.ANY",
+        "PublicDescription": "Number of front end re-steers due to BPU misprediction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/haswellx/uncore-cache.json b/pmu-events/arch/x86/haswellx/uncore-cache.json
new file mode 100644
index 0000000..58ed6d3
--- /dev/null
+++ b/pmu-events/arch/x86/haswellx/uncore-cache.json
@@ -0,0 +1,317 @@
+[
+    {
+        "BriefDescription": "Uncore cache clock ticks",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_C_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x34",
+        "EventName": "UNC_C_LLC_LOOKUP.ANY",
+        "Filter": "filter_state=0x1",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x11",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "M line evictions from LLC (writebacks to memory)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x37",
+        "EventName": "UNC_C_LLC_VICTIMS.M_STATE",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.DATA_READ",
+        "Filter": "filter_opc=0x182",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses - Uncacheable reads (from cpu). Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.UNCACHEABLE",
+        "Filter": "filter_opc=0x187",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "MMIO reads. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.MMIO_READ",
+        "Filter": "filter_opc=0x187,filter_nc=1",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "MMIO writes. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.MMIO_WRITE",
+        "Filter": "filter_opc=0x18f,filter_nc=1",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC prefetch misses for RFO. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.RFO_LLC_PREFETCH",
+        "Filter": "filter_opc=0x190",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC prefetch misses for code reads. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.CODE_LLC_PREFETCH",
+        "Filter": "filter_opc=0x191",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC prefetch misses for data reads. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.DATA_LLC_PREFETCH",
+        "Filter": "filter_opc=0x192",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses for PCIe read current. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_READ",
+        "Filter": "filter_opc=0x19e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "ItoM write misses (as part of fast string memcpy stores) + PCIe full line writes. Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_WRITE",
+        "Filter": "filter_opc=0x1c8",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe write misses (full cache line). Derived from unc_c_tor_inserts.miss_opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_NON_SNOOP_WRITE",
+        "Filter": "filter_opc=0x1c8,filter_tid=0x3e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe writes (partial cache line). Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_NS_PARTIAL_WRITE",
+        "Filter": "filter_opc=0x180,filter_tid=0x3e",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "L2 demand and L2 prefetch code references to LLC. Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.CODE_LLC_PREFETCH",
+        "Filter": "filter_opc=0x181",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Streaming stores (full cache line). Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.STREAMING_FULL",
+        "Filter": "filter_opc=0x18c",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Streaming stores (partial cache line). Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
+        "Filter": "filter_opc=0x18d",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe read current. Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_READ",
+        "Filter": "filter_opc=0x19e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe write references (full cache line). Derived from unc_c_tor_inserts.opcode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_WRITE",
+        "Filter": "filter_opc=0x1c8,filter_tid=0x3e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Occupancy counter for LLC data reads (demand and L2 prefetch). Derived from unc_c_tor_occupancy.miss_opcode",
+        "EventCode": "0x36",
+        "EventName": "UNC_C_TOR_OCCUPANCY.LLC_DATA_READ",
+        "Filter": "filter_opc=0x182",
+        "PerPkg": "1",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "read requests to home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.READS",
+        "PerPkg": "1",
+        "UMask": "0x3",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "read requests to local home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.READS_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "read requests to remote home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.READS_REMOTE",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "write requests to home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.WRITES",
+        "PerPkg": "1",
+        "UMask": "0xC",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "write requests to local home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.WRITES_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "write requests to remote home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.WRITES_REMOTE",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT",
+        "PerPkg": "1",
+        "UMask": "0x40",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "M line forwarded from remote cache along with writeback to memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x20",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "M line forwarded from remote cache with no writeback to memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPIFWD",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x4",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "Shared line response from remote cache",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPS",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x2",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "Shared line forwarded from remote cache",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPSFWD",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x8",
+        "Unit": "HA"
+    }
+]
diff --git a/pmu-events/arch/x86/haswellx/uncore-interconnect.json b/pmu-events/arch/x86/haswellx/uncore-interconnect.json
new file mode 100644
index 0000000..8249613
--- /dev/null
+++ b/pmu-events/arch/x86/haswellx/uncore-interconnect.json
@@ -0,0 +1,28 @@
+[
+    {
+        "BriefDescription": "QPI clock ticks",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x14",
+        "EventName": "UNC_Q_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "QPI LL"
+    },
+    {
+        "BriefDescription": "Number of data flits transmitted. Derived from unc_q_txl_flits_g0.data",
+        "Counter": "0,1,2,3",
+        "EventName": "QPI_DATA_BANDWIDTH_TX",
+        "PerPkg": "1",
+        "ScaleUnit": "8Bytes",
+        "UMask": "0x2",
+        "Unit": "QPI LL"
+    },
+    {
+        "BriefDescription": "Number of non data (control) flits transmitted. Derived from unc_q_txl_flits_g0.non_data",
+        "Counter": "0,1,2,3",
+        "EventName": "QPI_CTL_BANDWIDTH_TX",
+        "PerPkg": "1",
+        "ScaleUnit": "8Bytes",
+        "UMask": "0x4",
+        "Unit": "QPI LL"
+    }
+]
diff --git a/pmu-events/arch/x86/haswellx/uncore-memory.json b/pmu-events/arch/x86/haswellx/uncore-memory.json
new file mode 100644
index 0000000..66eed39
--- /dev/null
+++ b/pmu-events/arch/x86/haswellx/uncore-memory.json
@@ -0,0 +1,86 @@
+[
+    {
+        "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_READ",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_WRITE",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0xC",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Memory controller clock ticks",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_M_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "UNC_M_POWER_CHANNEL_PPD",
+        "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_channel_ppd %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles all ranks are in critical thermal throttle",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x86",
+        "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES",
+        "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_critical_throttle_cycles %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles Memory is in self refresh power mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x43",
+        "EventName": "UNC_M_POWER_SELF_REFRESH",
+        "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_self_refresh %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Pre-charges due to page misses",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Pre-charge for reads",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.RD",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Pre-charge for writes",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.WR",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "iMC"
+    }
+]
diff --git a/pmu-events/arch/x86/haswellx/uncore-power.json b/pmu-events/arch/x86/haswellx/uncore-power.json
new file mode 100644
index 0000000..dd1b956
--- /dev/null
+++ b/pmu-events/arch/x86/haswellx/uncore-power.json
@@ -0,0 +1,92 @@
+[
+    {
+        "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_P_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0.  It can be used by itself to get the average number of cores in C0, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+        "Filter": "occ_sel=1",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c0 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3.  It can be used by itself to get the average number of cores in C3, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+        "Filter": "occ_sel=2",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c3 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6.  It can be used by itself to get the average number of cores in C6, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+        "Filter": "occ_sel=3",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c6 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode.  This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xA",
+        "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
+        "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "prochot_external_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_limit_thermal_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x6",
+        "EventName": "UNC_P_FREQ_MAX_OS_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_os_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5",
+        "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_power_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when the system is changing frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x74",
+        "EventName": "UNC_P_FREQ_TRANS_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_trans_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    }
+]
diff --git a/pmu-events/arch/x86/haswellx/virtual-memory.json b/pmu-events/arch/x86/haswellx/virtual-memory.json
new file mode 100644
index 0000000..168df55
--- /dev/null
+++ b/pmu-events/arch/x86/haswellx/virtual-memory.json
@@ -0,0 +1,484 @@
+[
+    {
+        "EventCode": "0x08",
+        "UMask": "0x1",
+        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x2",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Completed page walks due to demand load misses that caused 4K page walks in any TLB levels.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x4",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Completed page walks due to demand load misses that caused 2M/4M page walks in any TLB levels.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x8",
+        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0xe",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Completed page walks in any TLB of any page size due to demand load misses.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+        "PublicDescription": "This event counts cycles when the  page miss handler (PMH) is servicing page walks caused by DTLB load misses.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x20",
+        "BriefDescription": "Load misses that miss the  DTLB and hit the STLB (4K)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT_4K",
+        "PublicDescription": "This event counts load operations from a 4K page that miss the first DTLB level but hit the second and do not cause page walks.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x40",
+        "BriefDescription": "Load misses that miss the  DTLB and hit the STLB (2M)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT_2M",
+        "PublicDescription": "This event counts load operations from a 2M page that miss the first DTLB level but hit the second and do not cause page walks.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x60",
+        "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "PublicDescription": "Number of cache load STLB hits. No page walk.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x80",
+        "BriefDescription": "DTLB demand load misses with low part of linear-to-physical address translation missed",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.PDE_CACHE_MISS",
+        "PublicDescription": "DTLB demand load misses with low part of linear-to-physical address translation missed.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x1",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Miss in all TLB levels causes a page walk of any page size (4K/2M/4M/1G).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x2",
+        "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Completed page walks due to store misses in one or more TLB levels of 4K page structure.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x4",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Completed page walks due to store misses in one or more TLB levels of 2M/4M page structure.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x8",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks. (1G)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0xe",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Completed page walks due to store miss in any TLB levels of any page size (4K/2M/4M/1G).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+        "PublicDescription": "This event counts cycles when the  page miss handler (PMH) is servicing page walks caused by DTLB store misses.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x20",
+        "BriefDescription": "Store misses that miss the  DTLB and hit the STLB (4K)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT_4K",
+        "PublicDescription": "This event counts store operations from a 4K page that miss the first DTLB level but hit the second and do not cause page walks.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x40",
+        "BriefDescription": "Store misses that miss the  DTLB and hit the STLB (2M)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT_2M",
+        "PublicDescription": "This event counts store operations from a 2M page that miss the first DTLB level but hit the second and do not cause page walks.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x60",
+        "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x80",
+        "BriefDescription": "DTLB store misses with low part of linear-to-physical address translation missed",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.PDE_CACHE_MISS",
+        "PublicDescription": "DTLB store misses with low part of linear-to-physical address translation missed.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4f",
+        "UMask": "0x10",
+        "BriefDescription": "Cycle count for an Extended Page table walk.",
+        "Counter": "0,1,2,3",
+        "EventName": "EPT.WALK_CYCLES",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x1",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Misses in ITLB that causes a page walk of any page size.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x2",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Completed page walks due to misses in ITLB 4K page entries.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x4",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Completed page walks due to misses in ITLB 2M/4M page entries.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x8",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0xe",
+        "BriefDescription": "Misses in all ITLB levels that cause completed page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Completed page walks in ITLB of any page size.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_DURATION",
+        "PublicDescription": "This event counts cycles when the  page miss handler (PMH) is servicing page walks caused by ITLB misses.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x20",
+        "BriefDescription": "Code misses that miss the ITLB and hit the STLB (4K)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.STLB_HIT_4K",
+        "PublicDescription": "ITLB misses that hit STLB (4K).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x40",
+        "BriefDescription": "Code misses that miss the ITLB and hit the STLB (2M)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.STLB_HIT_2M",
+        "PublicDescription": "ITLB misses that hit STLB (2M).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x60",
+        "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "PublicDescription": "ITLB misses that hit STLB. No page walk.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xae",
+        "UMask": "0x1",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "PublicDescription": "Counts the number of ITLB flushes, includes 4k/2M/4M pages.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x11",
+        "BriefDescription": "Number of DTLB page walker hits in the L1+FB",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L1",
+        "PublicDescription": "Number of DTLB page walker loads that hit in the L1+FB.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x12",
+        "BriefDescription": "Number of DTLB page walker hits in the L2",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L2",
+        "PublicDescription": "Number of DTLB page walker loads that hit in the L2.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x14",
+        "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_L3",
+        "Errata": "HSD25",
+        "PublicDescription": "Number of DTLB page walker loads that hit in the L3.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x18",
+        "BriefDescription": "Number of DTLB page walker hits in Memory",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY",
+        "Errata": "HSD25",
+        "PublicDescription": "Number of DTLB page walker loads from memory.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x21",
+        "BriefDescription": "Number of ITLB page walker hits in the L1+FB",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L1",
+        "PublicDescription": "Number of ITLB page walker loads that hit in the L1+FB.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x22",
+        "BriefDescription": "Number of ITLB page walker hits in the L2",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L2",
+        "PublicDescription": "Number of ITLB page walker loads that hit in the L2.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x24",
+        "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_L3",
+        "Errata": "HSD25",
+        "PublicDescription": "Number of ITLB page walker loads that hit in the L3.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x28",
+        "BriefDescription": "Number of ITLB page walker hits in Memory",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.ITLB_MEMORY",
+        "Errata": "HSD25",
+        "PublicDescription": "Number of ITLB page walker loads from memory.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x41",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x42",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L2.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x44",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L3.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L3",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x48",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in memory.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_MEMORY",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x81",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x82",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x84",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L3.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L3",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBC",
+        "UMask": "0x88",
+        "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in memory.",
+        "Counter": "0,1,2,3",
+        "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_MEMORY",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xBD",
+        "UMask": "0x1",
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+        "Counter": "0,1,2,3",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "PublicDescription": "DTLB flush attempts of the thread-specific entries.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBD",
+        "UMask": "0x20",
+        "BriefDescription": "STLB flush attempts",
+        "Counter": "0,1,2,3",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "PublicDescription": "Count number of STLB flush attempts.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivybridge/cache.json b/pmu-events/arch/x86/ivybridge/cache.json
new file mode 100644
index 0000000..999a01b
--- /dev/null
+++ b/pmu-events/arch/x86/ivybridge/cache.json
@@ -0,0 +1,1102 @@
+[
+    {
+        "PublicDescription": "Demand Data Read requests that hit L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RFO requests that hit L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 store RFO requests.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests to L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of instruction fetches that hit the L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of instruction fetches that missed the L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache misses when fetching instructions",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 code requests.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 code requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_RQSTS.PF_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from the L2 hardware prefetchers that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 HW prefetcher requests that missed L2.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_RQSTS.PF_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 HW prefetcher requests.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc0",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from L2 hardware prefetchers",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RFOs that miss cache lines.",
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_STORE_LOCK_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that miss cache lines",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RFOs that hit cache lines in M state.",
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_STORE_LOCK_RQSTS.HIT_M",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that hit cache lines in M state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RFOs that access cache lines in any state.",
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_STORE_LOCK_RQSTS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that access cache lines in any state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Not rejected writebacks that missed LLC.",
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_L1D_WB_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Count the number of modified Lines evicted from L1 and missed L2. (Non-rejected WBs from the DCU.)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Not rejected writebacks from L1D to L2 cache lines in E state.",
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_L1D_WB_RQSTS.HIT_E",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in E state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Not rejected writebacks from L1D to L2 cache lines in M state.",
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_L1D_WB_RQSTS.HIT_M",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in M state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_L1D_WB_RQSTS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in any state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts each cache miss condition for references to the last level cache.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests missed LLC",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts requests originating from the core that reference a cache line in the last level cache.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to LLC",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increments the number of outstanding L1D misses every cycle. Set Cmask = 1 and Edge =1 to count occurrences.",
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D miss outstanding duration in cycles",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "CounterMask": "1",
+        "CounterHTOff": "2"
+    },
+    {
+        "PublicDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core",
+        "CounterMask": "1",
+        "CounterHTOff": "2"
+    },
+    {
+        "PublicDescription": "Cycles a demand request was blocked due to Fill Buffers unavailability.",
+        "EventCode": "0x48",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers unavailability",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of lines brought into the L1 data cache.",
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D.REPLACEMENT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D data line replacements",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding Demand Data Read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding Demand Code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding code read transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding code read transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding code read transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding demand RFO read transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding demand RFO read transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles in which the L1D is locked.",
+        "EventCode": "0x63",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when L1D is locked",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand data read requests sent to uncore.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand code read requests sent to uncore.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cacheable and noncacheable code read requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand RFO read requests sent to uncore, including regular RFOs, locks, ItoM.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Data read requests sent to uncore (demand and prefetch).",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand and prefetch data reads",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cases when offcore requests buffer cannot take more entries for core.",
+        "EventCode": "0xB2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cases when offcore requests buffer cannot take more entries for core",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x11",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that miss the STLB. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x12",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store uops that miss the STLB. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired load uops with locked access. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired load uops. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired store uops. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retired load uops with L1 cache hits as data sources.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops with L2 cache hits as data sources.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources following L1 data-cache miss.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Retired load uops with L2 cache misses as data sources.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_MISS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Retired load uops whose data source was local memory (cross-socket snoop not needed or missed).",
+        "EventCode": "0xD3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired load uops which data sources missed LLC but serviced from local dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Demand Data Read requests that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_TRANS.DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RFO requests that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_TRANS.RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache accesses when fetching instructions.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_TRANS.CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache accesses when fetching instructions",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Any MLC or LLC HW prefetch accessing L2, including rejects.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_TRANS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 or LLC HW prefetches that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L1D writebacks that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_TRANS.L1D_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L1D writebacks that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 fill requests that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_TRANS.L2_FILL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 fill requests that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 writebacks that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_TRANS.L2_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 writebacks that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Transactions accessing L2 pipe.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_TRANS.ALL_REQUESTS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Transactions accessing L2 pipe",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache lines in I state filling L2.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_IN.I",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in I state filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache lines in S state filling L2.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_IN.S",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in S state filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache lines in E state filling L2.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_IN.E",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in E state filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache lines filling L2.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "L2_LINES_IN.ALL",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Clean L2 cache lines evicted by demand.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Clean L2 cache lines evicted by demand",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Dirty L2 cache lines evicted by demand.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines evicted by demand",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Clean L2 cache lines evicted by the MLC prefetcher.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_OUT.PF_CLEAN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Clean L2 cache lines evicted by L2 prefetch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Dirty L2 cache lines evicted by the MLC prefetcher.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_LINES_OUT.PF_DIRTY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines evicted by L2 prefetch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Dirty L2 cache lines evicted from the L2.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa",
+        "EventName": "L2_LINES_OUT.DIRTY_ALL",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines evicted from the L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Split locks in SQ",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch code reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all writebacks from the core to the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x18000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses. It also includes L2 hints sent to LLC to keep a line from being evicted out of the core caches",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10400",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts non-temporal stores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data reads",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand RFOs",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x000105B3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x000107F7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo references (demand & prefetch)",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivybridge/floating-point.json b/pmu-events/arch/x86/ivybridge/floating-point.json
new file mode 100644
index 0000000..950b62c
--- /dev/null
+++ b/pmu-events/arch/x86/ivybridge/floating-point.json
@@ -0,0 +1,151 @@
+[
+    {
+        "PublicDescription": "Counts number of X87 uops executed.",
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_COMP_OPS_EXE.X87",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of FP Computational Uops Executed this cycle. The number of FADD, FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. This event does not distinguish an FADD used in the middle of a transcendental flow from a separate FADD instruction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SSE* or AVX-128 FP Computational packed double-precision uops issued this cycle.",
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed double-precision uops issued this cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SSE* or AVX-128 FP Computational scalar single-precision uops issued this cycle.",
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar single-precision uops issued this cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SSE* or AVX-128 FP Computational packed single-precision uops issued this cycle.",
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed single-precision uops issued this cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts number of SSE* or AVX-128 double precision FP scalar uops executed.",
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar double-precision uops issued this cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts 256-bit packed single-precision floating-point instructions.",
+        "EventCode": "0x11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_FP_256.PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "number of AVX-256 Computational FP single precision uops issued this cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts 256-bit packed double-precision floating-point instructions.",
+        "EventCode": "0x11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_FP_256.PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "number of AVX-256 Computational FP double precision uops issued this cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of assists associated with 256-bit AVX store operations.",
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OTHER_ASSISTS.AVX_STORE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of AVX memory assist for stores. AVX microcode assist is being invoked whenever the hardware is unable to properly handle AVX-256b operations.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of X87 FP assists due to output values.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ASSIST.X87_OUTPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of X87 assists due to output value.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of X87 FP assists due to input values.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ASSIST.X87_INPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of X87 assists due to input value.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SIMD FP assists due to output values.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_ASSIST.SIMD_OUTPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of SIMD FP assists due to Output values",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SIMD FP assists due to input values.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_ASSIST.SIMD_INPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of SIMD FP assists due to input values",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with any input/output SSE* or FP assists.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1e",
+        "EventName": "FP_ASSIST.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivybridge/frontend.json b/pmu-events/arch/x86/ivybridge/frontend.json
new file mode 100644
index 0000000..efaa949
--- /dev/null
+++ b/pmu-events/arch/x86/ivybridge/frontend.json
@@ -0,0 +1,305 @@
+[
+    {
+        "PublicDescription": "Counts cycles the IDQ is empty.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "IDQ.EMPTY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MITE path. Set Cmask = 1 to count cycles.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_DSB_OCCUR",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles DSB is delivered four uops. Set Cmask = 4.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles DSB is delivered at least one uop. Set Cmask = 1.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles MITE is delivered at least one uop. Set Cmask = 1.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering any Uop",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. Set Cmask = 1 to count cycles.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_SWITCHES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of uops delivered to IDQ from any path.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3c",
+        "EventName": "IDQ.MITE_ALL_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads, both cacheable and noncacheable, including UC fetches.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ICACHE.HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads, both cacheable and noncacheable, including UC fetches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes UC accesses.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ICACHE.MISSES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Instruction cache, streaming buffer and victim cache misses",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where a code-fetch stalled due to L1 instruction-cache miss or an iTLB miss.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ICACHE.IFETCH_STALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where a code-fetch stalled due to L1 instruction-cache miss or an iTLB miss",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Count issue pipeline slots where no uop was delivered from the front end to the back end when there is no back-end stall.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of DSB to MITE switches.",
+        "EventCode": "0xAB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DSB2MITE_SWITCHES.COUNT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles DSB to MITE switches caused delay.",
+        "EventCode": "0xAB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "DSB Fill encountered > 3 DSB lines.",
+        "EventCode": "0xAC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "DSB_FILL.EXCEED_DSB_LINES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounters more than 3 Decode Stream Buffer (DSB) lines",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/pmu-events/arch/x86/ivybridge/ivb-metrics.json
new file mode 100644
index 0000000..7c26795
--- /dev/null
+++ b/pmu-events/arch/x86/ivybridge/ivb-metrics.json
@@ -0,0 +1,164 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "MetricGroup": "Pipeline",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+        "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
+        "MetricGroup": "Frontend",
+        "MetricName": "IFetch_Line_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (threaded)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "MetricGroup": "Pipeline;Summary",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Total issue-pipeline slots",
+        "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "SLOTS"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "SMT",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+        "MetricGroup": "Pipeline;Ports_Utilization",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+        "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)",
+        "MetricGroup": "Unknown_Branches",
+        "MetricName": "BAClear_Cost"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+        "MetricGroup": "Memory_Bound;Memory_Lat",
+        "MetricName": "Load_Miss_Real_Latency"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand loads when there is at least 1 such miss)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+        "MetricGroup": "Memory_Bound;Memory_BW",
+        "MetricName": "MLP"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TLB",
+        "MetricName": "Page_Walks_Utilization"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Giga Floating Point Operations Per Second",
+        "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
+        "MetricGroup": "FLOPS;Summary",
+        "MetricName": "GFLOPs"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware threads were active",
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT;Summary",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Summary",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "C3 residency percent per core",
+        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    }
+]
diff --git a/pmu-events/arch/x86/ivybridge/memory.json b/pmu-events/arch/x86/ivybridge/memory.json
new file mode 100644
index 0000000..a74d54f
--- /dev/null
+++ b/pmu-events/arch/x86/ivybridge/memory.json
@@ -0,0 +1,236 @@
+[
+    {
+        "PublicDescription": "Speculative cache-line split load uops dispatched to L1D.",
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MISALIGN_MEM_REF.LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative cache-line split Store-address uops dispatched to L1D.",
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MISALIGN_MEM_REF.STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "PAGE_WALKS.LLC_MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of any page walk that had a miss in LLC.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 4.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Loads with latency value being above 4",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 8.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Loads with latency value being above 8",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 16.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Loads with latency value being above 16",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 32.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Loads with latency value being above 32",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 64.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2003",
+        "BriefDescription": "Loads with latency value being above 64",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 128.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "1009",
+        "BriefDescription": "Loads with latency value being above 128",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 256.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "503",
+        "BriefDescription": "Loads with latency value being above 256",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 512.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "101",
+        "BriefDescription": "Loads with latency value being above 512",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "Counter": "3",
+        "UMask": "0x2",
+        "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only.",
+        "PRECISE_STORE": "1",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data returned from dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the LLC and the data returned from dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3004003f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data returned from dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand code reads that miss the LLC and the data returned from dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6004001b3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN_SOCKET.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts LLC replacements",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivybridge/other.json b/pmu-events/arch/x86/ivybridge/other.json
new file mode 100644
index 0000000..4eb83ee
--- /dev/null
+++ b/pmu-events/arch/x86/ivybridge/other.json
@@ -0,0 +1,44 @@
+[
+    {
+        "PublicDescription": "Unhalted core cycles when the thread is in ring 0.",
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPL_CYCLES.RING0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of intervals between processor halts while thread is in ring 0.",
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "CPL_CYCLES.RING0_TRANS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Number of intervals between processor halts while thread is in ring 0",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.",
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPL_CYCLES.RING123",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.",
+        "EventCode": "0x63",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivybridge/pipeline.json b/pmu-events/arch/x86/ivybridge/pipeline.json
new file mode 100644
index 0000000..0afbfd9
--- /dev/null
+++ b/pmu-events/arch/x86/ivybridge/pipeline.json
@@ -0,0 +1,1309 @@
+[
+    {
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 0",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired from execution.",
+        "CounterHTOff": "Fixed counter 0"
+    },
+    {
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when the thread is not in halt state.",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x3",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "PublicDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "False dependencies in MOB due to partial compare on address.",
+        "EventCode": "0x07",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "False dependencies in MOB due to partial compare on address",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc.)",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EdgeDetect": "1",
+        "EventName": "INT_MISC.RECOVERY_STALLS_COUNT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of occurrences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc.)",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "AnyThread": "1",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increments each cycle the # of Uops issued by the RAT to RS. Set Cmask = 1, Inv = 1, Any = 1 to count stalled cycles of this core.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.",
+        "EventCode": "0x0E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.",
+        "EventCode": "0x0E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of flags-merge uops allocated. Such uops add delay.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of flags-merge uops being allocated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of slow LEA or similar uops allocated. Such uop has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_ISSUED.SLOW_LEA",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of multiply packed/scalar single precision uops allocated.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_ISSUED.SINGLE_MUL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles that the divider is active, includes INT and FP. Set 'edge =1, cmask=1' to count the number of divides.",
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.FPU_DIV_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when divider is busy executing divide operations",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Divide operations executed.",
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EdgeDetect": "1",
+        "EventName": "ARITH.FPU_DIV",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Divide operations executed",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increments at the frequency of XCLK (100 MHz) when not halted.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted. (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted. (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.",
+        "EventCode": "0x4C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for H/W prefetch.",
+        "EventCode": "0x4C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_HIT_PRE.HW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles the RS is empty for the thread.",
+        "EventCode": "0x5E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ILD_STALL.LCP",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Stall cycles because IQ is full.",
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ILD_STALL.IQ_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Stall cycles because IQ is full",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Not taken macro-conditional branches.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not taken macro-conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired macro-conditional branches.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired macro-conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired indirect branches excluding calls and returns.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired indirect branches with return mnemonic.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired direct near calls.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x90",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired direct near calls",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired indirect calls.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa0",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect calls",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative and retired macro-conditional branches.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc1",
+        "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired macro-conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc2",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative and retired indirect branches excluding calls and returns.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc4",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired indirect branches excluding calls and returns",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc8",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired indirect return branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative and retired direct near calls.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xd0",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired direct near calls",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Not taken speculative and retired mispredicted macro conditional branches.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired mispredicted macro conditional branches.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired mispredicted indirect calls.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa0",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect calls",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative and retired mispredicted macro conditional branches.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc1",
+        "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Mispredicted indirect branches excluding calls and returns.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc4",
+        "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Mispredicted indirect branches excluding calls and returns",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a Uop is dispatched on port 0.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are dispatched to port 0.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a Uop is dispatched on port 1.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are dispatched to port 1.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a Uop is dispatched on port 2.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops dispatched to port 2, loads and stores per core (speculative and retired).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a Uop is dispatched on port 3.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when load or STA uops are dispatched to port 3.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a Uop is dispatched on port 4.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are dispatched to port 4.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a Uop is dispatched on port 5.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 5",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are dispatched to port 5.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 5",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles Allocation is stalled due to Resource Related reason.",
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource-related stall cycles",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RESOURCE_STALLS.RS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles stalled due to no store buffers available (not including draining from sync).",
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RESOURCE_STALLS.SB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no store buffers available (not including draining from sync).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RESOURCE_STALLS.ROB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to re-order buffer full.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with pending L2 miss loads. Set AnyThread to count per core.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with pending L2 cache miss loads.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while L2 cache miss load* is outstanding.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with pending memory loads. Set AnyThread to count per core.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with pending memory loads.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Total execution stalls.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Total execution stalls.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of loads missed L2.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls due to L2 cache misses.",
+        "CounterMask": "5",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while L2 cache miss load* is outstanding.",
+        "CounterMask": "5",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x6",
+        "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls due to memory subsystem.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x6",
+        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Cycles with pending L1 cache miss loads. Set AnyThread to count per core.",
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0x8",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with pending L1 cache miss loads.",
+        "CounterMask": "8",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0x8",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "CounterMask": "8",
+        "CounterHTOff": "2"
+    },
+    {
+        "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.",
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0xc",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls due to L1 data cache misses",
+        "CounterMask": "12",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0xc",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "CounterMask": "12",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts total number of uops to be executed per-thread each cycle. Set Cmask = 1, INV =1 to count stall cycles.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts total number of uops to be executed per-core each cycle.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of uops executed on the core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of instructions at retirement.",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.",
+        "EventCode": "0xC0",
+        "Counter": "1",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+        "CounterHTOff": "1"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retired uops.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "CounterMask": "10",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retirement slots used.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of self-modifying-code machine clears detected.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of executed AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MACHINE_CLEARS.MASKMOV",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Branch instructions at retirement.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Conditional branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Direct and indirect near call instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Return instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of not taken branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Not taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of far branches retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Far branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Mispredicted branch instructions at retirement.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted macro branch instructions retired.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Count cases of saving new LBR records by hardware.",
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count cases of saving new LBR records by hardware",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of front end re-steers due to BPU misprediction.",
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "EventName": "BACLEARS.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivybridge/uncore.json b/pmu-events/arch/x86/ivybridge/uncore.json
new file mode 100644
index 0000000..42c70ee
--- /dev/null
+++ b/pmu-events/arch/x86/ivybridge/uncore.json
@@ -0,0 +1,314 @@
+[
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x01",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.MISS",
+    "BriefDescription": "A snoop misses in some processor core.",
+    "PublicDescription": "A snoop misses in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x02",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL",
+    "BriefDescription": "A snoop invalidates a non-modified line in some processor core.",
+    "PublicDescription": "A snoop invalidates a non-modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x04",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HIT",
+    "BriefDescription": "A snoop hits a non-modified line in some processor core.",
+    "PublicDescription": "A snoop hits a non-modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x08",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HITM",
+    "BriefDescription": "A snoop hits a modified line in some processor core.",
+    "PublicDescription": "A snoop hits a modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x10",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL_M",
+    "BriefDescription": "A snoop invalidates a modified line in some processor core.",
+    "PublicDescription": "A snoop invalidates a modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x20",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.EXTERNAL_FILTER",
+    "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.",
+    "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x40",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.XCORE_FILTER",
+    "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.",
+    "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x80",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.EVICTION_FILTER",
+    "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.",
+    "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x01",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.M",
+    "BriefDescription": "LLC lookup request that access cache and found line in M-state.",
+    "PublicDescription": "LLC lookup request that access cache and found line in M-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x02",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.E",
+    "BriefDescription": "LLC lookup request that access cache and found line in E-state.",
+    "PublicDescription": "LLC lookup request that access cache and found line in E-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x04",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.S",
+    "BriefDescription": "LLC lookup request that access cache and found line in S-state.",
+    "PublicDescription": "LLC lookup request that access cache and found line in S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x08",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.I",
+    "BriefDescription": "LLC lookup request that access cache and found line in I-state.",
+    "PublicDescription": "LLC lookup request that access cache and found line in I-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x10",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_FILTER",
+    "BriefDescription": "Filter on processor core initiated cacheable read requests.",
+    "PublicDescription": "Filter on processor core initiated cacheable read requests.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x20",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_FILTER",
+    "BriefDescription": "Filter on processor core initiated cacheable write requests.",
+    "PublicDescription": "Filter on processor core initiated cacheable write requests.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x40",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_FILTER",
+    "BriefDescription": "Filter on external snoop requests.",
+    "PublicDescription": "Filter on external snoop requests.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x80",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_REQUEST_FILTER",
+    "BriefDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.",
+    "PublicDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x80",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+    "BriefDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+    "PublicDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+    "Counter": "0",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x81",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+    "BriefDescription": "Counts the number of coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+    "PublicDescription": "Counts the number of coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x81",
+    "UMask": "0x20",
+    "EventName": "UNC_ARB_TRK_REQUESTS.WRITES",
+    "BriefDescription": "Counts the number of allocated write entries, include full, partial, and LLC evictions.",
+    "PublicDescription": "Counts the number of allocated write entries, include full, partial, and LLC evictions.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x81",
+    "UMask": "0x80",
+    "EventName": "UNC_ARB_TRK_REQUESTS.EVICTIONS",
+    "BriefDescription": "Counts the number of LLC evictions allocated.",
+    "PublicDescription": "Counts the number of LLC evictions allocated.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x83",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_COH_TRK_OCCUPANCY.ALL",
+    "BriefDescription": "Cycles weighted by number of requests pending in Coherency Tracker.",
+    "PublicDescription": "Cycles weighted by number of requests pending in Coherency Tracker.",
+    "Counter": "0",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x84",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+    "BriefDescription": "Number of requests allocated in Coherency Tracker.",
+    "PublicDescription": "Number of requests allocated in Coherency Tracker.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x80",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
+    "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+    "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+    "Counter": "0,1",
+    "CounterMask": "1",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x80",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_OVER_HALF_FULL",
+    "BriefDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+    "PublicDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+    "Counter": "0,1",
+    "CounterMask": "10",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x0",
+    "UMask": "0x01",
+    "EventName": "UNC_CLOCK.SOCKET",
+    "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+    "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+    "Counter": "Fixed",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x06",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ES",
+    "BriefDescription": "LLC lookup request that access cache and found line in E-state or S-state.",
+    "PublicDescription": "LLC lookup request that access cache and found line in E-state or S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivybridge/virtual-memory.json b/pmu-events/arch/x86/ivybridge/virtual-memory.json
new file mode 100644
index 0000000..f243551
--- /dev/null
+++ b/pmu-events/arch/x86/ivybridge/virtual-memory.json
@@ -0,0 +1,180 @@
+[
+    {
+        "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size from demand loads.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk of any page size.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Misses in all TLB levels that caused page walk completed of any size by demand loads.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycle PMH is busy with a walk due to demand loads.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "DTLB_LOAD_MISSES.LARGE_PAGE_WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Page walk for a large page completed for Demand load.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Miss in all TLB levels causes a page walk of any page size (4K/2M/4M/1G).",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Miss in all TLB levels causes a page walk that completes of any page size (4K/2M/4M/1G).",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles PMH is busy with this walk.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4F",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "EPT.WALK_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycle count for an Extended Page table walk.  The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts load operations that missed 1st level DTLB but hit the 2nd level.",
+        "EventCode": "0x5F",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Misses in all ITLB levels that cause page walks.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Misses in all ITLB levels that cause completed page walks.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses in all ITLB levels that cause completed page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycle PMH is busy with a walk.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ITLB_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of cache load STLB hits. No page walk.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Completed page walks in ITLB due to STLB load misses for large pages.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "ITLB_MISSES.LARGE_PAGE_WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Completed page walks in ITLB due to STLB load misses for large pages",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of ITLB flushes, includes 4k/2M/4M pages.",
+        "EventCode": "0xAE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "DTLB flush attempts of the thread-specific entries.",
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Count number of STLB flush attempts.",
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "STLB flush attempts",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivytown/cache.json b/pmu-events/arch/x86/ivytown/cache.json
new file mode 100644
index 0000000..6dad3ad
--- /dev/null
+++ b/pmu-events/arch/x86/ivytown/cache.json
@@ -0,0 +1,1260 @@
+[
+    {
+        "PublicDescription": "Demand Data Read requests that hit L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RFO requests that hit L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 store RFO requests.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests to L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of instruction fetches that hit the L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of instruction fetches that missed the L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache misses when fetching instructions",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 code requests.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 code requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_RQSTS.PF_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from the L2 hardware prefetchers that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 HW prefetcher requests that missed L2.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_RQSTS.PF_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all L2 HW prefetcher requests.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc0",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from L2 hardware prefetchers",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RFOs that miss cache lines.",
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_STORE_LOCK_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that miss cache lines",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RFOs that hit cache lines in M state.",
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_STORE_LOCK_RQSTS.HIT_M",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that hit cache lines in M state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RFOs that access cache lines in any state.",
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_STORE_LOCK_RQSTS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that access cache lines in any state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Not rejected writebacks that missed LLC.",
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_L1D_WB_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Count the number of modified Lines evicted from L1 and missed L2. (Non-rejected WBs from the DCU.)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Not rejected writebacks from L1D to L2 cache lines in E state.",
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_L1D_WB_RQSTS.HIT_E",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in E state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Not rejected writebacks from L1D to L2 cache lines in M state.",
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_L1D_WB_RQSTS.HIT_M",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in M state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_L1D_WB_RQSTS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in any state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts each cache miss condition for references to the last level cache.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests missed LLC",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts requests originating from the core that reference a cache line in the last level cache.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to LLC",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increments the number of outstanding L1D misses every cycle. Set Cmask = 1 and Edge =1 to count occurrences.",
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D miss outstanding duration in cycles",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "CounterMask": "1",
+        "CounterHTOff": "2"
+    },
+    {
+        "PublicDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core",
+        "CounterMask": "1",
+        "CounterHTOff": "2"
+    },
+    {
+        "PublicDescription": "Cycles a demand request was blocked due to Fill Buffers unavailability.",
+        "EventCode": "0x48",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers unavailability",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of lines brought into the L1 data cache.",
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D.REPLACEMENT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D data line replacements",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding Demand Data Read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding Demand Code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding code read transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding demand RFO read transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding demand RFO read transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles in which the L1D is locked.",
+        "EventCode": "0x63",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when L1D is locked",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand data read requests sent to uncore.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand code read requests sent to uncore.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cacheable and noncacheable code read requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand RFO read requests sent to uncore, including regular RFOs, locks, ItoM.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Data read requests sent to uncore (demand and prefetch).",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand and prefetch data reads",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cases when offcore requests buffer cannot take more entries for core.",
+        "EventCode": "0xB2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cases when offcore requests buffer cannot take more entries for core",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x11",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that miss the STLB. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x12",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store uops that miss the STLB. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired load uops with locked access. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired load uops. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired store uops. (Precise Event)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retired load uops with L1 cache hits as data sources.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops with L2 cache hits as data sources.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources following L1 data-cache miss.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Retired load uops with L2 cache misses as data sources.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_MISS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired load uops whose data source was local DRAM (Snoop not needed, Snoop Miss, or Snoop Hit data not forwarded).",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired load uops whose data source was remote DRAM (Snoop not needed, Snoop Miss, or Snoop Hit data not forwarded).",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Remote cache HITM.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Data forwarded from remote cache.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Demand Data Read requests that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_TRANS.DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RFO requests that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_TRANS.RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache accesses when fetching instructions.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_TRANS.CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache accesses when fetching instructions",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Any MLC or LLC HW prefetch accessing L2, including rejects.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_TRANS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 or LLC HW prefetches that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L1D writebacks that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_TRANS.L1D_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L1D writebacks that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 fill requests that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_TRANS.L2_FILL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 fill requests that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 writebacks that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_TRANS.L2_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 writebacks that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Transactions accessing L2 pipe.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_TRANS.ALL_REQUESTS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Transactions accessing L2 pipe",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache lines in I state filling L2.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_IN.I",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in I state filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache lines in S state filling L2.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_IN.S",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in S state filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache lines in E state filling L2.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_IN.E",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in E state filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "L2 cache lines filling L2.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "L2_LINES_IN.ALL",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Clean L2 cache lines evicted by demand.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Clean L2 cache lines evicted by demand",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Dirty L2 cache lines evicted by demand.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines evicted by demand",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Clean L2 cache lines evicted by the MLC prefetcher.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_OUT.PF_CLEAN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Clean L2 cache lines evicted by L2 prefetch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Dirty L2 cache lines evicted by the MLC prefetcher.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_LINES_OUT.PF_DIRTY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines evicted by L2 prefetch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Dirty L2 cache lines filling the L2.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa",
+        "EventName": "L2_LINES_OUT.DIRTY_ALL",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines filling the L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Split locks in SQ",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoop returned a clean response",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that hit the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoop returned a clean response",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoop returned a clean response",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all writebacks from the core to the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoop returned a clean response",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x803c8000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LRU_HINTS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts L2 hints sent to LLC to keep a line from being evicted out of the core caches",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x23ffc08000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.PORTIO_MMIO_UC",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops sent to sibling cores return clean response",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops sent to sibling cores return clean response",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10400",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on an uncacheable address",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts non-temporal stores",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivytown/floating-point.json b/pmu-events/arch/x86/ivytown/floating-point.json
new file mode 100644
index 0000000..950b62c
--- /dev/null
+++ b/pmu-events/arch/x86/ivytown/floating-point.json
@@ -0,0 +1,151 @@
+[
+    {
+        "PublicDescription": "Counts number of X87 uops executed.",
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_COMP_OPS_EXE.X87",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of FP Computational Uops Executed this cycle. The number of FADD, FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. This event does not distinguish an FADD used in the middle of a transcendental flow from a separate FADD instruction",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SSE* or AVX-128 FP Computational packed double-precision uops issued this cycle.",
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed double-precision uops issued this cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SSE* or AVX-128 FP Computational scalar single-precision uops issued this cycle.",
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar single-precision uops issued this cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SSE* or AVX-128 FP Computational packed single-precision uops issued this cycle.",
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed single-precision uops issued this cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts number of SSE* or AVX-128 double precision FP scalar uops executed.",
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar double-precision uops issued this cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts 256-bit packed single-precision floating-point instructions.",
+        "EventCode": "0x11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_FP_256.PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "number of AVX-256 Computational FP single precision uops issued this cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts 256-bit packed double-precision floating-point instructions.",
+        "EventCode": "0x11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_FP_256.PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "number of AVX-256 Computational FP double precision uops issued this cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of assists associated with 256-bit AVX store operations.",
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OTHER_ASSISTS.AVX_STORE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of AVX memory assists for stores. An AVX microcode assist is invoked whenever the hardware is unable to properly handle AVX-256b operations.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of X87 FP assists due to output values.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ASSIST.X87_OUTPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of X87 assists due to output value.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of X87 FP assists due to input values.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ASSIST.X87_INPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of X87 assists due to input value.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SIMD FP assists due to output values.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_ASSIST.SIMD_OUTPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of SIMD FP assists due to output values",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of SIMD FP assists due to input values.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_ASSIST.SIMD_INPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of SIMD FP assists due to input values",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with any input/output SSE* or FP assists.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1e",
+        "EventName": "FP_ASSIST.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivytown/frontend.json b/pmu-events/arch/x86/ivytown/frontend.json
new file mode 100644
index 0000000..efaa949
--- /dev/null
+++ b/pmu-events/arch/x86/ivytown/frontend.json
@@ -0,0 +1,305 @@
+[
+    {
+        "PublicDescription": "Counts cycles the IDQ is empty.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "IDQ.EMPTY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MITE path. Set Cmask = 1 to count cycles.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_DSB_OCCUR",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles in which the DSB delivers four uops. Set Cmask = 4.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles in which the DSB delivers at least one uop. Set Cmask = 1.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles in which MITE delivers four uops. Set Cmask = 4.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles in which MITE delivers at least one uop. Set Cmask = 1.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering any Uop",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. Set Cmask = 1 to count cycles.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_SWITCHES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of uops delivered to IDQ from any path.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3c",
+        "EventName": "IDQ.MITE_ALL_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from any path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads, both cacheable and noncacheable, including UC fetches.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ICACHE.HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads, both cacheable and noncacheable, including UC fetches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes UC accesses.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ICACHE.MISSES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Instruction cache, streaming buffer and victim cache misses",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where a code-fetch stalled due to L1 instruction-cache miss or an iTLB miss.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ICACHE.IFETCH_STALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where a code-fetch stalled due to L1 instruction-cache miss or an iTLB miss",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Count issue pipeline slots where no uop was delivered from the front end to the back end when there is no back-end stall.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with 2 or fewer uops delivered by the front end.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with 3 or fewer uops delivered by the front end.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of DSB to MITE switches.",
+        "EventCode": "0xAB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DSB2MITE_SWITCHES.COUNT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles DSB to MITE switches caused delay.",
+        "EventCode": "0xAB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "DSB Fill encountered > 3 DSB lines.",
+        "EventCode": "0xAC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "DSB_FILL.EXCEED_DSB_LINES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounters more than 3 Decode Stream Buffer (DSB) lines",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivytown/ivt-metrics.json b/pmu-events/arch/x86/ivytown/ivt-metrics.json
new file mode 100644
index 0000000..7c26795
--- /dev/null
+++ b/pmu-events/arch/x86/ivytown/ivt-metrics.json
@@ -0,0 +1,164 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "MetricGroup": "Pipeline",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+        "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
+        "MetricGroup": "Frontend",
+        "MetricName": "IFetch_Line_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (threaded)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "MetricGroup": "Pipeline;Summary",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Total issue-pipeline slots",
+        "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "SLOTS"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "SMT",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+        "MetricGroup": "Pipeline;Ports_Utilization",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+        "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)",
+        "MetricGroup": "Unknown_Branches",
+        "MetricName": "BAClear_Cost"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+        "MetricGroup": "Memory_Bound;Memory_Lat",
+        "MetricName": "Load_Miss_Real_Latency"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand loads when there is at least 1 such miss)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+        "MetricGroup": "Memory_Bound;Memory_BW",
+        "MetricName": "MLP"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TLB",
+        "MetricName": "Page_Walks_Utilization"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Giga Floating Point Operations Per Second",
+        "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
+        "MetricGroup": "FLOPS;Summary",
+        "MetricName": "GFLOPs"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware threads were active",
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT;Summary",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Summary",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "C3 residency percent per core",
+        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    }
+]
diff --git a/pmu-events/arch/x86/ivytown/memory.json b/pmu-events/arch/x86/ivytown/memory.json
new file mode 100644
index 0000000..3a7b86a
--- /dev/null
+++ b/pmu-events/arch/x86/ivytown/memory.json
@@ -0,0 +1,503 @@
+[
+    {
+        "PublicDescription": "Speculative cache-line split load uops dispatched to L1D.",
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MISALIGN_MEM_REF.LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative cache-line split Store-address uops dispatched to L1D.",
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MISALIGN_MEM_REF.STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 4.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Loads with latency value being above 4",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 8.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Loads with latency value being above 8",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 16.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Loads with latency value being above 16",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 32.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Loads with latency value being above 32",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 64.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2003",
+        "BriefDescription": "Loads with latency value being above 64",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 128.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "1009",
+        "BriefDescription": "Loads with latency value being above 128",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 256.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "503",
+        "BriefDescription": "Loads with latency value being above 256",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Loads with latency value being above 512.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "101",
+        "BriefDescription": "Loads with latency value being above 512",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "Counter": "3",
+        "UMask": "0x2",
+        "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only.",
+        "PRECISE_STORE": "1",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc00244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x67f800244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC  and the data returned from remote dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x87f800244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC  and the data forwarded from remote cache",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc203f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6004003f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC  and the data returned from local dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x87f8203f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC  and the data forwarded from remote cache",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x107fc003f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data is found in M state in remote cache and forwarded from there",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x600400004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the LLC  and the data returned from local dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x67f800004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the LLC  and the data returned from remote dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x87f820004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the LLC  and the data forwarded from remote cache",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x107fc00004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the LLC and the data is found in M state in remote cache and forwarded from there",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x67fc00001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the LLC  and the data returned from remote & local dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x600400001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the LLC  and the data returned from local dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x67f800001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the LLC  and the data returned from remote dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x87f820001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the LLC  and the data forwarded from remote cache",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x107fc00001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the LLC and the data is found in M state in remote cache and forwarded from there",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x107fc20002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the LLC and the data is found in M state in remote cache and forwarded from there.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that miss the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x67fc00010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC  and the data returned from remote & local dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x600400010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC  and the data returned from local dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x67f800010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x87f820010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC  and the data forwarded from remote cache",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x107fc00010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data is found in M state in remote cache and forwarded from there",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that miss in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivytown/other.json b/pmu-events/arch/x86/ivytown/other.json
new file mode 100644
index 0000000..4eb83ee
--- /dev/null
+++ b/pmu-events/arch/x86/ivytown/other.json
@@ -0,0 +1,44 @@
+[
+    {
+        "PublicDescription": "Unhalted core cycles when the thread is in ring 0.",
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPL_CYCLES.RING0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of intervals between processor halts while thread is in ring 0.",
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "CPL_CYCLES.RING0_TRANS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Number of intervals between processor halts while thread is in ring 0",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.",
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPL_CYCLES.RING123",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.",
+        "EventCode": "0x63",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivytown/pipeline.json b/pmu-events/arch/x86/ivytown/pipeline.json
new file mode 100644
index 0000000..0afbfd9
--- /dev/null
+++ b/pmu-events/arch/x86/ivytown/pipeline.json
@@ -0,0 +1,1309 @@
+[
+    {
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 0",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired from execution.",
+        "CounterHTOff": "Fixed counter 0"
+    },
+    {
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when the thread is not in halt state.",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x3",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "PublicDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "False dependencies in MOB due to partial compare on address.",
+        "EventCode": "0x07",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "False dependencies in MOB due to partial compare on address",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc.)",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EdgeDetect": "1",
+        "EventName": "INT_MISC.RECOVERY_STALLS_COUNT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of occurrences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc.)",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "AnyThread": "1",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increments each cycle the # of Uops issued by the RAT to RS. Set Cmask = 1, Inv = 1, Any = 1 to count stalled cycles of this core.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.",
+        "EventCode": "0x0E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.",
+        "EventCode": "0x0E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of flags-merge uops allocated. Such uops add delay.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of flags-merge uops being allocated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of slow LEA or similar uops allocated. Such uop has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_ISSUED.SLOW_LEA",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of multiply packed/scalar single precision uops allocated.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_ISSUED.SINGLE_MUL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles that the divider is active, includes INT and FP. Set 'edge =1, cmask=1' to count the number of divides.",
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.FPU_DIV_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when divider is busy executing divide operations",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Divide operations executed.",
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EdgeDetect": "1",
+        "EventName": "ARITH.FPU_DIV",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Divide operations executed",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increments at the frequency of XCLK (100 MHz) when not halted.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted. (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted. (counts at 100 MHz rate)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.",
+        "EventCode": "0x4C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for H/W prefetch.",
+        "EventCode": "0x4C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_HIT_PRE.HW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x58",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+        "SampleAfterValue": "1000003",
+        "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles the RS is empty for the thread.",
+        "EventCode": "0x5E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ILD_STALL.LCP",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Stall cycles due to IQ is full.",
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ILD_STALL.IQ_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Stall cycles because IQ is full",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Not taken macro-conditional branches.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not taken macro-conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired macro-conditional branches.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired macro-conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired macro-unconditional branch instructions excluding calls and indirects.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired macro-unconditional branch instructions excluding calls and indirects",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired indirect branches excluding calls and returns.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired indirect branches with return mnemonic.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired direct near calls.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x90",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired direct near calls",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired indirect calls.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa0",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect calls",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative and retired macro-conditional branches.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc1",
+        "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired macro-conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc2",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative and retired indirect branches excluding calls and returns.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc4",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired indirect branches excluding calls and returns",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc8",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired indirect return branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative and retired direct near calls.",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xd0",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired direct near calls",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Not taken speculative and retired mispredicted macro conditional branches.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired mispredicted macro conditional branches.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Taken speculative and retired mispredicted indirect calls.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa0",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect calls",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Speculative and retired mispredicted macro conditional branches.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc1",
+        "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Mispredicted indirect branches excluding calls and returns.",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc4",
+        "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Mispredicted indirect branches excluding calls and returns",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all mispredicted near executed branches (not necessarily retired).",
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted branches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a Uop is dispatched on port 0.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are dispatched to port 0.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a Uop is dispatched on port 1.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are dispatched to port 1.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a Uop is dispatched on port 2.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops dispatched to port 2, loads and stores per core (speculative and retired).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a Uop is dispatched on port 3.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when load or STA uops are dispatched to port 3.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a Uop is dispatched on port 4.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are dispatched to port 4.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles which a Uop is dispatched on port 5.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 5",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles per core when uops are dispatched to port 5.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 5",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles Allocation is stalled due to Resource Related reason.",
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource-related stall cycles",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RESOURCE_STALLS.RS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles stalled due to no store buffers available (not including draining from sync).",
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RESOURCE_STALLS.SB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RESOURCE_STALLS.ROB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to re-order buffer full.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with pending L2 miss loads. Set AnyThread to count per core.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with pending L2 cache miss loads.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while L2 cache miss load* is outstanding.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with pending memory loads. Set AnyThread to count per core.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with pending memory loads.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Total execution stalls.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Total execution stalls.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Number of loads missed L2.",
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls due to L2 cache misses.",
+        "CounterMask": "5",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while L2 cache miss load* is outstanding.",
+        "CounterMask": "5",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x6",
+        "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls due to memory subsystem.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x6",
+        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Cycles with pending L1 cache miss loads. Set AnyThread to count per core.",
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0x8",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with pending L1 cache miss loads.",
+        "CounterMask": "8",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0x8",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "CounterMask": "8",
+        "CounterHTOff": "2"
+    },
+    {
+        "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.",
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0xc",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls due to L1 data cache misses",
+        "CounterMask": "12",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0xc",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "CounterMask": "12",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts total number of uops to be executed per-thread each cycle. Set Cmask = 1, INV =1 to count stall cycles.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts total number of uops to be executed per-core each cycle.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of uops executed on the core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of instructions at retirement.",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.",
+        "EventCode": "0xC0",
+        "Counter": "1",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+        "CounterHTOff": "1"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retired uops.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "CounterMask": "10",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retirement slots used.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of self-modifying-code machine clears detected.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of executed AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MACHINE_CLEARS.MASKMOV",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Branch instructions at retirement.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Conditional branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Direct and indirect near call instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Return instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of not taken branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Not taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of far branches retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Far branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Mispredicted branch instructions at retirement.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted macro branch instructions retired.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Count cases of saving new LBR records by hardware.",
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count cases of saving new LBR records by hardware",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of front end re-steers due to BPU misprediction.",
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "EventName": "BACLEARS.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/ivytown/uncore-cache.json b/pmu-events/arch/x86/ivytown/uncore-cache.json
new file mode 100644
index 0000000..2674105
--- /dev/null
+++ b/pmu-events/arch/x86/ivytown/uncore-cache.json
@@ -0,0 +1,322 @@
+[
+    {
+        "BriefDescription": "Uncore cache clock ticks",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_C_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "All LLC Misses (code + data rd + data wr - including demand and prefetch)",
+        "Counter": "0,1",
+        "EventCode": "0x34",
+        "EventName": "UNC_C_LLC_LOOKUP.ANY",
+        "Filter": "filter_state=0x1",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x11",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "M line evictions from LLC (writebacks to memory)",
+        "Counter": "0,1",
+        "EventCode": "0x37",
+        "EventName": "UNC_C_LLC_VICTIMS.M_STATE",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode.demand",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.DATA_READ",
+        "Filter": "filter_opc=0x182",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses - Uncacheable reads. Derived from unc_c_tor_inserts.miss_opcode.uncacheable",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.UNCACHEABLE",
+        "Filter": "filter_opc=0x187",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC prefetch misses for RFO. Derived from unc_c_tor_inserts.miss_opcode.rfo_prefetch",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.RFO_LLC_PREFETCH",
+        "Filter": "filter_opc=0x190",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC prefetch misses for code reads. Derived from unc_c_tor_inserts.miss_opcode.code",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.CODE_LLC_PREFETCH",
+        "Filter": "filter_opc=0x191",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC prefetch misses for data reads. Derived from unc_c_tor_inserts.miss_opcode.data_read",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.DATA_LLC_PREFETCH",
+        "Filter": "filter_opc=0x192",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe allocating writes that miss LLC - DDIO misses. Derived from unc_c_tor_inserts.miss_opcode.ddio_miss",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_WRITE",
+        "Filter": "filter_opc=0x19c",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses for PCIe read current. Derived from unc_c_tor_inserts.miss_opcode.pcie_read",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_READ",
+        "Filter": "filter_opc=0x19e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses for ItoM writes (as part of fast string memcpy stores). Derived from unc_c_tor_inserts.miss_opcode.itom_write",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.ITOM_WRITE",
+        "Filter": "filter_opc=0x1c8",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses for PCIe non-snoop reads. Derived from unc_c_tor_inserts.miss_opcode.pcie_read",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_NON_SNOOP_READ",
+        "Filter": "filter_opc=0x1e4",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses for PCIe non-snoop writes (full line). Derived from unc_c_tor_inserts.miss_opcode.pcie_write",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_NON_SNOOP_WRITE",
+        "Filter": "filter_opc=0x1e6",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Streaming stores (full cache line). Derived from unc_c_tor_inserts.opcode.streaming_full",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.STREAMING_FULL",
+        "Filter": "filter_opc=0x18c",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Streaming stores (partial cache line). Derived from unc_c_tor_inserts.opcode.streaming_partial",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
+        "Filter": "filter_opc=0x18d",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Partial PCIe reads. Derived from unc_c_tor_inserts.opcode.pcie_partial",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_PARTIAL_READ",
+        "Filter": "filter_opc=0x195",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe allocating writes that hit in LLC (DDIO hits). Derived from unc_c_tor_inserts.opcode.ddio_hit",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_WRITE",
+        "Filter": "filter_opc=0x19c",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe read current. Derived from unc_c_tor_inserts.opcode.pcie_read_current",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_READ",
+        "Filter": "filter_opc=0x19e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "ItoM write hits (as part of fast string memcpy stores). Derived from unc_c_tor_inserts.opcode.itom_write_hit",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.ITOM_WRITE",
+        "Filter": "filter_opc=0x1c8",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe non-snoop reads. Derived from unc_c_tor_inserts.opcode.pcie_read",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_NS_READ",
+        "Filter": "filter_opc=0x1e4",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe non-snoop writes (partial). Derived from unc_c_tor_inserts.opcode.pcie_partial_write",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_NS_PARTIAL_WRITE",
+        "Filter": "filter_opc=0x1e5",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe non-snoop writes (full line). Derived from unc_c_tor_inserts.opcode.pcie_full_write",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_NS_WRITE",
+        "Filter": "filter_opc=0x1e6",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Occupancy for all LLC misses that are addressed to local memory",
+        "EventCode": "0x36",
+        "EventName": "UNC_C_TOR_OCCUPANCY.MISS_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x2A",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Occupancy counter for LLC data reads (demand and L2 prefetch). Derived from unc_c_tor_occupancy.miss_opcode.llc_data_read",
+        "EventCode": "0x36",
+        "EventName": "UNC_C_TOR_OCCUPANCY.LLC_DATA_READ",
+        "Filter": "filter_opc=0x182",
+        "PerPkg": "1",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Occupancy for all LLC misses that are addressed to remote memory",
+        "EventCode": "0x36",
+        "EventName": "UNC_C_TOR_OCCUPANCY.MISS_REMOTE",
+        "PerPkg": "1",
+        "UMask": "0x8A",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Read requests to home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.READS",
+        "PerPkg": "1",
+        "UMask": "0x3",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "Write requests to home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.WRITES",
+        "PerPkg": "1",
+        "UMask": "0xC",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "M line forwarded from remote cache along with writeback to memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x20",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "M line forwarded from remote cache with no writeback to memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPIFWD",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x4",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "Shared line response from remote cache",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPS",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x2",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "Shared line forwarded from remote cache",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_H_SNOOP_RESP.RSPSFWD",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x8",
+        "Unit": "HA"
+    }
+]
diff --git a/pmu-events/arch/x86/ivytown/uncore-interconnect.json b/pmu-events/arch/x86/ivytown/uncore-interconnect.json
new file mode 100644
index 0000000..b798a86
--- /dev/null
+++ b/pmu-events/arch/x86/ivytown/uncore-interconnect.json
@@ -0,0 +1,48 @@
+[
+    {
+        "BriefDescription": "QPI clock ticks. Use to get percentages for QPI cycles events",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x14",
+        "EventName": "UNC_Q_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "QPI LL"
+    },
+    {
+        "BriefDescription": "Cycles where receiving QPI link is in half-width mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x10",
+        "EventName": "UNC_Q_RxL0P_POWER_CYCLES",
+        "MetricExpr": "(UNC_Q_RxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.",
+        "MetricName": "rxl0p_power_cycles %",
+        "PerPkg": "1",
+        "Unit": "QPI LL"
+    },
+    {
+        "BriefDescription": "Cycles where transmitting QPI link is in half-width mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xd",
+        "EventName": "UNC_Q_TxL0P_POWER_CYCLES",
+        "MetricExpr": "(UNC_Q_TxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.",
+        "MetricName": "txl0p_power_cycles %",
+        "PerPkg": "1",
+        "Unit": "QPI LL"
+    },
+    {
+        "BriefDescription": "Number of data flits transmitted",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_Q_TxL_FLITS_G0.DATA",
+        "PerPkg": "1",
+        "ScaleUnit": "8Bytes",
+        "UMask": "0x2",
+        "Unit": "QPI LL"
+    },
+    {
+        "BriefDescription": "Number of non data (control) flits transmitted",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA",
+        "PerPkg": "1",
+        "ScaleUnit": "8Bytes",
+        "UMask": "0x4",
+        "Unit": "QPI LL"
+    }
+]
diff --git a/pmu-events/arch/x86/ivytown/uncore-memory.json b/pmu-events/arch/x86/ivytown/uncore-memory.json
new file mode 100644
index 0000000..df4b432
--- /dev/null
+++ b/pmu-events/arch/x86/ivytown/uncore-memory.json
@@ -0,0 +1,78 @@
+[
+    {
+        "BriefDescription": "Memory page activates for reads and writes",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_M_ACT_COUNT.RD",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Umask": "0x3",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Read requests to memory controller. Derived from unc_m_cas_count.rd",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_READ",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Write requests to memory controller. Derived from unc_m_cas_count.wr",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_WRITE",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0xC",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Memory controller clock ticks. Use to generate percentages for memory controller CYCLES events",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_M_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "UNC_M_POWER_CHANNEL_PPD",
+        "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_channel_ppd %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles all ranks are in critical thermal throttle",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x86",
+        "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES",
+        "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_critical_throttle_cycles %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles Memory is in self refresh power mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x43",
+        "EventName": "UNC_M_POWER_SELF_REFRESH",
+        "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_self_refresh %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Memory page conflicts",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "iMC"
+    }
+]
diff --git a/pmu-events/arch/x86/ivytown/uncore-power.json b/pmu-events/arch/x86/ivytown/uncore-power.json
new file mode 100644
index 0000000..d40498f
--- /dev/null
+++ b/pmu-events/arch/x86/ivytown/uncore-power.json
@@ -0,0 +1,274 @@
+[
+    {
+        "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_P_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band0=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb",
+        "EventName": "UNC_P_FREQ_BAND0_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band0_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band1=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc",
+        "EventName": "UNC_P_FREQ_BAND1_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band1_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band2=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xd",
+        "EventName": "UNC_P_FREQ_BAND2_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band2_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe",
+        "EventName": "UNC_P_FREQ_BAND3_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band3_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band0=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band0_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb",
+        "EventName": "UNC_P_FREQ_BAND0_TRANSITIONS",
+        "Filter": "edge=1",
+        "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band0_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band1=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band1_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc",
+        "EventName": "UNC_P_FREQ_BAND1_TRANSITIONS",
+        "Filter": "edge=1",
+        "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band1_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band2=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band2_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xd",
+        "EventName": "UNC_P_FREQ_BAND2_TRANSITIONS",
+        "Filter": "edge=1",
+        "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band2_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band3_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe",
+        "EventName": "UNC_P_FREQ_BAND3_TRANSITIONS",
+        "Filter": "edge=1",
+        "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band3_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0.  It can be used by itself to get the average number of cores in C0, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+        "Filter": "occ_sel=1",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c0 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3.  It can be used by itself to get the average number of cores in C3, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+        "Filter": "occ_sel=2",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c3 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6.  It can be used by itself to get the average number of cores in C6, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+        "Filter": "occ_sel=3",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c6 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode.  This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xa",
+        "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
+        "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "prochot_external_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_limit_thermal_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x6",
+        "EventName": "UNC_P_FREQ_MAX_OS_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_os_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5",
+        "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_power_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x7",
+        "EventName": "UNC_P_FREQ_MAX_CURRENT_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_CURRENT_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_current_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Cycles spent changing Frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x60",
+        "EventName": "UNC_P_FREQ_TRANS_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_trans_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 1.2Ghz. Derived from unc_p_freq_band0_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb",
+        "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES",
+        "Filter": "filter_band0=1200",
+        "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_1200mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 2Ghz. Derived from unc_p_freq_band1_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc",
+        "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES",
+        "Filter": "filter_band1=2000",
+        "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_2000mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 3Ghz. Derived from unc_p_freq_band2_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xd",
+        "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES",
+        "Filter": "filter_band2=3000",
+        "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_3000mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 4Ghz. Derived from unc_p_freq_band3_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe",
+        "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES",
+        "Filter": "filter_band3=4000",
+        "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_4000mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to 1.2Ghz. Derived from unc_p_freq_band0_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb",
+        "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS",
+        "Filter": "edge=1,filter_band0=1200",
+        "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_1200mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to 2Ghz. Derived from unc_p_freq_band1_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc",
+        "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS",
+        "Filter": "edge=1,filter_band1=2000",
+        "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_2000mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to 3Ghz. Derived from unc_p_freq_band2_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xd",
+        "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS",
+        "Filter": "edge=1,filter_band2=3000",
+        "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_3000mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to 4Ghz. Derived from unc_p_freq_band3_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe",
+        "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS",
+        "Filter": "edge=1,filter_band3=4000",
+        "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_4000mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    }
+]
diff --git a/pmu-events/arch/x86/ivytown/virtual-memory.json b/pmu-events/arch/x86/ivytown/virtual-memory.json
new file mode 100644
index 0000000..4645e9d
--- /dev/null
+++ b/pmu-events/arch/x86/ivytown/virtual-memory.json
@@ -0,0 +1,198 @@
+[
+    {
+        "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size from demand loads.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk of any page size.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "DTLB_LOAD_MISSES.DEMAND_LD_WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Misses in all TLB levels that caused page walk completed of any size by demand loads.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "DTLB_LOAD_MISSES.DEMAND_LD_WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycle PMH is busy with a walk due to demand loads.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "DTLB_LOAD_MISSES.LARGE_PAGE_WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Page walk for a large page completed for Demand load.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Miss in all TLB levels causes a page walk of any page size (4K/2M/4M/1G).",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Miss in all TLB levels causes a page walk that completes of any page size (4K/2M/4M/1G).",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles PMH is busy with this walk.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4F",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "EPT.WALK_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycle count for an Extended Page table walk.  The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts load operations that missed 1st level DTLB but hit the 2nd level.",
+        "EventCode": "0x5F",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Misses in all ITLB levels that cause page walks.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Misses in all ITLB levels that cause completed page walks.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses in all ITLB levels that cause completed page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycle PMH is busy with a walk.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ITLB_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when PMH is busy with page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of cache load STLB hits. No page walk.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Completed page walks in ITLB due to STLB load misses for large pages.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "ITLB_MISSES.LARGE_PAGE_WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Completed page walks in ITLB due to STLB load misses for large pages",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of ITLB flushes, includes 4k/2M/4M pages.",
+        "EventCode": "0xAE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "DTLB flush attempts of the thread-specific entries.",
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Count number of STLB flush attempts.",
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "STLB flush attempts",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/jaketown/cache.json b/pmu-events/arch/x86/jaketown/cache.json
new file mode 100644
index 0000000..f723e8f
--- /dev/null
+++ b/pmu-events/arch/x86/jaketown/cache.json
@@ -0,0 +1,1290 @@
+[
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x11",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that miss the STLB.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x12",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store uops that miss the STLB.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired load uops with locked access.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that split across a cacheline boundary.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store uops that split across a cacheline boundary.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of load uops retired",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired load uops.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of store uops retired.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired store uops.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retired load uops with L1 cache hits as data sources.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops with L2 cache hits as data sources.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts retired load uops that hit in the last-level (L3) cache without snoops required.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_MISS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package).  Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line.  In this case, a snoop was required, and another L2 had the line in a non-modified state.",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a modified state in a neighboring core's private cache (same package).  Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line.  In this case, a snoop was required, and another L2 had the line in a modified state, so the line had to be invalidated in that L2 cache and transferred to the requesting L2.",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Data from remote DRAM either Snoop not needed or Snoop Miss (RspI)",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts L1D data line replacements.  Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier.  ",
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D.REPLACEMENT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D data line replacements.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D.ALLOCATED_IN_M",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Allocated L1D data cache lines in M state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L1D.EVICTION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D data cache lines in M state evicted due to replacement.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L1D.ALL_M_REPLACEMENT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cache lines in M state evicted out of L1D due to Snoop HitM or dirty line replacement.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D miss outstanding duration in cycles.",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "CounterMask": "1",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when L1D is locked.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand Data Read requests sent to uncore.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cacheable and noncacheable code read requests.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand and prefetch data reads.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cases when offcore requests buffer cannot take more entries for core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that hit L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that miss L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache misses when fetching instructions.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_RQSTS.PF_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from the L2 hardware prefetchers that hit L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_RQSTS.PF_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_STORE_LOCK_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that miss cache lines.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_STORE_LOCK_RQSTS.HIT_E",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that hit cache lines in E state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_STORE_LOCK_RQSTS.HIT_M",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that hit cache lines in M state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_STORE_LOCK_RQSTS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that access cache lines in any state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_L1D_WB_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Count the number of modified Lines evicted from L1 and missed L2. (Non-rejected WBs from the DCU.).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_L1D_WB_RQSTS.HIT_S",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in S state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_L1D_WB_RQSTS.HIT_E",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in E state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_L1D_WB_RQSTS.HIT_M",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in M state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_L1D_WB_RQSTS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in any state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_TRANS.DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that access L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_TRANS.RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that access L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_TRANS.CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache accesses when fetching instructions.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_TRANS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 or LLC HW prefetches that access L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_TRANS.L1D_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L1D writebacks that access L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_TRANS.L2_FILL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 fill requests that access L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_TRANS.L2_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 writebacks that access L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_TRANS.ALL_REQUESTS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Transactions accessing L2 pipe.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_IN.I",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in I state filling L2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_IN.S",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in S state filling L2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_IN.E",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in E state filling L2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of L2 cache lines brought into the L2 cache.  Lines are filled into the L2 cache when there was an L2 miss.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "L2_LINES_IN.ALL",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines filling L2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Clean L2 cache lines evicted by demand.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines evicted by demand.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_OUT.PF_CLEAN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Clean L2 cache lines evicted by L2 prefetch.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_LINES_OUT.PF_DIRTY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines evicted by L2 prefetch.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa",
+        "EventName": "L2_LINES_OUT.DIRTY_ALL",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines filling the L2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests missed LLC.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to LLC.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Split locks in SQ.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests to L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 code requests.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc0",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from L2 hardware prefetchers.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "L1D_BLOCKS.BANK_CONFLICT_CYCLES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles when dispatched loads are cancelled due to L1D bank conflicts with other load ports.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_C6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+        "CounterMask": "1",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers unavailability.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoop returned a clean response",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that hit the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoop returned a clean response",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoop returned a clean response",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all writebacks from the core to the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoop returned a clean response",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x803c8000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LRU_HINTS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts L2 hints sent to LLC to keep a line from being evicted out of the core caches",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x23ffc08000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.PORTIO_MMIO_UC",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops sent to sibling cores return clean response",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops sent to sibling cores return clean response",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10400",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts non-temporal stores",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all writebacks from the core",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data reads",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand rfo's",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all writebacks from the core",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x000105B3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x000107F7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo references (demand & prefetch)",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/jaketown/floating-point.json b/pmu-events/arch/x86/jaketown/floating-point.json
new file mode 100644
index 0000000..982eda4
--- /dev/null
+++ b/pmu-events/arch/x86/jaketown/floating-point.json
@@ -0,0 +1,138 @@
+[
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OTHER_ASSISTS.AVX_STORE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of GSSE memory assist for stores. GSSE microcode assist is being invoked whenever the hardware is unable to properly handle GSSE-256b operations.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ASSIST.X87_OUTPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of X87 assists due to output value.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ASSIST.X87_INPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of X87 assists due to input value.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_ASSIST.SIMD_OUTPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of SIMD FP assists due to Output values.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_ASSIST.SIMD_INPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of SIMD FP assists due to input values.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_COMP_OPS_EXE.X87",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of FP Computational Uops Executed this cycle. The number of FADD, FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. This event does not distinguish an FADD used in the middle of a transcendental flow from a separate FADD instruction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed double-precision uops issued this cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar single-precision uops issued this cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed single-precision uops issued this cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar double-precision uops issued this cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_FP_256.PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of GSSE-256 Computational FP single precision uops issued this cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_FP_256.PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of AVX-256 Computational FP double precision uops issued this cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1e",
+        "EventName": "FP_ASSIST.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/jaketown/frontend.json b/pmu-events/arch/x86/jaketown/frontend.json
new file mode 100644
index 0000000..1b7b1dd
--- /dev/null
+++ b/pmu-events/arch/x86/jaketown/frontend.json
@@ -0,0 +1,305 @@
+[
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ICACHE.HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads, both cacheable and noncacheable, including UC fetches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. Counting includes uncacheable accesses.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ICACHE.MISSES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Instruction cache, streaming buffer and victim cache misses.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "IDQ.EMPTY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the front-end in delivering uops.  Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder.  Using other instructions, if possible, will usually improve performance.  See the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual for more information.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of uops not delivered to the back-end per cycle, per thread, when the back-end was not stalled.  In the ideal case 4 uops can be delivered each cycle.  The event counts the undelivered uops - so if 3 were delivered in one cycle, the counter would be incremented by 1 for that cycle (4 - 3). If the back-end is stalled, the count for this event is not incremented even when uops were not delivered, because the back-end would not have been able to accept them.  This event is used in determining the front-end bound category of the top-down pipeline slots characterization.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xAB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DSB2MITE_SWITCHES.COUNT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the cycles attributed to a switch from the Decoded Stream Buffer (DSB), which holds decoded instructions, to the legacy decode pipeline.  It excludes cycles when the back-end cannot  accept new micro-ops.  The penalty for these switches is potentially several cycles of instruction starvation, where no micro-ops are delivered to the back-end.",
+        "EventCode": "0xAB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xAC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DSB_FILL.OTHER_CANCEL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cases of cancelling valid DSB fill not because of exceeding way limit.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xAC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "DSB_FILL.EXCEED_DSB_LINES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_DSB_OCCUR",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with 2 or fewer uops delivered by the front end.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with 3 or fewer uops delivered by the front end.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_GE_1_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when 1 or more uops were delivered by the front end.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering any Uop.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xAC",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa",
+        "EventName": "DSB_FILL.ALL_CANCEL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cases of cancelling valid Decode Stream Buffer (DSB) fill not because of exceeding way limit.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3c",
+        "EventName": "IDQ.MITE_ALL_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_SWITCHES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/jaketown/jkt-metrics.json b/pmu-events/arch/x86/jaketown/jkt-metrics.json
new file mode 100644
index 0000000..fd7d7c4
--- /dev/null
+++ b/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -0,0 +1,140 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "MetricGroup": "Pipeline",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+        "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
+        "MetricGroup": "Frontend",
+        "MetricName": "IFetch_Line_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (threaded)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "MetricGroup": "Pipeline;Summary",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Total issue-pipeline slots",
+        "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "SLOTS"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "SMT",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
+        "MetricGroup": "Pipeline;Ports_Utilization",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Giga Floating Point Operations Per Second",
+        "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
+        "MetricGroup": "FLOPS;Summary",
+        "MetricName": "GFLOPs"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware threads were active",
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT;Summary",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Summary",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "C3 residency percent per core",
+        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    }
+]
diff --git a/pmu-events/arch/x86/jaketown/memory.json b/pmu-events/arch/x86/jaketown/memory.json
new file mode 100644
index 0000000..27e6364
--- /dev/null
+++ b/pmu-events/arch/x86/jaketown/memory.json
@@ -0,0 +1,422 @@
+[
+    {
+        "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from memory disambiguation, external snoops, or cross SMT-HW-thread snoop (stores) hitting load buffers.  Machine clears can have a significant performance impact if they are happening frequently.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Loads with latency value being above 4.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Loads with latency value being above 8.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Loads with latency value being above 16.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Loads with latency value being above 32.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2003",
+        "BriefDescription": "Loads with latency value being above 64.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "1009",
+        "BriefDescription": "Loads with latency value being above 128.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "503",
+        "BriefDescription": "Loads with latency value being above 256.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "101",
+        "BriefDescription": "Loads with latency value being above 512.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "Counter": "3",
+        "UMask": "0x2",
+        "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only. (Precise Event - PEBS).",
+        "PRECISE_STORE": "1",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MISALIGN_MEM_REF.LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MISALIGN_MEM_REF.STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x600400004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from local dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x67f800004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from remote dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x87f820004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the LLC and the data forwarded from remote cache",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x107fc00004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that miss the LLC and the data is found in M state in remote cache and forwarded from there",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x67fc00001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote & local dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x600400001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from local dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x67f800001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x87f820001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the LLC and the data forwarded from remote cache",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x107fc00001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the LLC and the data is found in M state in remote cache and forwarded from there",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that miss in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x67fc00010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote & local dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x600400010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from local dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x67f800010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote dram",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x87f820010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data forwarded from remote cache",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x107fc00010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data is found in M state in remote cache and forwarded from there",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fffc20080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that miss in the LLC",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x600400077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DEMAND_MLC_PREF_READS.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all local dram accesses for all demand and L2 prefetches. LLC prefetches are excluded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3FFFC20077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DEMAND_MLC_PREF_READS.LLC_MISS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts all LLC misses for all demand and L2 prefetches. LLC prefetches are excluded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x187FC20077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DEMAND_MLC_PREF_READS.LLC_MISS.REMOTE_HITM_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts all remote cache-to-cache transfers (includes HITM and HIT-Forward) for all demand and L2 prefetches. LLC prefetches are excluded.",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/jaketown/other.json b/pmu-events/arch/x86/jaketown/other.json
new file mode 100644
index 0000000..64b195b
--- /dev/null
+++ b/pmu-events/arch/x86/jaketown/other.json
@@ -0,0 +1,58 @@
+[
+    {
+        "EventCode": "0x17",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INSTS_WRITTEN_TO_IQ.INSTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Valid instructions written to IQ per cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPL_CYCLES.RING0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Unhalted core cycles when the thread is in ring 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "CPL_CYCLES.RING0_TRANS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Number of intervals between processor halts while thread is in ring 0.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPL_CYCLES.RING123",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "HW_PRE_REQ.DL1_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Hardware Prefetch requests that miss the L1D cache. This accounts for both L1 streamer and IP-based (IPP) HW prefetchers. A request is counted each time it accesses the cache and misses it, including if a block is applicable or if it hits the Fill Buffer.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/jaketown/pipeline.json b/pmu-events/arch/x86/jaketown/pipeline.json
new file mode 100644
index 0000000..8a597e4
--- /dev/null
+++ b/pmu-events/arch/x86/jaketown/pipeline.json
@@ -0,0 +1,1220 @@
+[
+    {
+        "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. ",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired from execution.",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when the thread is not in halt state.",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 3",
+        "UMask": "0x3",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "CounterHTOff": "Fixed counter 3"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not taken macro-conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired macro-conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired macro-unconditional branch instructions excluding calls and indirects.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x90",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired direct near calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa0",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc1",
+        "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired macro-conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc2",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc4",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired indirect branches excluding calls and returns.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc8",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired indirect return branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xd0",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired direct near calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x90",
+        "EventName": "BR_MISP_EXEC.TAKEN_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted direct near calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa0",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc1",
+        "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc4",
+        "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Mispredicted indirect branches excluding calls and returns.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xd0",
+        "EventName": "BR_MISP_EXEC.ALL_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted direct near calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Thread cycles when thread is not in halt state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are delivered by the LSD, but did not come from the decoder.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ILD_STALL.LCP",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ILD_STALL.IQ_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Stall cycles because IQ is full.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "INT_MISC.RAT_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x59",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Increments the number of flags-merge uops in flight each cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles with at least one slow LEA uop being allocated. A uop is generally considered as slow LEA if it has three sources (for example, two sources and immediate) regardless of whether it is a result of LEA instruction or not. Examples of the slow LEA uop are uops with base, index, and offset source operands using base and index registers, where base is EBP/RBP/R13, using RIP relative or 16-bit addressing modes. See the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual for more details about slow LEA instructions.",
+        "EventCode": "0x59",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with at least one slow LEA uop being allocated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x59",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "PARTIAL_RAT_STALLS.MUL_SINGLE_UOP",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Multiply packed/scalar single precision uops allocated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource-related stall cycles.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RESOURCE_STALLS.LB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the cycles of stall due to lack of load buffers.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RESOURCE_STALLS.RS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RESOURCE_STALLS.SB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RESOURCE_STALLS.ROB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to re-order buffer full.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5B",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "RESOURCE_STALLS2.BOB_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Allocator is stalled if BOB is full and new branch needs it.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of Uops issued by the front-end of the pipeline to the back-end.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x0E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x5E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count cases of saving new LBR.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear.  Machine clears can have a significant performance impact if they are happening frequently.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MACHINE_CLEARS.MASKMOV",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of instructions retired. General Counter   - architectural event.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of micro-ops retired.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Actually retired uops.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of retirement slots used each cycle.  There are potentially 4 slots that can be used each cycle - meaning, 4 micro-ops or 4 instructions could retire each cycle.  This event is used in determining the 'Retiring' category of the Top-Down pipeline slots characterization.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retirement slots used.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "CounterMask": "10",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Conditional branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Direct and indirect near call instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Return instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Not taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Far branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS).",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Direct and indirect mispredicted near call instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_MISP_RETIRED.NOT_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted not taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_MISP_RETIRED.TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS).",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OTHER_ASSISTS.ITLB_MISS_RETIRED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired instructions experiencing ITLB misses.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.FPU_DIV_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when divider is busy executing divide operations.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of the divide operations executed.",
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "ARITH.FPU_DIV",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Divide operations executed.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DISPATCHED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops dispatched per thread.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DISPATCHED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops dispatched from any thread.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 1.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 4.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 5.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_NO_DISPATCH",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Each cycle there was no dispatch for this thread, increment by 1. Note this is connected to Umask 2. No dispatch can be deduced from the UOPS_EXECUTED event.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0x2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Each cycle there was a miss-pending demand load this thread, increment by 1. Note this is in DCU and connected to Umask 1. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.",
+        "CounterMask": "2",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Each cycle there was a MLC-miss pending demand load this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. Note this is in MLC and connected to Umask 0.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0x6",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Each cycle there was a miss-pending demand load this thread and no uops dispatched, increment by 1. Note this is in DCU and connected to Umask 1 and 2. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.",
+        "CounterMask": "6",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Each cycle there was a MLC-miss pending demand load and no uops dispatched on this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. Note this is in MLC and connected to Umask 0 and 2.",
+        "CounterMask": "5",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x4C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_HIT_PRE.HW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Loads delayed due to SB blocks, preceding store operations with known addresses but unknown data.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load.  The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store.  See the table of not supported store forwards in the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual.  The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "LD_BLOCKS.ALL_BLOCK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of cases where any load ends up with a valid block-code written to the load buffer (including blocks due to Memory Order Buffer (MOB), Data Cache Unit (DCU), TLB, but load has no DCU miss).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Aliasing occurs when a load is issued after a store and their memory addresses are offset by 4K.  This event counts the number of loads that aliased with a preceding store, resulting in an extended address check in the pipeline.  The enhanced address check typically has a performance penalty of 5 cycles.",
+        "EventCode": "0x07",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "False dependencies in MOB due to partial compare.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x07",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "LD_BLOCKS_PARTIAL.ALL_STA_BLOCK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of times that load operations are temporarily blocked because of older stores, with addresses that are not yet known. A load operation may incur more than one block of this type.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "AGU_BYPASS_CANCEL.COUNT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts executed load operations with all the following traits: 1. addressing of the format [base + offset], 2. the offset is between 1 and 2047, 3. the address specified in the base register is in one page and the address [base+offset] is in another page.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 1.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 4.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 5.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC0",
+        "Counter": "1",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired. (Precise Event - PEBS).",
+        "TakenAlone": "1",
+        "CounterHTOff": "1"
+    },
+    {
+        "EventCode": "0x5B",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "RESOURCE_STALLS2.ALL_PRF_CONTROL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource stalls2 control structures full for physical registers.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5B",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "RESOURCE_STALLS2.ALL_FL_EMPTY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with either free list is empty.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "RESOURCE_STALLS.MEM_RS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource stalls due to memory buffers or Reservation Station (RS) being fully utilized.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf0",
+        "EventName": "RESOURCE_STALLS.OOO_RSRC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource stalls due to Rob being full, FCSW, MXCSR and OTHER.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5B",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "RESOURCE_STALLS2.OOO_RSRC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource stalls out of order resources full.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa",
+        "EventName": "RESOURCE_STALLS.LB_SB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource stalls due to load or store buffers all being in use.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles spent executing performance-sensitive flags-merging uops. For example, shift CL (merge_arith_flags). For more details, see the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual.",
+        "EventCode": "0x59",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Performance sensitive flags-merging uops added by Sandy Bridge u-arch.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EdgeDetect": "1",
+        "EventName": "INT_MISC.RECOVERY_STALLS_COUNT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of occurrences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "EventName": "BACLEARS.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired  branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "AnyThread": "1",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/jaketown/uncore-cache.json b/pmu-events/arch/x86/jaketown/uncore-cache.json
new file mode 100644
index 0000000..3fa61d9
--- /dev/null
+++ b/pmu-events/arch/x86/jaketown/uncore-cache.json
@@ -0,0 +1,210 @@
+[
+    {
+        "BriefDescription": "Uncore cache clock ticks",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_C_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "All LLC Misses (code + data rd + data wr - including demand and prefetch)",
+        "Counter": "0,1",
+        "EventCode": "0x34",
+        "EventName": "UNC_C_LLC_LOOKUP.ANY",
+        "Filter": "filter_state=0x1",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x11",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "M line evictions from LLC (writebacks to memory)",
+        "Counter": "0,1",
+        "EventCode": "0x37",
+        "EventName": "UNC_C_LLC_VICTIMS.M_STATE",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode.demand",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.DATA_READ",
+        "Filter": "filter_opc=0x182",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses - Uncacheable reads. Derived from unc_c_tor_inserts.miss_opcode.uncacheable",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.UNCACHEABLE",
+        "Filter": "filter_opc=0x187",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe allocating writes that miss LLC - DDIO misses. Derived from unc_c_tor_inserts.miss_opcode.ddio_miss",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.PCIE_WRITE",
+        "Filter": "filter_opc=0x19c",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "LLC misses for ItoM writes (as part of fast string memcpy stores). Derived from unc_c_tor_inserts.miss_opcode.itom_write",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.ITOM_WRITE",
+        "Filter": "filter_opc=0x1c8",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Streaming stores (full cache line). Derived from unc_c_tor_inserts.opcode.streaming_full",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.STREAMING_FULL",
+        "Filter": "filter_opc=0x18c",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Streaming stores (partial cache line). Derived from unc_c_tor_inserts.opcode.streaming_partial",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
+        "Filter": "filter_opc=0x18d",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Partial PCIe reads. Derived from unc_c_tor_inserts.opcode.pcie_partial",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_PARTIAL_READ",
+        "Filter": "filter_opc=0x195",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe allocating writes that hit in LLC (DDIO hits). Derived from unc_c_tor_inserts.opcode.ddio_hit",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_WRITE",
+        "Filter": "filter_opc=0x19c",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe read current. Derived from unc_c_tor_inserts.opcode.pcie_read_current",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_READ",
+        "Filter": "filter_opc=0x19e",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "ItoM write hits (as part of fast string memcpy stores). Derived from unc_c_tor_inserts.opcode.itom_write_hit",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.ITOM_WRITE",
+        "Filter": "filter_opc=0x1c8",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe non-snoop reads. Derived from unc_c_tor_inserts.opcode.pcie_read",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_NS_READ",
+        "Filter": "filter_opc=0x1e4",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe non-snoop writes (partial). Derived from unc_c_tor_inserts.opcode.pcie_partial_write",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_NS_PARTIAL_WRITE",
+        "Filter": "filter_opc=0x1e5",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "PCIe non-snoop writes (full line). Derived from unc_c_tor_inserts.opcode.pcie_full_write",
+        "Counter": "0,1",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.PCIE_NS_WRITE",
+        "Filter": "filter_opc=0x1e6",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x1",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Occupancy counter for all LLC misses; we divide this by UNC_C_CLOCKTICKS to get average Q depth",
+        "EventCode": "0x36",
+        "EventName": "UNC_C_TOR_OCCUPANCY.MISS_ALL",
+        "Filter": "filter_opc=0x182",
+        "MetricExpr": "(UNC_C_TOR_OCCUPANCY.MISS_ALL / UNC_C_CLOCKTICKS) * 100.",
+        "MetricName": "tor_occupancy.miss_all %",
+        "PerPkg": "1",
+        "UMask": "0xa",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "Occupancy counter for LLC data reads (demand and L2 prefetch). Derived from unc_c_tor_occupancy.miss_opcode.llc_data_read",
+        "EventCode": "0x36",
+        "EventName": "UNC_C_TOR_OCCUPANCY.LLC_DATA_READ",
+        "PerPkg": "1",
+        "UMask": "0x3",
+        "Unit": "CBO"
+    },
+    {
+        "BriefDescription": "read requests to home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.READS",
+        "PerPkg": "1",
+        "UMask": "0x3",
+        "Unit": "HA"
+    },
+    {
+        "BriefDescription": "write requests to home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_H_REQUESTS.WRITES",
+        "PerPkg": "1",
+        "UMask": "0xc",
+        "Unit": "HA"
+    }
+]
diff --git a/pmu-events/arch/x86/jaketown/uncore-interconnect.json b/pmu-events/arch/x86/jaketown/uncore-interconnect.json
new file mode 100644
index 0000000..1b53c0e
--- /dev/null
+++ b/pmu-events/arch/x86/jaketown/uncore-interconnect.json
@@ -0,0 +1,48 @@
+[
+    {
+        "BriefDescription": "QPI clock ticks. Used to get percentages of QPI cycles events",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x14",
+        "EventName": "UNC_Q_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "QPI LL"
+    },
+    {
+        "BriefDescription": "Cycles where receiving QPI link is in half-width mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x10",
+        "EventName": "UNC_Q_RxL0P_POWER_CYCLES",
+        "MetricExpr": "(UNC_Q_RxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.",
+        "MetricName": "rxl0p_power_cycles %",
+        "PerPkg": "1",
+        "Unit": "QPI LL"
+    },
+    {
+        "BriefDescription": "Cycles where transmitting QPI link is in half-width mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xd",
+        "EventName": "UNC_Q_TxL0P_POWER_CYCLES",
+        "MetricExpr": "(UNC_Q_TxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.",
+        "MetricName": "txl0p_power_cycles %",
+        "PerPkg": "1",
+        "Unit": "QPI LL"
+    },
+    {
+        "BriefDescription": "Number of data flits transmitted",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_Q_TxL_FLITS_G0.DATA",
+        "PerPkg": "1",
+        "ScaleUnit": "8Bytes",
+        "UMask": "0x2",
+        "Unit": "QPI LL"
+    },
+    {
+        "BriefDescription": "Number of non data (control) flits transmitted",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA",
+        "PerPkg": "1",
+        "ScaleUnit": "8Bytes",
+        "UMask": "0x4",
+        "Unit": "QPI LL"
+    }
+]
diff --git a/pmu-events/arch/x86/jaketown/uncore-memory.json b/pmu-events/arch/x86/jaketown/uncore-memory.json
new file mode 100644
index 0000000..8551ceb
--- /dev/null
+++ b/pmu-events/arch/x86/jaketown/uncore-memory.json
@@ -0,0 +1,82 @@
+[
+    {
+        "BriefDescription": "Memory page activates",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_M_ACT_COUNT",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_READ",
+        "PerPkg": "1",
+        "UMask": "0x3",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_WRITE",
+        "PerPkg": "1",
+        "UMask": "0xc",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Memory controller clock ticks. Used to get percentages of memory controller cycles events",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_M_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "UNC_M_POWER_CHANNEL_PPD",
+        "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_channel_ppd %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles all ranks are in critical thermal throttle",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x86",
+        "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES",
+        "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_critical_throttle_cycles %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles Memory is in self refresh power mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x43",
+        "EventName": "UNC_M_POWER_SELF_REFRESH",
+        "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_self_refresh %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Memory page conflicts",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Occupancy counter for memory read queue",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_M_RPQ_OCCUPANCY",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    }
+]
diff --git a/pmu-events/arch/x86/jaketown/uncore-power.json b/pmu-events/arch/x86/jaketown/uncore-power.json
new file mode 100644
index 0000000..16034bf
--- /dev/null
+++ b/pmu-events/arch/x86/jaketown/uncore-power.json
@@ -0,0 +1,273 @@
+[
+    {
+        "BriefDescription": "PCU clock ticks. Used to get percentages of PCU cycles events",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_P_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band0=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb",
+        "EventName": "UNC_P_FREQ_BAND0_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band0_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band1=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc",
+        "EventName": "UNC_P_FREQ_BAND1_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band1_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band2=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xd",
+        "EventName": "UNC_P_FREQ_BAND2_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band2_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe",
+        "EventName": "UNC_P_FREQ_BAND3_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band3_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band0=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band0_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb",
+        "EventName": "UNC_P_FREQ_BAND0_TRANSITIONS",
+        "Filter": "edge=1",
+        "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band0_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band1=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band1_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc",
+        "EventName": "UNC_P_FREQ_BAND1_TRANSITIONS",
+        "Filter": "edge=1",
+        "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band1_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band2=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band2_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xd",
+        "EventName": "UNC_P_FREQ_BAND2_TRANSITIONS",
+        "Filter": "edge=1",
+        "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band2_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter.  (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band3_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe",
+        "EventName": "UNC_P_FREQ_BAND3_TRANSITIONS",
+        "Filter": "edge=1",
+        "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_band3_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0.  It can be used by itself to get the average number of cores in C0, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+        "Filter": "occ_sel=1",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c0 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3.  It can be used by itself to get the average number of cores in C3, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+        "Filter": "occ_sel=2",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c3 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6.  It can be used by itself to get the average number of cores in C6, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+        "Filter": "occ_sel=3",
+        "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "power_state_occupancy.cores_c6 %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode.  This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xa",
+        "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
+        "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "prochot_external_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_limit_thermal_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x6",
+        "EventName": "UNC_P_FREQ_MAX_OS_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_os_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5",
+        "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_power_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x7",
+        "EventName": "UNC_P_FREQ_MAX_CURRENT_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_MAX_CURRENT_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_max_current_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Cycles spent changing Frequency",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_P_FREQ_TRANS_CYCLES",
+        "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_trans_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 1.2Ghz. Derived from unc_p_freq_band0_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb",
+        "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES",
+        "Filter": "filter_band0=1200",
+        "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_1200mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 2Ghz. Derived from unc_p_freq_band1_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc",
+        "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES",
+        "Filter": "filter_band1=2000",
+        "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_2000mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 3Ghz. Derived from unc_p_freq_band2_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xd",
+        "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES",
+        "Filter": "filter_band2=3000",
+        "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_3000mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 4Ghz. Derived from unc_p_freq_band3_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe",
+        "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES",
+        "Filter": "filter_band3=4000",
+        "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_4000mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to 1.2Ghz. Derived from unc_p_freq_band0_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xb",
+        "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS",
+        "Filter": "edge=1,filter_band0=1200",
+        "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_1200mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to 2Ghz. Derived from unc_p_freq_band1_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xc",
+        "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS",
+        "Filter": "edge=1,filter_band1=2000",
+        "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_2000mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to 3Ghz. Derived from unc_p_freq_band2_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xd",
+        "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS",
+        "Filter": "edge=1,filter_band2=4000",
+        "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_3000mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    },
+    {
+        "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to 4Ghz. Derived from unc_p_freq_band3_cycles",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xe",
+        "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS",
+        "Filter": "edge=1,filter_band3=4000",
+        "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+        "MetricName": "freq_ge_4000mhz_cycles %",
+        "PerPkg": "1",
+        "Unit": "PCU"
+    }
+]
diff --git a/pmu-events/arch/x86/jaketown/virtual-memory.json b/pmu-events/arch/x86/jaketown/virtual-memory.json
new file mode 100644
index 0000000..a654ab7
--- /dev/null
+++ b/pmu-events/arch/x86/jaketown/virtual-memory.json
@@ -0,0 +1,149 @@
+[
+    {
+        "EventCode": "0xAE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4F",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "EPT.WALK_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycle count for an Extended Page table walk.  The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses in all ITLB levels that cause completed page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles when the Page Miss Handler (PMH) is servicing page walks caused by ITLB misses.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ITLB_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when PMH is busy with page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Load misses in all DTLB levels that cause page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Load misses at all DTLB levels that cause completed page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB load misses.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when PMH is busy with page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts load operations that miss the first DTLB level but hit the second and do not cause any page walks. The penalty in this case is approximately 7 cycles.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when PMH is busy with page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "STLB flush attempts.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/knightslanding/cache.json b/pmu-events/arch/x86/knightslanding/cache.json
new file mode 100644
index 0000000..88ba599
--- /dev/null
+++ b/pmu-events/arch/x86/knightslanding/cache.json
@@ -0,0 +1,2305 @@
+[
+    {
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "L2_REQUESTS_REJECT.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of MEC requests from the L2Q that reference a cache line (cacheable requests) excluding SW prefetches filling only to L2 cache and L1 evictions (automatically excludes L2HWP, UC, WC) that were rejected - Multiple repeated rejects should be counted multiple times"
+    },
+    {
+        "EventCode": "0x31",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "CORE_REJECT_L2Q.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of MEC requests that were not accepted into the L2Q because of any L2  queue reject condition. There is no concept of at-ret here. It might include requests due to instructions in the speculative path."
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x4f",
+        "EventName": "L2_REQUESTS.REFERENCE",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the total number of L2 cache references."
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x41",
+        "EventName": "L2_REQUESTS.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of L2 cache misses"
+    },
+    {
+        "PublicDescription": "This event counts the number of core cycles the fetch stalls because of an icache miss. This is a cumulative count of cycles the NIP stalled for all icache misses. ",
+        "EventCode": "0x86",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of core cycles the fetch stalls because of an icache miss. This is a cumulative count of core cycles the fetch stalled for all icache misses. "
+    },
+    {
+        "PublicDescription": "This event counts the number of load micro-ops retired that miss in L1 Data cache. Note that prefetch misses will not be counted. ",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "MEM_UOPS_RETIRED.L1_MISS_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of load micro-ops retired that miss in L1 D cache"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "MEM_UOPS_RETIRED.L2_HIT_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of load micro-ops retired that hit in the L2",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "MEM_UOPS_RETIRED.L2_MISS_LOADS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of load micro-ops retired that miss in the L2",
+        "Data_LA": "1"
+    },
+    {
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "MEM_UOPS_RETIRED.UTLB_MISS_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of load micro-ops retired that caused micro TLB miss"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "MEM_UOPS_RETIRED.HITM",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the loads retired that get the data from the other core in the same tile in M state",
+        "Data_LA": "1"
+    },
+    {
+        "PublicDescription": "This event counts the number of load micro-ops retired.",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts all the load micro-ops retired"
+    },
+    {
+        "PublicDescription": "This event counts the number of store micro-ops retired.",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x80",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts all the store micro-ops retired"
+    },
+    {
+        "EventCode": "0xB7",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the matrix events specified by MSR_OFFCORE_RESPx"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000400070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800400070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000080070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800080070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x40000032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x10004032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x08004032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x10000832f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x08000832f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x00000132f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000400044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800400044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000080044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800080044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000400022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800400022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000080022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800080022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000003091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000403091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800403091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000083091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800083091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000013091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000008000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000408000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800408000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000088000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800088000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000018000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000014800 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts all streaming stores (WC and should be programmed on PMC1) that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000014000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.ANY_RESPONSE",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial streaming stores (WC and should be programmed on PMC1) that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000002000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000402000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800402000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000082000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800082000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000012000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000001000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000401000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800401000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000081000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800081000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000011000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010800 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Full streaming stores (WC and should be programmed on PMC1) that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000400400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800400400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000080400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800080400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000400200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800400200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000080200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800080200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000400100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800400100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000080100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800080100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.ANY_RESPONSE",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000400080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800400080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000080080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800080080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000400040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800400040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000080040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800080040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000400020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800400020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000080020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800080020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000020020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that provides no supplier details",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000400004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800400004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000080004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800080004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000400002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800400002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000080002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800080002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000400001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_FAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800400001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_FAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000080001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_NEAR_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0800080001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_NEAR_TILE_E_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for any response",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002000001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002000002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002000004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002000020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002000080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type).  that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002000100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002000200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002000400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002001000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002002000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002008000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002003091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002000022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002000044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x00020032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0002000070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_THIS_TILE_M",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in M state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004000001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004000002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004000004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004000020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004000040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004000080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type).  that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004000100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004000200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004000400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004001000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004002000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004008000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004003091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004000022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004000044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x00040032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0004000070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_THIS_TILE_E",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in E state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008000001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008000002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008000004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008000020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008000080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type).  that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008000100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008000200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008000400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008001000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008002000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008008000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008003091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008000022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0008000044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x00080032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_S",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for responses which hit its own tile's L2 with data in S state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010000001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010000002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010000004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010000020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010000040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010000080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type).  that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010000100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010000200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010000400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010001000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010002000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010008000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010003091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010000022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010000044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x00100032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0010000070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_THIS_TILE_F",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in F state ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800180002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800180004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800180020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800180040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800180080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800180100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800180200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800180400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800181000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800182000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800188000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800183091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800180022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800180044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x18001832f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800180070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_NEAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800400002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800400004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800400040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800400080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800400100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800400400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800401000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800402000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800408000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800403091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800400022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800400044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x18004032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1800400070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_FAR_TILE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for responses from snoop request hit with data forwarded from its Far (not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/knightslanding/frontend.json b/pmu-events/arch/x86/knightslanding/frontend.json
new file mode 100644
index 0000000..6d38636
--- /dev/null
+++ b/pmu-events/arch/x86/knightslanding/frontend.json
@@ -0,0 +1,34 @@
+[
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1",
+        "UMask": "0x3",
+        "EventName": "ICACHE.ACCESSES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts all instruction fetches, including uncacheable fetches."
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "ICACHE.HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts all instruction fetches that hit the instruction cache."
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "ICACHE.MISSES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts all instruction fetches that miss the instruction cache or produce memory requests. An instruction fetch miss is counted only once and not once for every cycle it is outstanding."
+    },
+    {
+        "EventCode": "0xE7",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "MS_DECODED.MS_ENTRY",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of times the MSROM starts a flow of uops."
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/knightslanding/memory.json b/pmu-events/arch/x86/knightslanding/memory.json
new file mode 100644
index 0000000..7006525
--- /dev/null
+++ b/pmu-events/arch/x86/knightslanding/memory.json
@@ -0,0 +1,1110 @@
+[
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of times the machine clears due to memory ordering hazards"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100400070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080200070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101000070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080800070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x01004032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x00802032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x01010032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x00808032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100400044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080200044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101000044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080800044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100400022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080200022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101000022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080800022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100403091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080203091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101003091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080803091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100408000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080208000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101008000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080808000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100402000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080202000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101002000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080802000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100401000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080201000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101001000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080801000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100400400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080200400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101000400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080800400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100400200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080200200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101000200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080800200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100400100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.MCDRAM_FAR",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080200100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.MCDRAM_NEAR",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101000100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.DDR_FAR",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080800100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.DDR_NEAR",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000020080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for responses from any NON_DRAM system address. This includes MMIO transactions",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100400080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080200080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101000080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080800080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100400040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080200040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101000040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080800040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2000020020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from any NON_DRAM system address. This includes MMIO transactions",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100400020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080200020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101000020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080800020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100400004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080200004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101000004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080800004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100400002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080200002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101000002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080800002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0100400001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.MCDRAM_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080200001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.MCDRAM_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from MCDRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0101000001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.DDR_FAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from DRAM Far. ",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080800001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.DDR_NEAR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from DRAM Local.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180600001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180600002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180600004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180600020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180600080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180600100 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.MCDRAM",
+        "MSRIndex": "0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180600200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180600400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180601000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180608000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180603091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180600022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180600044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x01806032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0180600070 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.MCDRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Prefetch requests that accounts for responses from MCDRAM (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181800001 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181800002 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181800004 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181800020 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181800040 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181800080 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type) that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181800200 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts UC code reads (valid only for Outstanding response type)  that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181800400 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181801000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Software Prefetches that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181802000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181808000 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181803091 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests  that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181800022 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand cacheable data write requests  that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0181800044 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts Demand code reads and prefetch code read requests  that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x01818032f7 ",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_READ.DDR",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any Read request  that accounts for responses from DDR (local and far)",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/knightslanding/pipeline.json b/pmu-events/arch/x86/knightslanding/pipeline.json
new file mode 100644
index 0000000..bb5494c
--- /dev/null
+++ b/pmu-events/arch/x86/knightslanding/pipeline.json
@@ -0,0 +1,435 @@
+[
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0x7e",
+        "EventName": "BR_INST_RETIRED.JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of branch instructions retired that were conditional jumps."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xfe",
+        "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of branch instructions retired that were conditional jumps and predicted taken."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xf9",
+        "EventName": "BR_INST_RETIRED.CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of near CALL branch instructions retired."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xfd",
+        "EventName": "BR_INST_RETIRED.REL_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of near relative CALL branch instructions retired."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xfb",
+        "EventName": "BR_INST_RETIRED.IND_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of near indirect CALL branch instructions retired."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xf7",
+        "EventName": "BR_INST_RETIRED.RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of near RET branch instructions retired."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xeb",
+        "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of branch instructions retired that were near indirect CALL or near indirect JMP."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xbf",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of far branch instructions retired."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0x7e",
+        "EventName": "BR_MISP_RETIRED.JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted branch instructions retired that were conditional jumps."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0xfe",
+        "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted branch instructions retired that were conditional jumps and predicted taken."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0xfb",
+        "EventName": "BR_MISP_RETIRED.IND_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0xf7",
+        "EventName": "BR_MISP_RETIRED.RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0xeb",
+        "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted branch instructions retired that were near indirect CALL or near indirect JMP."
+    },
+    {
+        "PublicDescription": "This event counts the number of micro-ops retired that were supplied from MSROM.",
+        "EventCode": "0xC2",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.MS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of micro-ops retired that are from the complex flows issued by the micro-sequencer (MS)."
+    },
+    {
+        "PublicDescription": "This event counts the number of micro-ops (uops) retired. The processor decodes complex macro instructions into a sequence of simpler uops. Most instructions are composed of one or two uops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists. ",
+        "EventCode": "0xC2",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "UOPS_RETIRED.ALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of micro-ops retired"
+    },
+    {
+        "PublicDescription": "This event counts the number of scalar SSE, AVX, AVX2, AVX-512 micro-ops retired (floating point, integer and store) except for loads (memory-to-register mov-type micro ops), division, sqrt.",
+        "EventCode": "0xC2",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "UOPS_RETIRED.SCALAR_SIMD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of scalar SSE, AVX, AVX2, AVX-512 micro-ops retired. More specifically, it counts scalar SSE, AVX, AVX2, AVX-512 micro-ops except for loads (memory-to-register mov-type micro ops), division, sqrt."
+    },
+    {
+        "PublicDescription": "This event counts the number of packed vector SSE, AVX, AVX2, and AVX-512 micro-ops retired (floating point, integer and store) except for loads (memory-to-register mov-type micro-ops), packed byte and word multiplies.",
+        "EventCode": "0xC2",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "UOPS_RETIRED.PACKED_SIMD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of vector SSE, AVX, AVX2, AVX-512 micro-ops retired. More specifically, it counts packed SSE, AVX, AVX2, AVX-512 micro-ops (both floating point and integer) except for loads (memory-to-register mov-type micro-ops), packed byte and word multiplies."
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of times that the machine clears due to program modifying data within 1K of a recently fetched code page"
+    },
+    {
+        "PublicDescription": "This event counts the number of times that the pipeline stalled due to FP operations needing assists.",
+        "EventCode": "0xC3",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.FP_ASSIST",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of floating-point operations retired that required microcode assists"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "MACHINE_CLEARS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts all nukes"
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "NO_ALLOC_CYCLES.ROB_FULL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of core cycles when no micro-ops are allocated and the ROB is full"
+    },
+    {
+        "PublicDescription": "This event counts the number of core cycles when no uops are allocated and the alloc pipe is stalled waiting for a mispredicted branch to retire.",
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "NO_ALLOC_CYCLES.MISPREDICTS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of core cycles when no micro-ops are allocated and the alloc pipe is stalled waiting for a mispredicted branch to retire."
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "NO_ALLOC_CYCLES.RAT_STALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of core cycles when no micro-ops are allocated and a RAT stall (caused by reservation station full) is asserted."
+    },
+    {
+        "PublicDescription": "This event counts the number of core cycles when no uops are allocated, the instruction queue is empty and the alloc pipe is stalled waiting for instructions to be fetched.",
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x90",
+        "EventName": "NO_ALLOC_CYCLES.NOT_DELIVERED",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of core cycles when no micro-ops are allocated, the IQ is empty, and no other condition is blocking allocation."
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x7f",
+        "EventName": "NO_ALLOC_CYCLES.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the total number of core cycles when no micro-ops are allocated for any reason."
+    },
+    {
+        "EventCode": "0xCB",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "RS_FULL_STALL.MEC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of core cycles when allocation pipeline is stalled and is waiting for a free MEC reservation station entry."
+    },
+    {
+        "EventCode": "0xCB",
+        "Counter": "0,1",
+        "UMask": "0x1f",
+        "EventName": "RS_FULL_STALL.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the total number of core cycles the Alloc pipeline is stalled when any one of the reservation stations is full. "
+    },
+    {
+        "EventCode": "0xC0",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the total number of instructions retired"
+    },
+    {
+        "PublicDescription": "This event counts cycles when the divider is busy. More specifically cycles when the divide unit is unable to accept a new divide uop because it is busy processing a previously dispatched uop. The cycles will be counted irrespective of whether or not another divide uop is waiting to enter the divide unit (from the RS). This event counts integer divides, x87 divides, divss, divsd, sqrtss, sqrtsd event and does not count vector divides.",
+        "EventCode": "0xCD",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CYCLES_DIV_BUSY.ALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of core cycles when the divider is busy.  Does not imply a stall waiting for the divider."
+    },
+    {
+        "PublicDescription": "This event counts the number of instructions that retire.  For instructions that consist of multiple micro-ops, this event counts exactly once, as the last micro-op of the instruction retires.  The event continues counting while instructions retire, including during interrupt service routines caused by hardware interrupts, faults or traps.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Fixed Counter: Counts the number of instructions retired"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of unhalted core clock cycles"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of unhalted reference clock cycles"
+    },
+    {
+        "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles"
+    },
+    {
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 3",
+        "UMask": "0x3",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Fixed Counter: Counts the number of unhalted reference clock cycles"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "BACLEARS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of times the front end resteers for any branch as a result of another branch handling mechanism in the front end."
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "BACLEARS.RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of times the front end resteers for RET branches as a result of another branch handling mechanism in the front end."
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "BACLEARS.COND",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of times the front end resteers for conditional branches as a result of another branch handling mechanism in the front end."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "RECYCLEQ.LD_BLOCK_ST_FORWARD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of occurrences where a retired load gets blocked because its address partially overlaps with a store",
+        "Data_LA": "1"
+    },
+    {
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "RECYCLEQ.LD_BLOCK_STD_NOTREADY",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of occurrences where a retired load gets blocked because its address overlaps with a store whose data is not ready"
+    },
+    {
+        "PublicDescription": "This event counts the number of retired stores that experienced a cache line boundary split (Precise Event). Note that each split should be counted only once.",
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "RECYCLEQ.ST_SPLITS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of occurrences of a retired store that is a cache line split. Each split should be counted only once."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "RECYCLEQ.LD_SPLITS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of occurrences of a retired load that is a cache line split. Each split should be counted only once.",
+        "Data_LA": "1"
+    },
+    {
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "RECYCLEQ.LOCK",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts all the retired locked loads. It does not include stores because we would double count if we count stores"
+    },
+    {
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "RECYCLEQ.STA_FULL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the store micro-ops retired that were pushed into the recycle queue because the store address buffer is full"
+    },
+    {
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "RECYCLEQ.ANY_LD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts any retired load that was pushed into the recycle queue for any reason."
+    },
+    {
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x80",
+        "EventName": "RECYCLEQ.ANY_ST",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts any retired store that was pushed into the recycle queue for any reason."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0xf9",
+        "EventName": "BR_MISP_RETIRED.CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted near CALL branch instructions retired."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0xfd",
+        "EventName": "BR_MISP_RETIRED.REL_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted near relative CALL branch instructions retired."
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0xbf",
+        "EventName": "BR_MISP_RETIRED.FAR_BRANCH",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted far branch instructions retired."
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/knightslanding/uncore-memory.json b/pmu-events/arch/x86/knightslanding/uncore-memory.json
new file mode 100644
index 0000000..e3bcd86
--- /dev/null
+++ b/pmu-events/arch/x86/knightslanding/uncore-memory.json
@@ -0,0 +1,42 @@
+[
+    {
+        "BriefDescription": "ddr bandwidth read (CPU traffic only) (MB/sec). ",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_CAS_COUNT.RD",
+        "PerPkg": "1",
+        "ScaleUnit": "6.4e-05MiB",
+        "UMask": "0x01",
+        "Unit": "imc"
+    },
+    {
+        "BriefDescription": "ddr bandwidth write (CPU traffic only) (MB/sec). ",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_CAS_COUNT.WR",
+        "PerPkg": "1",
+        "ScaleUnit": "6.4e-05MiB",
+        "UMask": "0x02",
+        "Unit": "imc"
+    },
+    {
+        "BriefDescription": "mcdram bandwidth read (CPU traffic only) (MB/sec). ",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x01",
+        "EventName": "UNC_E_RPQ_INSERTS",
+        "PerPkg": "1",
+        "ScaleUnit": "6.4e-05MiB",
+        "UMask": "0x01",
+        "Unit": "edc_eclk"
+    },
+    {
+        "BriefDescription": "mcdram bandwidth write (CPU traffic only) (MB/sec). ",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x02",
+        "EventName": "UNC_E_WPQ_INSERTS",
+        "PerPkg": "1",
+        "ScaleUnit": "6.4e-05MiB",
+        "UMask": "0x01",
+        "Unit": "edc_eclk"
+    }
+]
diff --git a/pmu-events/arch/x86/knightslanding/virtual-memory.json b/pmu-events/arch/x86/knightslanding/virtual-memory.json
new file mode 100644
index 0000000..f315945
--- /dev/null
+++ b/pmu-events/arch/x86/knightslanding/virtual-memory.json
@@ -0,0 +1,65 @@
+[
+    {
+        "PEBS": "1",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of load micro-ops retired that cause a DTLB miss",
+        "Data_LA": "1"
+    },
+    {
+        "EventCode": "0x05",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "PAGE_WALKS.D_SIDE_WALKS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total D-side page walks that are completed or started. The page walks started in the speculative path will also be counted",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0x05",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "PAGE_WALKS.D_SIDE_CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the total number of core cycles for all the D-side page walks. The cycles for page walks started in speculative path will also be included."
+    },
+    {
+        "EventCode": "0x05",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "PAGE_WALKS.I_SIDE_WALKS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total I-side page walks that are completed.",
+        "EdgeDetect": "1"
+    },
+    {
+        "PublicDescription": "This event counts every cycle when an I-side (walks due to an instruction fetch) page walk is in progress. ",
+        "EventCode": "0x05",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "PAGE_WALKS.I_SIDE_CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the total number of core cycles for all the I-side page walks. The cycles for page walks started in speculative path will also be included."
+    },
+    {
+        "EventCode": "0x05",
+        "Counter": "0,1",
+        "UMask": "0x3",
+        "EventName": "PAGE_WALKS.WALKS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total page walks that are completed (I-side and D-side)",
+        "EdgeDetect": "1"
+    },
+    {
+        "PublicDescription": "This event counts every cycle when a data (D) page walk or instruction (I) page walk is in progress.",
+        "EventCode": "0x05",
+        "Counter": "0,1",
+        "UMask": "0x3",
+        "EventName": "PAGE_WALKS.CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the total number of core cycles for all the page walks. The cycles for page walks started in speculative path will also be included."
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/mapfile.csv b/pmu-events/arch/x86/mapfile.csv
new file mode 100644
index 0000000..7e3cce3
--- /dev/null
+++ b/pmu-events/arch/x86/mapfile.csv
@@ -0,0 +1,34 @@
+Family-model,Version,Filename,EventType
+GenuineIntel-6-56,v5,broadwellde,core
+GenuineIntel-6-3D,v17,broadwell,core
+GenuineIntel-6-47,v17,broadwell,core
+GenuineIntel-6-4F,v10,broadwellx,core
+GenuineIntel-6-1C,v4,bonnell,core
+GenuineIntel-6-26,v4,bonnell,core
+GenuineIntel-6-27,v4,bonnell,core
+GenuineIntel-6-36,v4,bonnell,core
+GenuineIntel-6-35,v4,bonnell,core
+GenuineIntel-6-5C,v8,goldmont,core
+GenuineIntel-6-7A,v1,goldmontplus,core
+GenuineIntel-6-3C,v24,haswell,core
+GenuineIntel-6-45,v24,haswell,core
+GenuineIntel-6-46,v24,haswell,core
+GenuineIntel-6-3F,v17,haswellx,core
+GenuineIntel-6-3A,v18,ivybridge,core
+GenuineIntel-6-3E,v19,ivytown,core
+GenuineIntel-6-2D,v20,jaketown,core
+GenuineIntel-6-57,v9,knightslanding,core
+GenuineIntel-6-85,v9,knightslanding,core
+GenuineIntel-6-1E,v2,nehalemep,core
+GenuineIntel-6-1F,v2,nehalemep,core
+GenuineIntel-6-1A,v2,nehalemep,core
+GenuineIntel-6-2E,v2,nehalemex,core
+GenuineIntel-6-[4589]E,v24,skylake,core
+GenuineIntel-6-37,v13,silvermont,core
+GenuineIntel-6-4D,v13,silvermont,core
+GenuineIntel-6-4C,v13,silvermont,core
+GenuineIntel-6-2A,v15,sandybridge,core
+GenuineIntel-6-2C,v2,westmereep-dp,core
+GenuineIntel-6-25,v2,westmereep-sp,core
+GenuineIntel-6-2F,v2,westmereex,core
+GenuineIntel-6-55,v1,skylakex,core
diff --git a/pmu-events/arch/x86/nehalemep/cache.json b/pmu-events/arch/x86/nehalemep/cache.json
new file mode 100644
index 0000000..a11029e
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemep/cache.json
@@ -0,0 +1,3229 @@
+[
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "CACHE_LOCK_CYCLES.L1D",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles L1D locked"
+    },
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CACHE_LOCK_CYCLES.L1D_L2",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles L1D and L2 locked"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D.M_EVICT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D cache lines replaced in M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D.M_REPL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D cache lines allocated in the M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "L1D.M_SNOOP_EVICT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D snoop eviction of cache lines in M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D.REPL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache lines allocated"
+    },
+    {
+        "EventCode": "0x43",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_ALL_REF.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All references to the L1 data cache"
+    },
+    {
+        "EventCode": "0x43",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_ALL_REF.CACHEABLE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cacheable reads and writes"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D_CACHE_LD.E_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache read in E state"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_CACHE_LD.I_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache read in I state (misses)"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "L1D_CACHE_LD.M_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache read in M state"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0xf",
+        "EventName": "L1D_CACHE_LD.MESI",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache reads"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_CACHE_LD.S_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache read in S state"
+    },
+    {
+        "EventCode": "0x42",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D_CACHE_LOCK.E_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache load locks in E state"
+    },
+    {
+        "EventCode": "0x42",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_CACHE_LOCK.HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache load lock hits"
+    },
+    {
+        "EventCode": "0x42",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "L1D_CACHE_LOCK.M_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache load locks in M state"
+    },
+    {
+        "EventCode": "0x42",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_CACHE_LOCK.S_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache load locks in S state"
+    },
+    {
+        "EventCode": "0x53",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_CACHE_LOCK_FB_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D load lock accepted in fill buffer"
+    },
+    {
+        "EventCode": "0x52",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_CACHE_PREFETCH_LOCK_FB_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D prefetch load lock accepted in fill buffer"
+    },
+    {
+        "EventCode": "0x41",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D_CACHE_ST.E_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache stores in E state"
+    },
+    {
+        "EventCode": "0x41",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "L1D_CACHE_ST.M_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache stores in M state"
+    },
+    {
+        "EventCode": "0x41",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_CACHE_ST.S_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache stores in S state"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_PREFETCH.MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch misses"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_PREFETCH.REQUESTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch requests"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D_PREFETCH.TRIGGERS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch requests triggered"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L1D_WB_L2.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in E state"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D_WB_L2.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in I state (misses)"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L1D_WB_L2.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in M state"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L1D_WB_L2.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L1 writebacks to L2"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_WB_L2.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in S state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "L2_DATA_RQSTS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 data requests"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_DATA_RQSTS.DEMAND.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in E state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_DATA_RQSTS.DEMAND.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in I state (misses)"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_DATA_RQSTS.DEMAND.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in M state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_DATA_RQSTS.DEMAND.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand requests"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_DATA_RQSTS.DEMAND.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in S state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in E state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in the I state (misses)"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in M state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf0",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 data prefetches"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in the S state"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "L2_LINES_IN.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_IN.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated in the E state"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_IN.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated in the S state"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_LINES_OUT.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted by a demand request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 modified lines evicted by a demand request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_OUT.PREFETCH_CLEAN",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted by a prefetch request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_LINES_OUT.PREFETCH_DIRTY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 modified lines evicted by a prefetch request"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_RQSTS.IFETCH_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_RQSTS.IFETCH_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "L2_RQSTS.IFETCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetches"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_RQSTS.LD_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_RQSTS.LD_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L2_RQSTS.LOADS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load requests"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xaa",
+        "EventName": "L2_RQSTS.MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_RQSTS.PREFETCH_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_RQSTS.PREFETCH_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc0",
+        "EventName": "L2_RQSTS.PREFETCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 prefetches"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 requests"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "L2_RQSTS.RFOS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO requests"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_TRANSACTIONS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_TRANSACTIONS.FILL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 fill transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_TRANSACTIONS.IFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_TRANSACTIONS.L1D_WB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D writeback to L2 transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_TRANSACTIONS.LOAD",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 Load transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_TRANSACTIONS.PREFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_TRANSACTIONS.RFO",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_TRANSACTIONS.WB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 writeback to LLC transactions"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_WRITE.LOCK.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in E state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe0",
+        "EventName": "L2_WRITE.LOCK.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All demand L2 lock RFOs that hit the cache"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_WRITE.LOCK.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in I state (misses)"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_WRITE.LOCK.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in M state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf0",
+        "EventName": "L2_WRITE.LOCK.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All demand L2 lock RFOs"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_WRITE.LOCK.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in S state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "L2_WRITE.RFO.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L2 demand store RFOs that hit the cache"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_WRITE.RFO.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in I state (misses)"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_WRITE.RFO.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in M state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_WRITE.RFO.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L2 demand store RFOs"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_WRITE.RFO.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in S state"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Longest latency cache miss"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Longest latency cache reference"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_INST_RETIRED.LOADS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired which contain a load (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_INST_RETIRED.STORES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired which contain a store (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "MEM_LOAD_RETIRED.HIT_LFB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that miss L1D and hit a previously allocated LFB (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_RETIRED.L1D_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired loads that hit the L1 data cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that hit the L2 cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_LOAD_RETIRED.LLC_MISS",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Retired loads that miss the LLC cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_RETIRED.LLC_UNSHARED_HIT",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit valid versions in the LLC cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MEM_UNCORE_RETIRED.LOCAL_DRAM",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Load instructions retired with a data source of local DRAM or locally homed remote hitm (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Load instructions retired that HIT modified data in sibling core (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Load instructions retired remote cache HIT data source (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_UNCORE_RETIRED.REMOTE_DRAM",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "MEM_UNCORE_RETIRED.UNCACHEABLE",
+        "SampleAfterValue": "4000",
+        "BriefDescription": "Load instructions retired IO (Precise Event)"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "OFFCORE_REQUESTS.L1D_WRITEBACK",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore L1 data cache writebacks"
+    },
+    {
+        "EventCode": "0xB2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_SQ_FULL",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests blocked due to Super Queue full"
+    },
+    {
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue lock splits across a cache line"
+    },
+    {
+        "EventCode": "0x6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "STORE_BLOCKS.AT_RET",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Loads delayed with at-Retirement block code"
+    },
+    {
+        "EventCode": "0x6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "STORE_BLOCKS.L1D_BLOCK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Cacheable loads delayed with L1D block code"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x0",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Memory instructions retired above 0 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x400",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100",
+        "BriefDescription": "Memory instructions retired above 1024 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "1000",
+        "BriefDescription": "Memory instructions retired above 128 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Memory instructions retired above 16 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x4000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "5",
+        "BriefDescription": "Memory instructions retired above 16384 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x800",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50",
+        "BriefDescription": "Memory instructions retired above 2048 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "500",
+        "BriefDescription": "Memory instructions retired above 256 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "5000",
+        "BriefDescription": "Memory instructions retired above 32 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x8000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "3",
+        "BriefDescription": "Memory instructions retired above 32768 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50000",
+        "BriefDescription": "Memory instructions retired above 4 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x1000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20",
+        "BriefDescription": "Memory instructions retired above 4096 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "200",
+        "BriefDescription": "Memory instructions retired above 512 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Memory instructions retired above 64 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Memory instructions retired above 8 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x2000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "10",
+        "BriefDescription": "Memory instructions retired above 8192 clocks (Precise Event)"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F11",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF11",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore data reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x111",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x211",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x411",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x711",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4711",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1811",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3811",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x811",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F44",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF44",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore code reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x144",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x244",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x444",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x744",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4744",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1844",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3844",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x844",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7FFF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFFFF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x80FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x47FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x18FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x38FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x10FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F22",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF22",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore RFO requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x122",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x222",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x422",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x722",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4722",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1822",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3822",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x822",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F08",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF08",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore writebacks",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x108",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x408",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x708",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4708",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1808",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3808",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x808",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F77",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF77",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore code or data read requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x177",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x277",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x477",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x777",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4777",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1877",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3877",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x877",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F33",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any cache_dram",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF33",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any location",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x133",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x233",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x433",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x733",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = local cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4733",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = local cache or dram",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1833",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3833",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = remote cache or dram",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x833",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F03",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF03",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand data requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x103",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x203",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x403",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x703",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4703",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1803",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3803",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x803",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F01",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF01",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand data reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x101",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x201",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x401",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x701",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4701",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1801",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3801",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x801",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F04",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF04",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand code reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x104",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x204",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x404",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x704",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4704",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1804",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3804",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x804",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F02",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF02",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand RFO requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x102",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x202",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x402",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x702",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4702",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1802",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3802",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x802",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F80",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF80",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore other requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8080",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x180",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x280",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x480",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x780",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4780",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1880",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3880",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1080",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x880",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F30",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF30",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch data requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x130",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x230",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x430",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x730",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4730",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1830",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3830",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x830",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F10",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF10",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch data reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x110",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x210",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x410",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x710",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4710",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1810",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3810",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x810",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F40",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF40",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch code reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x140",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x240",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x440",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x740",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4740",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1840",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3840",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x840",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F20",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF20",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch RFO requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x120",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x220",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x420",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x720",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4720",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1820",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3820",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x820",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F70",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF70",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x170",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x270",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x470",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x770",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4770",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1870",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3870",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x870",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests that HITM in a remote cache",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemep/floating-point.json b/pmu-events/arch/x86/nehalemep/floating-point.json
new file mode 100644
index 0000000..7d2f71a
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemep/floating-point.json
@@ -0,0 +1,229 @@
+[
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_ASSIST.ALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ASSIST.INPUT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists for invalid input value (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ASSIST.OUTPUT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists for invalid output value (Precise Event)"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_COMP_OPS_EXE.MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "MMX Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE* FP double precision Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE and SSE2 FP Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP_PACKED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE FP packed Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP_SCALAR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE FP scalar Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE* FP single precision Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_COMP_OPS_EXE.SSE2_INTEGER",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE2 integer Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_COMP_OPS_EXE.X87",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Computational floating-point operations executed"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "FP_MMX_TRANS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All Floating Point to and from MMX transitions"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_MMX_TRANS.TO_FP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Transitions from MMX to Floating Point instructions"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_MMX_TRANS.TO_MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Transitions from Floating Point to MMX instructions"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SIMD_INT_128.PACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer pack operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "SIMD_INT_128.PACKED_ARITH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer arithmetic operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SIMD_INT_128.PACKED_LOGICAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer logical operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_INT_128.PACKED_MPY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer multiply operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_INT_128.PACKED_SHIFT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer shift operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "SIMD_INT_128.SHUFFLE_MOVE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer shuffle/move operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SIMD_INT_128.UNPACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer unpack operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SIMD_INT_64.PACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit pack operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "SIMD_INT_64.PACKED_ARITH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit arithmetic operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SIMD_INT_64.PACKED_LOGICAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit logical operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_INT_64.PACKED_MPY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit packed multiply operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_INT_64.PACKED_SHIFT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit shift operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "SIMD_INT_64.SHUFFLE_MOVE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit shuffle/move operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SIMD_INT_64.UNPACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit unpack operations"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemep/frontend.json b/pmu-events/arch/x86/nehalemep/frontend.json
new file mode 100644
index 0000000..e5e21e0
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemep/frontend.json
@@ -0,0 +1,26 @@
+[
+    {
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACRO_INSTS.DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions decoded"
+    },
+    {
+        "EventCode": "0xA6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACRO_INSTS.FUSIONS_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Macro-fused instructions decoded"
+    },
+    {
+        "EventCode": "0x19",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TWO_UOP_INSTS_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Two Uop instructions decoded"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemep/memory.json b/pmu-events/arch/x86/nehalemep/memory.json
new file mode 100644
index 0000000..f914a45
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemep/memory.json
@@ -0,0 +1,739 @@
+[
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF811",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF844",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x60FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF8FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x40FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x20FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF822",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF808",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF877",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF833",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any LLC miss",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the local DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF803",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF801",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF804",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF802",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6080",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF880",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2080",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF830",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF810",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF840",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF820",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF870",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemep/other.json b/pmu-events/arch/x86/nehalemep/other.json
new file mode 100644
index 0000000..af08606
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemep/other.json
@@ -0,0 +1,210 @@
+[
+    {
+        "EventCode": "0xE8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BPU_CLEARS.EARLY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Early Branch Prediction Unit clears"
+    },
+    {
+        "EventCode": "0xE8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BPU_CLEARS.LATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Late Branch Prediction Unit clears"
+    },
+    {
+        "EventCode": "0xE5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BPU_MISSED_CALL_RET",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Branch prediction unit missed call or return"
+    },
+    {
+        "EventCode": "0xD5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ES_REG_RENAMES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ES segment renames"
+    },
+    {
+        "EventCode": "0x6C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IO_TRANSACTIONS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "I/O transactions"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L1I.CYCLES_STALLED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch stall cycles"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1I.HITS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch hits"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1I.MISSES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch misses"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L1I.READS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I Instruction fetches"
+    },
+    {
+        "EventCode": "0x82",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LARGE_ITLB.HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Large ITLB hit"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "LOAD_DISPATCH.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All loads dispatched"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "LOAD_DISPATCH.MOB",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched from the MOB"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOAD_DISPATCH.RS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched that bypass the MOB"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_DISPATCH.RS_DELAYED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched from stage 305"
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "PARTIAL_ADDRESS_ALIAS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "False dependencies due to partial address aliasing"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "RAT_STALLS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All RAT stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RAT_STALLS.FLAGS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Flag stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RAT_STALLS.REGISTERS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Partial register stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RAT_STALLS.ROB_READ_PORT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ROB read port stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RAT_STALLS.SCOREBOARD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Scoreboard stall cycles"
+    },
+    {
+        "EventCode": "0x4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "SB_DRAIN.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All Store buffer stall cycles"
+    },
+    {
+        "EventCode": "0xD4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SEG_RENAME_STALLS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Segment rename stall cycles"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SNOOP_RESPONSE.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HIT to snoop"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SNOOP_RESPONSE.HITE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HITE to snoop"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SNOOP_RESPONSE.HITM",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HITM to snoop"
+    },
+    {
+        "EventCode": "0xF6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SQ_FULL_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue full stall cycles"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemep/pipeline.json b/pmu-events/arch/x86/nehalemep/pipeline.json
new file mode 100644
index 0000000..41006dd
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemep/pipeline.json
@@ -0,0 +1,881 @@
+[
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.CYCLES_DIV_BUSY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles the divider is busy"
+    },
+    {
+        "EventCode": "0x14",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.DIV",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Divide Operations executed",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ARITH.MUL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Multiply operations executed"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BACLEAR.BAD_TARGET",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "BACLEAR asserted with bad target address"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BACLEAR.CLEAR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "BACLEAR asserted, regardless of cause"
+    },
+    {
+        "EventCode": "0xA7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BACLEAR_FORCE_IQ",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instruction queue forced BACLEAR"
+    },
+    {
+        "EventCode": "0xE0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Branch instructions decoded"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7f",
+        "EventName": "BR_INST_EXEC.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Branch instructions executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_EXEC.COND",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Conditional branch instructions executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_EXEC.DIRECT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Unconditional branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_INST_EXEC.DIRECT_NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Unconditional call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_INST_EXEC.INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_EXEC.INDIRECT_NON_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect non call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "BR_INST_EXEC.NEAR_CALLS",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "BR_INST_EXEC.NON_CALLS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All non call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_INST_EXEC.RETURN_NEAR",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect return branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_INST_EXEC.TAKEN",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Taken branches executed"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired conditional branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Retired near call instructions (Precise Event)"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7f",
+        "EventName": "BR_MISP_EXEC.ANY",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_EXEC.COND",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted conditional branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_EXEC.DIRECT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted unconditional branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_MISP_EXEC.DIRECT_NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted direct call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_MISP_EXEC.INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted indirect call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_EXEC.INDIRECT_NON_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted indirect non call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "BR_MISP_EXEC.NEAR_CALLS",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "BR_MISP_EXEC.NON_CALLS",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted non call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_MISP_EXEC.RETURN_NEAR",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted return branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_MISP_EXEC.TAKEN",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted taken branches executed"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted near retired calls (Precise Event)"
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.REF",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Reference cycles when thread is not halted (fixed counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF_P",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Reference base clock (133 MHz) cycles when thread is not halted (programmable counter)"
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when thread is not halted (fixed counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when thread is not halted (programmable counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total CPU cycles",
+        "CounterMask": "2"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "ILD_STALL.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Any Instruction Length Decoder stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ILD_STALL.IQ_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instruction Queue full stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ILD_STALL.LCP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Length Change Prefix stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ILD_STALL.MRU",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stall cycles due to BPU MRU bypass"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "ILD_STALL.REGEN",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Regen stall cycles"
+    },
+    {
+        "EventCode": "0x18",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_DECODED.DEC0",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions that must be decoded by decoder 0"
+    },
+    {
+        "EventCode": "0x1E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_QUEUE_WRITE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles instructions are written to the instruction queue"
+    },
+    {
+        "EventCode": "0x17",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_QUEUE_WRITES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions written to instruction queue."
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired (fixed counter)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired (Programmable counter and Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "INST_RETIRED.MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired MMX instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles (Precise Event)",
+        "CounterMask": "16"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "INST_RETIRED.X87",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired floating-point operations (Precise Event)"
+    },
+    {
+        "EventCode": "0x4C",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "LOAD_HIT_PRE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Load operations conflicting with software prefetches"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.ACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when uops were delivered by the LSD",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xA8",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.INACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no uops were delivered by the LSD",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x20",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD_OVERFLOW",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loops that can't stream from the instruction queue"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACHINE_CLEARS.CYCLES",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Cycles machine clear asserted"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEM_ORDER",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Execution pipeline restart due to Memory ordering conflicts"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Self-Modifying Code detected"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Resource related stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "RESOURCE_STALLS.FPCW",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "FPU control word write stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RESOURCE_STALLS.LOAD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Load buffer stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "RESOURCE_STALLS.MXCSR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "MXCSR rename stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "RESOURCE_STALLS.OTHER",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Other Resource related stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RESOURCE_STALLS.ROB_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ROB full stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RESOURCE_STALLS.RS_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Reservation Station full stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RESOURCE_STALLS.STORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Store buffer stall cycles"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SSEX_UOPS_RETIRED.PACKED_DOUBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Packed-Double Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SSEX_UOPS_RETIRED.PACKED_SINGLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Packed-Single Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SSEX_UOPS_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Scalar-Double Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SSEX_UOPS_RETIRED.SCALAR_SINGLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Scalar-Single Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SSEX_UOPS_RETIRED.VECTOR_INTEGER",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Vector Integer Uops retired (Precise Event)"
+    },
+    {
+        "EventCode": "0xDB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOP_UNFUSION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uop unfusions due to FP exceptions"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_DECODED.ESP_FOLDING",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stack pointer instructions decoded"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "UOPS_DECODED.ESP_SYNC",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stack pointer sync operations"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DECODED.MS_CYCLES_ACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops decoded by Microcode Sequencer",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xD1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DECODED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops are decoded",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops executed on any port (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops executed on ports 0-4 (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on any port (core count)",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on ports 0-4 (core count)",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on any port (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on ports 0-4 (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.PORT0",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 0"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_EXECUTED.PORT015",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued on ports 0, 1 or 5"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_EXECUTED.PORT015_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on ports 0, 1 or 5",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.PORT1",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT2_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 2 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT234_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued on ports 2, 3 or 4"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT3_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 3 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT4_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 4 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_EXECUTED.PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 5"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued"
+    },
+    {
+        "EventCode": "0xE",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops were issued on any thread",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CYCLES_ALL_THREADS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops were issued on either thread",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_ISSUED.FUSED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Fused Uops issued"
+    },
+    {
+        "EventCode": "0xE",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops were issued",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ACTIVE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops are being retired",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_RETIRED.MACRO_FUSED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Macro-fused Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retirement slots used (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops are not retiring (Precise Event)",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles using precise uop retired event (Precise Event)",
+        "CounterMask": "16"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC0",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles (Precise Event)",
+        "CounterMask": "16"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemep/virtual-memory.json b/pmu-events/arch/x86/nehalemep/virtual-memory.json
new file mode 100644
index 0000000..0596094
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemep/virtual-memory.json
@@ -0,0 +1,109 @@
+[
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_LOAD_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load misses"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_LOAD_MISSES.PDE_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss caused by low part of address"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "DTLB second level hit"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss page walks complete"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB misses"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB first level misses but second level hit"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB miss page walks"
+    },
+    {
+        "EventCode": "0xAE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_FLUSH",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ITLB flushes"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ITLB_MISS_RETIRED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired instructions that missed the ITLB (Precise Event)"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB miss"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB miss page walks"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "MEM_LOAD_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that miss the DTLB (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_STORE_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired stores that miss the DTLB (Precise Event)"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemex/cache.json b/pmu-events/arch/x86/nehalemex/cache.json
new file mode 100644
index 0000000..21a0f8f
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemex/cache.json
@@ -0,0 +1,3184 @@
+[
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "CACHE_LOCK_CYCLES.L1D",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles L1D locked"
+    },
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CACHE_LOCK_CYCLES.L1D_L2",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles L1D and L2 locked"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D.M_EVICT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D cache lines replaced in M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D.M_REPL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D cache lines allocated in the M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "L1D.M_SNOOP_EVICT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D snoop eviction of cache lines in M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D.REPL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache lines allocated"
+    },
+    {
+        "EventCode": "0x43",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_ALL_REF.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All references to the L1 data cache"
+    },
+    {
+        "EventCode": "0x43",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_ALL_REF.CACHEABLE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cacheable reads and writes"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D_CACHE_LD.E_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache read in E state"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_CACHE_LD.I_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache read in I state (misses)"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "L1D_CACHE_LD.M_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache read in M state"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0xf",
+        "EventName": "L1D_CACHE_LD.MESI",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache reads"
+    },
+    {
+        "EventCode": "0x40",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_CACHE_LD.S_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache read in S state"
+    },
+    {
+        "EventCode": "0x42",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D_CACHE_LOCK.E_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache load locks in E state"
+    },
+    {
+        "EventCode": "0x42",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_CACHE_LOCK.HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache load lock hits"
+    },
+    {
+        "EventCode": "0x42",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "L1D_CACHE_LOCK.M_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache load locks in M state"
+    },
+    {
+        "EventCode": "0x42",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_CACHE_LOCK.S_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache load locks in S state"
+    },
+    {
+        "EventCode": "0x53",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_CACHE_LOCK_FB_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D load lock accepted in fill buffer"
+    },
+    {
+        "EventCode": "0x52",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_CACHE_PREFETCH_LOCK_FB_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D prefetch load lock accepted in fill buffer"
+    },
+    {
+        "EventCode": "0x41",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D_CACHE_ST.E_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache stores in E state"
+    },
+    {
+        "EventCode": "0x41",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "L1D_CACHE_ST.M_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache stores in M state"
+    },
+    {
+        "EventCode": "0x41",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_CACHE_ST.S_STATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache stores in S state"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_PREFETCH.MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch misses"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_PREFETCH.REQUESTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch requests"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D_PREFETCH.TRIGGERS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch requests triggered"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L1D_WB_L2.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in E state"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D_WB_L2.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in I state (misses)"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L1D_WB_L2.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in M state"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L1D_WB_L2.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L1 writebacks to L2"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_WB_L2.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in S state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "L2_DATA_RQSTS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 data requests"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_DATA_RQSTS.DEMAND.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in E state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_DATA_RQSTS.DEMAND.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in I state (misses)"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_DATA_RQSTS.DEMAND.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in M state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_DATA_RQSTS.DEMAND.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand requests"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_DATA_RQSTS.DEMAND.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in S state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in E state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in the I state (misses)"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in M state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf0",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 data prefetches"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in the S state"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "L2_LINES_IN.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_IN.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated in the E state"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_IN.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated in the S state"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_LINES_OUT.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted by a demand request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 modified lines evicted by a demand request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_OUT.PREFETCH_CLEAN",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted by a prefetch request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_LINES_OUT.PREFETCH_DIRTY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 modified lines evicted by a prefetch request"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_RQSTS.IFETCH_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_RQSTS.IFETCH_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "L2_RQSTS.IFETCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetches"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_RQSTS.LD_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_RQSTS.LD_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L2_RQSTS.LOADS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 requests"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xaa",
+        "EventName": "L2_RQSTS.MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_RQSTS.PREFETCH_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_RQSTS.PREFETCH_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc0",
+        "EventName": "L2_RQSTS.PREFETCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 prefetches"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 requests"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "L2_RQSTS.RFOS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO requests"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_TRANSACTIONS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_TRANSACTIONS.FILL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 fill transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_TRANSACTIONS.IFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_TRANSACTIONS.L1D_WB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D writeback to L2 transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_TRANSACTIONS.LOAD",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 Load transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_TRANSACTIONS.PREFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_TRANSACTIONS.RFO",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_TRANSACTIONS.WB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 writeback to LLC transactions"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_WRITE.LOCK.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in E state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe0",
+        "EventName": "L2_WRITE.LOCK.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All demand L2 lock RFOs that hit the cache"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_WRITE.LOCK.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in I state (misses)"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_WRITE.LOCK.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in M state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf0",
+        "EventName": "L2_WRITE.LOCK.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All demand L2 lock RFOs"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_WRITE.LOCK.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in S state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "L2_WRITE.RFO.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L2 demand store RFOs that hit the cache"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_WRITE.RFO.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in I state (misses)"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_WRITE.RFO.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in M state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_WRITE.RFO.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L2 demand store RFOs"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_WRITE.RFO.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in S state"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Longest latency cache miss"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Longest latency cache reference"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_INST_RETIRED.LOADS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired which contain a load (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_INST_RETIRED.STORES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired which contain a store (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "MEM_LOAD_RETIRED.HIT_LFB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that miss L1D and hit a previously allocated LFB (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_RETIRED.L1D_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired loads that hit the L1 data cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that hit the L2 cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_LOAD_RETIRED.LLC_MISS",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Retired loads that miss the LLC cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_RETIRED.LLC_UNSHARED_HIT",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit valid versions in the LLC cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "OFFCORE_REQUESTS.L1D_WRITEBACK",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore L1 data cache writebacks"
+    },
+    {
+        "EventCode": "0xB2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_SQ_FULL",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests blocked due to Super Queue full"
+    },
+    {
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue lock splits across a cache line"
+    },
+    {
+        "EventCode": "0x6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "STORE_BLOCKS.AT_RET",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Loads delayed with at-Retirement block code"
+    },
+    {
+        "EventCode": "0x6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "STORE_BLOCKS.L1D_BLOCK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Cacheable loads delayed with L1D block code"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x0",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Memory instructions retired above 0 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x400",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100",
+        "BriefDescription": "Memory instructions retired above 1024 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "1000",
+        "BriefDescription": "Memory instructions retired above 128 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Memory instructions retired above 16 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x4000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "5",
+        "BriefDescription": "Memory instructions retired above 16384 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x800",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50",
+        "BriefDescription": "Memory instructions retired above 2048 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "500",
+        "BriefDescription": "Memory instructions retired above 256 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "5000",
+        "BriefDescription": "Memory instructions retired above 32 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x8000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "3",
+        "BriefDescription": "Memory instructions retired above 32768 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50000",
+        "BriefDescription": "Memory instructions retired above 4 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x1000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20",
+        "BriefDescription": "Memory instructions retired above 4096 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "200",
+        "BriefDescription": "Memory instructions retired above 512 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Memory instructions retired above 64 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Memory instructions retired above 8 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x2000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "10",
+        "BriefDescription": "Memory instructions retired above 8192 clocks (Precise Event)"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F11",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF11",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore data reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x111",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x211",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x411",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x711",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4711",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1811",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3811",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x811",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F44",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF44",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore code reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x144",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x244",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x444",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x744",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4744",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1844",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3844",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x844",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7FFF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFFFF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x80FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x47FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x18FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x38FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x10FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F22",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF22",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore RFO requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x122",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x222",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x422",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x722",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4722",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1822",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3822",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x822",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F08",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF08",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore writebacks",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x108",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x408",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x708",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4708",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1808",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3808",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x808",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F77",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF77",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore code or data read requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x177",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x277",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x477",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x777",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4777",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1877",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3877",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x877",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F33",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any cache_dram",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF33",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any location",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x133",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x233",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x433",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x733",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = local cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4733",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = local cache or dram",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1833",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3833",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = remote cache or dram",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x833",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F03",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF03",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand data requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x103",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x203",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x403",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x703",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4703",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1803",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3803",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x803",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F01",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF01",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand data reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x101",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x201",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x401",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x701",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4701",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1801",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3801",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x801",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F04",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF04",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand code reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x104",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x204",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x404",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x704",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4704",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1804",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3804",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x804",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F02",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF02",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand RFO requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x102",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x202",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x402",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x702",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4702",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1802",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3802",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x802",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F80",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF80",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore other requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8080",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x180",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x280",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x480",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x780",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4780",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1880",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3880",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1080",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x880",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F30",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF30",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch data requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x130",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x230",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x430",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x730",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4730",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1830",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3830",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x830",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F10",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF10",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch data reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x110",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x210",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x410",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x710",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4710",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1810",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3810",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x810",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F40",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF40",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch code reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x140",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x240",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x440",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x740",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4740",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1840",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3840",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x840",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F20",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF20",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch RFO requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x120",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x220",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x420",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x720",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4720",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1820",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3820",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x820",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F70",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF70",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x170",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x270",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x470",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x770",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4770",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1870",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3870",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x870",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests that HITM in a remote cache",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemex/floating-point.json b/pmu-events/arch/x86/nehalemex/floating-point.json
new file mode 100644
index 0000000..7d2f71a
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemex/floating-point.json
@@ -0,0 +1,229 @@
+[
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_ASSIST.ALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ASSIST.INPUT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists for invalid input value (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ASSIST.OUTPUT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists for invalid output value (Precise Event)"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_COMP_OPS_EXE.MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "MMX Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE* FP double precision Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE and SSE2 FP Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP_PACKED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE FP packed Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP_SCALAR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE FP scalar Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE* FP single precision Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_COMP_OPS_EXE.SSE2_INTEGER",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE2 integer Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_COMP_OPS_EXE.X87",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Computational floating-point operations executed"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "FP_MMX_TRANS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All Floating Point to and from MMX transitions"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_MMX_TRANS.TO_FP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Transitions from MMX to Floating Point instructions"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_MMX_TRANS.TO_MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Transitions from Floating Point to MMX instructions"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SIMD_INT_128.PACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer pack operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "SIMD_INT_128.PACKED_ARITH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer arithmetic operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SIMD_INT_128.PACKED_LOGICAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer logical operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_INT_128.PACKED_MPY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer multiply operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_INT_128.PACKED_SHIFT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer shift operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "SIMD_INT_128.SHUFFLE_MOVE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer shuffle/move operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SIMD_INT_128.UNPACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer unpack operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SIMD_INT_64.PACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit pack operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "SIMD_INT_64.PACKED_ARITH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit arithmetic operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SIMD_INT_64.PACKED_LOGICAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit logical operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_INT_64.PACKED_MPY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit packed multiply operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_INT_64.PACKED_SHIFT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit shift operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "SIMD_INT_64.SHUFFLE_MOVE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit shuffle/move operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SIMD_INT_64.UNPACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit unpack operations"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemex/frontend.json b/pmu-events/arch/x86/nehalemex/frontend.json
new file mode 100644
index 0000000..e5e21e0
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemex/frontend.json
@@ -0,0 +1,26 @@
+[
+    {
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACRO_INSTS.DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions decoded"
+    },
+    {
+        "EventCode": "0xA6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACRO_INSTS.FUSIONS_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Macro-fused instructions decoded"
+    },
+    {
+        "EventCode": "0x19",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TWO_UOP_INSTS_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Two Uop instructions decoded"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemex/memory.json b/pmu-events/arch/x86/nehalemex/memory.json
new file mode 100644
index 0000000..f914a45
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemex/memory.json
@@ -0,0 +1,739 @@
+[
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF811",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF844",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x60FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF8FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x40FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x20FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF822",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF808",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF877",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF833",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any LLC miss",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the local DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF803",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF801",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF804",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF802",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6080",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF880",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2080",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF830",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF810",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF840",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF820",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF870",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemex/other.json b/pmu-events/arch/x86/nehalemex/other.json
new file mode 100644
index 0000000..af08606
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemex/other.json
@@ -0,0 +1,210 @@
+[
+    {
+        "EventCode": "0xE8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BPU_CLEARS.EARLY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Early Branch Prediction Unit clears"
+    },
+    {
+        "EventCode": "0xE8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BPU_CLEARS.LATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Late Branch Prediction Unit clears"
+    },
+    {
+        "EventCode": "0xE5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BPU_MISSED_CALL_RET",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Branch prediction unit missed call or return"
+    },
+    {
+        "EventCode": "0xD5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ES_REG_RENAMES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ES segment renames"
+    },
+    {
+        "EventCode": "0x6C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IO_TRANSACTIONS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "I/O transactions"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L1I.CYCLES_STALLED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch stall cycles"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1I.HITS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch hits"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1I.MISSES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch misses"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L1I.READS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I Instruction fetches"
+    },
+    {
+        "EventCode": "0x82",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LARGE_ITLB.HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Large ITLB hit"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "LOAD_DISPATCH.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All loads dispatched"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "LOAD_DISPATCH.MOB",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched from the MOB"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOAD_DISPATCH.RS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched that bypass the MOB"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_DISPATCH.RS_DELAYED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched from stage 305"
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "PARTIAL_ADDRESS_ALIAS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "False dependencies due to partial address aliasing"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "RAT_STALLS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All RAT stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RAT_STALLS.FLAGS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Flag stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RAT_STALLS.REGISTERS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Partial register stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RAT_STALLS.ROB_READ_PORT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ROB read port stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RAT_STALLS.SCOREBOARD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Scoreboard stall cycles"
+    },
+    {
+        "EventCode": "0x4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "SB_DRAIN.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All Store buffer stall cycles"
+    },
+    {
+        "EventCode": "0xD4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SEG_RENAME_STALLS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Segment rename stall cycles"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SNOOP_RESPONSE.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HIT to snoop"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SNOOP_RESPONSE.HITE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HITE to snoop"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SNOOP_RESPONSE.HITM",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HITM to snoop"
+    },
+    {
+        "EventCode": "0xF6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SQ_FULL_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue full stall cycles"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemex/pipeline.json b/pmu-events/arch/x86/nehalemex/pipeline.json
new file mode 100644
index 0000000..41006dd
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemex/pipeline.json
@@ -0,0 +1,881 @@
+[
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.CYCLES_DIV_BUSY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles the divider is busy"
+    },
+    {
+        "EventCode": "0x14",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.DIV",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Divide Operations executed",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ARITH.MUL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Multiply operations executed"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BACLEAR.BAD_TARGET",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "BACLEAR asserted with bad target address"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BACLEAR.CLEAR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "BACLEAR asserted, regardless of cause"
+    },
+    {
+        "EventCode": "0xA7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BACLEAR_FORCE_IQ",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instruction queue forced BACLEAR"
+    },
+    {
+        "EventCode": "0xE0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Branch instructions decoded"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7f",
+        "EventName": "BR_INST_EXEC.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Branch instructions executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_EXEC.COND",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Conditional branch instructions executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_EXEC.DIRECT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Unconditional branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_INST_EXEC.DIRECT_NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Unconditional call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_INST_EXEC.INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_EXEC.INDIRECT_NON_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect non call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "BR_INST_EXEC.NEAR_CALLS",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "BR_INST_EXEC.NON_CALLS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All non call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_INST_EXEC.RETURN_NEAR",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect return branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_INST_EXEC.TAKEN",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Taken branches executed"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired conditional branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Retired near call instructions (Precise Event)"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7f",
+        "EventName": "BR_MISP_EXEC.ANY",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_EXEC.COND",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted conditional branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_EXEC.DIRECT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted unconditional branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_MISP_EXEC.DIRECT_NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted direct near call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_MISP_EXEC.INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted indirect call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_EXEC.INDIRECT_NON_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted indirect non call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "BR_MISP_EXEC.NEAR_CALLS",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "BR_MISP_EXEC.NON_CALLS",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted non call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_MISP_EXEC.RETURN_NEAR",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted return branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_MISP_EXEC.TAKEN",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted taken branches executed"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted near retired calls (Precise Event)"
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.REF",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Reference cycles when thread is not halted (fixed counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF_P",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Reference base clock (133 MHz) cycles when thread is not halted (programmable counter)"
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when thread is not halted (fixed counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when thread is not halted (programmable counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total CPU cycles",
+        "CounterMask": "2"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "ILD_STALL.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Any Instruction Length Decoder stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ILD_STALL.IQ_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instruction Queue full stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ILD_STALL.LCP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Length Change Prefix stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ILD_STALL.MRU",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stall cycles due to BPU MRU bypass"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "ILD_STALL.REGEN",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Regen stall cycles"
+    },
+    {
+        "EventCode": "0x18",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_DECODED.DEC0",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions that must be decoded by decoder 0"
+    },
+    {
+        "EventCode": "0x1E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_QUEUE_WRITE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles instructions are written to the instruction queue"
+    },
+    {
+        "EventCode": "0x17",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_QUEUE_WRITES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions written to instruction queue."
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired (fixed counter)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired (Programmable counter and Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "INST_RETIRED.MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired MMX instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles (Precise Event)",
+        "CounterMask": "16"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "INST_RETIRED.X87",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired floating-point operations (Precise Event)"
+    },
+    {
+        "EventCode": "0x4C",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "LOAD_HIT_PRE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Load operations conflicting with software prefetches"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.ACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when uops were delivered by the LSD",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xA8",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.INACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no uops were delivered by the LSD",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x20",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD_OVERFLOW",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loops that can't stream from the instruction queue"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACHINE_CLEARS.CYCLES",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Cycles machine clear asserted"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEM_ORDER",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Execution pipeline restart due to Memory ordering conflicts"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Self-Modifying Code detected"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Resource related stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "RESOURCE_STALLS.FPCW",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "FPU control word write stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RESOURCE_STALLS.LOAD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Load buffer stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "RESOURCE_STALLS.MXCSR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "MXCSR rename stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "RESOURCE_STALLS.OTHER",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Other Resource related stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RESOURCE_STALLS.ROB_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ROB full stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RESOURCE_STALLS.RS_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Reservation Station full stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RESOURCE_STALLS.STORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Store buffer stall cycles"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SSEX_UOPS_RETIRED.PACKED_DOUBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Packed-Double Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SSEX_UOPS_RETIRED.PACKED_SINGLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Packed-Single Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SSEX_UOPS_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Scalar-Double Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SSEX_UOPS_RETIRED.SCALAR_SINGLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Scalar-Single Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SSEX_UOPS_RETIRED.VECTOR_INTEGER",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Vector Integer Uops retired (Precise Event)"
+    },
+    {
+        "EventCode": "0xDB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOP_UNFUSION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uop unfusions due to FP exceptions"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_DECODED.ESP_FOLDING",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stack pointer instructions decoded"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "UOPS_DECODED.ESP_SYNC",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stack pointer sync operations"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DECODED.MS_CYCLES_ACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops decoded by Microcode Sequencer",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xD1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DECODED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops are decoded",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops executed on any port (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops executed on ports 0-4 (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Number of stall periods with no Uops executed on any port (core count)",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Number of stall periods with no Uops executed on ports 0-4 (core count)",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on any port (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on ports 0-4 (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.PORT0",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 0"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_EXECUTED.PORT015",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued on ports 0, 1 or 5"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_EXECUTED.PORT015_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on ports 0, 1 or 5",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.PORT1",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT2_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 2 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT234_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued on ports 2, 3 or 4"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT3_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 3 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT4_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 4 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_EXECUTED.PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 5"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued"
+    },
+    {
+        "EventCode": "0xE",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops were issued on any thread",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CYCLES_ALL_THREADS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops were issued on either thread",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_ISSUED.FUSED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Fused Uops issued"
+    },
+    {
+        "EventCode": "0xE",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops were issued",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ACTIVE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops are being retired",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_RETIRED.MACRO_FUSED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Macro-fused Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retirement slots used (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops are not retiring (Precise Event)",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles using precise uop retired event (Precise Event)",
+        "CounterMask": "16"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC0",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles (Precise Event)",
+        "CounterMask": "16"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/nehalemex/virtual-memory.json b/pmu-events/arch/x86/nehalemex/virtual-memory.json
new file mode 100644
index 0000000..0596094
--- /dev/null
+++ b/pmu-events/arch/x86/nehalemex/virtual-memory.json
@@ -0,0 +1,109 @@
+[
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_LOAD_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load misses"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_LOAD_MISSES.PDE_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss caused by low part of address"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "DTLB second level hit"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss page walks complete"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB misses"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB first level misses but second level hit"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB miss page walks"
+    },
+    {
+        "EventCode": "0xAE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_FLUSH",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ITLB flushes"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ITLB_MISS_RETIRED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired instructions that missed the ITLB (Precise Event)"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB miss"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB miss page walks"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "MEM_LOAD_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that miss the DTLB (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_STORE_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired stores that miss the DTLB (Precise Event)"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/sandybridge/cache.json b/pmu-events/arch/x86/sandybridge/cache.json
new file mode 100644
index 0000000..bef73c4
--- /dev/null
+++ b/pmu-events/arch/x86/sandybridge/cache.json
@@ -0,0 +1,1879 @@
+[
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x11",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that miss the STLB.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x12",
+        "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store uops that miss the STLB.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired load uops with locked access.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that split across a cacheline boundary.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store uops that split across a cacheline boundary.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of load uops retired",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired load uops.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of store uops retired.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired store uops.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retired load uops with L1 cache hits as data sources.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops with L2 cache hits as data sources.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts retired load uops that hit in the last-level (L3) cache without snoops required.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package).  Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line.  In this case, a snoop was required, and another L2 had the line in a non-modified state.",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a modified state in a neighboring core's private cache (same package).  Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line.  In this case, a snoop was required, and another L2 had the line in a modified state, so the line had to be invalidated in that L2 cache and transferred to the requesting L2.",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts retired demand loads that missed the last-level (L3) cache. This means that the load is usually satisfied from memory in a client system or possibly from the remote socket in a server. Demand loads are non-speculative load uops.",
+        "EventCode": "0xD4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired load uops with unknown information as data source in cache serviced the load.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts L1D data line replacements.  Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier.  ",
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D.REPLACEMENT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D data line replacements.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D.ALLOCATED_IN_M",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Allocated L1D data cache lines in M state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L1D.EVICTION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D data cache lines in M state evicted due to replacement.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L1D.ALL_M_REPLACEMENT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cache lines in M state evicted out of L1D due to Snoop HitM or dirty line replacement.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D miss outstandings duration in cycles.",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "CounterMask": "1",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when L1D is locked.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand Data Read requests sent to uncore.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cacheable and noncacheable code read requests.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand and prefetch data reads.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cases when offcore requests buffer cannot take more entries for core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that hit L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that miss L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache misses when fetching instructions.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_RQSTS.PF_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from the L2 hardware prefetchers that hit L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_RQSTS.PF_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_STORE_LOCK_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that miss cache lines.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_STORE_LOCK_RQSTS.HIT_E",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that hit cache lines in E state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_STORE_LOCK_RQSTS.HIT_M",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that hit cache lines in M state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_STORE_LOCK_RQSTS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFOs that access cache lines in any state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_L1D_WB_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of modified lines evicted from L1 that missed L2 (non-rejected WBs from the DCU).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_L1D_WB_RQSTS.HIT_S",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in S state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_L1D_WB_RQSTS.HIT_E",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in E state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_L1D_WB_RQSTS.HIT_M",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in M state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_L1D_WB_RQSTS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in any state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_TRANS.DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that access L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_TRANS.RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that access L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_TRANS.CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache accesses when fetching instructions.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_TRANS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 or LLC HW prefetches that access L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_TRANS.L1D_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L1D writebacks that access L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_TRANS.L2_FILL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 fill requests that access L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_TRANS.L2_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 writebacks that access L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_TRANS.ALL_REQUESTS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Transactions accessing L2 pipe.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_IN.I",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in I state filling L2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_IN.S",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in S state filling L2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_IN.E",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines in E state filling L2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of L2 cache lines brought into the L2 cache.  Lines are filled into the L2 cache when there was an L2 miss.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "L2_LINES_IN.ALL",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines filling L2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Clean L2 cache lines evicted by demand.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines evicted by demand.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_OUT.PF_CLEAN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Clean L2 cache lines evicted by L2 prefetch.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_LINES_OUT.PF_DIRTY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines evicted by L2 prefetch.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa",
+        "EventName": "L2_LINES_OUT.DIRTY_ALL",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines filling the L2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests missed LLC.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to LLC.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Split locks in SQ.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests to L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 code requests.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc0",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from L2 hardware prefetchers.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "L1D_BLOCKS.BANK_CONFLICT_CYCLES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles when dispatched loads are cancelled due to L1D bank conflicts with other load ports.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_C6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "2",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+        "CounterMask": "1",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers unavailability.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch code reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch code reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0240",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch code reads that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0240",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch code reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0240",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch code reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0240",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0240",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch code reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0120",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch RFOs that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0120",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0120",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0120",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0120",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c03f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "COREWB & ANY_RESPONSE",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand code reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand code reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand code reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data reads that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x18000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses. It also includes L2 hints sent to LLC to keep a line from being evicted out of the core caches.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x803c8000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LRU_HINTS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts L2 hints sent to LLC to keep a line from being evicted out of the core caches.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2380408000",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.PORTIO_MMIO_UC",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) code reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) code reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) code reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) data reads that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) RFOs that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) RFOs that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) RFOs that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3f803c0100",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the LLC.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003c0100",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) RFOs that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10003c0100",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) RFOs that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003c0100",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003c0100",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to LLC only) RFOs that hit in the LLC and the snoops sent to sibling cores return clean response.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10400",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10800",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts non-temporal stores.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand data reads.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand RFOs.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand code reads.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x000105B3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00010122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x000107F7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo references (demand & prefetch).",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10433",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": " REQUEST = DATA_INTO_CORE and RESPONSE = ANY_RESPONSE",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000040002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_M.HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": " REQUEST = DEMAND_RFO and RESPONSE = LLC_HIT_M and SNOOP = HITM",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": " REQUEST = PF_IFETCH and RESPONSE = ANY_RESPONSE",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": " REQUEST = PF_LLC_DATA_RD and RESPONSE = ANY_RESPONSE",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L_IFETCH.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": " REQUEST = PF_LLC_IFETCH and RESPONSE = ANY_RESPONSE",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/sandybridge/floating-point.json b/pmu-events/arch/x86/sandybridge/floating-point.json
new file mode 100644
index 0000000..982eda4
--- /dev/null
+++ b/pmu-events/arch/x86/sandybridge/floating-point.json
@@ -0,0 +1,138 @@
+[
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OTHER_ASSISTS.AVX_STORE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of GSSE memory assist for stores. GSSE microcode assist is being invoked whenever the hardware is unable to properly handle GSSE-256b operations.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ASSIST.X87_OUTPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of X87 assists due to output value.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ASSIST.X87_INPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of X87 assists due to input value.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_ASSIST.SIMD_OUTPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of SIMD FP assists due to Output values.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_ASSIST.SIMD_INPUT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of SIMD FP assists due to input values.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_COMP_OPS_EXE.X87",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of FP Computational Uops Executed this cycle. The number of FADD, FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. This event does not distinguish an FADD used in the middle of a transcendental flow from a separate FADD instruction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed double-precision uops issued this cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar single-precision uops issued this cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed single-precision uops issued this cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar double-precision uops issued this cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_FP_256.PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of GSSE-256 Computational FP single precision uops issued this cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_FP_256.PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of AVX-256 Computational FP double precision uops issued this cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1e",
+        "EventName": "FP_ASSIST.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/sandybridge/frontend.json b/pmu-events/arch/x86/sandybridge/frontend.json
new file mode 100644
index 0000000..1b7b1dd
--- /dev/null
+++ b/pmu-events/arch/x86/sandybridge/frontend.json
@@ -0,0 +1,305 @@
+[
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ICACHE.HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads, both cacheable and noncacheable, including UC fetches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. Counting includes uncacheable accesses.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ICACHE.MISSES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Instruction cache, streaming buffer and victim cache misses.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "IDQ.EMPTY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the front-end in delivering uops.  Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder.  Using other instructions, if possible, will usually improve performance.  See the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual for more information.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of uops not delivered to the back-end per cycle, per thread, when the back-end was not stalled.  In the ideal case 4 uops can be delivered each cycle.  The event counts the undelivered uops - so if 3 were delivered in one cycle, the counter would be incremented by 1 for that cycle (4 - 3). If the back-end is stalled, the count for this event is not incremented even when uops were not delivered, because the back-end would not have been able to accept them.  This event is used in determining the front-end bound category of the top-down pipeline slots characterization.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xAB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DSB2MITE_SWITCHES.COUNT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the cycles attributed to a switch from the Decoded Stream Buffer (DSB), which holds decoded instructions, to the legacy decode pipeline.  It excludes cycles when the back-end cannot  accept new micro-ops.  The penalty for these switches is potentially several cycles of instruction starvation, where no micro-ops are delivered to the back-end.",
+        "EventCode": "0xAB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xAC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DSB_FILL.OTHER_CANCEL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cases of cancelling valid DSB fill not because of exceeding way limit.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xAC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "DSB_FILL.EXCEED_DSB_LINES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_DSB_OCCUR",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x9C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_GE_1_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when 1 or more uops were delivered by the front end.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering any Uop.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xAC",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa",
+        "EventName": "DSB_FILL.ALL_CANCEL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cases of cancelling valid Decode Stream Buffer (DSB) fill not because of exceeding way limit.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3c",
+        "EventName": "IDQ.MITE_ALL_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_SWITCHES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/sandybridge/memory.json b/pmu-events/arch/x86/sandybridge/memory.json
new file mode 100644
index 0000000..e6dfa89
--- /dev/null
+++ b/pmu-events/arch/x86/sandybridge/memory.json
@@ -0,0 +1,445 @@
+[
+    {
+        "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from memory disambiguation, external snoops, or cross SMT-HW-thread snoop (stores) hitting load buffers.  Machine clears can have a significant performance impact if they are happening frequently.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Loads with latency value being above 4.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Loads with latency value being above 8.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Loads with latency value being above 16.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Loads with latency value being above 32.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2003",
+        "BriefDescription": "Loads with latency value being above 64.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "1009",
+        "BriefDescription": "Loads with latency value being above 128.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "503",
+        "BriefDescription": "Loads with latency value being above 256.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "101",
+        "BriefDescription": "Loads with latency value being above 512.",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xCD",
+        "Counter": "3",
+        "UMask": "0x2",
+        "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only. (Precise Event - PEBS).",
+        "PRECISE_STORE": "1",
+        "TakenAlone": "1",
+        "CounterHTOff": "3"
+    },
+    {
+        "EventCode": "0xBE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "PAGE_WALKS.LLC_MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of any page walk that had a miss in LLC. Does not necessarily cause a SUSPEND.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MISALIGN_MEM_REF.LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x05",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MISALIGN_MEM_REF.STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC  and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400091",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the LLC  and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400240",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch code reads that miss the LLC  and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400090",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch data reads that miss the LLC  and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400120",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch RFOs that miss the LLC  and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3004003f7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC  and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the LLC  and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand code reads that miss the LLC and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data writes (RFOs) that miss the LLC and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that miss the LLC  and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the LLC  and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the LLC  and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the LLC  and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x300400100",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the LLC  and the data returned from dram.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts all data requests (demand/prefetch data reads and demand data writes (RFOs)) that miss the LLC where the data is returned from local DRAM",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6004001b3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN_SOCKET.LLC_MISS.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
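+        "BriefDescription": "Counts all data requests (demand/prefetch data reads and demand data writes (RFOs)) that miss the LLC where the data is returned from local DRAM.",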
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "This event counts any requests that miss the LLC where the data was returned from local DRAM",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1f80408fff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_MISS_LOCAL.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": " REQUEST = ANY_REQUEST and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x17004001b3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN_SOCKET.LLC_MISS_LOCAL.ANY_LLC_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": " REQUEST = DATA_IN_SOCKET and RESPONSE = LLC_MISS_LOCAL and SNOOP = ANY_LLC_HIT",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1f80400004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_MISS_LOCAL.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": " REQUEST = DEMAND_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1f80400010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_MISS_LOCAL.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": " REQUEST = PF_DATA_RD and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1f80400040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_MISS_LOCAL.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": " REQUEST = PF_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1f80400080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L_DATA_RD.LLC_MISS_LOCAL.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": " REQUEST = PF_LLC_DATA_RD and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1f80400200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Offcore": "1",
+        "EventName": "OFFCORE_RESPONSE.PF_L_IFETCH.LLC_MISS_LOCAL.DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": " REQUEST = PF_LLC_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/sandybridge/other.json b/pmu-events/arch/x86/sandybridge/other.json
new file mode 100644
index 0000000..64b195b
--- /dev/null
+++ b/pmu-events/arch/x86/sandybridge/other.json
@@ -0,0 +1,58 @@
+[
+    {
+        "EventCode": "0x17",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INSTS_WRITTEN_TO_IQ.INSTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Valid instructions written to IQ per cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPL_CYCLES.RING0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Unhalted core cycles when the thread is in ring 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "CPL_CYCLES.RING0_TRANS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Number of intervals between processor halts while thread is in ring 0.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPL_CYCLES.RING123",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "HW_PRE_REQ.DL1_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Hardware Prefetch requests that miss the L1D cache. This accounts for both L1 streamer and IP-based (IPP) HW prefetchers. A request is counted each time it accesses the cache and misses it, including if a block is applicable or if it hits the Fill Buffer, for example.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/sandybridge/pipeline.json b/pmu-events/arch/x86/sandybridge/pipeline.json
new file mode 100644
index 0000000..8a597e4
--- /dev/null
+++ b/pmu-events/arch/x86/sandybridge/pipeline.json
@@ -0,0 +1,1220 @@
+[
+    {
+        "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. ",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired from execution.",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when the thread is not in halt state.",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 3",
+        "UMask": "0x3",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "CounterHTOff": "Fixed counter 3"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not taken macro-conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired macro-conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired macro-unconditional branch instructions excluding calls and indirects.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x90",
+        "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired direct near calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa0",
+        "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired indirect calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc1",
+        "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired macro-conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc2",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc4",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired indirect branches excluding calls and returns.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc8",
+        "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired indirect return branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xd0",
+        "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired direct near calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x84",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x88",
+        "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x90",
+        "EventName": "BR_MISP_EXEC.TAKEN_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted direct near calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa0",
+        "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc1",
+        "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc4",
+        "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Mispredicted indirect branches excluding calls and returns.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xd0",
+        "EventName": "BR_MISP_EXEC.ALL_DIRECT_NEAR_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted direct near calls.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Thread cycles when thread is not in halt state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ILD_STALL.LCP",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ILD_STALL.IQ_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Stall cycles because IQ is full.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "INT_MISC.RAT_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x59",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Increments the number of flags-merge uops in flight each cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles with at least one slow LEA uop being allocated. A uop is generally considered as slow LEA if it has three sources (for example, two sources and immediate) regardless of whether it is a result of LEA instruction or not. Examples of the slow LEA uop are uops with base, index, and offset source operands using base and index registers, where base is EBP/RBP/R13, using RIP relative or 16-bit addressing modes. See the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual for more details about slow LEA instructions.",
+        "EventCode": "0x59",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with at least one slow LEA uop being allocated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x59",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "PARTIAL_RAT_STALLS.MUL_SINGLE_UOP",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Multiply packed/scalar single precision uops allocated.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource-related stall cycles.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RESOURCE_STALLS.LB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the cycles of stall due to lack of load buffers.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RESOURCE_STALLS.RS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RESOURCE_STALLS.SB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RESOURCE_STALLS.ROB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to re-order buffer full.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5B",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "RESOURCE_STALLS2.BOB_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Allocator is stalled if BOB is full and new branch needs it.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of Uops issued by the front-end of the pipeline to the back-end.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x0E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x5E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count cases of saving new LBR.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear.  Machine clears can have a significant performance impact if they are happening frequently.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MACHINE_CLEARS.MASKMOV",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of instructions retired. General Counter   - architectural event.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of micro-ops retired.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Actually retired uops.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of retirement slots used each cycle.  There are potentially 4 slots that can be used each cycle - meaning, 4 micro-ops or 4 instructions could retire each cycle.  This event is used in determining the 'Retiring' category of the Top-Down pipeline slots characterization.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retirement slots used.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "CounterMask": "10",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Conditional branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Direct and indirect near call instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Return instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Not taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Far branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS).",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Direct and indirect mispredicted near call instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_MISP_RETIRED.NOT_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted not taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_MISP_RETIRED.TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS).",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OTHER_ASSISTS.ITLB_MISS_RETIRED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired instructions experiencing ITLB misses.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.FPU_DIV_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when divider is busy executing divide operations.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of the divide operations executed.",
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "ARITH.FPU_DIV",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Divide operations executed.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DISPATCHED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops dispatched per thread.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DISPATCHED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops dispatched from any thread.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 1.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 4.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are dispatched to port 5.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_NO_DISPATCH",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Each cycle there was no dispatch for this thread, increment by 1. Note this is connected to Umask 2. No dispatch can be deduced from the UOPS_EXECUTED event.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0x2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Each cycle there was a miss-pending demand load this thread, increment by 1. Note this is in DCU and connected to Umask 1. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.",
+        "CounterMask": "2",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Each cycle there was a MLC-miss pending demand load this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. Note this is in MLC and connected to Umask 0.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "2",
+        "UMask": "0x6",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Each cycle there was a miss-pending demand load this thread and no uops dispatched, increment by 1. Note this is in DCU and connected to Umask 1 and 2. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.",
+        "CounterMask": "6",
+        "CounterHTOff": "2"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Each cycle there was a MLC-miss pending demand load and no uops dispatched on this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. Note this is in MLC and connected to Umask 0 and 2.",
+        "CounterMask": "5",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0x4C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_HIT_PRE.HW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Loads delayed due to SB blocks, preceding store operations with known addresses but unknown data.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load.  The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store.  See the table of not supported store forwards in the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual.  The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "LD_BLOCKS.ALL_BLOCK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of cases where any load ends up with a valid block-code written to the load buffer (including blocks due to Memory Order Buffer (MOB), Data Cache Unit (DCU), TLB, but load has no DCU miss).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Aliasing occurs when a load is issued after a store and their memory addresses are offset by 4K.  This event counts the number of loads that aliased with a preceding store, resulting in an extended address check in the pipeline.  The enhanced address check typically has a performance penalty of 5 cycles.",
+        "EventCode": "0x07",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "False dependencies in MOB due to partial compare.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x07",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "LD_BLOCKS_PARTIAL.ALL_STA_BLOCK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts the number of times that load operations are temporarily blocked because of older stores, with addresses that are not yet known. A load operation may incur more than one block of this type.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "AGU_BYPASS_CANCEL.COUNT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "This event counts executed load operations with all the following traits: 1. addressing of the format [base + offset], 2. the offset is between 1 and 2047, 3. the address specified in the base register is in one page and the address [base+offset] is in another page.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 0.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 1.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 4.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when uops are dispatched to port 5.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 2.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "AnyThread": "1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC0",
+        "Counter": "1",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired. (Precise Event - PEBS).",
+        "TakenAlone": "1",
+        "CounterHTOff": "1"
+    },
+    {
+        "EventCode": "0x5B",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "RESOURCE_STALLS2.ALL_PRF_CONTROL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource stalls2 control structures full for physical registers.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5B",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "RESOURCE_STALLS2.ALL_FL_EMPTY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with either free list is empty.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "RESOURCE_STALLS.MEM_RS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource stalls due to memory buffers or Reservation Station (RS) being fully utilized.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf0",
+        "EventName": "RESOURCE_STALLS.OOO_RSRC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource stalls due to Rob being full, FCSW, MXCSR and OTHER.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5B",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "RESOURCE_STALLS2.OOO_RSRC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource stalls out of order resources full.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xa",
+        "EventName": "RESOURCE_STALLS.LB_SB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource stalls due to load or store buffers all being in use.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts the number of cycles spent executing performance-sensitive flags-merging uops. For example, shift CL (merge_arith_flags). For more details, see the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual.",
+        "EventCode": "0x59",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Performance sensitive flags-merging uops added by Sandy Bridge u-arch.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EdgeDetect": "1",
+        "EventName": "INT_MISC.RECOVERY_STALLS_COUNT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of occurrences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "EventName": "BACLEARS.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired  branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Speculative and retired mispredicted macro conditional branches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xc3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "AnyThread": "1",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/sandybridge/snb-metrics.json b/pmu-events/arch/x86/sandybridge/snb-metrics.json
new file mode 100644
index 0000000..fd7d7c4
--- /dev/null
+++ b/pmu-events/arch/x86/sandybridge/snb-metrics.json
@@ -0,0 +1,140 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "MetricGroup": "Pipeline",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+        "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
+        "MetricGroup": "Frontend",
+        "MetricName": "IFetch_Line_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (threaded)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "MetricGroup": "Pipeline;Summary",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Total issue-pipeline slots",
+        "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "SLOTS"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "SMT",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
+        "MetricGroup": "Pipeline;Ports_Utilization",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Giga Floating Point Operations Per Second",
+        "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
+        "MetricGroup": "FLOPS;Summary",
+        "MetricName": "GFLOPs"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware threads were active",
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT;Summary",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Summary",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "C3 residency percent per core",
+        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    }
+]
diff --git a/pmu-events/arch/x86/sandybridge/uncore.json b/pmu-events/arch/x86/sandybridge/uncore.json
new file mode 100644
index 0000000..42c70ee
--- /dev/null
+++ b/pmu-events/arch/x86/sandybridge/uncore.json
@@ -0,0 +1,314 @@
+[
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x01",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.MISS",
+    "BriefDescription": "A snoop misses in some processor core.",
+    "PublicDescription": "A snoop misses in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x02",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL",
+    "BriefDescription": "A snoop invalidates a non-modified line in some processor core.",
+    "PublicDescription": "A snoop invalidates a non-modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x04",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HIT",
+    "BriefDescription": "A snoop hits a non-modified line in some processor core.",
+    "PublicDescription": "A snoop hits a non-modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x08",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HITM",
+    "BriefDescription": "A snoop hits a modified line in some processor core.",
+    "PublicDescription": "A snoop hits a modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x10",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL_M",
+    "BriefDescription": "A snoop invalidates a modified line in some processor core.",
+    "PublicDescription": "A snoop invalidates a modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x20",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.EXTERNAL_FILTER",
+    "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.",
+    "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x40",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.XCORE_FILTER",
+    "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.",
+    "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x80",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.EVICTION_FILTER",
+    "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.",
+    "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x01",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.M",
+    "BriefDescription": "LLC lookup request that access cache and found line in M-state.",
+    "PublicDescription": "LLC lookup request that access cache and found line in M-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x02",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.E",
+    "BriefDescription": "LLC lookup request that access cache and found line in E-state.",
+    "PublicDescription": "LLC lookup request that access cache and found line in E-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x04",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.S",
+    "BriefDescription": "LLC lookup request that access cache and found line in S-state.",
+    "PublicDescription": "LLC lookup request that access cache and found line in S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x08",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.I",
+    "BriefDescription": "LLC lookup request that access cache and found line in I-state.",
+    "PublicDescription": "LLC lookup request that access cache and found line in I-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x10",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_FILTER",
+    "BriefDescription": "Filter on processor core initiated cacheable read requests.",
+    "PublicDescription": "Filter on processor core initiated cacheable read requests.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x20",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_FILTER",
+    "BriefDescription": "Filter on processor core initiated cacheable write requests.",
+    "PublicDescription": "Filter on processor core initiated cacheable write requests.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x40",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_FILTER",
+    "BriefDescription": "Filter on external snoop requests.",
+    "PublicDescription": "Filter on external snoop requests.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x80",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_REQUEST_FILTER",
+    "BriefDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.",
+    "PublicDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x80",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+    "BriefDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+    "PublicDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+    "Counter": "0",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x81",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+    "BriefDescription": "Counts the number of coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+    "PublicDescription": "Counts the number of coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x81",
+    "UMask": "0x20",
+    "EventName": "UNC_ARB_TRK_REQUESTS.WRITES",
+    "BriefDescription": "Counts the number of allocated write entries, including full, partial, and LLC evictions.",
+    "PublicDescription": "Counts the number of allocated write entries, including full, partial, and LLC evictions.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x81",
+    "UMask": "0x80",
+    "EventName": "UNC_ARB_TRK_REQUESTS.EVICTIONS",
+    "BriefDescription": "Counts the number of LLC evictions allocated.",
+    "PublicDescription": "Counts the number of LLC evictions allocated.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x83",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_COH_TRK_OCCUPANCY.ALL",
+    "BriefDescription": "Cycles weighted by number of requests pending in Coherency Tracker.",
+    "PublicDescription": "Cycles weighted by number of requests pending in Coherency Tracker.",
+    "Counter": "0",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x84",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+    "BriefDescription": "Number of requests allocated in Coherency Tracker.",
+    "PublicDescription": "Number of requests allocated in Coherency Tracker.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x80",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
+    "BriefDescription": "Cycles with at least one outstanding request waiting for data return from memory controller. Accounts for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+    "PublicDescription": "Cycles with at least one outstanding request waiting for data return from memory controller. Accounts for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+    "Counter": "0,1",
+    "CounterMask": "1",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x80",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_OVER_HALF_FULL",
+    "BriefDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+    "PublicDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+    "Counter": "0,1",
+    "CounterMask": "10",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "ARB",
+    "EventCode": "0x0",
+    "UMask": "0x01",
+    "EventName": "UNC_CLOCK.SOCKET",
+    "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+    "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+    "Counter": "Fixed",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x06",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ES",
+    "BriefDescription": "LLC lookup request that access cache and found line in E-state or S-state.",
+    "PublicDescription": "LLC lookup request that access cache and found line in E-state or S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/sandybridge/virtual-memory.json b/pmu-events/arch/x86/sandybridge/virtual-memory.json
new file mode 100644
index 0000000..a654ab7
--- /dev/null
+++ b/pmu-events/arch/x86/sandybridge/virtual-memory.json
@@ -0,0 +1,149 @@
+[
+    {
+        "EventCode": "0xAE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4F",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "EPT.WALK_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycle count for an Extended Page table walk.  The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses in all ITLB levels that cause completed page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles when Page Miss Handler (PMH) is servicing page walks caused by ITLB misses.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ITLB_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when PMH is busy with page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Load misses in all DTLB levels that cause page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Load misses at all DTLB levels that cause completed page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB load misses.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when PMH is busy with page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This event counts load operations that miss the first DTLB level but hit the second and do not cause any page walks. The penalty in this case is approximately 7 cycles.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when PMH is busy with page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "STLB flush attempts.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/silvermont/cache.json b/pmu-events/arch/x86/silvermont/cache.json
new file mode 100644
index 0000000..82be7d1
--- /dev/null
+++ b/pmu-events/arch/x86/silvermont/cache.json
@@ -0,0 +1,812 @@
+[
+    {
+        "PublicDescription": "This event counts the number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the IDI link. The XQ may reject transactions from the L2Q (non-cacheable requests), BBS (L2 misses) and WOB (L2 write-back victims).",
+        "EventCode": "0x30",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "L2_REJECT_XQ.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of requests from the L2 that were not accepted into the XQ"
+    },
+    {
+        "PublicDescription": "Counts the number of (demand and L1 prefetchers) core requests rejected by the L2Q due to a full or nearly full condition which likely indicates back pressure from L2Q.  It also counts requests that would have gone directly to the XQ, but are rejected due to a full or nearly full condition, indicating back pressure from the IDI link.  The L2Q may also reject transactions from a core to ensure fairness between cores, or to delay a core's dirty eviction when the address conflicts with incoming external snoops.  (Note that L2 prefetcher requests that are dropped are not counted by this event.)",
+        "EventCode": "0x31",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "CORE_REJECT_L2Q.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of requests that were not accepted into the L2Q because the L2Q is FULL."
+    },
+    {
+        "PublicDescription": "This event counts requests originating from the core that references a cache line in the L2 cache.",
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache requests from this core"
+    },
+    {
+        "PublicDescription": "This event counts the total number of L2 cache references and the number of L2 cache misses respectively.",
+        "EventCode": "0x2E",
+        "Counter": "0,1",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache request misses"
+    },
+    {
+        "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss.  Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.\r\nCounts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes.  This will include cycles due to an ITLB miss, ICache miss and other events. \r\n",
+        "EventCode": "0x86",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles code-fetch stalled due to an outstanding ICache miss."
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of retired loads that were prohibited from receiving forwarded data from the store because of address mismatch.",
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "REHABQ.LD_BLOCK_ST_FORWARD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads blocked due to store forward restriction"
+    },
+    {
+        "PublicDescription": "This event counts the cases where a forward was technically possible, but did not occur because the store data was not available at the right time.",
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "REHABQ.LD_BLOCK_STD_NOTREADY",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads blocked due to store data not ready"
+    },
+    {
+        "PublicDescription": "This event counts the number of retired stores that experienced cache line boundary splits.",
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "REHABQ.ST_SPLITS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Store uops that split cache line boundary"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of retired loads that experienced cache line boundary splits.",
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "REHABQ.LD_SPLITS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Load uops that split cache line boundary"
+    },
+    {
+        "PublicDescription": "This event counts the number of retired memory operations with lock semantics. These are either implicit locked instructions such as the XCHG instruction or instructions with an explicit LOCK prefix (0xF0).",
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "REHABQ.LOCK",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Uops with lock semantics"
+    },
+    {
+        "PublicDescription": "This event counts the number of retired stores that are delayed because there is not a store address buffer available.",
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "REHABQ.STA_FULL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Store address buffer full"
+    },
+    {
+        "PublicDescription": "This event counts the number of load uops reissued from Rehabq.",
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "REHABQ.ANY_LD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Any reissued load uops"
+    },
+    {
+        "PublicDescription": "This event counts the number of store uops reissued from Rehabq.",
+        "EventCode": "0x03",
+        "Counter": "0,1",
+        "UMask": "0x80",
+        "EventName": "REHABQ.ANY_ST",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Any reissued store uops"
+    },
+    {
+        "PublicDescription": "This event counts the number of load ops retired that miss in L1 Data cache. Note that prefetch misses will not be counted.",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "MEM_UOPS_RETIRED.L1_MISS_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads missed L1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of load ops retired that hit in the L2.",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "MEM_UOPS_RETIRED.L2_HIT_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads hit L2"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of load ops retired that miss in the L2.",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "MEM_UOPS_RETIRED.L2_MISS_LOADS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Loads missed L2"
+    },
+    {
+        "PublicDescription": "This event counts the number of load ops retired that had UTLB miss.",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "MEM_UOPS_RETIRED.UTLB_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads missed UTLB"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of load ops retired that got data from the other core or from the other module.",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "MEM_UOPS_RETIRED.HITM",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cross core or cross module hitm"
+    },
+    {
+        "PublicDescription": "This event counts the number of load ops retired.",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x40",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "All Loads"
+    },
+    {
+        "PublicDescription": "This event counts the number of store ops retired.",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x80",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "All Stores"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680000044",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any code reads (demand & prefetch) that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000044",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any code reads (demand & prefetch) that hit in the other module where modified copies were found in other core's L1 cache.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000044",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any code reads (demand & prefetch) that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000044",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any code reads (demand & prefetch) that miss L2 with a snoop miss response.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010044",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any code reads (demand & prefetch) that have any response type.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680000022",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any rfo reads (demand & prefetch) that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000022",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any rfo reads (demand & prefetch) that hit in the other module where modified copies were found in other core's L1 cache.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000022",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any rfo reads (demand & prefetch) that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000022",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any rfo reads (demand & prefetch) that miss L2 with a snoop miss response.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010022",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any rfo reads (demand & prefetch) that have any response type.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680003091",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data read (demand & prefetch) that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000003091",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data read (demand & prefetch) that hit in the other module where modified copies were found in other core's L1 cache.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400003091",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data read (demand & prefetch) that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200003091",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data read (demand & prefetch) that miss L2 with a snoop miss response.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000013091",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any data read (demand & prefetch) that have any response type.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680004800",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts streaming store that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000008008",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that hit in the other module where modified copies were found in other core's L1 cache.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400008008",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200008008",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that miss L2 with a snoop miss response.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000018008",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts any request that have any response type.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680002000",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts DCU hardware prefetcher data read that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000002000",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts DCU hardware prefetcher data read that hit in the other module where modified copies were found in other core's L1 cache.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400002000",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts DCU hardware prefetcher data read that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200002000",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts DCU hardware prefetcher data read that miss L2 with a snoop miss response.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000012000",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts DCU hardware prefetcher data read that have any response type.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680000100",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand RFO requests to write to partial cache lines that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680000080",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand reads of partial cache lines (including UC and WC) that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680000040",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts code reads generated by L2 prefetchers that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000040",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts code reads generated by L2 prefetchers that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000040",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts code reads generated by L2 prefetchers that miss L2 with a snoop miss response.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680000020",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts RFO requests generated by L2 prefetchers that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000020",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts RFO requests generated by L2 prefetchers that hit in the other module where modified copies were found in other core's L1 cache.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000020",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts RFO requests generated by L2 prefetchers that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000020",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts RFO requests generated by L2 prefetchers that miss L2 with a snoop miss response.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680000010",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by L2 prefetchers that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000010",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by L2 prefetchers that hit in the other module where modified copies were found in other core's L1 cache.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000010",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by L2 prefetchers that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000010",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts data cacheline reads generated by L2 prefetchers that miss L2 with a snoop miss response.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680000008",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts writeback (modified to exclusive) that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0080000008",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts writeback (modified to exclusive) that miss L2 with no details on snoop-related information.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000004",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch instruction cacheline that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680000004",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch instruction cacheline that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000004",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch instruction cacheline that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000004",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch instruction cacheline that miss L2 with a snoop miss response.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010004",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch instruction cacheline that have any response type.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000002",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch RFOs that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680000002",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch RFOs that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000002",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch RFOs that hit in the other module where modified copies were found in other core's L1 cache.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000002",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch RFOs that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000002",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch RFOs that miss L2 with a snoop miss response.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4000000001",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING",
+        "MSRIndex": "0x1a6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch data read that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1680000001",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.ANY",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch data read that miss L2.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1000000001",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch data read that hit in the other module where modified copies were found in other core's L1 cache.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0400000001",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch data read that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0200000001",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch data read that miss L2 with a snoop miss response.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x0000010001",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts demand and DCU prefetch data read that have any response type.",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/silvermont/frontend.json b/pmu-events/arch/x86/silvermont/frontend.json
new file mode 100644
index 0000000..204473b
--- /dev/null
+++ b/pmu-events/arch/x86/silvermont/frontend.json
@@ -0,0 +1,47 @@
+[
+    {
+        "PublicDescription": "This event counts all instruction fetches, not including most uncacheable fetches.",
+        "EventCode": "0x80",
+        "Counter": "0,1",
+        "UMask": "0x3",
+        "EventName": "ICACHE.ACCESSES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Instruction fetches"
+    },
+    {
+        "PublicDescription": "This event counts all instruction fetches from the instruction cache.",
+        "EventCode": "0x80",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "ICACHE.HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Instruction fetches from Icache"
+    },
+    {
+        "PublicDescription": "This event counts all instruction fetches that miss the Instruction cache or produce memory requests. This includes uncacheable fetches. An instruction fetch miss is counted only once and not once for every cycle it is outstanding.",
+        "EventCode": "0x80",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "ICACHE.MISSES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Icache miss"
+    },
+    {
+        "PublicDescription": "Counts the number of times the MSROM starts a flow of UOPS. It does not count every time a UOP is read from the microcode ROM.  The most common case that this counts is when a micro-coded instruction is encountered by the front end of the machine.  Other cases include when an instruction encounters a fault, trap, or microcode assist of any sort.  The event will count MSROM startups for UOPS that are speculative, and subsequently cleared by branch mispredict or machine clear.  Background: UOPS are produced by two mechanisms.  Either they are generated by hardware that decodes instructions into UOPS, or they are delivered by a ROM (called the MSROM) that holds UOPS associated with a specific instruction.  MSROM UOPS might also be delivered in response to some condition such as a fault or other exceptional condition.  This event is an excellent mechanism for detecting instructions that require the use of MSROM instructions.",
+        "EventCode": "0xE7",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "MS_DECODED.MS_ENTRY",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of times entered into a ucode flow in the FEC.  Includes inserted flows due to front-end detected faults or assists.  Speculative count."
+    },
+    {
+        "PublicDescription": "Counts the number of times a decode restriction reduced the decode throughput due to wrong instruction length prediction.",
+        "EventCode": "0xE9",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "DECODE_RESTRICTION.PREDECODE_WRONG",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of times a decode restriction reduced the decode throughput due to wrong instruction length prediction"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/silvermont/memory.json b/pmu-events/arch/x86/silvermont/memory.json
new file mode 100644
index 0000000..d72e09a
--- /dev/null
+++ b/pmu-events/arch/x86/silvermont/memory.json
@@ -0,0 +1,11 @@
+[
+    {
+        "PublicDescription": "This event counts the number of times that pipeline was cleared due to memory ordering issues.",
+        "EventCode": "0xC3",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Stalls due to Memory ordering"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/silvermont/pipeline.json b/pmu-events/arch/x86/silvermont/pipeline.json
new file mode 100644
index 0000000..7468af9
--- /dev/null
+++ b/pmu-events/arch/x86/silvermont/pipeline.json
@@ -0,0 +1,359 @@
+[
+    {
+        "PEBS": "1",
+        "PublicDescription": "ALL_BRANCHES counts the number of any branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "JCC counts the number of conditional branch (JCC) instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0x7e",
+        "EventName": "BR_INST_RETIRED.JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of JCC branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "TAKEN_JCC counts the number of taken conditional branch (JCC) instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xfe",
+        "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of taken JCC branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "CALL counts the number of near CALL branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xf9",
+        "EventName": "BR_INST_RETIRED.CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of near CALL branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "REL_CALL counts the number of near relative CALL branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xfd",
+        "EventName": "BR_INST_RETIRED.REL_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of near relative CALL branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "IND_CALL counts the number of near indirect CALL branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xfb",
+        "EventName": "BR_INST_RETIRED.IND_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of near indirect CALL branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "RETURN counts the number of near RET branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xf7",
+        "EventName": "BR_INST_RETIRED.RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of near RET branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "NON_RETURN_IND counts the number of near indirect JMP and near indirect CALL branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xeb",
+        "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "FAR counts the number of far branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0xbf",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of far branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "ALL_BRANCHES counts the number of any mispredicted branch instructions retired. This umask is an architecturally defined event. This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "JCC counts the number of mispredicted conditional branches (JCC) instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0x7e",
+        "EventName": "BR_MISP_RETIRED.JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted JCC branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "TAKEN_JCC counts the number of mispredicted taken conditional branch (JCC) instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0xfe",
+        "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted taken JCC branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "IND_CALL counts the number of mispredicted near indirect CALL branch instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0xfb",
+        "EventName": "BR_MISP_RETIRED.IND_CALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "RETURN counts the number of mispredicted near RET branch instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0xf7",
+        "EventName": "BR_MISP_RETIRED.RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "NON_RETURN_IND counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.  This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa.  When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "EventCode": "0xC5",
+        "Counter": "0,1",
+        "UMask": "0xeb",
+        "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired"
+    },
+    {
+        "PublicDescription": "This event counts the number of micro-ops retired that were supplied from MSROM.",
+        "EventCode": "0xC2",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.MS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "MSROM micro-ops retired"
+    },
+    {
+        "PublicDescription": "This event counts the number of micro-ops retired. The processor decodes complex macro instructions into a sequence of simpler micro-ops. Most instructions are composed of one or two micro-ops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists. In some cases micro-op sequences are fused or whole instructions are fused into one micro-op. See other UOPS_RETIRED events for differentiating retired fused and non-fused micro-ops.",
+        "EventCode": "0xC2",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "UOPS_RETIRED.ALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Micro-ops retired"
+    },
+    {
+        "PublicDescription": "This event counts the number of times that a program writes to a code section. Self-modifying code causes a severe penalty in all Intel architecture processors.",
+        "EventCode": "0xC3",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Self-Modifying Code detected"
+    },
+    {
+        "PublicDescription": "This event counts the number of times that pipeline stalled due to FP operations needing assists.",
+        "EventCode": "0xC3",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.FP_ASSIST",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Stalls due to FP assists"
+    },
+    {
+        "PublicDescription": "Machine clears happen when something happens in the machine that causes the hardware to need to take special care to get the right answer. When such a condition is signaled on an instruction, the front end of the machine is notified that it must restart, so no more instructions will be decoded from the current path.  All instructions \"older\" than this one will be allowed to finish.  This instruction and all \"younger\" instructions must be cleared, since they must not be allowed to complete.  Essentially, the hardware waits until the problematic instruction is the oldest instruction in the machine.  This means all older instructions are retired, and all pending stores (from older instructions) are completed.  Then the new path of instructions from the front end are allowed to start into the machine.  There are many conditions that might cause a machine clear (including the receipt of an interrupt, or a trap or a fault).  All those conditions (including but not limited to MACHINE_CLEARS.MEMORY_ORDERING, MACHINE_CLEARS.SMC, and MACHINE_CLEARS.FP_ASSIST) are captured in the MACHINE_CLEARS.ALL event. In addition, some conditions can be specifically counted (i.e. SMC, MEMORY_ORDERING, FP_ASSIST).  However, the sum of SMC, MEMORY_ORDERING, and FP_ASSIST machine clears will not necessarily equal the MACHINE_CLEARS.ALL count.",
+        "EventCode": "0xC3",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "MACHINE_CLEARS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts all machine clears"
+    },
+    {
+        "PublicDescription": "Counts the number of cycles when no uops are allocated and the ROB is full (less than 2 entries available).",
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "NO_ALLOC_CYCLES.ROB_FULL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of cycles when no uops are allocated and the ROB is full (less than 2 entries available)"
+    },
+    {
+        "PublicDescription": "Counts the number of cycles when no uops are allocated and the alloc pipe is stalled waiting for a mispredicted jump to retire.  After the misprediction is detected, the front end will start immediately but the allocate pipe stalls until the mispredicted jump retires.",
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "NO_ALLOC_CYCLES.MISPREDICTS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of cycles when no uops are allocated and the alloc pipe is stalled waiting for a mispredicted jump to retire.  After the misprediction is detected, the front end will start immediately but the allocate pipe stalls until the mispredicted jump retires."
+    },
+    {
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "NO_ALLOC_CYCLES.RAT_STALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of cycles when no uops are allocated and a RAT stall is asserted."
+    },
+    {
+        "PublicDescription": "The NO_ALLOC_CYCLES.NOT_DELIVERED event is used to measure front-end inefficiencies, i.e. when front-end of the machine is not delivering micro-ops to the back-end and the back-end is not stalled. This event can be used to identify if the machine is truly front-end bound.  When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance.  Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into micro-ops (uops) in machine understandable format and putting them into a micro-op queue to be consumed by back end. The back-end then takes these micro-ops, allocates the required resources.  When all resources are ready, micro-ops are executed. If the back-end is not ready to accept micro-ops from the front-end, then we do not want to count these as front-end bottlenecks.  However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls and eventually forcing the front-end to wait until the back-end is ready to receive more UOPS. This event counts the cycles only when back-end is requesting more uops and front-end is not able to provide them. Some examples of conditions that cause front-end inefficiencies are: Icache misses, ITLB misses, and decoder restrictions that limit the front-end bandwidth.",
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x50",
+        "EventName": "NO_ALLOC_CYCLES.NOT_DELIVERED",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of cycles when no uops are allocated, the IQ is empty, and no other condition is blocking allocation."
+    },
+    {
+        "PublicDescription": "The NO_ALLOC_CYCLES.ALL event counts the number of cycles when the front-end does not provide any instructions to be allocated for any reason. This event indicates the cycles where an allocation stalls occurs, and no UOPS are allocated in that cycle.",
+        "EventCode": "0xCA",
+        "Counter": "0,1",
+        "UMask": "0x3f",
+        "EventName": "NO_ALLOC_CYCLES.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of cycles when no uops are allocated for any reason."
+    },
+    {
+        "PublicDescription": "Counts the number of cycles the allocation pipeline is stalled and is waiting for a free MEC reservation station entry.  The cycles should be appropriately counted in case of the cracked ops e.g. In case of a cracked load-op, the load portion is sent to M.",
+        "EventCode": "0xCB",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "RS_FULL_STALL.MEC",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of cycles the allocation pipeline is stalled and is waiting for a free MEC reservation station entry.  The cycles should be appropriately counted in case of the cracked ops e.g. In case of a cracked load-op, the load portion is sent to M"
+    },
+    {
+        "EventCode": "0xCB",
+        "Counter": "0,1",
+        "UMask": "0x1f",
+        "EventName": "RS_FULL_STALL.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of cycles the Alloc pipeline is stalled when any one of the RSs (IEC, FPC and MEC) is full. This event is a superset of all the individual RS stall event counts."
+    },
+    {
+        "PublicDescription": "This event counts the number of instructions that retire execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers.",
+        "EventCode": "0xC0",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired"
+    },
+    {
+        "PublicDescription": "Cycles the divider is busy. This event counts the cycles when the divide unit is unable to accept a new divide UOP because it is busy processing a previously dispatched UOP. The cycles will be counted irrespective of whether or not another divide UOP is waiting to enter the divide unit (from the RS). This event might count cycles while a divide is in progress even if the RS is empty.  The divide instruction is one of the longest latency instructions in the machine.  Hence, it has a special event associated with it to help determine if divides are delaying the retirement of instructions.",
+        "EventCode": "0xCD",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CYCLES_DIV_BUSY.ALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles the divider is busy.  Does not imply a stall waiting for the divider."
+    },
+    {
+        "PublicDescription": "This event counts the number of instructions that retire.  For instructions that consist of multiple micro-ops, this event counts exactly once, as the last micro-op of the instruction retires.  The event continues counting while instructions retire, including during interrupt service routines caused by hardware interrupts, faults or traps.  Background: Modern microprocessors employ extensive pipelining and speculative techniques.  Since sometimes an instruction is started but never completed, the notion of \"retirement\" is introduced.  A retired instruction is one that commits its states. Or stated differently, an instruction might be abandoned at some point. No instruction is truly finished until it retires.  This counter measures the number of completed instructions.  The fixed event is INST_RETIRED.ANY and the programmable event is INST_RETIRED.ANY_P.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Fixed Counter: Counts the number of instructions retired"
+    },
+    {
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios.  The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. In systems with a constant core frequency, this event can give you a measurement of the elapsed time while the core was not in halt state by dividing the event count by the core frequency. This event is architecturally defined and is a designated fixed counter.  CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.CORE_P use the core frequency which may change from time to time.  CPU_CLK_UNHALTED.REF_TSC and CPU_CLK_UNHALTED.REF are not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time.  The fixed events are CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.REF_TSC and the programmable events are CPU_CLK_UNHALTED.CORE_P and CPU_CLK_UNHALTED.REF.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles"
+    },
+    {
+        "PublicDescription": "Counts the number of reference cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios.  The core frequency may change from time to time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time.  Divide this event count by core frequency to determine the elapsed time while the core was not in halt state.  This event is architecturally defined and is a designated fixed counter.  CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.CORE_P use the core frequency which may change from time to time.  CPU_CLK_UNHALTED.REF_TSC and CPU_CLK_UNHALTED.REF are not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time.  The fixed events are CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.REF_TSC and the programmable events are CPU_CLK_UNHALTED.CORE_P and CPU_CLK_UNHALTED.REF.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 3",
+        "UMask": "0x3",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Fixed Counter: Counts the number of unhalted reference clock cycles"
+    },
+    {
+        "PublicDescription": "This event counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time.",
+        "EventCode": "0x3C",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.CORE_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when core is not halted"
+    },
+    {
+        "PublicDescription": "This event counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time.",
+        "EventCode": "0x3C",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when core is not halted"
+    },
+    {
+        "PublicDescription": "The BACLEARS event counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end.  The BACLEARS.ALL event counts the number of baclears for any type of branch.",
+        "EventCode": "0xE6",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "BACLEARS.ALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of baclears"
+    },
+    {
+        "PublicDescription": "The BACLEARS event counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end.  The BACLEARS.RETURN event counts the number of RETURN baclears.",
+        "EventCode": "0xE6",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "BACLEARS.RETURN",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of RETURN baclears"
+    },
+    {
+        "PublicDescription": "The BACLEARS event counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end.  The BACLEARS.COND event counts the number of JCC (Jump on Conditional Code) baclears.",
+        "EventCode": "0xE6",
+        "Counter": "0,1",
+        "UMask": "0x10",
+        "EventName": "BACLEARS.COND",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of JCC baclears"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "ALL_TAKEN_BRANCHES counts the number of all taken branch instructions retired.  Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+        "EventCode": "0xC4",
+        "Counter": "0,1",
+        "UMask": "0x80",
+        "PEBScounters": "0,1",
+        "EventName": "BR_INST_RETIRED.ALL_TAKEN_BRANCHES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of taken branch instructions retired"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/silvermont/virtual-memory.json b/pmu-events/arch/x86/silvermont/virtual-memory.json
new file mode 100644
index 0000000..ad31479
--- /dev/null
+++ b/pmu-events/arch/x86/silvermont/virtual-memory.json
@@ -0,0 +1,69 @@
+[
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts the number of load ops retired that had DTLB miss.",
+        "EventCode": "0x04",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Loads missed DTLB"
+    },
+    {
+        "PublicDescription": "This event counts when a data (D) page walk is completed or started.  Since a page walk implies a TLB miss, the number of TLB misses can be counted by counting the number of pagewalks.",
+        "EventCode": "0x05",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "PAGE_WALKS.D_SIDE_WALKS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "D-side page-walks",
+        "EdgeDetect": "1"
+    },
+    {
+        "PublicDescription": "This event counts every cycle when a D-side (walks due to a load) page walk is in progress. Page walk duration divided by number of page walks is the average duration of page-walks.",
+        "EventCode": "0x05",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "PAGE_WALKS.D_SIDE_CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Duration of D-side page-walks in core cycles"
+    },
+    {
+        "PublicDescription": "This event counts when an instruction (I) page walk is completed or started.  Since a page walk implies a TLB miss, the number of TLB misses can be counted by counting the number of pagewalks.",
+        "EventCode": "0x05",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "PAGE_WALKS.I_SIDE_WALKS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "I-side page-walks",
+        "EdgeDetect": "1"
+    },
+    {
+        "PublicDescription": "This event counts every cycle when a I-side (walks due to an instruction fetch) page walk is in progress. Page walk duration divided by number of page walks is the average duration of page-walks.",
+        "EventCode": "0x05",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "PAGE_WALKS.I_SIDE_CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Duration of I-side page-walks in core cycles"
+    },
+    {
+        "PublicDescription": "This event counts when a data (D) page walk or an instruction (I) page walk is completed or started.  Since a page walk implies a TLB miss, the number of TLB misses can be counted by counting the number of pagewalks.",
+        "EventCode": "0x05",
+        "Counter": "0,1",
+        "UMask": "0x3",
+        "EventName": "PAGE_WALKS.WALKS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Total page walks that are completed (I-side and D-side)",
+        "EdgeDetect": "1"
+    },
+    {
+        "PublicDescription": "This event counts every cycle when a data (D) page walk or instruction (I) page walk is in progress.  Since a pagewalk implies a TLB miss, the approximate cost of a TLB miss can be determined from this event.",
+        "EventCode": "0x05",
+        "Counter": "0,1",
+        "UMask": "0x3",
+        "EventName": "PAGE_WALKS.CYCLES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Total cycles for all the page walks. (I-side and D-side)"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylake/cache.json b/pmu-events/arch/x86/skylake/cache.json
new file mode 100644
index 0000000..54bfe9e
--- /dev/null
+++ b/pmu-events/arch/x86/skylake/cache.json
@@ -0,0 +1,939 @@
+[
+    {
+        "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read miss L2, no rejects",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x22",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache misses when fetching instructions",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand requests that miss L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x27",
+        "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand requests that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x38",
+        "EventName": "L2_RQSTS.PF_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "All requests that miss L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "EventName": "L2_RQSTS.MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "All requests that miss L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of demand Data Read requests that hit L2 cache. Only non rejected loads are counted.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x44",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xd8",
+        "EventName": "L2_RQSTS.PF_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe1",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand Data Read requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe2",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "RFO requests to L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the total number of L2 code requests.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe4",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 code requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand requests to L2 cache.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe7",
+        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Demand requests to L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf8",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "All L2 requests.",
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "All L2 requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "Errata": "SKL057",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests missed L3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2.  It does not include all accesses to the L3.",
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "Errata": "SKL057",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand; from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+        "EventCode": "0x48",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D miss outstandings duration in cycles",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+        "EventCode": "0x48",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
+        "EventCode": "0x48",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is, the FB unavailability was the dominant reason for blocking the request. A request includes cacheable/uncacheable demands that are load, store or SW prefetch.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+        "EventCode": "0x51",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D.REPLACEMENT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D data line replacements",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS. Note: A prefetch promoted to Demand is counted from the promotion point.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cacheable and noncacheable code read requests",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand and prefetch data reads",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts memory transactions that reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Any memory transaction that reached the SQ.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when the L1D writeback pending FIFO is full. Note: Writeback pending FIFO has six entries.",
+        "EventCode": "0xB2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions that miss the STLB.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x11",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Retired store instructions that miss the STLB.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x12",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "L1_Hit_Indication": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x21",
+        "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired load instructions with locked access. (Precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x42",
+        "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "L1_Hit_Indication": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x81",
+        "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired load instructions. (Precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "All retired store instructions.",
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x82",
+        "EventName": "MEM_INST_RETIRED.ALL_STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired store instructions. (Precise Event)",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "L1_Hit_Indication": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions with L3 cache hits as data sources.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load instructions missed L1 cache as data sources",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions missed L3 cache as data sources.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in L1 but hit FB (Fill Buffers) due to a preceding miss to the same cache line with data not ready.",
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xD4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1"
+    },
+    {
+        "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_TRANS.L2_WB",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "L2 writebacks that access L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "EventName": "L2_LINES_IN.ALL",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "L2 cache lines filling L2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_OUT.SILENT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_OUT.NON_SILENT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.",
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_OUT.USELESS_PREF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_OUT.USELESS_HWPF",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of cache line split locks sent to uncore.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fc0400001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000400001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400400001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200400001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100400001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080400001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fc01c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_HIT & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10001c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x04001c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x02001c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x01001c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00801c0001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fc0020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1000020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0400020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0200020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0100020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0080020001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts demand data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0000010001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand data reads that have any response type.",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylake/floating-point.json b/pmu-events/arch/x86/skylake/floating-point.json
new file mode 100644
index 0000000..213dd62
--- /dev/null
+++ b/pmu-events/arch/x86/skylake/floating-point.json
@@ -0,0 +1,67 @@
+[
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired.  Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired.  Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+        "EventCode": "0xCA",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1e",
+        "EventName": "FP_ASSIST.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylake/frontend.json b/pmu-events/arch/x86/skylake/frontend.json
new file mode 100644
index 0000000..578dff5
--- /dev/null
+++ b/pmu-events/arch/x86/skylake/frontend.json
@@ -0,0 +1,482 @@
+[
+    {
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "IDQ.MITE_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "IDQ.DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x18",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x24",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles MITE is delivering any Uop",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EdgeDetect": "1",
+        "EventName": "IDQ.MS_SWITCHES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+        "EventCode": "0x79",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "IDQ.MS_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ICACHE_16B.IFDATA_STALL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x83",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ICACHE_64B.IFTAG_HIT",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x83",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ICACHE_64B.IFTAG_MISS",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x83",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ICACHE_64B.IFTAG_STALL",
+        "SampleAfterValue": "200003",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 - x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts, on the per-thread basis, cycles when 1 or fewer uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
+        "EventCode": "0x9C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. DSB-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs. Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0-2 cycles.",
+        "EventCode": "0xAB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+        "EventCode": "0xC6",
+        "MSRValue": "0x11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.DSB_MISS",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired Instructions that experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC6",
+        "MSRValue": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.L1I_MISS",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired Instructions that experienced Instruction L1 Cache true miss. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC6",
+        "MSRValue": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.L2_MISS",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired Instructions that experienced Instruction L2 Cache true miss. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+        "EventCode": "0xC6",
+        "MSRValue": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired Instructions that experienced iTLB true miss. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+        "EventCode": "0xC6",
+        "MSRValue": "0x15",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.STLB_MISS",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired Instructions that experienced STLB (2nd level TLB) true miss. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC6",
+        "MSRValue": "0x400206",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC6",
+        "MSRValue": "0x200206",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC6",
+        "MSRValue": "0x400406",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
+        "EventCode": "0xC6",
+        "MSRValue": "0x400806",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
+        "EventCode": "0xC6",
+        "MSRValue": "0x401006",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
+        "EventCode": "0xC6",
+        "MSRValue": "0x402006",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC6",
+        "MSRValue": "0x404006",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC6",
+        "MSRValue": "0x408006",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC6",
+        "MSRValue": "0x410006",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC6",
+        "MSRValue": "0x420006",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
+        "EventCode": "0xC6",
+        "MSRValue": "0x100206",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC6",
+        "MSRValue": "0x300206",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
+        "MSRIndex": "0x3F7",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylake/memory.json b/pmu-events/arch/x86/skylake/memory.json
new file mode 100644
index 0000000..3bd8b71
--- /dev/null
+++ b/pmu-events/arch/x86/skylake/memory.json
@@ -0,0 +1,615 @@
+[
+    {
+        "PublicDescription": "Number of times a TSX line had a cache conflict.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TX_MEM.ABORT_CONFLICT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "TX_MEM.ABORT_CAPACITY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+        "EventCode": "0x54",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TX_EXEC.MISC1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "TX_EXEC.MISC2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "TX_EXEC.MISC3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "RTM region detected inside HLE.",
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "TX_EXEC.MISC4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times an XBEGIN instruction was executed inside an HLE transactional region.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+        "EventCode": "0x5d",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "TX_EXEC.MISC5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with at least 1 Demand Data Read request that misses L3 cache in the superQ.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x6",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+        "CounterMask": "6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Demand Data Read requests that miss L3 cache.",
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand Data Read requests that miss L3 cache",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following: a. memory disambiguation, b. external snoop, or c. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "SKL089",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "HLE_RETIRED.START",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution started.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times HLE commit succeeded.",
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "HLE_RETIRED.COMMIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution successfully committed",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Number of times HLE abort was triggered. (PEBS)",
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "HLE_RETIRED.ABORTED",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "HLE_RETIRED.ABORTED_MEM",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "HLE_RETIRED.ABORTED_TIMER",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "HLE_RETIRED.ABORTED_EVENTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
+        "EventCode": "0xC9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RTM_RETIRED.START",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution started.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times RTM commit succeeded.",
+        "EventCode": "0xC9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RTM_RETIRED.COMMIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution successfully committed",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Number of times RTM abort was triggered. (PEBS)",
+        "EventCode": "0xC9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RTM_RETIRED.ABORTED",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+        "EventCode": "0xC9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RTM_RETIRED.ABORTED_MEM",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RTM_RETIRED.ABORTED_TIMER",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+        "EventCode": "0xC9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
+        "EventCode": "0xC9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+        "EventCode": "0xC9",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "RTM_RETIRED.ABORTED_EVENTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles.  Reported latency may be longer than just the memory latency.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles.  Reported latency may be longer than just the memory latency.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50021",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles.  Reported latency may be longer than just the memory latency.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20011",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles.  Reported latency may be longer than just the memory latency.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x20",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles.  Reported latency may be longer than just the memory latency.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x40",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2003",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles.  Reported latency may be longer than just the memory latency.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "1009",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles.  Reported latency may be longer than just the memory latency.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x100",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "503",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles.  Reported latency may be longer than just the memory latency.",
+        "EventCode": "0xCD",
+        "MSRValue": "0x200",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "101",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles.",
+        "TakenAlone": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3ffc000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x103c000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x043c000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x023c000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x013c000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x00bc000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3fc4000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0404000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0204000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0104000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x0084000001 ",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+        "Offcore": "1",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylake/other.json b/pmu-events/arch/x86/skylake/other.json
new file mode 100644
index 0000000..84a316d
--- /dev/null
+++ b/pmu-events/arch/x86/skylake/other.json
@@ -0,0 +1,48 @@
+[
+    {
+        "EventCode": "0x32",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SW_PREFETCH_ACCESS.NTA",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x32",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SW_PREFETCH_ACCESS.T0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x32",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x32",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of PREFETCHW instructions executed.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of hardware interruptions received by the processor.",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "HW_INTERRUPTS.RECEIVED",
+        "SampleAfterValue": "203",
+        "BriefDescription": "Number of hardware interrupts received by the processor.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylake/pipeline.json b/pmu-events/arch/x86/skylake/pipeline.json
new file mode 100644
index 0000000..bc6d2af
--- /dev/null
+++ b/pmu-events/arch/x86/skylake/pipeline.json
@@ -0,0 +1,950 @@
+[
+    {
+        "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 0",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Instructions retired from execution.",
+        "CounterHTOff": "Fixed counter 0"
+    },
+    {
+        "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when the thread is not in halt state",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x2",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate than the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+        "EventCode": "0x00",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x3",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when: a. preceding store conflicts with the load (incomplete overlap), b. store forwarding is impossible due to u-arch limitations, c. preceding lock RMW operations are not forwarded, d. store has the no-forward bit set (uncacheable/page-split/masked stores), e. all-blocking stores are used (mostly, fences and port I/O), and others. The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "EventCode": "0x03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+        "EventCode": "0x07",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+        "EventCode": "0x0E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_ISSUED.SLOW_LEA",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.DIVIDER_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EdgeDetect": "1",
+        "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2503",
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2503",
+        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2503",
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "2503",
+        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2503",
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+        "EventCode": "0x4C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread. Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+        "EventCode": "0x5E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
+        "EventCode": "0x5E",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ILD_STALL.LCP",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
+        "EventCode": "0xA1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts resource-related stall cycles. Reasons for stalls can be as follows: a. *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots). b. *any* u-arch structure got empty (like INT/SIMD FreeLists). c. FPU control word (FPCW), MXCSR, and others. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Resource-related stall cycles",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RESOURCE_STALLS.SB",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Total execution stalls.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x5",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "CounterMask": "5",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "CounterMask": "8",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "CounterMask": "12",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "CounterMask": "16",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x14",
+        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "CounterMask": "20",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
+        "EventCode": "0xA6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+        "EventCode": "0xA6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+        "EventCode": "0xA6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+        "EventCode": "0xA6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+        "EventCode": "0xA6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.THREAD",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of uops executed from any thread.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of uops executed on the core.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core.",
+        "CounterMask": "4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of x87 uops executed.",
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "UOPS_EXECUTED.X87",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts the number of x87 uops dispatched.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "Errata": "SKL091, SKL044",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
+        "EventCode": "0xC0",
+        "Counter": "1",
+        "UMask": "0x1",
+        "Errata": "SKL091, SKL044",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+        "CounterHTOff": "1"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "Number of cycles using an always true condition applied to  PEBS instructions retired event. (inst_ret< 16)",
+        "EventCode": "0xC0",
+        "Invert": "1",
+        "Counter": "0,2,3",
+        "UMask": "0x1",
+        "Errata": "SKL091, SKL044",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Number of cycles using always true condition applied to  PEBS instructions retired event.",
+        "CounterMask": "10",
+        "CounterHTOff": "0,2,3"
+    },
+    {
+        "EventCode": "0xC1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "EventName": "OTHER_ASSISTS.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the retirement slots used.",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Retirement slots used.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "CounterMask": "10",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EdgeDetect": "1",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all (macro) branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "Errata": "SKL091",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "Errata": "SKL091",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Conditional branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "Errata": "SKL091",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Direct and indirect near call instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "Errata": "SKL091",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "Errata": "SKL091",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Return instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "Errata": "SKL091",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Not taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "Errata": "SKL091",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Taken branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "Errata": "SKL091",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Counts the number of far branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Mispredicted macro branch instructions retired.",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "PEBS": "1",
+        "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "SampleAfterValue": "400009",
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. ",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Increments whenever there is an update to the LBR array.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BACLEARS.ANY",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total number of times the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylake/skl-metrics.json b/pmu-events/arch/x86/skylake/skl-metrics.json
new file mode 100644
index 0000000..36c903f
--- /dev/null
+++ b/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -0,0 +1,164 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "MetricGroup": "Pipeline",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Rough Estimation of fraction of fetched line bytes that were likely consumed by program instructions",
+        "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
+        "MetricGroup": "Frontend",
+        "MetricName": "IFetch_Line_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (threaded)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "MetricGroup": "Pipeline;Summary",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Total issue-pipeline slots",
+        "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "SLOTS"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "SMT",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "MetricGroup": "Pipeline;Ports_Utilization",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+        "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL  - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)",
+        "MetricGroup": "Unknown_Branches",
+        "MetricName": "BAClear_Cost"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )",
+        "MetricGroup": "Memory_Bound;Memory_Lat",
+        "MetricName": "Load_Miss_Real_Latency"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand loads when there is at least 1 such miss)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+        "MetricGroup": "Memory_Bound;Memory_BW",
+        "MetricName": "MLP"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )",
+        "MetricGroup": "TLB",
+        "MetricName": "Page_Walks_Utilization"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Giga Floating Point Operations Per Second",
+        "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
+        "MetricGroup": "FLOPS;Summary",
+        "MetricName": "GFLOPs"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware threads were active",
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT;Summary",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Summary",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "C3 residency percent per core",
+        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    }
+]
diff --git a/pmu-events/arch/x86/skylake/uncore.json b/pmu-events/arch/x86/skylake/uncore.json
new file mode 100644
index 0000000..dbc1932
--- /dev/null
+++ b/pmu-events/arch/x86/skylake/uncore.json
@@ -0,0 +1,254 @@
+[
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x41",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE",
+    "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+    "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x81",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION",
+    "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+    "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x44",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE",
+    "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+    "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x22",
+    "UMask": "0x48",
+    "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE",
+    "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+    "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x21",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M",
+    "BriefDescription": "L3 Lookup write request that access cache and found line in M-state",
+    "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x81",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M",
+    "BriefDescription": "L3 Lookup any request that access cache and found line in M-state",
+    "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x18",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I",
+    "BriefDescription": "L3 Lookup read request that access cache and found line in I-state",
+    "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x88",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I",
+    "BriefDescription": "L3 Lookup any request that access cache and found line in I-state",
+    "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x1f",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI",
+    "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state",
+    "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x2f",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI",
+    "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state",
+    "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x8f",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI",
+    "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state",
+    "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x86",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES",
+    "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state",
+    "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x16",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES",
+    "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state",
+    "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "CBO",
+    "EventCode": "0x34",
+    "UMask": "0x26",
+    "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES",
+    "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state",
+    "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x80",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+    "BriefDescription": "Each cycle counts the number of all Core outgoing valid entries. Such an entry is defined as valid from its allocation until the first of the IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+    "PublicDescription": "Each cycle counts the number of all Core outgoing valid entries. Such an entry is defined as valid from its allocation until the first of the IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+    "Counter": "0",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x81",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+    "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+    "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x81",
+    "UMask": "0x02",
+    "EventName": "UNC_ARB_TRK_REQUESTS.DRD_DIRECT",
+    "BriefDescription": "Number of Core coherent Data Read entries allocated in DirectData mode",
+    "PublicDescription": "Number of Core coherent Data Read entries allocated in DirectData mode.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x81",
+    "UMask": "0x20",
+    "EventName": "UNC_ARB_TRK_REQUESTS.WRITES",
+    "BriefDescription": "Number of Writes allocated - any write transactions: full/partial writes and evictions.",
+    "PublicDescription": "Number of Writes allocated - any write transactions: full/partial writes and evictions.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x84",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+    "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+    "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+    "Counter": "0,1",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "iMPH-U",
+    "EventCode": "0x80",
+    "UMask": "0x01",
+    "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
+    "BriefDescription": "Cycles with at least one outstanding request waiting for data return from memory controller. Accounts for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+    "PublicDescription": "Cycles with at least one outstanding request waiting for data return from memory controller. Accounts for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+    "Counter": "0",
+    "CounterMask": "1",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  },
+  {
+    "Unit": "NCU",
+    "EventCode": "0x0",
+    "UMask": "0x01",
+    "EventName": "UNC_CLOCK.SOCKET",
+    "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles",
+    "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+    "Counter": "FIXED",
+    "CounterMask": "0",
+    "Invert": "0",
+    "EdgeDetect": "0"
+  }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylake/virtual-memory.json b/pmu-events/arch/x86/skylake/virtual-memory.json
new file mode 100644
index 0000000..2bcba7d
--- /dev/null
+++ b/pmu-events/arch/x86/skylake/virtual-memory.json
@@ -0,0 +1,284 @@
+[
+    {
+        "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 1G pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+        "EventCode": "0x08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages.  The page walks can end with or without a page fault.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
+        "EventCode": "0x4F",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "EPT.WALK_PENDING",
+        "SampleAfterValue": "2000003",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (1G page size). The page walk can end with or without a fault.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "ITLB_MISSES.WALK_PENDING",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "ITLB_MISSES.WALK_ACTIVE",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
+        "CounterMask": "1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+        "EventCode": "0xAE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
+        "EventCode": "0xBD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "SampleAfterValue": "100007",
+        "BriefDescription": "STLB flush attempts",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylakex/cache.json b/pmu-events/arch/x86/skylakex/cache.json
new file mode 100644
index 0000000..5c99408
--- /dev/null
+++ b/pmu-events/arch/x86/skylakex/cache.json
@@ -0,0 +1,1749 @@
+[
+    {
+        "EventCode": "0x24",
+        "UMask": "0x21",
+        "BriefDescription": "Demand Data Read miss L2, no rejects",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+        "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x22",
+        "BriefDescription": "RFO requests that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x24",
+        "BriefDescription": "L2 cache misses when fetching instructions",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x27",
+        "BriefDescription": "Demand requests that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+        "PublicDescription": "Demand requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x38",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.PF_MISS",
+        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x3f",
+        "BriefDescription": "All requests that miss L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.MISS",
+        "PublicDescription": "All requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x41",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "PublicDescription": "Counts the number of demand Data Read requests that hit L2 cache. Only non rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x42",
+        "BriefDescription": "RFO requests that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0x44",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xd8",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.PF_HIT",
+        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe1",
+        "BriefDescription": "Demand Data Read requests",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe2",
+        "BriefDescription": "RFO requests to L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe4",
+        "BriefDescription": "L2 code requests",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "PublicDescription": "Counts the total number of L2 code requests.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xe7",
+        "BriefDescription": "Demand requests to L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+        "PublicDescription": "Demand requests to L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xf8",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "PublicDescription": "Counts the total number of requests from the L1/L2/L3 hardware prefetchers or Load software prefetches.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x24",
+        "UMask": "0xff",
+        "BriefDescription": "All L2 requests",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "PublicDescription": "All L2 requests.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x2E",
+        "UMask": "0x41",
+        "BriefDescription": "Core-originated cacheable demand requests missed L3",
+        "Counter": "0,1,2,3",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x2E",
+        "UMask": "0x4f",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+        "Counter": "0,1,2,3",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2.  It does not include all accesses to the L3.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x1",
+        "BriefDescription": "L1D miss outstandings duration in cycles",
+        "Counter": "0,1,2,3",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "AnyThread": "1",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x48",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
+        "Counter": "0,1,2,3",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x51",
+        "UMask": "0x1",
+        "BriefDescription": "L1D data line replacements",
+        "Counter": "0,1,2,3",
+        "EventName": "L1D.REPLACEMENT",
+        "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS. Note: A prefetch promoted to Demand is counted from the promotion point.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "CounterMask": "6",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x2",
+        "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+        "CounterMask": "1",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "CounterMask": "1",
+        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x1",
+        "BriefDescription": "Demand Data Read requests sent to uncore",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x2",
+        "BriefDescription": "Cacheable and non-cacheable code read requests",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x4",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x8",
+        "BriefDescription": "Demand and prefetch data reads",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x80",
+        "BriefDescription": "Any memory transaction that reached the SQ.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+        "PublicDescription": "Counts memory transactions that reached the super queue, including requests initiated by the core, all L3 prefetches, page walks, etc.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB2",
+        "UMask": "0x1",
+        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full. Note: Writeback pending FIFO has six entries.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x11",
+        "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+        "PublicDescription": "Retired load instructions that miss the STLB.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x12",
+        "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+        "PublicDescription": "Retired store instructions that miss the STLB.",
+        "SampleAfterValue": "100003",
+        "L1_Hit_Indication": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x21",
+        "BriefDescription": "Retired load instructions with locked access. (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x41",
+        "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x42",
+        "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+        "SampleAfterValue": "100003",
+        "L1_Hit_Indication": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x81",
+        "BriefDescription": "All retired load instructions. (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD0",
+        "UMask": "0x82",
+        "BriefDescription": "All retired store instructions. (Precise Event)",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_INST_RETIRED.ALL_STORES",
+        "PublicDescription": "All retired store instructions.",
+        "SampleAfterValue": "2000003",
+        "L1_Hit_Indication": "1",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x1",
+        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x2",
+        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x4",
+        "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+        "PublicDescription": "Retired load instructions with L3 cache hits as data sources.",
+        "SampleAfterValue": "50021",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x8",
+        "BriefDescription": "Retired load instructions missed L1 cache as data sources",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x10",
+        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+        "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
+        "SampleAfterValue": "50021",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x20",
+        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+        "PublicDescription": "Retired load instructions missed L3 cache as data sources.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD1",
+        "UMask": "0x40",
+        "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in L1 but hit FB (Fill Buffers) due to a preceding miss to the same cache line with data not ready.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x1",
+        "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x2",
+        "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+        "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x4",
+        "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+        "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD2",
+        "UMask": "0x8",
+        "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+        "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x1",
+        "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x2",
+        "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x4",
+        "BriefDescription": "Retired load instructions whose data source was remote HITM",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD3",
+        "UMask": "0x8",
+        "BriefDescription": "Retired load instructions whose data source was forwarded from a remote cache",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xD4",
+        "UMask": "0x4",
+        "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
+        "Data_LA": "1",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xF0",
+        "UMask": "0x40",
+        "BriefDescription": "L2 writebacks that access L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_TRANS.L2_WB",
+        "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF1",
+        "UMask": "0x1f",
+        "BriefDescription": "L2 cache lines filling L2",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_IN.ALL",
+        "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "UMask": "0x1",
+        "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_OUT.SILENT",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "UMask": "0x2",
+        "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3.  Clean lines may either be allocated in L3 or dropped",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_OUT.NON_SILENT",
+        "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3.  Clean lines may either be allocated in L3 or dropped.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "UMask": "0x4",
+        "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_OUT.USELESS_PREF",
+        "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF2",
+        "UMask": "0x4",
+        "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "L2_LINES_OUT.USELESS_HWPF",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xF4",
+        "UMask": "0x10",
+        "BriefDescription": "Number of cache line split locks sent to uncore.",
+        "Counter": "0,1,2,3",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that have any response type.",
+        "MSRValue": "0x0000010001 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c0001 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x04003c0001 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c0001 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c0001 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that hit in the L3.",
+        "MSRValue": "0x3f803c0001 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.",
+        "MSRValue": "0x0000010002 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c0002 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x04003c0002 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c0002 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "DEMAND_RFO & L3_HIT & SNOOP_HIT_WITH_FWD. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c0002 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.",
+        "MSRValue": "0x3f803c0002 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that have any response type.",
+        "MSRValue": "0x0000010004 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c0004 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x04003c0004 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c0004 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_HIT_WITH_FWD. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c0004 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that hit in the L3.",
+        "MSRValue": "0x3f803c0004 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.",
+        "MSRValue": "0x0000010010 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c0010 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "MSRValue": "0x04003c0010 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "PF_L2_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c0010 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
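+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 where the snoop hit and the line was forwarded (SNOOP_HIT_WITH_FWD). Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",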
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c0010 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.",
+        "MSRValue": "0x3f803c0010 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.",
+        "MSRValue": "0x0000010020 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c0020 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "MSRValue": "0x04003c0020 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "PF_L2_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c0020 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
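+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 where the snoop hit and the line was forwarded (SNOOP_HIT_WITH_FWD). Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",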
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c0020 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.",
+        "MSRValue": "0x3f803c0020 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.",
+        "MSRValue": "0x0000010080 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c0080 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "MSRValue": "0x04003c0080 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c0080 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
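+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 where the snoop hit and the line was forwarded (SNOOP_HIT_WITH_FWD). Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",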
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c0080 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.",
+        "MSRValue": "0x3f803c0080 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.",
+        "MSRValue": "0x0000010100 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c0100 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "MSRValue": "0x04003c0100 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c0100 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
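+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 where the snoop hit and the line was forwarded (SNOOP_HIT_WITH_FWD). Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",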
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c0100 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.",
+        "MSRValue": "0x3f803c0100 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.",
+        "MSRValue": "0x0000010400 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c0400 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "MSRValue": "0x04003c0400 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "PF_L1D_AND_SW & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c0400 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
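+        "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 where the snoop hit and the line was forwarded (SNOOP_HIT_WITH_FWD). Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",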
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c0400 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.",
+        "MSRValue": "0x3f803c0400 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts any other requests that have any response type.",
+        "MSRValue": "0x0000018000 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts any other requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c8000 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+        "MSRValue": "0x04003c8000 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "OTHER & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c8000 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts any other requests that hit in the L3 where the snoop hit and the line was forwarded (SNOOP_HIT_WITH_FWD). Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c8000 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts any other requests that hit in the L3.",
+        "MSRValue": "0x3f803c8000 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts any other requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch data reads that have any response type.",
+        "MSRValue": "0x0000010490 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c0490 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line but the line is not forwarded.",
+        "MSRValue": "0x04003c0490 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line but the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c0490 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch data reads that hit in the L3 where a snoop hit and the line was forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c0490 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch data reads that hit in the L3.",
+        "MSRValue": "0x3f803c0490 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch RFOs that have any response type.",
+        "MSRValue": "0x0000010120 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c0120 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line but the line is not forwarded.",
+        "MSRValue": "0x04003c0120 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line but the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c0120 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch RFOs that hit in the L3 where a snoop hit and the line was forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c0120 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch RFOs that hit in the L3.",
+        "MSRValue": "0x3f803c0120 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that have any response type.",
+        "MSRValue": "0x0000010491 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c0491 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line but the line is not forwarded.",
+        "MSRValue": "0x04003c0491 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line but the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c0491 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 where a snoop hit and the line was forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c0491 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.",
+        "MSRValue": "0x3f803c0491 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.",
+        "MSRValue": "0x0000010122 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+        "MSRValue": "0x01003c0122 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line but the line is not forwarded.",
+        "MSRValue": "0x04003c0122 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line but the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+        "MSRValue": "0x08003c0122 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 where a snoop hit and the line was forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+        "MSRValue": "0x10003c0122 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.",
+        "MSRValue": "0x3f803c0122 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylakex/floating-point.json b/pmu-events/arch/x86/skylakex/floating-point.json
new file mode 100644
index 0000000..286ed1a
--- /dev/null
+++ b/pmu-events/arch/x86/skylakex/floating-point.json
@@ -0,0 +1,87 @@
+[
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x1",
+        "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x2",
+        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x4",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired.  Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x8",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x10",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x20",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired.  Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x40",
+        "BriefDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8)",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
+        "PublicDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC7",
+        "UMask": "0x80",
+        "BriefDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16)",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
+        "PublicDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCA",
+        "UMask": "0x1e",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
+        "Counter": "0,1,2,3",
+        "EventName": "FP_ASSIST.ANY",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylakex/frontend.json b/pmu-events/arch/x86/skylakex/frontend.json
new file mode 100644
index 0000000..403a4f8
--- /dev/null
+++ b/pmu-events/arch/x86/skylakex/frontend.json
@@ -0,0 +1,482 @@
+[
+    {
+        "EventCode": "0x79",
+        "UMask": "0x4",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MITE_UOPS",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MITE_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x8",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.DSB_UOPS",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.DSB_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x18",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "CounterMask": "4",
+        "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x18",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "CounterMask": "1",
+        "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x20",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x24",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "CounterMask": "4",
+        "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x24",
+        "BriefDescription": "Cycles MITE is delivering any Uop",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "CounterMask": "1",
+        "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x30",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0x79",
+        "UMask": "0x30",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_SWITCHES",
+        "CounterMask": "1",
+        "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x79",
+        "UMask": "0x30",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ.MS_UOPS",
+        "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x80",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE_16B.IFDATA_STALL",
+        "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x83",
+        "UMask": "0x1",
+        "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE_64B.IFTAG_HIT",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x83",
+        "UMask": "0x2",
+        "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE_64B.IFTAG_MISS",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x83",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+        "Counter": "0,1,2,3",
+        "EventName": "ICACHE_64B.IFTAG_STALL",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 - x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "CounterMask": "4",
+        "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "CounterMask": "3",
+        "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "CounterMask": "2",
+        "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "CounterMask": "1",
+        "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0x9C",
+        "UMask": "0x1",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+        "Counter": "0,1,2,3",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xAB",
+        "UMask": "0x2",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+        "Counter": "0,1,2,3",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. DSB-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs. Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0-2 cycles.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x11",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.DSB_MISS",
+        "MSRIndex": "0x3F7",
+        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x12",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.L1I_MISS",
+        "MSRIndex": "0x3F7",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x13",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.L2_MISS",
+        "MSRIndex": "0x3F7",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x14",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+        "MSRIndex": "0x3F7",
+        "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x15",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.STLB_MISS",
+        "MSRIndex": "0x3F7",
+        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x400206",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
+        "MSRIndex": "0x3F7",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x200206",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
+        "MSRIndex": "0x3F7",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x400406",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+        "MSRIndex": "0x3F7",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
+        "PEBS": "1",
+        "MSRValue": "0x400806",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+        "MSRIndex": "0x3F7",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x401006",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+        "MSRIndex": "0x3F7",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x402006",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+        "MSRIndex": "0x3F7",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x404006",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+        "MSRIndex": "0x3F7",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x408006",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+        "MSRIndex": "0x3F7",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x410006",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+        "MSRIndex": "0x3F7",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x420006",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
+        "MSRIndex": "0x3F7",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x100206",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+        "MSRIndex": "0x3F7",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC6",
+        "UMask": "0x1",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+        "PEBS": "1",
+        "MSRValue": "0x300206",
+        "Counter": "0,1,2,3",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
+        "MSRIndex": "0x3F7",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylakex/memory.json b/pmu-events/arch/x86/skylakex/memory.json
new file mode 100644
index 0000000..e7f1aa3
--- /dev/null
+++ b/pmu-events/arch/x86/skylakex/memory.json
@@ -0,0 +1,1473 @@
+[
+    {
+        "EventCode": "0x54",
+        "UMask": "0x1",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_CONFLICT",
+        "PublicDescription": "Number of times a TSX line had a cache conflict.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_CAPACITY",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x4",
+        "BriefDescription": "Number of times an HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x8",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x10",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x20",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x54",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x1",
+        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x2",
+        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC2",
+        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x4",
+        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC3",
+        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x8",
+        "BriefDescription": "Counts the number of times an XBEGIN instruction was executed inside an HLE transactional region.",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC4",
+        "PublicDescription": "RTM region detected inside HLE.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5d",
+        "UMask": "0x10",
+        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
+        "Counter": "0,1,2,3",
+        "EventName": "TX_EXEC.MISC5",
+        "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x10",
+        "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles with at least 1 Demand Data Read request that misses L3 cache in the superQ.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
+        "CounterMask": "6",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
+        "CounterMask": "2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x6",
+        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+        "CounterMask": "6",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB0",
+        "UMask": "0x10",
+        "BriefDescription": "Demand Data Read requests that miss L3 cache",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "PublicDescription": "Demand Data Read requests that miss L3 cache.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x2",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "Errata": "SKL089",
+        "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following: a. memory disambiguation, b. external snoop, or c. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "UMask": "0x1",
+        "BriefDescription": "Number of times an HLE execution started.",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.START",
+        "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times an HLE execution successfully committed",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.COMMIT",
+        "PublicDescription": "Number of times HLE commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "UMask": "0x4",
+        "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED",
+        "PublicDescription": "Number of times HLE abort was triggered. (PEBS)",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "UMask": "0x8",
+        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MEM",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "UMask": "0x10",
+        "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_TIMER",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "UMask": "0x20",
+        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
+        "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC8",
+        "UMask": "0x80",
+        "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
+        "Counter": "0,1,2,3",
+        "EventName": "HLE_RETIRED.ABORTED_EVENTS",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC9",
+        "UMask": "0x1",
+        "BriefDescription": "Number of times an RTM execution started.",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.START",
+        "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC9",
+        "UMask": "0x2",
+        "BriefDescription": "Number of times an RTM execution successfully committed",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.COMMIT",
+        "PublicDescription": "Number of times RTM commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC9",
+        "UMask": "0x4",
+        "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED",
+        "PublicDescription": "Number of times RTM abort was triggered. (PEBS)",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC9",
+        "UMask": "0x8",
+        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MEM",
+        "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC9",
+        "UMask": "0x10",
+        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_TIMER",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC9",
+        "UMask": "0x20",
+        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+        "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC9",
+        "UMask": "0x40",
+        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+        "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC9",
+        "UMask": "0x80",
+        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+        "Counter": "0,1,2,3",
+        "EventName": "RTM_RETIRED.ABORTED_EVENTS",
+        "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles.",
+        "PEBS": "2",
+        "MSRValue": "0x4",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles.  Reported latency may be longer than just the memory latency.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles.",
+        "PEBS": "2",
+        "MSRValue": "0x8",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles.  Reported latency may be longer than just the memory latency.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "50021",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles.",
+        "PEBS": "2",
+        "MSRValue": "0x10",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles.  Reported latency may be longer than just the memory latency.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "20011",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles.",
+        "PEBS": "2",
+        "MSRValue": "0x20",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles.  Reported latency may be longer than just the memory latency.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles.",
+        "PEBS": "2",
+        "MSRValue": "0x40",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles.  Reported latency may be longer than just the memory latency.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "2003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles.",
+        "PEBS": "2",
+        "MSRValue": "0x80",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles.  Reported latency may be longer than just the memory latency.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "1009",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles.",
+        "PEBS": "2",
+        "MSRValue": "0x100",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles.  Reported latency may be longer than just the memory latency.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "503",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xCD",
+        "UMask": "0x1",
+        "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles.",
+        "PEBS": "2",
+        "MSRValue": "0x200",
+        "Counter": "0,1,2,3",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles.  Reported latency may be longer than just the memory latency.",
+        "TakenAlone": "1",
+        "SampleAfterValue": "101",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that miss in the L3.",
+        "MSRValue": "0x3fbc000001 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc00001 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc00001 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc00001 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b800001 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604000001 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3.",
+        "MSRValue": "0x3fbc000002 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc00002 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc00002 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc00002 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b800002 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604000002 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that miss in the L3.",
+        "MSRValue": "0x3fbc000004 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc00004 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc00004 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc00004 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b800004 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604000004 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3.",
+        "MSRValue": "0x3fbc000010 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc00010 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc00010 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc00010 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b800010 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604000010 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3.",
+        "MSRValue": "0x3fbc000020 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc00020 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc00020 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc00020 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b800020 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604000020 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3.",
+        "MSRValue": "0x3fbc000080 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc00080 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc00080 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc00080 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b800080 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604000080 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3.",
+        "MSRValue": "0x3fbc000100 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc00100 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc00100 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc00100 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b800100 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604000100 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3.",
+        "MSRValue": "0x3fbc000400 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc00400 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc00400 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc00400 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b800400 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604000400 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts any other requests that miss in the L3.",
+        "MSRValue": "0x3fbc008000 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts any other requests that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts any other requests that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc08000 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts any other requests that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts any other requests that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc08000 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts any other requests that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc08000 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b808000 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604008000 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch data reads that miss in the L3.",
+        "MSRValue": "0x3fbc000490 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc00490 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc00490 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc00490 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b800490 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604000490 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch RFOs that miss in the L3.",
+        "MSRValue": "0x3fbc000120 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc00120 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc00120 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc00120 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b800120 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604000120 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3.",
+        "MSRValue": "0x3fbc000491 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc00491 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc00491 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc00491 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b800491 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604000491 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3.",
+        "MSRValue": "0x3fbc000122 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
+        "MSRValue": "0x083fc00122 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.",
+        "MSRValue": "0x103fc00122 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram.",
+        "MSRValue": "0x063fc00122 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram.",
+        "MSRValue": "0x063b800122 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "Offcore": "1",
+        "EventCode": "0xB7, 0xBB",
+        "UMask": "0x1",
+        "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.",
+        "MSRValue": "0x0604000122 ",
+        "Counter": "0,1,2,3",
+        "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylakex/other.json b/pmu-events/arch/x86/skylakex/other.json
new file mode 100644
index 0000000..778a541
--- /dev/null
+++ b/pmu-events/arch/x86/skylakex/other.json
@@ -0,0 +1,164 @@
+[
+    {
+        "EventCode": "0x28",
+        "UMask": "0x7",
+        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
+        "Counter": "0,1,2,3",
+        "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
+        "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0.  This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "UMask": "0x18",
+        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
+        "Counter": "0,1,2,3",
+        "EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
+        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1.  This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "UMask": "0x20",
+        "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
+        "Counter": "0,1,2,3",
+        "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
+        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture).  This includes high current AVX 512-bit instructions.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x28",
+        "UMask": "0x40",
+        "BriefDescription": "Core cycles the core was throttled due to a pending power level request.",
+        "Counter": "0,1,2,3",
+        "EventName": "CORE_POWER.THROTTLE",
+        "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.",
+        "SampleAfterValue": "200003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x32",
+        "UMask": "0x1",
+        "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+        "Counter": "0,1,2,3",
+        "EventName": "SW_PREFETCH_ACCESS.NTA",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x32",
+        "UMask": "0x2",
+        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+        "Counter": "0,1,2,3",
+        "EventName": "SW_PREFETCH_ACCESS.T0",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x32",
+        "UMask": "0x4",
+        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+        "Counter": "0,1,2,3",
+        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x32",
+        "UMask": "0x8",
+        "BriefDescription": "Number of PREFETCHW instructions executed.",
+        "Counter": "0,1,2,3",
+        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCB",
+        "UMask": "0x1",
+        "BriefDescription": "Number of hardware interrupts received by the processor.",
+        "Counter": "0,1,2,3",
+        "EventName": "HW_INTERRUPTS.RECEIVED",
+        "PublicDescription": "Counts the number of hardware interrupts received by the processor.",
+        "SampleAfterValue": "203",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xEF",
+        "UMask": "0x1",
+        "Counter": "0,1,2,3",
+        "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITI",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xEF",
+        "UMask": "0x2",
+        "Counter": "0,1,2,3",
+        "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITFSE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xEF",
+        "UMask": "0x4",
+        "Counter": "0,1,2,3",
+        "EventName": "CORE_SNOOP_RESPONSE.RSP_SHITFSE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xEF",
+        "UMask": "0x8",
+        "Counter": "0,1,2,3",
+        "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDM",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xEF",
+        "UMask": "0x10",
+        "Counter": "0,1,2,3",
+        "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDM",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xEF",
+        "UMask": "0x20",
+        "Counter": "0,1,2,3",
+        "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDFE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xEF",
+        "UMask": "0x40",
+        "Counter": "0,1,2,3",
+        "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDFE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xFE",
+        "UMask": "0x2",
+        "BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly",
+        "Counter": "0,1,2,3",
+        "EventName": "IDI_MISC.WB_UPGRADE",
+        "PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xFE",
+        "UMask": "0x4",
+        "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly",
+        "Counter": "0,1,2,3",
+        "EventName": "IDI_MISC.WB_DOWNGRADE",
+        "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylakex/pipeline.json b/pmu-events/arch/x86/skylakex/pipeline.json
new file mode 100644
index 0000000..f99f7ae
--- /dev/null
+++ b/pmu-events/arch/x86/skylakex/pipeline.json
@@ -0,0 +1,950 @@
+[
+    {
+        "EventCode": "0x00",
+        "UMask": "0x1",
+        "BriefDescription": "Instructions retired from execution.",
+        "Counter": "Fixed counter 0",
+        "EventName": "INST_RETIRED.ANY",
+        "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 0"
+    },
+    {
+        "EventCode": "0x00",
+        "UMask": "0x2",
+        "BriefDescription": "Core cycles when the thread is not in halt state",
+        "Counter": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "UMask": "0x2",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "Counter": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 1"
+    },
+    {
+        "EventCode": "0x00",
+        "UMask": "0x3",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "Counter": "Fixed counter 2",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate than the core clock; the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1' for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "Fixed counter 2"
+    },
+    {
+        "EventCode": "0x03",
+        "UMask": "0x2",
+        "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded.",
+        "Counter": "0,1,2,3",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when: a. preceding store conflicts with the load (incomplete overlap), b. store forwarding is impossible due to u-arch limitations, c. preceding lock RMW operations are not forwarded, d. store has the no-forward bit set (uncacheable/page-split/masked stores), e. all-blocking stores are used (mostly, fences and port I/O), and others. The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x03",
+        "UMask": "0x8",
+        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+        "Counter": "0,1,2,3",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x07",
+        "UMask": "0x1",
+        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+        "Counter": "0,1,2,3",
+        "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "UMask": "0x1",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+        "Counter": "0,1,2,3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "UMask": "0x1",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+        "Counter": "0,1,2,3",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0D",
+        "UMask": "0x80",
+        "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
+        "Counter": "0,1,2,3",
+        "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x1",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.ANY",
+        "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0x0E",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x2",
+        "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+        "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x0E",
+        "UMask": "0x20",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_ISSUED.SLOW_LEA",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x14",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+        "Counter": "0,1,2,3",
+        "EventName": "ARITH.DIVIDER_ACTIVE",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x0",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x0",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0x3C",
+        "UMask": "0x0",
+        "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
+        "CounterMask": "1",
+        "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2503",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2503",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "2503",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x1",
+        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "AnyThread": "1",
+        "SampleAfterValue": "2503",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x2",
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x3C",
+        "UMask": "0x2",
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+        "Counter": "0,1,2,3",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "2503",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4C",
+        "UMask": "0x1",
+        "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
+        "Counter": "0,1,2,3",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x5E",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+        "Counter": "0,1,2,3",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread. Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "Invert": "1",
+        "EventCode": "0x5E",
+        "UMask": "0x1",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+        "Counter": "0,1,2,3",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "CounterMask": "1",
+        "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x87",
+        "UMask": "0x1",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+        "Counter": "0,1,2,3",
+        "EventName": "ILD_STALL.LCP",
+        "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x20",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x40",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA1",
+        "UMask": "0x80",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x1",
+        "BriefDescription": "Resource-related stall cycles",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "PublicDescription": "Counts resource-related stall cycles. Reasons for stalls can be as follows: a. *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots). b. *any* u-arch structure got empty (like INT/SIMD FreeLists). c. FPU control word (FPCW), MXCSR, and others. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA2",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).",
+        "Counter": "0,1,2,3",
+        "EventName": "RESOURCE_STALLS.SB",
+        "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x4",
+        "BriefDescription": "Total execution stalls.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+        "CounterMask": "4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x5",
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+        "CounterMask": "5",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+        "CounterMask": "8",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0xc",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "CounterMask": "12",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+        "CounterMask": "16",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA3",
+        "UMask": "0x14",
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "Counter": "0,1,2,3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+        "CounterMask": "20",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xA6",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
+        "Counter": "0,1,2,3",
+        "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+        "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA6",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+        "Counter": "0,1,2,3",
+        "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+        "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA6",
+        "UMask": "0x4",
+        "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+        "Counter": "0,1,2,3",
+        "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+        "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA6",
+        "UMask": "0x8",
+        "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
+        "Counter": "0,1,2,3",
+        "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+        "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA6",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+        "Counter": "0,1,2,3",
+        "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+        "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA6",
+        "UMask": "0x40",
+        "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
+        "Counter": "0,1,2,3",
+        "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "UMask": "0x1",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "Counter": "0,1,2,3",
+        "EventName": "LSD.UOPS",
+        "PublicDescription": "Number of uops delivered to the back-end by the LSD (Loop Stream Detector).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+        "Counter": "0,1,2,3",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "CounterMask": "1",
+        "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xA8",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+        "Counter": "0,1,2,3",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "CounterMask": "4",
+        "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.THREAD",
+        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "CounterMask": "1",
+        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "CounterMask": "2",
+        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "CounterMask": "3",
+        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x1",
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "CounterMask": "4",
+        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x2",
+        "BriefDescription": "Number of uops executed on the core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "PublicDescription": "Number of uops executed from any thread.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 2 micro-ops are executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "CounterMask": "2",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 3 micro-ops are executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "CounterMask": "3",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles at least 4 micro-ops are executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "CounterMask": "4",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xB1",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "CounterMask": "1",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xB1",
+        "UMask": "0x10",
+        "BriefDescription": "Counts the number of x87 uops dispatched.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_EXECUTED.X87",
+        "PublicDescription": "Counts the number of x87 uops executed.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC0",
+        "UMask": "0x0",
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "Counter": "0,1,2,3",
+        "EventName": "INST_RETIRED.ANY_P",
+        "Errata": "SKL091, SKL044",
+        "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC0",
+        "UMask": "0x1",
+        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+        "PEBS": "2",
+        "Counter": "1",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "Errata": "SKL091, SKL044",
+        "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "1"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xC0",
+        "UMask": "0x1",
+        "BriefDescription": "Number of cycles using always true condition applied to  PEBS instructions retired event.",
+        "PEBS": "2",
+        "Counter": "0,2,3",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+        "CounterMask": "10",
+        "Errata": "SKL091, SKL044",
+        "PublicDescription": "Number of cycles using an always true condition applied to  PEBS instructions retired event. (inst_ret< 16)",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,2,3"
+    },
+    {
+        "EventCode": "0xC1",
+        "UMask": "0x3f",
+        "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
+        "Counter": "0,1,2,3",
+        "EventName": "OTHER_ASSISTS.ANY",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC2",
+        "UMask": "0x2",
+        "BriefDescription": "Retirement slots used.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "PublicDescription": "Counts the retirement slots used.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xC2",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles without actually retired uops.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "CounterMask": "1",
+        "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "Invert": "1",
+        "EventCode": "0xC2",
+        "UMask": "0x2",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
+        "Counter": "0,1,2,3",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "CounterMask": "10",
+        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EdgeDetect": "1",
+        "EventCode": "0xC3",
+        "UMask": "0x1",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "CounterMask": "1",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC3",
+        "UMask": "0x4",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "Counter": "0,1,2,3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x0",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "Errata": "SKL091",
+        "PublicDescription": "Counts all (macro) branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x1",
+        "BriefDescription": "Conditional branch instructions retired.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "Errata": "SKL091",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x2",
+        "BriefDescription": "Direct and indirect near call instructions retired.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "Errata": "SKL091",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x4",
+        "BriefDescription": "All (macro) branch instructions retired.",
+        "PEBS": "2",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "Errata": "SKL091",
+        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x8",
+        "BriefDescription": "Return instructions retired.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "Errata": "SKL091",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x10",
+        "BriefDescription": "Not taken branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "Errata": "SKL091",
+        "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x20",
+        "BriefDescription": "Taken branch instructions retired.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "Errata": "SKL091",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC4",
+        "UMask": "0x40",
+        "BriefDescription": "Counts the number of far branch instructions retired.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "Errata": "SKL091",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x0",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x1",
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x2",
+        "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+        "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x4",
+        "BriefDescription": "Mispredicted macro branch instructions retired.",
+        "PEBS": "2",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3"
+    },
+    {
+        "EventCode": "0xC5",
+        "UMask": "0x20",
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+        "PEBS": "1",
+        "Counter": "0,1,2,3",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+        "SampleAfterValue": "400009",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xCC",
+        "UMask": "0x20",
+        "BriefDescription": "Increments whenever there is an update to the LBR array.",
+        "Counter": "0,1,2,3",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xE6",
+        "UMask": "0x1",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+        "Counter": "0,1,2,3",
+        "EventName": "BACLEARS.ANY",
+        "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/skylakex/skx-metrics.json b/pmu-events/arch/x86/skylakex/skx-metrics.json
new file mode 100644
index 0000000..36c903f
--- /dev/null
+++ b/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -0,0 +1,164 @@
+[
+    {
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "MetricGroup": "Pipeline",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+        "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
+        "MetricGroup": "Frontend",
+        "MetricName": "IFetch_Line_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricName": "DSB_Coverage"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (threaded)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "MetricGroup": "Pipeline;Summary",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Summary",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "Total issue-pipeline slots",
+        "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "SLOTS"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary",
+        "MetricName": "Instructions"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricGroup": "SMT",
+        "MetricName": "CoreIPC"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "MetricGroup": "Pipeline;Ports_Utilization",
+        "MetricName": "ILP"
+    },
+    {
+        "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+        "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL  - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)",
+        "MetricGroup": "Unknown_Branches",
+        "MetricName": "BAClear_Cost"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )",
+        "MetricGroup": "Memory_Bound;Memory_Lat",
+        "MetricName": "Load_Miss_Real_Latency"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand loads when there is at least 1 such miss)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+        "MetricGroup": "Memory_Bound;Memory_BW",
+        "MetricName": "MLP"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )",
+        "MetricGroup": "TLB",
+        "MetricName": "Page_Walks_Utilization"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "Summary",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Giga Floating Point Operations Per Second",
+        "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
+        "MetricGroup": "FLOPS;Summary",
+        "MetricName": "GFLOPs"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware threads were active",
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "MetricGroup": "SMT;Summary",
+        "MetricName": "SMT_2T_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Summary",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "C3 residency percent per core",
+        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    }
+]
diff --git a/pmu-events/arch/x86/skylakex/uncore-memory.json b/pmu-events/arch/x86/skylakex/uncore-memory.json
new file mode 100644
index 0000000..9c7e5f8
--- /dev/null
+++ b/pmu-events/arch/x86/skylakex/uncore-memory.json
@@ -0,0 +1,172 @@
+[
+    {
+        "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_READ",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_WRITE",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0xC",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Memory controller clock ticks",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_M_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x85",
+        "EventName": "UNC_M_POWER_CHANNEL_PPD",
+        "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_channel_ppd %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Cycles Memory is in self refresh power mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x43",
+        "EventName": "UNC_M_POWER_SELF_REFRESH",
+        "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.",
+        "MetricName": "power_self_refresh %",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Pre-charges due to page misses",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Pre-charge for reads",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.RD",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Pre-charge for writes",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_M_PRE_COUNT.WR",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "DRAM Page Activate commands sent due to a write request",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_M_ACT_COUNT.WR",
+        "PerPkg": "1",
+        "PublicDescription": "Counts DRAM Page Activate commands sent on this channel due to a write request to the iMC (Memory Controller).  Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS (Column Access Select) command.",
+        "UMask": "0x2",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "All DRAM CAS Commands issued",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "UNC_M_CAS_COUNT.ALL",
+        "PerPkg": "1",
+        "PublicDescription": "Counts all CAS (Column Address Select) commands issued to DRAM per memory channel.  CAS commands are issued to specify the address to read or write on DRAM, so this event increments for every read and write. This event counts whether AutoPrecharge (which closes the DRAM Page automatically after a read/write) is enabled or not.",
+        "UMask": "0xF",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_READ",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x3",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "All DRAM Read CAS Commands issued (does not include underfills)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "UNC_M_CAS_COUNT.RD_REG",
+        "PerPkg": "1",
+        "PublicDescription": "Counts CAS (Column Access Select) regular read commands issued to DRAM on a per channel basis.  CAS commands are issued to specify the address to read or write on DRAM, and this event increments for every regular read.  This event only counts regular reads and does not include underfill reads due to partial write requests.  This event counts whether AutoPrecharge (which closes the DRAM Page automatically after a read/write) is enabled or not.",
+        "UMask": "0x1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "DRAM Underfill Read CAS Commands issued",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "UNC_M_CAS_COUNT.RD_UNDERFILL",
+        "PerPkg": "1",
+        "PublicDescription": "Counts CAS (Column Access Select) underfill read commands issued to DRAM due to a partial write, on a per channel basis.  CAS commands are issued to specify the address to read or write on DRAM, and this command counts underfill reads.  Partial writes must be completed by first reading in the underfill from DRAM and then merging in the partial write data before writing the full line back to DRAM. This event will generally count about the same as the number of partial writes, but may be slightly less because of partials hitting in the WPQ (due to a previous write request).",
+        "UMask": "0x2",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x4",
+        "EventName": "LLC_MISSES.MEM_WRITE",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0xC",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue Allocations",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of read requests allocated into the Read Pending Queue (RPQ).  This queue is used to schedule reads out to the memory controller and to track the requests.  Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC.  The requests deallocate after the read CAS command has been issued to DRAM.  This event counts both Isochronous and non-Isochronous requests which were issued to the RPQ.",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue Occupancy",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x80",
+        "EventName": "UNC_M_RPQ_OCCUPANCY",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of entries in the Read Pending Queue (RPQ) at each cycle.  This can then be used to calculate both the average occupancy of the queue (in conjunction with the number of cycles not empty) and the average latency in the queue (in conjunction with the number of allocations).  The RPQ is used to schedule reads out to the memory controller and to track the requests.  Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. They deallocate from the RPQ after the CAS command has been issued to memory.",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue Allocations",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x20",
+        "EventName": "UNC_M_WPQ_INSERTS",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of write requests allocated into the Write Pending Queue (WPQ).  The WPQ is used to schedule writes out to the memory controller and to track the requests.  Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (Memory Controller).  The write requests deallocate after being issued to DRAM.  Write requests themselves are able to complete (from the perspective of the rest of the system) as soon as they have 'posted' to the iMC.",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue Occupancy",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x81",
+        "EventName": "UNC_M_WPQ_OCCUPANCY",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle.  This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations).  The WPQ is used to schedule writes out to the memory controller and to track the requests.",
+        "Unit": "iMC"
+    }
+]
diff --git a/pmu-events/arch/x86/skylakex/uncore-other.json b/pmu-events/arch/x86/skylakex/uncore-other.json
new file mode 100644
index 0000000..de6e70e
--- /dev/null
+++ b/pmu-events/arch/x86/skylakex/uncore-other.json
@@ -0,0 +1,1156 @@
+[
+    {
+        "BriefDescription": "Uncore cache clock ticks",
+        "Counter": "0,1,2,3",
+        "EventName": "UNC_CHA_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "LLC misses - Uncacheable reads (from cpu). Derived from unc_cha_tor_inserts.ia_miss",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.UNCACHEABLE",
+        "Filter": "config1=0x40e33",
+        "PerPkg": "1",
+        "UMask": "0x21",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "MMIO reads. Derived from unc_cha_tor_inserts.ia_miss",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.MMIO_READ",
+        "Filter": "config1=0x40040e33",
+        "PerPkg": "1",
+        "UMask": "0x21",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "MMIO writes. Derived from unc_cha_tor_inserts.ia_miss",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_MISSES.MMIO_WRITE",
+        "Filter": "config1=0x40041e33",
+        "PerPkg": "1",
+        "UMask": "0x21",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Streaming stores (full cache line). Derived from unc_cha_tor_inserts.ia_miss",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.STREAMING_FULL",
+        "Filter": "config1=0x41833",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x21",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Streaming stores (partial cache line). Derived from unc_cha_tor_inserts.ia_miss",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x35",
+        "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
+        "Filter": "config1=0x41a33",
+        "PerPkg": "1",
+        "ScaleUnit": "64Bytes",
+        "UMask": "0x21",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "read requests from home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.READS",
+        "PerPkg": "1",
+        "UMask": "0x03",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "read requests from local home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.READS_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "read requests from remote home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.READS_REMOTE",
+        "PerPkg": "1",
+        "UMask": "0x02",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "write requests from home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.WRITES",
+        "PerPkg": "1",
+        "UMask": "0x0C",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "write requests from local home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.WRITES_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "write requests from remote home agent",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.WRITES_REMOTE",
+        "PerPkg": "1",
+        "UMask": "0x08",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UPI_DATA_BANDWIDTH_TX",
+        "PerPkg": "1",
+        "ScaleUnit": "7.11E-06Bytes",
+        "UMask": "0x0F",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0",
+        "Counter": "0,1",
+        "EventCode": "0x83",
+        "EventName": "LLC_MISSES.PCIE_READ",
+        "FCMask": "0x07",
+        "Filter": "ch_mask=0x1f",
+        "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+        "MetricName": "LLC_MISSES.PCIE_READ",
+        "PerPkg": "1",
+        "PortMask": "0x01",
+        "ScaleUnit": "4Bytes",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0",
+        "Counter": "0,1",
+        "EventCode": "0x83",
+        "EventName": "LLC_MISSES.PCIE_WRITE",
+        "FCMask": "0x07",
+        "Filter": "ch_mask=0x1f",
+        "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+        "MetricName": "LLC_MISSES.PCIE_WRITE",
+        "PerPkg": "1",
+        "PortMask": "0x01",
+        "ScaleUnit": "4Bytes",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCI Express bandwidth writing at IIO, part 0",
+        "Counter": "0,1",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0",
+        "FCMask": "0x07",
+        "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+        "MetricName": "LLC_MISSES.PCIE_WRITE",
+        "PerPkg": "1",
+        "PortMask": "0x01",
+        "ScaleUnit": "4Bytes",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCI Express bandwidth writing at IIO, part 1",
+        "Counter": "0,1",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "ScaleUnit": "4Bytes",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCI Express bandwidth writing at IIO, part 2",
+        "Counter": "0,1",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "ScaleUnit": "4Bytes",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCI Express bandwidth writing at IIO, part 3",
+        "Counter": "0,1",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "ScaleUnit": "4Bytes",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCI Express bandwidth reading at IIO, part 0",
+        "Counter": "0,1",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0",
+        "FCMask": "0x07",
+        "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+        "MetricName": "LLC_MISSES.PCIE_READ",
+        "PerPkg": "1",
+        "PortMask": "0x01",
+        "ScaleUnit": "4Bytes",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCI Express bandwidth reading at IIO, part 1",
+        "Counter": "0,1",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "ScaleUnit": "4Bytes",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCI Express bandwidth reading at IIO, part 2",
+        "Counter": "0,1",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "ScaleUnit": "4Bytes",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCI Express bandwidth reading at IIO, part 3",
+        "Counter": "0,1",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "ScaleUnit": "4Bytes",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Core Cross Snoops Issued; Multiple Core Requests",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x33",
+        "EventName": "UNC_CHA_CORE_SNP.CORE_GTONE",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of transactions that trigger a configurable number of cross snoops.  Cores are snooped if the transaction looks up the cache and determines that it is necessary based on the operation type and what CoreValid bits are set.  For example, if 2 CV bits are set on a data read, the cores must have the data in S state so it is not necessary to snoop them.  However, if only 1 CV bit is set the core may have modified the data.  If the transaction was an RFO, it would need to invalidate the lines.  This event can be filtered based on who triggered the initial snoop(s).",
+        "UMask": "0x42",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Core Cross Snoops Issued; Multiple Eviction",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x33",
+        "EventName": "UNC_CHA_CORE_SNP.EVICT_GTONE",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of transactions that trigger a configurable number of cross snoops.  Cores are snooped if the transaction looks up the cache and determines that it is necessary based on the operation type and what CoreValid bits are set.  For example, if 2 CV bits are set on a data read, the cores must have the data in S state so it is not necessary to snoop them.  However, if only 1 CV bit is set the core may have modified the data.  If the transaction was an RFO, it would need to invalidate the lines.  This event can be filtered based on who triggered the initial snoop(s).",
+        "UMask": "0x82",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory state lookups; Snoop Not Needed",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x53",
+        "EventName": "UNC_CHA_DIR_LOOKUP.NO_SNP",
+        "PerPkg": "1",
+        "PublicDescription": "Counts transactions that looked into the multi-socket cacheline Directory state, and therefore did not send a snoop because the Directory indicated it was not needed",
+        "UMask": "0x02",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory state lookups; Snoop Needed",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x53",
+        "EventName": "UNC_CHA_DIR_LOOKUP.SNP",
+        "PerPkg": "1",
+        "PublicDescription": "Counts  transactions that looked into the multi-socket cacheline Directory state, and sent one or more snoops, because the Directory indicated it was needed",
+        "UMask": "0x01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory state updates; Directory Updated memory write from the HA pipe",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x54",
+        "EventName": "UNC_CHA_DIR_UPDATE.HA",
+        "PerPkg": "1",
+        "PublicDescription": "Counts only multi-socket cacheline Directory state updates due to memory writes issued from the HA pipe. This does not include memory write requests which are for I (Invalid) or E (Exclusive) cachelines.",
+        "UMask": "0x01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory state updates; Directory Updated memory write from TOR pipe",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x54",
+        "EventName": "UNC_CHA_DIR_UPDATE.TOR",
+        "PerPkg": "1",
+        "PublicDescription": "Counts only multi-socket cacheline Directory state updates due to memory writes issued from the TOR pipe which are the result of remote transaction hitting the SF/LLC and returning data Core2Core. This does not include memory write requests which are for I (Invalid) or E (Exclusive) cachelines.",
+        "UMask": "0x02",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read request from a remote socket which hit in the HitMe Cache to a line in the E state",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5F",
+        "EventName": "UNC_CHA_HITME_HIT.EX_RDS",
+        "PerPkg": "1",
+        "PublicDescription": "Counts read requests from a remote socket which hit in the HitME cache (used to cache the multi-socket Directory state) to a line in the E(Exclusive) state.  This includes the following read opcodes (RdCode, RdData, RdDataMigratory, RdCur, RdInv*, Inv*)",
+        "UMask": "0x01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Normal priority reads issued to the memory controller from the CHA",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x59",
+        "EventName": "UNC_CHA_IMC_READS_COUNT.NORMAL",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when a normal (Non-Isochronous) read is issued to any of the memory controller channels from the CHA.",
+        "UMask": "0x01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CHA to iMC Full Line Writes Issued; Full Line Non-ISOCH",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5B",
+        "EventName": "UNC_CHA_IMC_WRITES_COUNT.FULL",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when a normal (Non-Isochronous) full line write is issued from the CHA to any of the memory controller channels.",
+        "UMask": "0x01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Number of times that an RFO hit in S state.",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x39",
+        "EventName": "UNC_CHA_MISC.RFO_HIT_S",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when a RFO (the Read for Ownership issued before a  write) request hit a cacheline in the S (Shared) state.",
+        "UMask": "0x08",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Local requests for exclusive ownership of a cache line  without receiving data",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.INVITOE_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the total number of requests coming from a unit on this socket for exclusive ownership of a cache line without receiving data (INVITOE) to the CHA.",
+        "UMask": "0x10",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Remote requests for exclusive ownership of a cache line without receiving data",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.INVITOE_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the total number of requests coming from a remote socket for exclusive ownership of a cache line without receiving data (INVITOE) to the CHA.",
+        "UMask": "0x20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "RspCnflct* Snoop Responses Received",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5C",
+        "EventName": "UNC_CHA_SNOOP_RESP.RSPCNFLCTS",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when a transaction with the opcode type RspCnflct* Snoop Response was received. This is returned when a snoop finds an existing outstanding transaction in a remote caching agent. This triggers conflict resolution hardware. This covers both the opcode RspCnflct and RspCnflctWbI.",
+        "UMask": "0x40",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "RspI Snoop Responses Received",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5C",
+        "EventName": "UNC_CHA_SNOOP_RESP.RSPI",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when a transaction with the opcode type RspI Snoop Response was received which indicates the remote cache does not have the data, or when the remote cache silently evicts data (such as when an RFO: the Read for Ownership issued before a write hits non-modified data).",
+        "UMask": "0x01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "RspIFwd Snoop Responses Received",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5C",
+        "EventName": "UNC_CHA_SNOOP_RESP.RSPIFWD",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when a transaction with the opcode type RspIFwd Snoop Response was received which indicates a remote caching agent forwarded the data and the requesting agent is able to acquire the data in E (Exclusive) or M (modified) states.  This is commonly returned with RFO (the Read for Ownership issued before a write) transactions.  The snoop could have either been to a cacheline in the M,E,F (Modified, Exclusive or Forward) states.",
+        "UMask": "0x04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "RspSFwd Snoop Responses Received",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5C",
+        "EventName": "UNC_CHA_SNOOP_RESP.RSPSFWD",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when a transaction with the opcode type RspSFwd Snoop Response was received which indicates a remote caching agent forwarded the data but held on to its current copy.  This is common for data and code reads that hit in a remote socket in E (Exclusive) or F (Forward) state.",
+        "UMask": "0x08",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Rsp*Fwd*WB Snoop Responses Received",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5C",
+        "EventName": "UNC_CHA_SNOOP_RESP.RSP_FWD_WB",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when a transaction with the opcode type Rsp*Fwd*WB Snoop Response was received which indicates the data was written back to its home socket, and the cacheline was forwarded to the requestor socket.  This snoop response is only used in >= 4 socket systems.  It is used when a snoop HITM's in a remote caching agent and it directly forwards data to a requestor, and simultaneously returns data to its home socket to be written back to memory.",
+        "UMask": "0x20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Rsp*WB Snoop Responses Received",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x5C",
+        "EventName": "UNC_CHA_SNOOP_RESP.RSP_WBWB",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when a transaction with the opcode type Rsp*WB Snoop Response was received which indicates the data was written back to its home.  This is returned when a non-RFO request hits a cacheline in the Modified state. The Cache can either downgrade the cacheline to a S (Shared) or I (Invalid) state depending on how the system has been configured.  This response will also be sent when a cache requests E (Exclusive) ownership of a cache line without receiving data, because the cache must acquire ownership.",
+        "UMask": "0x10",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Clockticks of the IIO Traffic Controller",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_IIO_CLOCKTICKS",
+        "PerPkg": "1",
+        "PublicDescription": "Counts clockticks of the 1GHz traffic controller clock in the IIO unit.",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part0",
+        "Counter": "2,3",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x01",
+        "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part1",
+        "Counter": "2,3",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part2",
+        "Counter": "2,3",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part3",
+        "Counter": "2,3",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could refer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Write request of 4 bytes made to IIO Part0 by the CPU",
+        "Counter": "2,3",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x01",
+        "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Write request of 4 bytes made to IIO Part1 by the CPU",
+        "Counter": "2,3",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Write request of 4 bytes made to IIO Part2 by the CPU ",
+        "Counter": "2,3",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by  a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Write request of 4 bytes made to IIO Part3 by the CPU ",
+        "Counter": "2,3",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could refer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part0",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x01",
+        "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part1",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part2",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part3",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could refer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part0 by the CPU",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x01",
+        "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part1 by the CPU",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part2 by the CPU ",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part3 by the CPU ",
+        "Counter": "0,1,2,3",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could refer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part0 to Memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x01",
+        "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by IIO Part0 to a unit on the main die (generally memory). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part1 to Memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by IIO Part1 to a unit on the main die (generally memory). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part2 to Memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by IIO Part2 to a unit on the main die (generally memory). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part3 to Memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by IIO Part3 to a unit on the main die (generally memory). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could refer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+        "UMask": "0x04",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Write request of up to a 64 byte transaction is made by IIO Part0 to Memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x01",
+        "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made by IIO Part0 to a unit on the main die (generally memory). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Write request of up to a 64 byte transaction is made by IIO Part1 to Memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made by IIO Part1 to a unit on the main die (generally memory). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Write request of up to a 64 byte transaction is made by IIO Part2 to Memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made by IIO Part2 to a unit on the main die (generally memory). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Write request of up to a 64 byte transaction is made by IIO Part3 to Memory",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made by IIO Part3 to a unit on the main die (generally memory). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could refer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+        "UMask": "0x01",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Traffic in which the M2M to iMC Bypass was not taken",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x22",
+        "EventName": "UNC_M2M_BYPASS_M2M_Egress.NOT_TAKEN",
+        "PerPkg": "1",
+        "PublicDescription": "Counts traffic in which the M2M (Mesh to Memory) to iMC (Memory Controller) bypass was not taken",
+        "UMask": "0x2",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Cycles when direct to core mode (which bypasses the CHA) was disabled",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x24",
+        "EventName": "UNC_M2M_DIRECT2CORE_NOT_TAKEN_DIRSTATE",
+        "PerPkg": "1",
+        "PublicDescription": "Counts cycles when direct to core mode (which bypasses the CHA) was disabled",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Messages sent direct to core (bypassing the CHA)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x23",
+        "EventName": "UNC_M2M_DIRECT2CORE_TAKEN",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when messages were sent direct to core (bypassing the CHA)",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Number of reads in which direct to core transaction were overridden",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x25",
+        "EventName": "UNC_M2M_DIRECT2CORE_TXN_OVERRIDE",
+        "PerPkg": "1",
+        "PublicDescription": "Counts reads in which direct to core transactions (which would have bypassed the CHA) were overridden",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Number of reads in which direct to Intel UPI transactions were overridden",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x28",
+        "EventName": "UNC_M2M_DIRECT2UPI_NOT_TAKEN_CREDITS",
+        "PerPkg": "1",
+        "PublicDescription": "Counts reads in which direct to Intel Ultra Path Interconnect (UPI) transactions (which would have bypassed the CHA) were overridden",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Cycles when direct to Intel UPI was disabled",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x27",
+        "EventName": "UNC_M2M_DIRECT2UPI_NOT_TAKEN_DIRSTATE",
+        "PerPkg": "1",
+        "PublicDescription": "Counts cycles when the ability to send messages direct to the Intel Ultra Path Interconnect (bypassing the CHA) was disabled",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Messages sent direct to the Intel UPI",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x26",
+        "EventName": "UNC_M2M_DIRECT2UPI_TAKEN",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when messages were sent direct to the Intel Ultra Path Interconnect (bypassing the CHA)",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Number of reads that a message sent direct to Intel UPI was overridden",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x29",
+        "EventName": "UNC_M2M_DIRECT2UPI_TXN_OVERRIDE",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when a read message that was sent direct to the Intel Ultra Path Interconnect (bypassing the CHA) was overridden",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory lookups (any state found)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2D",
+        "EventName": "UNC_M2M_DIRECTORY_LOOKUP.ANY",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) looks into the multi-socket cacheline Directory state, and found the cacheline marked in Any State (A, I, S or unused)",
+        "UMask": "0x1",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory lookups (cacheline found in A state) ",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2D",
+        "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_A",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) looks into the multi-socket cacheline Directory state, and found the cacheline marked in the A (SnoopAll) state, indicating the cacheline is stored in another socket in any state, and we must snoop the other sockets to make sure we get the latest data.  The data may be stored in any state in the local socket.",
+        "UMask": "0x8",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in I state) ",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2D",
+        "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_I",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) looks into the multi-socket cacheline Directory state, and found the cacheline marked in the I (Invalid) state indicating the cacheline is not stored in another socket, and so there is no need to snoop the other sockets for the latest data.  The data may be stored in any state in the local socket.",
+        "UMask": "0x2",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in S state) ",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2D",
+        "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_S",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) looks into the multi-socket cacheline Directory state, and found the cacheline marked in the S (Shared) state indicating the cacheline is either stored in another socket in the S (Shared) state, and so there is no need to snoop the other sockets for the latest data.  The data may be stored in any state in the local socket.",
+        "UMask": "0x4",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory update from A to I",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2E",
+        "EventName": "UNC_M2M_DIRECTORY_UPDATE.A2I",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from A (SnoopAll) to I (Invalid)",
+        "UMask": "0x20",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory update from A to S",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2E",
+        "EventName": "UNC_M2M_DIRECTORY_UPDATE.A2S",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from A (SnoopAll) to S (Shared)",
+        "UMask": "0x40",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory update from/to Any state ",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2E",
+        "EventName": "UNC_M2M_DIRECTORY_UPDATE.ANY",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory to a new state",
+        "UMask": "0x1",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory update from I to A",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2E",
+        "EventName": "UNC_M2M_DIRECTORY_UPDATE.I2A",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from I (Invalid) to A (SnoopAll)",
+        "UMask": "0x4",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory update from I to S",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2E",
+        "EventName": "UNC_M2M_DIRECTORY_UPDATE.I2S",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from I (Invalid) to S (Shared)",
+        "UMask": "0x2",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory update from S to A",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2E",
+        "EventName": "UNC_M2M_DIRECTORY_UPDATE.S2A",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from S (Shared) to A (SnoopAll)",
+        "UMask": "0x10",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Multi-socket cacheline Directory update from S to I",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2E",
+        "EventName": "UNC_M2M_DIRECTORY_UPDATE.S2I",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from S (Shared) to I (Invalid)",
+        "UMask": "0x8",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Reads to iMC issued",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x37",
+        "EventName": "UNC_M2M_IMC_READS.ALL",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller). ",
+        "UMask": "0x4",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Reads to iMC issued at Normal Priority (Non-Isochronous)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x37",
+        "EventName": "UNC_M2M_IMC_READS.NORMAL",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller).  It only counts  normal priority non-isochronous reads.",
+        "UMask": "0x1",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Writes to iMC issued",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x38",
+        "EventName": "UNC_M2M_IMC_WRITES.ALL",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) issues writes to the iMC (Memory Controller).",
+        "UMask": "0x10",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Partial Non-Isochronous writes to the iMC",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x38",
+        "EventName": "UNC_M2M_IMC_WRITES.PARTIAL",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) issues partial writes to the iMC (Memory Controller).  It only counts normal priority non-isochronous writes.",
+        "UMask": "0x2",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Prefetch requests that got turned into a demand request",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x56",
+        "EventName": "UNC_M2M_PREFCAM_DEMAND_PROMOTIONS",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) promotes an outstanding request in the prefetch queue due to a subsequent demand read request that entered the M2M with the same address.  Explanatory Side Note: The prefetch queue is made of CAM (Content Addressable Memory)",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Inserts into the Memory Controller Prefetch Queue",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x57",
+        "EventName": "UNC_M2M_PREFCAM_INSERTS",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the M2M (Mesh to Memory) receives a prefetch request and inserts it into its outstanding prefetch queue.  Explanatory Side Note: the prefetch queue is made from CAM: Content Addressable Memory",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "AD Ingress (from CMS) Queue Inserts",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_M2M_RxC_AD_INSERTS",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when a new entry is Received(RxC) and then added to the AD (Address Ring) Ingress Queue from the CMS (Common Mesh Stop).  This is generally used for reads.",
+        "Unit": "M2M"
+    },
+    {
+        "BriefDescription": "Prefetches generated by the flow control queue of the M3UPI unit.",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x29",
+        "EventName": "UNC_M3UPI_UPI_PREFETCH_SPAWN",
+        "PerPkg": "1",
+        "PublicDescription": "Count cases where flow control queue that sits between the Intel Ultra Path Interconnect (UPI) and the mesh spawns a prefetch to the iMC (Memory Controller)",
+        "Unit": "M3UPI"
+    },
+    {
+        "BriefDescription": "Clocks of the Intel Ultra Path Interconnect (UPI)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x1",
+        "EventName": "UNC_UPI_CLOCKTICKS",
+        "PerPkg": "1",
+        "PublicDescription": "Counts clockticks of the fixed frequency clock controlling the Intel Ultra Path Interconnect (UPI).  This clock runs at 1/8th the 'GT/s' speed of the UPI link.  For example, a 9.6GT/s link will have a fixed frequency of 1.2 GHz.",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "Data Response packets that go direct to core",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x12",
+        "EventName": "UNC_UPI_DIRECT_ATTEMPTS.D2C",
+        "PerPkg": "1",
+        "PublicDescription": "Counts Data Response (DRS) packets that attempted to go direct to core bypassing the CHA.",
+        "UMask": "0x1",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "Data Response packets that go direct to Intel UPI",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x12",
+        "EventName": "UNC_UPI_DIRECT_ATTEMPTS.D2U",
+        "PerPkg": "1",
+        "PublicDescription": "Counts Data Response (DRS) packets that attempted to go direct to Intel Ultra Path Interconnect (UPI) bypassing the CHA.",
+        "UMask": "0x2",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "Cycles Intel UPI is in L1 power mode (shutdown)",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x21",
+        "EventName": "UNC_UPI_L1_POWER_CYCLES",
+        "PerPkg": "1",
+        "PublicDescription": "Counts cycles when the Intel Ultra Path Interconnect (UPI) is in L1 power mode.  L1 is a mode that totally shuts down the UPI link.  Link power states are per link and per direction, so for example the Tx direction could be in one state while Rx was in another, this event only counts when both links are shutdown.",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "Cycles the Rx of the Intel UPI is in L0p power mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x25",
+        "EventName": "UNC_UPI_RxL0P_POWER_CYCLES",
+        "PerPkg": "1",
+        "PublicDescription": "Counts cycles when the receive side (Rx) of the Intel Ultra Path Interconnect (UPI) is in L0p power mode. L0p is a mode where we disable 60% of the UPI lanes, decreasing our bandwidth in order to save power.",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "FLITs received which bypassed the Slot0 Receive Buffer",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x31",
+        "EventName": "UNC_UPI_RxL_BYPASSED.SLOT0",
+        "PerPkg": "1",
+        "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot0 RxQ buffer (Receive Queue) and passed directly to the Egress.  This is a latency optimization, and should generally be the common case.  If this value is less than the number of FLITs transferred, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
+        "UMask": "0x1",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "FLITs received which bypassed the Slot1 Receive Buffer",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x31",
+        "EventName": "UNC_UPI_RxL_BYPASSED.SLOT1",
+        "PerPkg": "1",
+        "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot1 RxQ buffer (Receive Queue) and passed directly across the BGF and into the Egress.  This is a latency optimization, and should generally be the common case.  If this value is less than the number of FLITs transferred, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
+        "UMask": "0x2",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "FLITs received which bypassed the Slot2 Receive Buffer",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x31",
+        "EventName": "UNC_UPI_RxL_BYPASSED.SLOT2",
+        "PerPkg": "1",
+        "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot2 RxQ buffer (Receive Queue) and passed directly to the Egress.  This is a latency optimization, and should generally be the common case.  If this value is less than the number of FLITs transferred, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
+        "UMask": "0x4",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "Valid data FLITs received from any slot",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x3",
+        "EventName": "UNC_UPI_RxL_FLITS.ALL_DATA",
+        "PerPkg": "1",
+        "PublicDescription": "Counts valid data FLITs  (80 bit FLow control unITs: 64bits of data) received from any of the 3 Intel Ultra Path Interconnect (UPI) Receive Queue slots on this UPI unit.",
+        "UMask": "0x0F",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "Null FLITs received from any slot",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x3",
+        "EventName": "UNC_UPI_RxL_FLITS.ALL_NULL",
+        "PerPkg": "1",
+        "PublicDescription": "Counts null FLITs (80 bit FLow control unITs) received from any of the 3 Intel Ultra Path Interconnect (UPI) Receive Queue slots on this UPI unit.",
+        "UMask": "0x27",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "Protocol header and credit FLITs received from any slot",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x3",
+        "EventName": "UNC_UPI_RxL_FLITS.NON_DATA",
+        "PerPkg": "1",
+        "PublicDescription": "Counts protocol header and credit FLITs  (80 bit FLow control unITs) received from any of the 3 UPI slots on this UPI unit.",
+        "UMask": "0x97",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "Cycles in which the Tx of the Intel Ultra Path Interconnect (UPI) is in L0p power mode",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x27",
+        "EventName": "UNC_UPI_TxL0P_POWER_CYCLES",
+        "PerPkg": "1",
+        "PublicDescription": "Counts cycles when the transmit side (Tx) of the Intel Ultra Path Interconnect(UPI) is in L0p power mode. L0p is a mode where we disable 60% of the UPI lanes, decreasing our bandwidth in order to save power.",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "FLITs that bypassed the TxL Buffer",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x41",
+        "EventName": "UNC_UPI_TxL_BYPASSED",
+        "PerPkg": "1",
+        "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the TxL(transmit) FLIT buffer and pass directly out the UPI Link. Generally, when data is transmitted across the Intel Ultra Path Interconnect (UPI), it will bypass the TxQ and pass directly to the link.  However, the TxQ will be used in L0p (Low Power) mode and (Link Layer Retry) LLR  mode, increasing latency to transfer out to the link.",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UPI_DATA_BANDWIDTH_TX",
+        "PerPkg": "1",
+        "ScaleUnit": "7.11E-06Bytes",
+        "UMask": "0x0F",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "Null FLITs transmitted from any slot",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_UPI_TxL_FLITS.ALL_NULL",
+        "PerPkg": "1",
+        "PublicDescription": "Counts null FLITs (80 bit FLow control unITs) transmitted via any of the 3 Intel Ultra Path Interconnect (UPI) slots on this UPI unit.",
+        "UMask": "0x27",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "Idle FLITs transmitted",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_UPI_TxL_FLITS.IDLE",
+        "PerPkg": "1",
+        "PublicDescription": "Counts when the Intel Ultra Path Interconnect(UPI) transmits an idle FLIT(80 bit FLow control unITs).  Every UPI cycle must be sending either data FLITs, protocol/credit FLITs or idle FLITs.",
+        "UMask": "0x47",
+        "Unit": "UPI LL"
+    },
+    {
+        "BriefDescription": "Protocol header and credit FLITs transmitted across any slot",
+        "Counter": "0,1,2,3",
+        "EventCode": "0x2",
+        "EventName": "UNC_UPI_TxL_FLITS.NON_DATA",
+        "PerPkg": "1",
+        "PublicDescription": "Counts protocol header and credit FLITs (80 bit FLow control unITs) transmitted across any of the 3 UPI (Ultra Path Interconnect) slots on this UPI unit.",
+        "UMask": "0x97",
+        "Unit": "UPI LL"
+    }
+]
diff --git a/pmu-events/arch/x86/skylakex/virtual-memory.json b/pmu-events/arch/x86/skylakex/virtual-memory.json
new file mode 100644
index 0000000..7f466c9
--- /dev/null
+++ b/pmu-events/arch/x86/skylakex/virtual-memory.json
@@ -0,0 +1,284 @@
+[
+    {
+        "EventCode": "0x08",
+        "UMask": "0x1",
+        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x2",
+        "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x4",
+        "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x8",
+        "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 1G pages.  The page walks can end with or without a page fault.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0xe",
+        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x10",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x08",
+        "UMask": "0x20",
+        "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x1",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x2",
+        "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x4",
+        "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x8",
+        "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages.  The page walks can end with or without a page fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0xe",
+        "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x10",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+        "CounterMask": "1",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x49",
+        "UMask": "0x20",
+        "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+        "Counter": "0,1,2,3",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x4F",
+        "UMask": "0x10",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
+        "Counter": "0,1,2,3",
+        "EventName": "EPT.WALK_PENDING",
+        "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
+        "SampleAfterValue": "2000003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x1",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x2",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x4",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x8",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts code misses in all ITLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0xe",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x10",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x10",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.WALK_ACTIVE",
+        "CounterMask": "1",
+        "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0x85",
+        "UMask": "0x20",
+        "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xAE",
+        "UMask": "0x1",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+        "Counter": "0,1,2,3",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting includes both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBD",
+        "UMask": "0x1",
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+        "Counter": "0,1,2,3",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    },
+    {
+        "EventCode": "0xBD",
+        "UMask": "0x20",
+        "BriefDescription": "STLB flush attempts",
+        "Counter": "0,1,2,3",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
+        "SampleAfterValue": "100007",
+        "CounterHTOff": "0,1,2,3,4,5,6,7"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-dp/cache.json b/pmu-events/arch/x86/westmereep-dp/cache.json
new file mode 100644
index 0000000..6e61ae2
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-dp/cache.json
@@ -0,0 +1,2817 @@
+[
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "CACHE_LOCK_CYCLES.L1D",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles L1D locked"
+    },
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CACHE_LOCK_CYCLES.L1D_L2",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles L1D and L2 locked"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D.M_EVICT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D cache lines replaced in M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D.M_REPL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D cache lines allocated in the M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "L1D.M_SNOOP_EVICT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D snoop eviction of cache lines in M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D.REPL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache lines allocated"
+    },
+    {
+        "EventCode": "0x52",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_CACHE_PREFETCH_LOCK_FB_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D prefetch load lock accepted in fill buffer"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_PREFETCH.MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch misses"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_PREFETCH.REQUESTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch requests"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D_PREFETCH.TRIGGERS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch requests triggered"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L1D_WB_L2.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in E state"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D_WB_L2.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in I state (misses)"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L1D_WB_L2.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in M state"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L1D_WB_L2.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L1 writebacks to L2"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_WB_L2.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in S state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "L2_DATA_RQSTS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 data requests"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_DATA_RQSTS.DEMAND.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in E state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_DATA_RQSTS.DEMAND.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in I state (misses)"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_DATA_RQSTS.DEMAND.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in M state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_DATA_RQSTS.DEMAND.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand requests"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_DATA_RQSTS.DEMAND.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in S state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in E state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in the I state (misses)"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in M state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf0",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 data prefetches"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in the S state"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "L2_LINES_IN.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_IN.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated in the E state"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_IN.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated in the S state"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_LINES_OUT.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted by a demand request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 modified lines evicted by a demand request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_OUT.PREFETCH_CLEAN",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted by a prefetch request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_LINES_OUT.PREFETCH_DIRTY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 modified lines evicted by a prefetch request"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_RQSTS.IFETCH_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_RQSTS.IFETCH_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "L2_RQSTS.IFETCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetches"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_RQSTS.LD_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_RQSTS.LD_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L2_RQSTS.LOADS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 requests"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xaa",
+        "EventName": "L2_RQSTS.MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_RQSTS.PREFETCH_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_RQSTS.PREFETCH_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc0",
+        "EventName": "L2_RQSTS.PREFETCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 prefetches"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 requests"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "L2_RQSTS.RFOS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO requests"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_TRANSACTIONS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_TRANSACTIONS.FILL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 fill transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_TRANSACTIONS.IFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_TRANSACTIONS.L1D_WB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D writeback to L2 transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_TRANSACTIONS.LOAD",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 Load transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_TRANSACTIONS.PREFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_TRANSACTIONS.RFO",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_TRANSACTIONS.WB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 writeback to LLC transactions"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_WRITE.LOCK.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in E state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe0",
+        "EventName": "L2_WRITE.LOCK.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All demand L2 lock RFOs that hit the cache"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_WRITE.LOCK.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in I state (misses)"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_WRITE.LOCK.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in M state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf0",
+        "EventName": "L2_WRITE.LOCK.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All demand L2 lock RFOs"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_WRITE.LOCK.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in S state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "L2_WRITE.RFO.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L2 demand store RFOs that hit the cache"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_WRITE.RFO.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in I state (misses)"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_WRITE.RFO.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in M state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_WRITE.RFO.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L2 demand store RFOs"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_WRITE.RFO.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in S state"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Longest latency cache miss"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Longest latency cache reference"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_INST_RETIRED.LOADS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired which contain a load (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_INST_RETIRED.STORES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired which contain a store (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "MEM_LOAD_RETIRED.HIT_LFB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that miss L1D and hit a previously allocated LFB (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_RETIRED.L1D_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired loads that hit the L1 data cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that hit the L2 cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_LOAD_RETIRED.LLC_MISS",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Retired loads that miss the LLC cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_RETIRED.LLC_UNSHARED_HIT",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit valid versions in the LLC cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "OFFCORE_REQUESTS.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS.ANY.READ",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore read requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "OFFCORE_REQUESTS.ANY.RFO",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS.DEMAND.READ_CODE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code read requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS.DEMAND.READ_DATA",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data read requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS.DEMAND.RFO",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "OFFCORE_REQUESTS.L1D_WRITEBACK",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore L1 data cache writebacks"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ANY.READ",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding offcore reads"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ANY.READ_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles offcore reads busy",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding offcore demand code reads"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles offcore demand code read busy",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding offcore demand data reads"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles offcore demand data read busy",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding offcore demand RFOs"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles offcore demand RFOs busy",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_SQ_FULL",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests blocked due to Super Queue full"
+    },
+    {
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SQ_MISC.LRU_HINTS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue LRU hints sent to LLC"
+    },
+    {
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue lock splits across a cache line"
+    },
+    {
+        "EventCode": "0x6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "STORE_BLOCKS.AT_RET",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Loads delayed with at-Retirement block code"
+    },
+    {
+        "EventCode": "0x6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "STORE_BLOCKS.L1D_BLOCK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Cacheable loads delayed with L1D block code"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x0",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Memory instructions retired above 0 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x400",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100",
+        "BriefDescription": "Memory instructions retired above 1024 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "1000",
+        "BriefDescription": "Memory instructions retired above 128 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Memory instructions retired above 16 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x4000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "5",
+        "BriefDescription": "Memory instructions retired above 16384 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x800",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50",
+        "BriefDescription": "Memory instructions retired above 2048 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "500",
+        "BriefDescription": "Memory instructions retired above 256 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "5000",
+        "BriefDescription": "Memory instructions retired above 32 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x8000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "3",
+        "BriefDescription": "Memory instructions retired above 32768 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50000",
+        "BriefDescription": "Memory instructions retired above 4 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x1000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20",
+        "BriefDescription": "Memory instructions retired above 4096 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "200",
+        "BriefDescription": "Memory instructions retired above 512 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Memory instructions retired above 64 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Memory instructions retired above 8 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x2000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "10",
+        "BriefDescription": "Memory instructions retired above 8192 clocks (Precise Event)"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5011",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8011",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x111",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x211",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x411",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x711",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1011",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x811",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5044",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f44",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff44",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8044",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x144",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x444",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x744",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1044",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x844",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x50ff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7fff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xffff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x80ff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1ff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2ff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4ff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7ff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10ff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8ff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f22",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff22",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x222",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x422",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x722",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x822",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x108",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x208",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x408",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x708",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x808",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f77",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff77",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x177",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x277",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x477",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x777",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x877",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5033",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f33",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff33",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8033",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x133",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x233",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x433",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x733",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1033",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x833",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5003",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8003",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x103",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x203",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x403",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x703",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x803",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f01",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff01",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x101",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x201",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x401",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x701",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x801",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f04",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff04",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x104",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x204",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x404",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x704",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x804",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f02",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff02",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x102",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x202",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x402",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x702",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x802",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x180",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x280",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x480",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x780",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x880",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5050",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f50",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff50",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8050",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x150",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x250",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x450",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x750",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1050",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x850",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x110",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x210",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x410",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x710",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x810",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f40",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff40",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x140",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x240",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x440",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x740",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x840",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f20",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff20",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x120",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x220",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x420",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x720",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x820",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5070",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7f70",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = ANY_CACHE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xff70",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = ANY_LOCATION",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8070",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = IO_CSR_MMIO",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x170",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x270",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x470",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x770",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = LOCAL_CACHE",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1070",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x870",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = REMOTE_CACHE_HITM",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-dp/floating-point.json b/pmu-events/arch/x86/westmereep-dp/floating-point.json
new file mode 100644
index 0000000..7d2f71a
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-dp/floating-point.json
@@ -0,0 +1,229 @@
+[
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_ASSIST.ALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ASSIST.INPUT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists for invalid input value (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ASSIST.OUTPUT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists for invalid output value (Precise Event)"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_COMP_OPS_EXE.MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "MMX Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE* FP double precision Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE and SSE2 FP Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP_PACKED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE FP packed Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP_SCALAR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE FP scalar Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE* FP single precision Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_COMP_OPS_EXE.SSE2_INTEGER",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE2 integer Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_COMP_OPS_EXE.X87",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Computational floating-point operations executed"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "FP_MMX_TRANS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All Floating Point to and from MMX transitions"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_MMX_TRANS.TO_FP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Transitions from MMX to Floating Point instructions"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_MMX_TRANS.TO_MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Transitions from Floating Point to MMX instructions"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SIMD_INT_128.PACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer pack operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "SIMD_INT_128.PACKED_ARITH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer arithmetic operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SIMD_INT_128.PACKED_LOGICAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer logical operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_INT_128.PACKED_MPY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer multiply operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_INT_128.PACKED_SHIFT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer shift operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "SIMD_INT_128.SHUFFLE_MOVE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer shuffle/move operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SIMD_INT_128.UNPACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer unpack operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SIMD_INT_64.PACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit pack operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "SIMD_INT_64.PACKED_ARITH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit arithmetic operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SIMD_INT_64.PACKED_LOGICAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit logical operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_INT_64.PACKED_MPY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit packed multiply operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_INT_64.PACKED_SHIFT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit shift operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "SIMD_INT_64.SHUFFLE_MOVE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit shuffle/move operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SIMD_INT_64.UNPACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit unpack operations"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-dp/frontend.json b/pmu-events/arch/x86/westmereep-dp/frontend.json
new file mode 100644
index 0000000..e5e21e0
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-dp/frontend.json
@@ -0,0 +1,26 @@
+[
+    {
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACRO_INSTS.DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions decoded"
+    },
+    {
+        "EventCode": "0xA6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACRO_INSTS.FUSIONS_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Macro-fused instructions decoded"
+    },
+    {
+        "EventCode": "0x19",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TWO_UOP_INSTS_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Two Uop instructions decoded"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-dp/memory.json b/pmu-events/arch/x86/westmereep-dp/memory.json
new file mode 100644
index 0000000..6e0829b
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-dp/memory.json
@@ -0,0 +1,758 @@
+[
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MISALIGN_MEM_REF.STORE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Misaligned store references"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3011",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf811",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4011",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2011",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3044",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf844",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4044",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2044",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x30ff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf8ff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x40ff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20ff",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf822",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = ANY RFO and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf808",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = CORE_WB and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf877",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3033",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf833",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4033",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2033",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DATA_IN and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3003",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf803",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf801",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf804",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf802",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf880",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = OTHER and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3050",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf850",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4050",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2050",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf810",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf840",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf820",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PF_RFO and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x3070",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_DRAM_AND_REMOTE_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xf870",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = ANY_LLC_MISS",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4070",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.OTHER_LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = OTHER_LOCAL_DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2070",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "REQUEST = PREFETCH and RESPONSE = REMOTE_DRAM",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-dp/other.json b/pmu-events/arch/x86/westmereep-dp/other.json
new file mode 100644
index 0000000..85133d6
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-dp/other.json
@@ -0,0 +1,287 @@
+[
+    {
+        "EventCode": "0xE8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BPU_CLEARS.EARLY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Early Branch Prediction Unit clears"
+    },
+    {
+        "EventCode": "0xE8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BPU_CLEARS.LATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Late Branch Prediction Unit clears"
+    },
+    {
+        "EventCode": "0xE5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BPU_MISSED_CALL_RET",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Branch prediction unit missed call or return"
+    },
+    {
+        "EventCode": "0xD5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ES_REG_RENAMES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ES segment renames"
+    },
+    {
+        "EventCode": "0x6C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IO_TRANSACTIONS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "I/O transactions"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L1I.CYCLES_STALLED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch stall cycles"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1I.HITS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch hits"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1I.MISSES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch misses"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L1I.READS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I Instruction fetches"
+    },
+    {
+        "EventCode": "0x82",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LARGE_ITLB.HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Large ITLB hit"
+    },
+    {
+        "EventCode": "0x3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_BLOCK.OVERLAP_STORE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Loads that partially overlap an earlier store"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "LOAD_DISPATCH.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All loads dispatched"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "LOAD_DISPATCH.MOB",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched from the MOB"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOAD_DISPATCH.RS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched that bypass the MOB"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_DISPATCH.RS_DELAYED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched from stage 305"
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "PARTIAL_ADDRESS_ALIAS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "False dependencies due to partial address aliasing"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "RAT_STALLS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All RAT stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RAT_STALLS.FLAGS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Flag stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RAT_STALLS.REGISTERS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Partial register stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RAT_STALLS.ROB_READ_PORT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ROB read port stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RAT_STALLS.SCOREBOARD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Scoreboard stall cycles"
+    },
+    {
+        "EventCode": "0x4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "SB_DRAIN.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All Store buffer stall cycles"
+    },
+    {
+        "EventCode": "0xD4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SEG_RENAME_STALLS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Segment rename stall cycles"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SNOOP_RESPONSE.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HIT to snoop"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SNOOP_RESPONSE.HITE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HITE to snoop"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SNOOP_RESPONSE.HITM",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HITM to snoop"
+    },
+    {
+        "EventCode": "0xB4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SNOOPQ_REQUESTS.CODE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Snoop code requests"
+    },
+    {
+        "EventCode": "0xB4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SNOOPQ_REQUESTS.DATA",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Snoop data requests"
+    },
+    {
+        "EventCode": "0xB4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SNOOPQ_REQUESTS.INVALIDATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Snoop invalidate requests"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x4",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.CODE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding snoop code requests"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x4",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.CODE_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles snoop code requests queued",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x1",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.DATA",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding snoop data requests"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x1",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.DATA_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles snoop data requests queued",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x2",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding snoop invalidate requests"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x2",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles snoop invalidate requests queued",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xF6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SQ_FULL_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue full stall cycles"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-dp/pipeline.json b/pmu-events/arch/x86/westmereep-dp/pipeline.json
new file mode 100644
index 0000000..f130510
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-dp/pipeline.json
@@ -0,0 +1,899 @@
+[
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.CYCLES_DIV_BUSY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles the divider is busy"
+    },
+    {
+        "EventCode": "0x14",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.DIV",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Divide Operations executed",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ARITH.MUL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Multiply operations executed"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BACLEAR.BAD_TARGET",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "BACLEAR asserted with bad target address"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BACLEAR.CLEAR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "BACLEAR asserted, regardless of cause"
+    },
+    {
+        "EventCode": "0xA7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BACLEAR_FORCE_IQ",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instruction queue forced BACLEAR"
+    },
+    {
+        "EventCode": "0xE0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Branch instructions decoded"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7f",
+        "EventName": "BR_INST_EXEC.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Branch instructions executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_EXEC.COND",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Conditional branch instructions executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_EXEC.DIRECT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Unconditional branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_INST_EXEC.DIRECT_NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Unconditional call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_INST_EXEC.INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_EXEC.INDIRECT_NON_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect non call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "BR_INST_EXEC.NEAR_CALLS",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "BR_INST_EXEC.NON_CALLS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All non call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_INST_EXEC.RETURN_NEAR",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect return branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_INST_EXEC.TAKEN",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Taken branches executed"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired conditional branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Retired near call instructions (Precise Event)"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7f",
+        "EventName": "BR_MISP_EXEC.ANY",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_EXEC.COND",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted conditional branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_EXEC.DIRECT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted unconditional branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_MISP_EXEC.DIRECT_NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted direct call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_MISP_EXEC.INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted indirect call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_EXEC.INDIRECT_NON_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted indirect non call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "BR_MISP_EXEC.NEAR_CALLS",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "BR_MISP_EXEC.NON_CALLS",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted non call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_MISP_EXEC.RETURN_NEAR",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted return branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_MISP_EXEC.TAKEN",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted taken branches executed"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted retired branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted conditional retired branches (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted near retired calls (Precise Event)"
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.REF",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Reference cycles when thread is not halted (fixed counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF_P",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Reference base clock (133 MHz) cycles when thread is not halted (programmable counter)"
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when thread is not halted (fixed counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when thread is not halted (programmable counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total CPU cycles",
+        "CounterMask": "2"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "ILD_STALL.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Any Instruction Length Decoder stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ILD_STALL.IQ_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instruction Queue full stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ILD_STALL.LCP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Length Change Prefix stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ILD_STALL.MRU",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stall cycles due to BPU MRU bypass"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "ILD_STALL.REGEN",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Regen stall cycles"
+    },
+    {
+        "EventCode": "0x18",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_DECODED.DEC0",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions that must be decoded by decoder 0"
+    },
+    {
+        "EventCode": "0x1E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_QUEUE_WRITE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles instructions are written to the instruction queue"
+    },
+    {
+        "EventCode": "0x17",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_QUEUE_WRITES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions written to instruction queue"
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired (fixed counter)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired (Programmable counter and Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "INST_RETIRED.MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired MMX instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles (Precise Event)",
+        "CounterMask": "16"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "INST_RETIRED.X87",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired floating-point operations (Precise Event)"
+    },
+    {
+        "EventCode": "0x4C",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "LOAD_HIT_PRE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Load operations conflicting with software prefetches"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.ACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when uops were delivered by the LSD",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xA8",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.INACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no uops were delivered by the LSD",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x20",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD_OVERFLOW",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loops that can't stream from the instruction queue"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACHINE_CLEARS.CYCLES",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Cycles machine clear asserted"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEM_ORDER",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Execution pipeline restart due to Memory ordering conflicts"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Self-Modifying Code detected"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Resource related stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "RESOURCE_STALLS.FPCW",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "FPU control word write stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RESOURCE_STALLS.LOAD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Load buffer stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "RESOURCE_STALLS.MXCSR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "MXCSR rename stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "RESOURCE_STALLS.OTHER",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Other Resource related stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RESOURCE_STALLS.ROB_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ROB full stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RESOURCE_STALLS.RS_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Reservation Station full stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RESOURCE_STALLS.STORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Store buffer stall cycles"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SSEX_UOPS_RETIRED.PACKED_DOUBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Packed-Double Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SSEX_UOPS_RETIRED.PACKED_SINGLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Packed-Single Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SSEX_UOPS_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Scalar-Double Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SSEX_UOPS_RETIRED.SCALAR_SINGLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Scalar-Single Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SSEX_UOPS_RETIRED.VECTOR_INTEGER",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Vector Integer Uops retired (Precise Event)"
+    },
+    {
+        "EventCode": "0xDB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOP_UNFUSION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uop unfusions due to FP exceptions"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_DECODED.ESP_FOLDING",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stack pointer instructions decoded"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "UOPS_DECODED.ESP_SYNC",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stack pointer sync operations"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DECODED.MS_CYCLES_ACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops decoded by Microcode Sequencer",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xD1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DECODED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops are decoded",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops executed on any port (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops executed on ports 0-4 (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on any port (core count)",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on ports 0-4 (core count)",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on any port (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on ports 0-4 (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.PORT0",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 0"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_EXECUTED.PORT015",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued on ports 0, 1 or 5"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_EXECUTED.PORT015_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on ports 0, 1 or 5",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.PORT1",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT2_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 2 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT234_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued on ports 2, 3 or 4"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT3_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 3 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT4_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 4 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_EXECUTED.PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 5"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued"
+    },
+    {
+        "EventCode": "0xE",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops were issued on any thread",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CYCLES_ALL_THREADS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops were issued on either thread",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_ISSUED.FUSED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Fused Uops issued"
+    },
+    {
+        "EventCode": "0xE",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops were issued",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ACTIVE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops are being retired",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_RETIRED.MACRO_FUSED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Macro-fused Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retirement slots used (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops are not retiring (Precise Event)",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles using precise uop retired event (Precise Event)",
+        "CounterMask": "16"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC0",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles (Precise Event)",
+        "CounterMask": "16"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-dp/virtual-memory.json b/pmu-events/arch/x86/westmereep-dp/virtual-memory.json
new file mode 100644
index 0000000..57b5356
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-dp/virtual-memory.json
@@ -0,0 +1,173 @@
+[
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_LOAD_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load misses"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "DTLB_LOAD_MISSES.LARGE_WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss large page walks"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_LOAD_MISSES.PDE_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss caused by low part of address"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "DTLB second level hit"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss page walks complete"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_LOAD_MISSES.WALK_CYCLES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss page walk cycles"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB misses"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "DTLB_MISSES.LARGE_WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB miss large page walks"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_MISSES.PDE_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB misses caused by low part of address"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB first level misses but second level hit"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB miss page walks"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_MISSES.WALK_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "DTLB miss page walk cycles"
+    },
+    {
+        "EventCode": "0x4F",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "EPT.WALK_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Extended Page Table walk cycles"
+    },
+    {
+        "EventCode": "0xAE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_FLUSH",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ITLB flushes"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ITLB_MISS_RETIRED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired instructions that missed the ITLB (Precise Event)"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB miss"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "ITLB_MISSES.LARGE_WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB miss large page walks"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB miss page walks"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ITLB_MISSES.WALK_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ITLB miss page walk cycles"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "MEM_LOAD_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that miss the DTLB (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_STORE_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired stores that miss the DTLB (Precise Event)"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-sp/cache.json b/pmu-events/arch/x86/westmereep-sp/cache.json
new file mode 100644
index 0000000..dad20f0
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-sp/cache.json
@@ -0,0 +1,3233 @@
+[
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "CACHE_LOCK_CYCLES.L1D",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles L1D locked"
+    },
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CACHE_LOCK_CYCLES.L1D_L2",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles L1D and L2 locked"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D.M_EVICT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D cache lines replaced in M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D.M_REPL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D cache lines allocated in the M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "L1D.M_SNOOP_EVICT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D snoop eviction of cache lines in M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D.REPL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache lines allocated"
+    },
+    {
+        "EventCode": "0x52",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_CACHE_PREFETCH_LOCK_FB_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D prefetch load lock accepted in fill buffer"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_PREFETCH.MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch misses"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_PREFETCH.REQUESTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch requests"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D_PREFETCH.TRIGGERS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch requests triggered"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L1D_WB_L2.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in E state"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D_WB_L2.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in I state (misses)"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L1D_WB_L2.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in M state"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L1D_WB_L2.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L1 writebacks to L2"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_WB_L2.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in S state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "L2_DATA_RQSTS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 data requests"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_DATA_RQSTS.DEMAND.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in E state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_DATA_RQSTS.DEMAND.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in I state (misses)"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_DATA_RQSTS.DEMAND.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in M state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_DATA_RQSTS.DEMAND.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand requests"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_DATA_RQSTS.DEMAND.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in S state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in E state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in the I state (misses)"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in M state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf0",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 data prefetches"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in the S state"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "L2_LINES_IN.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_IN.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated in the E state"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_IN.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated in the S state"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_LINES_OUT.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted by a demand request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 modified lines evicted by a demand request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_OUT.PREFETCH_CLEAN",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted by a prefetch request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_LINES_OUT.PREFETCH_DIRTY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 modified lines evicted by a prefetch request"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_RQSTS.IFETCH_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_RQSTS.IFETCH_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "L2_RQSTS.IFETCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetches"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_RQSTS.LD_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_RQSTS.LD_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L2_RQSTS.LOADS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load requests"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xaa",
+        "EventName": "L2_RQSTS.MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_RQSTS.PREFETCH_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_RQSTS.PREFETCH_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc0",
+        "EventName": "L2_RQSTS.PREFETCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 prefetches"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 requests"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "L2_RQSTS.RFOS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO requests"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_TRANSACTIONS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_TRANSACTIONS.FILL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 fill transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_TRANSACTIONS.IFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_TRANSACTIONS.L1D_WB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D writeback to L2 transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_TRANSACTIONS.LOAD",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 Load transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_TRANSACTIONS.PREFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_TRANSACTIONS.RFO",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_TRANSACTIONS.WB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 writeback to LLC transactions"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_WRITE.LOCK.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in E state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe0",
+        "EventName": "L2_WRITE.LOCK.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All demand L2 lock RFOs that hit the cache"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_WRITE.LOCK.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in I state (misses)"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_WRITE.LOCK.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in M state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf0",
+        "EventName": "L2_WRITE.LOCK.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All demand L2 lock RFOs"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_WRITE.LOCK.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in S state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "L2_WRITE.RFO.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L2 demand store RFOs that hit the cache"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_WRITE.RFO.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in I state (misses)"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_WRITE.RFO.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in M state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_WRITE.RFO.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L2 demand store RFOs"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_WRITE.RFO.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in S state"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Longest latency cache miss"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Longest latency cache reference"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_INST_RETIRED.LOADS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired which contain a load (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_INST_RETIRED.STORES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired which contain a store (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "MEM_LOAD_RETIRED.HIT_LFB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that miss L1D and hit a previously allocated LFB (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_RETIRED.L1D_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired loads that hit the L1 data cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that hit the L2 cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_LOAD_RETIRED.LLC_MISS",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Retired loads that miss the LLC cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_RETIRED.LLC_UNSHARED_HIT",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit valid versions in the LLC cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_UNCORE_RETIRED.LOCAL_DRAM",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Load instructions retired with a data source of local DRAM or locally homed remote hitm (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Load instructions retired that HIT modified data in sibling core (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Load instructions retired remote cache HIT data source (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MEM_UNCORE_RETIRED.REMOTE_DRAM",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "MEM_UNCORE_RETIRED.UNCACHEABLE",
+        "SampleAfterValue": "4000",
+        "BriefDescription": "Load instructions retired IO (Precise Event)"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "OFFCORE_REQUESTS.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS.ANY.READ",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore read requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "OFFCORE_REQUESTS.ANY.RFO",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS.DEMAND.READ_CODE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code read requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS.DEMAND.READ_DATA",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data read requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS.DEMAND.RFO",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "OFFCORE_REQUESTS.L1D_WRITEBACK",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore L1 data cache writebacks"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "OFFCORE_REQUESTS.UNCACHED_MEM",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore uncached memory accesses"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ANY.READ",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding offcore reads"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ANY.READ_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles offcore reads busy",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding offcore demand code reads"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles offcore demand code read busy",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding offcore demand data reads"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles offcore demand data read busy",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding offcore demand RFOs"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles offcore demand RFOs busy",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_SQ_FULL",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests blocked due to Super Queue full"
+    },
+    {
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SQ_MISC.LRU_HINTS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue LRU hints sent to LLC"
+    },
+    {
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue lock splits across a cache line"
+    },
+    {
+        "EventCode": "0x6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "STORE_BLOCKS.AT_RET",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Loads delayed with at-Retirement block code"
+    },
+    {
+        "EventCode": "0x6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "STORE_BLOCKS.L1D_BLOCK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Cacheable loads delayed with L1D block code"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x0",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Memory instructions retired above 0 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x400",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100",
+        "BriefDescription": "Memory instructions retired above 1024 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "1000",
+        "BriefDescription": "Memory instructions retired above 128 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Memory instructions retired above 16 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x4000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "5",
+        "BriefDescription": "Memory instructions retired above 16384 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x800",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50",
+        "BriefDescription": "Memory instructions retired above 2048 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "500",
+        "BriefDescription": "Memory instructions retired above 256 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "5000",
+        "BriefDescription": "Memory instructions retired above 32 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x8000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "3",
+        "BriefDescription": "Memory instructions retired above 32768 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50000",
+        "BriefDescription": "Memory instructions retired above 4 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x1000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20",
+        "BriefDescription": "Memory instructions retired above 4096 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "200",
+        "BriefDescription": "Memory instructions retired above 512 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Memory instructions retired above 64 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Memory instructions retired above 8 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x2000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "10",
+        "BriefDescription": "Memory instructions retired above 8192 clocks (Precise Event)"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF11",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore data reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8011",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x111",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x211",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x411",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x711",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2711",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1811",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5811",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1011",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x811",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F44",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF44",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore code reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8044",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x144",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x244",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x444",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x744",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2744",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1844",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5844",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1044",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x844",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7FFF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFFFF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x80FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x27FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x18FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x58FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x10FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F22",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF22",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore RFO requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x122",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x222",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x422",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x722",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2722",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1822",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5822",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x822",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF08",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore writebacks",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x108",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x408",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x708",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2708",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1808",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5808",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x808",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F77",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF77",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore code or data read requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x177",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x277",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x477",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x777",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2777",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1877",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5877",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x877",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F33",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any cache_dram",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF33",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any location",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8033",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x133",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x233",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x433",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x733",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = local cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2733",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = local cache or dram",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1833",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5833",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = remote cache or dram",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1033",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x833",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF03",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand data requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8003",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x103",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x203",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x403",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x703",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2703",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1803",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5803",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1003",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x803",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F01",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF01",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand data reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x101",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x201",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x401",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x701",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2701",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1801",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5801",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x801",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F04",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF04",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand code reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x104",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x204",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x404",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x704",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2704",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1804",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5804",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x804",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F02",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF02",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand RFO requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x102",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x202",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x402",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x702",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2702",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1802",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5802",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x802",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore other requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x180",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x280",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x480",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x780",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2780",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1880",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5880",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x880",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F50",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF50",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch data requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8050",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x150",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x250",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x450",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x750",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2750",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1850",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5850",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1050",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x850",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch data reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x110",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x210",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x410",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x710",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2710",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1810",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5810",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x810",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F40",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF40",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch code reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x140",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x240",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x440",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x740",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2740",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1840",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5840",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x840",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F20",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF20",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch RFO requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x120",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x220",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x420",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x720",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2720",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1820",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5820",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x820",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x7F70",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xFF70",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x8070",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x170",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x270",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x470",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x770",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2770",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1870",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x5870",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x1070",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x870",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests that HITM in a remote cache",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-sp/floating-point.json b/pmu-events/arch/x86/westmereep-sp/floating-point.json
new file mode 100644
index 0000000..7d2f71a
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-sp/floating-point.json
@@ -0,0 +1,229 @@
+[
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_ASSIST.ALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ASSIST.INPUT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists for invalid input value (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ASSIST.OUTPUT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists for invalid output value (Precise Event)"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_COMP_OPS_EXE.MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "MMX Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE* FP double precision Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE and SSE2 FP Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP_PACKED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE FP packed Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP_SCALAR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE FP scalar Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE* FP single precision Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_COMP_OPS_EXE.SSE2_INTEGER",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE2 integer Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_COMP_OPS_EXE.X87",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Computational floating-point operations executed"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "FP_MMX_TRANS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All Floating Point to and from MMX transitions"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_MMX_TRANS.TO_FP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Transitions from MMX to Floating Point instructions"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_MMX_TRANS.TO_MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Transitions from Floating Point to MMX instructions"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SIMD_INT_128.PACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer pack operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "SIMD_INT_128.PACKED_ARITH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer arithmetic operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SIMD_INT_128.PACKED_LOGICAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer logical operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_INT_128.PACKED_MPY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer multiply operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_INT_128.PACKED_SHIFT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer shift operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "SIMD_INT_128.SHUFFLE_MOVE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer shuffle/move operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SIMD_INT_128.UNPACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer unpack operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SIMD_INT_64.PACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit pack operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "SIMD_INT_64.PACKED_ARITH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit arithmetic operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SIMD_INT_64.PACKED_LOGICAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit logical operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_INT_64.PACKED_MPY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit packed multiply operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_INT_64.PACKED_SHIFT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit shift operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "SIMD_INT_64.SHUFFLE_MOVE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit shuffle/move operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SIMD_INT_64.UNPACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit unpack operations"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-sp/frontend.json b/pmu-events/arch/x86/westmereep-sp/frontend.json
new file mode 100644
index 0000000..e5e21e0
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-sp/frontend.json
@@ -0,0 +1,26 @@
+[
+    {
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACRO_INSTS.DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions decoded"
+    },
+    {
+        "EventCode": "0xA6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACRO_INSTS.FUSIONS_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Macro-fused instructions decoded"
+    },
+    {
+        "EventCode": "0x19",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TWO_UOP_INSTS_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Two Uop instructions decoded"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-sp/memory.json b/pmu-events/arch/x86/westmereep-sp/memory.json
new file mode 100644
index 0000000..90eb6aa
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-sp/memory.json
@@ -0,0 +1,739 @@
+[
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6011",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF811",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2011",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4011",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6044",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF844",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2044",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4044",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x60FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF8FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x20FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x40FF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF822",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4022",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF808",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4008",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF877",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4077",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6033",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF833",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any LLC miss",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2033",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the local DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4033",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6003",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF803",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2003",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4003",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF801",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4001",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF804",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4004",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF802",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4002",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF880",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4080",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6050",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF850",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2050",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4050",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF810",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4010",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF840",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4040",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF820",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4020",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x6070",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0xF870",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x2070",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7, 0xBB",
+        "MSRValue": "0x4070",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-sp/other.json b/pmu-events/arch/x86/westmereep-sp/other.json
new file mode 100644
index 0000000..85133d6
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-sp/other.json
@@ -0,0 +1,287 @@
+[
+    {
+        "EventCode": "0xE8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BPU_CLEARS.EARLY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Early Branch Prediction Unit clears"
+    },
+    {
+        "EventCode": "0xE8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BPU_CLEARS.LATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Late Branch Prediction Unit clears"
+    },
+    {
+        "EventCode": "0xE5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BPU_MISSED_CALL_RET",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Branch prediction unit missed call or return"
+    },
+    {
+        "EventCode": "0xD5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ES_REG_RENAMES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ES segment renames"
+    },
+    {
+        "EventCode": "0x6C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IO_TRANSACTIONS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "I/O transactions"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L1I.CYCLES_STALLED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch stall cycles"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1I.HITS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch hits"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1I.MISSES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch misses"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L1I.READS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I Instruction fetches"
+    },
+    {
+        "EventCode": "0x82",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LARGE_ITLB.HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Large ITLB hit"
+    },
+    {
+        "EventCode": "0x3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_BLOCK.OVERLAP_STORE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Loads that partially overlap an earlier store"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "LOAD_DISPATCH.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All loads dispatched"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "LOAD_DISPATCH.MOB",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched from the MOB"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOAD_DISPATCH.RS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched that bypass the MOB"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_DISPATCH.RS_DELAYED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched from stage 305"
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "PARTIAL_ADDRESS_ALIAS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "False dependencies due to partial address aliasing"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "RAT_STALLS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All RAT stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RAT_STALLS.FLAGS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Flag stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RAT_STALLS.REGISTERS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Partial register stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RAT_STALLS.ROB_READ_PORT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ROB read port stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RAT_STALLS.SCOREBOARD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Scoreboard stall cycles"
+    },
+    {
+        "EventCode": "0x4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "SB_DRAIN.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All Store buffer stall cycles"
+    },
+    {
+        "EventCode": "0xD4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SEG_RENAME_STALLS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Segment rename stall cycles"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SNOOP_RESPONSE.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HIT to snoop"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SNOOP_RESPONSE.HITE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HITE to snoop"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SNOOP_RESPONSE.HITM",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HITM to snoop"
+    },
+    {
+        "EventCode": "0xB4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SNOOPQ_REQUESTS.CODE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Snoop code requests"
+    },
+    {
+        "EventCode": "0xB4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SNOOPQ_REQUESTS.DATA",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Snoop data requests"
+    },
+    {
+        "EventCode": "0xB4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SNOOPQ_REQUESTS.INVALIDATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Snoop invalidate requests"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x4",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.CODE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding snoop code requests"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x4",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.CODE_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles snoop code requests queued",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x1",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.DATA",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding snoop data requests"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x1",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.DATA_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles snoop data requests queued",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x2",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding snoop invalidate requests"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x2",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles snoop invalidate requests queued",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xF6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SQ_FULL_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue full stall cycles"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-sp/pipeline.json b/pmu-events/arch/x86/westmereep-sp/pipeline.json
new file mode 100644
index 0000000..f130510
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-sp/pipeline.json
@@ -0,0 +1,899 @@
+[
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.CYCLES_DIV_BUSY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles the divider is busy"
+    },
+    {
+        "EventCode": "0x14",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.DIV",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Divide Operations executed",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ARITH.MUL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Multiply operations executed"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BACLEAR.BAD_TARGET",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "BACLEAR asserted with bad target address"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BACLEAR.CLEAR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "BACLEAR asserted, regardless of cause"
+    },
+    {
+        "EventCode": "0xA7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BACLEAR_FORCE_IQ",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instruction queue forced BACLEAR"
+    },
+    {
+        "EventCode": "0xE0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Branch instructions decoded"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7f",
+        "EventName": "BR_INST_EXEC.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Branch instructions executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_EXEC.COND",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Conditional branch instructions executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_EXEC.DIRECT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Unconditional branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_INST_EXEC.DIRECT_NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Unconditional call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_INST_EXEC.INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_EXEC.INDIRECT_NON_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect non call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "BR_INST_EXEC.NEAR_CALLS",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "BR_INST_EXEC.NON_CALLS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All non call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_INST_EXEC.RETURN_NEAR",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect return branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_INST_EXEC.TAKEN",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Taken branches executed"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired conditional branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Retired near call instructions (Precise Event)"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7f",
+        "EventName": "BR_MISP_EXEC.ANY",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_EXEC.COND",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted conditional branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_EXEC.DIRECT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted unconditional branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_MISP_EXEC.DIRECT_NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted unconditional call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_MISP_EXEC.INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted indirect call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_EXEC.INDIRECT_NON_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted indirect non call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "BR_MISP_EXEC.NEAR_CALLS",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "BR_MISP_EXEC.NON_CALLS",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted non call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_MISP_EXEC.RETURN_NEAR",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted return branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_MISP_EXEC.TAKEN",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted taken branches executed"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted retired branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted conditional retired branches (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted near retired calls (Precise Event)"
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.REF",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Reference cycles when thread is not halted (fixed counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF_P",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Reference base clock (133 MHz) cycles when thread is not halted (programmable counter)"
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when thread is not halted (fixed counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when thread is not halted (programmable counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total CPU cycles",
+        "CounterMask": "2"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "ILD_STALL.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Any Instruction Length Decoder stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ILD_STALL.IQ_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instruction Queue full stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ILD_STALL.LCP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Length Change Prefix stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ILD_STALL.MRU",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stall cycles due to BPU MRU bypass"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "ILD_STALL.REGEN",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Regen stall cycles"
+    },
+    {
+        "EventCode": "0x18",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_DECODED.DEC0",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions that must be decoded by decoder 0"
+    },
+    {
+        "EventCode": "0x1E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_QUEUE_WRITE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles instructions are written to the instruction queue"
+    },
+    {
+        "EventCode": "0x17",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_QUEUE_WRITES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions written to instruction queue."
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired (fixed counter)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired (Programmable counter and Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "INST_RETIRED.MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired MMX instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles (Precise Event)",
+        "CounterMask": "16"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "INST_RETIRED.X87",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired floating-point operations (Precise Event)"
+    },
+    {
+        "EventCode": "0x4C",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "LOAD_HIT_PRE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Load operations conflicting with software prefetches"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.ACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when uops were delivered by the LSD",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xA8",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.INACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no uops were delivered by the LSD",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x20",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD_OVERFLOW",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loops that can't stream from the instruction queue"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACHINE_CLEARS.CYCLES",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Cycles machine clear asserted"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEM_ORDER",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Execution pipeline restart due to Memory ordering conflicts"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Self-Modifying Code detected"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Resource related stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "RESOURCE_STALLS.FPCW",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "FPU control word write stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RESOURCE_STALLS.LOAD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Load buffer stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "RESOURCE_STALLS.MXCSR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "MXCSR rename stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "RESOURCE_STALLS.OTHER",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Other Resource related stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RESOURCE_STALLS.ROB_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ROB full stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RESOURCE_STALLS.RS_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Reservation Station full stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RESOURCE_STALLS.STORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Store buffer stall cycles"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SSEX_UOPS_RETIRED.PACKED_DOUBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Packed-Double Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SSEX_UOPS_RETIRED.PACKED_SINGLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Packed-Single Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SSEX_UOPS_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Scalar-Double Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SSEX_UOPS_RETIRED.SCALAR_SINGLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Scalar-Single Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SSEX_UOPS_RETIRED.VECTOR_INTEGER",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Vector Integer Uops retired (Precise Event)"
+    },
+    {
+        "EventCode": "0xDB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOP_UNFUSION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uop unfusions due to FP exceptions"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_DECODED.ESP_FOLDING",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stack pointer instructions decoded"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "UOPS_DECODED.ESP_SYNC",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stack pointer sync operations"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DECODED.MS_CYCLES_ACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops decoded by Microcode Sequencer",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xD1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DECODED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops are decoded",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops executed on any port (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops executed on ports 0-4 (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on any port (core count)",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on ports 0-4 (core count)",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on any port (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on ports 0-4 (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.PORT0",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 0"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_EXECUTED.PORT015",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued on ports 0, 1 or 5"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_EXECUTED.PORT015_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on ports 0, 1 or 5",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.PORT1",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT2_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 2 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT234_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued on ports 2, 3 or 4"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT3_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 3 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT4_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 4 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_EXECUTED.PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 5"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued"
+    },
+    {
+        "EventCode": "0xE",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops were issued on any thread",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CYCLES_ALL_THREADS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops were issued on either thread",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_ISSUED.FUSED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Fused Uops issued"
+    },
+    {
+        "EventCode": "0xE",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops were issued",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ACTIVE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops are being retired",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_RETIRED.MACRO_FUSED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Macro-fused Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retirement slots used (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops are not retiring (Precise Event)",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles using precise uop retired event (Precise Event)",
+        "CounterMask": "16"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC0",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles (Precise Event)",
+        "CounterMask": "16"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereep-sp/virtual-memory.json b/pmu-events/arch/x86/westmereep-sp/virtual-memory.json
new file mode 100644
index 0000000..2153b3f
--- /dev/null
+++ b/pmu-events/arch/x86/westmereep-sp/virtual-memory.json
@@ -0,0 +1,149 @@
+[
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_LOAD_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load misses"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_LOAD_MISSES.PDE_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss caused by low part of address"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "DTLB second level hit"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss page walks complete"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_LOAD_MISSES.WALK_CYCLES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss page walk cycles"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB misses"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "DTLB_MISSES.LARGE_WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB miss large page walks"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB first level misses but second level hit"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB miss page walks"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_MISSES.WALK_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "DTLB miss page walk cycles"
+    },
+    {
+        "EventCode": "0x4F",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "EPT.WALK_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Extended Page Table walk cycles"
+    },
+    {
+        "EventCode": "0xAE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_FLUSH",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ITLB flushes"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ITLB_MISS_RETIRED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired instructions that missed the ITLB (Precise Event)"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB miss"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB miss page walks"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ITLB_MISSES.WALK_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ITLB miss page walk cycles"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "MEM_LOAD_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that miss the DTLB (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_STORE_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired stores that miss the DTLB (Precise Event)"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereex/cache.json b/pmu-events/arch/x86/westmereex/cache.json
new file mode 100644
index 0000000..f9bc7fd
--- /dev/null
+++ b/pmu-events/arch/x86/westmereex/cache.json
@@ -0,0 +1,3225 @@
+[
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "CACHE_LOCK_CYCLES.L1D",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles L1D locked"
+    },
+    {
+        "EventCode": "0x63",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "CACHE_LOCK_CYCLES.L1D_L2",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles L1D and L2 locked"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D.M_EVICT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D cache lines replaced in M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D.M_REPL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D cache lines allocated in the M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x8",
+        "EventName": "L1D.M_SNOOP_EVICT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D snoop eviction of cache lines in M state"
+    },
+    {
+        "EventCode": "0x51",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D.REPL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1 data cache lines allocated"
+    },
+    {
+        "EventCode": "0x52",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_CACHE_PREFETCH_LOCK_FB_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1D prefetch load lock accepted in fill buffer"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x2",
+        "EventName": "L1D_PREFETCH.MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch misses"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "L1D_PREFETCH.REQUESTS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch requests"
+    },
+    {
+        "EventCode": "0x4E",
+        "Counter": "0,1",
+        "UMask": "0x4",
+        "EventName": "L1D_PREFETCH.TRIGGERS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D hardware prefetch requests triggered"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L1D_WB_L2.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in E state"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1D_WB_L2.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in I state (misses)"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L1D_WB_L2.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in M state"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L1D_WB_L2.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L1 writebacks to L2"
+    },
+    {
+        "EventCode": "0x28",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1D_WB_L2.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L1 writebacks to L2 in S state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "L2_DATA_RQSTS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 data requests"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_DATA_RQSTS.DEMAND.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in E state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_DATA_RQSTS.DEMAND.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in I state (misses)"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_DATA_RQSTS.DEMAND.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in M state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_DATA_RQSTS.DEMAND.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand requests"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_DATA_RQSTS.DEMAND.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data demand loads in S state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.E_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in E state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.I_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in the I state (misses)"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.M_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in M state"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf0",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.MESI",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 data prefetches"
+    },
+    {
+        "EventCode": "0x26",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_DATA_RQSTS.PREFETCH.S_STATE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 data prefetches in the S state"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "L2_LINES_IN.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_IN.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated in the E state"
+    },
+    {
+        "EventCode": "0xF1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_IN.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated in the S state"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_LINES_OUT.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted by a demand request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 modified lines evicted by a demand request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_LINES_OUT.PREFETCH_CLEAN",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines evicted by a prefetch request"
+    },
+    {
+        "EventCode": "0xF2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_LINES_OUT.PREFETCH_DIRTY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 modified lines evicted by a prefetch request"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_RQSTS.IFETCH_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_RQSTS.IFETCH_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "L2_RQSTS.IFETCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetches"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_RQSTS.LD_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_RQSTS.LD_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L2_RQSTS.LOADS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load requests"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xaa",
+        "EventName": "L2_RQSTS.MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_RQSTS.PREFETCH_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_RQSTS.PREFETCH_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc0",
+        "EventName": "L2_RQSTS.PREFETCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 prefetches"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xff",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 requests"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO hits"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO misses"
+    },
+    {
+        "EventCode": "0x24",
+        "Counter": "0,1,2,3",
+        "UMask": "0xc",
+        "EventName": "L2_RQSTS.RFOS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO requests"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_TRANSACTIONS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All L2 transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_TRANSACTIONS.FILL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 fill transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L2_TRANSACTIONS.IFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 instruction fetch transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_TRANSACTIONS.L1D_WB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L1D writeback to L2 transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_TRANSACTIONS.LOAD",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 Load transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_TRANSACTIONS.PREFETCH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 prefetch transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_TRANSACTIONS.RFO",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 RFO transactions"
+    },
+    {
+        "EventCode": "0xF0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_TRANSACTIONS.WB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "L2 writeback to LLC transactions"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "L2_WRITE.LOCK.E_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in E state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe0",
+        "EventName": "L2_WRITE.LOCK.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All demand L2 lock RFOs that hit the cache"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "L2_WRITE.LOCK.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in I state (misses)"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "L2_WRITE.LOCK.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in M state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf0",
+        "EventName": "L2_WRITE.LOCK.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All demand L2 lock RFOs"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "L2_WRITE.LOCK.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand lock RFOs in S state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xe",
+        "EventName": "L2_WRITE.RFO.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L2 demand store RFOs that hit the cache"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L2_WRITE.RFO.I_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in I state (misses)"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "L2_WRITE.RFO.M_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in M state"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "L2_WRITE.RFO.MESI",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All L2 demand store RFOs"
+    },
+    {
+        "EventCode": "0x27",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L2_WRITE.RFO.S_STATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "L2 demand store RFOs in S state"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x41",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Longest latency cache miss"
+    },
+    {
+        "EventCode": "0x2E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4f",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Longest latency cache reference"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_INST_RETIRED.LOADS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired which contain a load (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_INST_RETIRED.STORES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired which contain a store (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "MEM_LOAD_RETIRED.HIT_LFB",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that miss L1D and hit a previously allocated LFB (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_LOAD_RETIRED.L1D_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired loads that hit the L1 data cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that hit the L2 cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "MEM_LOAD_RETIRED.LLC_MISS",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Retired loads that miss the LLC cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_LOAD_RETIRED.LLC_UNSHARED_HIT",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit valid versions in the LLC cache (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MEM_UNCORE_RETIRED.LOCAL_HITM",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Load instructions retired that HIT modified data in sibling core (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "MEM_UNCORE_RETIRED.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Load instructions retired local dram and remote cache HIT data sources (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "MEM_UNCORE_RETIRED.REMOTE_DRAM",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "MEM_UNCORE_RETIRED.UNCACHEABLE",
+        "SampleAfterValue": "4000",
+        "BriefDescription": "Load instructions retired IO (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MEM_UNCORE_RETIRED.REMOTE_HITM",
+        "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit remote socket in modified state (Precise Event)"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "OFFCORE_REQUESTS.ANY",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS.ANY.READ",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore read requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "OFFCORE_REQUESTS.ANY.RFO",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS.DEMAND.READ_CODE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code read requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS.DEMAND.READ_DATA",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data read requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS.DEMAND.RFO",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests"
+    },
+    {
+        "EventCode": "0xB0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "OFFCORE_REQUESTS.L1D_WRITEBACK",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore L1 data cache writebacks"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ANY.READ",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding offcore reads"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x8",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ANY.READ_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles offcore reads busy",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding offcore demand code reads"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x2",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles offcore demand code read busy",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding offcore demand data reads"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles offcore demand data read busy",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding offcore demand RFOs"
+    },
+    {
+        "EventCode": "0x60",
+        "UMask": "0x4",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles offcore demand RFOs busy",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_REQUESTS_SQ_FULL",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests blocked due to Super Queue full"
+    },
+    {
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SQ_MISC.LRU_HINTS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue LRU hints sent to LLC"
+    },
+    {
+        "EventCode": "0xF4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue lock splits across a cache line"
+    },
+    {
+        "EventCode": "0x6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "STORE_BLOCKS.AT_RET",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Loads delayed with at-Retirement block code"
+    },
+    {
+        "EventCode": "0x6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "STORE_BLOCKS.L1D_BLOCK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Cacheable loads delayed with L1D block code"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x0",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Memory instructions retired above 0 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x400",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "100",
+        "BriefDescription": "Memory instructions retired above 1024 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x80",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "1000",
+        "BriefDescription": "Memory instructions retired above 128 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x10",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "10000",
+        "BriefDescription": "Memory instructions retired above 16 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x4000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "5",
+        "BriefDescription": "Memory instructions retired above 16384 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x800",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50",
+        "BriefDescription": "Memory instructions retired above 2048 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x100",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "500",
+        "BriefDescription": "Memory instructions retired above 256 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x20",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "5000",
+        "BriefDescription": "Memory instructions retired above 32 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x8000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "3",
+        "BriefDescription": "Memory instructions retired above 32768 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x4",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "50000",
+        "BriefDescription": "Memory instructions retired above 4 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x1000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20",
+        "BriefDescription": "Memory instructions retired above 4096 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x200",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "200",
+        "BriefDescription": "Memory instructions retired above 512 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x40",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Memory instructions retired above 64 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x8",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Memory instructions retired above 8 clocks (Precise Event)"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xB",
+        "MSRValue": "0x2000",
+        "Counter": "3",
+        "UMask": "0x10",
+        "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192",
+        "MSRIndex": "0x3F6",
+        "SampleAfterValue": "10",
+        "BriefDescription": "Memory instructions retired above 8192 clocks (Precise Event)"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F11",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF11",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore data reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x111",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x211",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x411",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x711",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4711",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1811",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3811",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x811",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F44",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF44",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore code reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x144",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x244",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x444",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x744",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4744",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1844",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3844",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x844",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7FFF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFFFF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x80FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x47FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x18FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x38FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x10FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F22",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF22",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore RFO requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x122",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x222",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x422",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x722",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4722",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1822",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3822",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x822",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F08",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF08",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore writebacks",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x108",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x408",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x708",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4708",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1808",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3808",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x808",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F77",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF77",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore code or data read requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x177",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x277",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x477",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x777",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4777",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1877",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3877",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x877",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F33",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF33",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore data reads, RFO's and prefetches",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x133",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x233",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x433",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x733",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4733",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1833",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3833",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x833",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F03",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF03",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand data requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x103",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x203",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x403",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x703",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4703",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1803",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3803",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x803",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F01",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF01",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand data reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x101",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x201",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x401",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x701",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4701",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1801",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3801",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x801",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F04",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF04",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand code reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x104",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x204",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x404",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x704",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4704",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1804",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3804",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x804",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F02",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF02",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore demand RFO requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x102",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x202",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x402",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x702",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4702",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1802",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3802",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x802",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F80",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF80",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore other requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8080",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x180",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x280",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x480",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x780",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4780",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1880",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3880",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1080",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x880",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F30",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by any cache or DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF30",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch data requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the IO, CSR, MMIO unit.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x130",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x230",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x430",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x730",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4730",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1830",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3830",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x830",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F10",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF10",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch data reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x110",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x210",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x410",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x710",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4710",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1810",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3810",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x810",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F40",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF40",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch code reads",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x140",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x240",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x440",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x740",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4740",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1840",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3840",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x840",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F20",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF20",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch RFO requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x120",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x220",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x420",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x720",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4720",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1820",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3820",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x820",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests that HITM in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x7F70",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by any cache or DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xFF70",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LOCATION",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "All offcore prefetch requests",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x8070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.IO_CSR_MMIO",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the IO, CSR, MMIO unit",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x170",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_NO_OTHER_CORE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC and not found in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x270",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HIT in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x470",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HITM in a sibling core",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x770",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4770",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the LLC or local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1870",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x3870",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by a remote cache or remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x1070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HIT",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests that HIT in a remote cache",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x870",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HITM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests that HITM in a remote cache",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereex/floating-point.json b/pmu-events/arch/x86/westmereex/floating-point.json
new file mode 100644
index 0000000..7d2f71a
--- /dev/null
+++ b/pmu-events/arch/x86/westmereex/floating-point.json
@@ -0,0 +1,229 @@
+[
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_ASSIST.ALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_ASSIST.INPUT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists for invalid input value (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xF7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_ASSIST.OUTPUT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "X87 Floating point assists for invalid output value (Precise Event)"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_COMP_OPS_EXE.MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "MMX Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE* FP double precision Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE and SSE2 FP Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP_PACKED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE FP packed Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "FP_COMP_OPS_EXE.SSE_FP_SCALAR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE FP scalar Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE* FP single precision Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "FP_COMP_OPS_EXE.SSE2_INTEGER",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "SSE2 integer Uops"
+    },
+    {
+        "EventCode": "0x10",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_COMP_OPS_EXE.X87",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Computational floating-point operations executed"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "FP_MMX_TRANS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All Floating Point to and from MMX transitions"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "FP_MMX_TRANS.TO_FP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Transitions from MMX to Floating Point instructions"
+    },
+    {
+        "EventCode": "0xCC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "FP_MMX_TRANS.TO_MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Transitions from Floating Point to MMX instructions"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SIMD_INT_128.PACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer pack operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "SIMD_INT_128.PACKED_ARITH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer arithmetic operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SIMD_INT_128.PACKED_LOGICAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer logical operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_INT_128.PACKED_MPY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer multiply operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_INT_128.PACKED_SHIFT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer shift operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "SIMD_INT_128.SHUFFLE_MOVE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer shuffle/move operations"
+    },
+    {
+        "EventCode": "0x12",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SIMD_INT_128.UNPACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "128 bit SIMD integer unpack operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SIMD_INT_64.PACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit pack operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "SIMD_INT_64.PACKED_ARITH",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit arithmetic operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SIMD_INT_64.PACKED_LOGICAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit logical operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SIMD_INT_64.PACKED_MPY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit packed multiply operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SIMD_INT_64.PACKED_SHIFT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit shift operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "SIMD_INT_64.SHUFFLE_MOVE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit shuffle/move operations"
+    },
+    {
+        "EventCode": "0xFD",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SIMD_INT_64.UNPACK",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD integer 64 bit unpack operations"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereex/frontend.json b/pmu-events/arch/x86/westmereex/frontend.json
new file mode 100644
index 0000000..e5e21e0
--- /dev/null
+++ b/pmu-events/arch/x86/westmereex/frontend.json
@@ -0,0 +1,26 @@
+[
+    {
+        "EventCode": "0xD0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACRO_INSTS.DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions decoded"
+    },
+    {
+        "EventCode": "0xA6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACRO_INSTS.FUSIONS_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Macro-fused instructions decoded"
+    },
+    {
+        "EventCode": "0x19",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "TWO_UOP_INSTS_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Two Uop instructions decoded"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereex/memory.json b/pmu-events/arch/x86/westmereex/memory.json
new file mode 100644
index 0000000..3ba555e
--- /dev/null
+++ b/pmu-events/arch/x86/westmereex/memory.json
@@ -0,0 +1,747 @@
+[
+    {
+        "EventCode": "0x5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MISALIGN_MEM_REF.STORE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Misaligned store references"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF811",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2011",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF844",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2044",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x60FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF8FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x40FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x20FF",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF822",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2022",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore RFO requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF808",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2008",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore writebacks to a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF877",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2077",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore code or data read requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF833",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore request = all data, response = any LLC miss",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the local DRAM.",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2033",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF803",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2003",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF801",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2001",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand data reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF804",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2004",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand code reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF802",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2002",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore demand RFO requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6080",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF880",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2080",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore other requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF830",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2030",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF810",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2010",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch data reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF840",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2040",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch code reads satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF820",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2020",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x6070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by any DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0xF870",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests that missed the LLC",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x4070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by the local DRAM",
+        "Offcore": "1"
+    },
+    {
+        "EventCode": "0xB7",
+        "MSRValue": "0x2070",
+        "Counter": "2",
+        "UMask": "0x1",
+        "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_DRAM",
+        "MSRIndex": "0x1A6",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore prefetch requests satisfied by a remote DRAM",
+        "Offcore": "1"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereex/other.json b/pmu-events/arch/x86/westmereex/other.json
new file mode 100644
index 0000000..85133d6
--- /dev/null
+++ b/pmu-events/arch/x86/westmereex/other.json
@@ -0,0 +1,287 @@
+[
+    {
+        "EventCode": "0xE8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BPU_CLEARS.EARLY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Early Branch Prediction Unit clears"
+    },
+    {
+        "EventCode": "0xE8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BPU_CLEARS.LATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Late Branch Prediction Unit clears"
+    },
+    {
+        "EventCode": "0xE5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BPU_MISSED_CALL_RET",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Branch prediction unit missed call or return"
+    },
+    {
+        "EventCode": "0xD5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ES_REG_RENAMES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ES segment renames"
+    },
+    {
+        "EventCode": "0x6C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "IO_TRANSACTIONS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "I/O transactions"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "L1I.CYCLES_STALLED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch stall cycles"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "L1I.HITS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch hits"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "L1I.MISSES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I instruction fetch misses"
+    },
+    {
+        "EventCode": "0x80",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3",
+        "EventName": "L1I.READS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "L1I Instruction fetches"
+    },
+    {
+        "EventCode": "0x82",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LARGE_ITLB.HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Large ITLB hit"
+    },
+    {
+        "EventCode": "0x3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_BLOCK.OVERLAP_STORE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Loads that partially overlap an earlier store"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "LOAD_DISPATCH.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All loads dispatched"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "LOAD_DISPATCH.MOB",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched from the MOB"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LOAD_DISPATCH.RS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched that bypass the MOB"
+    },
+    {
+        "EventCode": "0x13",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "LOAD_DISPATCH.RS_DELAYED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loads dispatched from stage 305"
+    },
+    {
+        "EventCode": "0x7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "PARTIAL_ADDRESS_ALIAS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "False dependencies due to partial address aliasing"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "RAT_STALLS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "All RAT stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RAT_STALLS.FLAGS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Flag stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RAT_STALLS.REGISTERS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Partial register stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RAT_STALLS.ROB_READ_PORT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ROB read port stall cycles"
+    },
+    {
+        "EventCode": "0xD2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RAT_STALLS.SCOREBOARD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Scoreboard stall cycles"
+    },
+    {
+        "EventCode": "0x4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "SB_DRAIN.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All Store buffer stall cycles"
+    },
+    {
+        "EventCode": "0xD4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SEG_RENAME_STALLS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Segment rename stall cycles"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SNOOP_RESPONSE.HIT",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HIT to snoop"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SNOOP_RESPONSE.HITE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HITE to snoop"
+    },
+    {
+        "EventCode": "0xB8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SNOOP_RESPONSE.HITM",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Thread responded HITM to snoop"
+    },
+    {
+        "EventCode": "0xB4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SNOOPQ_REQUESTS.CODE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Snoop code requests"
+    },
+    {
+        "EventCode": "0xB4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SNOOPQ_REQUESTS.DATA",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Snoop data requests"
+    },
+    {
+        "EventCode": "0xB4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SNOOPQ_REQUESTS.INVALIDATE",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Snoop invalidate requests"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x4",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.CODE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding snoop code requests"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x4",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.CODE_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles snoop code requests queued",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x1",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.DATA",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding snoop data requests"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x1",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.DATA_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles snoop data requests queued",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x2",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Outstanding snoop invalidate requests"
+    },
+    {
+        "EventCode": "0xB3",
+        "UMask": "0x2",
+        "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE_NOT_EMPTY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles snoop invalidate requests queued",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xF6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SQ_FULL_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Super Queue full stall cycles"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereex/pipeline.json b/pmu-events/arch/x86/westmereex/pipeline.json
new file mode 100644
index 0000000..799c57d
--- /dev/null
+++ b/pmu-events/arch/x86/westmereex/pipeline.json
@@ -0,0 +1,905 @@
+[
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.CYCLES_DIV_BUSY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles the divider is busy"
+    },
+    {
+        "EventCode": "0x14",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ARITH.DIV",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Divide Operations executed",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0x14",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ARITH.MUL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Multiply operations executed"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BACLEAR.BAD_TARGET",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "BACLEAR asserted with bad target address"
+    },
+    {
+        "EventCode": "0xE6",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BACLEAR.CLEAR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "BACLEAR asserted, regardless of cause"
+    },
+    {
+        "EventCode": "0xA7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BACLEAR_FORCE_IQ",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instruction queue forced BACLEAR"
+    },
+    {
+        "EventCode": "0xE0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_DECODED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Branch instructions decoded"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7f",
+        "EventName": "BR_INST_EXEC.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Branch instructions executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_EXEC.COND",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Conditional branch instructions executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_EXEC.DIRECT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Unconditional branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_INST_EXEC.DIRECT_NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Unconditional call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_INST_EXEC.INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_EXEC.INDIRECT_NON_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect non call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "BR_INST_EXEC.NEAR_CALLS",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "BR_INST_EXEC.NON_CALLS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "All non call branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_INST_EXEC.RETURN_NEAR",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect return branches executed"
+    },
+    {
+        "EventCode": "0x88",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_INST_EXEC.TAKEN",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Taken branches executed"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired conditional branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC4",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Retired near call instructions (Precise Event)"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7f",
+        "EventName": "BR_MISP_EXEC.ANY",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_EXEC.COND",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted conditional branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_EXEC.DIRECT",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted unconditional branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "BR_MISP_EXEC.DIRECT_NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted unconditional call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "BR_MISP_EXEC.INDIRECT_NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted indirect call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_EXEC.INDIRECT_NON_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted indirect non call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x30",
+        "EventName": "BR_MISP_EXEC.NEAR_CALLS",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x7",
+        "EventName": "BR_MISP_EXEC.NON_CALLS",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted non call branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "BR_MISP_EXEC.RETURN_NEAR",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted return branches executed"
+    },
+    {
+        "EventCode": "0x89",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "BR_MISP_EXEC.TAKEN",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted taken branches executed"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted retired branch instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted conditional retired branches (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC5",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+        "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted near retired calls (Precise Event)"
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.REF",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Reference cycles when thread is not halted (fixed counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "CPU_CLK_UNHALTED.REF_P",
+        "SampleAfterValue": "100000",
+        "BriefDescription": "Reference base clock (133 MHz) cycles when thread is not halted (programmable counter)"
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 2",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when thread is not halted (fixed counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when thread is not halted (programmable counter)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total CPU cycles",
+        "CounterMask": "2"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0xf",
+        "EventName": "ILD_STALL.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Any Instruction Length Decoder stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ILD_STALL.IQ_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instruction Queue full stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ILD_STALL.LCP",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Length Change Prefix stall cycles"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ILD_STALL.MRU",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stall cycles due to BPU MRU bypass"
+    },
+    {
+        "EventCode": "0x87",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "ILD_STALL.REGEN",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Regen stall cycles"
+    },
+    {
+        "EventCode": "0x18",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_DECODED.DEC0",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions that must be decoded by decoder 0"
+    },
+    {
+        "EventCode": "0x1E",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_QUEUE_WRITE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles instructions are written to the instruction queue"
+    },
+    {
+        "EventCode": "0x17",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_QUEUE_WRITES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions written to instruction queue."
+    },
+    {
+        "EventCode": "0x0",
+        "Counter": "Fixed counter 1",
+        "UMask": "0x0",
+        "EventName": "INST_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired (fixed counter)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.ANY_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired (Programmable counter and Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "INST_RETIRED.MMX",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired MMX instructions (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles (Precise Event)",
+        "CounterMask": "16"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC0",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "INST_RETIRED.X87",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retired floating-point operations (Precise Event)"
+    },
+    {
+        "EventCode": "0x4C",
+        "Counter": "0,1",
+        "UMask": "0x1",
+        "EventName": "LOAD_HIT_PRE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Load operations conflicting with software prefetches"
+    },
+    {
+        "EventCode": "0xA8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.ACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles when uops were delivered by the LSD",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xA8",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD.INACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no uops were delivered by the LSD",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0x20",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "LSD_OVERFLOW",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Loops that can't stream from the instruction queue"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MACHINE_CLEARS.CYCLES",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Cycles machine clear asserted"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "MACHINE_CLEARS.MEM_ORDER",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Execution pipeline restart due to Memory ordering conflicts"
+    },
+    {
+        "EventCode": "0xC3",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "20000",
+        "BriefDescription": "Self-Modifying Code detected"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Resource related stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "RESOURCE_STALLS.FPCW",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "FPU control word write stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "RESOURCE_STALLS.LOAD",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Load buffer stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "RESOURCE_STALLS.MXCSR",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "MXCSR rename stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "RESOURCE_STALLS.OTHER",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Other Resource related stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "RESOURCE_STALLS.ROB_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ROB full stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "RESOURCE_STALLS.RS_FULL",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Reservation Station full stall cycles"
+    },
+    {
+        "EventCode": "0xA2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "RESOURCE_STALLS.STORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Store buffer stall cycles"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "SSEX_UOPS_RETIRED.PACKED_DOUBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Packed-Double Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "SSEX_UOPS_RETIRED.PACKED_SINGLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Packed-Single Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "SSEX_UOPS_RETIRED.SCALAR_DOUBLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Scalar-Double Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "SSEX_UOPS_RETIRED.SCALAR_SINGLE",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Scalar-Single Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC7",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "SSEX_UOPS_RETIRED.VECTOR_INTEGER",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "SIMD Vector Integer Uops retired (Precise Event)"
+    },
+    {
+        "EventCode": "0x3C",
+        "Counter": "0,1,2,3",
+        "UMask": "0x0",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles thread is active"
+    },
+    {
+        "EventCode": "0xDB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOP_UNFUSION",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uop unfusions due to FP exceptions"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_DECODED.ESP_FOLDING",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stack pointer instructions decoded"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "EventName": "UOPS_DECODED.ESP_SYNC",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Stack pointer sync operations"
+    },
+    {
+        "EventCode": "0xD1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_DECODED.MS_CYCLES_ACTIVE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops decoded by Microcode Sequencer",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xD1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_DECODED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops are decoded",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops executed on any port (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops executed on ports 0-4 (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on any port (core count)",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on ports 0-4 (core count)",
+        "CounterMask": "1",
+        "EdgeDetect": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x3f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on any port (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1f",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on ports 0-4 (core count)",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_EXECUTED.PORT0",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 0"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_EXECUTED.PORT015",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued on ports 0, 1 or 5"
+    },
+    {
+        "EventCode": "0xB1",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x40",
+        "EventName": "UOPS_EXECUTED.PORT015_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops issued on ports 0, 1 or 5",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_EXECUTED.PORT1",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 1"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT2_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 2 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT234_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued on ports 2, 3 or 4"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x8",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT3_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 3 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "AnyThread": "1",
+        "EventName": "UOPS_EXECUTED.PORT4_CORE",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 4 (core count)"
+    },
+    {
+        "EventCode": "0xB1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "UOPS_EXECUTED.PORT5",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on port 5"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops issued"
+    },
+    {
+        "EventCode": "0xE",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops were issued on any thread",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "AnyThread": "1",
+        "EventName": "UOPS_ISSUED.CYCLES_ALL_THREADS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops were issued on either thread",
+        "CounterMask": "1"
+    },
+    {
+        "EventCode": "0xE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_ISSUED.FUSED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Fused Uops issued"
+    },
+    {
+        "EventCode": "0xE",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops were issued",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ACTIVE_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops are being retired",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.ANY",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "UOPS_RETIRED.MACRO_FUSED",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Macro-fused Uops retired (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Retirement slots used (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops are not retiring (Precise Event)",
+        "CounterMask": "1"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC2",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles using precise uop retired event (Precise Event)",
+        "CounterMask": "16"
+    },
+    {
+        "PEBS": "2",
+        "EventCode": "0xC0",
+        "Invert": "1",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Total cycles (Precise Event)",
+        "CounterMask": "16"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/arch/x86/westmereex/virtual-memory.json b/pmu-events/arch/x86/westmereex/virtual-memory.json
new file mode 100644
index 0000000..ad98920
--- /dev/null
+++ b/pmu-events/arch/x86/westmereex/virtual-memory.json
@@ -0,0 +1,173 @@
+[
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_LOAD_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load misses"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "DTLB_LOAD_MISSES.LARGE_WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss large page walks"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_LOAD_MISSES.PDE_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss caused by low part of address"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "DTLB second level hit"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss page walks complete"
+    },
+    {
+        "EventCode": "0x8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_LOAD_MISSES.WALK_CYCLES",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss page walk cycles"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "DTLB_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB misses"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "DTLB_MISSES.LARGE_WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB miss large page walks"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "DTLB_MISSES.PDE_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB misses caused by low part of address. Count also includes 2M page references because 2M pages do not use the PDE."
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "DTLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB first level misses but second level hit"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "DTLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB miss page walks"
+    },
+    {
+        "EventCode": "0x49",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "DTLB_MISSES.WALK_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "DTLB miss page walk cycles"
+    },
+    {
+        "EventCode": "0x4F",
+        "Counter": "0,1,2,3",
+        "UMask": "0x10",
+        "EventName": "EPT.WALK_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "Extended Page Table walk cycles"
+    },
+    {
+        "EventCode": "0xAE",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_FLUSH",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ITLB flushes"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC8",
+        "Counter": "0,1,2,3",
+        "UMask": "0x20",
+        "EventName": "ITLB_MISS_RETIRED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired instructions that missed the ITLB (Precise Event)"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "ITLB_MISSES.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB miss"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "ITLB_MISSES.LARGE_WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB miss large page walks"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x2",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB miss page walks"
+    },
+    {
+        "EventCode": "0x85",
+        "Counter": "0,1,2,3",
+        "UMask": "0x4",
+        "EventName": "ITLB_MISSES.WALK_CYCLES",
+        "SampleAfterValue": "2000000",
+        "BriefDescription": "ITLB miss page walk cycles"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xCB",
+        "Counter": "0,1,2,3",
+        "UMask": "0x80",
+        "EventName": "MEM_LOAD_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that miss the DTLB (Precise Event)"
+    },
+    {
+        "PEBS": "1",
+        "EventCode": "0xC",
+        "Counter": "0,1,2,3",
+        "UMask": "0x1",
+        "EventName": "MEM_STORE_RETIRED.DTLB_MISS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Retired stores that miss the DTLB (Precise Event)"
+    }
+]
\ No newline at end of file
diff --git a/pmu-events/jevents.c b/pmu-events/jevents.c
new file mode 100644
index 0000000..db3a594
--- /dev/null
+++ b/pmu-events/jevents.c
@@ -0,0 +1,1157 @@
+#define  _XOPEN_SOURCE 500	/* needed for nftw() */
+#define  _GNU_SOURCE		/* needed for asprintf() */
+
+/* Parse event JSON files */
+
+/*
+ * Copyright (c) 2014, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <libgen.h>
+#include <limits.h>
+#include <dirent.h>
+#include <sys/time.h>			/* getrlimit */
+#include <sys/resource.h>		/* getrlimit */
+#include <ftw.h>
+#include <sys/stat.h>
+#include <linux/list.h>
+#include "jsmn.h"
+#include "json.h"
+#include "jevents.h"
+
+int verbose;
+char *prog;	/* passed as the leading "%s:" argument of this file's messages */
+
+/* printf-style output to stderr, emitted only when var >= level. */
+int eprintf(int level, int var, const char *fmt, ...)
+{
+	va_list ap;
+	int printed = 0;
+
+	/*
+	 * A call with var below level prints nothing and returns 0;
+	 * otherwise the value returned by vfprintf() is passed back.
+	 */
+	if (var >= level) {
+		va_start(ap, fmt);
+		printed = vfprintf(stderr, fmt, ap);
+		va_end(ap);
+	}
+	return printed;
+}
+
+__attribute__((weak)) char *get_cpu_str(void)
+{
+	return NULL;	/* default of this weak symbol: no CPU string */
+}
+
+/* Append a (and the text of token bt, if any) to *dst, after sep if needed */
+static void addfield(char *map, char **dst, const char *sep,
+		     const char *a, jsmntok_t *bt)
+{
+	int tok_len = bt ? json_len(bt) : 0;
+	int cur_len = *dst ? strlen(*dst) : 0;
+	unsigned int extra = strlen(a) + 1 + strlen(sep);
+	char *grown = realloc(*dst, extra + cur_len + tok_len);
+
+	/* Allocation failed: leave *dst untouched and drop the field */
+	if (!grown)
+		return;
+	*dst = grown;
+	/* An empty or brand-new buffer starts as "" and gets no separator */
+	if (cur_len)
+		strcat(grown, sep);
+	else
+		grown[0] = 0;
+	strcat(grown, a);
+	/* Copy just the tok_len bytes of the token, found at map + start */
+	if (bt)
+		strncat(grown, map + bt->start, tok_len);
+}
+
+static void fixname(char *s)	/* lower-case the string s in place */
+{
+	for (; *s; s++)	/* tolower() is undefined for negative char values */
+		*s = tolower((unsigned char)*s);
+}
+
+/* Remove a trailing '.' (blanks may follow it); it looks ugly in perf list */
+static void fixdesc(char *s)
+{
+	char *e = s + strlen(s);	/* one past the last char considered */
+
+	while (e > s && isspace((unsigned char)e[-1]))
+		--e;
+	/* e == s: empty or all-blank string, so there is no s[-1] to test */
+	if (e > s && e[-1] == '.')
+		e[-1] = 0;
+}
+
+/*
+ * Add escapes for '\' so they are proper C strings.  Returns s itself if it
+ * has no '\', else a malloc()ed copy (NULL if the allocation fails).
+ */
+static char *fixregex(char *s)
+{
+	int len = 0;
+	int esc_count = 0;
+	char *fixed = NULL;
+	char *p, *q;
+
+	/* Count the characters and the number of '\' in string */
+	for (p = s; *p; p++) {
+		++len;
+		if (*p == '\\')
+			++esc_count;
+	}
+
+	if (esc_count == 0)
+		return s;
+
+	/* every '\' grows by one byte, plus one for the terminating NUL */
+	fixed = malloc(len + esc_count + 1);
+	if (!fixed)
+		return NULL;
+
+	/* copy over the characters, doubling each '\' */
+	q = fixed;
+	for (p = s; *p; p++) {
+		if (*p == '\\')
+			*q++ = '\\';
+		*q++ = *p;
+	}
+	*q = '\0';
+	return fixed;
+}
+
+static struct msrmap {
+	const char *num;	/* MSR number text lookup_msr() compares against */
+	const char *pname;	/* text paired with num, e.g. "ldlat=" */
+} msrmap[] = {
+	{ "0x3F6", "ldlat=" },
+	{ "0x1A6", "offcore_rsp=" },
+	{ "0x1A7", "offcore_rsp=" },
+	{ "0x3F7", "frontend=" },
+	{ NULL, NULL }		/* terminator: lookup_msr() stops at num == NULL */
+};
+
+static struct field {
+	const char *field;	/* JSON key that match_field() looks for */
+	const char *kernel;	/* prefix match_field() hands to addfield() */
+} fields[] = {
+	{ "UMask",	"umask=" },
+	{ "CounterMask", "cmask=" },
+	{ "Invert",	"inv=" },
+	{ "AnyThread",	"any=" },
+	{ "EdgeDetect",	"edge=" },
+	{ "SampleAfterValue", "period=" },
+	{ "FCMask",	"fc_mask=" },
+	{ "PortMask",	"ch_mask=" },
+	{ NULL, NULL }		/* terminator: match_field() stops here */
+};
+
+static void cut_comma(char *map, jsmntok_t *newval)
+{
+	int pos = newval->start;
+
+	/* Shrink the token so it ends just before its first comma, if any */
+	while (pos < newval->end && map[pos] != ',')
+		pos++;
+	if (pos < newval->end)
+		newval->end = pos;
+}
+
+static int match_field(char *map, jsmntok_t *field, int nz,
+		       char **event, jsmntok_t *val)
+{
+	jsmntok_t trimmed = *val;	/* private copy: cut_comma() edits it */
+	struct field *ent = fields;
+
+	while (ent->field && !(json_streq(map, field, ent->field) && nz))
+		ent++;	/* skip entries not naming 'field' (all, if !nz) */
+	if (!ent->field)
+		return 0;	/* not a fields[] key, or nz was zero */
+	cut_comma(map, &trimmed);
+	addfield(map, event, ",", ent->kernel, &trimmed);
+	return 1;	/* "<kernel><value>" was handed to addfield() */
+}
+
+static struct msrmap *lookup_msr(char *map, jsmntok_t *val)
+{
+	static bool warned;	/* the "Unknown MSR" message prints only once */
+	jsmntok_t head = *val;	/* copy that is trimmed at its first comma */
+	struct msrmap *m;
+
+	cut_comma(map, &head);
+	for (m = msrmap; m->num; m++)
+		if (json_streq(map, &head, m->num))
+			return m;	/* entry whose num equals the token */
+	if (warned)
+		return NULL;
+	warned = true;
+	pr_err("%s: Unknown MSR in event file %.*s\n", prog,
+		json_len(val), map + val->start);
+	return NULL;
+}
+
+static struct map {
+	const char *json;	/* name that field_to_perf() compares with */
+	const char *perf;	/* string field_to_perf() returns for it */
+} unit_to_pmu[] = {
+	{ "CBO", "uncore_cbox" },
+	{ "QPI LL", "uncore_qpi" },
+	{ "SBO", "uncore_sbox" },
+	{ "iMPH-U", "uncore_arb" },
+	{}	/* all-NULL terminator: field_to_perf() stops at json == NULL */
+};
+
+static const char *field_to_perf(struct map *table, char *map, jsmntok_t *val)
+{
+	struct map *row;
+
+	/* Return the perf name paired with the first json name equal to val */
+	for (row = table; row->json; row++)
+		if (json_streq(map, val, row->json))
+			return row->perf;
+	return NULL;	/* reached the terminator without a match */
+}
+
+#define EXPECT(e, t, m) do { if (!(e)) {			\
+	jsmntok_t *loc = (t);	/* token whose line is reported */	\
+	if (!(t)->start && (t) > tokens)			\
+		loc = (t) - 1;	/* use the preceding token instead */	\
+	pr_err("%s:%d: " m ", got %s\n", fn,			\
+	       json_line(map, loc),				\
+	       json_name(t));					\
+	err = -EIO;	/* err, fn, map, tokens: from the use site */	\
+	goto out_free;	/* label must exist at the use site */	\
+} } while (0)
+
+static char *topic;	/* current topic name; replaced by add_topic() */
+
+/* Copy of 'topic' with '-' turned into ' ' and cut at the first '.' */
+static char *get_topic(void)
+{
+	char *tp;
+	int i;
+
+	/* tp is free'd in process_one_file() */
+	i = asprintf(&tp, "%s", topic);
+	if (i < 0) {
+		pr_info("%s: asprintf() error %s\n", prog, strerror(errno));
+		return NULL;
+	}
+
+	/* stop at the NUL directly instead of calling strlen() every pass */
+	for (i = 0; tp[i]; i++) {
+		if (tp[i] == '-')
+			tp[i] = ' ';
+		else if (tp[i] == '.') {
+			tp[i] = '\0';
+			break;
+		}
+	}
+
+	return tp;
+}
+
+/*
+ * Replace the global 'topic' with a private copy of bname. The previous
+ * topic string is released first.
+ *
+ * Returns 0 on success and -ENOMEM when the copy could not be allocated.
+ */
+static int add_topic(char *bname)
+{
+	char *copy;
+
+	free(topic);
+	copy = strdup(bname);
+	topic = copy;
+	if (copy)
+		return 0;
+
+	pr_info("%s: strdup() error %s for file %s\n", prog,
+			strerror(errno), bname);
+	return -ENOMEM;
+}
+
+/*
+ * Context handed to print_events_table_entry() through the 'data' pointer
+ * of json_events().
+ */
+struct perf_entry_data {
+	FILE *outfp;	/* stream the table entries are written to */
+	char *topic;	/* value printed as the .topic member of each entry */
+};
+
+/* Non-zero while an events table has been opened but not yet terminated. */
+static int close_table;
+
+/*
+ * Write the opening line of a struct pmu_event array named tblname and
+ * remember that the table still has to be closed.
+ */
+static void print_events_table_prefix(FILE *fp, const char *tblname)
+{
+	close_table = 1;
+	fprintf(fp, "struct pmu_event %s[] = {\n", tblname);
+}
+
+/*
+ * json_events() callback: write one struct pmu_event initializer.
+ *
+ * 'data' points to a struct perf_entry_data that supplies the output stream
+ * and the topic. The description and topic members are always written; all
+ * other members are written only when the corresponding argument is set
+ * (long_desc additionally has to be non-empty).
+ *
+ * Always returns 0.
+ */
+static int print_events_table_entry(void *data, char *name, char *event,
+				    char *desc, char *long_desc,
+				    char *pmu, char *unit, char *perpkg,
+				    char *metric_expr,
+				    char *metric_name, char *metric_group)
+{
+	struct perf_entry_data *entry = data;
+	FILE *out = entry->outfp;
+
+	/*
+	 * TODO: Remove formatting chars after debugging to reduce
+	 *	 string lengths.
+	 */
+	fprintf(out, "{\n");
+
+	if (name != NULL)
+		fprintf(out, "\t.name = \"%s\",\n", name);
+	if (event != NULL)
+		fprintf(out, "\t.event = \"%s\",\n", event);
+
+	/* unconditional members */
+	fprintf(out, "\t.desc = \"%s\",\n", desc);
+	fprintf(out, "\t.topic = \"%s\",\n", entry->topic);
+
+	if (long_desc != NULL && long_desc[0] != '\0')
+		fprintf(out, "\t.long_desc = \"%s\",\n", long_desc);
+	if (pmu != NULL)
+		fprintf(out, "\t.pmu = \"%s\",\n", pmu);
+	if (unit != NULL)
+		fprintf(out, "\t.unit = \"%s\",\n", unit);
+	if (perpkg != NULL)
+		fprintf(out, "\t.perpkg = \"%s\",\n", perpkg);
+	if (metric_expr != NULL)
+		fprintf(out, "\t.metric_expr = \"%s\",\n", metric_expr);
+	if (metric_name != NULL)
+		fprintf(out, "\t.metric_name = \"%s\",\n", metric_name);
+	if (metric_group != NULL)
+		fprintf(out, "\t.metric_group = \"%s\",\n", metric_group);
+
+	fprintf(out, "},\n");
+
+	return 0;
+}
+
+/*
+ * One saved architecture standard event. Every string member is either
+ * NULL or a strdup()'ed copy made by save_arch_std_events(); the members
+ * mirror the arguments of the json_events() callback.
+ */
+struct event_struct {
+	struct list_head list;	/* link in arch_std_events */
+	char *name;
+	char *event;
+	char *desc;
+	char *long_desc;
+	char *pmu;
+	char *unit;
+	char *perpkg;
+	char *metric_expr;
+	char *metric_name;
+	char *metric_group;
+};
+
+/*
+ * The helpers below are applied to every string member of struct
+ * event_struct through FOR_ALL_EVENT_STRUCT_FIELDS(). They all expect a
+ * variable named 'es' (the struct event_struct being handled) in scope.
+ */
+
+/*
+ * Copy the variable 'field' into es->field when it is set.
+ * Jumps to the 'out_free' label of the expanding function if strdup() fails.
+ */
+#define ADD_EVENT_FIELD(field) do { if (field) {		\
+	es->field = strdup(field);				\
+	if (!es->field)						\
+		goto out_free;					\
+} } while (0)
+
+/* Release es->field. */
+#define FREE_EVENT_FIELD(field) free(es->field)
+
+/*
+ * Here 'field' is a char ** variable: when the saved event has the member
+ * set and *field is still NULL, fill *field with a copy of es->field.
+ * Makes the expanding function return -ENOMEM if strdup() fails.
+ */
+#define TRY_FIXUP_FIELD(field) do { if (es->field && !*field) {\
+	*field = strdup(es->field);				\
+	if (!*field)						\
+		return -ENOMEM;					\
+} } while (0)
+
+/* Apply 'op' to each string member name of struct event_struct. */
+#define FOR_ALL_EVENT_STRUCT_FIELDS(op) do {			\
+	op(name);						\
+	op(event);						\
+	op(desc);						\
+	op(long_desc);						\
+	op(pmu);						\
+	op(unit);						\
+	op(perpkg);						\
+	op(metric_expr);					\
+	op(metric_name);					\
+	op(metric_group);					\
+} while (0)
+
+/* All events saved by save_arch_std_events(), in the order they were read. */
+static LIST_HEAD(arch_std_events);
+
+/*
+ * Empty arch_std_events: unlink every entry and release its strings and
+ * the entry itself.
+ */
+static void free_arch_std_events(void)
+{
+	struct event_struct *es, *tmp;
+
+	list_for_each_entry_safe(es, tmp, &arch_std_events, list) {
+		list_del(&es->list);
+		FOR_ALL_EVENT_STRUCT_FIELDS(FREE_EVENT_FIELD);
+		free(es);
+	}
+}
+
+/*
+ * json_events() callback: remember one architecture standard event.
+ *
+ * A new struct event_struct is allocated, every argument that is set is
+ * copied into it, and the entry is appended to arch_std_events. 'data' is
+ * not used.
+ *
+ * Returns 0 on success and -ENOMEM if any allocation fails; in that case
+ * nothing is added to the list.
+ */
+static int save_arch_std_events(void *data, char *name, char *event,
+				char *desc, char *long_desc, char *pmu,
+				char *unit, char *perpkg, char *metric_expr,
+				char *metric_name, char *metric_group)
+{
+	struct event_struct *es;
+
+	/* zeroed so that out_free can release a partially filled entry */
+	es = calloc(1, sizeof(*es));
+	if (!es)
+		return -ENOMEM;
+	FOR_ALL_EVENT_STRUCT_FIELDS(ADD_EVENT_FIELD);
+	list_add_tail(&es->list, &arch_std_events);
+	return 0;
+out_free:
+	FOR_ALL_EVENT_STRUCT_FIELDS(FREE_EVENT_FIELD);
+	free(es);
+	return -ENOMEM;
+}
+
+/*
+ * Terminate the events table that is currently open: write the all-zero
+ * sentinel entry and the closing bracket of the array, then clear the
+ * close_table flag.
+ */
+static void print_events_table_suffix(FILE *outfp)
+{
+	close_table = 0;
+
+	fputs("{\n"
+	      "\t.name = 0,\n"
+	      "\t.event = 0,\n"
+	      "\t.desc = 0,\n"
+	      "},\n"
+	      "};\n", outfp);
+}
+
+/*
+ * Events whose perf encoding differs from what the JSON files carry.
+ * The entry with a NULL name terminates the table.
+ */
+static struct fixed {
+	const char *name;
+	const char *event;
+} fixed[] = {
+	{ .name = "inst_retired.any",
+	  .event = "event=0xc0" },
+	{ .name = "inst_retired.any_p",
+	  .event = "event=0xc0" },
+	{ .name = "cpu_clk_unhalted.ref",
+	  .event = "event=0x0,umask=0x03" },
+	{ .name = "cpu_clk_unhalted.thread",
+	  .event = "event=0x3c" },
+	{ .name = "cpu_clk_unhalted.thread_any",
+	  .event = "event=0x3c,any=1" },
+	{ .name = NULL, .event = NULL },
+};
+
+/*
+ * Handle different fixed counter encodings between JSON and perf.
+ *
+ * Returns NULL when name is NULL, the encoding from fixed[] when name
+ * matches an entry (compared case-insensitively), and the unmodified
+ * 'event' argument otherwise.
+ */
+static char *real_event(const char *name, char *event)
+{
+	const struct fixed *f;
+
+	if (name == NULL)
+		return NULL;
+
+	for (f = fixed; f->name != NULL; f++) {
+		if (strcasecmp(name, f->name) == 0)
+			return (char *)f->event;
+	}
+
+	return event;
+}
+
+/*
+ * Fill the still unset event attributes from the architecture standard
+ * event named arch_std.
+ *
+ * If the standard event carries an event string and no EventCode was given
+ * (eventcode == 0), *event is dropped first so that it is taken from the
+ * standard event as well.
+ *
+ * Returns 0 when a matching standard event was found, -ENOMEM if copying a
+ * value failed, and -1 (after printing an error) when arch_std is unknown.
+ */
+static int
+try_fixup(const char *fn, char *arch_std, char **event, char **desc,
+	  char **name, char **long_desc, char **pmu, char **filter,
+	  char **perpkg, char **unit, char **metric_expr, char **metric_name,
+	  char **metric_group, unsigned long long eventcode)
+{
+	struct event_struct *es;
+
+	/* try to find matching event from arch standard values */
+	list_for_each_entry(es, &arch_std_events, list) {
+		if (strcmp(arch_std, es->name) != 0)
+			continue;
+
+		if (es->event && !eventcode) {
+			/* allow EventCode to be overridden */
+			free(*event);
+			*event = NULL;
+		}
+		FOR_ALL_EVENT_STRUCT_FIELDS(TRY_FIXUP_FIELD);
+		return 0;
+	}
+
+	pr_err("%s: could not find matching %s for %s\n",
+					prog, arch_std, fn);
+	return -1;
+}
+
+/*
+ * Call func with each event in the json file.
+ *
+ * The file 'fn' is parsed with parse_json(); its top level has to be an
+ * array of objects whose members are all "name": "string value" pairs.
+ * For every object the known fields are collected into the name, event,
+ * description, pmu, unit and metric strings (unknown fields are ignored),
+ * an optional ArchStdEvent reference is resolved through try_fixup(), and
+ * func is invoked with 'data' and the collected strings. The strings are
+ * only valid for the duration of the call; they are freed afterwards.
+ *
+ * Returns -ENOENT when fn is NULL, -EIO when the file cannot be parsed or
+ * does not have the expected layout, the non-zero value returned by
+ * try_fixup() or func when one of them fails (processing stops at that
+ * event), and 0 otherwise.
+ */
+int json_events(const char *fn,
+	  int (*func)(void *data, char *name, char *event, char *desc,
+		      char *long_desc,
+		      char *pmu, char *unit, char *perpkg,
+		      char *metric_expr,
+		      char *metric_name, char *metric_group),
+	  void *data)
+{
+	int err;
+	size_t size;
+	jsmntok_t *tokens, *tok;
+	int i, j, len;
+	char *map;
+	char buf[128];
+
+	if (!fn)
+		return -ENOENT;
+
+	tokens = parse_json(fn, &map, &size, &len);
+	if (!tokens)
+		return -EIO;
+	EXPECT(tokens->type == JSMN_ARRAY, tokens, "expected top level array");
+	tok = tokens + 1;
+	for (i = 0; i < tokens->size; i++) {
+		char *event = NULL, *desc = NULL, *name = NULL;
+		char *long_desc = NULL;
+		char *extra_desc = NULL;
+		char *pmu = NULL;
+		char *filter = NULL;
+		char *perpkg = NULL;
+		char *unit = NULL;
+		char *metric_expr = NULL;
+		char *metric_name = NULL;
+		char *metric_group = NULL;
+		char *arch_std = NULL;
+		unsigned long long eventcode = 0;
+		struct msrmap *msr = NULL;
+		jsmntok_t *msrval = NULL;
+		jsmntok_t *precise = NULL;
+		jsmntok_t *obj = tok++;
+
+		EXPECT(obj->type == JSMN_OBJECT, obj, "expected object");
+		/* members come as consecutive (name, value) token pairs */
+		for (j = 0; j < obj->size; j += 2) {
+			jsmntok_t *field, *val;
+			int nz;
+			char *s;
+
+			field = tok + j;
+			EXPECT(field->type == JSMN_STRING, tok + j,
+			       "Expected field name");
+			val = tok + j + 1;
+			EXPECT(val->type == JSMN_STRING, tok + j + 1,
+			       "Expected string value");
+
+			/* nz: the value is something other than "0" */
+			nz = !json_streq(map, val, "0");
+			if (match_field(map, field, nz, &event, val)) {
+				/* ok */
+			} else if (json_streq(map, field, "EventCode")) {
+				char *code = NULL;
+				addfield(map, &code, "", "", val);
+				eventcode |= strtoul(code, NULL, 0);
+				free(code);
+			} else if (json_streq(map, field, "ExtSel")) {
+				char *code = NULL;
+				addfield(map, &code, "", "", val);
+				eventcode |= strtoul(code, NULL, 0) << 21;
+				free(code);
+			} else if (json_streq(map, field, "EventName")) {
+				addfield(map, &name, "", "", val);
+			} else if (json_streq(map, field, "BriefDescription")) {
+				addfield(map, &desc, "", "", val);
+				fixdesc(desc);
+			} else if (json_streq(map, field,
+					     "PublicDescription")) {
+				addfield(map, &long_desc, "", "", val);
+				fixdesc(long_desc);
+			} else if (json_streq(map, field, "PEBS") && nz) {
+				precise = val;
+			} else if (json_streq(map, field, "MSRIndex") && nz) {
+				msr = lookup_msr(map, val);
+			} else if (json_streq(map, field, "MSRValue")) {
+				msrval = val;
+			} else if (json_streq(map, field, "Errata") &&
+				   !json_streq(map, val, "null")) {
+				addfield(map, &extra_desc, ". ",
+					" Spec update: ", val);
+			} else if (json_streq(map, field, "Data_LA") && nz) {
+				addfield(map, &extra_desc, ". ",
+					" Supports address when precise",
+					NULL);
+			} else if (json_streq(map, field, "Unit")) {
+				const char *ppmu;
+
+				ppmu = field_to_perf(unit_to_pmu, map, val);
+				if (ppmu) {
+					pmu = strdup(ppmu);
+				} else {
+					/* unknown unit: "uncore_" + lower-cased unit name */
+					if (!pmu)
+						pmu = strdup("uncore_");
+					addfield(map, &pmu, "", "", val);
+					for (s = pmu; *s; s++)
+						*s = tolower(*s);
+				}
+				addfield(map, &desc, ". ", "Unit: ", NULL);
+				addfield(map, &desc, "", pmu, NULL);
+				addfield(map, &desc, "", " ", NULL);
+			} else if (json_streq(map, field, "Filter")) {
+				addfield(map, &filter, "", "", val);
+			} else if (json_streq(map, field, "ScaleUnit")) {
+				addfield(map, &unit, "", "", val);
+			} else if (json_streq(map, field, "PerPkg")) {
+				addfield(map, &perpkg, "", "", val);
+			} else if (json_streq(map, field, "MetricName")) {
+				addfield(map, &metric_name, "", "", val);
+			} else if (json_streq(map, field, "MetricGroup")) {
+				addfield(map, &metric_group, "", "", val);
+			} else if (json_streq(map, field, "MetricExpr")) {
+				addfield(map, &metric_expr, "", "", val);
+				for (s = metric_expr; *s; s++)
+					*s = tolower(*s);
+			} else if (json_streq(map, field, "ArchStdEvent")) {
+				addfield(map, &arch_std, "", "", val);
+				for (s = arch_std; *s; s++)
+					*s = tolower(*s);
+			}
+			/* ignore unknown fields */
+		}
+		if (precise && desc && !strstr(desc, "(Precise Event)")) {
+			if (json_streq(map, precise, "2"))
+				addfield(map, &extra_desc, " ",
+						"(Must be precise)", NULL);
+			else
+				addfield(map, &extra_desc, " ",
+						"(Precise event)", NULL);
+		}
+		snprintf(buf, sizeof buf, "event=%#llx", eventcode);
+		addfield(map, &event, ",", buf, NULL);
+		if (desc && extra_desc)
+			addfield(map, &desc, " ", extra_desc, NULL);
+		if (long_desc && extra_desc)
+			addfield(map, &long_desc, " ", extra_desc, NULL);
+		if (filter)
+			addfield(map, &event, ",", filter, NULL);
+		if (msr != NULL)
+			addfield(map, &event, ",", msr->pname, msrval);
+		if (name)
+			fixname(name);
+
+		if (arch_std) {
+			/*
+			 * An arch standard event is referenced, so try to
+			 * fixup any unassigned values.
+			 */
+			err = try_fixup(fn, arch_std, &event, &desc, &name,
+					&long_desc, &pmu, &filter, &perpkg,
+					&unit, &metric_expr, &metric_name,
+					&metric_group, eventcode);
+			if (err)
+				goto free_strings;
+		}
+		err = func(data, name, real_event(name, event), desc, long_desc,
+			   pmu, unit, perpkg, metric_expr, metric_name, metric_group);
+free_strings:
+		free(event);
+		free(desc);
+		free(name);
+		free(long_desc);
+		free(extra_desc);
+		free(pmu);
+		free(filter);
+		free(perpkg);
+		free(unit);
+		free(metric_expr);
+		free(metric_name);
+		free(metric_group);
+		free(arch_std);
+
+		/*
+		 * Leave with the error of try_fixup()/func intact. Falling
+		 * through to the end-of-file check below would replace it
+		 * with a bogus "unexpected objects at end" error, or even
+		 * with 0.
+		 */
+		if (err)
+			goto out_free;
+		tok += j;
+	}
+	EXPECT(tok - tokens == len, tok, "unexpected objects at end");
+	err = 0;
+out_free:
+	free_json(map, size, tokens);
+	return err;
+}
+
+/*
+ * Derive a C identifier for an events table from a file or directory name.
+ *
+ * The name is prefixed with "pme_", every '-' and '/' is replaced by '_',
+ * and the result is cut at the first '.'. Any other character that is not
+ * alphanumeric or '_' makes the conversion fail.
+ *
+ * Returns a newly allocated string owned by the caller, or NULL on
+ * allocation failure or when fname contains an invalid character.
+ */
+static char *file_name_to_table_name(char *fname)
+{
+	unsigned int i;
+	int n;
+	int c;
+	char *tblname;
+
+	/*
+	 * Ensure tablename starts with alphabetic character.
+	 * Derive rest of table name from basename of the JSON file,
+	 * replacing hyphens and stripping out .json suffix.
+	 */
+	n = asprintf(&tblname, "pme_%s", fname);
+	if (n < 0) {
+		pr_info("%s: asprintf() error %s for file %s\n", prog,
+				strerror(errno), fname);
+		return NULL;
+	}
+
+	for (i = 0; tblname[i]; i++) {
+		/* <ctype.h> functions need an unsigned char value */
+		c = (unsigned char)tblname[i];
+
+		if (c == '-' || c == '/')
+			tblname[i] = '_';
+		else if (c == '.') {
+			tblname[i] = '\0';
+			break;
+		} else if (!isalnum(c) && c != '_') {
+			pr_err("%s: Invalid character '%c' in file name %s\n",
+					prog, c, basename(fname));
+			free(tblname);
+			tblname = NULL;
+			break;
+		}
+	}
+
+	return tblname;
+}
+
+/* Write the opening line of the pmu_events_map[] array. */
+static void print_mapping_table_prefix(FILE *outfp)
+{
+	fputs("struct pmu_events_map pmu_events_map[] = {\n", outfp);
+}
+
+/*
+ * Finish the pmu_events_map[] array: write the terminating all-zero entry
+ * followed by the closing curly bracket of the array.
+ */
+static void print_mapping_table_suffix(FILE *outfp)
+{
+	/* the terminating, NULL entry */
+	fputs("{\n"
+	      "\t.cpuid = 0,\n"
+	      "\t.version = 0,\n"
+	      "\t.type = 0,\n"
+	      "\t.table = 0,\n"
+	      "},\n", outfp);
+
+	/* and finally, the closing curly bracket for the struct */
+	fputs("};\n", outfp);
+}
+
+/*
+ * Convert the CSV mapfile 'fpath' into the pmu_events_map[] table.
+ *
+ * The first line is skipped as a header; empty lines and lines starting
+ * with '#' are ignored. Every other line is split at commas into cpuid,
+ * version, file name and type, and one table entry is written to outfp.
+ * The table name is derived from the file name with
+ * file_name_to_table_name().
+ *
+ * Returns 0 on success and -1 if the line buffer cannot be allocated, the
+ * file cannot be opened, or a line does not fit into the buffer. The line
+ * buffer, the input stream and the per-line table names are released on
+ * every path.
+ */
+static int process_mapfile(FILE *outfp, char *fpath)
+{
+	int n = 16384;
+	FILE *mapfp;
+	char *save = NULL;
+	char *line, *p;
+	int line_num;
+	char *tblname;
+	int ret = 0;
+
+	pr_info("%s: Processing mapfile %s\n", prog, fpath);
+
+	line = malloc(n);
+	if (!line)
+		return -1;
+
+	mapfp = fopen(fpath, "r");
+	if (!mapfp) {
+		pr_info("%s: Error %s opening %s\n", prog, strerror(errno),
+				fpath);
+		free(line);
+		return -1;
+	}
+
+	print_mapping_table_prefix(outfp);
+
+	/* Skip first line (header) */
+	p = fgets(line, n, mapfp);
+	if (!p)
+		goto out;
+
+	line_num = 1;
+	while (1) {
+		char *cpuid, *version, *type, *fname;
+
+		line_num++;
+		p = fgets(line, n, mapfp);
+		if (!p)
+			break;
+
+		if (line[0] == '#' || line[0] == '\n')
+			continue;
+
+		if (line[strlen(line)-1] != '\n') {
+			/* TODO Deal with lines longer than 16K */
+			pr_info("%s: Mapfile %s: line %d too long, aborting\n",
+					prog, fpath, line_num);
+			/* as before, the table is left unterminated here */
+			ret = -1;
+			goto out_close;
+		}
+		line[strlen(line)-1] = '\0';
+
+		cpuid = fixregex(strtok_r(p, ",", &save));
+		version = strtok_r(NULL, ",", &save);
+		fname = strtok_r(NULL, ",", &save);
+		type = strtok_r(NULL, ",", &save);
+
+		tblname = file_name_to_table_name(fname);
+		fprintf(outfp, "{\n");
+		fprintf(outfp, "\t.cpuid = \"%s\",\n", cpuid);
+		fprintf(outfp, "\t.version = \"%s\",\n", version);
+		fprintf(outfp, "\t.type = \"%s\",\n", type);
+
+		/*
+		 * CHECK: We can't use the type (eg "core") field in the
+		 * table name. For us to do that, we need to somehow tweak
+		 * the other caller of file_name_to_table(), process_json()
+		 * to determine the type. process_json() file has no way
+		 * of knowing these are "core" events unless file name has
+		 * core in it. If filename has core in it, we can safely
+		 * ignore the type field here also.
+		 */
+		fprintf(outfp, "\t.table = %s\n", tblname);
+		fprintf(outfp, "},\n");
+
+		/* the name was only needed for the line just written */
+		free(tblname);
+	}
+
+out:
+	print_mapping_table_suffix(outfp);
+out_close:
+	fclose(mapfp);
+	free(line);
+	return ret;
+}
+
+/*
+ * Fallback used when the JSON and map files cannot be located or
+ * processed: (re)create output_file with nothing but an empty
+ * pmu_events_map[] table, so that perf still builds without the aliases.
+ *
+ * Terminates the process with status 1 if output_file cannot be opened.
+ */
+static void create_empty_mapping(const char *output_file)
+{
+	FILE *fp;
+
+	pr_info("%s: Creating empty pmu_events_map[] table\n", prog);
+
+	/* Truncate file to clear any partial writes to it */
+	fp = fopen(output_file, "w");
+	if (fp == NULL) {
+		perror("fopen()");
+		_Exit(1);
+	}
+
+	fprintf(fp, "#include \"../../pmu-events/pmu-events.h\"\n");
+	print_mapping_table_prefix(fp);
+	print_mapping_table_suffix(fp);
+
+	fclose(fp);
+}
+
+/*
+ * Number of file descriptors nftw() may keep open at the same time:
+ * half of the hard RLIMIT_NOFILE limit, but never more than 512 and never
+ * less than 1. 512 is also used when the limit cannot be queried or is
+ * unlimited.
+ */
+static int get_maxfds(void)
+{
+	struct rlimit rlim;
+
+	if (getrlimit(RLIMIT_NOFILE, &rlim) != 0)
+		return 512;
+
+	/*
+	 * RLIM_INFINITY must not be squeezed into an int: it would turn
+	 * into -1 and the result into 0 descriptors.
+	 */
+	if (rlim.rlim_max == RLIM_INFINITY || rlim.rlim_max / 2 >= 512)
+		return 512;
+
+	if (rlim.rlim_max / 2 < 1)
+		return 1;
+
+	return (int)(rlim.rlim_max / 2);
+}
+
+/*
+ * nftw() doesn't let us pass an argument to the processing function,
+ * so use global variables.
+ */
+static FILE *eventsfp;	/* output stream the event tables are written to */
+static char *mapfile;	/* path of mapfile.csv, recorded by process_one_file() */
+
+/*
+ * Tell whether the directory fpath has no sub-directories.
+ *
+ * Entries whose type is not reported by readdir() (DT_UNKNOWN) are checked
+ * with stat(). The scan stops early when such an entry cannot be stat'ed
+ * or its path does not fit into PATH_MAX.
+ *
+ * Returns 0 if fpath cannot be opened or contains a directory other than
+ * "." and "..", and 1 otherwise.
+ */
+static int is_leaf_dir(const char *fpath)
+{
+	DIR *d;
+	struct dirent *dir;
+	int res = 1;
+
+	d = opendir(fpath);
+	if (!d)
+		return 0;
+
+	while ((dir = readdir(d)) != NULL) {
+		if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
+			continue;
+
+		if (dir->d_type == DT_DIR) {
+			res = 0;
+			break;
+		} else if (dir->d_type == DT_UNKNOWN) {
+			char path[PATH_MAX];
+			struct stat st;
+			int len;
+
+			/* bounded: never write past the end of path[] */
+			len = snprintf(path, sizeof(path), "%s/%s", fpath,
+				       dir->d_name);
+			if (len < 0 || (size_t)len >= sizeof(path))
+				break;
+			if (stat(path, &st))
+				break;
+
+			if (S_ISDIR(st.st_mode)) {
+				res = 0;
+				break;
+			}
+		}
+	}
+
+	closedir(d);
+
+	return res;
+}
+
+/*
+ * Returns 1 when name ends in ".json" (case-sensitive) and 0 otherwise.
+ */
+static int is_json_file(const char *name)
+{
+	static const char ext[] = ".json";
+	const size_t ext_len = sizeof(ext) - 1;
+	size_t len = strlen(name);
+
+	if (len < ext_len)
+		return 0;
+
+	return memcmp(name + len - ext_len, ext, ext_len) == 0 ? 1 : 0;
+}
+
+/*
+ * nftw() callback for the first pass over the tree: every regular ".json"
+ * file found at level 1 is read with json_events() and its events are
+ * stored through save_arch_std_events(). Everything else is skipped.
+ *
+ * Returns the result of json_events() for processed files and 0 for
+ * skipped entries.
+ */
+static int preprocess_arch_std_files(const char *fpath, const struct stat *sb,
+				int typeflag, struct FTW *ftwbuf)
+{
+	if (ftwbuf->level != 1 || typeflag != FTW_F || !is_json_file(fpath))
+		return 0;
+
+	return json_events(fpath, save_arch_std_events, (void *)sb);
+}
+
+/*
+ * nftw() callback for the second pass over the tree.
+ *
+ *  - A model directory (a level 2 directory, or a level 1 directory without
+ *    sub-directories) closes the table that is still open and starts a new
+ *    one named after the directory.
+ *  - A level 1 file named "mapfile.csv" is remembered in 'mapfile'; other
+ *    level 1 files are ignored.
+ *  - Deeper files without a ".json" suffix are ignored; ".json" files
+ *    become the current topic and their events are appended to the open
+ *    table through json_events().
+ *  - The base directory and anything deeper than level 3 is skipped.
+ *
+ * Returns 0 on success or for skipped entries, -1 if no table name can be
+ * derived, -ENOMEM on allocation failure, or the result of json_events().
+ */
+static int process_one_file(const char *fpath, const struct stat *sb,
+			    int typeflag, struct FTW *ftwbuf)
+{
+	char *tblname, *bname;
+	int is_dir  = typeflag == FTW_D;
+	int is_file = typeflag == FTW_F;
+	int level   = ftwbuf->level;
+	int err = 0;
+
+	if (level == 2 && is_dir) {
+		/*
+		 * For level 2 directory, bname will include parent name,
+		 * like vendor/platform. So search back from platform dir
+		 * to find this.
+		 */
+		bname = (char *) fpath + ftwbuf->base - 2;
+		for (;;) {
+			if (*bname == '/')
+				break;
+			bname--;
+		}
+		bname++;
+	} else
+		bname = (char *) fpath + ftwbuf->base;
+
+	pr_debug("%s %d %7jd %-20s %s\n",
+		 is_file ? "f" : is_dir ? "d" : "x",
+		 level, sb->st_size, bname, fpath);
+
+	/* base dir or too deep */
+	if (level == 0 || level > 3)
+		return 0;
+
+
+	/* model directory, reset topic */
+	if ((level == 1 && is_dir && is_leaf_dir(fpath)) ||
+	    (level == 2 && is_dir)) {
+		if (close_table)
+			print_events_table_suffix(eventsfp);
+
+		/*
+		 * Drop file name suffix. Replace hyphens with underscores.
+		 * Fail if file name contains any non-alphanumeric characters
+		 * besides underscores.
+		 */
+		tblname = file_name_to_table_name(bname);
+		if (!tblname) {
+			pr_info("%s: Error determining table name for %s\n", prog,
+				bname);
+			return -1;
+		}
+
+		print_events_table_prefix(eventsfp, tblname);
+		/* the name has been written out, the copy is not needed anymore */
+		free(tblname);
+		return 0;
+	}
+
+	/*
+	 * Save the mapfile name for now. We will process mapfile
+	 * after processing all JSON files (so we can write out the
+	 * mapping table after all PMU events tables).
+	 *
+	 */
+	if (level == 1 && is_file) {
+		if (!strcmp(bname, "mapfile.csv")) {
+			mapfile = strdup(fpath);
+			return 0;
+		}
+
+		pr_info("%s: Ignoring file %s\n", prog, fpath);
+		return 0;
+	}
+
+	/*
+	 * If the file name does not have a .json extension,
+	 * ignore it. It could be a readme.txt for instance.
+	 */
+	if (is_file) {
+		if (!is_json_file(bname)) {
+			pr_info("%s: Ignoring file without .json suffix %s\n", prog,
+				fpath);
+			return 0;
+		}
+	}
+
+	if (level > 1 && add_topic(bname))
+		return -ENOMEM;
+
+	/*
+	 * Assume all other files are JSON files.
+	 *
+	 * If mapfile refers to 'power7_core.json', we create a table
+	 * named 'power7_core'. Any inconsistencies between the mapfile
+	 * and directory tree could result in build failure due to table
+	 * names not being found.
+	 *
+	 * Atleast for now, be strict with processing JSON file names.
+	 * i.e. if JSON file name cannot be mapped to C-style table name,
+	 * fail.
+	 */
+	if (is_file) {
+		struct perf_entry_data data = {
+			.topic = get_topic(),
+			.outfp = eventsfp,
+		};
+
+		/* get_topic() has already reported the allocation failure */
+		if (!data.topic)
+			return -ENOMEM;
+
+		err = json_events(fpath, print_events_table_entry, &data);
+
+		free(data.topic);
+	}
+
+	return err;
+}
+
+#ifndef PATH_MAX
+#define PATH_MAX	4096
+#endif
+
+/*
+ * Starting in directory 'start_dirname', find the "mapfile.csv" and
+ * the set of JSON files for the architecture 'arch'.
+ *
+ * From each JSON file, create a C-style "PMU events table" from the
+ * JSON file (see struct pmu_event).
+ *
+ * From the mapfile, create a mapping between the CPU revisions and
+ * PMU event tables (see struct pmu_events_map).
+ *
+ * Write out the PMU events tables and the mapping table to pmu-event.c.
+ *
+ * Usage: <prog> <arch> <starting_dir> <output_file> [<verbose>]
+ *
+ * Exit status: 0 when the tables (or, as a fallback, an empty mapping
+ * table) were written, 1 on a usage error or when the build should fail,
+ * 2 when the output file cannot be created.
+ */
+int main(int argc, char *argv[])
+{
+	int rc;
+	int maxfds;
+	int n;
+	char ldirname[PATH_MAX];
+
+	const char *arch;
+	const char *output_file;
+	const char *start_dirname;
+	struct stat stbuf;
+
+	prog = basename(argv[0]);
+	if (argc < 4) {
+		pr_err("Usage: %s <arch> <starting_dir> <output_file>\n", prog);
+		return 1;
+	}
+
+	arch = argv[1];
+	start_dirname = argv[2];
+	output_file = argv[3];
+
+	if (argc > 4)
+		verbose = atoi(argv[4]);
+
+	eventsfp = fopen(output_file, "w");
+	if (!eventsfp) {
+		pr_err("%s Unable to create required file %s (%s)\n",
+				prog, output_file, strerror(errno));
+		return 2;
+	}
+
+	/*
+	 * Both parts come from the command line, so bound the copy. A path
+	 * that does not fit could not be stat'ed either, so it is handled
+	 * like a missing directory.
+	 */
+	n = snprintf(ldirname, sizeof(ldirname), "%s/%s", start_dirname, arch);
+	if (n < 0 || (size_t)n >= sizeof(ldirname)) {
+		pr_info("%s: Path %s/%s is too long\n", prog, start_dirname,
+			arch);
+		goto empty_map;
+	}
+
+	/* If architecture does not have any event lists, bail out */
+	if (stat(ldirname, &stbuf) < 0) {
+		pr_info("%s: Arch %s has no PMU event lists\n", prog, arch);
+		goto empty_map;
+	}
+
+	/* Include pmu-events.h first */
+	fprintf(eventsfp, "#include \"../../pmu-events/pmu-events.h\"\n");
+
+	/*
+	 * The mapfile allows multiple CPUids to point to the same JSON file,
+	 * so, not sure if there is a need for symlinks within the pmu-events
+	 * directory.
+	 *
+	 * For now, treat symlinks of JSON files as regular files and create
+	 * separate tables for each symlink (presumably, each symlink refers
+	 * to specific version of the CPU).
+	 */
+
+	maxfds = get_maxfds();
+	mapfile = NULL;
+	rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0);
+	if (rc && verbose) {
+		pr_info("%s: Error preprocessing arch standard files %s\n",
+			prog, ldirname);
+		goto empty_map;
+	} else if (rc < 0) {
+		/* Make build fail */
+		free_arch_std_events();
+		return 1;
+	} else if (rc) {
+		goto empty_map;
+	}
+
+	rc = nftw(ldirname, process_one_file, maxfds, 0);
+	if (rc && verbose) {
+		pr_info("%s: Error walking file tree %s\n", prog, ldirname);
+		goto empty_map;
+	} else if (rc < 0) {
+		/* Make build fail */
+		free_arch_std_events();
+		return 1;
+	} else if (rc) {
+		goto empty_map;
+	}
+
+	if (close_table)
+		print_events_table_suffix(eventsfp);
+
+	if (!mapfile) {
+		pr_info("%s: No CPU->JSON mapping?\n", prog);
+		goto empty_map;
+	}
+
+	rc = 0;
+	if (process_mapfile(eventsfp, mapfile)) {
+		pr_info("%s: Error processing mapfile %s\n", prog, mapfile);
+		/* Make build fail */
+		rc = 1;
+	}
+
+	/* nothing below needs these anymore */
+	fclose(eventsfp);
+	free(mapfile);
+	free_arch_std_events();
+	return rc;
+
+empty_map:
+	fclose(eventsfp);
+	create_empty_mapping(output_file);
+	free_arch_std_events();
+	return 0;
+}
diff --git a/pmu-events/jevents.h b/pmu-events/jevents.h
new file mode 100644
index 0000000..4684c67
--- /dev/null
+++ b/pmu-events/jevents.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef JEVENTS_H
+#define JEVENTS_H 1
+
+/*
+ * Parse the JSON event file 'fn' and call 'func' once for every event
+ * found in it. 'data' is passed through unchanged as the first argument of
+ * 'func'; the remaining arguments are the event's attributes, each of
+ * which may be NULL when the event does not provide it.
+ *
+ * Returns 0 on success and a non-zero error code otherwise.
+ */
+int json_events(const char *fn,
+		int (*func)(void *data, char *name, char *event, char *desc,
+				char *long_desc,
+				char *pmu,
+				char *unit, char *perpkg, char *metric_expr,
+				char *metric_name, char *metric_group),
+		void *data);
+/* NOTE(review): defined outside this header; presumably returns a CPU id string - confirm */
+char *get_cpu_str(void);
+
+#ifndef min
+/*
+ * Type-checked minimum: each argument is evaluated exactly once, and the
+ * comparison of the two temporaries' addresses lets the compiler complain
+ * when x and y have different types. Relies on the typeof and statement
+ * expression extensions.
+ */
+#define min(x, y) ({                            \
+	typeof(x) _min1 = (x);                  \
+	typeof(y) _min2 = (y);                  \
+	(void) (&_min1 == &_min2);              \
+	_min1 < _min2 ? _min1 : _min2; })
+#endif
+
+#endif
diff --git a/pmu-events/jsmn.c b/pmu-events/jsmn.c
new file mode 100644
index 0000000..11d1fa1
--- /dev/null
+++ b/pmu-events/jsmn.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2010 Serge A. Zaitsev
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Slightly modified by AK to not assume 0 terminated input.
+ */
+
+#include <stdlib.h>
+#include "jsmn.h"
+
+/*
+ * Take the next unused slot of the token array.
+ *
+ * The slot's start and end are set to -1 and its size to 0; the type is
+ * left for the caller to fill in. Returns NULL when all num_tokens slots
+ * are already in use.
+ */
+static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser,
+				   jsmntok_t *tokens, size_t num_tokens)
+{
+	jsmntok_t *fresh;
+
+	if ((unsigned)parser->toknext >= num_tokens)
+		return NULL;
+
+	fresh = tokens + parser->toknext;
+	parser->toknext++;
+
+	fresh->start = -1;
+	fresh->end = -1;
+	fresh->size = 0;
+	return fresh;
+}
+
+/*
+ * Set the type and the [start, end) boundaries of a token and reset its
+ * child count to 0.
+ */
+static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type,
+			    int start, int end)
+{
+	token->size = 0;
+	token->end = end;
+	token->start = start;
+	token->type = type;
+}
+
+/*
+ * Fills next available token with JSON primitive.
+ *
+ * Scans from parser->pos up to the first delimiter (whitespace, ',', ']'
+ * or '}'; outside strict mode also ':') and stores the span as a
+ * JSMN_PRIMITIVE token. On success parser->pos is left on the last
+ * character of the primitive, so that the caller's loop increment lands on
+ * the delimiter.
+ *
+ * Returns JSMN_SUCCESS, or with parser->pos restored to the start of the
+ * primitive:
+ *   JSMN_ERROR_INVAL  a character below 32 or at/above 127 was found,
+ *   JSMN_ERROR_PART   strict mode only: input ended before a delimiter,
+ *   JSMN_ERROR_NOMEM  no token slot is left.
+ */
+static jsmnerr_t jsmn_parse_primitive(jsmn_parser *parser, const char *js,
+				      size_t len,
+				      jsmntok_t *tokens, size_t num_tokens)
+{
+	jsmntok_t *token;
+	int start;
+
+	start = parser->pos;
+
+	for (; parser->pos < len; parser->pos++) {
+		switch (js[parser->pos]) {
+#ifndef JSMN_STRICT
+		/*
+		 * In strict mode primitive must be followed by ","
+		 * or "}" or "]"
+		 */
+		case ':':
+#endif
+		case '\t':
+		case '\r':
+		case '\n':
+		case ' ':
+		case ',':
+		case ']':
+		case '}':
+			goto found;
+		default:
+			break;
+		}
+		/* only printable ASCII is accepted inside a primitive */
+		if (js[parser->pos] < 32 || js[parser->pos] >= 127) {
+			parser->pos = start;
+			return JSMN_ERROR_INVAL;
+		}
+	}
+#ifdef JSMN_STRICT
+	/*
+	 * In strict mode primitive must be followed by a
+	 * comma/object/array.
+	 */
+	parser->pos = start;
+	return JSMN_ERROR_PART;
+#endif
+
+	/* outside strict mode, running into the end of input ends the primitive */
+found:
+	token = jsmn_alloc_token(parser, tokens, num_tokens);
+	if (token == NULL) {
+		parser->pos = start;
+		return JSMN_ERROR_NOMEM;
+	}
+	jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos);
+	parser->pos--; /* parent sees closing brackets */
+	return JSMN_SUCCESS;
+}
+
+/*
+ * Fills next token with JSON string.
+ *
+ * parser->pos has to be on the opening quote. On success a JSMN_STRING
+ * token covering the text between the quotes is stored and parser->pos is
+ * left on the closing quote.
+ *
+ * Returns JSMN_SUCCESS, or with parser->pos restored to the opening quote:
+ *   JSMN_ERROR_NOMEM  no token slot is left,
+ *   JSMN_ERROR_INVAL  a backslash is followed by an unsupported character,
+ *   JSMN_ERROR_PART   the input ends before the closing quote.
+ */
+static jsmnerr_t jsmn_parse_string(jsmn_parser *parser, const char *js,
+				   size_t len,
+				   jsmntok_t *tokens, size_t num_tokens)
+{
+	jsmntok_t *token;
+	int start = parser->pos;
+
+	/* Skip starting quote */
+	parser->pos++;
+
+	for (; parser->pos < len; parser->pos++) {
+		char c = js[parser->pos];
+
+		/* Quote: end of string */
+		if (c == '\"') {
+			token = jsmn_alloc_token(parser, tokens, num_tokens);
+			if (token == NULL) {
+				parser->pos = start;
+				return JSMN_ERROR_NOMEM;
+			}
+			jsmn_fill_token(token, JSMN_STRING, start+1,
+					parser->pos);
+			return JSMN_SUCCESS;
+		}
+
+		/* Backslash: Quoted symbol expected */
+		if (c == '\\') {
+			/*
+			 * The input is not 0 terminated, so the escaped
+			 * character must not be read beyond 'len'. A
+			 * trailing backslash is an incomplete string.
+			 */
+			if (parser->pos + 1 >= len)
+				break;
+			parser->pos++;
+			switch (js[parser->pos]) {
+				/* Allowed escaped symbols */
+			case '\"':
+			case '/':
+			case '\\':
+			case 'b':
+			case 'f':
+			case 'r':
+			case 'n':
+			case 't':
+				break;
+				/* Allows escaped symbol \uXXXX */
+			case 'u':
+				/* TODO */
+				break;
+				/* Unexpected symbol */
+			default:
+				parser->pos = start;
+				return JSMN_ERROR_INVAL;
+			}
+		}
+	}
+	parser->pos = start;
+	return JSMN_ERROR_PART;
+}
+
+/*
+ * Parse JSON string and fill tokens.
+ *
+ * Walks js[parser->pos .. len) and stores one token per object, array,
+ * string and primitive in tokens[]. While a container is open it is the
+ * "superior" token (parser->toksuper) and its size is incremented for
+ * every token created directly inside it.
+ *
+ * Returns JSMN_SUCCESS, or
+ *   JSMN_ERROR_NOMEM  num_tokens slots are not enough,
+ *   JSMN_ERROR_INVAL  mismatched or unmatched closing bracket, or an
+ *                     invalid character,
+ *   JSMN_ERROR_PART   an object, array or string is not terminated.
+ */
+jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
+		     jsmntok_t *tokens, unsigned int num_tokens)
+{
+	jsmnerr_t r;
+	int i;
+	jsmntok_t *token;
+
+	for (; parser->pos < len; parser->pos++) {
+		char c;
+		jsmntype_t type;
+
+		c = js[parser->pos];
+		switch (c) {
+		case '{':
+		case '[':
+			/* open a container; it becomes the new superior token */
+			token = jsmn_alloc_token(parser, tokens, num_tokens);
+			if (token == NULL)
+				return JSMN_ERROR_NOMEM;
+			if (parser->toksuper != -1)
+				tokens[parser->toksuper].size++;
+			token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY);
+			token->start = parser->pos;
+			parser->toksuper = parser->toknext - 1;
+			break;
+		case '}':
+		case ']':
+			type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY);
+			/* close the most recent token that is still open */
+			for (i = parser->toknext - 1; i >= 0; i--) {
+				token = &tokens[i];
+				if (token->start != -1 && token->end == -1) {
+					if (token->type != type)
+						return JSMN_ERROR_INVAL;
+					parser->toksuper = -1;
+					token->end = parser->pos + 1;
+					break;
+				}
+			}
+			/* Error if unmatched closing bracket */
+			if (i == -1)
+				return JSMN_ERROR_INVAL;
+			/* the next open token further back is the superior again */
+			for (; i >= 0; i--) {
+				token = &tokens[i];
+				if (token->start != -1 && token->end == -1) {
+					parser->toksuper = i;
+					break;
+				}
+			}
+			break;
+		case '\"':
+			r = jsmn_parse_string(parser, js, len, tokens,
+					      num_tokens);
+			if (r < 0)
+				return r;
+			if (parser->toksuper != -1)
+				tokens[parser->toksuper].size++;
+			break;
+		case '\t':
+		case '\r':
+		case '\n':
+		case ':':
+		case ',':
+		case ' ':
+			break;
+#ifdef JSMN_STRICT
+			/*
+			 * In strict mode primitives are:
+			 * numbers and booleans.
+			 */
+		case '-':
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		case '8':
+		case '9':
+		case 't':
+		case 'f':
+		case 'n':
+#else
+			/*
+			 * In non-strict mode every unquoted value
+			 * is a primitive.
+			 */
+			/*FALL THROUGH */
+		default:
+#endif
+			r = jsmn_parse_primitive(parser, js, len, tokens,
+						 num_tokens);
+			if (r < 0)
+				return r;
+			if (parser->toksuper != -1)
+				tokens[parser->toksuper].size++;
+			break;
+
+#ifdef JSMN_STRICT
+			/* Unexpected char in strict mode */
+		default:
+			return JSMN_ERROR_INVAL;
+#endif
+		}
+	}
+
+	for (i = parser->toknext - 1; i >= 0; i--) {
+		/* Unmatched opened object or array */
+		if (tokens[i].start != -1 && tokens[i].end == -1)
+			return JSMN_ERROR_PART;
+	}
+
+	return JSMN_SUCCESS;
+}
+
+/*
+ * Reset a parser to its initial state: parsing starts at offset 0, no
+ * token has been handed out yet and there is no superior token.
+ */
+void jsmn_init(jsmn_parser *parser)
+{
+	parser->toksuper = -1;
+	parser->toknext = 0;
+	parser->pos = 0;
+}
+
+/*
+ * Return a static, human readable description of a jsmn result code.
+ * Values that are not part of jsmnerr_t yield "Unknown json error".
+ */
+const char *jsmn_strerror(jsmnerr_t err)
+{
+	switch (err) {
+	case JSMN_ERROR_NOMEM:
+		return "Not enough tokens";
+	case JSMN_ERROR_INVAL:
+		return "Invalid character inside JSON string";
+	case JSMN_ERROR_PART:
+		return "The string is not a full JSON packet, more bytes expected";
+	case JSMN_SUCCESS:
+		return "Success";
+	default:
+		return "Unknown json error";
+	}
+}
diff --git a/pmu-events/jsmn.h b/pmu-events/jsmn.h
new file mode 100644
index 0000000..c7b0f6e
--- /dev/null
+++ b/pmu-events/jsmn.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __JSMN_H_
+#define __JSMN_H_
+
+/*
+ * JSON type identifier. Basic types are:
+ *	o Object
+ *	o Array
+ *	o String
+ *	o Other primitive: number, boolean (true/false) or null
+ */
+typedef enum {
+	JSMN_PRIMITIVE = 0,	/* any unquoted value */
+	JSMN_OBJECT = 1,	/* { ... } */
+	JSMN_ARRAY = 2,		/* [ ... ] */
+	JSMN_STRING = 3		/* "..." */
+} jsmntype_t;
+
+/* Result codes of the parser; every error value is negative. */
+typedef enum {
+	/* Not enough tokens were provided */
+	JSMN_ERROR_NOMEM = -1,
+	/* Invalid character inside JSON string */
+	JSMN_ERROR_INVAL = -2,
+	/* The string is not a full JSON packet, more bytes expected */
+	JSMN_ERROR_PART = -3,
+	/* Everything was fine */
+	JSMN_SUCCESS = 0
+} jsmnerr_t;
+
+/*
+ * JSON token description.
+ * @param		type	type (object, array, string etc.)
+ * @param		start	start position in JSON data string
+ * @param		end		end position in JSON data string
+ * @param		size	number of tokens created directly inside this
+ *				token (stays 0 for strings and primitives)
+ *
+ * start is inclusive and end is exclusive; for strings the surrounding
+ * quotes are not part of the range.
+ */
+typedef struct {
+	jsmntype_t type;
+	int start;
+	int end;
+	int size;
+} jsmntok_t;
+
+/*
+ * JSON parser state. It only keeps track of the position in the input and
+ * of the token array usage; the input string and the token array themselves
+ * are passed to jsmn_parse() on every call.
+ */
+typedef struct {
+	unsigned int pos; /* offset in the JSON string */
+	int toknext; /* next token to allocate */
+	int toksuper; /* superior token node, e.g parent object or array */
+} jsmn_parser;
+
+/*
+ * Reset a JSON parser to its initial state
+ */
+void jsmn_init(jsmn_parser *parser);
+
+/*
+ * Run JSON parser. It parses a JSON data string of 'len' bytes into an
+ * array of tokens, each describing a single JSON object.
+ * Returns JSMN_SUCCESS or one of the negative jsmnerr_t error codes.
+ */
+jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js,
+		     size_t len,
+		     jsmntok_t *tokens, unsigned int num_tokens);
+
+/* Return a human readable description of a parser result code */
+const char *jsmn_strerror(jsmnerr_t err);
+
+#endif /* __JSMN_H_ */
diff --git a/pmu-events/json.c b/pmu-events/json.c
new file mode 100644
index 0000000..0544398
--- /dev/null
+++ b/pmu-events/json.c
@@ -0,0 +1,162 @@
+/* Parse JSON files using the JSMN parser. */
+
+/*
+ * Copyright (c) 2014, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include "jsmn.h"
+#include "json.h"
+#include <linux/kernel.h>
+
+
+/* Map file fn privately (read/write) and store its byte length in *size;
+ * returns the mapping, or NULL if open, fstat or mmap fails. */
+static char *mapfile(const char *fn, size_t *size)
+{
+	/* size_t, not unsigned: ~(ps - 1) must keep the upper length bits set */
+	size_t ps = sysconf(_SC_PAGESIZE);
+	struct stat st;
+	char *map = NULL;
+	int fd = open(fn, O_RDONLY);
+
+	if (fd < 0) {
+		if (verbose > 0 && fn)
+			pr_err("Error opening events file '%s': %s\n", fn,
+					strerror(errno));
+		return NULL;
+	}
+	if (fstat(fd, &st) < 0)
+		goto out;
+	*size = st.st_size;
+	map = mmap(NULL,
+		   (st.st_size + ps - 1) & ~(ps - 1),
+		   PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+	if (map == MAP_FAILED)
+		map = NULL;
+out:
+	close(fd);
+	return map;
+}
+
+/* Unmap a mapping of size bytes, with size rounded up to whole pages. */
+static void unmapfile(char *map, size_t size)
+{
+	munmap(map, roundup(size, (unsigned)sysconf(_SC_PAGESIZE)));
+}
+
+/*
+ * Map the json file fn and tokenize it with jsmn. Returns the malloc'ed
+ * token array (caller frees it) and stores the mapping in *map, its byte
+ * size in *size and, when len is non-NULL, the parser's toknext in *len.
+ * Returns NULL on failure, in which case the file has been unmapped again.
+ */
+jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len)
+{
+	jsmn_parser parser;
+	jsmntok_t *tokens;
+	jsmnerr_t res;
+	unsigned sz;
+
+	*map = mapfile(fn, size);
+	if (!*map)
+		return NULL;
+	/* Heuristic: reserve 16 bytes of token space per input byte */
+	sz = *size * 16;
+	tokens = malloc(sz);
+	if (tokens) {
+		jsmn_init(&parser);
+		res = jsmn_parse(&parser, *map, *size, tokens,
+				 sz / sizeof(jsmntok_t));
+		if (res == JSMN_SUCCESS) {
+			if (len)
+				*len = parser.toknext;
+			return tokens;
+		}
+		pr_err("%s: json error %s\n", fn, jsmn_strerror(res));
+		free(tokens);
+	}
+	/* allocation or parsing failed: give the mapping back */
+	unmapfile(*map, *size);
+	return NULL;
+}
+
+void free_json(char *map, size_t size, jsmntok_t *tokens)
+{
+	unmapfile(map, size);	/* drop the size-byte file mapping */
+	free(tokens);		/* then release the token array */
+}
+
+/* Count how often c occurs among the first end bytes of map. */
+static int countchar(char *map, char c, int end)
+{
+	int hits = 0;
+	const char *p;
+	for (p = map; p - map < end; p++)
+		hits += (*p == c);
+	return hits;
+}
+
+/* Return the 1-based line number on which jsmn token t starts */
+int json_line(char *map, jsmntok_t *t)
+{
+	return 1 + countchar(map, '\n', t->start);
+}
+
+static const char * const jsmn_types[] = {
+	[JSMN_PRIMITIVE] = "primitive",
+	[JSMN_OBJECT] = "object",
+	[JSMN_ARRAY] = "array",
+	[JSMN_STRING] = "string"
+};
+/* LOOKUP(a, i): a[i], or "?" when index i is beyond the last element of a */
+#define LOOKUP(a, i) (((sizeof(a)/sizeof(*(a))) > (i)) ? ((a)[i]) : "?")
+
+/* Return the printable name of the jsmn type of token t, "?" if unknown */
+const char *json_name(jsmntok_t *t)
+{
+	return LOOKUP(jsmn_types, t->type);
+}
+
+int json_len(jsmntok_t *t)
+{
+	return t->end - t->start;	/* token length: end minus start offset */
+}
+
+/* Does token t spell s? Same length required; letter case is ignored. */
+int json_streq(char *map, jsmntok_t *t, const char *s)
+{
+	unsigned n = json_len(t);
+	return strlen(s) == n && strncasecmp(map + t->start, s, n) == 0;
+}
diff --git a/pmu-events/json.h b/pmu-events/json.h
new file mode 100644
index 0000000..fbcd5a0
--- /dev/null
+++ b/pmu-events/json.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef JSON_H
+#define JSON_H 1
+
+#include "jsmn.h"
+
+jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len);
+void free_json(char *map, size_t size, jsmntok_t *tokens);
+int json_line(char *map, jsmntok_t *t);
+const char *json_name(jsmntok_t *t);
+int json_streq(char *map, jsmntok_t *t, const char *s);
+int json_len(jsmntok_t *t);
+
+extern int verbose;
+
+#include <stdbool.h>
+
+extern int eprintf(int level, int var, const char *fmt, ...);
+#define pr_fmt(fmt)	fmt
+
+#define pr_err(fmt, ...) \
+	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_info(fmt, ...) \
+	eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_debug(fmt, ...) \
+	eprintf(2, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
+#ifndef roundup
+#define roundup(x, y) (                                \
+{                                                      \
+        const typeof(y) __y = y;                       \
+        (((x) + (__y - 1)) / __y) * __y;               \
+}                                                      \
+)
+#endif
+
+#endif
diff --git a/pmu-events/pmu-events.h b/pmu-events/pmu-events.h
new file mode 100644
index 0000000..92a4d15
--- /dev/null
+++ b/pmu-events/pmu-events.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PMU_EVENTS_H
+#define PMU_EVENTS_H
+
+/*
+ * Describe each PMU event. Each CPU has a table of PMU events.
+ */
+struct pmu_event {
+	const char *name;
+	const char *event;
+	const char *desc;
+	const char *topic;
+	const char *long_desc;
+	const char *pmu;
+	const char *unit;
+	const char *perpkg;
+	const char *metric_expr;
+	const char *metric_name;
+	const char *metric_group;
+};
+
+/*
+ *
+ * Map a CPU to its table of PMU events. The CPU is identified by the
+ * cpuid field, which is an arch-specific identifier for the CPU.
+ * The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
+ * must match the get_cpustr() in tools/perf/arch/xxx/util/header.c.
+ *
+ * The cpuid can contain any character other than the comma.
+ */
+struct pmu_events_map {
+	const char *cpuid;
+	const char *version;
+	const char *type;		/* core, uncore etc */
+	struct pmu_event *table;
+};
+
+/*
+ * Global table mapping each known CPU for the architecture to its
+ * table of PMU events.
+ */
+extern struct pmu_events_map pmu_events_map[];
+
+#endif
diff --git a/python/tracepoint.py b/python/tracepoint.py
new file mode 100755
index 0000000..eb76f65
--- /dev/null
+++ b/python/tracepoint.py
@@ -0,0 +1,48 @@
+#! /usr/bin/python
+# SPDX-License-Identifier: GPL-2.0
+# -*- python -*-
+# -*- coding: utf-8 -*-
+
+import perf
+
+class tracepoint(perf.evsel):
+    def __init__(self, sys, name):
+        config = perf.tracepoint(sys, name)
+        perf.evsel.__init__(self,
+                            type   = perf.TYPE_TRACEPOINT,
+                            config = config,
+                            freq = 0, sample_period = 1, wakeup_events = 1,
+                            sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_RAW | perf.SAMPLE_TIME)
+
+def main():
+    tp      = tracepoint("sched", "sched_switch")
+    cpus    = perf.cpu_map()
+    threads = perf.thread_map(-1)
+
+    evlist = perf.evlist(cpus, threads)
+    evlist.add(tp)
+    evlist.open()
+    evlist.mmap()
+
+    while True:
+        evlist.poll(timeout = -1)
+        for cpu in cpus:
+            event = evlist.read_on_cpu(cpu)
+            if not event:
+                continue
+
+            if not isinstance(event, perf.sample_event):
+                continue
+
+            print "time %u prev_comm=%s prev_pid=%d prev_prio=%d prev_state=0x%x ==> next_comm=%s next_pid=%d next_prio=%d" % (
+                   event.sample_time,
+                   event.prev_comm,
+                   event.prev_pid,
+                   event.prev_prio,
+                   event.prev_state,
+                   event.next_comm,
+                   event.next_pid,
+                   event.next_prio)
+
+if __name__ == '__main__':
+    main()
diff --git a/python/twatch.py b/python/twatch.py
new file mode 100755
index 0000000..0a29c5c
--- /dev/null
+++ b/python/twatch.py
@@ -0,0 +1,68 @@
+#! /usr/bin/python
+# -*- python -*-
+# -*- coding: utf-8 -*-
+#   twatch - Experimental use of the perf python interface
+#   Copyright (C) 2011 Arnaldo Carvalho de Melo <acme@redhat.com>
+#
+#   This application is free software; you can redistribute it and/or
+#   modify it under the terms of the GNU General Public License
+#   as published by the Free Software Foundation; version 2.
+#
+#   This application is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+
+import perf
+
+def main(context_switch = 0, thread = -1):
+	cpus = perf.cpu_map()
+	threads = perf.thread_map(thread)
+	evsel = perf.evsel(type	  = perf.TYPE_SOFTWARE,
+			   config = perf.COUNT_SW_DUMMY,
+			   task = 1, comm = 1, mmap = 0, freq = 0,
+			   wakeup_events = 1, watermark = 1,
+			   sample_id_all = 1, context_switch = context_switch,
+			   sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU)
+
+	"""What we want are just the PERF_RECORD_ lifetime events for threads,
+	 using the default, PERF_TYPE_HARDWARE + PERF_COUNT_HW_CYCLES & freq=1
+	 (the default), makes perf reenable irq_vectors:local_timer_entry, when
+	 disabling nohz, not good for some use cases where all we want is to get
+	 threads comes and goes... So use (perf.TYPE_SOFTWARE, perf_COUNT_SW_DUMMY,
+	 freq=0) instead."""
+
+	evsel.open(cpus = cpus, threads = threads);
+	evlist = perf.evlist(cpus, threads)
+	evlist.add(evsel)
+	evlist.mmap()
+	while True:
+		evlist.poll(timeout = -1)
+		for cpu in cpus:
+			event = evlist.read_on_cpu(cpu)
+			if not event:
+				continue
+			print("cpu: {0}, pid: {1}, tid: {2} {3}".format(event.sample_cpu,
+                                                                        event.sample_pid,
+                                                                        event.sample_tid,
+                                                                        event))
+
+if __name__ == '__main__':
+    """
+	To test the PERF_RECORD_SWITCH record, pick a pid and replace
+	in the following line.
+
+	Example output:
+
+cpu: 3, pid: 31463, tid: 31593 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31593, switch_out: 1 }
+cpu: 1, pid: 31463, tid: 31489 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31489, switch_out: 1 }
+cpu: 2, pid: 31463, tid: 31496 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31496, switch_out: 1 }
+cpu: 3, pid: 31463, tid: 31491 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31491, switch_out: 0 }
+
+	It is possible as well to use event.misc & perf.PERF_RECORD_MISC_SWITCH_OUT
+	to figure out if this is a context switch in or out of the monitored threads.
+
+	If bored, please add command line option parsing support for these options :-)
+    """
+    # main(context_switch = 1, thread = 31463)
+    main()
diff --git a/scripts/Build b/scripts/Build
new file mode 100644
index 0000000..41efd7e
--- /dev/null
+++ b/scripts/Build
@@ -0,0 +1,2 @@
+libperf-$(CONFIG_LIBPERL)   += perl/Perf-Trace-Util/
+libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/
diff --git a/scripts/perl/Perf-Trace-Util/Build b/scripts/perl/Perf-Trace-Util/Build
new file mode 100644
index 0000000..34faecf
--- /dev/null
+++ b/scripts/perl/Perf-Trace-Util/Build
@@ -0,0 +1,5 @@
+libperf-y += Context.o
+
+CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes
+CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef
+CFLAGS_Context.o += -Wno-switch-default -Wno-shadow
diff --git a/scripts/perl/Perf-Trace-Util/Context.c b/scripts/perl/Perf-Trace-Util/Context.c
new file mode 100644
index 0000000..28431d1
--- /dev/null
+++ b/scripts/perl/Perf-Trace-Util/Context.c
@@ -0,0 +1,138 @@
+/*
+ * This file was generated automatically by ExtUtils::ParseXS version 2.18_02 from the
+ * contents of Context.xs. Do not edit this file, edit Context.xs instead.
+ *
+ *	ANY CHANGES MADE HERE WILL BE LOST! 
+ *
+ */
+#include <stdbool.h>
+#ifndef HAS_BOOL
+# define HAS_BOOL 1
+#endif
+#line 1 "Context.xs"
+/*
+ * Context.xs.  XS interfaces for perf script.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+#include "../../../perf.h"
+#include "../../../util/trace-event.h"
+
+#ifndef PERL_UNUSED_VAR
+#  define PERL_UNUSED_VAR(var) if (0) var = var
+#endif
+
+#line 42 "Context.c"
+
+XS(XS_Perf__Trace__Context_common_pc); /* prototype to pass -Wmissing-prototypes */
+XS(XS_Perf__Trace__Context_common_pc)
+{
+#ifdef dVAR
+    dVAR; dXSARGS;
+#else
+    dXSARGS;
+#endif
+    if (items != 1)
+       Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_pc", "context");
+    PERL_UNUSED_VAR(cv); /* -W */
+    {
+	struct scripting_context *	context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
+	int	RETVAL;
+	dXSTARG;
+
+	RETVAL = common_pc(context);
+	XSprePUSH; PUSHi((IV)RETVAL);
+    }
+    XSRETURN(1);
+}
+
+
+XS(XS_Perf__Trace__Context_common_flags); /* prototype to pass -Wmissing-prototypes */
+XS(XS_Perf__Trace__Context_common_flags)
+{
+#ifdef dVAR
+    dVAR; dXSARGS;
+#else
+    dXSARGS;
+#endif
+    if (items != 1)
+       Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_flags", "context");
+    PERL_UNUSED_VAR(cv); /* -W */
+    {
+	struct scripting_context *	context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
+	int	RETVAL;
+	dXSTARG;
+
+	RETVAL = common_flags(context);
+	XSprePUSH; PUSHi((IV)RETVAL);
+    }
+    XSRETURN(1);
+}
+
+
+XS(XS_Perf__Trace__Context_common_lock_depth); /* prototype to pass -Wmissing-prototypes */
+XS(XS_Perf__Trace__Context_common_lock_depth)
+{
+#ifdef dVAR
+    dVAR; dXSARGS;
+#else
+    dXSARGS;
+#endif
+    if (items != 1)
+       Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_lock_depth", "context");
+    PERL_UNUSED_VAR(cv); /* -W */
+    {
+	struct scripting_context *	context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
+	int	RETVAL;
+	dXSTARG;
+
+	RETVAL = common_lock_depth(context);
+	XSprePUSH; PUSHi((IV)RETVAL);
+    }
+    XSRETURN(1);
+}
+
+#ifdef __cplusplus
+extern "C"
+#endif
+XS(boot_Perf__Trace__Context); /* prototype to pass -Wmissing-prototypes */
+XS(boot_Perf__Trace__Context)
+{
+#ifdef dVAR
+    dVAR; dXSARGS;
+#else
+    dXSARGS;
+#endif
+    const char* file = __FILE__;
+
+    PERL_UNUSED_VAR(cv); /* -W */
+    PERL_UNUSED_VAR(items); /* -W */
+    XS_VERSION_BOOTCHECK ;
+
+        newXSproto("Perf::Trace::Context::common_pc", XS_Perf__Trace__Context_common_pc, file, "$");
+        newXSproto("Perf::Trace::Context::common_flags", XS_Perf__Trace__Context_common_flags, file, "$");
+        newXSproto("Perf::Trace::Context::common_lock_depth", XS_Perf__Trace__Context_common_lock_depth, file, "$");
+    if (PL_unitcheckav)
+         call_list(PL_scopestack_ix, PL_unitcheckav);
+    XSRETURN_YES;
+}
+
diff --git a/scripts/perl/Perf-Trace-Util/Context.xs b/scripts/perl/Perf-Trace-Util/Context.xs
new file mode 100644
index 0000000..8c7ea42
--- /dev/null
+++ b/scripts/perl/Perf-Trace-Util/Context.xs
@@ -0,0 +1,42 @@
+/*
+ * Context.xs.  XS interfaces for perf script.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+#include "../../../perf.h"
+#include "../../../util/trace-event.h"
+
+MODULE = Perf::Trace::Context		PACKAGE = Perf::Trace::Context
+PROTOTYPES: ENABLE
+
+int
+common_pc(context)
+	struct scripting_context * context
+
+int
+common_flags(context)
+	struct scripting_context * context
+
+int
+common_lock_depth(context)
+	struct scripting_context * context
+
diff --git a/scripts/perl/Perf-Trace-Util/Makefile.PL b/scripts/perl/Perf-Trace-Util/Makefile.PL
new file mode 100644
index 0000000..e899433
--- /dev/null
+++ b/scripts/perl/Perf-Trace-Util/Makefile.PL
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+use 5.010000;
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+    NAME              => 'Perf::Trace::Context',
+    VERSION_FROM      => 'lib/Perf/Trace/Context.pm', # finds $VERSION
+    PREREQ_PM         => {}, # e.g., Module::Name => 1.1
+    ($] >= 5.005 ?     ## Add these new keywords supported since 5.005
+      (ABSTRACT_FROM  => 'lib/Perf/Trace/Context.pm', # retrieve abstract from module
+       AUTHOR         => 'Tom Zanussi <tzanussi@gmail.com>') : ()),
+    LIBS              => [''], # e.g., '-lm'
+    DEFINE            => '-I ../..', # e.g., '-DHAVE_SOMETHING'
+    INC               => '-I.', # e.g., '-I. -I/usr/include/other'
+	# Un-comment this if you add C files to link with later:
+    OBJECT            => 'Context.o', # link all the C files too
+);
diff --git a/scripts/perl/Perf-Trace-Util/README b/scripts/perl/Perf-Trace-Util/README
new file mode 100644
index 0000000..2f0c7f3
--- /dev/null
+++ b/scripts/perl/Perf-Trace-Util/README
@@ -0,0 +1,59 @@
+Perf-Trace-Util version 0.01
+============================
+
+This module contains utility functions for use with perf script.
+
+Core.pm and Util.pm are pure Perl modules; Core.pm contains routines
+that the core perf support for Perl calls on and should always be
+'used', while Util.pm contains useful but optional utility functions
+that scripts may want to use.  Context.pm contains the Perl->C
+interface that allows scripts to access data in the embedding perf
+executable; scripts wishing to do that should 'use Context.pm'.
+
+The Perl->C perf interface is completely driven by Context.xs.  If you
+want to add new Perl functions that end up accessing C data in the
+perf executable, you add descriptions of the new functions here.
+scripting_context is a pointer to the perf data in the perf executable
+that you want to access - it's passed as the second parameter,
+$context, to all handler functions.
+
+After you do that:
+
+  perl Makefile.PL   # to create a Makefile for the next step
+  make               # to create Context.c
+
+  edit Context.c to add const to the char* file = __FILE__ line in
+  XS(boot_Perf__Trace__Context) to silence a warning/error.
+
+  You can delete the Makefile, object files and anything else that was
+  generated e.g. blib and shared library, etc, except for of course
+  Context.c
+
+  You should then be able to run the normal perf make as usual.
+
+INSTALLATION
+
+Building perf with perf script Perl scripting should install this
+module in the right place.
+
+You should make sure libperl and ExtUtils/Embed.pm are installed first
+e.g. apt-get install libperl-dev or yum install perl-ExtUtils-Embed.
+
+DEPENDENCIES
+
+This module requires these other modules and libraries:
+
+  None
+
+COPYRIGHT AND LICENCE
+
+Copyright (C) 2009 by Tom Zanussi <tzanussi@gmail.com>
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
diff --git a/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm b/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
new file mode 100644
index 0000000..4e2f603
--- /dev/null
+++ b/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
@@ -0,0 +1,55 @@
+package Perf::Trace::Context;
+
+use 5.010000;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+	common_pc common_flags common_lock_depth
+);
+
+our $VERSION = '0.01';
+
+require XSLoader;
+XSLoader::load('Perf::Trace::Context', $VERSION);
+
+1;
+__END__
+=head1 NAME
+
+Perf::Trace::Context - Perl extension for accessing functions in perf.
+
+=head1 SYNOPSIS
+
+  use Perf::Trace::Context;
+
+=head1 SEE ALSO
+
+Perf (script) documentation
+
+=head1 AUTHOR
+
+Tom Zanussi, E<lt>tzanussi@gmail.comE<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009 by Tom Zanussi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
+=cut
diff --git a/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm b/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
new file mode 100644
index 0000000..9158458
--- /dev/null
+++ b/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
@@ -0,0 +1,192 @@
+package Perf::Trace::Core;
+
+use 5.010000;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+define_flag_field define_flag_value flag_str dump_flag_fields
+define_symbolic_field define_symbolic_value symbol_str dump_symbolic_fields
+trace_flag_str
+);
+
+our $VERSION = '0.01';
+
+my %trace_flags = (0x00 => "NONE",
+		   0x01 => "IRQS_OFF",
+		   0x02 => "IRQS_NOSUPPORT",
+		   0x04 => "NEED_RESCHED",
+		   0x08 => "HARDIRQ",
+		   0x10 => "SOFTIRQ");
+
+# Name the trace flag bits set in $value, joined by " | ". A false $value
+# yields "NONE"; a true $value containing none of the known bits yields
+# undef.
+sub trace_flag_str
+{
+    my ($value) = @_;
+
+    my @names;
+
+    foreach my $idx (sort {$a <=> $b} keys %trace_flags) {
+	if (!$idx) {
+	    # key 0 sorts first, so $value is still unmodified here
+	    if (!$value) {
+		return "NONE";
+	    }
+	    next;
+	}
+	if (($value & $idx) != $idx) {
+	    next;
+	}
+	push @names, $trace_flags{$idx};
+	$value &= ~$idx;
+    }
+
+    return @names ? join(" | ", @names) : undef;
+}
+
+my %flag_fields;
+my %symbolic_fields;
+
+sub flag_str
+{
+    my ($event_name, $field_name, $value) = @_;
+
+    my $string;
+
+    if ($flag_fields{$event_name}{$field_name}) {
+	my $print_delim = 0;
+	foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event_name}{$field_name}{"values"}}) {
+	    if (!$value && !$idx) {
+		$string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}";
+		last;
+	    }
+	    if ($idx && ($value & $idx) == $idx) {
+		if ($print_delim && $flag_fields{$event_name}{$field_name}{'delim'}) {
+		    $string .= " $flag_fields{$event_name}{$field_name}{'delim'} ";
+		}
+		$string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}";
+		$print_delim = 1;
+		$value &= ~$idx;
+	    }
+	}
+    }
+
+    return $string;
+}
+
+sub define_flag_field
+{
+    my ($event_name, $field_name, $delim) = @_;
+
+    $flag_fields{$event_name}{$field_name}{"delim"} = $delim;
+}
+
+sub define_flag_value
+{
+    my ($event_name, $field_name, $value, $field_str) = @_;
+
+    $flag_fields{$event_name}{$field_name}{"values"}{$value} = $field_str;
+}
+
+sub dump_flag_fields
+{
+    for my $event (keys %flag_fields) {
+	print "event $event:\n";
+	for my $field (keys %{$flag_fields{$event}}) {
+	    print "    field: $field:\n";
+	    print "        delim: $flag_fields{$event}{$field}{'delim'}\n";
+	    foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event}{$field}{"values"}}) {
+		print "        value $idx: $flag_fields{$event}{$field}{'values'}{$idx}\n";
+	    }
+	}
+    }
+}
+
+# Look up the symbolic name registered for $value of field $field_name in
+# event $event_name; returns undef when no such value has been defined.
+sub symbol_str
+{
+    my ($event_name, $field_name, $value) = @_;
+
+    if ($symbolic_fields{$event_name}{$field_name}) {
+	my $values = $symbolic_fields{$event_name}{$field_name}{"values"};
+	foreach my $idx (sort {$a <=> $b} keys %{$values}) {
+	    # a false $value (0, "" or undef) selects the entry keyed 0
+	    if ((!$value && !$idx) || $value == $idx) {
+		return "$values->{$idx}";
+	    }
+	}
+    }
+
+    return undef;
+}
+
+sub define_symbolic_field
+{
+    my ($event_name, $field_name) = @_;
+
+    # nothing to do, really
+}
+
+sub define_symbolic_value
+{
+    my ($event_name, $field_name, $value, $field_str) = @_;
+
+    $symbolic_fields{$event_name}{$field_name}{"values"}{$value} = $field_str;
+}
+
+sub dump_symbolic_fields
+{
+    for my $event (keys %symbolic_fields) {
+	print "event $event:\n";
+	for my $field (keys %{$symbolic_fields{$event}}) {
+	    print "    field: $field:\n";
+	    foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event}{$field}{"values"}}) {
+		print "        value $idx: $symbolic_fields{$event}{$field}{'values'}{$idx}\n";
+	    }
+	}
+    }
+}
+
+1;
+__END__
+=head1 NAME
+
+Perf::Trace::Core - Perl extension for perf script
+
+=head1 SYNOPSIS
+
+  use Perf::Trace::Core;
+
+=head1 SEE ALSO
+
+Perf (script) documentation
+
+=head1 AUTHOR
+
+Tom Zanussi, E<lt>tzanussi@gmail.comE<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009 by Tom Zanussi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
+=cut
diff --git a/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
new file mode 100644
index 0000000..0535001
--- /dev/null
+++ b/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
@@ -0,0 +1,94 @@
+package Perf::Trace::Util;
+
+use 5.010000;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+avg nsecs nsecs_secs nsecs_nsecs nsecs_usecs nsecs_str print_nsecs
+clear_term
+);
+
+our $VERSION = '0.01';
+
+sub avg
+{
+    my ($total, $n) = @_;
+
+    return $total / $n;
+}
+
+my $NSECS_PER_SEC    = 1000000000;
+
+sub nsecs
+{
+    my ($secs, $nsecs) = @_;
+
+    return $secs * $NSECS_PER_SEC + $nsecs;
+}
+
+sub nsecs_secs {
+    my ($nsecs) = @_;
+
+    return $nsecs / $NSECS_PER_SEC;
+}
+
+sub nsecs_nsecs {
+    my ($nsecs) = @_;
+
+    return $nsecs % $NSECS_PER_SEC;
+}
+
+sub nsecs_str {
+    my ($nsecs) = @_;
+
+    my $str = sprintf("%5u.%09u", nsecs_secs($nsecs), nsecs_nsecs($nsecs));
+
+    return $str;
+}
+
+sub clear_term
+{
+    print "\x1b[H\x1b[2J";
+}
+
+1;
+__END__
+=head1 NAME
+
+Perf::Trace::Util - Perl extension for perf script
+
+=head1 SYNOPSIS
+
+  use Perf::Trace::Util;
+
+=head1 SEE ALSO
+
+Perf (script) documentation
+
+=head1 AUTHOR
+
+Tom Zanussi, E<lt>tzanussi@gmail.comE<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009 by Tom Zanussi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
+=cut
diff --git a/scripts/perl/Perf-Trace-Util/typemap b/scripts/perl/Perf-Trace-Util/typemap
new file mode 100644
index 0000000..8408368
--- /dev/null
+++ b/scripts/perl/Perf-Trace-Util/typemap
@@ -0,0 +1 @@
+struct scripting_context * T_PTR
diff --git a/scripts/perl/bin/check-perf-trace-record b/scripts/perl/bin/check-perf-trace-record
new file mode 100644
index 0000000..423ad6a
--- /dev/null
+++ b/scripts/perl/bin/check-perf-trace-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -a -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree
diff --git a/scripts/perl/bin/failed-syscalls-record b/scripts/perl/bin/failed-syscalls-record
new file mode 100644
index 0000000..74685f3
--- /dev/null
+++ b/scripts/perl/bin/failed-syscalls-record
@@ -0,0 +1,3 @@
+#!/bin/bash
+(perf record -e raw_syscalls:sys_exit "$@" || \
+ perf record -e syscalls:sys_exit "$@") 2> /dev/null
diff --git a/scripts/perl/bin/failed-syscalls-report b/scripts/perl/bin/failed-syscalls-report
new file mode 100644
index 0000000..9f83cc1
--- /dev/null
+++ b/scripts/perl/bin/failed-syscalls-report
@@ -0,0 +1,10 @@
+#!/bin/bash
+# description: system-wide failed syscalls
+# args: [comm]
+if [ $# -gt 0 ] ; then
+    if ! expr match "$1" "-" > /dev/null ; then
+	comm=$1
+	shift
+    fi
+fi
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
diff --git a/scripts/perl/bin/rw-by-file-record b/scripts/perl/bin/rw-by-file-record
new file mode 100644
index 0000000..33efc86
--- /dev/null
+++ b/scripts/perl/bin/rw-by-file-record
@@ -0,0 +1,3 @@
+#!/bin/bash
+perf record -e syscalls:sys_enter_read -e syscalls:sys_enter_write "$@"
+
diff --git a/scripts/perl/bin/rw-by-file-report b/scripts/perl/bin/rw-by-file-report
new file mode 100644
index 0000000..77200b3
--- /dev/null
+++ b/scripts/perl/bin/rw-by-file-report
@@ -0,0 +1,10 @@
+#!/bin/bash
+# description: r/w activity for a program, by file
+# args: <comm>
+if [ $# -lt 1 ] ; then
+    echo "usage: rw-by-file <comm>"
+    exit
+fi
+comm=$1
+shift
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
diff --git a/scripts/perl/bin/rw-by-pid-record b/scripts/perl/bin/rw-by-pid-record
new file mode 100644
index 0000000..7cb9db2
--- /dev/null
+++ b/scripts/perl/bin/rw-by-pid-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write "$@"
diff --git a/scripts/perl/bin/rw-by-pid-report b/scripts/perl/bin/rw-by-pid-report
new file mode 100644
index 0000000..a27b9f3
--- /dev/null
+++ b/scripts/perl/bin/rw-by-pid-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: system-wide r/w activity
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
diff --git a/scripts/perl/bin/rwtop-record b/scripts/perl/bin/rwtop-record
new file mode 100644
index 0000000..7cb9db2
--- /dev/null
+++ b/scripts/perl/bin/rwtop-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write "$@"
diff --git a/scripts/perl/bin/rwtop-report b/scripts/perl/bin/rwtop-report
new file mode 100644
index 0000000..83e11ec
--- /dev/null
+++ b/scripts/perl/bin/rwtop-report
@@ -0,0 +1,20 @@
+#!/bin/bash
+# description: system-wide r/w top
+# args: [interval]
+n_args=0	# number of leading arguments that do not start with "-"
+for i in "$@"
+do
+    if expr match "$i" "-" > /dev/null ; then
+	break
+    fi
+    n_args=$(( $n_args + 1 ))
+done
+if [ "$n_args" -gt 1 ] ; then	# at most one positional argument is accepted
+    echo "usage: rwtop-report [interval]"
+    exit
+fi
+if [ "$n_args" -gt 0 ] ; then
+    interval=$1
+    shift
+fi
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl ${interval:+"$interval"}
diff --git a/scripts/perl/bin/wakeup-latency-record b/scripts/perl/bin/wakeup-latency-record
new file mode 100644
index 0000000..464251a
--- /dev/null
+++ b/scripts/perl/bin/wakeup-latency-record
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Record the sched_switch and sched_wakeup events; any extra arguments
+# are passed through to perf record unchanged.
+perf record -e sched:sched_switch -e sched:sched_wakeup "$@"
+
+
diff --git a/scripts/perl/bin/wakeup-latency-report b/scripts/perl/bin/wakeup-latency-report
new file mode 100644
index 0000000..889e813
--- /dev/null
+++ b/scripts/perl/bin/wakeup-latency-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: system-wide min/max/avg wakeup latency
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
diff --git a/scripts/perl/check-perf-trace.pl b/scripts/perl/check-perf-trace.pl
new file mode 100644
index 0000000..4e7076c
--- /dev/null
+++ b/scripts/perl/check-perf-trace.pl
@@ -0,0 +1,106 @@
+# perf script event handlers, generated by perf script -g perl
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# This script tests basic functionality such as flag and symbol
+# strings, common_xxx() calls back into perf, begin, end, unhandled
+# events, etc.  Basically, if this script runs successfully and
+# displays expected results, perl scripting support should be ok.
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Context;
+use Perf::Trace::Util;
+
+# Start-of-trace hook: just announce that it ran.
+sub trace_begin {
+    print("trace_begin\n");
+}
+
+# End-of-trace hook: announce itself, then list events that had no handler.
+sub trace_end {
+    print("trace_end\n");
+
+    print_unhandled();
+}
+
+# Handler for irq:softirq_entry: print the common header, the extra common
+# fields, and the vec field run through symbol_str().
+sub irq::softirq_entry {
+	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	    $common_pid, $common_comm, $vec) = @_;
+
+	print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
+		     $common_pid, $common_comm);
+
+	print_uncommon($context);
+
+	my $vec_name = symbol_str("irq::softirq_entry", "vec", $vec);
+	printf("vec=%s\n", $vec_name);
+}
+
+# Handler for kmem:kmalloc: print the common header, the extra common fields
+# and the allocation details, with gfp_flags run through flag_str().
+sub kmem::kmalloc
+{
+	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	    $common_pid, $common_comm,
+	    $call_site, $ptr, $bytes_req, $bytes_alloc, $gfp_flags) = @_;
+
+	print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
+		     $common_pid, $common_comm);
+	print_uncommon($context);
+
+	# Perl's %p prints the address of the Perl scalar itself, not the
+	# number stored in it, so the two addresses are formatted as hex.
+	printf("call_site=0x%x, ptr=0x%x, bytes_req=%u, bytes_alloc=%u, ".
+	       "gfp_flags=%s\n", $call_site, $ptr, $bytes_req, $bytes_alloc,
+	       flag_str("kmem::kmalloc", "gfp_flags", $gfp_flags));
+}
+
+# Print the common trace fields that are not passed as handler arguments;
+# they are fetched through the context handle instead.
+sub print_uncommon {
+    my ($context) = @_;
+    my $preempt_count = common_pc($context);
+    my $flags = trace_flag_str(common_flags($context));
+    my $lock_depth = common_lock_depth($context);
+    printf("common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, ",
+	   $preempt_count, $flags, $lock_depth);
+}
+
+# Event name => number of occurrences counted by trace_unhandled().
+my %unhandled;
+
+# Print a table of the unhandled events; prints nothing if there were none.
+sub print_unhandled {
+    return unless %unhandled;
+
+    print "\nunhandled events:\n\n";
+
+    my $row = "%-40s  %10s\n";
+    printf($row, "event", "count");
+    printf($row, "----------------------------------------",
+	   "-----------");
+
+    for my $name (keys %unhandled) {
+	printf("%-40s  %10d\n", $name, $unhandled{$name});
+    }
+}
+
+# Fallback handler: count the event under its name in %unhandled.
+sub trace_unhandled {
+    # Only the event name is used; the remaining arguments are ignored.
+    my ($event_name) = @_;
+
+    $unhandled{$event_name}++;
+}
+
+# Print the leading columns shared by every event line (no newline).
+sub print_header {
+	my ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;
+
+	printf("%-20s %5u %05u.%09u %8u %-20s ", $event_name, $cpu,
+	       $secs, $nsecs, $pid, $comm);
+}
diff --git a/scripts/perl/failed-syscalls.pl b/scripts/perl/failed-syscalls.pl
new file mode 100644
index 0000000..55e7ae4
--- /dev/null
+++ b/scripts/perl/failed-syscalls.pl
@@ -0,0 +1,47 @@
+# failed system call counts
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Displays system-wide failed system call totals
+# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Context;
+use Perf::Trace::Util;
+
+my $for_comm = shift;	# optional [comm] filter argument
+
+my %failed_syscalls;	# comm => number of syscalls that returned < 0
+
+# Handler for raw_syscalls:sys_exit: count the syscall against its comm
+# when the return value is negative.
+sub raw_syscalls::sys_exit {
+	my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	    $common_pid, $common_comm,
+	    $id, $ret) = @_;
+
+	return if $ret >= 0;
+	$failed_syscalls{$common_comm}++;
+}
+
+# syscalls:sys_exit is handled exactly like raw_syscalls:sys_exit.
+sub syscalls::sys_exit {
+	return raw_syscalls::sys_exit(@_);
+}
+
+# End-of-trace hook: print failed-syscall totals per comm, highest first,
+# limited to $for_comm when a comm filter was given.
+sub trace_end {
+    printf("\nfailed syscalls by comm:\n\n");
+    printf("%-20s  %10s\n", "comm", "# errors");
+    # The separator row uses the same two-column format as the header row.
+    printf("%-20s  %10s\n", "--------------------", "----------");
+
+    foreach my $comm (sort {$failed_syscalls{$b} <=> $failed_syscalls{$a}}
+		      keys %failed_syscalls) {
+	next if ($for_comm && $comm ne $for_comm);
+	printf("%-20s  %10s\n", $comm, $failed_syscalls{$comm});
+    }
+}
diff --git a/scripts/perl/rw-by-file.pl b/scripts/perl/rw-by-file.pl
new file mode 100644
index 0000000..74844ee
--- /dev/null
+++ b/scripts/perl/rw-by-file.pl
@@ -0,0 +1,106 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Display r/w activity for files read/written to for a given program
+
+# The common_* event handler fields are the most useful fields common to
+# all events.  They don't necessarily correspond to the 'common_*' fields
+# in the status files.  Those fields not available as handler params can
+# be retrieved via script functions of the form get_common_*().
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my $usage = "perf script -s rw-by-file.pl <comm>\n";
+
+# The comm to report on is mandatory; without it, die with the usage text.
+my $for_comm = shift or die $usage;
+
+my %reads;	# fd => { bytes_requested, total_reads }
+my %writes;	# fd => { bytes_written, total_writes }
+
+# Handler for syscalls:sys_enter_read: tally reads by $for_comm per fd.
+sub syscalls::sys_enter_read {
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+    return unless $common_comm eq $for_comm;
+
+    $reads{$fd}{bytes_requested} += $count;
+    $reads{$fd}{total_reads}++;
+}
+
+# Handler for syscalls:sys_enter_write: tally writes by $for_comm per fd.
+# bytes_written accumulates the count passed to the call.
+sub syscalls::sys_enter_write {
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+    return unless $common_comm eq $for_comm;
+
+    $writes{$fd}{bytes_written} += $count;
+    $writes{$fd}{total_writes}++;
+}
+
+# End-of-trace hook: print per-fd read and write tables for $for_comm, each
+# sorted by byte count (largest first), then the unhandled-event summary.
+sub trace_end {
+    # Pass $for_comm as an argument so a '%' in it is not read as a conversion.
+    printf("file read counts for %s:\n\n", $for_comm);
+
+    printf("%6s  %10s  %10s\n", "fd", "# reads", "bytes_requested");
+    printf("%6s  %10s  %10s\n", "------", "----------", "-----------");
+
+    foreach my $fd (sort {$reads{$b}{bytes_requested} <=>
+			      $reads{$a}{bytes_requested}} keys %reads) {
+	printf("%6u  %10u  %10u\n", $fd, $reads{$fd}{total_reads},
+	       $reads{$fd}{bytes_requested});
+    }
+
+    printf("\nfile write counts for %s:\n\n", $for_comm);
+
+    printf("%6s  %10s  %10s\n", "fd", "# writes", "bytes_written");
+    printf("%6s  %10s  %10s\n", "------", "----------", "-----------");
+
+    foreach my $fd (sort {$writes{$b}{bytes_written} <=>
+			      $writes{$a}{bytes_written}} keys %writes) {
+	printf("%6u  %10u  %10u\n", $fd, $writes{$fd}{total_writes},
+	       $writes{$fd}{bytes_written});
+    }
+
+    print_unhandled();
+}
+
+# Event name => number of occurrences counted by trace_unhandled().
+my %unhandled;
+
+# Print the unhandled-event table, or nothing when the table is empty.
+sub print_unhandled {
+    my @names = keys %unhandled;
+    return if @names == 0;
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+	   "-----------");
+
+    foreach my $name (@names) {
+	printf("%-40s  %10d\n", $name, $unhandled{$name});
+    }
+}
+
+# Fallback handler: count the event under its name in %unhandled.
+sub trace_unhandled {
+    # Only the event name is used; the remaining arguments are ignored.
+    my ($event_name) = @_;
+
+    $unhandled{$event_name}++;
+}
+
+
diff --git a/scripts/perl/rw-by-pid.pl b/scripts/perl/rw-by-pid.pl
new file mode 100644
index 0000000..9db23c9
--- /dev/null
+++ b/scripts/perl/rw-by-pid.pl
@@ -0,0 +1,184 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Display r/w activity for all processes
+
+# The common_* event handler fields are the most useful fields common to
+# all events.  They don't necessarily correspond to the 'common_*' fields
+# in the status files.  Those fields not available as handler params can
+# be retrieved via script functions of the form get_common_*().
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my %reads;	# pid => { comm, total_reads, bytes_requested, bytes_read, errors }
+my %writes;	# pid => { comm, total_writes, bytes_written, errors }
+
+# Handler for syscalls:sys_exit_read: add a positive return value to the
+# pid's bytes_read; otherwise count the return value under errors.
+sub syscalls::sys_exit_read {
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $nr, $ret) = @_;
+
+    if ($ret > 0) {
+	$reads{$common_pid}{bytes_read} += $ret;
+	return;
+    }
+
+    # Keep bytes_read defined for pids whose reads returned nothing.
+    $reads{$common_pid}{bytes_read} //= 0;
+    $reads{$common_pid}{errors}{$ret}++;
+}
+
+# Handler for syscalls:sys_enter_read: record the requested byte count, the
+# call count and the comm for the calling pid.
+sub syscalls::sys_enter_read {
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+
+    $reads{$common_pid}{comm} = $common_comm;
+    $reads{$common_pid}{total_reads}++;
+    $reads{$common_pid}{bytes_requested} += $count;
+}
+
+# Handler for syscalls:sys_exit_write: count every return value that is
+# zero or negative under the pid's errors.
+sub syscalls::sys_exit_write {
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $nr, $ret) = @_;
+
+    return if $ret > 0;
+
+    $writes{$common_pid}{errors}{$ret}++;
+}
+
+# Handler for syscalls:sys_enter_write: record the byte count passed to the
+# call, the call count and the comm for the calling pid.
+sub syscalls::sys_enter_write {
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+
+    $writes{$common_pid}{comm} = $common_comm;
+    $writes{$common_pid}{total_writes}++;
+    $writes{$common_pid}{bytes_written} += $count;
+}
+
+# End-of-trace hook: print per-pid read and write totals plus per-pid
+# failure counts, then the unhandled-event summary.
+sub trace_end {
+    printf("read counts by pid:\n\n");
+
+    # Each table uses one format for header, separator and rows so that the
+    # columns line up; the widths cover the longest heading.
+    my $fmt = "%6s  %-20s  %10s  %15s  %10s\n";
+    printf($fmt, "pid", "comm", "# reads", "bytes_requested", "bytes_read");
+    printf($fmt, "------", "--------------------", "----------",
+	   "---------------", "----------");
+
+    foreach my $pid (sort { ($reads{$b}{bytes_read} || 0) <=>
+				($reads{$a}{bytes_read} || 0) } keys %reads) {
+	my $comm = $reads{$pid}{comm} || "";
+	my $total_reads = $reads{$pid}{total_reads} || 0;
+	my $bytes_requested = $reads{$pid}{bytes_requested} || 0;
+	my $bytes_read = $reads{$pid}{bytes_read} || 0;
+
+	printf($fmt, $pid, $comm, $total_reads, $bytes_requested, $bytes_read);
+    }
+
+    printf("\nfailed reads by pid:\n\n");
+
+    $fmt = "%6s  %-20s  %7s  %10s\n";
+    printf($fmt, "pid", "comm", "error #", "# errors");
+    printf($fmt, "------", "--------------------", "-------", "----------");
+
+    # Flatten {pid}{errors}{error} into rows, sorted by count (descending).
+    my @errcounts = ();
+
+    foreach my $pid (keys %reads) {
+	foreach my $error (keys %{$reads{$pid}{errors}}) {
+	    my $comm = $reads{$pid}{comm} || "";
+	    my $errcount = $reads{$pid}{errors}{$error} || 0;
+	    push @errcounts, [$pid, $comm, $error, $errcount];
+	}
+    }
+
+    @errcounts = sort { $b->[3] <=> $a->[3] } @errcounts;
+
+    foreach my $row (@errcounts) {
+	printf($fmt, @$row);
+    }
+
+    printf("\nwrite counts by pid:\n\n");
+
+    $fmt = "%6s  %-20s  %10s  %13s\n";
+    printf($fmt, "pid", "comm", "# writes", "bytes_written");
+    printf($fmt, "------", "--------------------", "----------",
+	   "-------------");
+
+    foreach my $pid (sort { ($writes{$b}{bytes_written} || 0) <=>
+			($writes{$a}{bytes_written} || 0)} keys %writes) {
+	my $comm = $writes{$pid}{comm} || "";
+	my $total_writes = $writes{$pid}{total_writes} || 0;
+	my $bytes_written = $writes{$pid}{bytes_written} || 0;
+
+	printf($fmt, $pid, $comm, $total_writes, $bytes_written);
+    }
+
+    printf("\nfailed writes by pid:\n\n");
+
+    $fmt = "%6s  %-20s  %7s  %10s\n";
+    printf($fmt, "pid", "comm", "error #", "# errors");
+    printf($fmt, "------", "--------------------", "-------", "----------");
+
+    @errcounts = ();
+
+    foreach my $pid (keys %writes) {
+	foreach my $error (keys %{$writes{$pid}{errors}}) {
+	    my $comm = $writes{$pid}{comm} || "";
+	    my $errcount = $writes{$pid}{errors}{$error} || 0;
+	    push @errcounts, [$pid, $comm, $error, $errcount];
+	}
+    }
+
+    @errcounts = sort { $b->[3] <=> $a->[3] } @errcounts;
+
+    foreach my $row (@errcounts) {
+	printf($fmt, @$row);
+    }
+
+    print_unhandled();
+}
+
+# Event name => number of occurrences counted by trace_unhandled().
+my %unhandled;
+
+# Print a table of the unhandled events; prints nothing if there were none.
+sub print_unhandled {
+    return unless %unhandled;
+
+    print "\nunhandled events:\n\n";
+
+    my $row = "%-40s  %10s\n";
+    printf($row, "event", "count");
+    printf($row, "----------------------------------------",
+	   "-----------");
+
+    for my $name (keys %unhandled) {
+	printf("%-40s  %10d\n", $name, $unhandled{$name});
+    }
+}
+
+# Fallback handler: count the event under its name in %unhandled.
+sub trace_unhandled {
+    # Only the event name is used; the remaining arguments are ignored.
+    my ($event_name) = @_;
+
+    $unhandled{$event_name}++;
+}
diff --git a/scripts/perl/rwtop.pl b/scripts/perl/rwtop.pl
new file mode 100644
index 0000000..8b20787
--- /dev/null
+++ b/scripts/perl/rwtop.pl
@@ -0,0 +1,203 @@
+#!/usr/bin/perl -w
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# read/write top
+#
+# Periodically displays system-wide r/w call activity, broken down by
+# pid.  If an [interval] arg is specified, the display will be
+# refreshed every [interval] seconds.  The default interval is 3
+# seconds.
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+use POSIX qw/SIGALRM SA_RESTART/;
+
+my $default_interval = 3;	# seconds between refreshes when none is given
+my $nlines = 20;		# maximum rows printed per table
+my $print_thread;
+my $print_pending = 0;		# set to 1 by the SIGALRM handler
+
+my %reads;	# pid => read statistics since the last print_totals()
+my %writes;	# pid => write statistics since the last print_totals()
+
+# Optional [interval] argument, in seconds.  A missing, empty or zero
+# value selects $default_interval, because any false value falls through
+# the || below.
+my $interval = shift || $default_interval;
+
+# Handler for syscalls:sys_exit_read: add a positive return value to the
+# pid's bytes_read; otherwise count the return value under errors.
+sub syscalls::sys_exit_read {
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $nr, $ret) = @_;
+
+    print_check();	# print the totals first if a refresh is pending
+
+    if ($ret > 0) {
+	$reads{$common_pid}{bytes_read} += $ret;
+	return;
+    }
+
+    # Keep bytes_read defined for pids whose reads returned nothing.
+    $reads{$common_pid}{bytes_read} //= 0;
+    $reads{$common_pid}{errors}{$ret}++;
+}
+
+# Handler for syscalls:sys_enter_read: record the requested byte count, the
+# call count and the comm for the calling pid.
+sub syscalls::sys_enter_read {
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+
+    print_check();	# print the totals first if a refresh is pending
+
+    $reads{$common_pid}{comm} = $common_comm;
+    $reads{$common_pid}{total_reads}++;
+    $reads{$common_pid}{bytes_requested} += $count;
+}
+
+# Handler for syscalls:sys_exit_write: count every return value that is
+# zero or negative under the pid's errors.
+sub syscalls::sys_exit_write {
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $nr, $ret) = @_;
+
+    print_check();	# print the totals first if a refresh is pending
+
+    return if $ret > 0;
+
+    $writes{$common_pid}{errors}{$ret}++;
+}
+
+# Handler for syscalls:sys_enter_write: record the byte count passed to the
+# call, the call count and the comm for the calling pid.
+sub syscalls::sys_enter_write {
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+
+    print_check();	# print the totals first if a refresh is pending
+
+    $writes{$common_pid}{comm} = $common_comm;
+    $writes{$common_pid}{total_writes}++;
+    $writes{$common_pid}{bytes_written} += $count;
+}
+
+# Start-of-trace hook: install the SIGALRM handler and arm a 1 second alarm.
+sub trace_begin {
+    my $action = POSIX::SigAction->new(\&set_print_pending);
+    $action->safe(1);
+    $action->flags(SA_RESTART);
+    POSIX::sigaction(SIGALRM, $action) or die "Can't set SIGALRM handler: $!\n";
+    alarm(1);
+}
+
+# End-of-trace hook. NOTE(review): print_totals() starts with clear_term(), which may wipe the unhandled list - confirm the order.
+sub trace_end {
+    print_unhandled();
+    print_totals();
+}
+
+# Print the totals if a refresh is pending.  Declared without a "()"
+# prototype because the handlers call it before this definition.
+sub print_check {
+    return unless $print_pending == 1;
+    $print_pending = 0;
+    print_totals();
+}
+
+# SIGALRM handler (perl passes it the signal name): flag a refresh, re-arm the alarm.
+sub set_print_pending {
+    $print_pending = 1;
+    alarm $interval;
+}
+
+# Print the per-pid read and write tables (at most $nlines rows each,
+# largest byte count first), then empty both tables.
+sub print_totals {
+    my $count = 0;
+
+    clear_term();
+
+    printf("\nread counts by pid:\n\n");
+
+    # "comm" is left-justified in the header to match the data rows.
+    printf("%6s  %-20s  %10s  %10s  %10s\n", "pid", "comm",
+	   "# reads", "bytes_req", "bytes_read");
+    printf("%6s  %-20s  %10s  %10s  %10s\n", "------", "--------------------",
+	   "----------", "----------", "----------");
+
+    foreach my $pid (sort { ($reads{$b}{bytes_read} || 0) <=>
+			       ($reads{$a}{bytes_read} || 0) } keys %reads) {
+	my $comm = $reads{$pid}{comm} || "";
+	my $total_reads = $reads{$pid}{total_reads} || 0;
+	my $bytes_requested = $reads{$pid}{bytes_requested} || 0;
+	my $bytes_read = $reads{$pid}{bytes_read} || 0;
+
+	printf("%6s  %-20s  %10s  %10s  %10s\n", $pid, $comm,
+	       $total_reads, $bytes_requested, $bytes_read);
+
+	if (++$count == $nlines) {
+	    last;
+	}
+    }
+
+    $count = 0;
+
+    printf("\nwrite counts by pid:\n\n");
+
+    printf("%6s  %-20s  %10s  %13s\n", "pid", "comm",
+	   "# writes", "bytes_written");
+    printf("%6s  %-20s  %10s  %13s\n", "------", "--------------------",
+	   "----------", "-------------");
+
+    foreach my $pid (sort { ($writes{$b}{bytes_written} || 0) <=>
+			($writes{$a}{bytes_written} || 0)} keys %writes) {
+	my $comm = $writes{$pid}{comm} || "";
+	my $total_writes = $writes{$pid}{total_writes} || 0;
+	my $bytes_written = $writes{$pid}{bytes_written} || 0;
+
+	printf("%6s  %-20s  %10s  %13s\n", $pid, $comm,
+	       $total_writes, $bytes_written);
+
+	if (++$count == $nlines) {
+	    last;
+	}
+    }
+
+    %reads = ();
+    %writes = ();
+}
+
+# Event name => number of occurrences counted by trace_unhandled().
+my %unhandled;
+
+# Print the unhandled-event table, or nothing when the table is empty.
+sub print_unhandled {
+    my @names = keys %unhandled;
+    return if @names == 0;
+
+    print "\nunhandled events:\n\n";
+
+    printf("%-40s  %10s\n", "event", "count");
+    printf("%-40s  %10s\n", "----------------------------------------",
+	   "-----------");
+
+    foreach my $name (@names) {
+	printf("%-40s  %10d\n", $name, $unhandled{$name});
+    }
+}
+
+# Fallback handler: count the event under its name in %unhandled.
+sub trace_unhandled {
+    # Only the event name is used; the remaining arguments are ignored.
+    my ($event_name) = @_;
+
+    $unhandled{$event_name}++;
+}
diff --git a/scripts/perl/wakeup-latency.pl b/scripts/perl/wakeup-latency.pl
new file mode 100644
index 0000000..d9143dc
--- /dev/null
+++ b/scripts/perl/wakeup-latency.pl
@@ -0,0 +1,107 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Display avg/min/max wakeup latency
+
+# The common_* event handler fields are the most useful fields common to
+# all events.  They don't necessarily correspond to the 'common_*' fields
+# in the status files.  Those fields not available as handler params can
+# be retrieved via script functions of the form get_common_*().
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my %last_wakeup;	# cpu => { ts => time of the latest wakeup aimed at it; 0 once used }
+
+my $max_wakeup_latency;	# seeded in trace_begin
+my $min_wakeup_latency;	# seeded in trace_begin
+my $total_wakeup_latency = 0;	# sum of all measured latencies
+my $total_wakeups = 0;	# number of latencies measured
+
+# Handler for sched:sched_switch: if a wakeup was recorded for this CPU,
+# take the time since that wakeup as one latency sample, then clear it.
+sub sched::sched_switch {
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm,
+	$prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid,
+	$next_prio) = @_;
+
+    my $wakeup_ts = $last_wakeup{$common_cpu}{ts};
+    $last_wakeup{$common_cpu}{ts} = 0;
+    return unless $wakeup_ts;
+
+    my $wakeup_latency = nsecs($common_secs, $common_nsecs) - $wakeup_ts;
+
+    $max_wakeup_latency = $wakeup_latency
+	if $wakeup_latency > $max_wakeup_latency;
+    $min_wakeup_latency = $wakeup_latency
+	if $wakeup_latency < $min_wakeup_latency;
+
+    $total_wakeup_latency += $wakeup_latency;
+    $total_wakeups++;
+}
+
+# Handler for sched:sched_wakeup: store the event's timestamp as the latest
+# wakeup for the target CPU.
+sub sched::sched_wakeup {
+    my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+	$common_pid, $common_comm, $comm, $pid, $prio, $success, $target_cpu) = @_;
+    my $now = nsecs($common_secs, $common_nsecs);
+    $last_wakeup{$target_cpu}{ts} = $now;
+}
+
+# Start-of-trace hook: seed min with a large value and max with zero.
+sub trace_begin {
+    $max_wakeup_latency = 0;
+    $min_wakeup_latency = 1000000000;
+}
+
+# End-of-trace hook: print the wakeup count and avg/min/max latency; with
+# no samples all three are N/A (min/max would only show their seeds).
+sub trace_end {
+    printf("wakeup_latency stats:\n\n");
+    print "total_wakeups: $total_wakeups\n";
+    if ($total_wakeups) {
+	printf("avg_wakeup_latency (ns): %u\n",
+	       avg($total_wakeup_latency, $total_wakeups));
+	printf("min_wakeup_latency (ns): %u\n", $min_wakeup_latency);
+	printf("max_wakeup_latency (ns): %u\n", $max_wakeup_latency);
+    } else {
+	print "${_}_wakeup_latency (ns): N/A\n" for qw(avg min max);
+    }
+    print_unhandled();
+}
+
+# Event name => number of occurrences counted by trace_unhandled().
+my %unhandled;
+
+# Print a table of the unhandled events; prints nothing if there were none.
+sub print_unhandled {
+    return unless %unhandled;
+
+    print "\nunhandled events:\n\n";
+
+    my $row = "%-40s  %10s\n";
+    printf($row, "event", "count");
+    printf($row, "----------------------------------------",
+	   "-----------");
+
+    for my $name (keys %unhandled) {
+	printf("%-40s  %10d\n", $name, $unhandled{$name});
+    }
+}
+
+# Fallback handler: count the event under its name in %unhandled.
+sub trace_unhandled {
+    # Only the event name is used; the remaining arguments are ignored.
+    my ($event_name) = @_;
+
+    $unhandled{$event_name}++;
+}
diff --git a/scripts/python/Perf-Trace-Util/Build b/scripts/python/Perf-Trace-Util/Build
new file mode 100644
index 0000000..aefc15c
--- /dev/null
+++ b/scripts/python/Perf-Trace-Util/Build
@@ -0,0 +1,3 @@
+libperf-y += Context.o
+
+CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs
diff --git a/scripts/python/Perf-Trace-Util/Context.c b/scripts/python/Perf-Trace-Util/Context.c
new file mode 100644
index 0000000..1a0d277
--- /dev/null
+++ b/scripts/python/Perf-Trace-Util/Context.c
@@ -0,0 +1,116 @@
+/*
+ * Context.c.  Python interfaces for perf script.
+ *
+ * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <Python.h>
+#include "../../../perf.h"
+#include "../../../util/trace-event.h"
+
+#if PY_MAJOR_VERSION < 3
+#define _PyCapsule_GetPointer(arg1, arg2) \
+  PyCObject_AsVoidPtr(arg1)
+/* Python 2: the second argument is dropped; arg1 is read with PyCObject_AsVoidPtr(). */
+PyMODINIT_FUNC initperf_trace_context(void);
+#else
+#define _PyCapsule_GetPointer(arg1, arg2) \
+  PyCapsule_GetPointer((arg1), (arg2))
+/* Python 3: both arguments are forwarded to PyCapsule_GetPointer(). */
+PyMODINIT_FUNC PyInit_perf_trace_context(void);
+#endif
+
+/* common_pc(context): common preempt count as a Python int; NULL if the argument is not a valid context handle. */
+static PyObject *perf_trace_context_common_pc(PyObject *obj, PyObject *args)
+{
+	struct scripting_context *scripting_context;
+	PyObject *context;
+
+	if (!PyArg_ParseTuple(args, "O", &context))
+		return NULL;
+
+	scripting_context = _PyCapsule_GetPointer(context, NULL);
+	if (!scripting_context)	/* wrong object type: do not dereference it */
+		return NULL;
+	return Py_BuildValue("i", common_pc(scripting_context));
+}
+
+/* common_flags(context): common flags as a Python int; NULL if the argument is not a valid context handle. */
+static PyObject *perf_trace_context_common_flags(PyObject *obj,
+						 PyObject *args)
+{
+	struct scripting_context *scripting_context;
+	PyObject *context;
+
+	if (!PyArg_ParseTuple(args, "O", &context))
+		return NULL;
+
+	scripting_context = _PyCapsule_GetPointer(context, NULL);
+	if (!scripting_context)	/* wrong object type: do not dereference it */
+		return NULL;
+	return Py_BuildValue("i", common_flags(scripting_context));
+}
+
+/* common_lock_depth(context): common lock depth as a Python int; NULL if the argument is not a valid context handle. */
+static PyObject *perf_trace_context_common_lock_depth(PyObject *obj,
+						      PyObject *args)
+{
+	struct scripting_context *scripting_context;
+	PyObject *context;
+
+	if (!PyArg_ParseTuple(args, "O", &context))
+		return NULL;
+
+	scripting_context = _PyCapsule_GetPointer(context, NULL);
+	if (!scripting_context)	/* wrong object type: do not dereference it */
+		return NULL;
+	return Py_BuildValue("i", common_lock_depth(scripting_context));
+}
+
+static PyMethodDef ContextMethods[] = {	/* module methods; the all-NULL entry ends the table */
+	{ "common_pc", perf_trace_context_common_pc,
+	  METH_VARARGS, "Get the common preempt count event field value." },
+	{ "common_flags", perf_trace_context_common_flags,
+	  METH_VARARGS, "Get the common flags event field value." },
+	{ "common_lock_depth", perf_trace_context_common_lock_depth,
+	  METH_VARARGS, "Get the common lock depth event field value." },
+	{ NULL, NULL, 0, NULL }
+};
+
+#if PY_MAJOR_VERSION < 3
+/* Python 2 entry point: register the module with its method table. */
+PyMODINIT_FUNC initperf_trace_context(void)
+{
+	(void) Py_InitModule("perf_trace_context", ContextMethods);
+}
+#else
+/* Python 3 entry point: create the module from a static definition. */
+PyMODINIT_FUNC PyInit_perf_trace_context(void)
+{
+	/* Members that are not named here are zero-initialized (NULL). */
+	static struct PyModuleDef moduledef = {
+		.m_base    = PyModuleDef_HEAD_INIT,
+		.m_name    = "perf_trace_context",
+		.m_doc     = "",
+		.m_size    = -1,
+		.m_methods = ContextMethods,
+	};
+
+	return PyModule_Create(&moduledef);
+}
+#endif
diff --git a/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
new file mode 100644
index 0000000..38dfb72
--- /dev/null
+++ b/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
@@ -0,0 +1,122 @@
+# Core.py - Python extension for perf script, core functions
+#
+# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
+#
+# This software may be distributed under the terms of the GNU General
+# Public License ("GPL") version 2 as published by the Free Software
+# Foundation.
+
+from collections import defaultdict
+
+def autodict():  # defaultdict whose default factory is autodict itself, so missing keys become nested autodicts
+    return defaultdict(autodict)
+
+flag_fields = autodict()      # [event_name][field_name] -> 'delim' and 'values' entries
+symbolic_fields = autodict()  # [event_name][field_name] -> 'values' entries
+
+def define_flag_field(event_name, field_name, delim):  # store the delimiter flag_str() puts between flag names
+    flag_fields[event_name][field_name]['delim'] = delim
+# Register field_str as the display string for flag value `value`.
+def define_flag_value(event_name, field_name, value, field_str):
+    flag_fields[event_name][field_name]['values'][value] = field_str
+# Nothing is stored per symbolic field; only its values are registered.
+def define_symbolic_field(event_name, field_name):
+    # nothing to do, really
+    pass
+# Register field_str as the display string for symbolic value `value`.
+def define_symbolic_value(event_name, field_name, value, field_str):
+    symbolic_fields[event_name][field_name]['values'][value] = field_str
+
+def flag_str(event_name, field_name, value):
+    string = ""
+
+    if flag_fields[event_name][field_name]:
+	print_delim = 0
+        keys = flag_fields[event_name][field_name]['values'].keys()
+        keys.sort()
+        for idx in keys:
+            if not value and not idx:
+                string += flag_fields[event_name][field_name]['values'][idx]
+                break
+            if idx and (value & idx) == idx:
+                if print_delim and flag_fields[event_name][field_name]['delim']:
+                    string += " " + flag_fields[event_name][field_name]['delim'] + " "
+                string += flag_fields[event_name][field_name]['values'][idx]
+                print_delim = 1
+                value &= ~idx
+
+    return string
+
+def symbol_str(event_name, field_name, value):
+    string = ""
+
+    if symbolic_fields[event_name][field_name]:
+        keys = symbolic_fields[event_name][field_name]['values'].keys()
+        keys.sort()
+        for idx in keys:
+            if not value and not idx:
+		string = symbolic_fields[event_name][field_name]['values'][idx]
+                break
+	    if (value == idx):
+		string = symbolic_fields[event_name][field_name]['values'][idx]
+                break
+
+    return string
+
+trace_flags = { 0x00: "NONE", \
+                    0x01: "IRQS_OFF", \
+                    0x02: "IRQS_NOSUPPORT", \
+                    0x04: "NEED_RESCHED", \
+                    0x08: "HARDIRQ", \
+                    0x10: "SOFTIRQ" }
+
+def trace_flag_str(value):
+    string = ""
+    print_delim = 0
+
+    keys = trace_flags.keys()
+
+    for idx in keys:
+	if not value and not idx:
+	    string += "NONE"
+	    break
+
+	if idx and (value & idx) == idx:
+	    if print_delim:
+		string += " | ";
+	    string += trace_flags[idx]
+	    print_delim = 1
+	    value &= ~idx
+
+    return string
+
+
+def taskState(state):
+	states = {
+		0 : "R",
+		1 : "S",
+		2 : "D",
+		64: "DEAD"
+	}
+
+	if state not in states:
+		return "Unknown"
+
+	return states[state]
+
+
+class EventHeaders:
+	def __init__(self, common_cpu, common_secs, common_nsecs,
+		     common_pid, common_comm, common_callchain):
+		self.cpu = common_cpu
+		self.secs = common_secs
+		self.nsecs = common_nsecs
+		self.pid = common_pid
+		self.comm = common_comm
+		self.callchain = common_callchain
+
+	def ts(self):
+		return (self.secs * (10 ** 9)) + self.nsecs
+
+	def ts_format(self):
+		return "%d.%d" % (self.secs, int(self.nsecs / 1000))
diff --git a/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
new file mode 100755
index 0000000..81a56cd
--- /dev/null
+++ b/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
@@ -0,0 +1,95 @@
+# EventClass.py
+# SPDX-License-Identifier: GPL-2.0
+#
+# This is a library defining some event type classes, which can
+# be used by other scripts to analyze the perf samples.
+#
+# Currently there are just a few classes defined as examples:
+# PerfEvent is the base class for all perf event samples, PebsEvent
+# is a HW-based Intel x86 PEBS event, and users can add more SW/HW
+# event classes based on their requirements.
+
+import struct
+
+# Event types, user could add more here
+EVTYPE_GENERIC  = 0
+EVTYPE_PEBS     = 1     # Basic PEBS event
+EVTYPE_PEBS_LL  = 2     # PEBS event with load latency info
+EVTYPE_IBS      = 3
+
+#
+# Currently we don't have a good way to tell the event type other than
+# by the size of the raw buffer: a raw PEBS event with load latency data
+# is 176 bytes, while a plain PEBS event is 144 bytes.
+#
+def create_event(name, comm, dso, symbol, raw_buf):
+        if (len(raw_buf) == 144):
+                event = PebsEvent(name, comm, dso, symbol, raw_buf)
+        elif (len(raw_buf) == 176):
+                event = PebsNHM(name, comm, dso, symbol, raw_buf)
+        else:
+                event = PerfEvent(name, comm, dso, symbol, raw_buf)
+
+        return event
+
+class PerfEvent(object):
+        event_num = 0
+        def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_GENERIC):
+                self.name       = name
+                self.comm       = comm
+                self.dso        = dso
+                self.symbol     = symbol
+                self.raw_buf    = raw_buf
+                self.ev_type    = ev_type
+                PerfEvent.event_num += 1
+
+        def show(self):
+                print "PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % (self.name, self.symbol, self.comm, self.dso)
+
+#
+# Basic Intel PEBS (Precise Event-based Sampling) event, whose raw buffer
+# contains the context info when that event happened: the EFLAGS and
+# linear IP info, as well as all the registers.
+#
+class PebsEvent(PerfEvent):
+        pebs_num = 0
+        def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_PEBS):
+                tmp_buf=raw_buf[0:80]
+                flags, ip, ax, bx, cx, dx, si, di, bp, sp = struct.unpack('QQQQQQQQQQ', tmp_buf)
+                self.flags = flags
+                self.ip    = ip
+                self.ax    = ax
+                self.bx    = bx
+                self.cx    = cx
+                self.dx    = dx
+                self.si    = si
+                self.di    = di
+                self.bp    = bp
+                self.sp    = sp
+
+                PerfEvent.__init__(self, name, comm, dso, symbol, raw_buf, ev_type)
+                PebsEvent.pebs_num += 1
+                del tmp_buf
+
+#
+# Intel Nehalem and Westmere support PEBS plus Load Latency info, which lies
+# in the four 64-bit words written after the PEBS data:
+#       Status: records the IA32_PERF_GLOBAL_STATUS register value
+#       DLA:    Data Linear Address (EIP)
+#       DSE:    Data Source Encoding, where the latency happens, hit or miss
+#               in L1/L2/L3 or IO operations
+#       LAT:    the actual latency in cycles
+#
+class PebsNHM(PebsEvent):
+        pebs_nhm_num = 0
+        def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_PEBS_LL):
+                tmp_buf=raw_buf[144:176]
+                status, dla, dse, lat = struct.unpack('QQQQ', tmp_buf)
+                self.status = status
+                self.dla = dla
+                self.dse = dse
+                self.lat = lat
+
+                PebsEvent.__init__(self, name, comm, dso, symbol, raw_buf, ev_type)
+                PebsNHM.pebs_nhm_num += 1
+                del tmp_buf
diff --git a/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
new file mode 100644
index 0000000..fdd92f6
--- /dev/null
+++ b/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
@@ -0,0 +1,184 @@
+# SchedGui.py - Python extension for perf script, basic GUI code for
+#		traces drawing and overview.
+#
+# Copyright (C) 2010 by Frederic Weisbecker <fweisbec@gmail.com>
+#
+# This software is distributed under the terms of the GNU General
+# Public License ("GPL") version 2 as published by the Free Software
+# Foundation.
+
+
+try:
+	import wx
+except ImportError:
+	raise ImportError, "You need to install the wxpython lib for this script"
+
+
+class RootFrame(wx.Frame):
+	Y_OFFSET = 100
+	RECT_HEIGHT = 100
+	RECT_SPACE = 50
+	EVENT_MARKING_WIDTH = 5
+
+	def __init__(self, sched_tracer, title, parent = None, id = -1):
+		wx.Frame.__init__(self, parent, id, title)
+
+		(self.screen_width, self.screen_height) = wx.GetDisplaySize()
+		self.screen_width -= 10
+		self.screen_height -= 10
+		self.zoom = 0.5
+		self.scroll_scale = 20
+		self.sched_tracer = sched_tracer
+		self.sched_tracer.set_root_win(self)
+		(self.ts_start, self.ts_end) = sched_tracer.interval()
+		self.update_width_virtual()
+		self.nr_rects = sched_tracer.nr_rectangles() + 1
+		self.height_virtual = RootFrame.Y_OFFSET + (self.nr_rects * (RootFrame.RECT_HEIGHT + RootFrame.RECT_SPACE))
+
+		# whole window panel
+		self.panel = wx.Panel(self, size=(self.screen_width, self.screen_height))
+
+		# scrollable container
+		self.scroll = wx.ScrolledWindow(self.panel)
+		self.scroll.SetScrollbars(self.scroll_scale, self.scroll_scale, self.width_virtual / self.scroll_scale, self.height_virtual / self.scroll_scale)
+		self.scroll.EnableScrolling(True, True)
+		self.scroll.SetFocus()
+
+		# scrollable drawing area
+		self.scroll_panel = wx.Panel(self.scroll, size=(self.screen_width - 15, self.screen_height / 2))
+		self.scroll_panel.Bind(wx.EVT_PAINT, self.on_paint)
+		self.scroll_panel.Bind(wx.EVT_KEY_DOWN, self.on_key_press)
+		self.scroll_panel.Bind(wx.EVT_LEFT_DOWN, self.on_mouse_down)
+		self.scroll.Bind(wx.EVT_PAINT, self.on_paint)
+		self.scroll.Bind(wx.EVT_KEY_DOWN, self.on_key_press)
+		self.scroll.Bind(wx.EVT_LEFT_DOWN, self.on_mouse_down)
+
+		self.scroll.Fit()
+		self.Fit()
+
+		self.scroll_panel.SetDimensions(-1, -1, self.width_virtual, self.height_virtual, wx.SIZE_USE_EXISTING)
+
+		self.txt = None
+
+		self.Show(True)
+
+	def us_to_px(self, val):
+		return val / (10 ** 3) * self.zoom
+
+	def px_to_us(self, val):
+		return (val / self.zoom) * (10 ** 3)
+
+	def scroll_start(self):
+		(x, y) = self.scroll.GetViewStart()
+		return (x * self.scroll_scale, y * self.scroll_scale)
+
+	def scroll_start_us(self):
+		(x, y) = self.scroll_start()
+		return self.px_to_us(x)
+
+	def paint_rectangle_zone(self, nr, color, top_color, start, end):
+		offset_px = self.us_to_px(start - self.ts_start)
+		width_px = self.us_to_px(end - self.ts_start)
+
+		offset_py = RootFrame.Y_OFFSET + (nr * (RootFrame.RECT_HEIGHT + RootFrame.RECT_SPACE))
+		width_py = RootFrame.RECT_HEIGHT
+
+		dc = self.dc
+
+		if top_color is not None:
+			(r, g, b) = top_color
+			top_color = wx.Colour(r, g, b)
+			brush = wx.Brush(top_color, wx.SOLID)
+			dc.SetBrush(brush)
+			dc.DrawRectangle(offset_px, offset_py, width_px, RootFrame.EVENT_MARKING_WIDTH)
+			width_py -= RootFrame.EVENT_MARKING_WIDTH
+			offset_py += RootFrame.EVENT_MARKING_WIDTH
+
+		(r ,g, b) = color
+		color = wx.Colour(r, g, b)
+		brush = wx.Brush(color, wx.SOLID)
+		dc.SetBrush(brush)
+		dc.DrawRectangle(offset_px, offset_py, width_px, width_py)
+
+	def update_rectangles(self, dc, start, end):
+		start += self.ts_start
+		end += self.ts_start
+		self.sched_tracer.fill_zone(start, end)
+
+	def on_paint(self, event):
+		dc = wx.PaintDC(self.scroll_panel)
+		self.dc = dc
+
+		width = min(self.width_virtual, self.screen_width)
+		(x, y) = self.scroll_start()
+		start = self.px_to_us(x)
+		end = self.px_to_us(x + width)
+		self.update_rectangles(dc, start, end)
+
+	def rect_from_ypixel(self, y):
+		y -= RootFrame.Y_OFFSET
+		rect = y / (RootFrame.RECT_HEIGHT + RootFrame.RECT_SPACE)
+		height = y % (RootFrame.RECT_HEIGHT + RootFrame.RECT_SPACE)
+
+		if rect < 0 or rect > self.nr_rects - 1 or height > RootFrame.RECT_HEIGHT:
+			return -1
+
+		return rect
+
+	def update_summary(self, txt):
+		if self.txt:
+			self.txt.Destroy()
+		self.txt = wx.StaticText(self.panel, -1, txt, (0, (self.screen_height / 2) + 50))
+
+
+	def on_mouse_down(self, event):
+		(x, y) = event.GetPositionTuple()
+		rect = self.rect_from_ypixel(y)
+		if rect == -1:
+			return
+
+		t = self.px_to_us(x) + self.ts_start
+
+		self.sched_tracer.mouse_down(rect, t)
+
+
+	def update_width_virtual(self):
+		self.width_virtual = self.us_to_px(self.ts_end - self.ts_start)
+
+	def __zoom(self, x):
+		self.update_width_virtual()
+		(xpos, ypos) = self.scroll.GetViewStart()
+		xpos = self.us_to_px(x) / self.scroll_scale
+		self.scroll.SetScrollbars(self.scroll_scale, self.scroll_scale, self.width_virtual / self.scroll_scale, self.height_virtual / self.scroll_scale, xpos, ypos)
+		self.Refresh()
+
+	def zoom_in(self):
+		x = self.scroll_start_us()
+		self.zoom *= 2
+		self.__zoom(x)
+
+	def zoom_out(self):
+		x = self.scroll_start_us()
+		self.zoom /= 2
+		self.__zoom(x)
+
+
+	def on_key_press(self, event):
+		key = event.GetRawKeyCode()
+		if key == ord("+"):
+			self.zoom_in()
+			return
+		if key == ord("-"):
+			self.zoom_out()
+			return
+
+		key = event.GetKeyCode()
+		(x, y) = self.scroll.GetViewStart()
+		if key == wx.WXK_RIGHT:
+			self.scroll.Scroll(x + 1, y)
+		elif key == wx.WXK_LEFT:
+			self.scroll.Scroll(x - 1, y)
+		elif key == wx.WXK_DOWN:
+			self.scroll.Scroll(x, y + 1)
+		elif key == wx.WXK_UP:
+			self.scroll.Scroll(x, y - 1)
diff --git a/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
new file mode 100644
index 0000000..f6c8496
--- /dev/null
+++ b/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -0,0 +1,90 @@
+# Util.py - Python extension for perf script, miscellaneous utility code
+#
+# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
+#
+# This software may be distributed under the terms of the GNU General
+# Public License ("GPL") version 2 as published by the Free Software
+# Foundation.
+
+import errno, os
+
+FUTEX_WAIT = 0
+FUTEX_WAKE = 1
+FUTEX_PRIVATE_FLAG = 128
+FUTEX_CLOCK_REALTIME = 256
+FUTEX_CMD_MASK = ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
+
+NSECS_PER_SEC    = 1000000000
+
+def avg(total, n):
+    return total / n
+
+def nsecs(secs, nsecs):
+    return secs * NSECS_PER_SEC + nsecs
+
+def nsecs_secs(nsecs):
+    return nsecs / NSECS_PER_SEC
+
+def nsecs_nsecs(nsecs):
+    return nsecs % NSECS_PER_SEC
+
+def nsecs_str(nsecs):
+    # Format as "secs.nanosecs"; returns a plain string (not a 1-tuple).
+    return "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs))
+
+def add_stats(dict, key, value):
+	if not dict.has_key(key):
+		dict[key] = (value, value, value, 1)
+	else:
+		min, max, avg, count = dict[key]
+		if value < min:
+			min = value
+		if value > max:
+			max = value
+		avg = (avg + value) / 2
+		dict[key] = (min, max, avg, count + 1)
+
+def clear_term():
+    print("\x1b[H\x1b[2J")
+
+audit_package_warned = False
+
+try:
+	import audit
+	machine_to_id = {
+		'x86_64': audit.MACH_86_64,
+		'alpha'	: audit.MACH_ALPHA,
+		'ia64'	: audit.MACH_IA64,
+		'ppc'	: audit.MACH_PPC,
+		'ppc64'	: audit.MACH_PPC64,
+		'ppc64le' : audit.MACH_PPC64LE,
+		's390'	: audit.MACH_S390,
+		's390x'	: audit.MACH_S390X,
+		'i386'	: audit.MACH_X86,
+		'i586'	: audit.MACH_X86,
+		'i686'	: audit.MACH_X86,
+	}
+	try:
+		machine_to_id['armeb'] = audit.MACH_ARMEB
+	except:
+		pass
+	machine_id = machine_to_id[os.uname()[4]]
+except:
+	if not audit_package_warned:
+		audit_package_warned = True
+		print "Install the audit-libs-python package to get syscall names.\n" \
+                    "For example:\n  # apt-get install python-audit (Ubuntu)" \
+                    "\n  # yum install audit-libs-python (Fedora)" \
+                    "\n  etc.\n"
+
+def syscall_name(id):
+	try:
+		return audit.audit_syscall_to_name(id, machine_id)
+	except:
+		return str(id)
+
+def strerror(nr):
+	try:
+		return errno.errorcode[abs(nr)]
+	except:
+		return "Unknown %d errno" % nr
diff --git a/scripts/python/bin/compaction-times-record b/scripts/python/bin/compaction-times-record
new file mode 100644
index 0000000..6edcd40
--- /dev/null
+++ b/scripts/python/bin/compaction-times-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e compaction:mm_compaction_begin -e compaction:mm_compaction_end -e compaction:mm_compaction_migratepages -e compaction:mm_compaction_isolate_migratepages -e compaction:mm_compaction_isolate_freepages "$@"
diff --git a/scripts/python/bin/compaction-times-report b/scripts/python/bin/compaction-times-report
new file mode 100644
index 0000000..3dc1389
--- /dev/null
+++ b/scripts/python/bin/compaction-times-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+#description: display time taken by mm compaction
+#args: [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex]
+perf script -s "$PERF_EXEC_PATH"/scripts/python/compaction-times.py "$@"
diff --git a/scripts/python/bin/event_analyzing_sample-record b/scripts/python/bin/event_analyzing_sample-record
new file mode 100644
index 0000000..5ce652d
--- /dev/null
+++ b/scripts/python/bin/event_analyzing_sample-record
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+#
+# event_analyzing_sample.py can cover all types of perf samples including
+# the tracepoints, so no special record requirements, just record what
+# you want to analyze.
+#
+perf record "$@"
diff --git a/scripts/python/bin/event_analyzing_sample-report b/scripts/python/bin/event_analyzing_sample-report
new file mode 100644
index 0000000..0941fc9
--- /dev/null
+++ b/scripts/python/bin/event_analyzing_sample-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: analyze all perf samples
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/event_analyzing_sample.py
diff --git a/scripts/python/bin/export-to-postgresql-record b/scripts/python/bin/export-to-postgresql-record
new file mode 100644
index 0000000..221d66e
--- /dev/null
+++ b/scripts/python/bin/export-to-postgresql-record
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+#
+# export perf data to a postgresql database. Can cover
+# perf ip samples (excluding the tracepoints). No special
+# record requirements, just record what you want to export.
+#
+perf record "$@"
diff --git a/scripts/python/bin/export-to-postgresql-report b/scripts/python/bin/export-to-postgresql-report
new file mode 100644
index 0000000..cd335b6
--- /dev/null
+++ b/scripts/python/bin/export-to-postgresql-report
@@ -0,0 +1,29 @@
+#!/bin/bash
+# description: export perf data to a postgresql database
+# args: [database name] [columns] [calls]
+n_args=0
+for i in "$@"
+do
+    if expr match "$i" "-" > /dev/null ; then
+	break
+    fi
+    n_args=$(( $n_args + 1 ))
+done
+if [ "$n_args" -gt 3 ] ; then
+    echo "usage: export-to-postgresql-report [database name] [columns] [calls]"
+    exit 1
+fi
+if [ "$n_args" -gt 2 ] ; then
+    dbname=$1
+    columns=$2
+    calls=$3
+    shift 3
+elif [ "$n_args" -gt 1 ] ; then
+    dbname=$1
+    columns=$2
+    shift 2
+elif [ "$n_args" -gt 0 ] ; then
+    dbname=$1
+    shift
+fi
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/export-to-postgresql.py $dbname $columns $calls
diff --git a/scripts/python/bin/export-to-sqlite-record b/scripts/python/bin/export-to-sqlite-record
new file mode 100644
index 0000000..070204f
--- /dev/null
+++ b/scripts/python/bin/export-to-sqlite-record
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+#
+# export perf data to a sqlite3 database. Can cover
+# perf ip samples (excluding the tracepoints). No special
+# record requirements, just record what you want to export.
+#
+perf record "$@"
diff --git a/scripts/python/bin/export-to-sqlite-report b/scripts/python/bin/export-to-sqlite-report
new file mode 100644
index 0000000..5ff6033
--- /dev/null
+++ b/scripts/python/bin/export-to-sqlite-report
@@ -0,0 +1,29 @@
+#!/bin/bash
+# description: export perf data to a sqlite3 database
+# args: [database name] [columns] [calls]
+n_args=0
+for i in "$@"
+do
+    if expr match "$i" "-" > /dev/null ; then
+	break
+    fi
+    n_args=$(( $n_args + 1 ))
+done
+if [ "$n_args" -gt 3 ] ; then
+    echo "usage: export-to-sqlite-report [database name] [columns] [calls]"
+    exit 1
+fi
+if [ "$n_args" -gt 2 ] ; then
+    dbname=$1
+    columns=$2
+    calls=$3
+    shift 3
+elif [ "$n_args" -gt 1 ] ; then
+    dbname=$1
+    columns=$2
+    shift 2
+elif [ "$n_args" -gt 0 ] ; then
+    dbname=$1
+    shift
+fi
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/export-to-sqlite.py $dbname $columns $calls
diff --git a/scripts/python/bin/failed-syscalls-by-pid-record b/scripts/python/bin/failed-syscalls-by-pid-record
new file mode 100644
index 0000000..74685f3
--- /dev/null
+++ b/scripts/python/bin/failed-syscalls-by-pid-record
@@ -0,0 +1,3 @@
+#!/bin/bash
+(perf record -e raw_syscalls:sys_exit "$@" || \
+ perf record -e syscalls:sys_exit "$@") 2> /dev/null
diff --git a/scripts/python/bin/failed-syscalls-by-pid-report b/scripts/python/bin/failed-syscalls-by-pid-report
new file mode 100644
index 0000000..fda5096
--- /dev/null
+++ b/scripts/python/bin/failed-syscalls-by-pid-report
@@ -0,0 +1,10 @@
+#!/bin/bash
+# description: system-wide failed syscalls, by pid
+# args: [comm]
+if [ $# -gt 0 ] ; then
+    if ! expr match "$1" "-" > /dev/null ; then
+	comm=$1
+	shift
+    fi
+fi
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
diff --git a/scripts/python/bin/futex-contention-record b/scripts/python/bin/futex-contention-record
new file mode 100644
index 0000000..b1495c9
--- /dev/null
+++ b/scripts/python/bin/futex-contention-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e syscalls:sys_enter_futex -e syscalls:sys_exit_futex "$@"
diff --git a/scripts/python/bin/futex-contention-report b/scripts/python/bin/futex-contention-report
new file mode 100644
index 0000000..6c44271
--- /dev/null
+++ b/scripts/python/bin/futex-contention-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+# description: futex contention measurement
+
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
diff --git a/scripts/python/bin/intel-pt-events-record b/scripts/python/bin/intel-pt-events-record
new file mode 100644
index 0000000..10fe2b6
--- /dev/null
+++ b/scripts/python/bin/intel-pt-events-record
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+#
+# print Intel PT Power Events and PTWRITE. The intel_pt PMU event needs
+# to be specified with appropriate config terms.
+#
+if ! echo "$@" | grep -q intel_pt ; then
+	echo "Options must include the Intel PT event e.g. -e intel_pt/pwr_evt,ptw/"
+	echo "and for power events it probably needs to be system wide i.e. -a option"
+	echo "For example: -a -e intel_pt/pwr_evt,branch=0/ sleep 1"
+	exit 1
+fi
+perf record "$@"
diff --git a/scripts/python/bin/intel-pt-events-report b/scripts/python/bin/intel-pt-events-report
new file mode 100644
index 0000000..9a9c92f
--- /dev/null
+++ b/scripts/python/bin/intel-pt-events-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: print Intel PT Power Events and PTWRITE
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/intel-pt-events.py
\ No newline at end of file
diff --git a/scripts/python/bin/mem-phys-addr-record b/scripts/python/bin/mem-phys-addr-record
new file mode 100644
index 0000000..5a87512
--- /dev/null
+++ b/scripts/python/bin/mem-phys-addr-record
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+#
+# Profiling physical memory by all retired load instructions/uops event
+# MEM_INST_RETIRED.ALL_LOADS or MEM_UOPS_RETIRED.ALL_LOADS
+#
+
+load=`perf list | grep mem_inst_retired.all_loads`
+if [ -z "$load" ]; then
+	load=`perf list | grep mem_uops_retired.all_loads`
+fi
+if [ -z "$load" ]; then
+	echo "There is no event to count all retired load instructions/uops."
+	exit 1
+fi
+
+arg=$(echo $load | tr -d ' ')
+arg="$arg:P"
+perf record --phys-data -e "$arg" "$@"
diff --git a/scripts/python/bin/mem-phys-addr-report b/scripts/python/bin/mem-phys-addr-report
new file mode 100644
index 0000000..3f2b847
--- /dev/null
+++ b/scripts/python/bin/mem-phys-addr-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: resolve physical address samples
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/mem-phys-addr.py
diff --git a/scripts/python/bin/net_dropmonitor-record b/scripts/python/bin/net_dropmonitor-record
new file mode 100755
index 0000000..423fb81
--- /dev/null
+++ b/scripts/python/bin/net_dropmonitor-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e skb:kfree_skb "$@"
diff --git a/scripts/python/bin/net_dropmonitor-report b/scripts/python/bin/net_dropmonitor-report
new file mode 100755
index 0000000..8d698f5
--- /dev/null
+++ b/scripts/python/bin/net_dropmonitor-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+# description: display a table of dropped frames
+
+perf script -s "$PERF_EXEC_PATH"/scripts/python/net_dropmonitor.py "$@"
diff --git a/scripts/python/bin/netdev-times-record b/scripts/python/bin/netdev-times-record
new file mode 100644
index 0000000..558754b
--- /dev/null
+++ b/scripts/python/bin/netdev-times-record
@@ -0,0 +1,8 @@
+#!/bin/bash
+perf record -e net:net_dev_xmit -e net:net_dev_queue		\
+		-e net:netif_receive_skb -e net:netif_rx		\
+		-e skb:consume_skb -e skb:kfree_skb			\
+		-e skb:skb_copy_datagram_iovec -e napi:napi_poll	\
+		-e irq:irq_handler_entry -e irq:irq_handler_exit	\
+		-e irq:softirq_entry -e irq:softirq_exit		\
+		-e irq:softirq_raise "$@"
diff --git a/scripts/python/bin/netdev-times-report b/scripts/python/bin/netdev-times-report
new file mode 100644
index 0000000..8f75929
--- /dev/null
+++ b/scripts/python/bin/netdev-times-report
@@ -0,0 +1,5 @@
+#!/bin/bash
+# description: display a process of packet and processing time
+# args: [tx] [rx] [dev=] [debug]
+
+perf script -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py "$@"
diff --git a/scripts/python/bin/sched-migration-record b/scripts/python/bin/sched-migration-record
new file mode 100644
index 0000000..7493fdd
--- /dev/null
+++ b/scripts/python/bin/sched-migration-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -m 16384 -e sched:sched_wakeup -e sched:sched_wakeup_new -e sched:sched_switch -e sched:sched_migrate_task "$@"
diff --git a/scripts/python/bin/sched-migration-report b/scripts/python/bin/sched-migration-report
new file mode 100644
index 0000000..68b037a
--- /dev/null
+++ b/scripts/python/bin/sched-migration-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: sched migration overview
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
diff --git a/scripts/python/bin/sctop-record b/scripts/python/bin/sctop-record
new file mode 100644
index 0000000..d694084
--- /dev/null
+++ b/scripts/python/bin/sctop-record
@@ -0,0 +1,3 @@
+#!/bin/bash
+(perf record -e raw_syscalls:sys_enter "$@" || \
+ perf record -e syscalls:sys_enter "$@") 2> /dev/null
diff --git a/scripts/python/bin/sctop-report b/scripts/python/bin/sctop-report
new file mode 100644
index 0000000..c32db29
--- /dev/null
+++ b/scripts/python/bin/sctop-report
@@ -0,0 +1,24 @@
+#!/bin/bash
+# description: syscall top
+# args: [comm] [interval]
+n_args=0
+for i in "$@"
+do
+    if expr match "$i" "-" > /dev/null ; then
+	break
+    fi
+    n_args=$(( $n_args + 1 ))
+done
+if [ "$n_args" -gt 2 ] ; then
+    echo "usage: sctop-report [comm] [interval]"
+    exit 1
+fi
+if [ "$n_args" -gt 1 ] ; then
+    comm=$1
+    interval=$2
+    shift 2
+elif [ "$n_args" -gt 0 ] ; then
+    interval=$1
+    shift
+fi
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
diff --git a/scripts/python/bin/stackcollapse-record b/scripts/python/bin/stackcollapse-record
new file mode 100755
index 0000000..9d8f9f0
--- /dev/null
+++ b/scripts/python/bin/stackcollapse-record
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+#
+# stackcollapse.py can cover all types of perf samples including
+# the tracepoints, so no special record requirements, just record what
+# you want to analyze.
+#
+perf record "$@"
diff --git a/scripts/python/bin/stackcollapse-report b/scripts/python/bin/stackcollapse-report
new file mode 100755
index 0000000..356b965
--- /dev/null
+++ b/scripts/python/bin/stackcollapse-report
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: produce callgraphs in short form for scripting use
+perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@"
diff --git a/scripts/python/bin/syscall-counts-by-pid-record b/scripts/python/bin/syscall-counts-by-pid-record
new file mode 100644
index 0000000..d694084
--- /dev/null
+++ b/scripts/python/bin/syscall-counts-by-pid-record
@@ -0,0 +1,3 @@
+#!/bin/bash
+(perf record -e raw_syscalls:sys_enter "$@" || \
+ perf record -e syscalls:sys_enter "$@") 2> /dev/null
diff --git a/scripts/python/bin/syscall-counts-by-pid-report b/scripts/python/bin/syscall-counts-by-pid-report
new file mode 100644
index 0000000..16eb8d6
--- /dev/null
+++ b/scripts/python/bin/syscall-counts-by-pid-report
@@ -0,0 +1,10 @@
+#!/bin/bash
+# description: system-wide syscall counts, by pid
+# args: [comm]
+if [ $# -gt 0 ] ; then
+    if ! expr match "$1" "-" > /dev/null ; then
+	comm=$1
+	shift
+    fi
+fi
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
diff --git a/scripts/python/bin/syscall-counts-record b/scripts/python/bin/syscall-counts-record
new file mode 100644
index 0000000..d694084
--- /dev/null
+++ b/scripts/python/bin/syscall-counts-record
@@ -0,0 +1,3 @@
+#!/bin/bash
+(perf record -e raw_syscalls:sys_enter "$@" || \
+ perf record -e syscalls:sys_enter "$@") 2> /dev/null
diff --git a/scripts/python/bin/syscall-counts-report b/scripts/python/bin/syscall-counts-report
new file mode 100644
index 0000000..0f0e9d4
--- /dev/null
+++ b/scripts/python/bin/syscall-counts-report
@@ -0,0 +1,10 @@
+#!/bin/bash
+# description: system-wide syscall counts
+# args: [comm]
+if [ $# -gt 0 ] ; then
+    if ! expr match "$1" "-" > /dev/null ; then
+	comm=$1
+	shift
+    fi
+fi
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
diff --git a/scripts/python/call-graph-from-sql.py b/scripts/python/call-graph-from-sql.py
new file mode 100644
index 0000000..b494a67
--- /dev/null
+++ b/scripts/python/call-graph-from-sql.py
@@ -0,0 +1,339 @@
+#!/usr/bin/python2
+# call-graph-from-sql.py: create call-graph from sql database
+# Copyright (c) 2014-2017, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+
+# To use this script you will need to have exported data using either the
+# export-to-sqlite.py or the export-to-postgresql.py script.  Refer to those
+# scripts for details.
+#
+# Following on from the example in the export scripts, a
+# call-graph can be displayed for the pt_example database like this:
+#
+#	python tools/perf/scripts/python/call-graph-from-sql.py pt_example
+#
+# Note that for PostgreSQL, this script supports connecting to remote databases
+# by setting hostname, port, username, password, and dbname e.g.
+#
+#	python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
+#
+# The result is a GUI window with a tree representing a context-sensitive
+# call-graph.  Expanding a couple of levels of the tree and adjusting column
+# widths to suit will display something like:
+#
+#                                         Call Graph: pt_example
+# Call Path                          Object      Count   Time(ns)  Time(%)  Branch Count   Branch Count(%)
+# v- ls
+#     v- 2638:2638
+#         v- _start                  ld-2.19.so    1     10074071   100.0         211135            100.0
+#           |- unknown               unknown       1        13198     0.1              1              0.0
+#           >- _dl_start             ld-2.19.so    1      1400980    13.9          19637              9.3
+#           >- _dl_init_internal     ld-2.19.so    1       448152     4.4          11094              5.3
+#           v-__libc_start_main@plt  ls            1      8211741    81.5         180397             85.4
+#              >- _dl_fixup          ld-2.19.so    1         7607     0.1            108              0.1
+#              >- __cxa_atexit       libc-2.19.so  1        11737     0.1             10              0.0
+#              >- __libc_csu_init    ls            1        10354     0.1             10              0.0
+#              |- _setjmp            libc-2.19.so  1            0     0.0              4              0.0
+#              v- main               ls            1      8182043    99.6         180254             99.9
+#
+# Points to note:
+#	The top level is a command name (comm)
+#	The next level is a thread (pid:tid)
+#	Subsequent levels are functions
+#	'Count' is the number of calls
+#	'Time' is the elapsed time until the function returns
+#	Percentages are relative to the level above
+#	'Branch Count' is the total number of branches for that function and all
+#       functions that it calls
+
+import sys
+from PySide.QtCore import *
+from PySide.QtGui import *
+from PySide.QtSql import *
+from decimal import *
+
+class TreeItem():
+
+	def __init__(self, db, row, parent_item):
+		self.db = db
+		self.row = row
+		self.parent_item = parent_item
+		self.query_done = False;
+		self.child_count = 0
+		self.child_items = []
+		self.data = ["", "", "", "", "", "", ""]
+		self.comm_id = 0
+		self.thread_id = 0
+		self.call_path_id = 1
+		self.branch_count = 0
+		self.time = 0
+		if not parent_item:
+			self.setUpRoot()
+
+	def setUpRoot(self):
+		self.query_done = True
+		query = QSqlQuery(self.db)
+		ret = query.exec_('SELECT id, comm FROM comms')
+		if not ret:
+			raise Exception("Query failed: " + query.lastError().text())
+		while query.next():
+			if not query.value(0):
+				continue
+			child_item = TreeItem(self.db, self.child_count, self)
+			self.child_items.append(child_item)
+			self.child_count += 1
+			child_item.setUpLevel1(query.value(0), query.value(1))
+
+	def setUpLevel1(self, comm_id, comm):
+		self.query_done = True;
+		self.comm_id = comm_id
+		self.data[0] = comm
+		self.child_items = []
+		self.child_count = 0
+		query = QSqlQuery(self.db)
+		ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
+		if not ret:
+			raise Exception("Query failed: " + query.lastError().text())
+		while query.next():
+			child_item = TreeItem(self.db, self.child_count, self)
+			self.child_items.append(child_item)
+			self.child_count += 1
+			child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2))
+
+	def setUpLevel2(self, comm_id, thread_id, pid, tid):
+		self.comm_id = comm_id
+		self.thread_id = thread_id
+		self.data[0] = str(pid) + ":" + str(tid)
+
+	def getChildItem(self, row):
+		return self.child_items[row]
+
+	def getParentItem(self):
+		return self.parent_item
+
+	def getRow(self):
+		return self.row
+
+	def timePercent(self, b):
+		if not self.time:
+			return "0.0"
+		x = (b * Decimal(100)) / self.time
+		return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
+
+	def branchPercent(self, b):
+		if not self.branch_count:
+			return "0.0"
+		x = (b * Decimal(100)) / self.branch_count
+		return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
+
+	def addChild(self, call_path_id, name, dso, count, time, branch_count):
+		child_item = TreeItem(self.db, self.child_count, self)
+		child_item.comm_id = self.comm_id
+		child_item.thread_id = self.thread_id
+		child_item.call_path_id = call_path_id
+		child_item.branch_count = branch_count
+		child_item.time = time
+		child_item.data[0] = name
+		if dso == "[kernel.kallsyms]":
+			dso = "[kernel]"
+		child_item.data[1] = dso
+		child_item.data[2] = str(count)
+		child_item.data[3] = str(time)
+		child_item.data[4] = self.timePercent(time)
+		child_item.data[5] = str(branch_count)
+		child_item.data[6] = self.branchPercent(branch_count)
+		self.child_items.append(child_item)
+		self.child_count += 1
+
+	def selectCalls(self):
+		self.query_done = True;
+		query = QSqlQuery(self.db)
+		ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, '
+				  '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), '
+				  '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), '
+				  '( SELECT ip FROM call_paths where id = call_path_id ) '
+				  'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) +
+				  ' ORDER BY call_path_id')
+		if not ret:
+			raise Exception("Query failed: " + query.lastError().text())
+		last_call_path_id = 0
+		name = ""
+		dso = ""
+		count = 0
+		branch_count = 0
+		total_branch_count = 0
+		time = 0
+		total_time = 0
+		while query.next():
+			if query.value(1) == last_call_path_id:
+				count += 1
+				branch_count += query.value(2)
+				time += query.value(4) - query.value(3)
+			else:
+				if count:
+					self.addChild(last_call_path_id, name, dso, count, time, branch_count)
+				last_call_path_id = query.value(1)
+				name = query.value(5)
+				dso = query.value(6)
+				count = 1
+				total_branch_count += branch_count
+				total_time += time
+				branch_count = query.value(2)
+				time = query.value(4) - query.value(3)
+		if count:
+			self.addChild(last_call_path_id, name, dso, count, time, branch_count)
+		total_branch_count += branch_count
+		total_time += time
+		# Top level does not have time or branch count, so fix that here
+		if total_branch_count > self.branch_count:
+			self.branch_count = total_branch_count
+			if self.branch_count:
+				for child_item in self.child_items:
+					child_item.data[6] = self.branchPercent(child_item.branch_count)
+		if total_time > self.time:
+			self.time = total_time
+			if self.time:
+				for child_item in self.child_items:
+					child_item.data[4] = self.timePercent(child_item.time)
+
+	def childCount(self):
+		if not self.query_done:
+			self.selectCalls()
+		return self.child_count
+
+	def columnCount(self):
+		return 7
+
+	def columnHeader(self, column):
+		headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
+		return headers[column]
+
+	def getData(self, column):
+		return self.data[column]
+
+class TreeModel(QAbstractItemModel):
+
+	def __init__(self, db, parent=None):
+		super(TreeModel, self).__init__(parent)
+		self.db = db
+		self.root = TreeItem(db, 0, None)
+
+	def columnCount(self, parent):
+		return self.root.columnCount()
+
+	def rowCount(self, parent):
+		if parent.isValid():
+			parent_item = parent.internalPointer()
+		else:
+			parent_item = self.root
+		return parent_item.childCount()
+
+	def headerData(self, section, orientation, role):
+		if role == Qt.TextAlignmentRole:
+			if section > 1:
+				return Qt.AlignRight
+		if role != Qt.DisplayRole:
+			return None
+		if orientation != Qt.Horizontal:
+			return None
+		return self.root.columnHeader(section)
+
+	def parent(self, child):
+		child_item = child.internalPointer()
+		if child_item is self.root:
+			return QModelIndex()
+		parent_item = child_item.getParentItem()
+		return self.createIndex(parent_item.getRow(), 0, parent_item)
+
+	def index(self, row, column, parent):
+		if parent.isValid():
+			parent_item = parent.internalPointer()
+		else:
+			parent_item = self.root
+		child_item = parent_item.getChildItem(row)
+		return self.createIndex(row, column, child_item)
+
+	def data(self, index, role):
+		if role == Qt.TextAlignmentRole:
+			if index.column() > 1:
+				return Qt.AlignRight
+		if role != Qt.DisplayRole:
+			return None
+		index_item = index.internalPointer()
+		return index_item.getData(index.column())
+
+class MainWindow(QMainWindow):
+
+	def __init__(self, db, dbname, parent=None):
+		super(MainWindow, self).__init__(parent)
+
+		self.setObjectName("MainWindow")
+		self.setWindowTitle("Call Graph: " + dbname)
+		self.move(100, 100)
+		self.resize(800, 600)
+		style = self.style()
+		icon = style.standardIcon(QStyle.SP_MessageBoxInformation)
+		self.setWindowIcon(icon);
+
+		self.model = TreeModel(db)
+
+		self.view = QTreeView()
+		self.view.setModel(self.model)
+
+		self.setCentralWidget(self.view)
+
+if __name__ == '__main__':
+	if (len(sys.argv) < 2):
+		print >> sys.stderr, "Usage is: call-graph-from-sql.py <database name>"
+		raise Exception("Too few arguments")
+
+	dbname = sys.argv[1]
+
+	is_sqlite3 = False
+	try:
+		# 'with' closes the file even when read() raises
+		with open(dbname) as f:
+			if f.read(15) == "SQLite format 3":
+				is_sqlite3 = True
+	except Exception:	# not readable as a file: fall through to the PostgreSQL path
+		pass
+
+	if is_sqlite3:
+		db = QSqlDatabase.addDatabase('QSQLITE')
+	else:
+		db = QSqlDatabase.addDatabase('QPSQL')
+		opts = dbname.split()
+		for opt in opts:
+			if '=' in opt:
+				opt = opt.split('=', 1)	# first '=' only: a value (e.g. a password) may contain '='
+				if opt[0] == 'hostname':
+					db.setHostName(opt[1])
+				elif opt[0] == 'port':
+					db.setPort(int(opt[1]))
+				elif opt[0] == 'username':
+					db.setUserName(opt[1])
+				elif opt[0] == 'password':
+					db.setPassword(opt[1])
+				elif opt[0] == 'dbname':
+					dbname = opt[1]
+			else:
+				dbname = opt
+
+	db.setDatabaseName(dbname)
+	if not db.open():
+		raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
+
+	app = QApplication(sys.argv)
+	window = MainWindow(db, dbname)
+	window.show()
+	err = app.exec_()
+	db.close()
+	sys.exit(err)
diff --git a/scripts/python/check-perf-trace.py b/scripts/python/check-perf-trace.py
new file mode 100644
index 0000000..334599c
--- /dev/null
+++ b/scripts/python/check-perf-trace.py
@@ -0,0 +1,82 @@
+# perf script event handlers, generated by perf script -g python
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# This script tests basic functionality such as flag and symbol
+# strings, common_xxx() calls back into perf, begin, end, unhandled
+# events, etc.  Basically, if this script runs successfully and
+# displays expected results, Python scripting support should be ok.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from Core import *
+from perf_trace_context import *
+
+unhandled = autodict()
+
+def trace_begin():
+	print "trace_begin"
+	pass
+
+def trace_end():
+        print_unhandled()
+
+def irq__softirq_entry(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, vec):
+		print_header(event_name, common_cpu, common_secs, common_nsecs,
+			common_pid, common_comm)
+
+                print_uncommon(context)
+
+		print "vec=%s\n" % \
+		(symbol_str("irq__softirq_entry", "vec", vec)),
+
+def kmem__kmalloc(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, call_site, ptr, bytes_req, bytes_alloc,
+	gfp_flags):
+		print_header(event_name, common_cpu, common_secs, common_nsecs,
+			common_pid, common_comm)
+
+                print_uncommon(context)
+
+		print "call_site=%u, ptr=%u, bytes_req=%u, " \
+		"bytes_alloc=%u, gfp_flags=%s\n" % \
+		(call_site, ptr, bytes_req, bytes_alloc,
+
+		flag_str("kmem__kmalloc", "gfp_flags", gfp_flags)),
+
+def trace_unhandled(event_name, context, event_fields_dict):
+    try:
+        unhandled[event_name] += 1
+    except TypeError:
+        unhandled[event_name] = 1
+
+def print_header(event_name, cpu, secs, nsecs, pid, comm):
+	print "%-20s %5u %05u.%09u %8u %-20s " % \
+	(event_name, cpu, secs, nsecs, pid, comm),
+
+# print trace fields not included in handler args
+def print_uncommon(context):
+    print "common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, " \
+        % (common_pc(context), trace_flag_str(common_flags(context)), \
+               common_lock_depth(context))
+
+def print_unhandled():
+    keys = unhandled.keys()
+    if not keys:
+        return
+    # table of the per-event counts accumulated in 'unhandled'
+    print "\nunhandled events:\n\n",
+
+    print "%-40s  %10s\n" % ("event", "count"),
+    print "%-40s  %10s\n" % ("----------------------------------------", \
+                                 "-----------"),
+
+    for event_name in keys:
+        print "%-40s  %10d\n" % (event_name, unhandled[event_name])
diff --git a/scripts/python/compaction-times.py b/scripts/python/compaction-times.py
new file mode 100644
index 0000000..239cb05
--- /dev/null
+++ b/scripts/python/compaction-times.py
@@ -0,0 +1,311 @@
+# report time spent in compaction
+# Licensed under the terms of the GNU GPL License version 2
+
+# testing:
+# 'echo 1 > /proc/sys/vm/compact_memory' to force compaction of all zones
+
+import os
+import sys
+import re
+
+import signal
+signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+usage = "usage: perf script report compaction-times.py -- [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex]\n"
+
+class popt:
+	DISP_DFL = 0
+	DISP_PROC = 1
+	DISP_PROC_VERBOSE=2
+
+class topt:
+	DISP_TIME = 0
+	DISP_MIG = 1
+	DISP_ISOLFREE = 2
+	DISP_ISOLMIG = 4
+	DISP_ALL = 7
+
+class comm_filter:	# filters out every comm the compiled regex does not match
+	def __init__(self, re):
+		self.re = re	# compiled pattern object (the parameter shadows the re module here)
+
+	def filter(self, pid, comm):	# a True result means "filter this comm out"
+		found = self.re.search(comm)
+		return not (found is not None and found.group() != "")
+
+class pid_filter:	# keeps pids in [low, high]; an empty bound ("") leaves that side open
+	def __init__(self, low, high):
+		self.low = (0 if low == "" else int(low))
+		self.high = (None if high == "" else int(high))	# None, not 0: "0" is a valid upper bound
+
+	def filter(self, pid, comm):	# a True result means "filter this pid out"
+		return not (pid >= self.low and (self.high is None or pid <= self.high))
+
+def set_type(t):
+	global opt_disp
+	opt_disp = (t if opt_disp == topt.DISP_ALL else opt_disp|t)
+
+def ns(sec, nsec):
+	return (sec * 1000000000) + nsec
+
+def time(ns):
+	return "%dns" % ns if opt_ns else "%dus" % (round(ns, -3) / 1000)
+
+class pair:
+	def __init__(self, aval, bval, alabel = None, blabel = None):
+		self.alabel = alabel
+		self.blabel = blabel
+		self.aval = aval
+		self.bval = bval
+
+	def __add__(self, rhs):
+		self.aval += rhs.aval
+		self.bval += rhs.bval
+		return self
+
+	def __str__(self):
+		return "%s=%d %s=%d" % (self.alabel, self.aval, self.blabel, self.bval)
+
+class cnode:
+	def __init__(self, ns):
+		self.ns = ns
+		self.migrated = pair(0, 0, "moved", "failed")
+		self.fscan = pair(0,0, "scanned", "isolated")
+		self.mscan = pair(0,0, "scanned", "isolated")
+
+	def __add__(self, rhs):
+		self.ns += rhs.ns
+		self.migrated += rhs.migrated
+		self.fscan += rhs.fscan
+		self.mscan += rhs.mscan
+		return self
+
+	def __str__(self):
+		prev = 0
+		s = "%s " % time(self.ns)
+		if (opt_disp & topt.DISP_MIG):
+			s += "migration: %s" % self.migrated
+			prev = 1
+		if (opt_disp & topt.DISP_ISOLFREE):
+			s += "%sfree_scanner: %s" % (" " if prev else "", self.fscan)
+			prev = 1
+		if (opt_disp & topt.DISP_ISOLMIG):
+			s += "%smigration_scanner: %s" % (" " if prev else "", self.mscan)
+		return s
+
+	def complete(self, secs, nsecs):
+		self.ns = ns(secs, nsecs) - self.ns
+
+	def increment(self, migrated, fscan, mscan):
+		if (migrated != None):
+			self.migrated += migrated
+		if (fscan != None):
+			self.fscan += fscan
+		if (mscan != None):
+			self.mscan += mscan
+
+
+class chead:	# per-pid compaction state, plus class-level totals across all pids
+	heads = {}	# pid -> chead, filled in by create_pending()
+	val = cnode(0)	# running total, updated in make_complete()
+	fobj = None	# filter object installed by add_filter(), or None
+
+	@classmethod
+	def add_filter(cls, filter):
+		cls.fobj = filter
+
+	@classmethod
+	def create_pending(cls, pid, comm, start_secs, start_nsecs):
+		filtered = 0
+		try:
+			head = cls.heads[pid]
+			filtered = head.is_filtered()
+		except KeyError:
+			if cls.fobj != None:
+				filtered = cls.fobj.filter(pid, comm)
+			head = cls.heads[pid] = chead(comm, pid, filtered)
+
+		if not filtered:
+			head.mark_pending(start_secs, start_nsecs)
+
+	@classmethod
+	def increment_pending(cls, pid, migrated, fscan, mscan):
+		# no begin event seen for this pid -> no head: warn rather than raise KeyError
+		head = cls.heads.get(pid)
+		if head is not None and head.is_pending():	# filtered heads are never marked pending
+			head.do_increment(migrated, fscan, mscan)
+		elif head is None or not head.is_filtered():
+			sys.stderr.write("missing start compaction event for pid %d\n" % pid)
+
+	@classmethod
+	def complete_pending(cls, pid, secs, nsecs):
+		# same lookup rule as increment_pending()
+		head = cls.heads.get(pid)
+		if head is not None and head.is_pending():
+			head.make_complete(secs, nsecs)
+		elif head is None or not head.is_filtered():
+			sys.stderr.write("missing start compaction event for pid %d\n" % pid)
+
+	@classmethod
+	def gen(cls):
+		if opt_proc != popt.DISP_DFL:
+			for i in cls.heads:
+				yield cls.heads[i]
+
+	@classmethod
+	def str(cls):
+		return cls.val
+
+	def __init__(self, comm, pid, filtered):
+		self.comm = comm
+		self.pid = pid
+		self.val = cnode(0)
+		self.pending = None
+		self.filtered = filtered
+		self.list = []
+
+	def __add__(self, rhs):
+		# a chead has no 'ns' attribute (the time lives in val.ns), so only val is summed
+		self.val += rhs.val
+		return self
+
+	def mark_pending(self, secs, nsecs):
+		self.pending = cnode(ns(secs, nsecs))
+
+	def do_increment(self, migrated, fscan, mscan):
+		self.pending.increment(migrated, fscan, mscan)
+
+	def make_complete(self, secs, nsecs):
+		self.pending.complete(secs, nsecs)
+		chead.val += self.pending
+
+		if opt_proc != popt.DISP_DFL:
+			self.val += self.pending
+
+			if opt_proc == popt.DISP_PROC_VERBOSE:
+				self.list.append(self.pending)
+		self.pending = None
+
+	def enumerate(self):
+		if opt_proc == popt.DISP_PROC_VERBOSE and not self.is_filtered():
+			for i, pelem in enumerate(self.list):
+				sys.stdout.write("%d[%s].%d: %s\n" % (self.pid, self.comm, i+1, pelem))
+
+	def is_pending(self):
+		return self.pending is not None
+
+	def is_filtered(self):
+		return self.filtered
+
+	def display(self):
+		if not self.is_filtered():
+			sys.stdout.write("%d[%s]: %s\n" % (self.pid, self.comm, self.val))
+
+
+def trace_end():
+	sys.stdout.write("total: %s\n" % chead.str())	# overall totals first
+	for head in chead.gen():	# yields nothing unless -p or -pv was given
+		head.display()
+		head.enumerate()
+
+def compaction__mm_compaction_migratepages(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, nr_migrated, nr_failed):
+
+	chead.increment_pending(common_pid,
+		pair(nr_migrated, nr_failed), None, None)
+
+def compaction__mm_compaction_isolate_freepages(event_name, context, common_cpu,
+        common_secs, common_nsecs, common_pid, common_comm,
+        common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken):
+
+	chead.increment_pending(common_pid,
+		None, pair(nr_scanned, nr_taken), None)
+
+def compaction__mm_compaction_isolate_migratepages(event_name, context, common_cpu,
+        common_secs, common_nsecs, common_pid, common_comm,
+        common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken):
+
+	chead.increment_pending(common_pid,
+		None, None, pair(nr_scanned, nr_taken))
+
+def compaction__mm_compaction_end(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, zone_start, migrate_start, free_start, zone_end,
+	sync, status):
+
+	chead.complete_pending(common_pid, common_secs, common_nsecs)
+
+def compaction__mm_compaction_begin(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, zone_start, migrate_start, free_start, zone_end,
+	sync):
+
+	chead.create_pending(common_pid, common_comm, common_secs, common_nsecs)
+
+def pr_help():
+	global usage
+
+	sys.stdout.write(usage)
+	sys.stdout.write("\n")
+	sys.stdout.write("-h	display this help\n")
+	sys.stdout.write("-p	display by process\n")
+	sys.stdout.write("-pv	display by process (verbose)\n")
+	sys.stdout.write("-t	display stall times only\n")
+	sys.stdout.write("-m	display stats for migration\n")
+	sys.stdout.write("-fs	display stats for free scanner\n")
+	sys.stdout.write("-ms	display stats for migration scanner\n")
+	sys.stdout.write("-u	display results in microseconds (default nanoseconds)\n")
+
+
+comm_re = None
+pid_re = None
+pid_regex = r"^(\d*)-(\d*)$|^(\d*)$"	# "low-high" (either side may be empty) or a single pid
+
+opt_proc = popt.DISP_DFL
+opt_disp = topt.DISP_ALL
+
+opt_ns = True
+
+argc = len(sys.argv) - 1
+if argc >= 1:
+	pid_re = re.compile(pid_regex)
+
+	for i, opt in enumerate(sys.argv[1:]):
+		if opt.startswith("-"):	# unlike opt[0], this is safe for an empty argument
+			if opt == "-h":
+				pr_help()
+				sys.exit(0)
+			elif opt == "-p":
+				opt_proc = popt.DISP_PROC
+			elif opt == "-pv":
+				opt_proc = popt.DISP_PROC_VERBOSE
+			elif opt == '-u':
+				opt_ns = False
+			elif opt == "-t":
+				set_type(topt.DISP_TIME)
+			elif opt == "-m":
+				set_type(topt.DISP_MIG)
+			elif opt == "-fs":
+				set_type(topt.DISP_ISOLFREE)
+			elif opt == "-ms":
+				set_type(topt.DISP_ISOLMIG)
+			else:
+				sys.exit(usage)
+
+		elif i == argc - 1:	# only the last argument is taken as the pid/comm filter
+			m = pid_re.search(opt)
+			if m != None and m.group() != "":
+				if m.group(3) != None:
+					f = pid_filter(m.group(3), m.group(3))
+				else:
+					f = pid_filter(m.group(1), m.group(2))
+			else:
+				try:
+					comm_re=re.compile(opt)
+				except Exception:
+					sys.stderr.write("invalid regex '%s'\n" % opt)	# newline keeps it apart from the usage text
+					sys.exit(usage)
+				f = comm_filter(comm_re)
+
+			chead.add_filter(f)
diff --git a/scripts/python/event_analyzing_sample.py b/scripts/python/event_analyzing_sample.py
new file mode 100644
index 0000000..4e843b9
--- /dev/null
+++ b/scripts/python/event_analyzing_sample.py
@@ -0,0 +1,190 @@
+# event_analyzing_sample.py: general event handler in python
+# SPDX-License-Identifier: GPL-2.0
+#
+# Current perf report is already very powerful with the annotation integrated,
+# and this script is not trying to be as powerful as perf report, but
+# providing end user/developer a flexible way to analyze the events other
+# than trace points.
+#
+# The 2 database related functions in this script just show how to gather
+# the basic information, and users can modify and write their own functions
+# according to their specific requirement.
+#
+# The first function "show_general_events" just does a basic grouping for all
+# generic events with the help of sqlite, and the 2nd one "show_pebs_ll" is
+# for a x86 HW PMU event: PEBS with load latency data.
+#
+
+import os
+import sys
+import math
+import struct
+import sqlite3
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+        '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from EventClass import *
+
+#
+# If the perf.data has a big number of samples, then the insert operation
+# will be very time consuming (about 10+ minutes for 10000 samples) if the
+# .db database is on disk. Move the .db file to RAM based FS to speedup
+# the handling, which will cut the time down to several seconds.
+#
+con = sqlite3.connect("/dev/shm/perf.db")
+con.isolation_level = None
+
+def trace_begin():
+        print "In trace_begin:\n"
+
+        #
+        # Will create several tables at the start, pebs_ll is for PEBS data with
+        # load latency info, while gen_events is for general event.
+        #
+        con.execute("""
+                create table if not exists gen_events (
+                        name text,
+                        symbol text,
+                        comm text,
+                        dso text
+                );""")
+        con.execute("""
+                create table if not exists pebs_ll (
+                        name text,
+                        symbol text,
+                        comm text,
+                        dso text,
+                        flags integer,
+                        ip integer,
+                        status integer,
+                        dse integer,
+                        dla integer,
+                        lat integer
+                );""")
+
+#
+# Create and insert event object to a database so that user could
+# do more analysis with simple database commands.
+#
+def process_event(param_dict):
+        # Only the fields needed to build the event are pulled out of param_dict
+        raw_buf    = param_dict["raw_buf"]
+        comm       = param_dict["comm"]
+        name       = param_dict["ev_name"]
+
+        # Symbol and dso info are not always resolved; test with 'in' because
+        # dict.has_key() is deprecated and was removed in Python 3
+        if "dso" in param_dict:
+                dso = param_dict["dso"]
+        else:
+                dso = "Unknown_dso"
+
+        if "symbol" in param_dict:
+                symbol = param_dict["symbol"]
+        else:
+                symbol = "Unknown_symbol"
+
+        # Create the event object and insert it to the right table in database
+        event = create_event(name, comm, dso, symbol, raw_buf)
+        insert_db(event)
+
+def insert_db(event):
+        if event.ev_type == EVTYPE_GENERIC:
+                con.execute("insert into gen_events values(?, ?, ?, ?)",
+                                (event.name, event.symbol, event.comm, event.dso))
+        elif event.ev_type == EVTYPE_PEBS_LL:
+                event.ip &= 0x7fffffffffffffff
+                event.dla &= 0x7fffffffffffffff
+                con.execute("insert into pebs_ll values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+                        (event.name, event.symbol, event.comm, event.dso, event.flags,
+                                event.ip, event.status, event.dse, event.dla, event.lat))
+
+def trace_end():
+        print "In trace_end:\n"
+        # We show the basic info for the 2 type of event classes
+        show_general_events()
+        show_pebs_ll()
+        con.close()
+
+#
+# As the number of events may be very large, the histogram is not drawn on
+# a linear scale; a log2 scale is used instead.
+#
+
+def num2sym(num):
+        # floor(log2(num)) + 1 '#' characters, so every num >= 1 gets at least one;
+        # bit_length() yields that count exactly, with no float rounding from math.log
+        return '#' * int(num).bit_length()
+
+def show_general_events():
+
+        # Check the total record number in the table
+        count = con.execute("select count(*) from gen_events")
+        for t in count:
+                print "There is %d records in gen_events table" % t[0]
+                if t[0] == 0:
+                        return
+
+        print "Statistics about the general events grouped by thread/symbol/dso: \n"
+
+         # Group by thread
+        commq = con.execute("select comm, count(comm) from gen_events group by comm order by -count(comm)")
+        print "\n%16s %8s %16s\n%s" % ("comm", "number", "histogram", "="*42)
+        for row in commq:
+             print "%16s %8d     %s" % (row[0], row[1], num2sym(row[1]))
+
+        # Group by symbol
+        print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histogram", "="*58)
+        symbolq = con.execute("select symbol, count(symbol) from gen_events group by symbol order by -count(symbol)")
+        for row in symbolq:
+             print "%32s %8d     %s" % (row[0], row[1], num2sym(row[1]))
+
+        # Group by dso
+        print "\n%40s %8s %16s\n%s" % ("dso", "number", "histogram", "="*74)
+        dsoq = con.execute("select dso, count(dso) from gen_events group by dso order by -count(dso)")
+        for row in dsoq:
+             print "%40s %8d     %s" % (row[0], row[1], num2sym(row[1]))
+
+#
+# This function just shows the basic info, and we could do more with the
+# data in the tables, like checking the function parameters when some
+# big latency events happen.
+#
+def show_pebs_ll():
+
+        count = con.execute("select count(*) from pebs_ll")
+        for t in count:
+                print "There is %d records in pebs_ll table" % t[0]
+                if t[0] == 0:
+                        return
+
+        print "Statistics about the PEBS Load Latency events grouped by thread/symbol/dse/latency: \n"
+
+        # Group by thread
+        commq = con.execute("select comm, count(comm) from pebs_ll group by comm order by -count(comm)")
+        print "\n%16s %8s %16s\n%s" % ("comm", "number", "histogram", "="*42)
+        for row in commq:
+             print "%16s %8d     %s" % (row[0], row[1], num2sym(row[1]))
+
+        # Group by symbol
+        print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histogram", "="*58)
+        symbolq = con.execute("select symbol, count(symbol) from pebs_ll group by symbol order by -count(symbol)")
+        for row in symbolq:
+             print "%32s %8d     %s" % (row[0], row[1], num2sym(row[1]))
+
+        # Group by dse
+        dseq = con.execute("select dse, count(dse) from pebs_ll group by dse order by -count(dse)")
+        print "\n%32s %8s %16s\n%s" % ("dse", "number", "histogram", "="*58)
+        for row in dseq:
+             print "%32s %8d     %s" % (row[0], row[1], num2sym(row[1]))
+
+        # Group by latency
+        latq = con.execute("select lat, count(lat) from pebs_ll group by lat order by lat")
+        print "\n%32s %8s %16s\n%s" % ("latency", "number", "histogram", "="*58)
+        for row in latq:
+             print "%32s %8d     %s" % (row[0], row[1], num2sym(row[1]))
+
+def trace_unhandled(event_name, context, event_fields_dict):
+		print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])
diff --git a/scripts/python/export-to-postgresql.py b/scripts/python/export-to-postgresql.py
new file mode 100644
index 0000000..efcaf6c
--- /dev/null
+++ b/scripts/python/export-to-postgresql.py
@@ -0,0 +1,725 @@
+# export-to-postgresql.py: export perf data to a postgresql database
+# Copyright (c) 2014, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+
+import os
+import sys
+import struct
+import datetime
+
+# To use this script you will need to have installed package python-pyside which
+# provides LGPL-licensed Python bindings for Qt.  You will also need the package
+# libqt4-sql-psql for Qt postgresql support.
+#
+# The script assumes postgresql is running on the local machine and that the
+# user has postgresql permissions to create databases. Examples of installing
+# postgresql and adding such a user are:
+#
+# fedora:
+#
+#	$ sudo yum install postgresql postgresql-server python-pyside qt-postgresql
+#	$ sudo su - postgres -c initdb
+#	$ sudo service postgresql start
+#	$ sudo su - postgres
+#	$ createuser <your user id here>
+#	Shall the new role be a superuser? (y/n) y
+#
+# ubuntu:
+#
+#	$ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql
+#	$ sudo su - postgres
+#	$ createuser -s <your user id here>
+#
+# An example of using this script with Intel PT:
+#
+#	$ perf record -e intel_pt//u ls
+#	$ perf script -s ~/libexec/perf-core/scripts/python/export-to-postgresql.py pt_example branches calls
+#	2015-05-29 12:49:23.464364 Creating database...
+#	2015-05-29 12:49:26.281717 Writing to intermediate files...
+#	2015-05-29 12:49:27.190383 Copying to database...
+#	2015-05-29 12:49:28.140451 Removing intermediate files...
+#	2015-05-29 12:49:28.147451 Adding primary keys
+#	2015-05-29 12:49:28.655683 Adding foreign keys
+#	2015-05-29 12:49:29.365350 Done
+#
+# To browse the database, psql can be used e.g.
+#
+#	$ psql pt_example
+#	pt_example=# select * from samples_view where id < 100;
+#	pt_example=# \d+
+#	pt_example=# \d+ samples_view
+#	pt_example=# \q
+#
+# An example of using the database is provided by the script
+# call-graph-from-sql.py.  Refer to that script for details.
+#
+# Tables:
+#
+#	The tables largely correspond to perf tools' data structures.  They are largely self-explanatory.
+#
+#	samples
+#
+#		'samples' is the main table. It represents what instruction was executing at a point in time
+#		when something (a selected event) happened.  The memory address is the instruction pointer or 'ip'.
+#
+#	calls
+#
+#		'calls' represents function calls and is related to 'samples' by 'call_id' and 'return_id'.
+#		'calls' is only created when the 'calls' option to this script is specified.
+#
+#	call_paths
+#
+#		'call_paths' represents all the call stacks.  Each 'call' has an associated record in 'call_paths'.
+#		'call_paths' is only created when the 'calls' or 'callchains' option to this script is specified.
+#
+#	branch_types
+#
+#		'branch_types' provides descriptions for each type of branch.
+#
+#	comm_threads
+#
+#		'comm_threads' shows how 'comms' relates to 'threads'.
+#
+#	comms
+#
+#		'comms' contains a record for each 'comm' - the name given to the executable that is running.
+#
+#	dsos
+#
+#		'dsos' contains a record for each executable file or library.
+#
+#	machines
+#
+#		'machines' can be used to distinguish virtual machines if virtualization is supported.
+#
+#	selected_events
+#
+#		'selected_events' contains a record for each kind of event that has been sampled.
+#
+#	symbols
+#
+#		'symbols' contains a record for each symbol.  Only symbols that have samples are present.
+#
+#	threads
+#
+#		'threads' contains a record for each thread.
+#
+# Views:
+#
+#	Most of the tables have views for more friendly display.  The views are:
+#
+#		calls_view
+#		call_paths_view
+#		comm_threads_view
+#		dsos_view
+#		machines_view
+#		samples_view
+#		symbols_view
+#		threads_view
+#
+# More examples of browsing the database with psql:
+#   Note that some of the examples are not the most efficient SQL queries.
+#   Note that call information is only available if the script's 'calls' option has been used.
+#
+#	Top 10 function calls (not aggregated by symbol):
+#
+#		SELECT * FROM calls_view ORDER BY elapsed_time DESC LIMIT 10;
+#
+#	Top 10 function calls (aggregated by symbol):
+#
+#		SELECT symbol_id,(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,
+#			SUM(elapsed_time) AS tot_elapsed_time,SUM(branch_count) AS tot_branch_count
+#			FROM calls_view GROUP BY symbol_id ORDER BY tot_elapsed_time DESC LIMIT 10;
+#
+#		Note that the branch count gives a rough estimation of cpu usage, so functions
+#		that took a long time but have a relatively low branch count must have spent time
+#		waiting.
+#
+#	Find symbols by pattern matching on part of the name (e.g. names containing 'alloc'):
+#
+#		SELECT * FROM symbols_view WHERE name LIKE '%alloc%';
+#
+#	Top 10 function calls for a specific symbol (e.g. whose symbol_id is 187):
+#
+#		SELECT * FROM calls_view WHERE symbol_id = 187 ORDER BY elapsed_time DESC LIMIT 10;
+#
+#	Show function calls made by function in the same context (i.e. same call path) (e.g. one with call_path_id 254):
+#
+#		SELECT * FROM calls_view WHERE parent_call_path_id = 254;
+#
+#	Show branches made during a function call (e.g. where call_id is 29357 and return_id is 29370 and tid is 29670)
+#
+#		SELECT * FROM samples_view WHERE id >= 29357 AND id <= 29370 AND tid = 29670 AND event LIKE 'branches%';
+#
+#	Show transactions:
+#
+#		SELECT * FROM samples_view WHERE event = 'transactions';
+#
+#		Note that a transaction start has 'in_tx' true, whereas a transaction end has 'in_tx' false.
+#		Transaction aborts have branch_type_name 'transaction abort'
+#
+#	Show transaction aborts:
+#
+#		SELECT * FROM samples_view WHERE event = 'transactions' AND branch_type_name = 'transaction abort';
+#
+# To print a call stack requires walking the call_paths table.  For example this python script:
+#   #!/usr/bin/python2
+#
+#   import sys
+#   from PySide.QtSql import *
+#
+#   if __name__ == '__main__':
+#           if (len(sys.argv) < 3):
+#                   print >> sys.stderr, "Usage is: printcallstack.py <database name> <call_path_id>"
+#                   raise Exception("Too few arguments")
+#           dbname = sys.argv[1]
+#           call_path_id = sys.argv[2]
+#           db = QSqlDatabase.addDatabase('QPSQL')
+#           db.setDatabaseName(dbname)
+#           if not db.open():
+#                   raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
+#           query = QSqlQuery(db)
+#           print "    id          ip  symbol_id  symbol                          dso_id  dso_short_name"
+#           while call_path_id != 0 and call_path_id != 1:
+#                   ret = query.exec_('SELECT * FROM call_paths_view WHERE id = ' + str(call_path_id))
+#                   if not ret:
+#                           raise Exception("Query failed: " + query.lastError().text())
+#                   if not query.next():
+#                           raise Exception("Query failed")
+#                   print "{0:>6}  {1:>10}  {2:>9}  {3:<30}  {4:>6}  {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5))
+#                   call_path_id = query.value(6)
+
+from PySide.QtSql import *
+
+# Need to access PostgreSQL C library directly to use COPY FROM STDIN
+from ctypes import *
+libpq = CDLL("libpq.so.5")	# bindings below are used only by copy_output_file()
+PQconnectdb = libpq.PQconnectdb
+PQconnectdb.restype = c_void_p	# result is a pointer; the ctypes default would be a C int
+PQfinish = libpq.PQfinish	# NOTE(review): no argtypes declared - see the note on PQstatus
+PQstatus = libpq.PQstatus	# NOTE(review): without argtypes a plain Python int handle is passed as a C int - confirm this is safe for 64-bit pointers
+PQexec = libpq.PQexec	# NOTE(review): no argtypes declared - see the note on PQstatus
+PQexec.restype = c_void_p	# result is a pointer, as for PQconnectdb
+PQresultStatus = libpq.PQresultStatus	# NOTE(review): no argtypes declared - see the note on PQstatus
+PQputCopyData = libpq.PQputCopyData
+PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ]	# called as (connection, data, len(data))
+PQputCopyEnd = libpq.PQputCopyEnd
+PQputCopyEnd.argtypes = [ c_void_p, c_void_p ]	# called as (connection, None)
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+# These perf imports are not used at present
+#from perf_trace_context import *
+#from Core import *
+
+perf_db_export_mode = True	# NOTE(review): never read in this script; presumably consumed by perf's scripting engine - confirm
+perf_db_export_calls = False	# switched on by the 'calls' command-line option
+perf_db_export_callchains = False	# switched on by the 'callchains' command-line option
+
+
+def usage():	# write the four-line synopsis to stderr, then abort by raising
+	print >> sys.stderr, "\n".join(("Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]",
+		"where:	columns		'all' or 'branches'",
+		"		calls		'calls' => create calls and call_paths table",
+		"		callchains	'callchains' => create call_paths table"))
+	raise Exception("Too few arguments")
+
+# The first argument, the name of the database to create, is mandatory
+if len(sys.argv) < 2:
+	usage()
+dbname = sys.argv[1]
+
+# Optional column set: 'all' (the default) or 'branches'; anything else is rejected
+columns = sys.argv[2] if len(sys.argv) >= 3 else "all"
+if columns != "all" and columns != "branches":
+	usage()
+branches = columns == "branches"
+
+# Every further argument must be one of the two table-selection options
+for i in range(3, len(sys.argv)):
+	if sys.argv[i] == "callchains":
+		perf_db_export_callchains = True
+	elif sys.argv[i] == "calls":
+		perf_db_export_calls = True
+	else:
+		usage()
+
+# The intermediate files are written to <current directory>/<dbname>-perf-data.
+# os.mkdir() raises if that directory already exists.
+
+output_dir_name = "%s/%s-perf-data" % (os.getcwd(), dbname)
+os.mkdir(output_dir_name)
+
+def do_query(q, s):
+	# Execute statement s on query object q; on failure raise with the query's last error text
+	if not q.exec_(s):
+		raise Exception("Query failed: " + q.lastError().text())
+
+print datetime.datetime.today(), "Creating database..."
+# Connect to the 'postgres' database first, only to issue CREATE DATABASE
+db = QSqlDatabase.addDatabase('QPSQL')
+query = QSqlQuery(db)
+db.setDatabaseName('postgres')
+try:
+	# Report a failed connection directly instead of as a later, less specific query error
+	if not db.open():
+		raise Exception("Failed to open database postgres error: " + db.lastError().text())
+	do_query(query, 'CREATE DATABASE ' + dbname)
+except:
+	os.rmdir(output_dir_name)	# nothing has been written yet: remove the empty directory, then re-raise
+	raise
+query.finish()
+query.clear()
+db.close()
+# Reconnect, this time to the newly created database; all later queries use this connection
+db.setDatabaseName(dbname)
+if not db.open():
+	raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
+query = QSqlQuery(db)
+do_query(query, 'SET client_min_messages TO WARNING')
+
+do_query(query, 'CREATE TABLE selected_events ('	# tables are created without keys; trace_end() adds primary and foreign keys
+		'id		bigint		NOT NULL,'
+		'name		varchar(80))')
+do_query(query, 'CREATE TABLE machines ('
+		'id		bigint		NOT NULL,'
+		'pid		integer,'
+		'root_dir 	varchar(4096))')
+do_query(query, 'CREATE TABLE threads ('
+		'id		bigint		NOT NULL,'
+		'machine_id	bigint,'
+		'process_id	bigint,'
+		'pid		integer,'
+		'tid		integer)')
+do_query(query, 'CREATE TABLE comms ('
+		'id		bigint		NOT NULL,'
+		'comm		varchar(16))')
+do_query(query, 'CREATE TABLE comm_threads ('	# links comms to threads by id
+		'id		bigint		NOT NULL,'
+		'comm_id	bigint,'
+		'thread_id	bigint)')
+do_query(query, 'CREATE TABLE dsos ('
+		'id		bigint		NOT NULL,'
+		'machine_id	bigint,'
+		'short_name	varchar(256),'
+		'long_name	varchar(4096),'
+		'build_id	varchar(64))')
+do_query(query, 'CREATE TABLE symbols ('
+		'id		bigint		NOT NULL,'
+		'dso_id		bigint,'
+		'sym_start	bigint,'
+		'sym_end	bigint,'
+		'binding	integer,'
+		'name		varchar(2048))')
+do_query(query, 'CREATE TABLE branch_types ('
+		'id		integer		NOT NULL,'
+		'name		varchar(80))')
+
+if branches:	# 'branches' column set: same as below minus period, weight, transaction and data_src
+	do_query(query, 'CREATE TABLE samples ('
+		'id		bigint		NOT NULL,'
+		'evsel_id	bigint,'
+		'machine_id	bigint,'
+		'thread_id	bigint,'
+		'comm_id	bigint,'
+		'dso_id		bigint,'
+		'symbol_id	bigint,'
+		'sym_offset	bigint,'
+		'ip		bigint,'
+		'time		bigint,'
+		'cpu		integer,'
+		'to_dso_id	bigint,'
+		'to_symbol_id	bigint,'
+		'to_sym_offset	bigint,'
+		'to_ip		bigint,'
+		'branch_type	integer,'
+		'in_tx		boolean,'
+		'call_path_id	bigint)')
+else:	# 'all' column set; the column order must match the field order packed by sample_table()
+	do_query(query, 'CREATE TABLE samples ('
+		'id		bigint		NOT NULL,'
+		'evsel_id	bigint,'
+		'machine_id	bigint,'
+		'thread_id	bigint,'
+		'comm_id	bigint,'
+		'dso_id		bigint,'
+		'symbol_id	bigint,'
+		'sym_offset	bigint,'
+		'ip		bigint,'
+		'time		bigint,'
+		'cpu		integer,'
+		'to_dso_id	bigint,'
+		'to_symbol_id	bigint,'
+		'to_sym_offset	bigint,'
+		'to_ip		bigint,'
+		'period		bigint,'
+		'weight		bigint,'
+		'transaction	bigint,'
+		'data_src	bigint,'
+		'branch_type	integer,'
+		'in_tx		boolean,'
+		'call_path_id	bigint)')
+
+if perf_db_export_calls or perf_db_export_callchains:	# call_paths is created for either option
+	do_query(query, 'CREATE TABLE call_paths ('
+		'id		bigint		NOT NULL,'
+		'parent_id	bigint,'
+		'symbol_id	bigint,'
+		'ip		bigint)')
+if perf_db_export_calls:	# calls is created only for the 'calls' option
+	do_query(query, 'CREATE TABLE calls ('
+		'id		bigint		NOT NULL,'
+		'thread_id	bigint,'
+		'comm_id	bigint,'
+		'call_path_id	bigint,'
+		'call_time	bigint,'
+		'return_time	bigint,'
+		'branch_count	bigint,'
+		'call_id	bigint,'
+		'return_id	bigint,'
+		'parent_call_path_id	bigint,'
+		'flags		integer)')
+
+do_query(query, 'CREATE VIEW machines_view AS '	# adds host_or_guest: 'unknown' for id 0, 'host' for pid -1, otherwise 'guest'
+	'SELECT '
+		'id,'
+		'pid,'
+		'root_dir,'
+		'CASE WHEN id=0 THEN \'unknown\' WHEN pid=-1 THEN \'host\' ELSE \'guest\' END AS host_or_guest'
+	' FROM machines')
+
+do_query(query, 'CREATE VIEW dsos_view AS '	# dsos plus host_or_guest looked up through machines_view
+	'SELECT '
+		'id,'
+		'machine_id,'
+		'(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,'
+		'short_name,'
+		'long_name,'
+		'build_id'
+	' FROM dsos')
+
+do_query(query, 'CREATE VIEW symbols_view AS '	# symbols plus dso short name; binding shown as text
+	'SELECT '
+		'id,'
+		'name,'
+		'(SELECT short_name FROM dsos WHERE id=dso_id) AS dso,'
+		'dso_id,'
+		'sym_start,'
+		'sym_end,'
+		'CASE WHEN binding=0 THEN \'local\' WHEN binding=1 THEN \'global\' ELSE \'weak\' END AS binding'
+	' FROM symbols')
+
+do_query(query, 'CREATE VIEW threads_view AS '	# threads plus host_or_guest looked up through machines_view
+	'SELECT '
+		'id,'
+		'machine_id,'
+		'(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,'
+		'process_id,'
+		'pid,'
+		'tid'
+	' FROM threads')
+
+do_query(query, 'CREATE VIEW comm_threads_view AS '	# comm_threads with the command string, pid and tid resolved
+	'SELECT '
+		'comm_id,'
+		'(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+		'thread_id,'
+		'(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+		'(SELECT tid FROM threads WHERE id = thread_id) AS tid'
+	' FROM comm_threads')
+
+if perf_db_export_calls or perf_db_export_callchains:	# same condition as for creating the call_paths table
+	do_query(query, 'CREATE VIEW call_paths_view AS '	# each call path (c) joined to its parent (p) via parent_id
+		'SELECT '
+			'c.id,'
+			'to_hex(c.ip) AS ip,'
+			'c.symbol_id,'
+			'(SELECT name FROM symbols WHERE id = c.symbol_id) AS symbol,'
+			'(SELECT dso_id FROM symbols WHERE id = c.symbol_id) AS dso_id,'
+			'(SELECT dso FROM symbols_view  WHERE id = c.symbol_id) AS dso_short_name,'
+			'c.parent_id,'
+			'to_hex(p.ip) AS parent_ip,'
+			'p.symbol_id AS parent_symbol_id,'
+			'(SELECT name FROM symbols WHERE id = p.symbol_id) AS parent_symbol,'
+			'(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
+			'(SELECT dso FROM symbols_view  WHERE id = p.symbol_id) AS parent_dso_short_name'
+		' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
+if perf_db_export_calls:	# same condition as for creating the calls table
+	do_query(query, 'CREATE VIEW calls_view AS '	# calls joined to call_paths; adds elapsed_time and a textual flags column
+		'SELECT '
+			'calls.id,'
+			'thread_id,'
+			'(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+			'(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
+			'(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+			'call_path_id,'
+			'to_hex(ip) AS ip,'
+			'symbol_id,'
+			'(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
+			'call_time,'
+			'return_time,'
+			'return_time - call_time AS elapsed_time,'
+			'branch_count,'
+			'call_id,'
+			'return_id,'
+			'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,'
+			'parent_call_path_id'
+		' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id')
+
+do_query(query, 'CREATE VIEW samples_view AS '	# selects only columns present in both variants of the samples table
+	'SELECT '
+		'id,'
+		'time,'
+		'cpu,'
+		'(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+		'(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
+		'(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+		'(SELECT name FROM selected_events WHERE id = evsel_id) AS event,'
+		'to_hex(ip) AS ip_hex,'
+		'(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
+		'sym_offset,'
+		'(SELECT short_name FROM dsos WHERE id = dso_id) AS dso_short_name,'
+		'to_hex(to_ip) AS to_ip_hex,'
+		'(SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol,'
+		'to_sym_offset,'
+		'(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,'
+		'(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,'
+		'in_tx'
+	' FROM samples')
+
+
+file_header = struct.pack("!11sii", "PGCOPY\n\377\r\n\0", 0, 0)	# 11-byte signature plus two zero 32-bit fields; open_output_file() writes it first
+file_trailer = "\377\377"	# two 0xff bytes written after the last row
+
+def open_output_file(file_name):
+	# Create <output_dir_name>/<file_name>, write the PGCOPY header and return the open file.
+	# Binary mode, because the content is packed binary data; "+" so copy_output_file() can read it back.
+	file = open(output_dir_name + "/" + file_name, "wb+")
+	file.write(file_header); return file
+
+def close_output_file(file):	# append the trailer, then close the file
+	file.write(file_trailer)
+	file.close()
+
+def copy_output_file_direct(file, table_name):
+	# Finish the file, then have COPY load it by path name (file.name) into table_name
+	close_output_file(file)
+	do_query(query, "COPY " + table_name + " FROM '" + file.name + "' (FORMAT 'binary')")
+
+# Stream an intermediate file into table_name.  Use COPY FROM STDIN because security may prevent postgres from accessing the files directly
+def copy_output_file(file, table_name):
+	conn = c_void_p(PQconnectdb("dbname = " + dbname))	# keep the handle as c_void_p: a bare int would be passed as a C int to functions without argtypes
+	try:	# the connection is always closed, including on the error paths
+		if (PQstatus(conn)):	# non-zero status means the connection failed
+			raise Exception("COPY FROM STDIN PQconnectdb failed")
+		file.write(file_trailer)
+		file.seek(0)	# rewind to send the whole file, header included
+		sql = "COPY " + table_name + " FROM STDIN (FORMAT 'binary')"
+		res = c_void_p(PQexec(conn, sql))
+		if (PQresultStatus(res) != 4):	# 4 is PGRES_COPY_IN
+			raise Exception("COPY FROM STDIN PQexec failed")
+		for data in iter(lambda: file.read(65536), ""):	# 64KiB chunks until read() returns an empty string
+			ret = PQputCopyData(conn, data, len(data))
+			if (ret != 1):
+				raise Exception("COPY FROM STDIN PQputCopyData failed, error " + str(ret))
+		ret = PQputCopyEnd(conn, None)
+		if (ret != 1):
+			raise Exception("COPY FROM STDIN PQputCopyEnd failed, error " + str(ret))
+	finally:
+		PQfinish(conn)
+
+def remove_output_file(file):
+	path = file.name	# path the file was opened with
+	file.close()
+	os.unlink(path)
+
+evsel_file		= open_output_file("evsel_table.bin")	# one intermediate file per table, written by the matching *_table() function
+machine_file		= open_output_file("machine_table.bin")
+thread_file		= open_output_file("thread_table.bin")
+comm_file		= open_output_file("comm_table.bin")
+comm_thread_file	= open_output_file("comm_thread_table.bin")
+dso_file		= open_output_file("dso_table.bin")
+symbol_file		= open_output_file("symbol_table.bin")
+branch_type_file	= open_output_file("branch_type_table.bin")
+sample_file		= open_output_file("sample_table.bin")
+if perf_db_export_calls or perf_db_export_callchains:	# only when the call_paths table exists
+	call_path_file		= open_output_file("call_path_table.bin")
+if perf_db_export_calls:	# only when the calls table exists
+	call_file		= open_output_file("call_table.bin")
+
+def trace_begin():	# writes the id 0 placeholder row into each intermediate file listed below
+	print datetime.datetime.today(), "Writing to intermediate files..."
+	# id == 0 means unknown.  It is easier to create records for them than replace the zeroes with NULLs
+	evsel_table(0, "unknown")
+	machine_table(0, 0, "unknown")
+	thread_table(0, 0, 0, -1, -1)
+	comm_table(0, "unknown")
+	dso_table(0, 0, "unknown", "unknown", "")
+	symbol_table(0, 0, 0, 0, 0, "unknown")
+	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+	if perf_db_export_calls or perf_db_export_callchains:
+		call_path_table(0, 0, 0, 0)
+
+unhandled_count = 0	# incremented by trace_unhandled(), reported by trace_end()
+
+def trace_end():	# load the intermediate files into the database, delete them, then add keys
+	print datetime.datetime.today(), "Copying to database..."
+	copy_output_file(evsel_file,		"selected_events")
+	copy_output_file(machine_file,		"machines")
+	copy_output_file(thread_file,		"threads")
+	copy_output_file(comm_file,		"comms")
+	copy_output_file(comm_thread_file,	"comm_threads")
+	copy_output_file(dso_file,		"dsos")
+	copy_output_file(symbol_file,		"symbols")
+	copy_output_file(branch_type_file,	"branch_types")
+	copy_output_file(sample_file,		"samples")
+	if perf_db_export_calls or perf_db_export_callchains:
+		copy_output_file(call_path_file,	"call_paths")
+	if perf_db_export_calls:
+		copy_output_file(call_file,		"calls")
+
+	print datetime.datetime.today(), "Removing intermediate files..."
+	remove_output_file(evsel_file)
+	remove_output_file(machine_file)
+	remove_output_file(thread_file)
+	remove_output_file(comm_file)
+	remove_output_file(comm_thread_file)
+	remove_output_file(dso_file)
+	remove_output_file(symbol_file)
+	remove_output_file(branch_type_file)
+	remove_output_file(sample_file)
+	if perf_db_export_calls or perf_db_export_callchains:
+		remove_output_file(call_path_file)
+	if perf_db_export_calls:
+		remove_output_file(call_file)
+	os.rmdir(output_dir_name)	# the directory is empty once every intermediate file is removed
+	print datetime.datetime.today(), "Adding primary keys"	# NOTE(review): keys are added only after the bulk load, presumably to keep COPY fast - confirm
+	do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)')
+	do_query(query, 'ALTER TABLE machines        ADD PRIMARY KEY (id)')
+	do_query(query, 'ALTER TABLE threads         ADD PRIMARY KEY (id)')
+	do_query(query, 'ALTER TABLE comms           ADD PRIMARY KEY (id)')
+	do_query(query, 'ALTER TABLE comm_threads    ADD PRIMARY KEY (id)')
+	do_query(query, 'ALTER TABLE dsos            ADD PRIMARY KEY (id)')
+	do_query(query, 'ALTER TABLE symbols         ADD PRIMARY KEY (id)')
+	do_query(query, 'ALTER TABLE branch_types    ADD PRIMARY KEY (id)')
+	do_query(query, 'ALTER TABLE samples         ADD PRIMARY KEY (id)')
+	if perf_db_export_calls or perf_db_export_callchains:
+		do_query(query, 'ALTER TABLE call_paths      ADD PRIMARY KEY (id)')
+	if perf_db_export_calls:
+		do_query(query, 'ALTER TABLE calls           ADD PRIMARY KEY (id)')
+
+	print datetime.datetime.today(), "Adding foreign keys"
+	do_query(query, 'ALTER TABLE threads '
+					'ADD CONSTRAINT machinefk  FOREIGN KEY (machine_id)   REFERENCES machines   (id),'
+					'ADD CONSTRAINT processfk  FOREIGN KEY (process_id)   REFERENCES threads    (id)')
+	do_query(query, 'ALTER TABLE comm_threads '
+					'ADD CONSTRAINT commfk     FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
+					'ADD CONSTRAINT threadfk   FOREIGN KEY (thread_id)    REFERENCES threads    (id)')
+	do_query(query, 'ALTER TABLE dsos '
+					'ADD CONSTRAINT machinefk  FOREIGN KEY (machine_id)   REFERENCES machines   (id)')
+	do_query(query, 'ALTER TABLE symbols '
+					'ADD CONSTRAINT dsofk      FOREIGN KEY (dso_id)       REFERENCES dsos       (id)')
+	do_query(query, 'ALTER TABLE samples '
+					'ADD CONSTRAINT evselfk    FOREIGN KEY (evsel_id)     REFERENCES selected_events (id),'
+					'ADD CONSTRAINT machinefk  FOREIGN KEY (machine_id)   REFERENCES machines   (id),'
+					'ADD CONSTRAINT threadfk   FOREIGN KEY (thread_id)    REFERENCES threads    (id),'
+					'ADD CONSTRAINT commfk     FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
+					'ADD CONSTRAINT dsofk      FOREIGN KEY (dso_id)       REFERENCES dsos       (id),'
+					'ADD CONSTRAINT symbolfk   FOREIGN KEY (symbol_id)    REFERENCES symbols    (id),'
+					'ADD CONSTRAINT todsofk    FOREIGN KEY (to_dso_id)    REFERENCES dsos       (id),'
+					'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols    (id)')
+	if perf_db_export_calls or perf_db_export_callchains:
+		do_query(query, 'ALTER TABLE call_paths '
+					'ADD CONSTRAINT parentfk    FOREIGN KEY (parent_id)    REFERENCES call_paths (id),'
+					'ADD CONSTRAINT symbolfk    FOREIGN KEY (symbol_id)    REFERENCES symbols    (id)')
+	if perf_db_export_calls:
+		do_query(query, 'ALTER TABLE calls '
+					'ADD CONSTRAINT threadfk    FOREIGN KEY (thread_id)    REFERENCES threads    (id),'
+					'ADD CONSTRAINT commfk      FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
+					'ADD CONSTRAINT call_pathfk FOREIGN KEY (call_path_id) REFERENCES call_paths (id),'
+					'ADD CONSTRAINT callfk      FOREIGN KEY (call_id)      REFERENCES samples    (id),'
+					'ADD CONSTRAINT returnfk    FOREIGN KEY (return_id)    REFERENCES samples    (id),'
+					'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)')
+		do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
+
+	if (unhandled_count):	# only mention unhandled events when there were some
+		print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events"
+	print datetime.datetime.today(), "Done"
+
+def trace_unhandled(event_name, context, event_fields_dict):
+	global unhandled_count	# only a tally is kept; trace_end() prints the total
+	unhandled_count = unhandled_count + 1
+
+def sched__sched_switch(*x):	# accepts any arguments and does nothing; NOTE(review): presumably keeps sched_switch events out of trace_unhandled() - confirm
+	pass
+
+def evsel_table(evsel_id, evsel_name, *x):
+	# Row: 16-bit field count (2), then per field a 32-bit byte length followed by the data
+	name_len = len(evsel_name)
+	row = struct.pack("!hiqi%ds" % name_len, 2, 8, evsel_id, name_len, evsel_name)
+	evsel_file.write(row)
+
+def machine_table(machine_id, pid, root_dir, *x):
+	# 3 fields: 8-byte id, 4-byte pid, root_dir as length-prefixed bytes
+	dir_len = len(root_dir)
+	row = struct.pack("!hiqiii%ds" % dir_len, 3, 8, machine_id, 4, pid, dir_len, root_dir)
+	machine_file.write(row)
+
+def thread_table(thread_id, machine_id, process_id, pid, tid, *x):
+	# 5 fields: three 8-byte ids, then 4-byte pid and tid
+	thread_file.write(struct.pack("!hiqiqiqiiii", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid))
+
+def comm_table(comm_id, comm_str, *x):
+	# 2 fields: 8-byte id, then the command string as length-prefixed bytes
+	comm_len = len(comm_str)
+	row = struct.pack("!hiqi%ds" % comm_len, 2, 8, comm_id, comm_len, comm_str)
+	comm_file.write(row)
+
+def comm_thread_table(comm_thread_id, comm_id, thread_id, *x):
+	# 3 fields, each an 8-byte id
+	row = struct.pack("!hiqiqiq", 3, 8, comm_thread_id, 8, comm_id, 8, thread_id)
+	comm_thread_file.write(row)
+
+def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x):
+	# 5 fields: two 8-byte ids, then three length-prefixed strings
+	short_len, long_len, bid_len = len(short_name), len(long_name), len(build_id)
+	fmt = "!hiqiqi%dsi%dsi%ds" % (short_len, long_len, bid_len)
+	row = struct.pack(fmt, 5, 8, dso_id, 8, machine_id,
+			short_len, short_name, long_len, long_name, bid_len, build_id)
+	dso_file.write(row)
+
+def symbol_table(symbol_id, dso_id, sym_start, sym_end, binding, symbol_name, *x):
+	# 6 fields: four 8-byte values, 4-byte binding, then the length-prefixed name
+	name_len = len(symbol_name)
+	row = struct.pack("!hiqiqiqiqiii%ds" % name_len, 6, 8, symbol_id, 8, dso_id, 8, sym_start, 8, sym_end, 4, binding, name_len, symbol_name)
+	symbol_file.write(row)
+
+def branch_type_table(branch_type, name, *x):
+	# 2 fields: 4-byte branch type, then the length-prefixed name
+	name_len = len(name)
+	row = struct.pack("!hiii%ds" % name_len, 2, 4, branch_type, name_len, name)
+	branch_type_file.write(row)
+
+def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x):
+	# 22 fields per row normally; in 'branches' mode period, weight, transaction and data_src are left out, giving 18
+	if not branches:
+		sample_file.write(struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id))
+		return
+	sample_file.write(struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id))
+
+def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
+	# 4 fields, each an 8-byte value
+	row = struct.pack("!hiqiqiqiq", 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip)
+	call_path_file.write(row)
+
+def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, *x):
+	# 11 fields: ten 8-byte values followed by the 4-byte flags
+	row = struct.pack("!hiqiqiqiqiqiqiqiqiqiqii", 11, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags)
+	call_file.write(row)
diff --git a/scripts/python/export-to-sqlite.py b/scripts/python/export-to-sqlite.py
new file mode 100644
index 0000000..f827bf7
--- /dev/null
+++ b/scripts/python/export-to-sqlite.py
@@ -0,0 +1,451 @@
+# export-to-sqlite.py: export perf data to a sqlite3 database
+# Copyright (c) 2017, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+
+import os
+import sys
+import struct
+import datetime
+
+# To use this script you will need to have installed package python-pyside which
+# provides LGPL-licensed Python bindings for Qt.  You will also need the package
+# libqt4-sql-sqlite for Qt sqlite3 support.
+#
+# An example of using this script with Intel PT:
+#
+#	$ perf record -e intel_pt//u ls
+#	$ perf script -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py pt_example branches calls
+#	2017-07-31 14:26:07.326913 Creating database...
+#	2017-07-31 14:26:07.538097 Writing records...
+#	2017-07-31 14:26:09.889292 Adding indexes
+#	2017-07-31 14:26:09.958746 Done
+#
+# To browse the database, sqlite3 can be used e.g.
+#
+#	$ sqlite3 pt_example
+#	sqlite> .header on
+#	sqlite> select * from samples_view where id < 10;
+#	sqlite> .mode column
+#	sqlite> select * from samples_view where id < 10;
+#	sqlite> .tables
+#	sqlite> .schema samples_view
+#	sqlite> .quit
+#
+# An example of using the database is provided by the script
+# call-graph-from-sql.py.  Refer to that script for details.
+#
+# The database structure is practically the same as created by the script
+# export-to-postgresql.py. Refer to that script for details.  A notable
+# difference is the 'transaction' column of the 'samples' table which is
+# renamed 'transaction_' in sqlite because 'transaction' is a reserved word.
+
+from PySide.QtSql import *
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+# These perf imports are not used at present
+#from perf_trace_context import *
+#from Core import *
+
+perf_db_export_mode = True
+perf_db_export_calls = False
+perf_db_export_callchains = False
+
+def usage():	# print the help text to stderr, then abort by raising an exception
+	print >> sys.stderr, "Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>]"
+	print >> sys.stderr, "where:	columns		'all' or 'branches'"
+	print >> sys.stderr, "		calls		'calls' => create calls and call_paths table"
+	print >> sys.stderr, "		callchains	'callchains' => create call_paths table"
+	raise Exception("Too few arguments" if len(sys.argv) < 2 else "Invalid arguments")	# also called for bad option values
+
+if (len(sys.argv) < 2):
+	usage()
+
+dbname = sys.argv[1]
+
+if (len(sys.argv) >= 3):
+	columns = sys.argv[2]
+else:
+	columns = "all"
+
+if columns not in ("all", "branches"):
+	usage()
+
+branches = (columns == "branches")
+
+for i in range(3,len(sys.argv)):
+	if (sys.argv[i] == "calls"):
+		perf_db_export_calls = True
+	elif (sys.argv[i] == "callchains"):
+		perf_db_export_callchains = True
+	else:
+		usage()
+
+def do_query(q, s):
+	# Run SQL string s on q; if exec_() reports failure, raise with q's last error text.
+	if not q.exec_(s):
+		raise Exception("Query failed: " + q.lastError().text())
+
+def do_query_(q):
+	# Execute the already prepared query q; on failure raise with q's last error text.
+	if not q.exec_():
+		raise Exception("Query failed: " + q.lastError().text())
+
+print datetime.datetime.today(), "Creating database..."
+
+# Refuse to overwrite an existing database.  os.path.exists() also reports
+# files that exist but cannot be opened for reading, so such a file is not
+# mistaken for a free name.
+db_exists = os.path.exists(dbname)
+
+if db_exists:
+	raise Exception(dbname + " already exists")
+
+db = QSqlDatabase.addDatabase('QSQLITE')
+db.setDatabaseName(dbname)
+# QSqlDatabase.open() reports failure through its return value; check it
+# here so the problem is not first seen as a failing query.
+if not db.open():
+	raise Exception("Failed to open database " + dbname + ": " + db.lastError().text())
+
+query = QSqlQuery(db)
+
+do_query(query, 'PRAGMA journal_mode = OFF')
+do_query(query, 'BEGIN TRANSACTION')
+
+do_query(query, 'CREATE TABLE selected_events ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'name		varchar(80))')
+do_query(query, 'CREATE TABLE machines ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'pid		integer,'
+		'root_dir 	varchar(4096))')
+do_query(query, 'CREATE TABLE threads ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'machine_id	bigint,'
+		'process_id	bigint,'
+		'pid		integer,'
+		'tid		integer)')
+do_query(query, 'CREATE TABLE comms ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'comm		varchar(16))')
+do_query(query, 'CREATE TABLE comm_threads ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'comm_id	bigint,'
+		'thread_id	bigint)')
+do_query(query, 'CREATE TABLE dsos ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'machine_id	bigint,'
+		'short_name	varchar(256),'
+		'long_name	varchar(4096),'
+		'build_id	varchar(64))')
+do_query(query, 'CREATE TABLE symbols ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'dso_id		bigint,'
+		'sym_start	bigint,'
+		'sym_end	bigint,'
+		'binding	integer,'
+		'name		varchar(2048))')
+do_query(query, 'CREATE TABLE branch_types ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'name		varchar(80))')
+
+if branches:
+	do_query(query, 'CREATE TABLE samples ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'evsel_id	bigint,'
+		'machine_id	bigint,'
+		'thread_id	bigint,'
+		'comm_id	bigint,'
+		'dso_id		bigint,'
+		'symbol_id	bigint,'
+		'sym_offset	bigint,'
+		'ip		bigint,'
+		'time		bigint,'
+		'cpu		integer,'
+		'to_dso_id	bigint,'
+		'to_symbol_id	bigint,'
+		'to_sym_offset	bigint,'
+		'to_ip		bigint,'
+		'branch_type	integer,'
+		'in_tx		boolean,'
+		'call_path_id	bigint)')
+else:
+	do_query(query, 'CREATE TABLE samples ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'evsel_id	bigint,'
+		'machine_id	bigint,'
+		'thread_id	bigint,'
+		'comm_id	bigint,'
+		'dso_id		bigint,'
+		'symbol_id	bigint,'
+		'sym_offset	bigint,'
+		'ip		bigint,'
+		'time		bigint,'
+		'cpu		integer,'
+		'to_dso_id	bigint,'
+		'to_symbol_id	bigint,'
+		'to_sym_offset	bigint,'
+		'to_ip		bigint,'
+		'period		bigint,'
+		'weight		bigint,'
+		'transaction_	bigint,'
+		'data_src	bigint,'
+		'branch_type	integer,'
+		'in_tx		boolean,'
+		'call_path_id	bigint)')
+
+if perf_db_export_calls or perf_db_export_callchains:
+	do_query(query, 'CREATE TABLE call_paths ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'parent_id	bigint,'
+		'symbol_id	bigint,'
+		'ip		bigint)')
+if perf_db_export_calls:
+	do_query(query, 'CREATE TABLE calls ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'thread_id	bigint,'
+		'comm_id	bigint,'
+		'call_path_id	bigint,'
+		'call_time	bigint,'
+		'return_time	bigint,'
+		'branch_count	bigint,'
+		'call_id	bigint,'
+		'return_id	bigint,'
+		'parent_call_path_id	bigint,'
+		'flags		integer)')
+
+# printf was added to sqlite in version 3.8.3
+sqlite_has_printf = False
+try:
+	do_query(query, 'SELECT printf("") FROM machines')
+	sqlite_has_printf = True
+except:
+	pass
+
+def emit_to_hex(x):
+	# Return SQL that shows expression x in hex via printf("%x", ...).
+	# sqlite_has_printf is False when the printf() probe query failed; x is
+	# then returned unchanged.
+	return ('printf("%x", ' + x + ')') if sqlite_has_printf else x
+
+do_query(query, 'CREATE VIEW machines_view AS '
+	'SELECT '
+		'id,'
+		'pid,'
+		'root_dir,'
+		'CASE WHEN id=0 THEN \'unknown\' WHEN pid=-1 THEN \'host\' ELSE \'guest\' END AS host_or_guest'
+	' FROM machines')
+
+do_query(query, 'CREATE VIEW dsos_view AS '
+	'SELECT '
+		'id,'
+		'machine_id,'
+		'(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,'
+		'short_name,'
+		'long_name,'
+		'build_id'
+	' FROM dsos')
+
+do_query(query, 'CREATE VIEW symbols_view AS '
+	'SELECT '
+		'id,'
+		'name,'
+		'(SELECT short_name FROM dsos WHERE id=dso_id) AS dso,'
+		'dso_id,'
+		'sym_start,'
+		'sym_end,'
+		'CASE WHEN binding=0 THEN \'local\' WHEN binding=1 THEN \'global\' ELSE \'weak\' END AS binding'
+	' FROM symbols')
+
+do_query(query, 'CREATE VIEW threads_view AS '
+	'SELECT '
+		'id,'
+		'machine_id,'
+		'(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,'
+		'process_id,'
+		'pid,'
+		'tid'
+	' FROM threads')
+
+do_query(query, 'CREATE VIEW comm_threads_view AS '
+	'SELECT '
+		'comm_id,'
+		'(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+		'thread_id,'
+		'(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+		'(SELECT tid FROM threads WHERE id = thread_id) AS tid'
+	' FROM comm_threads')
+
+if perf_db_export_calls or perf_db_export_callchains:
+	do_query(query, 'CREATE VIEW call_paths_view AS '
+		'SELECT '
+			'c.id,'
+			+ emit_to_hex('c.ip') + ' AS ip,'
+			'c.symbol_id,'
+			'(SELECT name FROM symbols WHERE id = c.symbol_id) AS symbol,'
+			'(SELECT dso_id FROM symbols WHERE id = c.symbol_id) AS dso_id,'
+			'(SELECT dso FROM symbols_view  WHERE id = c.symbol_id) AS dso_short_name,'
+			'c.parent_id,'
+			+ emit_to_hex('p.ip') + ' AS parent_ip,'
+			'p.symbol_id AS parent_symbol_id,'
+			'(SELECT name FROM symbols WHERE id = p.symbol_id) AS parent_symbol,'
+			'(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
+			'(SELECT dso FROM symbols_view  WHERE id = p.symbol_id) AS parent_dso_short_name'
+		' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
+if perf_db_export_calls:
+	do_query(query, 'CREATE VIEW calls_view AS '
+		'SELECT '
+			'calls.id,'
+			'thread_id,'
+			'(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+			'(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
+			'(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+			'call_path_id,'
+			+ emit_to_hex('ip') + ' AS ip,'
+			'symbol_id,'
+			'(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
+			'call_time,'
+			'return_time,'
+			'return_time - call_time AS elapsed_time,'
+			'branch_count,'
+			'call_id,'
+			'return_id,'
+			'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,'
+			'parent_call_path_id'
+		' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id')
+
+do_query(query, 'CREATE VIEW samples_view AS '
+	'SELECT '
+		'id,'
+		'time,'
+		'cpu,'
+		'(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+		'(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
+		'(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+		'(SELECT name FROM selected_events WHERE id = evsel_id) AS event,'
+		+ emit_to_hex('ip') + ' AS ip_hex,'
+		'(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
+		'sym_offset,'
+		'(SELECT short_name FROM dsos WHERE id = dso_id) AS dso_short_name,'
+		+ emit_to_hex('to_ip') + ' AS to_ip_hex,'
+		'(SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol,'
+		'to_sym_offset,'
+		'(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,'
+		'(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,'
+		'in_tx'
+	' FROM samples')
+
+do_query(query, 'END TRANSACTION')
+
+evsel_query = QSqlQuery(db)
+evsel_query.prepare("INSERT INTO selected_events VALUES (?, ?)")
+machine_query = QSqlQuery(db)
+machine_query.prepare("INSERT INTO machines VALUES (?, ?, ?)")
+thread_query = QSqlQuery(db)
+thread_query.prepare("INSERT INTO threads VALUES (?, ?, ?, ?, ?)")
+comm_query = QSqlQuery(db)
+comm_query.prepare("INSERT INTO comms VALUES (?, ?)")
+comm_thread_query = QSqlQuery(db)
+comm_thread_query.prepare("INSERT INTO comm_threads VALUES (?, ?, ?)")
+dso_query = QSqlQuery(db)
+dso_query.prepare("INSERT INTO dsos VALUES (?, ?, ?, ?, ?)")
+symbol_query = QSqlQuery(db)
+symbol_query.prepare("INSERT INTO symbols VALUES (?, ?, ?, ?, ?, ?)")
+branch_type_query = QSqlQuery(db)
+branch_type_query.prepare("INSERT INTO branch_types VALUES (?, ?)")
+sample_query = QSqlQuery(db)
+if branches:
+	sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
+else:
+	sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
+if perf_db_export_calls or perf_db_export_callchains:
+	call_path_query = QSqlQuery(db)
+	call_path_query.prepare("INSERT INTO call_paths VALUES (?, ?, ?, ?)")
+if perf_db_export_calls:
+	call_query = QSqlQuery(db)
+	call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
+
+def trace_begin():
+	print datetime.datetime.today(), "Writing records..."
+	do_query(query, 'BEGIN TRANSACTION')
+	# id == 0 means unknown.  It is easier to create records for them than replace the zeroes with NULLs
+	evsel_table(0, "unknown")
+	machine_table(0, 0, "unknown")
+	thread_table(0, 0, 0, -1, -1)
+	comm_table(0, "unknown")
+	dso_table(0, 0, "unknown", "unknown", "")
+	symbol_table(0, 0, 0, 0, 0, "unknown")
+	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+	if perf_db_export_calls or perf_db_export_callchains:
+		call_path_table(0, 0, 0, 0)
+
+unhandled_count = 0
+
+def trace_end():
+	do_query(query, 'END TRANSACTION')
+
+	print datetime.datetime.today(), "Adding indexes"
+	if perf_db_export_calls:
+		do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
+
+	if (unhandled_count):
+		print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events"
+	print datetime.datetime.today(), "Done"
+
+def trace_unhandled(event_name, context, event_fields_dict):
+	global unhandled_count
+	unhandled_count += 1
+
+def sched__sched_switch(*x):
+	pass
+
+def bind_exec(q, n, x):	# bind the first n items of x to prepared query q, then execute it
+	for value in x[:n]:
+		q.addBindValue(str(value))	# every value is bound in its string form
+	do_query_(q)
+
+def evsel_table(*x):
+	bind_exec(evsel_query, 2, x)
+
+def machine_table(*x):
+	bind_exec(machine_query, 3, x)
+
+def thread_table(*x):
+	bind_exec(thread_query, 5, x)
+
+def comm_table(*x):
+	bind_exec(comm_query, 2, x)
+
+def comm_thread_table(*x):
+	bind_exec(comm_thread_query, 3, x)
+
+def dso_table(*x):
+	bind_exec(dso_query, 5, x)
+
+def symbol_table(*x):
+	bind_exec(symbol_query, 6, x)
+
+def branch_type_table(*x):
+	bind_exec(branch_type_query, 2, x)
+
+def sample_table(*x):
+	if branches:
+		bind_exec(sample_query, 18, x)
+	else:
+		bind_exec(sample_query, 22, x)
+
+def call_path_table(*x):
+	bind_exec(call_path_query, 4, x)
+
+def call_return_table(*x):
+	bind_exec(call_query, 11, x)
diff --git a/scripts/python/failed-syscalls-by-pid.py b/scripts/python/failed-syscalls-by-pid.py
new file mode 100644
index 0000000..cafeff3
--- /dev/null
+++ b/scripts/python/failed-syscalls-by-pid.py
@@ -0,0 +1,78 @@
+# failed system call counts, by pid
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Displays system-wide failed system call totals, broken down by pid.
+# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+# Usage text shown (via sys.exit) when more than one argument is given.
+usage = "perf script -s failed-syscalls-by-pid.py [comm|pid]\n"
+for_comm = None
+for_pid = None
+
+if len(sys.argv) > 2:
+	sys.exit(usage)
+
+# An argument that parses as an integer selects a pid, anything else a comm.
+if len(sys.argv) > 1:
+	try:
+		for_pid = int(sys.argv[1])
+	except ValueError:
+		for_comm = sys.argv[1]
+
+syscalls = autodict()
+
+def trace_begin():
+	print "Press control+C to stop and show the summary"
+
+def trace_end():
+	print_error_totals()
+
+def raw_syscalls__sys_exit(event_name, context, common_cpu, common_secs,
+	common_nsecs, common_pid, common_comm, common_callchain, id, ret):
+	# Tally each failing syscall (ret < 0) under comm / pid / syscall id / ret.
+	# Filters are tested against None so that a pid filter of 0 still applies.
+	if (for_comm is not None and common_comm != for_comm) or \
+	   (for_pid is not None and common_pid != for_pid):
+		return
+	if ret < 0:
+		try:
+			syscalls[common_comm][common_pid][id][ret] += 1
+		except TypeError:
+			syscalls[common_comm][common_pid][id][ret] = 1
+
+def syscalls__sys_exit(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm, id, ret):
+	# This handler has no callchain argument but the target requires one: pass None.
+	raw_syscalls__sys_exit(common_callchain=None, **locals())
+
+def print_error_totals():
+	# Print the error counts collected in 'syscalls', grouped by comm and pid.
+	if for_comm is not None:
+		print "\nsyscall errors for %s:\n\n" % (for_comm),
+	else:
+		print "\nsyscall errors:\n\n",
+
+	print "%-30s  %10s\n" % ("comm [pid]", "count"),
+	print "%-30s  %10s\n" % ("------------------------------", \
+				 "----------"),
+
+	for comm in syscalls.keys():
+		for pid in syscalls[comm].keys():
+			print "\n%s [%d]\n" % (comm, pid),
+			for id in syscalls[comm][pid].keys():
+				print "  syscall: %-16s\n" % syscall_name(id),
+				# Highest count first; equal counts are ordered by ret, descending.
+				by_count = sorted(syscalls[comm][pid][id].iteritems(),
+						  key = lambda kv: (kv[1], kv[0]), reverse = True)
+				for ret, val in by_count:
+					print "    err = %-20s  %10d\n" % (strerror(ret), val),
diff --git a/scripts/python/futex-contention.py b/scripts/python/futex-contention.py
new file mode 100644
index 0000000..0f5cf43
--- /dev/null
+++ b/scripts/python/futex-contention.py
@@ -0,0 +1,50 @@
+# futex contention
+# (c) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Translation of:
+#
+# http://sourceware.org/systemtap/wiki/WSFutexContention
+#
+# to perf python scripting.
+#
+# Measures futex contention
+
+import os, sys
+sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+from Util import *
+
+# per-thread state kept between sys_enter_futex and sys_exit_futex
+thread_thislock = {} # tid -> uaddr of the futex being waited on
+thread_blocktime = {} # tid -> time at which the wait started
+
+lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time
+process_names = {} # long-lived pid-to-execname mapping
+
+def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm, callchain,
+			      nr, uaddr, op, val, utime, uaddr2, val3):
+	# Record the start of a futex wait for thread tid: its comm, the futex
+	# address and the entry time.  Only FUTEX_WAIT is tracked; we don't
+	# care about originators of WAKE events.
+	if (op & FUTEX_CMD_MASK) == FUTEX_WAIT:
+		process_names[tid] = comm
+		thread_thislock[tid] = uaddr
+		thread_blocktime[tid] = nsecs(s, ns)
+
+def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm, callchain,
+			     nr, ret):
+	# Finish a wait recorded at sys_enter: add the elapsed time to lock_waits.
+	if tid not in thread_blocktime:
+		return
+	elapsed = nsecs(s, ns) - thread_blocktime.pop(tid)
+	add_stats(lock_waits, (tid, thread_thislock.pop(tid)), elapsed)
+
+def trace_begin():
+	print "Press control+C to stop and show the summary"
+
+def trace_end():
+	# One summary line per (tid, lock) key; only the count and the average are shown.
+	for (tid, lock), stats in lock_waits.items():
+		lowest, highest, avg, count = stats
+		print "%s[%d] lock %x contended %d times, %d avg ns" % (process_names[tid], tid, lock, count, avg)
+
diff --git a/scripts/python/intel-pt-events.py b/scripts/python/intel-pt-events.py
new file mode 100644
index 0000000..b19172d
--- /dev/null
+++ b/scripts/python/intel-pt-events.py
@@ -0,0 +1,128 @@
+# intel-pt-events.py: Print Intel PT Power Events and PTWRITE
+# Copyright (c) 2017, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+
+import os
+import sys
+import struct
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+# These perf imports are not used at present
+#from perf_trace_context import *
+#from Core import *
+
+def trace_begin():
+	print "Intel PT Power Events and PTWRITE"
+
+def trace_end():
+	print "End"
+
+def trace_unhandled(event_name, context, event_fields_dict):
+		print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])
+
+def print_ptwrite(raw_buf):
+	# raw_buf starts with a little-endian 32-bit flags word followed by a
+	# 64-bit payload; bit 0 of the flags is printed as "IP".
+	flags, payload = struct.unpack_from("<IQ", raw_buf)
+	ip_flag = flags & 1
+	print "IP: %u payload: %#x" % (ip_flag, payload),
+
+def print_cbr(raw_buf):	# print the cbr value, a frequency and a rounded percentage
+	data = struct.unpack_from("<BBBBII", raw_buf)
+	cbr, divisor = data[0], data[2]
+	f = (data[4] + 500) / 1000	# rounded; presumably kHz -> MHz, going by the label below
+	p = ((cbr * 1000 / divisor) + 5) / 10 if divisor else 0	# show 0% instead of dividing by zero
+	print "%3u  freq: %4u MHz  (%3u%%)" % (cbr, f, p),
+
+def print_mwait(raw_buf):
+	# Only the 64-bit payload that follows the leading 32-bit word is used:
+	# hints are its low 8 bits, extensions are bits 32-33.
+	_, payload = struct.unpack_from("<IQ", raw_buf)
+	fields = (payload & 0xff, (payload >> 32) & 0x3)
+	print "hints: %#x extensions: %#x" % fields,
+
+def print_pwre(raw_buf):
+	# Decode bit fields of the 64-bit payload that follows the leading
+	# 32-bit word: bit 7 is printed as hw, bits 12-15 as cstate and
+	# bits 8-11 as sub-cstate.
+	_, payload = struct.unpack_from("<IQ", raw_buf)
+	hw, cstate, subcstate = (payload >> 7) & 1, (payload >> 12) & 0xf, (payload >> 8) & 0xf
+	print "hw: %u cstate: %u sub-cstate: %u" % (hw, cstate, subcstate),
+
+def print_exstop(raw_buf):
+	# Bit 0 of the leading little-endian 32-bit flags word is printed as "IP".
+	(flags,) = struct.unpack_from("<I", raw_buf)
+	ip_flag = flags & 1
+	print "IP: %u" % (ip_flag),
+
+def print_pwrx(raw_buf):
+	# The three lowest nibbles of the 64-bit payload are printed in order:
+	# deepest cstate (bits 0-3), last cstate (bits 4-7) and wake reason
+	# (bits 8-11).
+	_, payload = struct.unpack_from("<IQ", raw_buf)
+	nibbles = tuple((payload >> shift) & 0xf for shift in (0, 4, 8))
+	print "deepest cstate: %u last cstate: %u wake reason: %#x" % nibbles,
+
+def print_common_start(comm, sample, name):
+	# Start an output line (no newline): comm, pid/tid, cpu, the sample time
+	# split at 10^9 (seconds.nanoseconds if the time is in ns) and the name.
+	ts = sample["time"]
+	whole, frac = ts / 1000000000, ts %1000000000
+	print "%16s %5u/%-5u [%03u] %9u.%09u %7s:" % (comm, sample["pid"], sample["tid"], sample["cpu"], whole, frac, name),
+
+def print_common_ip(sample, symbol, dso):
+	ip = sample["ip"]
+	print "%16x %s (%s)" % (ip, symbol, dso)
+
+def process_event(param_dict):
+	# Called once per event with its fields in param_dict.  Keys read here:
+	#   "sample"   dict; its "time", "cpu", "pid", "tid" and "ip" are printed
+	#   "raw_buf"  raw event data, unpacked by the print_*() decoders
+	#   "comm"     command name
+	#   "ev_name"  event name; selects the decoder
+	#   "dso", "symbol"  optional; "[unknown]" is printed when absent
+	# Events without a decoder in the table below produce no output.
+	sample  = param_dict["sample"]
+	raw_buf = param_dict["raw_buf"]
+	comm    = param_dict["comm"]
+	name    = param_dict["ev_name"]
+
+	# Symbol and dso info are not always resolved
+	if "dso" in param_dict:
+		dso = param_dict["dso"]
+	else:
+		dso = "[unknown]"
+
+	if "symbol" in param_dict:
+		symbol = param_dict["symbol"]
+	else:
+		symbol = "[unknown]"
+
+	# Every handled event prints the same prefix and suffix; only the
+	# payload decoder in between differs, so it is looked up in a table
+	# instead of repeating the three calls for each event name.
+	decoders = {
+		"ptwrite": print_ptwrite,
+		"cbr":     print_cbr,
+		"mwait":   print_mwait,
+		"pwre":    print_pwre,
+		"exstop":  print_exstop,
+		"pwrx":    print_pwrx,
+	}
+	decode = decoders.get(name)
+	if decode is None:
+		return
+
+	print_common_start(comm, sample, name)
+	decode(raw_buf)
+	print_common_ip(sample, symbol, dso)
diff --git a/scripts/python/mem-phys-addr.py b/scripts/python/mem-phys-addr.py
new file mode 100644
index 0000000..ebee2c5
--- /dev/null
+++ b/scripts/python/mem-phys-addr.py
@@ -0,0 +1,95 @@
+# mem-phys-addr.py: Resolve physical address samples
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2018, Intel Corporation.
+
+from __future__ import division
+import os
+import sys
+import struct
+import re
+import bisect
+import collections
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+#physical address ranges for System RAM
+system_ram = []
+#physical address ranges for Persistent Memory
+pmem = []
+#file object for proc iomem
+f = None
+#Count for each type of memory
+load_mem_type_cnt = collections.Counter()
+#perf event name
+event_name = None
+
+def parse_iomem():
+	# Append the start and end of every 'System RAM' / 'Persistent Memory'
+	# line of /proc/iomem to system_ram / pmem.  The file stays open in f.
+	global f
+	f = open('/proc/iomem', 'r')
+	targets = {'System RAM': system_ram, 'Persistent Memory': pmem}
+	for line in f:
+		m = re.split('-|:',line,2)
+		ranges = targets.get(m[2].strip())
+		if ranges is not None:
+			ranges.extend([long(m[0], 16), long(m[1], 16)])
+
+def print_memory_type():
+	print "Event: %s" % (event_name)
+	print "%-40s  %10s  %10s\n" % ("Memory type", "count", "percentage"),
+	print "%-40s  %10s  %10s\n" % ("----------------------------------------", \
+					"-----------", "-----------"),
+	total = sum(load_mem_type_cnt.values())
+	for mem_type, count in sorted(load_mem_type_cnt.most_common(), \
+					key = lambda(k, v): (v, k), reverse = True):
+		print "%-40s  %10d  %10.1f%%\n" % (mem_type, count, 100 * count / total),
+
+def trace_begin():
+	parse_iomem()
+
+def trace_end():
+	print_memory_type()
+	f.close()
+
+def is_system_ram(phys_addr):
+	# system_ram is a sorted flat list [start0, end0, start1, end1, ...]
+	# (/proc/iomem is sorted) with inclusive ends.  bisect_right() gives an
+	# odd index for start <= addr < end, bisect_left() for start < addr <= end.
+	return (bisect.bisect_right(system_ram, phys_addr) % 2 == 1 or
+		bisect.bisect_left(system_ram, phys_addr) % 2 == 1)
+
+def is_persistent_mem(phys_addr):
+	# pmem is a sorted flat [start, end, ...] list: an odd insertion index means
+	# inside a range; bisect_left() makes an inclusive range end match too.
+	return (bisect.bisect_right(pmem, phys_addr) % 2 == 1 or
+		bisect.bisect_left(pmem, phys_addr) % 2 == 1)
+
+def find_memory_type(phys_addr):
+	# Map a physical address to the name of the /proc/iomem resource that
+	# contains it; "N/A" for address 0 or when no line matches.
+	if phys_addr == 0:
+		return "N/A"
+	if is_system_ram(phys_addr):
+		return "System RAM"
+	if is_persistent_mem(phys_addr):
+		return "Persistent Memory"
+	#slow path, search all
+	f.seek(0, 0)
+	for j in f:
+		m = re.split('-|:',j,2)
+		if long(m[0], 16) <= phys_addr <= long(m[1], 16):
+			return m[2].strip()	# drop the blank and newline around the name
+	return "N/A"
+
+def process_event(param_dict):
+	name       = param_dict["ev_name"]
+	sample     = param_dict["sample"]
+	phys_addr  = sample["phys_addr"]
+
+	global event_name
+	if event_name == None:
+		event_name = name
+	load_mem_type_cnt[find_memory_type(phys_addr)] += 1
diff --git a/scripts/python/net_dropmonitor.py b/scripts/python/net_dropmonitor.py
new file mode 100755
index 0000000..a150164
--- /dev/null
+++ b/scripts/python/net_dropmonitor.py
@@ -0,0 +1,76 @@
+# Monitor the system for dropped packets and produce a report of drop locations and counts
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+		'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+drop_log = {}
+kallsyms = []
+
+def get_kallsyms_table():
+	# Load (address, name) pairs from /proc/kallsyms into the global list,
+	# sorted by address; the list is left as is when the file cannot be opened.
+	global kallsyms
+	try:
+		f = open("/proc/kallsyms", "r")
+	except IOError:
+		return
+	with f:	# close the file as soon as it has been parsed
+		for line in f:
+			fields = line.split()
+			kallsyms.append((int(fields[0], 16), fields[2]))
+	kallsyms.sort()
+
+def get_sym(sloc):
+	loc = int(sloc)
+
+	# Invariant: kallsyms[i][0] <= loc for all 0 <= i <= start
+	#            kallsyms[i][0] > loc for all end <= i < len(kallsyms)
+	start, end = -1, len(kallsyms)
+	while end != start + 1:
+		pivot = (start + end) // 2
+		if loc < kallsyms[pivot][0]:
+			end = pivot
+		else:
+			start = pivot
+
+	# Now (start == -1 or kallsyms[start][0] <= loc)
+	# and (start == len(kallsyms) - 1 or loc < kallsyms[start + 1][0])
+	if start >= 0:
+		symloc, name = kallsyms[start]
+		return (name, loc - symloc)
+	else:
+		return (None, 0)
+
+def print_drop_table():
+	print "%25s %25s %25s" % ("LOCATION", "OFFSET", "COUNT")
+	for i in drop_log.keys():
+		(sym, off) = get_sym(i)
+		if sym == None:
+			sym = i
+		print "%25s %25s %25s" % (sym, off, drop_log[i])
+
+
+def trace_begin():
+	print "Starting trace (Ctrl-C to dump results)"
+
+def trace_end():
+	print "Gathering kallsyms data"
+	get_kallsyms_table()
+	print_drop_table()
+
+# called from perf, when it finds a corresponding event
+def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, callchain,
+		   skbaddr, location, protocol):
+	# Count one drop against 'location'.  The key is the string form of
+	# the location; get_sym() turns it back into an integer with int().
+	# dict.get() supplies 0 for a first occurrence, so no exception
+	# handler is needed and unrelated errors are not swallowed.
+	drop_log[str(location)] = drop_log.get(str(location), 0) + 1
diff --git a/scripts/python/netdev-times.py b/scripts/python/netdev-times.py
new file mode 100644
index 0000000..9b2050f
--- /dev/null
+++ b/scripts/python/netdev-times.py
@@ -0,0 +1,468 @@
+# Display a process of packets and processed time.
+# SPDX-License-Identifier: GPL-2.0
+# It helps us to investigate networking or network device.
+#
+# options
+# tx: show only tx chart
+# rx: show only rx chart
+# dev=: show only thing related to specified device
+# debug: work with debug mode. It shows buffer status.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+all_event_list = []; # insert all tracepoint event related with this script
+irq_dic = {}; # key is cpu and value is a list which stacks irqs
+              # which raise NET_RX softirq
+net_rx_dic = {}; # key is cpu and value include time of NET_RX softirq-entry
+		 # and a list which stacks receive
+receive_hunk_list = []; # a list which include a sequence of receive events
+rx_skb_list = []; # received packet list for matching
+		       # skb_copy_datagram_iovec
+
+buffer_budget = 65536; # the budget of rx_skb_list, tx_queue_list and
+		       # tx_xmit_list
+of_count_rx_skb_list = 0; # overflow count
+
+tx_queue_list = []; # list of packets which pass through dev_queue_xmit
+of_count_tx_queue_list = 0; # overflow count
+
+tx_xmit_list = [];  # list of packets which pass through dev_hard_start_xmit
+of_count_tx_xmit_list = 0; # overflow count
+
+tx_free_list = [];  # list of packets which is freed
+
+# options
+show_tx = 0;
+show_rx = 0;
+dev = 0; # store a name of device specified by option "dev="
+debug = 0;
+
+# indices of event_info tuple
+EINFO_IDX_NAME=   0
+EINFO_IDX_CONTEXT=1
+EINFO_IDX_CPU=    2
+EINFO_IDX_TIME=   3
+EINFO_IDX_PID=    4
+EINFO_IDX_COMM=   5
+
+# Calculate a time interval(msec) from src(nsec) to dst(nsec)
+def diff_msec(src, dst): # float result; negative when dst is earlier than src
+	return (dst - src) / 1000000.0
+
+# Display a process of transmitting a packet
+def print_transmit(hunk): # hunk: dict with 'dev', 'len', 'queue_t', 'xmit_t' and 'free_t'
+	if dev != 0 and hunk['dev'].find(dev) < 0: # "dev=" filter is a substring match
+		return
+	print "%7s %5d %6d.%06dsec %12.3fmsec      %12.3fmsec" % \
+		(hunk['dev'], hunk['len'],
+		nsecs_secs(hunk['queue_t']),
+		nsecs_nsecs(hunk['queue_t'])/1000,
+		diff_msec(hunk['queue_t'], hunk['xmit_t']), # queue -> xmit
+		diff_msec(hunk['xmit_t'], hunk['free_t'])) # xmit -> free
+
+# Format for displaying rx packet processing
+PF_IRQ_ENTRY= "  irq_entry(+%.3fmsec irq=%d:%s)"
+PF_SOFT_ENTRY="  softirq_entry(+%.3fmsec)"
+PF_NAPI_POLL= "  napi_poll_exit(+%.3fmsec %s)"
+PF_JOINT=     "         |"
+PF_WJOINT=    "         |            |"
+PF_NET_RECV=  "         |---netif_receive_skb(+%.3fmsec skb=%x len=%d)"
+PF_NET_RX=    "         |---netif_rx(+%.3fmsec skb=%x)"
+PF_CPY_DGRAM= "         |      skb_copy_datagram_iovec(+%.3fmsec %d:%s)"
+PF_KFREE_SKB= "         |      kfree_skb(+%.3fmsec location=%x)"
+PF_CONS_SKB=  "         |      consume_skb(+%.3fmsec)"
+
+# Display a process of received packets and interrupts associated with
+# a NET_RX softirq
+def print_receive(hunk): # hunk: one receive_hunk_list entry built by handle_irq_softirq_exit()
+	show_hunk = 0
+	irq_list = hunk['irq_list']
+	cpu = irq_list[0]['cpu']
+	base_t = irq_list[0]['irq_ent_t'] # every "+msec" offset below is relative to this
+	# check if this hunk should be shown (any irq name containing "dev=")
+	if dev != 0:
+		for i in range(len(irq_list)):
+			if irq_list[i]['name'].find(dev) >= 0:
+				show_hunk = 1
+				break
+	else:
+		show_hunk = 1
+	if show_hunk == 0:
+		return
+
+	print "%d.%06dsec cpu=%d" % \
+		(nsecs_secs(base_t), nsecs_nsecs(base_t)/1000, cpu)
+	for i in range(len(irq_list)):
+		print PF_IRQ_ENTRY % \
+			(diff_msec(base_t, irq_list[i]['irq_ent_t']),
+			irq_list[i]['irq'], irq_list[i]['name'])
+		print PF_JOINT
+		irq_event_list = irq_list[i]['event_list']
+		for j in range(len(irq_event_list)):
+			irq_event = irq_event_list[j]
+			if irq_event['event'] == 'netif_rx': # 'sirq_raise' entries are not printed
+				print PF_NET_RX % \
+					(diff_msec(base_t, irq_event['time']),
+					irq_event['skbaddr'])
+				print PF_JOINT
+	print PF_SOFT_ENTRY % \
+		diff_msec(base_t, hunk['sirq_ent_t'])
+	print PF_JOINT
+	event_list = hunk['event_list']
+	for i in range(len(event_list)):
+		event = event_list[i]
+		if event['event_name'] == 'napi_poll':
+			print PF_NAPI_POLL % \
+			    (diff_msec(base_t, event['event_t']), event['dev'])
+			if i == len(event_list) - 1: # last event: close the chart with a blank line
+				print ""
+			else:
+				print PF_JOINT
+		else:
+			print PF_NET_RECV % \
+			    (diff_msec(base_t, event['event_t']), event['skbaddr'],
+				event['len'])
+			# NOTE(review): handle_kfree_skb() and handle_skb_copy_datagram_iovec()
+			# both set 'comm' together with 'handle', so the elif branch below
+			# looks unreachable and freed skbs print as copies - confirm.
+			if 'comm' in event.keys():
+				print PF_WJOINT
+				print PF_CPY_DGRAM % \
+					(diff_msec(base_t, event['comm_t']),
+					event['pid'], event['comm'])
+			elif 'handle' in event.keys():
+				print PF_WJOINT
+				if event['handle'] == "kfree_skb":
+					print PF_KFREE_SKB % \
+						(diff_msec(base_t,
+						event['comm_t']),
+						event['location'])
+				elif event['handle'] == "consume_skb":
+					print PF_CONS_SKB % \
+						diff_msec(base_t,
+							event['comm_t'])
+			print PF_JOINT
+
+def trace_begin():
+	global show_tx
+	global show_rx
+	global dev
+	global debug
+
+	for i in range(len(sys.argv)):
+		if i == 0:
+			continue
+		arg = sys.argv[i]
+		if arg == 'tx':
+			show_tx = 1
+		elif arg =='rx':
+			show_rx = 1
+		elif arg.find('dev=',0, 4) >= 0:
+			dev = arg[4:]
+		elif arg == 'debug':
+			debug = 1
+	if show_tx == 0  and show_rx == 0:
+		show_tx = 1
+		show_rx = 1
+
+def trace_end():
+	# order all events in time
+	all_event_list.sort(lambda a,b :cmp(a[EINFO_IDX_TIME],
+					    b[EINFO_IDX_TIME]))
+	# process all events
+	for i in range(len(all_event_list)):
+		event_info = all_event_list[i]
+		name = event_info[EINFO_IDX_NAME]
+		if name == 'irq__softirq_exit':
+			handle_irq_softirq_exit(event_info)
+		elif name == 'irq__softirq_entry':
+			handle_irq_softirq_entry(event_info)
+		elif name == 'irq__softirq_raise':
+			handle_irq_softirq_raise(event_info)
+		elif name == 'irq__irq_handler_entry':
+			handle_irq_handler_entry(event_info)
+		elif name == 'irq__irq_handler_exit':
+			handle_irq_handler_exit(event_info)
+		elif name == 'napi__napi_poll':
+			handle_napi_poll(event_info)
+		elif name == 'net__netif_receive_skb':
+			handle_netif_receive_skb(event_info)
+		elif name == 'net__netif_rx':
+			handle_netif_rx(event_info)
+		elif name == 'skb__skb_copy_datagram_iovec':
+			handle_skb_copy_datagram_iovec(event_info)
+		elif name == 'net__net_dev_queue':
+			handle_net_dev_queue(event_info)
+		elif name == 'net__net_dev_xmit':
+			handle_net_dev_xmit(event_info)
+		elif name == 'skb__kfree_skb':
+			handle_kfree_skb(event_info)
+		elif name == 'skb__consume_skb':
+			handle_consume_skb(event_info)
+	# display receive hunks
+	if show_rx:
+		for i in range(len(receive_hunk_list)):
+			print_receive(receive_hunk_list[i])
+	# display transmit hunks
+	if show_tx:
+		print "   dev    len      Qdisc        " \
+			"       netdevice             free"
+		for i in range(len(tx_free_list)):
+			print_transmit(tx_free_list[i])
+	if debug:
+		print "debug buffer status"
+		print "----------------------------"
+		print "xmit Qdisc:remain:%d overflow:%d" % \
+			(len(tx_queue_list), of_count_tx_queue_list)
+		print "xmit netdevice:remain:%d overflow:%d" % \
+			(len(tx_xmit_list), of_count_tx_xmit_list)
+		print "receive:remain:%d overflow:%d" % \
+			(len(rx_skb_list), of_count_rx_skb_list)
+
+# called from perf, when it finds a corresponding event
+def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, callchain, vec):
+	if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX": # other softirqs are not buffered
+		return
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
+	all_event_list.append(event_info) # replayed in time order by trace_end()
+
+def irq__softirq_exit(name, context, cpu, sec, nsec, pid, comm, callchain, vec):
+	# NOTE(review): vec is resolved under the "irq__softirq_entry" event name
+	if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
+		return # presumably deliberate (shared vec names) - confirm
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
+	all_event_list.append(event_info)
+
+def irq__softirq_raise(name, context, cpu, sec, nsec, pid, comm, callchain, vec):
+	if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX": # NOTE(review): looked up under the entry event's name - confirm
+		return
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
+	all_event_list.append(event_info) # replayed in time order by trace_end()
+
+def irq__irq_handler_entry(name, context, cpu, sec, nsec, pid, comm,
+			callchain, irq, irq_name):
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+			irq, irq_name) # layout unpacked by handle_irq_handler_entry()
+	all_event_list.append(event_info)
+
+def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, callchain, irq, ret):
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret) # layout unpacked by handle_irq_handler_exit()
+	all_event_list.append(event_info)
+
+def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi,
+                    dev_name, work=None, budget=None): # work/budget are optional here and default to None
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+			napi, dev_name, work, budget) # layout unpacked by handle_napi_poll()
+	all_event_list.append(event_info)
+
+def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr,
+			skblen, dev_name):
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+			skbaddr, skblen, dev_name) # layout unpacked by handle_netif_receive_skb()
+	all_event_list.append(event_info)
+
+def net__netif_rx(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr,
+			skblen, dev_name):
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+			skbaddr, skblen, dev_name) # layout unpacked by handle_netif_rx()
+	all_event_list.append(event_info)
+
+def net__net_dev_queue(name, context, cpu, sec, nsec, pid, comm, callchain,
+			skbaddr, skblen, dev_name):
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+			skbaddr, skblen, dev_name) # layout unpacked by handle_net_dev_queue()
+	all_event_list.append(event_info)
+
+def net__net_dev_xmit(name, context, cpu, sec, nsec, pid, comm, callchain,
+			skbaddr, skblen, rc, dev_name):
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+			skbaddr, skblen, rc ,dev_name) # layout unpacked by handle_net_dev_xmit()
+	all_event_list.append(event_info)
+
+def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, callchain,
+			skbaddr, protocol, location):
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+			skbaddr, protocol, location)
+	all_event_list.append(event_info)
+
+def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr):
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+			skbaddr) # layout unpacked by handle_consume_skb()
+	all_event_list.append(event_info)
+
+def skb__skb_copy_datagram_iovec(name, context, cpu, sec, nsec, pid, comm, callchain,
+	skbaddr, skblen):
+	event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+			skbaddr, skblen) # layout unpacked by handle_skb_copy_datagram_iovec()
+	all_event_list.append(event_info)
+
+def handle_irq_handler_entry(event_info):
+	(name, context, cpu, time, pid, comm, irq, irq_name) = event_info
+	if cpu not in irq_dic.keys():
+		irq_dic[cpu] = []
+	irq_record = {'irq':irq, 'name':irq_name, 'cpu':cpu, 'irq_ent_t':time}
+	irq_dic[cpu].append(irq_record)
+
+def handle_irq_handler_exit(event_info):
+	(name, context, cpu, time, pid, comm, irq, ret) = event_info
+	if cpu not in irq_dic.keys():
+		return
+	irq_record = irq_dic[cpu].pop()
+	if irq != irq_record['irq']:
+		return
+	irq_record.update({'irq_ext_t':time})
+	# if an irq doesn't include NET_RX softirq, drop.
+	if 'event_list' in irq_record.keys():
+		irq_dic[cpu].append(irq_record)
+
+def handle_irq_softirq_raise(event_info):
+	(name, context, cpu, time, pid, comm, vec) = event_info
+	if cpu not in irq_dic.keys() \
+	or len(irq_dic[cpu]) == 0:
+		return
+	irq_record = irq_dic[cpu].pop()
+	if 'event_list' in irq_record.keys():
+		irq_event_list = irq_record['event_list']
+	else:
+		irq_event_list = []
+	irq_event_list.append({'time':time, 'event':'sirq_raise'})
+	irq_record.update({'event_list':irq_event_list})
+	irq_dic[cpu].append(irq_record)
+
+def handle_irq_softirq_entry(event_info): # (re)start the per-cpu NET_RX record
+	(name, context, cpu, time, pid, comm, vec) = event_info
+	net_rx_dic[cpu] = {'sirq_ent_t':time, 'event_list':[]} # overwrites any record still open on this cpu
+
+def handle_irq_softirq_exit(event_info):
+	(name, context, cpu, time, pid, comm, vec) = event_info
+	irq_list = []
+	event_list = 0
+	if cpu in irq_dic.keys():
+		irq_list = irq_dic[cpu]
+		del irq_dic[cpu]
+	if cpu in net_rx_dic.keys():
+		sirq_ent_t = net_rx_dic[cpu]['sirq_ent_t']
+		event_list = net_rx_dic[cpu]['event_list']
+		del net_rx_dic[cpu]
+	if irq_list == [] or event_list == 0:
+		return
+	rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time,
+		    'irq_list':irq_list, 'event_list':event_list}
+	# merge information realted to a NET_RX softirq
+	receive_hunk_list.append(rec_data)
+
+def handle_napi_poll(event_info):
+	(name, context, cpu, time, pid, comm, napi, dev_name,
+		work, budget) = event_info
+	if cpu in net_rx_dic.keys():
+		event_list = net_rx_dic[cpu]['event_list']
+		rec_data = {'event_name':'napi_poll',
+				'dev':dev_name, 'event_t':time,
+				'work':work, 'budget':budget}
+		event_list.append(rec_data)
+
+def handle_netif_rx(event_info):
+	(name, context, cpu, time, pid, comm,
+		skbaddr, skblen, dev_name) = event_info
+	if cpu not in irq_dic.keys() \
+	or len(irq_dic[cpu]) == 0:
+		return
+	irq_record = irq_dic[cpu].pop()
+	if 'event_list' in irq_record.keys():
+		irq_event_list = irq_record['event_list']
+	else:
+		irq_event_list = []
+	irq_event_list.append({'time':time, 'event':'netif_rx',
+		'skbaddr':skbaddr, 'skblen':skblen, 'dev_name':dev_name})
+	irq_record.update({'event_list':irq_event_list})
+	irq_dic[cpu].append(irq_record)
+
+def handle_netif_receive_skb(event_info):
+	global of_count_rx_skb_list
+
+	(name, context, cpu, time, pid, comm,
+		skbaddr, skblen, dev_name) = event_info
+	if cpu in net_rx_dic.keys():
+		rec_data = {'event_name':'netif_receive_skb',
+			    'event_t':time, 'skbaddr':skbaddr, 'len':skblen}
+		event_list = net_rx_dic[cpu]['event_list']
+		event_list.append(rec_data)
+		rx_skb_list.insert(0, rec_data)
+		if len(rx_skb_list) > buffer_budget:
+			rx_skb_list.pop()
+			of_count_rx_skb_list += 1
+
+def handle_net_dev_queue(event_info): # start tracking a packet that was queued for transmit
+	global of_count_tx_queue_list
+
+	(name, context, cpu, time, pid, comm,
+		skbaddr, skblen, dev_name) = event_info
+	skb = {'dev':dev_name, 'skbaddr':skbaddr, 'len':skblen, 'queue_t':time}
+	tx_queue_list.insert(0, skb) # newest first
+	if len(tx_queue_list) > buffer_budget: # over budget: forget the oldest entry
+		tx_queue_list.pop()
+		of_count_tx_queue_list += 1
+
+def handle_net_dev_xmit(event_info): # move a queued packet to the "transmitted" list
+	global of_count_tx_xmit_list
+
+	(name, context, cpu, time, pid, comm,
+		skbaddr, skblen, rc, dev_name) = event_info
+	if rc == 0: # NETDEV_TX_OK
+		for i in range(len(tx_queue_list)):
+			skb = tx_queue_list[i]
+			if skb['skbaddr'] == skbaddr: # first (most recently queued) match wins
+				skb['xmit_t'] = time
+				tx_xmit_list.insert(0, skb)
+				del tx_queue_list[i] # safe: the loop is left right after
+				if len(tx_xmit_list) > buffer_budget:
+					tx_xmit_list.pop()
+					of_count_tx_xmit_list += 1
+				return
+
+def handle_kfree_skb(event_info): # the freed skb is looked up in the queue, xmit and rx lists, in that order
+	(name, context, cpu, time, pid, comm,
+		skbaddr, protocol, location) = event_info
+	for i in range(len(tx_queue_list)):
+		skb = tx_queue_list[i]
+		if skb['skbaddr'] == skbaddr: # freed before transmit: just forget it
+			del tx_queue_list[i]
+			return
+	for i in range(len(tx_xmit_list)):
+		skb = tx_xmit_list[i]
+		if skb['skbaddr'] == skbaddr: # freed after transmit: completes a tx hunk
+			skb['free_t'] = time
+			tx_free_list.append(skb)
+			del tx_xmit_list[i]
+			return
+	for i in range(len(rx_skb_list)):
+		rec_data = rx_skb_list[i]
+		if rec_data['skbaddr'] == skbaddr:
+			rec_data.update({'handle':"kfree_skb", # NOTE(review): 'location' is not stored,
+					'comm':comm, 'pid':pid, 'comm_t':time}) # yet print_receive() reads event['location']
+			del rx_skb_list[i]
+			return
+
+def handle_consume_skb(event_info): # a consumed skb completes a transmit hunk
+	(name, context, cpu, time, pid, comm, skbaddr) = event_info
+	for i in range(len(tx_xmit_list)): # only the transmitted list is searched
+		skb = tx_xmit_list[i]
+		if skb['skbaddr'] == skbaddr:
+			skb['free_t'] = time
+			tx_free_list.append(skb)
+			del tx_xmit_list[i]
+			return
+
+def handle_skb_copy_datagram_iovec(event_info): # tag a received skb with the task that copied it
+	(name, context, cpu, time, pid, comm, skbaddr, skblen) = event_info
+	for i in range(len(rx_skb_list)):
+		rec_data = rx_skb_list[i]
+		if skbaddr == rec_data['skbaddr']:
+			rec_data.update({'handle':"skb_copy_datagram_iovec",
+					'comm':comm, 'pid':pid, 'comm_t':time}) # rec_data is also in a softirq event_list
+			del rx_skb_list[i] # matched once, no longer a candidate
+			return
diff --git a/scripts/python/sched-migration.py b/scripts/python/sched-migration.py
new file mode 100644
index 0000000..de66cb3
--- /dev/null
+++ b/scripts/python/sched-migration.py
@@ -0,0 +1,460 @@
+#!/usr/bin/python
+#
+# Cpu task migration overview toy
+#
+# Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com>
+#
+# perf script event handlers have been generated by perf script -g python
+#
+# This software is distributed under the terms of the GNU General
+# Public License ("GPL") version 2 as published by the Free Software
+# Foundation.
+
+
+import os
+import sys
+
+from collections import defaultdict
+from UserList import UserList
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+sys.path.append('scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from SchedGui import *
+
+
+threads = { 0 : "idle"}
+
+def thread_name(pid):
+	return "%s:%d" % (threads[pid], pid)
+
+class RunqueueEventUnknown: # placeholder event used when no specific cause is known
+	@staticmethod
+	def color():
+		return None # no top colour is painted for this event
+
+	def __repr__(self):
+		return "unknown"
+
+class RunqueueEventSleep: # the task switched out of the cpu was not in state "R"
+	@staticmethod
+	def color():
+		return (0, 0, 0xff) # colour tuple used as top_color by TimeSliceList
+
+	def __init__(self, sleeper):
+		self.sleeper = sleeper # pid of the task that went to sleep
+
+	def __repr__(self):
+		return "%s gone to sleep" % thread_name(self.sleeper)
+
+class RunqueueEventWakeup: # an existing task was woken up onto the runqueue
+	@staticmethod
+	def color():
+		return (0xff, 0xff, 0) # colour tuple used as top_color by TimeSliceList
+
+	def __init__(self, wakee):
+		self.wakee = wakee # pid of the woken task
+
+	def __repr__(self):
+		return "%s woke up" % thread_name(self.wakee)
+
+class RunqueueEventFork: # a newly forked task was woken up onto the runqueue
+	@staticmethod
+	def color():
+		return (0, 0xff, 0) # colour tuple used as top_color by TimeSliceList
+
+	def __init__(self, child):
+		self.child = child # pid of the new task
+
+	def __repr__(self):
+		return "new forked task %s" % thread_name(self.child)
+
+class RunqueueMigrateIn: # a task arrived on this runqueue from another cpu
+	@staticmethod
+	def color():
+		return (0, 0xf0, 0xff) # colour tuple used as top_color by TimeSliceList
+
+	def __init__(self, new):
+		self.new = new # pid of the arriving task
+
+	def __repr__(self):
+		return "task migrated in %s" % thread_name(self.new)
+
+class RunqueueMigrateOut: # a task left this runqueue for another cpu
+	@staticmethod
+	def color():
+		return (0xff, 0, 0xff) # colour tuple used as top_color by TimeSliceList
+
+	def __init__(self, old):
+		self.old = old # pid of the departing task
+
+	def __repr__(self):
+		return "task migrated out %s" % thread_name(self.old)
+
+class RunqueueSnapshot:
+	def __init__(self, tasks = [0], event = RunqueueEventUnknown()):
+		self.tasks = tuple(tasks)
+		self.event = event
+
+	def sched_switch(self, prev, prev_state, next):
+		event = RunqueueEventUnknown()
+
+		if taskState(prev_state) == "R" and next in self.tasks \
+			and prev in self.tasks:
+			return self
+
+		if taskState(prev_state) != "R":
+			event = RunqueueEventSleep(prev)
+
+		next_tasks = list(self.tasks[:])
+		if prev in self.tasks:
+			if taskState(prev_state) != "R":
+				next_tasks.remove(prev)
+		elif taskState(prev_state) == "R":
+			next_tasks.append(prev)
+
+		if next not in next_tasks:
+			next_tasks.append(next)
+
+		return RunqueueSnapshot(next_tasks, event)
+
+	def migrate_out(self, old):
+		if old not in self.tasks:
+			return self
+		next_tasks = [task for task in self.tasks if task != old]
+
+		return RunqueueSnapshot(next_tasks, RunqueueMigrateOut(old))
+
+	def __migrate_in(self, new, event):
+		if new in self.tasks:
+			self.event = event
+			return self
+		next_tasks = self.tasks[:] + tuple([new])
+
+		return RunqueueSnapshot(next_tasks, event)
+
+	def migrate_in(self, new):
+		return self.__migrate_in(new, RunqueueMigrateIn(new))
+
+	def wake_up(self, new):
+		return self.__migrate_in(new, RunqueueEventWakeup(new))
+
+	def wake_up_new(self, new):
+		return self.__migrate_in(new, RunqueueEventFork(new))
+
+	def load(self):
+		""" Provide the number of tasks on the runqueue.
+		    Don't count idle"""
+		return len(self.tasks) - 1
+
+	def __repr__(self):
+		ret = self.tasks.__repr__()
+		ret += self.origin_tostring()
+
+		return ret
+
+class TimeSlice: # state of every runqueue between two consecutive recorded events
+	def __init__(self, start, prev):
+		self.start = start
+		self.prev = prev
+		self.end = start # extended by next() when the following slice begins
+		# cpus that triggered the event
+		self.event_cpus = []
+		if prev is not None:
+			self.total_load = prev.total_load
+			self.rqs = prev.rqs.copy() # shallow copy: unchanged snapshots are shared
+		else:
+			self.rqs = defaultdict(RunqueueSnapshot)
+			self.total_load = 0
+
+	def __update_total_load(self, old_rq, new_rq):
+		diff = new_rq.load() - old_rq.load()
+		self.total_load += diff
+
+	def sched_switch(self, ts_list, prev, prev_state, next, cpu):
+		old_rq = self.prev.rqs[cpu]
+		new_rq = old_rq.sched_switch(prev, prev_state, next)
+
+		if old_rq is new_rq: # no change: this slice is not added to ts_list
+			return
+
+		self.rqs[cpu] = new_rq
+		self.__update_total_load(old_rq, new_rq)
+		ts_list.append(self)
+		self.event_cpus = [cpu]
+
+	def migrate(self, ts_list, new, old_cpu, new_cpu):
+		if old_cpu == new_cpu:
+			return
+		old_rq = self.prev.rqs[old_cpu]
+		out_rq = old_rq.migrate_out(new)
+		self.rqs[old_cpu] = out_rq
+		self.__update_total_load(old_rq, out_rq)
+
+		new_rq = self.prev.rqs[new_cpu]
+		in_rq = new_rq.migrate_in(new)
+		self.rqs[new_cpu] = in_rq
+		self.__update_total_load(new_rq, in_rq)
+
+		ts_list.append(self) # appended even if neither runqueue changed
+
+		if old_rq is not out_rq:
+			self.event_cpus.append(old_cpu)
+		self.event_cpus.append(new_cpu)
+
+	def wake_up(self, ts_list, pid, cpu, fork):
+		old_rq = self.prev.rqs[cpu]
+		if fork:
+			new_rq = old_rq.wake_up_new(pid)
+		else:
+			new_rq = old_rq.wake_up(pid)
+
+		if new_rq is old_rq: # task already on the runqueue: slice not recorded
+			return
+		self.rqs[cpu] = new_rq
+		self.__update_total_load(old_rq, new_rq)
+		ts_list.append(self)
+		self.event_cpus = [cpu]
+
+	def next(self, t): # close this slice at t and return its successor
+		self.end = t
+		return TimeSlice(t, self)
+
+class TimeSliceList(UserList):
+	def __init__(self, arg = []):
+		self.data = arg
+
+	def get_time_slice(self, ts):
+		if len(self.data) == 0:
+			slice = TimeSlice(ts, TimeSlice(-1, None))
+		else:
+			slice = self.data[-1].next(ts)
+		return slice
+
+	def find_time_slice(self, ts):
+		start = 0
+		end = len(self.data)
+		found = -1
+		searching = True
+		while searching:
+			if start == end or start == end - 1:
+				searching = False
+
+			i = (end + start) / 2
+			if self.data[i].start <= ts and self.data[i].end >= ts:
+				found = i
+				end = i
+				continue
+
+			if self.data[i].end < ts:
+				start = i
+
+			elif self.data[i].start > ts:
+				end = i
+
+		return found
+
+	def set_root_win(self, win):
+		self.root_win = win
+
+	def mouse_down(self, cpu, t):
+		idx = self.find_time_slice(t)
+		if idx == -1:
+			return
+
+		ts = self[idx]
+		rq = ts.rqs[cpu]
+		raw = "CPU: %d\n" % cpu
+		raw += "Last event : %s\n" % rq.event.__repr__()
+		raw += "Timestamp : %d.%06d\n" % (ts.start / (10 ** 9), (ts.start % (10 ** 9)) / 1000)
+		raw += "Duration : %6d us\n" % ((ts.end - ts.start) / (10 ** 6))
+		raw += "Load = %d\n" % rq.load()
+		for t in rq.tasks:
+			raw += "%s \n" % thread_name(t)
+
+		self.root_win.update_summary(raw)
+
+	def update_rectangle_cpu(self, slice, cpu):
+		rq = slice.rqs[cpu]
+
+		if slice.total_load != 0:
+			load_rate = rq.load() / float(slice.total_load)
+		else:
+			load_rate = 0
+
+		red_power = int(0xff - (0xff * load_rate))
+		color = (0xff, red_power, red_power)
+
+		top_color = None
+
+		if cpu in slice.event_cpus:
+			top_color = rq.event.color()
+
+		self.root_win.paint_rectangle_zone(cpu, color, top_color, slice.start, slice.end)
+
+	def fill_zone(self, start, end):
+		i = self.find_time_slice(start)
+		if i == -1:
+			return
+
+		for i in xrange(i, len(self.data)):
+			timeslice = self.data[i]
+			if timeslice.start > end:
+				return
+
+			for cpu in timeslice.rqs:
+				self.update_rectangle_cpu(timeslice, cpu)
+
+	def interval(self):
+		if len(self.data) == 0:
+			return (0, 0)
+
+		return (self.data[0].start, self.data[-1].end)
+
+	def nr_rectangles(self):
+		last_ts = self.data[-1]
+		max_cpu = 0
+		for cpu in last_ts.rqs:
+			if cpu > max_cpu:
+				max_cpu = cpu
+		return max_cpu
+
+
+class SchedEventProxy: # turns sched tracepoint callbacks into TimeSlice updates
+	def __init__(self):
+		self.current_tsk = defaultdict(lambda : -1) # cpu -> pid last switched in (-1: unknown)
+		self.timeslices = TimeSliceList()
+
+	def sched_switch(self, headers, prev_comm, prev_pid, prev_prio, prev_state,
+			 next_comm, next_pid, next_prio):
+		""" Ensure the task we sched out this cpu is really the one
+		    we logged. Otherwise we may have missed traces """
+
+		on_cpu_task = self.current_tsk[headers.cpu]
+
+		# NOTE(review): the message says "rejected" but there is no return,
+		# so the event is still processed below - confirm intent.
+		if on_cpu_task != -1 and on_cpu_task != prev_pid:
+			print "Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \
+				(headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid)
+
+		threads[prev_pid] = prev_comm
+		threads[next_pid] = next_comm
+		self.current_tsk[headers.cpu] = next_pid
+
+		ts = self.timeslices.get_time_slice(headers.ts())
+		ts.sched_switch(self.timeslices, prev_pid, prev_state, next_pid, headers.cpu)
+
+	def migrate(self, headers, pid, prio, orig_cpu, dest_cpu):
+		ts = self.timeslices.get_time_slice(headers.ts())
+		ts.migrate(self.timeslices, pid, orig_cpu, dest_cpu)
+
+	def wake_up(self, headers, comm, pid, success, target_cpu, fork):
+		if success == 0: # failed wakeups are ignored
+			return
+		ts = self.timeslices.get_time_slice(headers.ts())
+		ts.wake_up(self.timeslices, pid, target_cpu, fork)
+
+
+def trace_begin(): # create the module-level event proxy used by the handlers below
+	global parser
+	parser = SchedEventProxy()
+
+def trace_end(): # open the GUI on the collected time slices
+	app = wx.App(False) # wx is not imported here; presumably it arrives via "from SchedGui import *"
+	timeslices = parser.timeslices
+	frame = RootFrame(timeslices, "Migration")
+	app.MainLoop()
+
+def sched__sched_stat_runtime(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, comm, pid, runtime, vruntime):
+	pass # event accepted but ignored by this script
+
+def sched__sched_stat_iowait(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, comm, pid, delay):
+	pass # event accepted but ignored by this script
+
+def sched__sched_stat_sleep(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, comm, pid, delay):
+	pass # event accepted but ignored by this script
+
+def sched__sched_stat_wait(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, comm, pid, delay):
+	pass # event accepted but ignored by this script
+
+def sched__sched_process_fork(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, parent_comm, parent_pid, child_comm, child_pid):
+	pass # ignored here; forks are recorded through sched__sched_wakeup_new()
+
+def sched__sched_process_wait(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, comm, pid, prio):
+	pass # event accepted but ignored by this script
+
+def sched__sched_process_exit(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, comm, pid, prio):
+	pass # event accepted but ignored by this script
+
+def sched__sched_process_free(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, comm, pid, prio):
+	pass # event accepted but ignored by this script
+
+def sched__sched_migrate_task(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, comm, pid, prio, orig_cpu,
+	dest_cpu):
+	headers = EventHeaders(common_cpu, common_secs, common_nsecs,
+				common_pid, common_comm, common_callchain)
+	parser.migrate(headers, pid, prio, orig_cpu, dest_cpu) # comm is not forwarded
+
+def sched__sched_switch(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm, common_callchain,
+	prev_comm, prev_pid, prev_prio, prev_state,
+	next_comm, next_pid, next_prio):
+
+	headers = EventHeaders(common_cpu, common_secs, common_nsecs,
+				common_pid, common_comm, common_callchain)
+	parser.sched_switch(headers, prev_comm, prev_pid, prev_prio, prev_state,
+			 next_comm, next_pid, next_prio) # all event fields forwarded unchanged
+
+def sched__sched_wakeup_new(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, comm, pid, prio, success,
+	target_cpu):
+	headers = EventHeaders(common_cpu, common_secs, common_nsecs,
+				common_pid, common_comm, common_callchain)
+	parser.wake_up(headers, comm, pid, success, target_cpu, 1) # 1: fork flag set
+
+def sched__sched_wakeup(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, comm, pid, prio, success,
+	target_cpu):
+	headers = EventHeaders(common_cpu, common_secs, common_nsecs,
+				common_pid, common_comm, common_callchain)
+	parser.wake_up(headers, comm, pid, success, target_cpu, 0) # 0: fork flag clear
+
+def sched__sched_wait_task(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, comm, pid, prio):
+	pass # event accepted but ignored by this script
+
+def sched__sched_kthread_stop_ret(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, ret):
+	pass # event accepted but ignored by this script
+
+def sched__sched_kthread_stop(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, comm, pid):
+	pass # event accepted but ignored by this script
+
+def trace_unhandled(event_name, context, event_fields_dict):
+	pass # does nothing with the event
diff --git a/scripts/python/sctop.py b/scripts/python/sctop.py
new file mode 100644
index 0000000..61621b9
--- /dev/null
+++ b/scripts/python/sctop.py
@@ -0,0 +1,80 @@
+# system call top
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Periodically displays system-wide system call totals, broken down by
+# syscall.  If a [comm] arg is specified, only syscalls called by
+# [comm] are displayed. If an [interval] arg is specified, the display
+# will be refreshed every [interval] seconds.  The default interval is
+# 3 seconds.
+
+import os, sys, thread, time
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+usage = "perf script -s sctop.py [comm] [interval]\n";
+
+for_comm = None
+default_interval = 3
+interval = default_interval
+
+if len(sys.argv) > 3:
+	sys.exit(usage)
+
+if len(sys.argv) > 2:
+	for_comm = sys.argv[1]
+	interval = int(sys.argv[2])
+elif len(sys.argv) > 1:
+	try:
+		interval = int(sys.argv[1])
+	except ValueError:
+		for_comm = sys.argv[1]
+		interval = default_interval
+
+syscalls = autodict()
+
+def trace_begin(): # start the periodic display in a separate thread
+	thread.start_new_thread(print_syscall_totals, (interval,))
+	pass # redundant statement, kept as-is
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, id, args):
+	if for_comm is not None: # optional [comm] filter from the command line
+		if common_comm != for_comm:
+			return
+	try:
+		syscalls[id] += 1
+	except TypeError: # first hit for this id; presumably autodict returns a non-numeric placeholder
+		syscalls[id] = 1
+
+def syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	id, args):
+	raw_syscalls__sys_enter(**locals())
+
+def print_syscall_totals(interval): # endless loop: print, reset and sleep every interval seconds
+	while 1:
+		clear_term()
+		if for_comm is not None:
+			print "\nsyscall events for %s:\n\n" % (for_comm),
+		else:
+			print "\nsyscall events:\n\n",
+
+		print "%-40s  %10s\n" % ("event", "count"),
+		print "%-40s  %10s\n" % ("----------------------------------------", \
+						 "----------"),
+
+		for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
+					      reverse = True):
+			try:
+				print "%-40s  %10d\n" % (syscall_name(id), val),
+			except TypeError: # entries whose value does not format with %d are skipped
+				pass
+		syscalls.clear() # counts are per interval, not cumulative
+		time.sleep(interval)
diff --git a/scripts/python/stackcollapse.py b/scripts/python/stackcollapse.py
new file mode 100755
index 0000000..1697b5e
--- /dev/null
+++ b/scripts/python/stackcollapse.py
@@ -0,0 +1,126 @@
+# stackcollapse.py - format perf samples with one line per distinct call stack
+# SPDX-License-Identifier: GPL-2.0
+#
+# This script's output has two space-separated fields.  The first is a semicolon
+# separated stack including the program name (from the "comm" field) and the
+# function names from the call stack.  The second is a count:
+#
+#  swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 2
+#
+# The file is sorted according to the first field.
+#
+# Input may be created and processed using:
+#
+#  perf record -a -g -F 99 sleep 60
+#  perf script report stackcollapse > out.stacks-folded
+#
+# (perf script record stackcollapse works too).
+#
+# Written by Paolo Bonzini <pbonzini@redhat.com>
+# Based on Brendan Gregg's stackcollapse-perf.pl script.
+
+import os
+import sys
+from collections import defaultdict
+from optparse import OptionParser, make_option
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+                '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from EventClass import *
+
+# command line parsing
+
+option_list = [
+    # formatting options for the bottom entry of the stack
+    make_option("--include-tid", dest="include_tid",
+                 action="store_true", default=False,
+                 help="include thread id in stack"),
+    make_option("--include-pid", dest="include_pid",
+                 action="store_true", default=False,
+                 help="include process id in stack"),
+    make_option("--no-comm", dest="include_comm",
+                 action="store_false", default=True,
+                 help="do not separate stacks according to comm"),
+    make_option("--tidy-java", dest="tidy_java",
+                 action="store_true", default=False,
+                 help="beautify Java signatures"),
+    make_option("--kernel", dest="annotate_kernel",
+                 action="store_true", default=False,
+                 help="annotate kernel functions with _[k]")
+]
+
+parser = OptionParser(option_list=option_list)
+(opts, args) = parser.parse_args()
+
+if len(args) != 0:
+    parser.error("unexpected command line argument")
+if opts.include_tid and not opts.include_comm:
+    parser.error("requesting tid but not comm is invalid")
+if opts.include_pid and not opts.include_comm:
+    parser.error("requesting pid but not comm is invalid")
+
+# event handlers
+
+lines = defaultdict(lambda: 0)
+
+def process_event(param_dict):
+    def tidy_function_name(sym, dso):
+        if sym is None:
+            sym = '[unknown]'
+
+        sym = sym.replace(';', ':')
+        if opts.tidy_java:
+            # the original stackcollapse-perf.pl script gives the
+            # example of converting this:
+            #    Lorg/mozilla/javascript/MemberBox;.<init>(Ljava/lang/reflect/Method;)V
+            # to this:
+            #    org/mozilla/javascript/MemberBox:.init
+            sym = sym.replace('<', '')
+            sym = sym.replace('>', '')
+            if sym[0] == 'L' and sym.find('/'):
+                sym = sym[1:]
+            try:
+                sym = sym[:sym.index('(')]
+            except ValueError:
+                pass
+
+        if opts.annotate_kernel and dso == '[kernel.kallsyms]':
+            return sym + '_[k]'
+        else:
+            return sym
+
+    stack = list()
+    if 'callchain' in param_dict:
+        for entry in param_dict['callchain']:
+            entry.setdefault('sym', dict())
+            entry['sym'].setdefault('name', None)
+            entry.setdefault('dso', None)
+            stack.append(tidy_function_name(entry['sym']['name'],
+                                            entry['dso']))
+    else:
+        param_dict.setdefault('symbol', None)
+        param_dict.setdefault('dso', None)
+        stack.append(tidy_function_name(param_dict['symbol'],
+                                        param_dict['dso']))
+
+    if opts.include_comm:
+        comm = param_dict["comm"].replace(' ', '_')
+        sep = "-"
+        if opts.include_pid:
+            comm = comm + sep + str(param_dict['sample']['pid'])
+            sep = "/"
+        if opts.include_tid:
+            comm = comm + sep + str(param_dict['sample']['tid'])
+        stack.append(comm)
+
+    stack_string = ';'.join(reversed(stack))
+    lines[stack_string] = lines[stack_string] + 1
+
+def trace_end():
+    list = lines.keys()
+    list.sort()
+    for stack in list:
+        print "%s %d" % (stack, lines[stack])
diff --git a/scripts/python/stat-cpi.py b/scripts/python/stat-cpi.py
new file mode 100644
index 0000000..8410672
--- /dev/null
+++ b/scripts/python/stat-cpi.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GPL-2.0
+
+data    = {}
+times   = []
+threads = []
+cpus    = []
+
+def get_key(time, event, cpu, thread):
+    return "%d-%s-%d-%d" % (time, event, cpu, thread)
+
+def store_key(time, cpu, thread):
+    if (time not in times):
+        times.append(time)
+
+    if (cpu not in cpus):
+        cpus.append(cpu)
+
+    if (thread not in threads):
+        threads.append(thread)
+
+def store(time, event, cpu, thread, val, ena, run):
+    #print "event %s cpu %d, thread %d, time %d, val %d, ena %d, run %d" % \
+    #      (event, cpu, thread, time, val, ena, run)
+
+    store_key(time, cpu, thread)
+    key = get_key(time, event, cpu, thread)
+    data[key] = [ val, ena, run]
+
+def get(time, event, cpu, thread):
+    key = get_key(time, event, cpu, thread)
+    return data[key][0]
+
+def stat__cycles_k(cpu, thread, time, val, ena, run):
+    store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions_k(cpu, thread, time, val, ena, run):
+    store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__cycles_u(cpu, thread, time, val, ena, run):
+    store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions_u(cpu, thread, time, val, ena, run):
+    store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__cycles(cpu, thread, time, val, ena, run):
+    store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions(cpu, thread, time, val, ena, run):
+    store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__interval(time):
+    for cpu in cpus:
+        for thread in threads:
+            cyc = get(time, "cycles", cpu, thread)
+            ins = get(time, "instructions", cpu, thread)
+            cpi = 0
+
+            if ins != 0:
+                cpi = cyc/float(ins)
+
+            print "%15f: cpu %d, thread %d -> cpi %f (%d/%d)" % (time/(float(1000000000)), cpu, thread, cpi, cyc, ins)
+
+def trace_end():
+    pass
+# XXX trace_end callback could be used as an alternative place
+#     to compute same values as in the script above:
+#
+#    for time in times:
+#        for cpu in cpus:
+#            for thread in threads:
+#                cyc = get(time, "cycles", cpu, thread)
+#                ins = get(time, "instructions", cpu, thread)
+#
+#                if ins != 0:
+#                    cpi = cyc/float(ins)
+#
+#                print "time %.9f, cpu %d, thread %d -> cpi %f" % (time/(float(1000000000)), cpu, thread, cpi)
diff --git a/scripts/python/syscall-counts-by-pid.py b/scripts/python/syscall-counts-by-pid.py
new file mode 100644
index 0000000..daf314c
--- /dev/null
+++ b/scripts/python/syscall-counts-by-pid.py
@@ -0,0 +1,74 @@
+# system call counts, by pid
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Displays system-wide system call totals, broken down by comm, pid and syscall.
+# If a [comm] or [pid] arg is specified, only syscalls made by that comm or pid are displayed.
+
+import os, sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import syscall_name
+
+usage = "perf script -s syscall-counts-by-pid.py [comm]\n";
+
+for_comm = None
+for_pid = None
+
+if len(sys.argv) > 2:
+	sys.exit(usage)
+
+if len(sys.argv) > 1:
+	try:
+		for_pid = int(sys.argv[1])
+	except:
+		for_comm = sys.argv[1]
+
+syscalls = autodict()
+
+def trace_begin():
+	print "Press control+C to stop and show the summary"
+
+def trace_end():
+	print_syscall_totals()
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, id, args):
+
+	if (for_comm and common_comm != for_comm) or \
+	   (for_pid  and common_pid  != for_pid ):
+		return
+	try:
+		syscalls[common_comm][common_pid][id] += 1
+	except TypeError:
+		syscalls[common_comm][common_pid][id] = 1
+
+def syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	id, args):
+	raw_syscalls__sys_enter(**locals())
+
+def print_syscall_totals():
+    if for_comm is not None:
+	    print "\nsyscall events for %s:\n\n" % (for_comm),
+    else:
+	    print "\nsyscall events by comm/pid:\n\n",
+
+    print "%-40s  %10s\n" % ("comm [pid]/syscalls", "count"),
+    print "%-40s  %10s\n" % ("----------------------------------------", \
+                                 "----------"),
+
+    comm_keys = syscalls.keys()
+    for comm in comm_keys:
+	    pid_keys = syscalls[comm].keys()
+	    for pid in pid_keys:
+		    print "\n%s [%d]\n" % (comm, pid),
+		    id_keys = syscalls[comm][pid].keys()
+		    for id, val in sorted(syscalls[comm][pid].iteritems(), \
+				  key = lambda(k, v): (v, k),  reverse = True):
+			    print "  %-38s  %10d\n" % (syscall_name(id), val),
diff --git a/scripts/python/syscall-counts.py b/scripts/python/syscall-counts.py
new file mode 100644
index 0000000..e66a773
--- /dev/null
+++ b/scripts/python/syscall-counts.py
@@ -0,0 +1,64 @@
+# system call counts
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Displays system-wide system call totals, broken down by syscall.
+# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import syscall_name
+
+usage = "perf script -s syscall-counts.py [comm]\n";
+
+for_comm = None
+
+if len(sys.argv) > 2:
+	sys.exit(usage)
+
+if len(sys.argv) > 1:
+	for_comm = sys.argv[1]
+
+syscalls = autodict()
+
+def trace_begin():
+	print "Press control+C to stop and show the summary"
+
+def trace_end():
+	print_syscall_totals()
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	common_callchain, id, args):
+	if for_comm is not None:
+		if common_comm != for_comm:
+			return
+	try:
+		syscalls[id] += 1
+	except TypeError:
+		syscalls[id] = 1
+
+def syscalls__sys_enter(event_name, context, common_cpu,
+	common_secs, common_nsecs, common_pid, common_comm,
+	id, args):
+	raw_syscalls__sys_enter(**locals())
+
+def print_syscall_totals():
+    if for_comm is not None:
+	    print "\nsyscall events for %s:\n\n" % (for_comm),
+    else:
+	    print "\nsyscall events:\n\n",
+
+    print "%-40s  %10s\n" % ("event", "count"),
+    print "%-40s  %10s\n" % ("----------------------------------------", \
+                                 "-----------"),
+
+    for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
+				  reverse = True):
+	    print "%-40s  %10d\n" % (syscall_name(id), val),
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 0000000..8cc30e7
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1,4 @@
+llvm-src-base.c
+llvm-src-kbuild.c
+llvm-src-prologue.c
+llvm-src-relocation.c
diff --git a/tests/Build b/tests/Build
new file mode 100644
index 0000000..6c108fa
--- /dev/null
+++ b/tests/Build
@@ -0,0 +1,87 @@
+perf-y += builtin-test.o
+perf-y += parse-events.o
+perf-y += dso-data.o
+perf-y += attr.o
+perf-y += vmlinux-kallsyms.o
+perf-y += openat-syscall.o
+perf-y += openat-syscall-all-cpus.o
+perf-y += openat-syscall-tp-fields.o
+perf-y += mmap-basic.o
+perf-y += perf-record.o
+perf-y += evsel-roundtrip-name.o
+perf-y += evsel-tp-sched.o
+perf-y += fdarray.o
+perf-y += pmu.o
+perf-y += hists_common.o
+perf-y += hists_link.o
+perf-y += hists_filter.o
+perf-y += hists_output.o
+perf-y += hists_cumulate.o
+perf-y += python-use.o
+perf-y += bp_signal.o
+perf-y += bp_signal_overflow.o
+perf-y += bp_account.o
+perf-y += task-exit.o
+perf-y += sw-clock.o
+perf-y += mmap-thread-lookup.o
+perf-y += thread-mg-share.o
+perf-y += switch-tracking.o
+perf-y += keep-tracking.o
+perf-y += code-reading.o
+perf-y += sample-parsing.o
+perf-y += parse-no-sample-id-all.o
+perf-y += kmod-path.o
+perf-y += thread-map.o
+perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o
+perf-y += bpf.o
+perf-y += topology.o
+perf-y += mem.o
+perf-y += cpumap.o
+perf-y += stat.o
+perf-y += event_update.o
+perf-y += event-times.o
+perf-y += expr.o
+perf-y += backward-ring-buffer.o
+perf-y += sdt.o
+perf-y += is_printable_array.o
+perf-y += bitmap.o
+perf-y += perf-hooks.o
+perf-y += clang.o
+perf-y += unit_number__scnprintf.o
+perf-y += mem2node.o
+
+$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
+	$(call rule_mkdir)
+	$(Q)echo '#include <tests/llvm.h>' > $@
+	$(Q)echo 'const char test_llvm__bpf_base_prog[] =' >> $@
+	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+	$(Q)echo ';' >> $@
+
+$(OUTPUT)tests/llvm-src-kbuild.c: tests/bpf-script-test-kbuild.c tests/Build
+	$(call rule_mkdir)
+	$(Q)echo '#include <tests/llvm.h>' > $@
+	$(Q)echo 'const char test_llvm__bpf_test_kbuild_prog[] =' >> $@
+	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+	$(Q)echo ';' >> $@
+
+$(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c tests/Build
+	$(call rule_mkdir)
+	$(Q)echo '#include <tests/llvm.h>' > $@
+	$(Q)echo 'const char test_llvm__bpf_test_prologue_prog[] =' >> $@
+	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+	$(Q)echo ';' >> $@
+
+$(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/Build
+	$(call rule_mkdir)
+	$(Q)echo '#include <tests/llvm.h>' > $@
+	$(Q)echo 'const char test_llvm__bpf_test_relocation[] =' >> $@
+	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+	$(Q)echo ';' >> $@
+
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
+perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
+endif
+
+CFLAGS_attr.o         += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))"
+CFLAGS_python-use.o   += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))"
+CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls
diff --git a/tests/attr.c b/tests/attr.c
new file mode 100644
index 0000000..05dfe11
--- /dev/null
+++ b/tests/attr.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The struct perf_event_attr test support.
+ *
+ * This test is embedded directly into perf and is governed
+ * by the PERF_TEST_ATTR environment variable and a hook inside
+ * the sys_perf_event_open function.
+ *
+ * The general idea is to store 'struct perf_event_attr' details for
+ * each event created within a single perf command. Each event's details
+ * are stored in a separate text file. Once the perf command has finished,
+ * these files can be checked for the values we expect for that command.
+ *
+ * Besides 'struct perf_event_attr' values we also store 'fd' and
+ * 'group_fd' values to allow checking for groups created.
+ *
+ * This all is triggered by setting PERF_TEST_ATTR environment variable.
+ * It must contain name of existing directory with access and write
+ * permissions. All the event text files are stored there.
+ */
+
+#include <debug.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "../perf.h"
+#include <subcmd/exec-cmd.h>
+#include "tests.h"
+
+#define ENV "PERF_TEST_ATTR"
+
+static char *dir;
+static bool ready;
+
+void test_attr__init(void)
+{
+	dir = getenv(ENV);
+	test_attr__enabled = (dir != NULL);
+}
+
+#define BUFSIZE 1024
+
+#define __WRITE_ASS(str, fmt, data)					\
+do {									\
+	char buf[BUFSIZE];						\
+	size_t size;							\
+									\
+	size = snprintf(buf, BUFSIZE, #str "=%"fmt "\n", data);		\
+	if (1 != fwrite(buf, size, 1, file)) {				\
+		perror("test attr - failed to write event file");	\
+		fclose(file);						\
+		return -1;						\
+	}								\
+									\
+} while (0)
+
+#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field)
+
+static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
+		       int fd, int group_fd, unsigned long flags)
+{
+	FILE *file;
+	char path[PATH_MAX];
+
+	if (!ready)
+		return 0;
+
+	snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir,
+		 attr->type, attr->config, fd);
+
+	file = fopen(path, "w+");
+	if (!file) {
+		perror("test attr - failed to open event file");
+		return -1;
+	}
+
+	if (fprintf(file, "[event-%d-%llu-%d]\n",
+		    attr->type, attr->config, fd) < 0) {
+		perror("test attr - failed to write event file");
+		fclose(file);
+		return -1;
+	}
+
+	/* syscall arguments */
+	__WRITE_ASS(fd,       "d", fd);
+	__WRITE_ASS(group_fd, "d", group_fd);
+	__WRITE_ASS(cpu,      "d", cpu);
+	__WRITE_ASS(pid,      "d", pid);
+	__WRITE_ASS(flags,   "lu", flags);
+
+	/* struct perf_event_attr */
+	WRITE_ASS(type,   PRIu32);
+	WRITE_ASS(size,   PRIu32);
+	WRITE_ASS(config,  "llu");
+	WRITE_ASS(sample_period, "llu");
+	WRITE_ASS(sample_type,   "llu");
+	WRITE_ASS(read_format,   "llu");
+	WRITE_ASS(disabled,       "d");
+	WRITE_ASS(inherit,        "d");
+	WRITE_ASS(pinned,         "d");
+	WRITE_ASS(exclusive,      "d");
+	WRITE_ASS(exclude_user,   "d");
+	WRITE_ASS(exclude_kernel, "d");
+	WRITE_ASS(exclude_hv,     "d");
+	WRITE_ASS(exclude_idle,   "d");
+	WRITE_ASS(mmap,           "d");
+	WRITE_ASS(comm,           "d");
+	WRITE_ASS(freq,           "d");
+	WRITE_ASS(inherit_stat,   "d");
+	WRITE_ASS(enable_on_exec, "d");
+	WRITE_ASS(task,           "d");
+	WRITE_ASS(watermark,      "d");
+	WRITE_ASS(precise_ip,     "d");
+	WRITE_ASS(mmap_data,      "d");
+	WRITE_ASS(sample_id_all,  "d");
+	WRITE_ASS(exclude_host,   "d");
+	WRITE_ASS(exclude_guest,  "d");
+	WRITE_ASS(exclude_callchain_kernel, "d");
+	WRITE_ASS(exclude_callchain_user, "d");
+	WRITE_ASS(mmap2,	  "d");
+	WRITE_ASS(comm_exec,	  "d");
+	WRITE_ASS(context_switch, "d");
+	WRITE_ASS(write_backward, "d");
+	WRITE_ASS(namespaces,	  "d");
+	WRITE_ASS(use_clockid,    "d");
+	WRITE_ASS(wakeup_events, PRIu32);
+	WRITE_ASS(bp_type, PRIu32);
+	WRITE_ASS(config1, "llu");
+	WRITE_ASS(config2, "llu");
+	WRITE_ASS(branch_sample_type, "llu");
+	WRITE_ASS(sample_regs_user,   "llu");
+	WRITE_ASS(sample_stack_user,  PRIu32);
+
+	fclose(file);
+	return 0;
+}
+
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+		     int fd, int group_fd, unsigned long flags)
+{
+	int errno_saved = errno;
+	/* Only events that were actually opened (fd != -1) are recorded. */
+	if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) {
+		pr_err("test attr FAILED\n");
+		exit(128);
+	}
+	/* store_event() may change errno; hand the caller back the value seen on entry. */
+	errno = errno_saved;
+}
+
+void test_attr__ready(void)
+{
+	if (unlikely(test_attr__enabled) && !ready)
+		ready = true;
+}
+
+static int run_dir(const char *d, const char *perf)
+{
+	char v[] = "-vvvvv";
+	int vcnt = verbose > 0 ? min(verbose, (int) sizeof(v) - 1) : 0;
+	char cmd[3*PATH_MAX];
+	/* vcnt is the %.*s precision; a negative one would be ignored and print all of v. */
+	if (verbose > 0)
+		vcnt++;	/* one more character for the leading '-' */
+
+	scnprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s",
+		  d, d, perf, vcnt, v);
+
+	return system(cmd) ? TEST_FAIL : TEST_OK;
+}
+
+int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct stat st;
+	char path_perf[PATH_MAX];
+	char path_dir[PATH_MAX];
+
+	/* First try development tree tests. */
+	if (!lstat("./tests", &st))
+		return run_dir("./tests", "./perf");
+
+	/* Then the installed path: tests under the exec path, perf under BINDIR. */
+	snprintf(path_dir,  PATH_MAX, "%s/tests", get_argv_exec_path());
+	snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR);
+
+	if (!lstat(path_dir, &st) &&
+	    !lstat(path_perf, &st))
+		return run_dir(path_dir, path_perf);
+	/* Neither location exists: nothing to run. */
+	return TEST_SKIP;
+}
diff --git a/tests/attr.py b/tests/attr.py
new file mode 100644
index 0000000..ff9b60b
--- /dev/null
+++ b/tests/attr.py
@@ -0,0 +1,392 @@
+#! /usr/bin/python
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import sys
+import glob
+import optparse
+import tempfile
+import logging
+import shutil
+import ConfigParser
+
+def data_equal(a, b):
+    # Allow multiple values in assignment separated by '|'
+    a_list = a.split('|')
+    b_list = b.split('|')
+
+    for a_item in a_list:
+        for b_item in b_list:
+            if (a_item == b_item):
+                return True
+            elif (a_item == '*') or (b_item == '*'):
+                return True
+
+    return False
+
+class Fail(Exception):
+    def __init__(self, test, msg):
+        self.msg = msg
+        self.test = test
+    def getMsg(self):
+        return '\'%s\' - %s' % (self.test.path, self.msg)
+
+class Notest(Exception):
+    def __init__(self, test, arch):
+        self.arch = arch
+        self.test = test
+    def getMsg(self):
+        return '[%s] \'%s\'' % (self.arch, self.test.path)
+
+class Unsup(Exception):
+    def __init__(self, test):
+        self.test = test
+    def getMsg(self):
+        return '\'%s\'' % self.test.path
+
+class Event(dict):
+    terms = [
+        'cpu',
+        'flags',
+        'type',
+        'size',
+        'config',
+        'sample_period',
+        'sample_type',
+        'read_format',
+        'disabled',
+        'inherit',
+        'pinned',
+        'exclusive',
+        'exclude_user',
+        'exclude_kernel',
+        'exclude_hv',
+        'exclude_idle',
+        'mmap',
+        'comm',
+        'freq',
+        'inherit_stat',
+        'enable_on_exec',
+        'task',
+        'watermark',
+        'precise_ip',
+        'mmap_data',
+        'sample_id_all',
+        'exclude_host',
+        'exclude_guest',
+        'exclude_callchain_kernel',
+        'exclude_callchain_user',
+        'wakeup_events',
+        'bp_type',
+        'config1',
+        'config2',
+        'branch_sample_type',
+        'sample_regs_user',
+        'sample_stack_user',
+    ]
+
+    def add(self, data):
+        for key, val in data:
+            log.debug("      %s = %s" % (key, val))
+            self[key] = val
+
+    def __init__(self, name, data, base):
+        log.debug("    Event %s" % name);
+        self.name  = name;
+        self.group = ''
+        self.add(base)
+        self.add(data)
+
+    def equal(self, other):
+        for t in Event.terms:
+            log.debug("      [%s] %s %s" % (t, self[t], other[t]));
+            if not self.has_key(t) or not other.has_key(t):
+                return False
+            if not data_equal(self[t], other[t]):
+                return False
+        return True
+
+    def optional(self):
+        if self.has_key('optional') and self['optional'] == '1':
+            return True
+        return False
+
+    def diff(self, other):
+        for t in Event.terms:
+            if not self.has_key(t) or not other.has_key(t):
+                continue
+            if not data_equal(self[t], other[t]):
+		log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
+
+# Test file description needs to have following sections:
+# [config]
+#   - just single instance in file
+#   - needs to specify:
+#     'command' - perf command name
+#     'args'    - special command arguments
+#     'ret'     - expected command return value (0 by default)
+#     'arch'    - architecture specific test (optional)
+#                 comma separated list, ! at the beginning
+#                 negates it.
+#
+# [eventX:base]
+#   - one or multiple instances in file
+#   - expected values assignments
+class Test(object):
+    def __init__(self, path, options):
+        parser = ConfigParser.SafeConfigParser()
+        parser.read(path)
+
+        log.warning("running '%s'" % path)
+
+        self.path     = path
+        self.test_dir = options.test_dir
+        self.perf     = options.perf
+        self.command  = parser.get('config', 'command')
+        self.args     = parser.get('config', 'args')
+
+        try:
+            self.ret  = parser.get('config', 'ret')
+        except:
+            self.ret  = 0
+
+        try:
+            self.arch  = parser.get('config', 'arch')
+            log.warning("test limitation '%s'" % self.arch)
+        except:
+            self.arch  = ''
+
+        self.expect   = {}
+        self.result   = {}
+        log.debug("  loading expected events");
+        self.load_events(path, self.expect)
+
+    def is_event(self, name):
+        if name.find("event") == -1:
+            return False
+        else:
+            return True
+
+    def skip_test(self, myarch):
+        # If architecture not set always run test
+        if self.arch == '':
+            # log.warning("test for arch %s is ok" % myarch)
+            return False
+
+        # Allow multiple values in assignment separated by ','
+        arch_list = self.arch.split(',')
+
+        # Handle negated list such as !s390x,ppc
+        if arch_list[0][0] == '!':
+            arch_list[0] = arch_list[0][1:]
+            log.warning("excluded architecture list %s" % arch_list)
+            for arch_item in arch_list:
+                # log.warning("test for %s arch is %s" % (arch_item, myarch))
+                if arch_item == myarch:
+                    return True
+            return False
+
+        for arch_item in arch_list:
+            # log.warning("test for architecture '%s' current '%s'" % (arch_item, myarch))
+            if arch_item == myarch:
+                return False
+        return True
+
+    def load_events(self, path, events):
+        parser_event = ConfigParser.SafeConfigParser()
+        parser_event.read(path)
+
+        # The event record section header contains the word 'event',
+        # optionally followed by ':' and the name of a 'parent
+        # event' file that is loaded first as a base
+        for section in filter(self.is_event, parser_event.sections()):
+
+            parser_items = parser_event.items(section);
+            base_items   = {}
+
+            # Read parent event if there's any
+            if (':' in section):
+                base = section[section.index(':') + 1:]
+                parser_base = ConfigParser.SafeConfigParser()
+                parser_base.read(self.test_dir + '/' + base)
+                base_items = parser_base.items('event')
+
+            e = Event(section, parser_items, base_items)
+            events[section] = e
+
+    def run_cmd(self, tempdir):
+        junk1, junk2, junk3, junk4, myarch = (os.uname())
+
+        if self.skip_test(myarch):
+            raise Notest(self, myarch)
+
+        cmd = "PERF_TEST_ATTR=%s %s %s -o %s/perf.data %s" % (tempdir,
+              self.perf, self.command, tempdir, self.args)
+        ret = os.WEXITSTATUS(os.system(cmd))
+
+        log.info("  '%s' ret '%s', expected '%s'" % (cmd, str(ret), str(self.ret)))
+
+        if not data_equal(str(ret), str(self.ret)):
+            raise Unsup(self)
+
+    def compare(self, expect, result):
+        match = {}
+
+        log.debug("  compare");
+
+        # For each expected event find all matching
+        # events in result. Fail if there's not any.
+        for exp_name, exp_event in expect.items():
+            exp_list = []
+            res_event = {}
+            log.debug("    matching [%s]" % exp_name)
+            for res_name, res_event in result.items():
+                log.debug("      to [%s]" % res_name)
+                if (exp_event.equal(res_event)):
+                    exp_list.append(res_name)
+                    log.debug("    ->OK")
+                else:
+                    log.debug("    ->FAIL");
+
+            log.debug("    match: [%s] matches %s" % (exp_name, str(exp_list)))
+
+            # we did not find any matching event - fail
+            if not exp_list:
+                if exp_event.optional():
+                    log.debug("    %s does not match, but is optional" % exp_name)
+                else:
+                    if not res_event:
+                        log.debug("    res_event is empty");
+                    else:
+                        exp_event.diff(res_event)
+                    raise Fail(self, 'match failure');
+
+            match[exp_name] = exp_list
+
+        # For each defined group in the expected events
+        # check we match the same group in the result.
+        for exp_name, exp_event in expect.items():
+            group = exp_event.group
+
+            if (group == ''):
+                continue
+
+            for res_name in match[exp_name]:
+                res_group = result[res_name].group
+                if res_group not in match[group]:
+                    raise Fail(self, 'group failure')
+
+                log.debug("    group: [%s] matches group leader %s" %
+                         (exp_name, str(match[group])))
+
+        log.debug("  matched")
+
+    def resolve_groups(self, events):
+        for name, event in events.items():
+            group_fd = event['group_fd'];
+            if group_fd == '-1':
+                continue;
+
+            for iname, ievent in events.items():
+                if (ievent['fd'] == group_fd):
+                    event.group = iname
+                    log.debug('[%s] has group leader [%s]' % (name, iname))
+                    break;
+
+    def run(self):
+        tempdir = tempfile.mkdtemp();
+
+        try:
+            # run the test script
+            self.run_cmd(tempdir);
+
+            # load events expectation for the test
+            log.debug("  loading result events");
+            for f in glob.glob(tempdir + '/event*'):
+                self.load_events(f, self.result);
+
+            # resolve group_fd to event names
+            self.resolve_groups(self.expect);
+            self.resolve_groups(self.result);
+
+            # do the expectation - results matching - both ways
+            self.compare(self.expect, self.result)
+            self.compare(self.result, self.expect)
+
+        finally:
+            # cleanup
+            shutil.rmtree(tempdir)
+
+
+def run_tests(options):
+    for f in glob.glob(options.test_dir + '/' + options.test):
+        try:
+            Test(f, options).run()
+        except Unsup, obj:
+            log.warning("unsupp  %s" % obj.getMsg())
+        except Notest, obj:
+            log.warning("skipped %s" % obj.getMsg())
+
+def setup_log(verbose):
+    global log
+    level = logging.CRITICAL
+
+    if verbose == 1:
+        level = logging.WARNING
+    if verbose == 2:
+        level = logging.INFO
+    if verbose >= 3:
+        level = logging.DEBUG
+
+    log = logging.getLogger('test')
+    log.setLevel(level)
+    ch  = logging.StreamHandler()
+    ch.setLevel(level)
+    formatter = logging.Formatter('%(message)s')
+    ch.setFormatter(formatter)
+    log.addHandler(ch)
+
+USAGE = '''%s [OPTIONS]
+  -d dir  # tests dir
+  -p path # perf binary
+  -t test # single test
+  -v      # verbose level
+''' % sys.argv[0]
+
+def main():
+    parser = optparse.OptionParser(usage=USAGE)
+
+    parser.add_option("-t", "--test",
+                      action="store", type="string", dest="test")
+    parser.add_option("-d", "--test-dir",
+                      action="store", type="string", dest="test_dir")
+    parser.add_option("-p", "--perf",
+                      action="store", type="string", dest="perf")
+    parser.add_option("-v", "--verbose",
+                      action="count", dest="verbose")
+
+    options, args = parser.parse_args()
+    if args:
+        parser.error('FAILED wrong arguments %s' %  ' '.join(args))
+        return -1
+
+    setup_log(options.verbose)
+
+    if not options.test_dir:
+        print 'FAILED no -d option specified'
+        sys.exit(-1)
+
+    if not options.test:
+        options.test = 'test*'
+
+    try:
+        run_tests(options)
+
+    except Fail, obj:
+        print "FAILED %s" % obj.getMsg();
+        sys.exit(-1)
+
+    sys.exit(0)
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/attr/README b/tests/attr/README
new file mode 100644
index 0000000..430024f
--- /dev/null
+++ b/tests/attr/README
@@ -0,0 +1,64 @@
+The struct perf_event_attr test (attr tests) support
+====================================================
+This testing support is embedded into perf directly and is governed
+by the PERF_TEST_ATTR environment variable and a hook inside the
+sys_perf_event_open function.
+
+The general idea is to store the 'struct perf_event_attr' details of
+each event created within a single perf command. Each event's details
+are stored in a separate text file. Once the perf command has finished,
+these files are checked against the values we expect for that command.
+
+The attr tests consist of the following parts:
+
+tests/attr.c
+------------
+This is the sys_perf_event_open hook implementation. The hook
+is triggered when the PERF_TEST_ATTR environment variable is
+defined. It must contain the name of an existing directory with
+access and write permissions.
+
+For each sys_perf_event_open call, the event details are stored in
+a separate file. Besides the 'struct perf_event_attr' values we also
+store the 'fd' and 'group_fd' values to allow checking for groups.
+
+tests/attr.py
+-------------
+This is the python script that does all the hard work. It reads
+the test definition, executes it and checks results.
+
+tests/attr/
+-----------
+Directory containing all attr test definitions.
+The following tests are defined (with perf commands):
+
+  perf record kill                              (test-record-basic)
+  perf record -b kill                           (test-record-branch-any)
+  perf record -j any kill                       (test-record-branch-filter-any)
+  perf record -j any_call kill                  (test-record-branch-filter-any_call)
+  perf record -j any_ret kill                   (test-record-branch-filter-any_ret)
+  perf record -j hv kill                        (test-record-branch-filter-hv)
+  perf record -j ind_call kill                  (test-record-branch-filter-ind_call)
+  perf record -j k kill                         (test-record-branch-filter-k)
+  perf record -j u kill                         (test-record-branch-filter-u)
+  perf record -c 123 kill                       (test-record-count)
+  perf record -d kill                           (test-record-data)
+  perf record -F 100 kill                       (test-record-freq)
+  perf record -g kill                           (test-record-graph-default)
+  perf record --call-graph dwarf kill           (test-record-graph-dwarf)
+  perf record --call-graph fp kill              (test-record-graph-fp)
+  perf record --group -e cycles,instructions kill (test-record-group)
+  perf record -e '{cycles,instructions}' kill   (test-record-group1)
+  perf record -D kill                           (test-record-no-delay)
+  perf record -i kill                           (test-record-no-inherit)
+  perf record -n kill                           (test-record-no-samples)
+  perf record -c 100 -P kill                    (test-record-period)
+  perf record -R kill                           (test-record-raw)
+  perf stat -e cycles kill                      (test-stat-basic)
+  perf stat kill                                (test-stat-default)
+  perf stat -d kill                             (test-stat-detailed-1)
+  perf stat -dd kill                            (test-stat-detailed-2)
+  perf stat -ddd kill                           (test-stat-detailed-3)
+  perf stat --group -e cycles,instructions kill (test-stat-group)
+  perf stat -e '{cycles,instructions}' kill     (test-stat-group1)
+  perf stat -i -e cycles kill                   (test-stat-no-inherit)
diff --git a/tests/attr/base-record b/tests/attr/base-record
new file mode 100644
index 0000000..3794066
--- /dev/null
+++ b/tests/attr/base-record
@@ -0,0 +1,41 @@
+[event]
+fd=1
+group_fd=-1
+# 0 or PERF_FLAG_FD_CLOEXEC flag
+flags=0|8
+cpu=*
+type=0|1
+size=112
+config=0
+sample_period=*
+sample_type=263
+read_format=0
+disabled=1
+inherit=1
+pinned=0
+exclusive=0
+exclude_user=0
+exclude_kernel=0|1
+exclude_hv=0
+exclude_idle=0
+mmap=1
+comm=1
+freq=1
+inherit_stat=0
+enable_on_exec=1
+task=1
+watermark=0
+precise_ip=0|1|2|3
+mmap_data=0
+sample_id_all=1
+exclude_host=0|1
+exclude_guest=0|1
+exclude_callchain_kernel=0
+exclude_callchain_user=0
+wakeup_events=0
+bp_type=0
+config1=0
+config2=0
+branch_sample_type=0
+sample_regs_user=0
+sample_stack_user=0
diff --git a/tests/attr/base-stat b/tests/attr/base-stat
new file mode 100644
index 0000000..4d0c2e4
--- /dev/null
+++ b/tests/attr/base-stat
@@ -0,0 +1,41 @@
+[event]
+fd=1
+group_fd=-1
+# 0 or PERF_FLAG_FD_CLOEXEC flag
+flags=0|8
+cpu=*
+type=0
+size=112
+config=0
+sample_period=0
+sample_type=65536
+read_format=3
+disabled=1
+inherit=1
+pinned=0
+exclusive=0
+exclude_user=0
+exclude_kernel=0|1
+exclude_hv=0
+exclude_idle=0
+mmap=0
+comm=0
+freq=0
+inherit_stat=0
+enable_on_exec=1
+task=0
+watermark=0
+precise_ip=0
+mmap_data=0
+sample_id_all=0
+exclude_host=0|1
+exclude_guest=0|1
+exclude_callchain_kernel=0
+exclude_callchain_user=0
+wakeup_events=0
+bp_type=0
+config1=0
+config2=0
+branch_sample_type=0
+sample_regs_user=0
+sample_stack_user=0
diff --git a/tests/attr/test-record-C0 b/tests/attr/test-record-C0
new file mode 100644
index 0000000..cb0a313
--- /dev/null
+++ b/tests/attr/test-record-C0
@@ -0,0 +1,14 @@
+[config]
+command = record
+args    = -C 0 kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+cpu=0
+
+# no enable on exec for CPU attached
+enable_on_exec=0
+
+# PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_PERIOD
+# + PERF_SAMPLE_CPU added by -C 0
+sample_type=391
diff --git a/tests/attr/test-record-basic b/tests/attr/test-record-basic
new file mode 100644
index 0000000..85a23cf
--- /dev/null
+++ b/tests/attr/test-record-basic
@@ -0,0 +1,6 @@
+[config]
+command = record
+args    = kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
diff --git a/tests/attr/test-record-branch-any b/tests/attr/test-record-branch-any
new file mode 100644
index 0000000..81f839e
--- /dev/null
+++ b/tests/attr/test-record-branch-any
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -b kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=8
diff --git a/tests/attr/test-record-branch-filter-any b/tests/attr/test-record-branch-filter-any
new file mode 100644
index 0000000..357421f
--- /dev/null
+++ b/tests/attr/test-record-branch-filter-any
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j any kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=8
diff --git a/tests/attr/test-record-branch-filter-any_call b/tests/attr/test-record-branch-filter-any_call
new file mode 100644
index 0000000..dbc55f2
--- /dev/null
+++ b/tests/attr/test-record-branch-filter-any_call
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j any_call kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=16
diff --git a/tests/attr/test-record-branch-filter-any_ret b/tests/attr/test-record-branch-filter-any_ret
new file mode 100644
index 0000000..a0824ff
--- /dev/null
+++ b/tests/attr/test-record-branch-filter-any_ret
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j any_ret kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=32
diff --git a/tests/attr/test-record-branch-filter-hv b/tests/attr/test-record-branch-filter-hv
new file mode 100644
index 0000000..f34d6f1
--- /dev/null
+++ b/tests/attr/test-record-branch-filter-hv
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j hv kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=8
diff --git a/tests/attr/test-record-branch-filter-ind_call b/tests/attr/test-record-branch-filter-ind_call
new file mode 100644
index 0000000..b86a352
--- /dev/null
+++ b/tests/attr/test-record-branch-filter-ind_call
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j ind_call kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=64
diff --git a/tests/attr/test-record-branch-filter-k b/tests/attr/test-record-branch-filter-k
new file mode 100644
index 0000000..d3fbc5e
--- /dev/null
+++ b/tests/attr/test-record-branch-filter-k
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j k kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=8
diff --git a/tests/attr/test-record-branch-filter-u b/tests/attr/test-record-branch-filter-u
new file mode 100644
index 0000000..a318f0d
--- /dev/null
+++ b/tests/attr/test-record-branch-filter-u
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -j u kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=8
diff --git a/tests/attr/test-record-count b/tests/attr/test-record-count
new file mode 100644
index 0000000..34f6cc5
--- /dev/null
+++ b/tests/attr/test-record-count
@@ -0,0 +1,9 @@
+[config]
+command = record
+args    = -c 123 kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_period=123
+sample_type=7
+freq=0
diff --git a/tests/attr/test-record-data b/tests/attr/test-record-data
new file mode 100644
index 0000000..a9cf223
--- /dev/null
+++ b/tests/attr/test-record-data
@@ -0,0 +1,10 @@
+[config]
+command = record
+args    = -d kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+# sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
+# PERF_SAMPLE_ADDR | PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC
+sample_type=33039
+mmap_data=1
diff --git a/tests/attr/test-record-freq b/tests/attr/test-record-freq
new file mode 100644
index 0000000..bf4cb45
--- /dev/null
+++ b/tests/attr/test-record-freq
@@ -0,0 +1,7 @@
+[config]
+command = record
+args    = -F 100 kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_period=100
diff --git a/tests/attr/test-record-graph-default b/tests/attr/test-record-graph-default
new file mode 100644
index 0000000..0b216e6
--- /dev/null
+++ b/tests/attr/test-record-graph-default
@@ -0,0 +1,7 @@
+[config]
+command = record
+args    = -g kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=295
diff --git a/tests/attr/test-record-graph-dwarf b/tests/attr/test-record-graph-dwarf
new file mode 100644
index 0000000..da2fa73
--- /dev/null
+++ b/tests/attr/test-record-graph-dwarf
@@ -0,0 +1,12 @@
+[config]
+command = record
+args    = --call-graph dwarf -- kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=45359
+exclude_callchain_user=1
+sample_stack_user=8192
+# TODO different for each arch, no support for that now
+sample_regs_user=*
+mmap_data=1
diff --git a/tests/attr/test-record-graph-fp b/tests/attr/test-record-graph-fp
new file mode 100644
index 0000000..625d190
--- /dev/null
+++ b/tests/attr/test-record-graph-fp
@@ -0,0 +1,7 @@
+[config]
+command = record
+args    = --call-graph fp kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=295
diff --git a/tests/attr/test-record-group b/tests/attr/test-record-group
new file mode 100644
index 0000000..618ba1c
--- /dev/null
+++ b/tests/attr/test-record-group
@@ -0,0 +1,22 @@
+[config]
+command = record
+args    = --group -e cycles,instructions kill >/dev/null 2>&1
+ret     = 1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+sample_type=327
+read_format=4
+
+[event-2:base-record]
+fd=2
+group_fd=1
+config=1
+sample_type=327
+read_format=4
+mmap=0
+comm=0
+task=0
+enable_on_exec=0
+disabled=0
diff --git a/tests/attr/test-record-group-sampling b/tests/attr/test-record-group-sampling
new file mode 100644
index 0000000..8a33ca4
--- /dev/null
+++ b/tests/attr/test-record-group-sampling
@@ -0,0 +1,40 @@
+[config]
+command = record
+args    = -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1
+ret     = 1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+sample_type=343
+read_format=12
+inherit=0
+
+[event-2:base-record]
+fd=2
+group_fd=1
+
+# cache-misses
+type=0
+config=3
+
+# default | PERF_SAMPLE_READ
+sample_type=343
+
+# PERF_FORMAT_ID | PERF_FORMAT_GROUP
+read_format=12
+task=0
+mmap=0
+comm=0
+enable_on_exec=0
+disabled=0
+
+# inherit is disabled for group sampling
+inherit=0
+
+# sampling disabled
+sample_freq=0
+sample_period=0
+freq=0
+write_backward=0
+sample_id_all=0
diff --git a/tests/attr/test-record-group1 b/tests/attr/test-record-group1
new file mode 100644
index 0000000..48e8bd1
--- /dev/null
+++ b/tests/attr/test-record-group1
@@ -0,0 +1,23 @@
+[config]
+command = record
+args    = -e '{cycles,instructions}' kill >/dev/null 2>&1
+ret     = 1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+sample_type=327
+read_format=4
+
+[event-2:base-record]
+fd=2
+group_fd=1
+type=0
+config=1
+sample_type=327
+read_format=4
+mmap=0
+comm=0
+task=0
+enable_on_exec=0
+disabled=0
diff --git a/tests/attr/test-record-no-buffering b/tests/attr/test-record-no-buffering
new file mode 100644
index 0000000..aa3956d
--- /dev/null
+++ b/tests/attr/test-record-no-buffering
@@ -0,0 +1,9 @@
+[config]
+command = record
+args    = --no-buffering kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=263
+watermark=0
+wakeup_events=1
diff --git a/tests/attr/test-record-no-inherit b/tests/attr/test-record-no-inherit
new file mode 100644
index 0000000..560943d
--- /dev/null
+++ b/tests/attr/test-record-no-inherit
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -i kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=263
+inherit=0
diff --git a/tests/attr/test-record-no-samples b/tests/attr/test-record-no-samples
new file mode 100644
index 0000000..8eb73ab
--- /dev/null
+++ b/tests/attr/test-record-no-samples
@@ -0,0 +1,7 @@
+[config]
+command = record
+args    = -n kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_period=0
diff --git a/tests/attr/test-record-period b/tests/attr/test-record-period
new file mode 100644
index 0000000..69bc748
--- /dev/null
+++ b/tests/attr/test-record-period
@@ -0,0 +1,8 @@
+[config]
+command = record
+args    = -c 100 -P kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_period=100
+freq=0
diff --git a/tests/attr/test-record-raw b/tests/attr/test-record-raw
new file mode 100644
index 0000000..a188a61
--- /dev/null
+++ b/tests/attr/test-record-raw
@@ -0,0 +1,7 @@
+[config]
+command = record
+args    = -R kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-record]
+sample_type=1415
diff --git a/tests/attr/test-stat-C0 b/tests/attr/test-stat-C0
new file mode 100644
index 0000000..a2c76d1
--- /dev/null
+++ b/tests/attr/test-stat-C0
@@ -0,0 +1,10 @@
+[config]
+command = stat
+args    = -e cycles -C 0 kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-stat]
+# events are disabled by default when attached to cpu
+disabled=1
+enable_on_exec=0
+optional=1
diff --git a/tests/attr/test-stat-basic b/tests/attr/test-stat-basic
new file mode 100644
index 0000000..69867d0
--- /dev/null
+++ b/tests/attr/test-stat-basic
@@ -0,0 +1,7 @@
+[config]
+command = stat
+args    = -e cycles kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-stat]
+optional=1
diff --git a/tests/attr/test-stat-default b/tests/attr/test-stat-default
new file mode 100644
index 0000000..d9e99b3
--- /dev/null
+++ b/tests/attr/test-stat-default
@@ -0,0 +1,70 @@
+[config]
+command = stat
+args    = kill >/dev/null 2>&1
+ret     = 1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK
+[event1:base-stat]
+fd=1
+type=1
+config=1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CONTEXT_SWITCHES
+[event2:base-stat]
+fd=2
+type=1
+config=3
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_MIGRATIONS
+[event3:base-stat]
+fd=3
+type=1
+config=4
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_PAGE_FAULTS
+[event4:base-stat]
+fd=4
+type=1
+config=2
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES
+[event5:base-stat]
+fd=5
+type=0
+config=0
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+[event6:base-stat]
+fd=6
+type=0
+config=7
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+[event7:base-stat]
+fd=7
+type=0
+config=8
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS
+[event8:base-stat]
+fd=8
+type=0
+config=1
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
+[event9:base-stat]
+fd=9
+type=0
+config=4
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
+[event10:base-stat]
+fd=10
+type=0
+config=5
+optional=1
diff --git a/tests/attr/test-stat-detailed-1 b/tests/attr/test-stat-detailed-1
new file mode 100644
index 0000000..8b04a05
--- /dev/null
+++ b/tests/attr/test-stat-detailed-1
@@ -0,0 +1,111 @@
+[config]
+command = stat
+args    = -d kill >/dev/null 2>&1
+ret     = 1
+
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK
+[event1:base-stat]
+fd=1
+type=1
+config=1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CONTEXT_SWITCHES
+[event2:base-stat]
+fd=2
+type=1
+config=3
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_MIGRATIONS
+[event3:base-stat]
+fd=3
+type=1
+config=4
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_PAGE_FAULTS
+[event4:base-stat]
+fd=4
+type=1
+config=2
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES
+[event5:base-stat]
+fd=5
+type=0
+config=0
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+[event6:base-stat]
+fd=6
+type=0
+config=7
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+[event7:base-stat]
+fd=7
+type=0
+config=8
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS
+[event8:base-stat]
+fd=8
+type=0
+config=1
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
+[event9:base-stat]
+fd=9
+type=0
+config=4
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
+[event10:base-stat]
+fd=10
+type=0
+config=5
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event11:base-stat]
+fd=11
+type=3
+config=0
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event12:base-stat]
+fd=12
+type=3
+config=65536
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_LL                 <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event13:base-stat]
+fd=13
+type=3
+config=2
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_LL                 <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event14:base-stat]
+fd=14
+type=3
+config=65538
+optional=1
diff --git a/tests/attr/test-stat-detailed-2 b/tests/attr/test-stat-detailed-2
new file mode 100644
index 0000000..4fca9f1
--- /dev/null
+++ b/tests/attr/test-stat-detailed-2
@@ -0,0 +1,171 @@
+[config]
+command = stat
+args    = -dd kill >/dev/null 2>&1
+ret     = 1
+
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK
+[event1:base-stat]
+fd=1
+type=1
+config=1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CONTEXT_SWITCHES
+[event2:base-stat]
+fd=2
+type=1
+config=3
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_MIGRATIONS
+[event3:base-stat]
+fd=3
+type=1
+config=4
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_PAGE_FAULTS
+[event4:base-stat]
+fd=4
+type=1
+config=2
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES
+[event5:base-stat]
+fd=5
+type=0
+config=0
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+[event6:base-stat]
+fd=6
+type=0
+config=7
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+[event7:base-stat]
+fd=7
+type=0
+config=8
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS
+[event8:base-stat]
+fd=8
+type=0
+config=1
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
+[event9:base-stat]
+fd=9
+type=0
+config=4
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
+[event10:base-stat]
+fd=10
+type=0
+config=5
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event11:base-stat]
+fd=11
+type=3
+config=0
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event12:base-stat]
+fd=12
+type=3
+config=65536
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_LL                 <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event13:base-stat]
+fd=13
+type=3
+config=2
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_LL                 <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event14:base-stat]
+fd=14
+type=3
+config=65538
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_L1I                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event15:base-stat]
+fd=15
+type=3
+config=1
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_L1I                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event16:base-stat]
+fd=16
+type=3
+config=65537
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_DTLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event17:base-stat]
+fd=17
+type=3
+config=3
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_DTLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event18:base-stat]
+fd=18
+type=3
+config=65539
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_ITLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event19:base-stat]
+fd=19
+type=3
+config=4
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_ITLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event20:base-stat]
+fd=20
+type=3
+config=65540
+optional=1
diff --git a/tests/attr/test-stat-detailed-3 b/tests/attr/test-stat-detailed-3
new file mode 100644
index 0000000..4bb58e1
--- /dev/null
+++ b/tests/attr/test-stat-detailed-3
@@ -0,0 +1,191 @@
+[config]
+command = stat
+args    = -ddd kill >/dev/null 2>&1
+ret     = 1
+
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK
+[event1:base-stat]
+fd=1
+type=1
+config=1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CONTEXT_SWITCHES
+[event2:base-stat]
+fd=2
+type=1
+config=3
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_MIGRATIONS
+[event3:base-stat]
+fd=3
+type=1
+config=4
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_PAGE_FAULTS
+[event4:base-stat]
+fd=4
+type=1
+config=2
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES
+[event5:base-stat]
+fd=5
+type=0
+config=0
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+[event6:base-stat]
+fd=6
+type=0
+config=7
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+[event7:base-stat]
+fd=7
+type=0
+config=8
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS
+[event8:base-stat]
+fd=8
+type=0
+config=1
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
+[event9:base-stat]
+fd=9
+type=0
+config=4
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
+[event10:base-stat]
+fd=10
+type=0
+config=5
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event11:base-stat]
+fd=11
+type=3
+config=0
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event12:base-stat]
+fd=12
+type=3
+config=65536
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+#  PERF_COUNT_HW_CACHE_LL                 <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event13:base-stat]
+fd=13
+type=3
+config=2
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_LL                 <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event14:base-stat]
+fd=14
+type=3
+config=65538
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_L1I                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event15:base-stat]
+fd=15
+type=3
+config=1
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_L1I                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event16:base-stat]
+fd=16
+type=3
+config=65537
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_DTLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event17:base-stat]
+fd=17
+type=3
+config=3
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_DTLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event18:base-stat]
+fd=18
+type=3
+config=65539
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_ITLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event19:base-stat]
+fd=19
+type=3
+config=4
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_ITLB               <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event20:base-stat]
+fd=20
+type=3
+config=65540
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)
+[event21:base-stat]
+fd=21
+type=3
+config=512
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+#  PERF_COUNT_HW_CACHE_L1D                <<  0  |
+# (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)
+[event22:base-stat]
+fd=22
+type=3
+config=66048
+optional=1
diff --git a/tests/attr/test-stat-group b/tests/attr/test-stat-group
new file mode 100644
index 0000000..e15d694
--- /dev/null
+++ b/tests/attr/test-stat-group
@@ -0,0 +1,17 @@
+[config]
+command = stat
+args    = --group -e cycles,instructions kill >/dev/null 2>&1
+ret     = 1
+
+[event-1:base-stat]
+fd=1
+group_fd=-1
+read_format=3|15
+
+[event-2:base-stat]
+fd=2
+group_fd=1
+config=1
+disabled=0
+enable_on_exec=0
+read_format=3|15
diff --git a/tests/attr/test-stat-group1 b/tests/attr/test-stat-group1
new file mode 100644
index 0000000..1746751
--- /dev/null
+++ b/tests/attr/test-stat-group1
@@ -0,0 +1,17 @@
+[config]
+command = stat
+args    = -e '{cycles,instructions}' kill >/dev/null 2>&1
+ret     = 1
+
+[event-1:base-stat]
+fd=1
+group_fd=-1
+read_format=3|15
+
+[event-2:base-stat]
+fd=2
+group_fd=1
+config=1
+disabled=0
+enable_on_exec=0
+read_format=3|15
diff --git a/tests/attr/test-stat-no-inherit b/tests/attr/test-stat-no-inherit
new file mode 100644
index 0000000..924fbb9
--- /dev/null
+++ b/tests/attr/test-stat-no-inherit
@@ -0,0 +1,8 @@
+[config]
+command = stat
+args    = -i -e cycles kill >/dev/null 2>&1
+ret     = 1
+
+[event:base-stat]
+inherit=0
+optional=1
diff --git a/tests/backward-ring-buffer.c b/tests/backward-ring-buffer.c
new file mode 100644
index 0000000..6d598cc
--- /dev/null
+++ b/tests/backward-ring-buffer.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test backward bit in event attribute, read ring buffer from end to
+ * beginning
+ */
+
+#include <perf.h>
+#include <evlist.h>
+#include <sys/prctl.h>
+#include "tests.h"
+#include "debug.h"
+#include <errno.h>
+
+#define NR_ITERS 111
+
+static void testcase(void)
+{ /* Workload under test: issue NR_ITERS prctl(PR_SET_NAME) calls, each with a distinct name. */
+	int i;
+
+	for (i = 0; i < NR_ITERS; i++) {
+		char proc_name[10]; /* "p:<i>\n" plus NUL; with i < NR_ITERS (111) that is at most 7 bytes */
+
+		snprintf(proc_name, sizeof(proc_name), "p:%d\n", i);
+		prctl(PR_SET_NAME, proc_name); /* the caller expects one sys_enter_prctl sample and one COMM record per call */
+	}
+}
+
+static int count_samples(struct perf_evlist *evlist, int *sample_count,
+			 int *comm_count)
+{ /* Tally PERF_RECORD_SAMPLE and PERF_RECORD_COMM records over all overwrite mmaps of @evlist; TEST_FAIL on any other type. */
+	int i;
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		struct perf_mmap *map = &evlist->overwrite_mmap[i];
+		union perf_event *event;
+
+		perf_mmap__read_init(map); /* NOTE(review): any return value is ignored here -- confirm that is intended */
+		while ((event = perf_mmap__read_event(map)) != NULL) {
+			const u32 type = event->header.type;
+
+			switch (type) {
+			case PERF_RECORD_SAMPLE:
+				(*sample_count)++; /* counters are incremented, never reset: totals accumulate across calls */
+				break;
+			case PERF_RECORD_COMM:
+				(*comm_count)++;
+				break;
+			default:
+				pr_err("Unexpected record of type %d\n", type);
+				return TEST_FAIL; /* NOTE(review): this early return skips perf_mmap__read_done(map) */
+			}
+		}
+		perf_mmap__read_done(map);
+	}
+	return TEST_OK;
+}
+
+static int do_test(struct perf_evlist *evlist, int mmap_pages,
+		   int *sample_count, int *comm_count)
+{ /* mmap @evlist with @mmap_pages pages, run testcase() with events enabled, count the records, then unmap. */
+	int err;
+	char sbuf[STRERR_BUFSIZE];
+
+	err = perf_evlist__mmap(evlist, mmap_pages);
+	if (err < 0) {
+		pr_debug("perf_evlist__mmap: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		return TEST_FAIL;
+	}
+
+	perf_evlist__enable(evlist);
+	testcase(); /* only records produced between enable and disable are of interest */
+	perf_evlist__disable(evlist);
+
+	err = count_samples(evlist, sample_count, comm_count);
+	perf_evlist__munmap(evlist); /* unmap whether or not counting succeeded */
+	return err; /* TEST_OK or TEST_FAIL as returned by count_samples() */
+}
+
+
+int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __maybe_unused)
+{ /* Record syscalls:sys_enter_prctl on this process with an overwrite ring buffer and check the record counts read back. */
+	int ret = TEST_SKIP, err, sample_count = 0, comm_count = 0; /* jumps to out_delete_evlist before 'ret = TEST_FAIL' report TEST_SKIP */
+	char pid[16], sbuf[STRERR_BUFSIZE];
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel __maybe_unused; /* not referenced anywhere in this function */
+	struct parse_events_error parse_error;
+	struct record_opts opts = {
+		.target = {
+			.uid = UINT_MAX,
+			.uses_mmap = true,
+		},
+		.freq	      = 0,
+		.mmap_pages   = 256,
+		.default_interval = 1,
+	};
+
+	snprintf(pid, sizeof(pid), "%d", getpid());
+	pid[sizeof(pid) - 1] = '\0'; /* redundant safety: snprintf() already NUL-terminates within the given size */
+	opts.target.tid = opts.target.pid = pid; /* target is this very process */
+
+	evlist = perf_evlist__new();
+	if (!evlist) {
+		pr_debug("Not enough memory to create evlist\n");
+		return TEST_FAIL;
+	}
+
+	err = perf_evlist__create_maps(evlist, &opts.target);
+	if (err < 0) {
+		pr_debug("Not enough memory to create thread/cpu maps\n");
+		goto out_delete_evlist; /* ret is still TEST_SKIP on this path */
+	}
+
+	bzero(&parse_error, sizeof(parse_error));
+	/*
+	 * Set backward bit, ring buffer should be writing from end. Record
+	 * it in aux evlist
+	 */
+	err = parse_events(evlist, "syscalls:sys_enter_prctl/overwrite/", &parse_error);
+	if (err) {
+		pr_debug("Failed to parse tracepoint event, try use root\n");
+		ret = TEST_SKIP;
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__config(evlist, &opts, NULL);
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		pr_debug("perf_evlist__open: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist; /* ret is still TEST_SKIP on this path */
+	}
+
+	ret = TEST_FAIL; /* from here on any early exit is a failure */
+	err = do_test(evlist, opts.mmap_pages, &sample_count,
+		      &comm_count);
+	if (err != TEST_OK)
+		goto out_delete_evlist;
+
+	if ((sample_count != NR_ITERS) || (comm_count != NR_ITERS)) { /* expect exactly one of each per prctl() call in testcase() */
+		pr_err("Unexpected counter: sample_count=%d, comm_count=%d\n",
+		       sample_count, comm_count);
+		goto out_delete_evlist;
+	}
+
+	err = do_test(evlist, 1, &sample_count, &comm_count); /* rerun with a 1-page mmap; counters are not reset and the totals are not checked */
+	if (err != TEST_OK)
+		goto out_delete_evlist;
+
+	ret = TEST_OK;
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+	return ret;
+}
diff --git a/tests/bitmap.c b/tests/bitmap.c
new file mode 100644
index 0000000..47bedf2
--- /dev/null
+++ b/tests/bitmap.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/bitmap.h>
+#include "tests.h"
+#include "cpumap.h"
+#include "debug.h"
+
+#define NBITS 100
+
+/*
+ * Build a bitmap of @nbits bits with one bit set for every CPU listed in
+ * @str, as parsed by cpu_map__new().
+ *
+ * Returns the bitmap, which the caller must free(), or NULL when the
+ * allocation failed. When @str cannot be parsed the bitmap is returned
+ * with no bit set.
+ */
+static unsigned long *get_bitmap(const char *str, int nbits)
+{
+	struct cpu_map *map = cpu_map__new(str);
+	unsigned long *bm = bitmap_alloc(nbits);
+	int i;
+
+	if (bm) {
+		/* Clear unconditionally so a failed parse yields an empty bitmap. */
+		bitmap_zero(bm, nbits);
+
+		for (i = 0; map && i < map->nr; i++) {
+			/* Skip CPUs outside [0, nbits) instead of writing past the bitmap. */
+			if (map->map[i] >= 0 && map->map[i] < nbits)
+				set_bit(map->map[i], bm);
+		}
+	}
+
+	if (map)
+		cpu_map__put(map);
+	return bm;
+}
+
+/*
+ * Round-trip check: turn @str into a bitmap, print the bitmap back with
+ * bitmap_scnprintf() and compare the printed text with @str.
+ *
+ * Returns non-zero when the printed form equals @str, and 0 otherwise,
+ * including when the bitmap could not be allocated.
+ */
+static int test_bitmap(const char *str)
+{
+	unsigned long *bm = get_bitmap(str, NBITS);
+	char buf[100];
+	int ret;
+
+	/* get_bitmap() returns NULL when the allocation failed. */
+	if (!bm) {
+		pr_debug("failed to allocate bitmap\n");
+		return 0;
+	}
+
+	bitmap_scnprintf(bm, NBITS, buf, sizeof(buf));
+	pr_debug("bitmap: %s\n", buf);
+
+	ret = !strcmp(buf, str);
+	free(bm);
+	return ret;
+}
+
+/*
+ * Test entry point: check that a set of CPU list strings survives the
+ * string -> bitmap -> string round trip done by test_bitmap().
+ *
+ * Returns 0 when every string round-trips; TEST_ASSERT_VAL() handles the
+ * failing case.
+ */
+int test__bitmap_print(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1"));
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,5"));
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,3,5,7,9,11,13,15,17,19,21-40"));
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("2-5"));
+	/* This list used to be asserted twice in a row; once is enough. */
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,3-6,8-10,24,35-37"));
+	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1-10,12-20,22-30,32-40"));
+	return 0;
+}
diff --git a/tests/bp_account.c b/tests/bp_account.c
new file mode 100644
index 0000000..a20cbc4
--- /dev/null
+++ b/tests/bp_account.c
@@ -0,0 +1,193 @@
+/*
+ * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
+ * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
+ */
+#define __SANE_USERSPACE_TYPES__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <time.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <linux/compiler.h>
+#include <linux/hw_breakpoint.h>
+#include <sys/ioctl.h>
+
+#include "tests.h"
+#include "debug.h"
+#include "perf.h"
+#include "cloexec.h"
+
+/*
+ * Memory location the watchpoints of this test are placed on.
+ *
+ * Kept file-local: tests/bp_signal.c defines an object with the same name,
+ * and two external definitions collide at link time when common symbols
+ * are disabled (-fno-common).
+ */
+static volatile long the_var;
+
+/*
+ * Empty function whose address is used as the target of the execute
+ * breakpoints opened by this test. It is only referenced by address here.
+ */
+static noinline int test_function(void)
+{
+	return 0;
+}
+
+/*
+ * Reset @attr and describe a hardware breakpoint on @addr with it: an
+ * execute breakpoint when @is_x is true, a write watchpoint otherwise.
+ * The event is then opened through sys_perf_event_open().
+ *
+ * Returns the new event fd, or TEST_FAIL when the open fails. @attr is
+ * left filled in either way.
+ */
+static int __event(bool is_x, void *addr, struct perf_event_attr *attr)
+{
+	int event_fd;
+
+	memset(attr, 0, sizeof(*attr));
+
+	attr->type = PERF_TYPE_BREAKPOINT;
+	attr->size = sizeof(*attr);
+	attr->config = 0;
+
+	if (is_x)
+		attr->bp_type = HW_BREAKPOINT_X;
+	else
+		attr->bp_type = HW_BREAKPOINT_W;
+	attr->bp_addr = (unsigned long) addr;
+	attr->bp_len = sizeof(long);
+
+	attr->sample_type = PERF_SAMPLE_IP;
+	attr->sample_period = 1;
+
+	attr->exclude_hv = 1;
+	attr->exclude_kernel = 1;
+
+	event_fd = sys_perf_event_open(attr, -1, 0, -1,
+				       perf_event_open_cloexec_flag());
+	if (event_fd >= 0)
+		return event_fd;
+
+	pr_debug("failed opening event %llx\n", attr->config);
+	return TEST_FAIL;
+}
+
+/* Open a write watchpoint on @addr; see __event() for @attr and the return value. */
+static int wp_event(void *addr, struct perf_event_attr *attr)
+{
+	return __event(false, addr, attr);
+}
+
+/* Open an execute breakpoint on @addr; see __event() for @attr and the return value. */
+static int bp_event(void *addr, struct perf_event_attr *attr)
+{
+	return __event(true, addr, attr);
+}
+
+/*
+ * Core of the accounting test:
+ *   - open @wp_cnt watchpoints on the_var,
+ *   - turn the first one into an execute breakpoint on test_function with
+ *     PERF_EVENT_IOC_MODIFY_ATTRIBUTES,
+ *   - when @share is zero, open one more watchpoint, which must succeed.
+ *
+ * @wp_cnt must be greater than zero (it sizes the fd array and fd[0] is
+ * modified). Returns 0 on success and TEST_FAIL on the first failing step.
+ * Every event opened here is closed again on all paths.
+ */
+static int bp_accounting(int wp_cnt, int share)
+{
+	struct perf_event_attr attr, attr_mod, attr_new;
+	int i, fd[wp_cnt], fd_wp = -1, nr_open = 0, ret = TEST_FAIL;
+
+	for (i = 0; i < wp_cnt; i++) {
+		fd[i] = wp_event((void *)&the_var, &attr);
+		if (fd[i] == -1) {
+			pr_debug("failed to create wp\n");
+			goto out_close;
+		}
+		nr_open++;
+		pr_debug("wp %d created\n", i);
+	}
+
+	/* Reuse the watchpoint attributes, only switching type and address. */
+	attr_mod = attr;
+	attr_mod.bp_type = HW_BREAKPOINT_X;
+	attr_mod.bp_addr = (unsigned long) test_function;
+
+	if (ioctl(fd[0], PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &attr_mod) != 0) {
+		pr_debug("failed to modify wp\n");
+		goto out_close;
+	}
+
+	pr_debug("wp 0 modified to bp\n");
+
+	if (!share) {
+		fd_wp = wp_event((void *)&the_var, &attr_new);
+		if (fd_wp == -1) {
+			pr_debug("failed to create max wp\n");
+			goto out_close;
+		}
+		pr_debug("wp max created\n");
+	}
+
+	ret = 0;
+
+out_close:
+	/* The extra watchpoint used to be left open; release it as well. */
+	if (fd_wp != -1)
+		close(fd_wp);
+
+	while (nr_open--)
+		close(fd[nr_open]);
+
+	return ret;
+}
+
+/*
+ * Count how many breakpoints (@is_x true) or watchpoints (@is_x false) can
+ * be open at the same time, by opening events until one fails.
+ *
+ * Returns that count, or 0 when the local fd array filled up before any
+ * open failed. All events opened for probing are closed before returning.
+ */
+static int detect_cnt(bool is_x)
+{
+	struct perf_event_attr attr;
+	void *addr = is_x ? (void *)test_function : (void *)&the_var;
+	int fd[100], cnt = 0, ret, i;
+	const int max_cnt = sizeof(fd) / sizeof(fd[0]);
+
+	while (cnt < max_cnt) {
+		fd[cnt] = __event(is_x, addr, &attr);
+		if (fd[cnt] < 0)
+			break;
+		cnt++;
+	}
+
+	ret = cnt;
+	if (cnt == max_cnt) {
+		pr_debug("way too many debug registers, fix the test\n");
+		ret = 0;
+	}
+
+	/* Close on both paths; the overflow case used to leak every fd. */
+	for (i = 0; i < cnt; i++)
+		close(fd[i]);
+
+	return ret;
+}
+
+/*
+ * Probe for the PERF_EVENT_IOC_MODIFY_ATTRIBUTES ioctl by opening one
+ * watchpoint and applying its own attributes back to it.
+ *
+ * Returns 1 when the ioctl succeeded, and 0 when it failed or when the
+ * watchpoint could not be opened.
+ */
+static int detect_ioctl(void)
+{
+	struct perf_event_attr attr;
+	int fd, ret = 1;
+
+	fd = wp_event((void *) &the_var, &attr);
+	/* A failed open is reported as a negative value; fd 0 is a valid fd. */
+	if (fd >= 0) {
+		ret = ioctl(fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &attr);
+		close(fd);
+	}
+
+	return ret ? 0 : 1;
+}
+
+/*
+ * Detect whether watchpoints and breakpoints compete for the same slots:
+ * open @wp_cnt watchpoints, then try to open @bp_cnt breakpoints on top.
+ *
+ * @wp_cnt + @bp_cnt must be greater than zero, since it sizes the fd array.
+ *
+ * Returns 1 when not all breakpoints could be opened (slots are shared),
+ * 0 when all of them could, and TEST_FAIL when a watchpoint could not be
+ * opened. Every event opened here is closed before returning.
+ */
+static int detect_share(int wp_cnt, int bp_cnt)
+{
+	struct perf_event_attr attr;
+	int i, fd[wp_cnt + bp_cnt], ret;
+
+	for (i = 0; i < wp_cnt; i++) {
+		fd[i] = wp_event((void *)&the_var, &attr);
+		if (fd[i] == -1) {
+			pr_debug("failed to create wp\n");
+			ret = TEST_FAIL;
+			/* fd[0..i-1] are open and still have to be closed. */
+			goto out_close;
+		}
+	}
+
+	for (; i < (bp_cnt + wp_cnt); i++) {
+		fd[i] = bp_event((void *)test_function, &attr);
+		if (fd[i] == -1)
+			break;
+	}
+
+	ret = i != (bp_cnt + wp_cnt);
+
+out_close:
+	while (i--)
+		close(fd[i]);
+
+	return ret;
+}
+
+/*
+ * This test does following:
+ *   - detects the number of watch/break-points,
+ *     skip test if any is missing
+ *   - detects PERF_EVENT_IOC_MODIFY_ATTRIBUTES ioctl,
+ *     skip test if it's missing
+ *   - detects if watchpoints and breakpoints share
+ *     same slots
+ *   - create all possible watchpoints on cpu 0
+ *   - change one of it to breakpoint
+ *   - in case wp and bp do not share slots,
+ *     we create another watchpoint to ensure
+ *     the slot accounting is correct
+ *
+ * Returns TEST_SKIP when a prerequisite is missing, otherwise the result
+ * of bp_accounting().
+ */
+int test__bp_accounting(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int has_ioctl = detect_ioctl();
+	int wp_cnt = detect_cnt(false);
+	int bp_cnt = detect_cnt(true);
+	int share  = 0;
+
+	/*
+	 * detect_share() sizes a local array with wp_cnt + bp_cnt, so it must
+	 * not be called when there is nothing to open at all.
+	 */
+	if (wp_cnt > 0 || bp_cnt > 0)
+		share = detect_share(wp_cnt, bp_cnt);
+
+	pr_debug("watchpoints count %d, breakpoints count %d, has_ioctl %d, share %d\n",
+		 wp_cnt, bp_cnt, has_ioctl, share);
+
+	if (!wp_cnt || !bp_cnt || !has_ioctl)
+		return TEST_SKIP;
+
+	return bp_accounting(wp_cnt, share);
+}
diff --git a/tests/bp_signal.c b/tests/bp_signal.c
new file mode 100644
index 0000000..a467615
--- /dev/null
+++ b/tests/bp_signal.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Inspired by breakpoint overflow test done by
+ * Vince Weaver <vincent.weaver@maine.edu> for perf_event_tests
+ * (git://github.com/deater/perf_event_tests)
+ */
+
+/*
+ * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
+ * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
+ */
+#define __SANE_USERSPACE_TYPES__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <time.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <linux/compiler.h>
+#include <linux/hw_breakpoint.h>
+
+#include "tests.h"
+#include "debug.h"
+#include "perf.h"
+#include "cloexec.h"
+
+static int fd1;
+static int fd2;
+static int fd3;
+static int overflows;
+static int overflows_2;
+
+volatile long the_var;
+
+
+/*
+ * Use ASM to ensure the watchpoint and the breakpoint can be triggered
+ * by one and the same instruction.
+ *
+ * Three variants of __test_function(ptr) are provided:
+ *   - x86_64:  a single "incq (%rdi)" followed by "ret",
+ *   - aarch64: a single "str x30, [x0]" followed by "ret",
+ *   - others:  a plain C function storing 0x1234 through @ptr.
+ * In the two assembly variants the first instruction of the function is
+ * also the one that writes through @ptr.
+ */
+#if defined (__x86_64__)
+extern void __test_function(volatile long *ptr);
+asm (
+	".globl __test_function\n"
+	"__test_function:\n"
+	"incq (%rdi)\n"
+	"ret\n");
+#elif defined (__aarch64__)
+extern void __test_function(volatile long *ptr);
+asm (
+	".globl __test_function\n"
+	"__test_function:\n"
+	"str x30, [x0]\n"
+	"ret\n");
+
+#else
+static void __test_function(volatile long *ptr)
+{
+	*ptr = 0x1234;
+}
+#endif
+
+/*
+ * Workload of the test: call __test_function() on the_var, then modify
+ * the_var once more directly, so the variable is written twice per call.
+ * Returns the value of time(NULL).
+ */
+static noinline int test_function(void)
+{
+	__test_function(&the_var);
+	the_var++;
+	return time(NULL);
+}
+
+/*
+ * Second signal handler (installed for SIGUSR1 by the test): counts its
+ * invocations in overflows_2 and, once called more than 10 times,
+ * disables all three events so that no further signal is generated.
+ */
+static void sig_handler_2(int signum __maybe_unused,
+			  siginfo_t *oh __maybe_unused,
+			  void *uc __maybe_unused)
+{
+	if (++overflows_2 <= 10)
+		return;
+
+	ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
+	ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+	ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
+}
+
+/*
+ * First signal handler (installed for SIGIO by the test): counts its
+ * invocations in overflows.
+ *
+ * The test expects only a handful of calls. Being called more than 10
+ * times is treated as the recursion problem; the way out is to disable
+ * the events so that no new SIGIO is delivered.
+ */
+static void sig_handler(int signum __maybe_unused,
+			siginfo_t *oh __maybe_unused,
+			void *uc __maybe_unused)
+{
+	if (++overflows <= 10)
+		return;
+
+	ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
+	ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+	ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
+}
+
+/*
+ * Open a disabled hardware breakpoint event on @addr, an execute
+ * breakpoint when @is_x is true and a write watchpoint otherwise, and set
+ * up the fd for asynchronous notification of this process with signal
+ * @sig. The counter is reset before returning.
+ *
+ * Returns the event fd, or TEST_FAIL when the event cannot be opened.
+ */
+static int __event(bool is_x, void *addr, int sig)
+{
+	struct perf_event_attr attr;
+	int event_fd;
+
+	memset(&attr, 0, sizeof(attr));
+
+	attr.type = PERF_TYPE_BREAKPOINT;
+	attr.size = sizeof(attr);
+	attr.config = 0;
+
+	if (is_x)
+		attr.bp_type = HW_BREAKPOINT_X;
+	else
+		attr.bp_type = HW_BREAKPOINT_W;
+	attr.bp_addr = (unsigned long) addr;
+	attr.bp_len = sizeof(long);
+
+	attr.sample_type = PERF_SAMPLE_IP;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+
+	attr.exclude_hv = 1;
+	attr.exclude_kernel = 1;
+	attr.disabled = 1;
+
+	event_fd = sys_perf_event_open(&attr, 0, -1, -1,
+				       perf_event_open_cloexec_flag());
+	if (event_fd < 0) {
+		pr_debug("failed opening event %llx\n", attr.config);
+		return TEST_FAIL;
+	}
+
+	/* Deliver @sig to this process whenever the event fd signals. */
+	fcntl(event_fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
+	fcntl(event_fd, F_SETSIG, sig);
+	fcntl(event_fd, F_SETOWN, getpid());
+
+	ioctl(event_fd, PERF_EVENT_IOC_RESET, 0);
+
+	return event_fd;
+}
+
+/* Open an execute breakpoint on @addr that notifies with @sig; see __event(). */
+static int bp_event(void *addr, int sig)
+{
+	return __event(true, addr, sig);
+}
+
+/* Open a write watchpoint on @addr that notifies with @sig; see __event(). */
+static int wp_event(void *addr, int sig)
+{
+	return __event(false, addr, sig);
+}
+
+/*
+ * Read the current counter value of the event behind @fd.
+ *
+ * Returns the value read, or TEST_FAIL when the read() does not deliver
+ * exactly sizeof(long long) bytes.
+ */
+static long long bp_count(int fd)
+{
+	long long value;
+	int nread = read(fd, &value, sizeof(value));
+
+	if (nread == sizeof(value))
+		return value;
+
+	pr_debug("failed to read: %d\n", nread);
+	return TEST_FAIL;
+}
+
+/*
+ * Test entry point: check that breakpoint and watchpoint events deliver
+ * their signals the expected number of times, including a breakpoint that
+ * sits on a signal handler (see the scenario described below).
+ *
+ * Returns TEST_OK when all counters match, TEST_FAIL otherwise or when the
+ * signal handlers or events cannot be set up.
+ */
+int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct sigaction sa;
+	long long count1, count2, count3;
+
+	/* setup SIGIO signal handler */
+	memset(&sa, 0, sizeof(struct sigaction));
+	sa.sa_sigaction = (void *) sig_handler;
+	sa.sa_flags = SA_SIGINFO;
+
+	if (sigaction(SIGIO, &sa, NULL) < 0) {
+		pr_debug("failed setting up signal handler\n");
+		return TEST_FAIL;
+	}
+
+	sa.sa_sigaction = (void *) sig_handler_2;
+	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
+		pr_debug("failed setting up signal handler 2\n");
+		return TEST_FAIL;
+	}
+
+	/*
+	 * We create following events:
+	 *
+	 * fd1 - breakpoint event on __test_function with SIGIO
+	 *       signal configured. We should get signal
+	 *       notification each time the breakpoint is hit
+	 *
+	 * fd2 - breakpoint event on sig_handler with SIGUSR1
+	 *       configured. We should get SIGUSR1 each time when
+	 *       breakpoint is hit
+	 *
+	 * fd3 - watchpoint event on the_var (the variable written by
+	 *       __test_function) with SIGIO configured.
+	 *
+	 * Following processing should happen:
+	 *   Exec:               Action:                       Result:
+	 *   incq (%rdi)       - fd1 event breakpoint hit   -> count1 == 1
+	 *                     - SIGIO is delivered
+	 *   sig_handler       - fd2 event breakpoint hit   -> count2 == 1
+	 *                     - SIGUSR1 is delivered
+	 *   sig_handler_2                                  -> overflows_2 == 1  (nested signal)
+	 *   sys_rt_sigreturn  - return from sig_handler_2
+	 *   overflows++                                    -> overflows == 1
+	 *   sys_rt_sigreturn  - return from sig_handler
+	 *   incq (%rdi)       - fd3 event watchpoint hit   -> count3 == 1       (wp and bp in one insn)
+	 *                     - SIGIO is delivered
+	 *   sig_handler       - fd2 event breakpoint hit   -> count2 == 2
+	 *                     - SIGUSR1 is delivered
+	 *   sig_handler_2                                  -> overflows_2 == 2  (nested signal)
+	 *   sys_rt_sigreturn  - return from sig_handler_2
+	 *   overflows++                                    -> overflows == 2
+	 *   sys_rt_sigreturn  - return from sig_handler
+	 *   the_var++         - fd3 event watchpoint hit   -> count3 == 2       (standalone watchpoint)
+	 *                     - SIGIO is delivered
+	 *   sig_handler       - fd2 event breakpoint hit   -> count2 == 3
+	 *                     - SIGUSR1 is delivered
+	 *   sig_handler_2                                  -> overflows_2 == 3  (nested signal)
+	 *   sys_rt_sigreturn  - return from sig_handler_2
+	 *   overflows++                                    -> overflows == 3
+	 *   sys_rt_sigreturn  - return from sig_handler
+	 *
+	 * The test case checks the following error conditions:
+	 * - we get stuck in signal handler because of debug
+	 *   exception being triggered recursively due to
+	 *   the wrong RF EFLAG management
+	 *
+	 * - we never trigger the sig_handler breakpoint due
+	 *   to the wrong RF EFLAG management
+	 *
+	 */
+
+	fd1 = bp_event(__test_function, SIGIO);
+	fd2 = bp_event(sig_handler, SIGUSR1);
+	fd3 = wp_event((void *)&the_var, SIGIO);
+
+	/* Without all three events the scenario above cannot be exercised. */
+	if (fd1 < 0 || fd2 < 0 || fd3 < 0) {
+		pr_debug("failed to open breakpoint events\n");
+		if (fd1 >= 0)
+			close(fd1);
+		if (fd2 >= 0)
+			close(fd2);
+		if (fd3 >= 0)
+			close(fd3);
+		return TEST_FAIL;
+	}
+
+	ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
+	ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
+	ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0);
+
+	/*
+	 * Kick off the test by triggering 'fd1'
+	 * breakpoint.
+	 */
+	test_function();
+
+	ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
+	ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+	ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
+
+	count1 = bp_count(fd1);
+	count2 = bp_count(fd2);
+	count3 = bp_count(fd3);
+
+	close(fd1);
+	close(fd2);
+	close(fd3);
+
+	pr_debug("count1 %lld, count2 %lld, count3 %lld, overflow %d, overflows_2 %d\n",
+		 count1, count2, count3, overflows, overflows_2);
+
+	if (count1 != 1) {
+		if (count1 == 11)
+			pr_debug("failed: RF EFLAG recursion issue detected\n");
+		else
+			pr_debug("failed: wrong count for bp1: %lld, expected 1\n", count1);
+	}
+
+	if (overflows != 3)
+		pr_debug("failed: wrong overflow hit\n");
+
+	if (overflows_2 != 3)
+		pr_debug("failed: wrong overflow_2 hit\n");
+
+	if (count2 != 3)
+		pr_debug("failed: wrong count for bp2\n");
+
+	if (count3 != 2)
+		pr_debug("failed: wrong count for bp3\n");
+
+	return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ?
+		TEST_OK : TEST_FAIL;
+}
+
+/*
+ * Tell whether the breakpoint signal test can run on this architecture:
+ * false when built for powerpc or s390x, true everywhere else.
+ */
+bool test__bp_signal_is_supported(void)
+{
+/*
+ * The powerpc so far does not have support to even create
+ * instruction breakpoint using the perf event interface.
+ * Once it's there we can release this.
+ *
+ * NOTE(review): s390x is excluded by the same check below; presumably for
+ * the same reason, but that is not stated here -- confirm.
+ */
+#if defined(__powerpc__) || defined(__s390x__)
+	return false;
+#else
+	return true;
+#endif
+}
diff --git a/tests/bp_signal_overflow.c b/tests/bp_signal_overflow.c
new file mode 100644
index 0000000..ca96255
--- /dev/null
+++ b/tests/bp_signal_overflow.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Originally done by Vince Weaver <vincent.weaver@maine.edu> for
+ * perf_event_tests (git://github.com/deater/perf_event_tests)
+ */
+
+/*
+ * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
+ * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
+ */
+#define __SANE_USERSPACE_TYPES__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <time.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <linux/compiler.h>
+#include <linux/hw_breakpoint.h>
+
+#include "tests.h"
+#include "debug.h"
+#include "perf.h"
+#include "cloexec.h"
+
+static int overflows;
+
+/*
+ * Function the execute breakpoint of this test is placed on; the test
+ * calls it EXECUTIONS times. Returns the value of time(NULL).
+ */
+static noinline int test_function(void)
+{
+	return time(NULL);
+}
+
+/* SIGIO handler: count one overflow notification per delivered signal. */
+static void sig_handler(int signum __maybe_unused,
+			siginfo_t *oh __maybe_unused,
+			void *uc __maybe_unused)
+{
+	overflows++;
+}
+
+/*
+ * Read the current counter value of the event behind @fd.
+ *
+ * Returns the value read, or TEST_FAIL when the read() does not deliver
+ * exactly sizeof(long long) bytes.
+ */
+static long long bp_count(int fd)
+{
+	long long value;
+	int nread = read(fd, &value, sizeof(value));
+
+	if (nread == sizeof(value))
+		return value;
+
+	pr_debug("failed to read: %d\n", nread);
+	return TEST_FAIL;
+}
+
+#define EXECUTIONS 10000
+#define THRESHOLD  100
+
+/*
+ * Test entry point: put an execute breakpoint with a sample period of
+ * THRESHOLD on test_function(), call that function EXECUTIONS times and
+ * verify both the final counter value (EXECUTIONS) and the number of
+ * SIGIO notifications (EXECUTIONS / THRESHOLD).
+ *
+ * Returns TEST_OK when both numbers match, TEST_FAIL otherwise or when
+ * the signal handler or the event cannot be set up.
+ */
+int test__bp_signal_overflow(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct perf_event_attr attr;
+	struct sigaction action;
+	long long hits;
+	int event_fd, iter, nr_fails = 0;
+
+	/* setup SIGIO signal handler */
+	memset(&action, 0, sizeof(action));
+	action.sa_flags = SA_SIGINFO;
+	action.sa_sigaction = (void *) sig_handler;
+
+	if (sigaction(SIGIO, &action, NULL) < 0) {
+		pr_debug("failed setting up signal handler\n");
+		return TEST_FAIL;
+	}
+
+	memset(&attr, 0, sizeof(attr));
+
+	attr.type = PERF_TYPE_BREAKPOINT;
+	attr.size = sizeof(attr);
+	attr.config = 0;
+
+	attr.bp_type = HW_BREAKPOINT_X;
+	attr.bp_addr = (unsigned long) test_function;
+	attr.bp_len = sizeof(long);
+
+	attr.sample_type = PERF_SAMPLE_IP;
+	attr.sample_period = THRESHOLD;
+	attr.wakeup_events = 1;
+
+	attr.exclude_hv = 1;
+	attr.exclude_kernel = 1;
+	attr.disabled = 1;
+
+	event_fd = sys_perf_event_open(&attr, 0, -1, -1,
+				       perf_event_open_cloexec_flag());
+	if (event_fd < 0) {
+		pr_debug("failed opening event %llx\n", attr.config);
+		return TEST_FAIL;
+	}
+
+	/* Have SIGIO sent to this process whenever the event fd signals. */
+	fcntl(event_fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
+	fcntl(event_fd, F_SETSIG, SIGIO);
+	fcntl(event_fd, F_SETOWN, getpid());
+
+	ioctl(event_fd, PERF_EVENT_IOC_RESET, 0);
+	ioctl(event_fd, PERF_EVENT_IOC_ENABLE, 0);
+
+	iter = 0;
+	while (iter < EXECUTIONS) {
+		test_function();
+		iter++;
+	}
+
+	ioctl(event_fd, PERF_EVENT_IOC_DISABLE, 0);
+
+	hits = bp_count(event_fd);
+
+	close(event_fd);
+
+	pr_debug("count %lld, overflow %d\n", hits, overflows);
+
+	if (hits != EXECUTIONS) {
+		pr_debug("\tWrong number of executions %lld != %d\n",
+			 hits, EXECUTIONS);
+		nr_fails++;
+	}
+
+	if (overflows != EXECUTIONS / THRESHOLD) {
+		pr_debug("\tWrong number of overflows %d != %d\n",
+			 overflows, EXECUTIONS / THRESHOLD);
+		nr_fails++;
+	}
+
+	if (nr_fails)
+		return TEST_FAIL;
+	return TEST_OK;
+}
diff --git a/tests/bpf-script-example.c b/tests/bpf-script-example.c
new file mode 100644
index 0000000..1ca5106
--- /dev/null
+++ b/tests/bpf-script-example.c
@@ -0,0 +1,48 @@
+/*
+ * bpf-script-example.c
+ * Test basic LLVM building
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
+#endif
+#define BPF_ANY 0
+#define BPF_MAP_TYPE_ARRAY 2
+#define BPF_FUNC_map_lookup_elem 1
+#define BPF_FUNC_map_update_elem 2
+
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+	(void *) BPF_FUNC_map_lookup_elem;
+static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) =
+	(void *) BPF_FUNC_map_update_elem;
+
+/*
+ * Local description of a BPF map, placed in the "maps" section below:
+ * map type plus key size, value size and maximum number of entries.
+ */
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+};
+
+#define SEC(NAME) __attribute__((section(NAME), used))
+/* One-entry array map (int key, int value) holding the flag flipped below. */
+struct bpf_map_def SEC("maps") flip_table = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1,
+};
+
+/*
+ * Program for the "func=do_epoll_wait" section: toggle the single flag
+ * kept in flip_table and return its new value, so consecutive calls
+ * return alternating values. Returns 0 when the map lookup fails.
+ */
+SEC("func=do_epoll_wait")
+int bpf_func__SyS_epoll_pwait(void *ctx)
+{
+	int key = 0;
+	int flipped;
+	int *cur;
+
+	cur = bpf_map_lookup_elem(&flip_table, &key);
+	if (cur) {
+		/* flip flag and store back */
+		flipped = !*cur;
+		bpf_map_update_elem(&flip_table, &key, &flipped, BPF_ANY);
+		return flipped;
+	}
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tests/bpf-script-test-kbuild.c b/tests/bpf-script-test-kbuild.c
new file mode 100644
index 0000000..ff3ec83
--- /dev/null
+++ b/tests/bpf-script-test-kbuild.c
@@ -0,0 +1,20 @@
+/*
+ * bpf-script-test-kbuild.c
+ * Test include from kernel header
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
+#endif
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+#include <uapi/linux/fs.h>
+
+/*
+ * Minimal program for the "func=vfs_llseek" section; it always returns 0.
+ * Per the file header, the point of this file is the kernel header include.
+ */
+SEC("func=vfs_llseek")
+int bpf_func__vfs_llseek(void *ctx)
+{
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tests/bpf-script-test-prologue.c b/tests/bpf-script-test-prologue.c
new file mode 100644
index 0000000..43f1e16
--- /dev/null
+++ b/tests/bpf-script-test-prologue.c
@@ -0,0 +1,46 @@
+/*
+ * bpf-script-test-prologue.c
+ * Test BPF prologue
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
+#endif
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+#include <uapi/linux/fs.h>
+
+/*
+ * If CONFIG_PROFILE_ALL_BRANCHES is selected,
+ * 'if' is redefined after include kernel header.
+ * Recover 'if' for BPF object code.
+ */
+#ifdef if
+# undef if
+#endif
+
+#define FMODE_READ		0x1
+#define FMODE_WRITE		0x2
+
+static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+	(void *) 6;
+
+/*
+ * Program for the "func=null_lseek file->f_mode offset orig" section.
+ *
+ * Returns 1 only when @err is zero, the file mode has no FMODE_WRITE bit,
+ * @offset is even and @orig is not SEEK_CUR; returns 0 in every other
+ * case.
+ */
+SEC("func=null_lseek file->f_mode offset orig")
+int bpf_func__null_lseek(void *ctx, int err, unsigned long _f_mode,
+			 unsigned long offset, unsigned long orig)
+{
+	fmode_t mode = (fmode_t)_f_mode;
+
+	if (err || (mode & FMODE_WRITE) || (offset & 1) || orig == SEEK_CUR)
+		return 0;
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tests/bpf-script-test-relocation.c b/tests/bpf-script-test-relocation.c
new file mode 100644
index 0000000..93af774
--- /dev/null
+++ b/tests/bpf-script-test-relocation.c
@@ -0,0 +1,50 @@
+/*
+ * bpf-script-test-relocation.c
+ * Test BPF loader checking relocation
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
+#endif
+#define BPF_ANY 0
+#define BPF_MAP_TYPE_ARRAY 2
+#define BPF_FUNC_map_lookup_elem 1
+#define BPF_FUNC_map_update_elem 2
+
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+	(void *) BPF_FUNC_map_lookup_elem;
+static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) =
+	(void *) BPF_FUNC_map_update_elem;
+
+/*
+ * Local description of a BPF map, placed in the "maps" section below:
+ * map type plus key size, value size and maximum number of entries.
+ */
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+};
+
+#define SEC(NAME) __attribute__((section(NAME), used))
+/*
+ * A properly declared one-entry array map (int key, int value). The
+ * program below does not pass this map to the update helper.
+ */
+struct bpf_map_def SEC("maps") my_table = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1,
+};
+
+int this_is_a_global_val;
+
+/*
+ * Program for the "func=sys_write" section. It is wrong on purpose: the
+ * map update helper is handed the address of a plain global variable
+ * instead of a map, so loading this object is expected to be rejected.
+ * Do not "fix" the argument.
+ */
+SEC("func=sys_write")
+int bpf_func__sys_write(void *ctx)
+{
+	int key = 0;
+	int value = 0;
+
+	/*
+	 * Incorrect relocation. Should not allow this program be
+	 * loaded into kernel.
+	 */
+	bpf_map_update_elem(&this_is_a_global_val, &key, &value, 0);
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tests/bpf.c b/tests/bpf.c
new file mode 100644
index 0000000..79b54f8
--- /dev/null
+++ b/tests/bpf.c
@@ -0,0 +1,361 @@
+#include <errno.h>
+#include <stdio.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <util/util.h>
+#include <util/bpf-loader.h>
+#include <util/evlist.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+#include <api/fs/fs.h>
+#include <bpf/bpf.h>
+#include "tests.h"
+#include "llvm.h"
+#include "debug.h"
+#define NR_ITERS       111
+#define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test"
+
+#ifdef HAVE_LIBBPF_SUPPORT
+
+/*
+ * Workload for the epoll based subtests: issue NR_ITERS epoll_pwait()
+ * calls on the invalid descriptors -1 .. -NR_ITERS. Always returns 0.
+ */
+static int epoll_pwait_loop(void)
+{
+	int iter = 0;
+
+	/* Should fail NR_ITERS times */
+	while (iter < NR_ITERS) {
+		iter++;
+		epoll_pwait(-iter, NULL, 0, 0, NULL);
+	}
+	return 0;
+}
+
+#ifdef HAVE_BPF_PROLOGUE
+
+/*
+ * Workload for the prologue subtest: open /dev/null once read-only and
+ * once read-write, then for each i in [0, NR_ITERS) seek both descriptors
+ * to offset i, alternating which descriptor goes first and switching
+ * between SEEK_SET and SEEK_CUR every two iterations.
+ *
+ * Returns 0 on success, -1 when /dev/null cannot be opened.
+ */
+static int llseek_loop(void)
+{
+	int fds[2], i;
+
+	fds[0] = open("/dev/null", O_RDONLY);
+	fds[1] = open("/dev/null", O_RDWR);
+
+	if (fds[0] < 0 || fds[1] < 0) {
+		/* Only one open may have failed; do not leak the other fd. */
+		if (fds[0] >= 0)
+			close(fds[0]);
+		if (fds[1] >= 0)
+			close(fds[1]);
+		return -1;
+	}
+
+	for (i = 0; i < NR_ITERS; i++) {
+		lseek(fds[i % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
+		lseek(fds[(i + 1) % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
+	}
+	close(fds[0]);
+	close(fds[1]);
+	return 0;
+}
+
+#endif
+
+/*
+ * Table of BPF subtests, indexed by subtest number.
+ *
+ *   prog_id          - test program requested from test_llvm__fetch_bpf_obj()
+ *   desc             - text returned by test__bpf_subtest_get_desc()
+ *   name             - name given to the object when it is prepared
+ *   msg_compile_fail - hint printed when the object cannot be fetched
+ *   msg_load_fail    - hint printed when preparing the object gives an
+ *                      unexpected result
+ *   target_func      - workload run while the events are enabled; when it
+ *                      is NULL, preparing the object is expected to fail
+ *   expect_result    - number of PERF_RECORD_SAMPLE events expected
+ *   pin              - additionally pin the object under PERF_TEST_BPF_PATH
+ */
+static struct {
+	enum test_llvm__testcase prog_id;
+	const char *desc;
+	const char *name;
+	const char *msg_compile_fail;
+	const char *msg_load_fail;
+	int (*target_func)(void);
+	int expect_result;
+	bool	pin;
+} bpf_testcase_table[] = {
+	{
+		.prog_id	  = LLVM_TESTCASE_BASE,
+		.desc		  = "Basic BPF filtering",
+		.name		  = "[basic_bpf_test]",
+		.msg_compile_fail = "fix 'perf test LLVM' first",
+		.msg_load_fail	  = "load bpf object failed",
+		.target_func	  = &epoll_pwait_loop,
+		.expect_result	  = (NR_ITERS + 1) / 2,
+	},
+	{
+		.prog_id	  = LLVM_TESTCASE_BASE,
+		.desc		  = "BPF pinning",
+		.name		  = "[bpf_pinning]",
+		.msg_compile_fail = "fix kbuild first",
+		.msg_load_fail	  = "check your vmlinux setting?",
+		.target_func	  = &epoll_pwait_loop,
+		.expect_result	  = (NR_ITERS + 1) / 2,
+		.pin 		  = true,
+	},
+#ifdef HAVE_BPF_PROLOGUE
+	{
+		.prog_id	  = LLVM_TESTCASE_BPF_PROLOGUE,
+		.desc		  = "BPF prologue generation",
+		.name		  = "[bpf_prologue_test]",
+		.msg_compile_fail = "fix kbuild first",
+		.msg_load_fail	  = "check your vmlinux setting?",
+		.target_func	  = &llseek_loop,
+		.expect_result	  = (NR_ITERS + 1) / 4,
+	},
+#endif
+	{
+		/* No target_func: preparing this object must fail. */
+		.prog_id	  = LLVM_TESTCASE_BPF_RELOCATION,
+		.desc		  = "BPF relocation checker",
+		.name		  = "[bpf_relocation_test]",
+		.msg_compile_fail = "fix 'perf test LLVM' first",
+		.msg_load_fail	  = "libbpf error when dealing with relocation",
+	},
+};
+
+/*
+ * Run one BPF subtest on an already prepared object.
+ *
+ * The events selected by @obj are added to a fresh evlist that targets the
+ * current process, @func is run while the events are enabled, and the
+ * PERF_RECORD_SAMPLE events found in the ring buffers are counted.
+ *
+ * @obj:    prepared BPF object providing the events
+ * @func:   workload to run between enable and disable
+ * @expect: number of samples the workload must produce
+ *
+ * Returns TEST_OK when exactly @expect samples were seen, TEST_FAIL on a
+ * mismatch or on any setup error.
+ */
+static int do_test(struct bpf_object *obj, int (*func)(void),
+		   int expect)
+{
+	struct record_opts opts = {
+		.target = {
+			.uid = UINT_MAX,
+			.uses_mmap = true,
+		},
+		.freq	      = 0,
+		.mmap_pages   = 256,
+		.default_interval = 1,
+	};
+
+	char pid[16];
+	char sbuf[STRERR_BUFSIZE];
+	struct perf_evlist *evlist;
+	int i, ret = TEST_FAIL, err = 0, count = 0;
+
+	struct parse_events_state parse_state;
+	struct parse_events_error parse_error;
+
+	bzero(&parse_error, sizeof(parse_error));
+	bzero(&parse_state, sizeof(parse_state));
+	parse_state.error = &parse_error;
+	INIT_LIST_HEAD(&parse_state.list);
+
+	/* Collect the events described by the BPF object into parse_state.list. */
+	err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL);
+	if (err || list_empty(&parse_state.list)) {
+		pr_debug("Failed to add events selected by BPF\n");
+		return TEST_FAIL;
+	}
+
+	/* Monitor only this process: use our own pid as both pid and tid target. */
+	snprintf(pid, sizeof(pid), "%d", getpid());
+	pid[sizeof(pid) - 1] = '\0';
+	opts.target.tid = opts.target.pid = pid;
+
+	/* Instead of perf_evlist__new_default, don't add default events */
+	evlist = perf_evlist__new();
+	if (!evlist) {
+		pr_debug("Not enough memory to create evlist\n");
+		return TEST_FAIL;
+	}
+
+	err = perf_evlist__create_maps(evlist, &opts.target);
+	if (err < 0) {
+		pr_debug("Not enough memory to create thread/cpu maps\n");
+		goto out_delete_evlist;
+	}
+
+	/* Hand the parsed events over to the evlist. */
+	perf_evlist__splice_list_tail(evlist, &parse_state.list);
+	evlist->nr_groups = parse_state.nr_groups;
+
+	perf_evlist__config(evlist, &opts, NULL);
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		pr_debug("perf_evlist__open: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist;
+	}
+
+	err = perf_evlist__mmap(evlist, opts.mmap_pages);
+	if (err < 0) {
+		pr_debug("perf_evlist__mmap: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist;
+	}
+
+	/* Only the workload itself runs with the events enabled. */
+	perf_evlist__enable(evlist);
+	(*func)();
+	perf_evlist__disable(evlist);
+
+	/* Walk every mmap and count the sample records in it. */
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		union perf_event *event;
+		struct perf_mmap *md;
+
+		md = &evlist->mmap[i];
+		if (perf_mmap__read_init(md) < 0)
+			continue;
+
+		while ((event = perf_mmap__read_event(md)) != NULL) {
+			const u32 type = event->header.type;
+
+			if (type == PERF_RECORD_SAMPLE)
+				count ++;
+		}
+		perf_mmap__read_done(md);
+	}
+
+	if (count != expect) {
+		pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count);
+		goto out_delete_evlist;
+	}
+
+	ret = TEST_OK;
+
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+	return ret;
+}
+
+/*
+ * Prepare a BPF object called @name from the @obj_buf_sz bytes at
+ * @obj_buf via bpf__prepare_load_buffer().
+ *
+ * Returns the object, or NULL (after a debug message) when the helper
+ * hands back an error pointer.
+ */
+static struct bpf_object *
+prepare_bpf(void *obj_buf, size_t obj_buf_sz, const char *name)
+{
+	struct bpf_object *loaded;
+
+	loaded = bpf__prepare_load_buffer(obj_buf, obj_buf_sz, name);
+	if (!IS_ERR(loaded))
+		return loaded;
+
+	pr_debug("Compile BPF program failed.\n");
+	return NULL;
+}
+
+/*
+ * Run the subtest described by bpf_testcase_table[@idx]:
+ *   - fetch the BPF object for the entry's prog_id,
+ *   - prepare it, and check that this succeeds exactly when the entry has
+ *     a target_func,
+ *   - when it did load, run do_test() and, for entries with .pin set,
+ *     pin the object under PERF_TEST_BPF_PATH and remove that directory
+ *     again.
+ *
+ * Returns TEST_OK or TEST_FAIL; TEST_SKIP only when the object for
+ * subtest 0 cannot be fetched.
+ */
+static int __test__bpf(int idx)
+{
+	int ret;
+	void *obj_buf;
+	size_t obj_buf_sz;
+	struct bpf_object *obj;
+
+	ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
+				       bpf_testcase_table[idx].prog_id,
+				       true, NULL);
+	if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
+		pr_debug("Unable to get BPF object, %s\n",
+			 bpf_testcase_table[idx].msg_compile_fail);
+		/* Only the first subtest is skipped; later ones count as failed. */
+		if (idx == 0)
+			return TEST_SKIP;
+		else
+			return TEST_FAIL;
+	}
+
+	obj = prepare_bpf(obj_buf, obj_buf_sz,
+			  bpf_testcase_table[idx].name);
+	/* Having a target_func and getting an object must go together. */
+	if ((!!bpf_testcase_table[idx].target_func) != (!!obj)) {
+		if (!obj)
+			pr_debug("Fail to load BPF object: %s\n",
+				 bpf_testcase_table[idx].msg_load_fail);
+		else
+			pr_debug("Success unexpectedly: %s\n",
+				 bpf_testcase_table[idx].msg_load_fail);
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	if (obj) {
+		ret = do_test(obj,
+			      bpf_testcase_table[idx].target_func,
+			      bpf_testcase_table[idx].expect_result);
+		if (ret != TEST_OK)
+			goto out;
+		if (bpf_testcase_table[idx].pin) {
+			int err;
+
+			if (!bpf_fs__mount()) {
+				pr_debug("BPF filesystem not mounted\n");
+				ret = TEST_FAIL;
+				goto out;
+			}
+			err = mkdir(PERF_TEST_BPF_PATH, 0777);
+			if (err && errno != EEXIST) {
+				pr_debug("Failed to make perf_test dir: %s\n",
+					 strerror(errno));
+				ret = TEST_FAIL;
+				goto out;
+			}
+			/* The directory is removed again even when pinning failed. */
+			if (bpf_object__pin(obj, PERF_TEST_BPF_PATH))
+				ret = TEST_FAIL;
+			if (rm_rf(PERF_TEST_BPF_PATH))
+				ret = TEST_FAIL;
+		}
+	}
+
+out:
+	bpf__clear();
+	return ret;
+}
+
+/* Number of BPF subtests, i.e. the number of entries in bpf_testcase_table. */
+int test__bpf_subtest_get_nr(void)
+{
+	return (int)ARRAY_SIZE(bpf_testcase_table);
+}
+
+/*
+ * Description of BPF subtest @i, or NULL when @i is not a valid index
+ * into bpf_testcase_table.
+ */
+const char *test__bpf_subtest_get_desc(int i)
+{
+	if (i >= 0 && i < (int)ARRAY_SIZE(bpf_testcase_table))
+		return bpf_testcase_table[i].desc;
+
+	return NULL;
+}
+
+/*
+ * Check that the environment can load BPF programs at all, by loading a
+ * two instruction kprobe program ("mov r0, 1; exit") for the running
+ * kernel version and closing it again.
+ *
+ * Returns 0 when that works, otherwise the non-zero result of the failing
+ * step.
+ */
+static int check_env(void)
+{
+	struct bpf_insn prog[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+	char license[] = "GPL";
+	unsigned int kver;
+	int rc;
+
+	rc = fetch_kernel_version(&kver, NULL, 0);
+	if (rc) {
+		pr_debug("Unable to get kernel version\n");
+		return rc;
+	}
+
+	rc = bpf_load_program(BPF_PROG_TYPE_KPROBE, prog,
+			      sizeof(prog) / sizeof(prog[0]),
+			      license, kver, NULL, 0);
+	if (rc < 0) {
+		pr_err("Missing basic BPF support, skip this test: %s\n",
+		       strerror(errno));
+		return rc;
+	}
+
+	/* On success the result is the program fd, which is not needed. */
+	close(rc);
+	return 0;
+}
+
+/*
+ * Test entry point for BPF subtest @i.
+ *
+ * Returns TEST_FAIL for an out of range @i, TEST_SKIP when not running as
+ * root or when check_env() reports a problem, and the result of
+ * __test__bpf() otherwise.
+ */
+int test__bpf(struct test *test __maybe_unused, int i)
+{
+	const int nr_subtests = (int)ARRAY_SIZE(bpf_testcase_table);
+
+	if (i < 0 || i >= nr_subtests)
+		return TEST_FAIL;
+
+	if (geteuid() != 0) {
+		pr_debug("Only root can run BPF test\n");
+		return TEST_SKIP;
+	}
+
+	if (check_env() != 0)
+		return TEST_SKIP;
+
+	return __test__bpf(i);
+}
+
+#else
+/* Variant used when HAVE_LIBBPF_SUPPORT is not defined: no subtests. */
+int test__bpf_subtest_get_nr(void)
+{
+	return 0;
+}
+
+/* Variant used when HAVE_LIBBPF_SUPPORT is not defined: no descriptions. */
+const char *test__bpf_subtest_get_desc(int i __maybe_unused)
+{
+	return NULL;
+}
+
+/* Variant used when HAVE_LIBBPF_SUPPORT is not defined: always skipped. */
+int test__bpf(struct test *test __maybe_unused, int i __maybe_unused)
+{
+	pr_debug("Skip BPF test because BPF support is not compiled\n");
+	return TEST_SKIP;
+}
+#endif
diff --git a/tests/builtin-test.c b/tests/builtin-test.c
new file mode 100644
index 0000000..cac8f88
--- /dev/null
+++ b/tests/builtin-test.c
@@ -0,0 +1,701 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-test.c
+ *
+ * Builtin regression testing command: ever growing number of sanity tests
+ */
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include "builtin.h"
+#include "hist.h"
+#include "intlist.h"
+#include "tests.h"
+#include "debug.h"
+#include "color.h"
+#include <subcmd/parse-options.h>
+#include "string2.h"
+#include "symbol.h"
+#include <linux/kernel.h>
+#include <subcmd/exec-cmd.h>
+
+static bool dont_fork;
+
+/*
+ * Default arch test table, declared __weak and holding only the terminating
+ * entry (.func == NULL).  Presumably architecture code provides a strong
+ * definition with real tests -- confirm against the arch directories.
+ */
+struct test __weak arch_tests[] = {
+	{
+		.func = NULL,
+	},
+};
+
+/*
+ * Table of the generic tests.  for_each_test() walks it until it reaches the
+ * entry whose .func is NULL, so that sentinel must stay last.  The number
+ * printed for each test is derived from its position while iterating, so
+ * inserting or reordering entries renumbers the tests that follow.
+ */
+static struct test generic_tests[] = {
+	{
+		.desc = "vmlinux symtab matches kallsyms",
+		.func = test__vmlinux_matches_kallsyms,
+	},
+	{
+		.desc = "Detect openat syscall event",
+		.func = test__openat_syscall_event,
+	},
+	{
+		.desc = "Detect openat syscall event on all cpus",
+		.func = test__openat_syscall_event_on_all_cpus,
+	},
+	{
+		.desc = "Read samples using the mmap interface",
+		.func = test__basic_mmap,
+	},
+	{
+		.desc = "Test data source output",
+		.func = test__mem,
+	},
+	{
+		.desc = "Parse event definition strings",
+		.func = test__parse_events,
+	},
+	{
+		.desc = "Simple expression parser",
+		.func = test__expr,
+	},
+	{
+		.desc = "PERF_RECORD_* events & perf_sample fields",
+		.func = test__PERF_RECORD,
+	},
+	{
+		.desc = "Parse perf pmu format",
+		.func = test__pmu,
+	},
+	{
+		.desc = "DSO data read",
+		.func = test__dso_data,
+	},
+	{
+		.desc = "DSO data cache",
+		.func = test__dso_data_cache,
+	},
+	{
+		.desc = "DSO data reopen",
+		.func = test__dso_data_reopen,
+	},
+	{
+		.desc = "Roundtrip evsel->name",
+		.func = test__perf_evsel__roundtrip_name_test,
+	},
+	{
+		.desc = "Parse sched tracepoints fields",
+		.func = test__perf_evsel__tp_sched_test,
+	},
+	{
+		.desc = "syscalls:sys_enter_openat event fields",
+		.func = test__syscall_openat_tp_fields,
+	},
+	{
+		.desc = "Setup struct perf_event_attr",
+		.func = test__attr,
+	},
+	{
+		.desc = "Match and link multiple hists",
+		.func = test__hists_link,
+	},
+	{
+		.desc = "'import perf' in python",
+		.func = test__python_use,
+	},
+	{
+		.desc = "Breakpoint overflow signal handler",
+		.func = test__bp_signal,
+		.is_supported = test__bp_signal_is_supported,
+	},
+	{
+		.desc = "Breakpoint overflow sampling",
+		.func = test__bp_signal_overflow,
+		.is_supported = test__bp_signal_is_supported,
+	},
+	{
+		.desc = "Breakpoint accounting",
+		.func = test__bp_accounting,
+		.is_supported = test__bp_signal_is_supported,
+	},
+	{
+		.desc = "Number of exit events of a simple workload",
+		.func = test__task_exit,
+	},
+	{
+		.desc = "Software clock events period values",
+		.func = test__sw_clock_freq,
+	},
+	{
+		.desc = "Object code reading",
+		.func = test__code_reading,
+	},
+	{
+		.desc = "Sample parsing",
+		.func = test__sample_parsing,
+	},
+	{
+		.desc = "Use a dummy software event to keep tracking",
+		.func = test__keep_tracking,
+	},
+	{
+		.desc = "Parse with no sample_id_all bit set",
+		.func = test__parse_no_sample_id_all,
+	},
+	{
+		.desc = "Filter hist entries",
+		.func = test__hists_filter,
+	},
+	{
+		.desc = "Lookup mmap thread",
+		.func = test__mmap_thread_lookup,
+	},
+	{
+		.desc = "Share thread mg",
+		.func = test__thread_mg_share,
+	},
+	{
+		.desc = "Sort output of hist entries",
+		.func = test__hists_output,
+	},
+	{
+		.desc = "Cumulate child hist entries",
+		.func = test__hists_cumulate,
+	},
+	{
+		.desc = "Track with sched_switch",
+		.func = test__switch_tracking,
+	},
+	{
+		.desc = "Filter fds with revents mask in a fdarray",
+		.func = test__fdarray__filter,
+	},
+	{
+		.desc = "Add fd to a fdarray, making it autogrow",
+		.func = test__fdarray__add,
+	},
+	{
+		.desc = "kmod_path__parse",
+		.func = test__kmod_path__parse,
+	},
+	{
+		.desc = "Thread map",
+		.func = test__thread_map,
+	},
+	{
+		.desc = "LLVM search and compile",
+		.func = test__llvm,
+		.subtest = {
+			.skip_if_fail	= true,
+			.get_nr		= test__llvm_subtest_get_nr,
+			.get_desc	= test__llvm_subtest_get_desc,
+		},
+	},
+	{
+		.desc = "Session topology",
+		.func = test__session_topology,
+	},
+	{
+		.desc = "BPF filter",
+		.func = test__bpf,
+		.subtest = {
+			.skip_if_fail	= true,
+			.get_nr		= test__bpf_subtest_get_nr,
+			.get_desc	= test__bpf_subtest_get_desc,
+		},
+	},
+	{
+		.desc = "Synthesize thread map",
+		.func = test__thread_map_synthesize,
+	},
+	{
+		.desc = "Remove thread map",
+		.func = test__thread_map_remove,
+	},
+	{
+		.desc = "Synthesize cpu map",
+		.func = test__cpu_map_synthesize,
+	},
+	{
+		.desc = "Synthesize stat config",
+		.func = test__synthesize_stat_config,
+	},
+	{
+		.desc = "Synthesize stat",
+		.func = test__synthesize_stat,
+	},
+	{
+		.desc = "Synthesize stat round",
+		.func = test__synthesize_stat_round,
+	},
+	{
+		.desc = "Synthesize attr update",
+		.func = test__event_update,
+	},
+	{
+		.desc = "Event times",
+		.func = test__event_times,
+	},
+	{
+		.desc = "Read backward ring buffer",
+		.func = test__backward_ring_buffer,
+	},
+	{
+		.desc = "Print cpu map",
+		.func = test__cpu_map_print,
+	},
+	{
+		.desc = "Probe SDT events",
+		.func = test__sdt_event,
+	},
+	{
+		.desc = "is_printable_array",
+		.func = test__is_printable_array,
+	},
+	{
+		.desc = "Print bitmap",
+		.func = test__bitmap_print,
+	},
+	{
+		.desc = "perf hooks",
+		.func = test__perf_hooks,
+	},
+	{
+		.desc = "builtin clang support",
+		.func = test__clang,
+		.subtest = {
+			.skip_if_fail	= true,
+			.get_nr		= test__clang_subtest_get_nr,
+			.get_desc	= test__clang_subtest_get_desc,
+		}
+	},
+	{
+		.desc = "unit_number__scnprintf",
+		.func = test__unit_number__scnprint,
+	},
+	{
+		.desc = "mem2node",
+		.func = test__mem2node,
+	},
+	{
+		.func = NULL,
+	},
+};
+
+/* Test tables in the order for_each_test() visits them: generic, then arch. */
+static struct test *tests[] = {
+	generic_tests,
+	arch_tests,
+};
+
+/*
+ * Decide whether @test, at zero-based position @curr, was selected on the
+ * command line.
+ *
+ * With no arguments every test matches.  Otherwise an argument that parses
+ * completely as a decimal number selects the test whose one-based number is
+ * curr + 1, and any other argument selects tests whose description contains
+ * it, compared case-insensitively.
+ *
+ * Shell test descriptions may be NULL (shell_test__description() returns
+ * NULL when the script cannot be read), so a missing description simply
+ * never matches a name fragment instead of being handed to strcasestr().
+ */
+static bool perf_test__matches(struct test *test, int curr, int argc, const char *argv[])
+{
+	int i;
+
+	if (argc == 0)
+		return true;
+
+	for (i = 0; i < argc; ++i) {
+		char *end;
+		long nr = strtol(argv[i], &end, 10);
+
+		/* Fully numeric argument: compare against the test number. */
+		if (*end == '\0') {
+			if (nr == curr + 1)
+				return true;
+			continue;
+		}
+
+		if (test->desc && strcasestr(test->desc, argv[i]))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Run test->func(test, subtest) and return its result.
+ *
+ * Unless dont_fork is set the test runs in a forked child.  The child's
+ * stdout/stderr are redirected to /dev/null when not verbose; when verbose,
+ * SIGSEGV and SIGFPE are routed to sighandler_dump_stack instead.  The child
+ * exits with the test result and the parent decodes it from the wait status.
+ *
+ * Returns the test result, or -1 if fork or wait fails or the child was
+ * terminated by a signal.
+ */
+static int run_test(struct test *test, int subtest)
+{
+	int status, err = -1, child = dont_fork ? 0 : fork();
+	char sbuf[STRERR_BUFSIZE];
+
+	if (child < 0) {
+		pr_err("failed to fork test: %s\n",
+			str_error_r(errno, sbuf, sizeof(sbuf)));
+		return -1;
+	}
+
+	if (!child) {
+		if (!dont_fork) {
+			pr_debug("test child forked, pid %d\n", getpid());
+
+			if (verbose <= 0) {
+				int nullfd = open("/dev/null", O_WRONLY);
+
+				if (nullfd >= 0) {
+					close(STDERR_FILENO);
+					close(STDOUT_FILENO);
+
+					dup2(nullfd, STDOUT_FILENO);
+					dup2(STDOUT_FILENO, STDERR_FILENO);
+					close(nullfd);
+				}
+			} else {
+				signal(SIGSEGV, sighandler_dump_stack);
+				signal(SIGFPE, sighandler_dump_stack);
+			}
+		}
+
+		err = test->func(test, subtest);
+		if (!dont_fork)
+			exit(err);
+	}
+
+	if (!dont_fork) {
+		pid_t pid;
+
+		/*
+		 * Reap exactly the child forked above, retrying when a signal
+		 * interrupts the wait, so that 'status' is only examined
+		 * after it has actually been filled in.
+		 */
+		do {
+			pid = waitpid(child, &status, 0);
+		} while (pid < 0 && errno == EINTR);
+
+		if (pid < 0) {
+			pr_err("failed to wait for test child: %s\n",
+				str_error_r(errno, sbuf, sizeof(sbuf)));
+			return -1;
+		}
+
+		if (WIFEXITED(status)) {
+			/* Exit codes are 8 bit; restore negative results. */
+			err = (signed char)WEXITSTATUS(status);
+			pr_debug("test child finished with %d\n", err);
+		} else if (WIFSIGNALED(status)) {
+			err = -1;
+			pr_debug("test child interrupted\n");
+		}
+	}
+
+	return err;
+}
+
+/*
+ * Iterate @t over every entry of every table in tests[], using @j as the
+ * table index.  Each table is walked until the entry whose .func is NULL.
+ */
+#define for_each_test(j, t)	 				\
+	for (j = 0; j < ARRAY_SIZE(tests); j++)	\
+		for (t = &tests[j][0]; t->func; t++)
+
+/*
+ * Run @t (or mark it skipped when @force_skip is set), print the coloured
+ * verdict and return the result code.
+ */
+static int test_and_print(struct test *t, bool force_skip, int subtest)
+{
+	int err;
+
+	if (force_skip) {
+		pr_debug("\n--- force skipped ---\n");
+		err = TEST_SKIP;
+	} else {
+		pr_debug("\n--- start ---\n");
+		err = run_test(t, subtest);
+		pr_debug("---- end ----\n");
+	}
+
+	if (t->subtest.get_nr)
+		pr_debug("%s subtest %d:", t->desc, subtest);
+	else
+		pr_debug("%s:", t->desc);
+
+	/* Anything that is neither OK nor SKIP is reported as a failure. */
+	if (err == TEST_OK)
+		pr_info(" Ok\n");
+	else if (err == TEST_SKIP)
+		color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
+	else
+		color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
+
+	return err;
+}
+
+/*
+ * Read the first line of the file @path/@name into @description (at most
+ * @size bytes) and return it with its first character skipped and passed
+ * through trim().  Returns NULL when the file cannot be opened or no line
+ * can be read.
+ */
+static const char *shell_test__description(char *description, size_t size,
+					   const char *path, const char *name)
+{
+	char filename[PATH_MAX];
+	char *line;
+	FILE *fp;
+
+	path__join(filename, sizeof(filename), path, name);
+
+	fp = fopen(filename, "r");
+	if (fp == NULL)
+		return NULL;
+
+	line = fgets(description, size, fp);
+	fclose(fp);
+
+	if (line == NULL)
+		return NULL;
+
+	return trim(line + 1);
+}
+
+/*
+ * Iterate @ent over the entries of the open directory stream @dir, running
+ * the loop body only for entries that is_directory(@base, @ent) rejects.
+ */
+#define for_each_shell_test(dir, base, ent)	\
+	while ((ent = readdir(dir)) != NULL)	\
+		if (!is_directory(base, ent))
+
+/*
+ * Work out the directory holding the shell tests and store it in @path
+ * (at most @size bytes).
+ *
+ * The development locations are tried first: for each existing directory in
+ * devel_dirs[], "<dir>/shell" is returned if that path itself exists.  When
+ * none qualifies, "<exec path>/tests/shell" is returned without checking
+ * that it exists.
+ *
+ * Always returns @path.
+ */
+static const char *shell_tests__dir(char *path, size_t size)
+{
+	const char *devel_dirs[] = { "./tools/perf/tests", "./tests", };
+	char *exec_path;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(devel_dirs); ++i) {
+		struct stat st;
+
+		if (lstat(devel_dirs[i], &st))
+			continue;
+
+		scnprintf(path, size, "%s/shell", devel_dirs[i]);
+		/* Check the shell subdirectory itself, not its parent again. */
+		if (!lstat(path, &st))
+			return path;
+	}
+
+	/* Then installed path. */
+	exec_path = get_argv_exec_path();
+	scnprintf(path, size, "%s/tests/shell", exec_path);
+	free(exec_path);
+	return path;
+}
+
+/*
+ * Return the length of the longest shell test description, 0 when none is
+ * found, or -1 when the shell test directory cannot be opened.
+ */
+static int shell_tests__max_desc_width(void)
+{
+	char path_dir[PATH_MAX];
+	const char *path = shell_tests__dir(path_dir, sizeof(path_dir));
+	struct dirent *ent;
+	int widest = 0;
+	DIR *dir;
+
+	if (path == NULL)
+		return -1;
+
+	dir = opendir(path);
+	if (dir == NULL)
+		return -1;
+
+	for_each_shell_test(dir, path, ent) {
+		char bf[256];
+		const char *desc = shell_test__description(bf, sizeof(bf), path, ent->d_name);
+		int len;
+
+		/* Entries without a readable description do not count. */
+		if (desc == NULL)
+			continue;
+
+		len = strlen(desc);
+		if (len > widest)
+			widest = len;
+	}
+
+	closedir(dir);
+	return widest;
+}
+
+/* Location of one shell test script; passed to shell_test__run() via test->priv. */
+struct shell_test {
+	const char *dir;	/* directory containing the script */
+	const char *file;	/* script file name inside 'dir' */
+};
+
+/*
+ * Run the script described by test->priv through system().  A zero status
+ * is TEST_OK, an exit code of 2 is TEST_SKIP, anything else is TEST_FAIL.
+ */
+static int shell_test__run(struct test *test, int subdir __maybe_unused)
+{
+	struct shell_test *st = test->priv;
+	char script[PATH_MAX];
+	int status;
+
+	path__join(script, sizeof(script), st->dir, st->file);
+
+	status = system(script);
+	if (status == 0)
+		return TEST_OK;
+
+	if (WEXITSTATUS(status) == 2)
+		return TEST_SKIP;
+
+	return TEST_FAIL;
+}
+
+/*
+ * Run every shell test selected by @argc/@argv.  Numbering continues from
+ * @i and descriptions are padded to @width.  Returns 0, or -1 when the
+ * shell test directory cannot be opened.
+ */
+static int run_shell_tests(int argc, const char *argv[], int i, int width)
+{
+	char path_dir[PATH_MAX];
+	struct shell_test st = {
+		.dir = shell_tests__dir(path_dir, sizeof(path_dir)),
+	};
+	struct dirent *ent;
+	DIR *dir;
+
+	if (!st.dir)
+		return -1;
+
+	dir = opendir(st.dir);
+	if (!dir)
+		return -1;
+
+	for_each_shell_test(dir, st.dir, ent) {
+		char desc[256];
+		struct test test = {
+			.desc = shell_test__description(desc, sizeof(desc), st.dir, ent->d_name),
+			.func = shell_test__run,
+			.priv = &st,
+		};
+		int curr = i++;
+
+		if (perf_test__matches(&test, curr, argc, argv)) {
+			/* shell_test__run() reads the script name from st. */
+			st.file = ent->d_name;
+			pr_info("%2d: %-*s:", i, width, test.desc);
+			test_and_print(&test, false, -1);
+		}
+	}
+
+	closedir(dir);
+	return 0;
+}
+
+/*
+ * Run all built-in tests selected by @argc/@argv, then the shell tests.
+ *
+ * Tests whose one-based number is found in @skiplist are reported as skipped
+ * by user override.  Tests that provide subtests run each subtest in turn;
+ * when subtest.skip_if_fail is set, the subtests following a non-OK result
+ * are force-skipped.
+ *
+ * Returns the result of run_shell_tests().
+ */
+static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
+{
+	struct test *t;
+	unsigned int j;
+	int i = 0;
+	int width = shell_tests__max_desc_width();
+
+	/* Column width: the longest description over shell and built-in tests. */
+	for_each_test(j, t) {
+		int len = strlen(t->desc);
+
+		if (width < len)
+			width = len;
+	}
+
+	for_each_test(j, t) {
+		/* curr is the zero-based position; i is now the printed number. */
+		int curr = i++, err;
+
+		if (!perf_test__matches(t, curr, argc, argv))
+			continue;
+
+		if (t->is_supported && !t->is_supported()) {
+			pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc);
+			continue;
+		}
+
+		pr_info("%2d: %-*s:", i, width, t->desc);
+
+		if (intlist__find(skiplist, i)) {
+			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
+			continue;
+		}
+
+		if (!t->subtest.get_nr) {
+			test_and_print(t, false, -1);
+		} else {
+			int subn = t->subtest.get_nr();
+			/*
+			 * minus 2 to align with normal testcases.
+			 * For subtest we print additional '.x' in number.
+			 * for example:
+			 *
+			 * 35: Test LLVM searching and compiling                        :
+			 * 35.1: Basic BPF llvm compiling test                          : Ok
+			 */
+			int subw = width > 2 ? width - 2 : width;
+			bool skip = false;
+			int subi;
+
+			if (subn <= 0) {
+				color_fprintf(stderr, PERF_COLOR_YELLOW,
+					      " Skip (not compiled in)\n");
+				continue;
+			}
+			pr_info("\n");
+
+			/* Widen the subtest column to the longest subtest description. */
+			for (subi = 0; subi < subn; subi++) {
+				int len = strlen(t->subtest.get_desc(subi));
+
+				if (subw < len)
+					subw = len;
+			}
+
+			for (subi = 0; subi < subn; subi++) {
+				pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
+					t->subtest.get_desc(subi));
+				err = test_and_print(t, skip, subi);
+				/* Once a subtest is not OK, force-skip the remaining ones. */
+				if (err != TEST_OK && t->subtest.skip_if_fail)
+					skip = true;
+			}
+		}
+	}
+
+	return run_shell_tests(argc, argv, i, width);
+}
+
+/*
+ * Print number and description of every shell test selected by
+ * @argc/@argv, numbering from @i.  Returns 0, or -1 when the shell test
+ * directory cannot be opened.
+ */
+static int perf_test__list_shell(int argc, const char **argv, int i)
+{
+	char path_dir[PATH_MAX];
+	const char *path = shell_tests__dir(path_dir, sizeof(path_dir));
+	struct dirent *ent;
+	DIR *dir;
+
+	if (!path)
+		return -1;
+
+	dir = opendir(path);
+	if (!dir)
+		return -1;
+
+	for_each_shell_test(dir, path, ent) {
+		char bf[256];
+		struct test t = {
+			.desc = shell_test__description(bf, sizeof(bf), path, ent->d_name),
+		};
+		int curr = i++;
+
+		if (perf_test__matches(&t, curr, argc, argv))
+			pr_info("%2d: %s\n", i, t.desc);
+	}
+
+	closedir(dir);
+	return 0;
+}
+
+/*
+ * Print number and description of every selected, supported built-in test,
+ * followed by the shell tests.  Always returns 0.
+ */
+static int perf_test__list(int argc, const char **argv)
+{
+	struct test *t;
+	unsigned int j;
+	int i = 0;
+
+	for_each_test(j, t) {
+		int curr = i++;
+
+		if (!perf_test__matches(t, curr, argc, argv))
+			continue;
+
+		if (t->is_supported && !t->is_supported())
+			continue;
+
+		pr_info("%2d: %s\n", i, t->desc);
+	}
+
+	perf_test__list_shell(argc, argv, i);
+
+	return 0;
+}
+
+/*
+ * Entry point of "perf test": parse the options, handle the "list"
+ * subcommand, otherwise initialise the symbol code and run the tests.
+ */
+int cmd_test(int argc, const char **argv)
+{
+	const char *test_usage[] = {
+	"perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]",
+	NULL,
+	};
+	const char * const test_subcommands[] = { "list", NULL };
+	const char *skip = NULL;
+	const struct option test_options[] = {
+	OPT_STRING('s', "skip", &skip, "tests", "tests to skip"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('F', "dont-fork", &dont_fork,
+		    "Do not fork for testcase"),
+	OPT_END()
+	};
+	struct intlist *skiplist = NULL;
+	int ret;
+
+	ret = hists__init();
+	if (ret < 0)
+		return ret;
+
+	argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0);
+	if (argc >= 1 && strcmp(argv[0], "list") == 0)
+		return perf_test__list(argc - 1, argv + 1);
+
+	symbol_conf.try_vmlinux_path = true;
+	symbol_conf.sort_by_name = true;
+	symbol_conf.priv_size = sizeof(int);
+
+	if (symbol__init(NULL) < 0)
+		return -1;
+
+	/* -s/--skip carries the list of test numbers to skip. */
+	if (skip)
+		skiplist = intlist__new(skip);
+
+	return __cmd_test(argc, argv, skiplist);
+}
diff --git a/tests/clang.c b/tests/clang.c
new file mode 100644
index 0000000..f45fe11
--- /dev/null
+++ b/tests/clang.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tests.h"
+#include "debug.h"
+#include "util.h"
+#include "c++/clang-c.h"
+#include <linux/kernel.h>
+
+/*
+ * Subtests of the builtin clang test.  The entries only exist when
+ * HAVE_LIBCLANGLLVM_SUPPORT is defined; otherwise the table is empty.
+ */
+static struct {
+	int (*func)(void);
+	const char *desc;
+} clang_testcase_table[] = {
+#ifdef HAVE_LIBCLANGLLVM_SUPPORT
+	{
+		.func = test__clang_to_IR,
+		.desc = "builtin clang compile C source to IR",
+	},
+	{
+		.func = test__clang_to_obj,
+		.desc = "builtin clang compile C source to ELF object",
+	},
+#endif
+};
+
+/* Number of clang subtests: one per entry of clang_testcase_table. */
+int test__clang_subtest_get_nr(void)
+{
+	return (int)ARRAY_SIZE(clang_testcase_table);
+}
+
+/*
+ * Return the description of clang subtest @i, or NULL when @i does not
+ * index an entry of clang_testcase_table.
+ */
+const char *test__clang_subtest_get_desc(int i)
+{
+	const int nr_cases = (int)ARRAY_SIZE(clang_testcase_table);
+
+	if (i >= 0 && i < nr_cases)
+		return clang_testcase_table[i].desc;
+
+	return NULL;
+}
+
+/*
+ * Run clang subtest @i.  Without HAVE_LIBCLANGLLVM_SUPPORT the test is
+ * always skipped; with it, an out-of-range index fails and any other index
+ * returns the result of the matching table entry.
+ */
+int test__clang(struct test *test __maybe_unused, int i __maybe_unused)
+{
+#ifdef HAVE_LIBCLANGLLVM_SUPPORT
+	if (i >= 0 && i < (int)ARRAY_SIZE(clang_testcase_table))
+		return clang_testcase_table[i].func();
+
+	return TEST_FAIL;
+#else
+	return TEST_SKIP;
+#endif
+}
diff --git a/tests/code-reading.c b/tests/code-reading.c
new file mode 100644
index 0000000..9993635
--- /dev/null
+++ b/tests/code-reading.c
@@ -0,0 +1,739 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "machine.h"
+#include "event.h"
+#include "thread.h"
+
+#include "tests.h"
+
+#include "sane_ctype.h"
+
+#define BUFSZ	1024
+#define READLEN	128
+
+/* Start addresses of the kcore maps that read_object_code() already compared. */
+struct state {
+	u64 done[1024];		/* map start addresses recorded so far */
+	size_t done_cnt;	/* number of valid entries in done[] */
+};
+
+/*
+ * Convert one hexadecimal digit to its value.  The caller is expected to
+ * have validated @c; any character outside '0'-'9' and 'a'-'f' is treated
+ * as an upper-case digit.
+ */
+static unsigned int hex(char c)
+{
+	if ('0' <= c && c <= '9')
+		return c - '0';
+
+	return (c >= 'a' && c <= 'f') ? c - 'a' + 10 : c - 'A' + 10;
+}
+
+/*
+ * Parse one chunk of consecutive hex digit pairs from *line into *buf.
+ *
+ * Both cursors are advanced: *line past the characters consumed, *buf past
+ * the bytes stored, and *buf_len is decremented once per stored byte.
+ * Parsing stops when the buffer is full, at a character that is not a hex
+ * digit, or when the pair just stored is followed by whitespace.
+ *
+ * Returns the number of bytes stored for this chunk.
+ */
+static size_t read_objdump_chunk(const char **line, unsigned char **buf,
+				 size_t *buf_len)
+{
+	size_t bytes_read = 0;
+	unsigned char *chunk_start = *buf;
+
+	/* Read bytes */
+	while (*buf_len > 0) {
+		char c1, c2;
+
+		/* Get 2 hex digits */
+		c1 = *(*line)++;
+		if (!isxdigit(c1))
+			break;
+		c2 = *(*line)++;
+		if (!isxdigit(c2))
+			break;
+
+		/* Store byte and advance buf */
+		**buf = (hex(c1) << 4) | hex(c2);
+		(*buf)++;
+		(*buf_len)--;
+		bytes_read++;
+
+		/* End of chunk? */
+		if (isspace(**line))
+			break;
+	}
+
+	/*
+	 * objdump will display raw insn as LE if code endian
+	 * is LE and bytes_per_chunk > 1. In that case reverse
+	 * the chunk we just read.
+	 *
+	 * (see disassemble_bytes() at binutils/objdump.c for details
+	 * how objdump chooses display endian)
+	 */
+	if (bytes_read > 1 && !bigendian()) {
+		unsigned char *chunk_end = chunk_start + bytes_read - 1;
+		unsigned char tmp;
+
+		/* In-place reversal: swap from both ends towards the middle. */
+		while (chunk_start < chunk_end) {
+			tmp = *chunk_start;
+			*chunk_start = *chunk_end;
+			*chunk_end = tmp;
+			chunk_start++;
+			chunk_end--;
+		}
+	}
+
+	return bytes_read;
+}
+
+/*
+ * Extract the raw instruction bytes from one line of objdump output.
+ *
+ * Everything up to and including the first ':' is skipped, then leading
+ * whitespace, then chunks are read with read_objdump_chunk() until one
+ * yields no bytes.  Returns the total number of bytes stored in @buf, or 0
+ * if the line contains no ':'.
+ */
+static size_t read_objdump_line(const char *line, unsigned char *buf,
+				size_t buf_len)
+{
+	const char *p;
+	size_t ret, bytes_read = 0;
+
+	/* Skip to a colon */
+	p = strchr(line, ':');
+	if (!p)
+		return 0;
+	p++;
+
+	/* Skip initial spaces */
+	while (*p) {
+		if (!isspace(*p))
+			break;
+		p++;
+	}
+
+	do {
+		ret = read_objdump_chunk(&p, &buf, &buf_len);
+		bytes_read += ret;
+		/*
+		 * Step over the character following the chunk.
+		 * NOTE(review): p advances unconditionally, even if the chunk
+		 * parser stopped on the terminating NUL - confirm that every
+		 * line carries text after its last byte chunk.
+		 */
+		p++;
+	} while (ret > 0);
+
+	/* return number of successfully read bytes */
+	return bytes_read;
+}
+
+/*
+ * Read objdump output from @f and place the instruction bytes into @buf at
+ * the offset given by each line's address relative to @start_addr.
+ *
+ * On entry *len is the number of bytes wanted; on return it holds the
+ * number of bytes that could not be read.  Returns 0, or -1 when getline()
+ * fails before end of file.
+ */
+static int read_objdump_output(FILE *f, void *buf, size_t *len, u64 start_addr)
+{
+	char *line = NULL;
+	size_t line_len, off_last = 0;
+	ssize_t ret;
+	int err = 0;
+	u64 addr, last_addr = start_addr;
+
+	while (off_last < *len) {
+		size_t off, read_bytes, written_bytes;
+		unsigned char tmp[BUFSZ];
+
+		ret = getline(&line, &line_len, f);
+		if (feof(f))
+			break;
+		if (ret < 0) {
+			pr_debug("getline failed\n");
+			err = -1;
+			break;
+		}
+
+		/* read objdump data into temporary buffer */
+		read_bytes = read_objdump_line(line, tmp, sizeof(tmp));
+		if (!read_bytes)
+			continue;
+
+		/* Lines that do not start with a hex address are ignored. */
+		if (sscanf(line, "%"PRIx64, &addr) != 1)
+			continue;
+		if (addr < last_addr) {
+			pr_debug("addr going backwards, read beyond section?\n");
+			break;
+		}
+		last_addr = addr;
+
+		/* copy it from temporary buffer to 'buf' according
+		 * to address on current objdump line */
+		off = addr - start_addr;
+		if (off >= *len)
+			break;
+		/* Never copy past the end of the caller's buffer. */
+		written_bytes = MIN(read_bytes, *len - off);
+		memcpy(buf + off, tmp, written_bytes);
+		off_last = off + written_bytes;
+	}
+
+	/* len returns number of bytes that could not be read */
+	*len -= off_last;
+
+	free(line);
+
+	return err;
+}
+
+/*
+ * Disassemble @len bytes of @filename starting at @addr with objdump and
+ * store the raw instruction bytes in @buf.
+ *
+ * Returns 0 when all bytes were read, the number of missing bytes when
+ * objdump produced too few, or -1 when the command line does not fit the
+ * buffer, popen() fails, or reading the output fails.
+ */
+static int read_via_objdump(const char *filename, u64 addr, void *buf,
+			    size_t len)
+{
+	static const char redirect[] = " 2>/dev/null";
+	char cmd[PATH_MAX * 2];
+	const char *fmt;
+	FILE *f;
+	int ret;
+
+	fmt = "%s -z -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s";
+	ret = snprintf(cmd, sizeof(cmd), fmt, "objdump", addr, addr + len,
+		       filename);
+	/* Reject truncation, and keep room for the redirection added below. */
+	if (ret <= 0 || (size_t)ret + sizeof(redirect) > sizeof(cmd))
+		return -1;
+
+	pr_debug("Objdump command is: %s\n", cmd);
+
+	/* Ignore objdump errors */
+	strcat(cmd, redirect);
+
+	f = popen(cmd, "r");
+	if (!f) {
+		pr_debug("popen failed\n");
+		return -1;
+	}
+
+	/* read_objdump_output() leaves the number of unread bytes in len. */
+	ret = read_objdump_output(f, buf, &len, addr);
+	if (len) {
+		pr_debug("objdump read too few bytes: %zu\n", len);
+		if (!ret)
+			ret = len;
+	}
+
+	pclose(f);
+
+	return ret;
+}
+
+/* Print @len bytes of @buf as hex via pr_debug(), sixteen bytes per line. */
+static void dump_buf(unsigned char *buf, size_t len)
+{
+	size_t pos = 0;
+
+	while (pos < len) {
+		pr_debug("0x%02x ", buf[pos]);
+		/* Break the line after every sixteenth byte. */
+		if (++pos % 16 == 0)
+			pr_debug("\n");
+	}
+	pr_debug("\n");
+}
+
+/*
+ * Read up to @len bytes of object code at @addr twice - once through
+ * dso__data_read_offset() and once through objdump - and compare the
+ * results.
+ *
+ * Returns 0 when the bytes match or the address is deliberately skipped
+ * (unresolvable hypervisor address, kallsyms-only kernel address, kcore map
+ * already tested, too many kcore maps, objdump unable to handle kcore), and
+ * -1 on any failure or mismatch.
+ */
+static int read_object_code(u64 addr, size_t len, u8 cpumode,
+			    struct thread *thread, struct state *state)
+{
+	struct addr_location al;
+	unsigned char buf1[BUFSZ];
+	unsigned char buf2[BUFSZ];
+	size_t ret_len;
+	u64 objdump_addr;
+	const char *objdump_name;
+	char decomp_name[KMOD_DECOMP_LEN];
+	int ret;
+
+	pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
+
+	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
+	if (!al.map || !al.map->dso) {
+		if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
+			pr_debug("Hypervisor address can not be resolved - skipping\n");
+			return 0;
+		}
+
+		pr_debug("thread__find_addr_map failed\n");
+		return -1;
+	}
+
+	pr_debug("File is: %s\n", al.map->dso->long_name);
+
+	if (al.map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+	    !dso__is_kcore(al.map->dso)) {
+		pr_debug("Unexpected kernel address - skipping\n");
+		return 0;
+	}
+
+	pr_debug("On file address is: %#"PRIx64"\n", al.addr);
+
+	/* buf1 and buf2 hold at most BUFSZ bytes. */
+	if (len > BUFSZ)
+		len = BUFSZ;
+
+	/* Do not go off the map */
+	if (addr + len > al.map->end)
+		len = al.map->end - addr;
+
+	/* Read the object code using perf */
+	ret_len = dso__data_read_offset(al.map->dso, thread->mg->machine,
+					al.addr, buf1, len);
+	if (ret_len != len) {
+		pr_debug("dso__data_read_offset failed\n");
+		return -1;
+	}
+
+	/*
+	 * Converting addresses for use by objdump requires more information.
+	 * map__load() does that.  See map__rip_2objdump() for details.
+	 */
+	if (map__load(al.map))
+		return -1;
+
+	/* objdump struggles with kcore - try each map only once */
+	if (dso__is_kcore(al.map->dso)) {
+		size_t d;
+
+		for (d = 0; d < state->done_cnt; d++) {
+			if (state->done[d] == al.map->start) {
+				pr_debug("kcore map tested already");
+				pr_debug(" - skipping\n");
+				return 0;
+			}
+		}
+		if (state->done_cnt >= ARRAY_SIZE(state->done)) {
+			pr_debug("Too many kcore maps - skipping\n");
+			return 0;
+		}
+		state->done[state->done_cnt++] = al.map->start;
+	}
+
+	objdump_name = al.map->dso->long_name;
+	if (dso__needs_decompress(al.map->dso)) {
+		if (dso__decompress_kmodule_path(al.map->dso, objdump_name,
+						 decomp_name,
+						 sizeof(decomp_name)) < 0) {
+			pr_debug("decompression failed\n");
+			return -1;
+		}
+
+		/* Hand objdump the decompressed copy instead. */
+		objdump_name = decomp_name;
+	}
+
+	/* Read the object code using objdump */
+	objdump_addr = map__rip_2objdump(al.map, al.addr);
+	ret = read_via_objdump(objdump_name, objdump_addr, buf2, len);
+
+	/* The decompressed copy is only needed for the objdump run. */
+	if (dso__needs_decompress(al.map->dso))
+		unlink(objdump_name);
+
+	/* A positive ret is the number of bytes objdump failed to deliver. */
+	if (ret > 0) {
+		/*
+		 * The kernel maps are inaccurate - assume objdump is right in
+		 * that case.
+		 */
+		if (cpumode == PERF_RECORD_MISC_KERNEL ||
+		    cpumode == PERF_RECORD_MISC_GUEST_KERNEL) {
+			len -= ret;
+			if (len) {
+				pr_debug("Reducing len to %zu\n", len);
+			} else if (dso__is_kcore(al.map->dso)) {
+				/*
+				 * objdump cannot handle very large segments
+				 * that may be found in kcore.
+				 */
+				pr_debug("objdump failed for kcore");
+				pr_debug(" - skipping\n");
+				return 0;
+			} else {
+				return -1;
+			}
+		}
+	}
+	if (ret < 0) {
+		pr_debug("read_via_objdump failed\n");
+		return -1;
+	}
+
+	/* The results should be identical */
+	if (memcmp(buf1, buf2, len)) {
+		pr_debug("Bytes read differ from those read by objdump\n");
+		pr_debug("buf1 (dso):\n");
+		dump_buf(buf1, len);
+		pr_debug("buf2 (objdump):\n");
+		dump_buf(buf2, len);
+		return -1;
+	}
+	pr_debug("Bytes read match those read by objdump\n");
+
+	return 0;
+}
+
+/*
+ * Parse one sample event, look up the thread it belongs to and compare
+ * READLEN bytes of object code at the sampled instruction pointer.
+ * Returns the result of read_object_code(), or -1 when the sample cannot be
+ * parsed or no thread is found.
+ */
+static int process_sample_event(struct machine *machine,
+				struct perf_evlist *evlist,
+				union perf_event *event, struct state *state)
+{
+	struct perf_sample sample;
+	struct thread *owner;
+	int err;
+
+	if (perf_evlist__parse_sample(evlist, event, &sample) != 0) {
+		pr_debug("perf_evlist__parse_sample failed\n");
+		return -1;
+	}
+
+	owner = machine__findnew_thread(machine, sample.pid, sample.tid);
+	if (owner == NULL) {
+		pr_debug("machine__findnew_thread failed\n");
+		return -1;
+	}
+
+	err = read_object_code(sample.ip, READLEN, sample.cpumode, owner, state);
+	thread__put(owner);
+
+	return err;
+}
+
+/*
+ * Dispatch one event: samples go to process_sample_event(), throttle and
+ * unthrottle events are ignored, other types below PERF_RECORD_MAX are
+ * passed to machine__process_event(), and everything else is ignored.
+ */
+static int process_event(struct machine *machine, struct perf_evlist *evlist,
+			 union perf_event *event, struct state *state)
+{
+	int ret;
+
+	switch (event->header.type) {
+	case PERF_RECORD_SAMPLE:
+		return process_sample_event(machine, evlist, event, state);
+	case PERF_RECORD_THROTTLE:
+	case PERF_RECORD_UNTHROTTLE:
+		return 0;
+	default:
+		break;
+	}
+
+	if (event->header.type >= PERF_RECORD_MAX)
+		return 0;
+
+	ret = machine__process_event(machine, event, NULL);
+	if (ret < 0)
+		pr_debug("machine__process_event failed, event type %u\n",
+			 event->header.type);
+
+	return ret;
+}
+
+/*
+ * Drain every mmap of @evlist, feeding each event to process_event().
+ * Returns 0, or the first negative result from process_event().
+ */
+static int process_events(struct machine *machine, struct perf_evlist *evlist,
+			  struct state *state)
+{
+	int idx;
+
+	for (idx = 0; idx < evlist->nr_mmaps; idx++) {
+		struct perf_mmap *md = &evlist->mmap[idx];
+		union perf_event *event;
+
+		if (perf_mmap__read_init(md) < 0)
+			continue;
+
+		for (;;) {
+			int ret;
+
+			event = perf_mmap__read_event(md);
+			if (event == NULL)
+				break;
+
+			ret = process_event(machine, evlist, event, state);
+			/* The event is consumed whether or not it was handled. */
+			perf_mmap__consume(md);
+			if (ret < 0)
+				return ret;
+		}
+		perf_mmap__read_done(md);
+	}
+
+	return 0;
+}
+
+/*
+ * qsort() comparator for ints, ascending.  Uses comparisons rather than
+ * subtraction so that the result cannot overflow for any pair of values.
+ */
+static int comp(const void *a, const void *b)
+{
+	const int x = *(const int *)a;
+	const int y = *(const int *)b;
+
+	return (x > y) - (x < y);
+}
+
+/*
+ * Fill an array with descending values, sort it with qsort() and check that
+ * the result is ascending.
+ */
+static void do_sort_something(void)
+{
+	int buf[40960], i;
+	const int nr = (int)ARRAY_SIZE(buf);
+
+	/* Descending input: nr - 1 down to 0. */
+	for (i = 0; i < nr; i++)
+		buf[i] = nr - 1 - i;
+
+	qsort(buf, nr, sizeof(buf[0]), comp);
+
+	for (i = 0; i < nr; i++) {
+		if (buf[i] == i)
+			continue;
+
+		pr_debug("qsort failed\n");
+		break;
+	}
+}
+
+/* Run do_sort_something() ten times. */
+static void sort_something(void)
+{
+	int rounds = 10;
+
+	while (rounds-- > 0)
+		do_sort_something();
+}
+
+/* Create and close a pipe 1000 times, stopping early if pipe() fails. */
+static void syscall_something(void)
+{
+	int fds[2];
+	int remaining;
+
+	for (remaining = 1000; remaining > 0; remaining--) {
+		if (pipe(fds) < 0) {
+			pr_debug("pipe failed\n");
+			return;
+		}
+		close(fds[1]);
+		close(fds[0]);
+	}
+}
+
+/* Create and remove a temporary file in the current directory 1000 times. */
+static void fs_something(void)
+{
+	const char *test_file_name = "temp-perf-code-reading-test-file--";
+	int remaining = 1000;
+
+	while (remaining-- > 0) {
+		FILE *f = fopen(test_file_name, "w+");
+
+		/* A failed open is ignored; just try again next round. */
+		if (f == NULL)
+			continue;
+
+		fclose(f);
+		unlink(test_file_name);
+	}
+}
+
+/*
+ * Pick the event to sample with: "cycles", or "cycles:u" when
+ * @excl_kernel is set.
+ *
+ * On s390x the choice depends on a flags value parsed from the CPU id
+ * string: bit 2 must be set for the user-only event and bit 1 for the full
+ * event.  When the string cannot be obtained or parsed, or the required bit
+ * is missing, the matching "cpu-clock" event is returned instead.
+ */
+static const char *do_determine_event(bool excl_kernel)
+{
+	const char *event = excl_kernel ? "cycles:u" : "cycles";
+
+#ifdef __s390x__
+	char cpuid[128], model[16], model_c[16], cpum_cf_v[16];
+	unsigned int family, cpum_cf_a;
+	int ret;
+
+	if (get_cpuid(cpuid, sizeof(cpuid)))
+		goto out_clocks;
+	/*
+	 * Field widths keep each string conversion within its 16 byte
+	 * buffer; an over-long field makes the scan stop early and is then
+	 * treated as "not available".
+	 */
+	ret = sscanf(cpuid, "%*[^,],%u,%15[^,],%15[^,],%15[^,],%x", &family,
+		     model_c, model, cpum_cf_v, &cpum_cf_a);
+	if (ret != 5)		 /* Not available */
+		goto out_clocks;
+	if (excl_kernel && (cpum_cf_a & 4))
+		return event;
+	if (!excl_kernel && (cpum_cf_a & 2))
+		return event;
+
+	/* Fall through: missing authorization */
+out_clocks:
+	event = excl_kernel ? "cpu-clock:u" : "cpu-clock";
+
+#endif
+	return event;
+}
+
+/*
+ * Workload executed while events are enabled: file create/remove, sorting
+ * and pipe system calls, in that order.
+ */
+static void do_something(void)
+{
+	fs_something();
+
+	sort_something();
+
+	syscall_something();
+}
+
+/*
+ * Results of do_test_code_reading().  test__code_reading() maps all of them
+ * to success and only logs which case occurred.
+ */
+enum {
+	TEST_CODE_READING_OK,
+	TEST_CODE_READING_NO_VMLINUX,
+	TEST_CODE_READING_NO_KCORE,
+	TEST_CODE_READING_NO_ACCESS,
+	TEST_CODE_READING_NO_KERNEL_OBJ,
+};
+
+/*
+ * Sample a small workload in this process and, for every sample, compare
+ * the object code read by perf with the code objdump reports.
+ *
+ * When @try_kcore is set, kallsyms is forced in place of vmlinux so that
+ * kcore gets used; if the kernel map then is not kcore the pass ends with
+ * TEST_CODE_READING_NO_KCORE.
+ *
+ * Returns one of the TEST_CODE_READING_* values, or -1 on failure.
+ */
+static int do_test_code_reading(bool try_kcore)
+{
+	struct machine *machine;
+	struct thread *thread;
+	struct record_opts opts = {
+		.mmap_pages	     = UINT_MAX,
+		.user_freq	     = UINT_MAX,
+		.user_interval	     = ULLONG_MAX,
+		.freq		     = 500,
+		.target		     = {
+			.uses_mmap   = true,
+		},
+	};
+	struct state state = {
+		.done_cnt = 0,
+	};
+	struct thread_map *threads = NULL;
+	struct cpu_map *cpus = NULL;
+	struct perf_evlist *evlist = NULL;
+	struct perf_evsel *evsel = NULL;
+	int err = -1, ret;
+	pid_t pid;
+	struct map *map;
+	bool have_vmlinux, have_kcore, excl_kernel = false;
+
+	pid = getpid();
+
+	machine = machine__new_host();
+	if (!machine) {
+		pr_debug("machine__new_host failed\n");
+		return -1;
+	}
+
+	ret = machine__create_kernel_maps(machine);
+	if (ret < 0) {
+		pr_debug("machine__create_kernel_maps failed\n");
+		goto out_err;
+	}
+
+	/* Force the use of kallsyms instead of vmlinux to try kcore */
+	if (try_kcore)
+		symbol_conf.kallsyms_name = "/proc/kallsyms";
+
+	/* Load kernel map */
+	map = machine__kernel_map(machine);
+	ret = map__load(map);
+	if (ret < 0) {
+		pr_debug("map__load failed\n");
+		goto out_err;
+	}
+	have_vmlinux = dso__is_vmlinux(map->dso);
+	have_kcore = dso__is_kcore(map->dso);
+
+	/*
+	 * 2nd time through we just try kcore.  Leave through the common
+	 * exit path so that the machine created above is released.
+	 */
+	if (try_kcore && !have_kcore) {
+		err = TEST_CODE_READING_NO_KCORE;
+		goto out_err;
+	}
+
+	/* No point getting kernel events if there is no kernel object */
+	if (!have_vmlinux && !have_kcore)
+		excl_kernel = true;
+
+	threads = thread_map__new_by_tid(pid);
+	if (!threads) {
+		pr_debug("thread_map__new_by_tid failed\n");
+		goto out_err;
+	}
+
+	ret = perf_event__synthesize_thread_map(NULL, threads,
+						perf_event__process, machine, false, 500);
+	if (ret < 0) {
+		pr_debug("perf_event__synthesize_thread_map failed\n");
+		goto out_err;
+	}
+
+	thread = machine__findnew_thread(machine, pid, pid);
+	if (!thread) {
+		pr_debug("machine__findnew_thread failed\n");
+		goto out_put;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (!cpus) {
+		pr_debug("cpu_map__new failed\n");
+		goto out_put;
+	}
+
+	/* Try to open the event; retry once excluding the kernel on failure. */
+	while (1) {
+		const char *str;
+
+		evlist = perf_evlist__new();
+		if (!evlist) {
+			pr_debug("perf_evlist__new failed\n");
+			goto out_put;
+		}
+
+		perf_evlist__set_maps(evlist, cpus, threads);
+
+		str = do_determine_event(excl_kernel);
+		pr_debug("Parsing event '%s'\n", str);
+		ret = parse_events(evlist, str, NULL);
+		if (ret < 0) {
+			pr_debug("parse_events failed\n");
+			goto out_put;
+		}
+
+		perf_evlist__config(evlist, &opts, NULL);
+
+		evsel = perf_evlist__first(evlist);
+
+		evsel->attr.comm = 1;
+		evsel->attr.disabled = 1;
+		evsel->attr.enable_on_exec = 0;
+
+		ret = perf_evlist__open(evlist);
+		if (ret < 0) {
+			if (!excl_kernel) {
+				excl_kernel = true;
+				/*
+				 * Both cpus and threads are now owned by evlist
+				 * and will be freed by following perf_evlist__set_maps
+				 * call. Getting reference to keep them alive.
+				 */
+				cpu_map__get(cpus);
+				thread_map__get(threads);
+				perf_evlist__set_maps(evlist, NULL, NULL);
+				perf_evlist__delete(evlist);
+				evlist = NULL;
+				continue;
+			}
+
+			if (verbose > 0) {
+				char errbuf[512];
+				perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+				pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
+			}
+
+			goto out_put;
+		}
+		break;
+	}
+
+	ret = perf_evlist__mmap(evlist, UINT_MAX);
+	if (ret < 0) {
+		pr_debug("perf_evlist__mmap failed\n");
+		goto out_put;
+	}
+
+	perf_evlist__enable(evlist);
+
+	do_something();
+
+	perf_evlist__disable(evlist);
+
+	ret = process_events(machine, evlist, &state);
+	if (ret < 0)
+		goto out_put;
+
+	if (!have_vmlinux && !have_kcore && !try_kcore)
+		err = TEST_CODE_READING_NO_KERNEL_OBJ;
+	else if (!have_vmlinux && !try_kcore)
+		err = TEST_CODE_READING_NO_VMLINUX;
+	else if (excl_kernel)
+		err = TEST_CODE_READING_NO_ACCESS;
+	else
+		err = TEST_CODE_READING_OK;
+out_put:
+	thread__put(thread);
+out_err:
+
+	/* Without an evlist the maps are still referenced from here. */
+	if (evlist) {
+		perf_evlist__delete(evlist);
+	} else {
+		cpu_map__put(cpus);
+		thread_map__put(threads);
+	}
+	machine__delete_threads(machine);
+	machine__delete(machine);
+
+	return err;
+}
+
+/*
+ * "Object code reading" test: run the vmlinux pass and, if it is fully OK,
+ * the kcore pass.  Every TEST_CODE_READING_* result counts as success; any
+ * other value is a failure.
+ */
+int test__code_reading(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int ret = do_test_code_reading(false);
+
+	if (ret == TEST_CODE_READING_OK)
+		ret = do_test_code_reading(true);
+
+	switch (ret) {
+	case TEST_CODE_READING_OK:
+		break;
+	case TEST_CODE_READING_NO_VMLINUX:
+		pr_debug("no vmlinux\n");
+		break;
+	case TEST_CODE_READING_NO_KCORE:
+		pr_debug("no kcore\n");
+		break;
+	case TEST_CODE_READING_NO_ACCESS:
+		pr_debug("no access\n");
+		break;
+	case TEST_CODE_READING_NO_KERNEL_OBJ:
+		pr_debug("no kernel obj\n");
+		break;
+	default:
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/tests/cpumap.c b/tests/cpumap.c
new file mode 100644
index 0000000..e78b897
--- /dev/null
+++ b/tests/cpumap.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tests.h"
+#include <stdio.h>
+#include "cpumap.h"
+#include "event.h"
+#include <string.h>
+#include <linux/bitops.h>
+#include "debug.h"
+
+struct machine;
+
+/*
+ * Callback for the synthesized cpu map event of CPUs 0-19: the payload must
+ * be a one-word mask with bits 0-19 set, and it must decode back into a
+ * 20 entry cpu map holding 0..19 in order.
+ */
+static int process_event_mask(struct perf_tool *tool __maybe_unused,
+			 union perf_event *event,
+			 struct perf_sample *sample __maybe_unused,
+			 struct machine *machine __maybe_unused)
+{
+	struct cpu_map_data *data = &event->cpu_map.data;
+	struct cpu_map_mask *mask;
+	struct cpu_map *map;
+	int cpu;
+
+	TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__MASK);
+
+	mask = (struct cpu_map_mask *)data->data;
+	TEST_ASSERT_VAL("wrong nr",   mask->nr == 1);
+
+	/* raw event payload */
+	for (cpu = 0; cpu < 20; cpu++)
+		TEST_ASSERT_VAL("wrong cpu", test_bit(cpu, mask->mask));
+
+	/* decoded map */
+	map = cpu_map__new_data(data);
+	TEST_ASSERT_VAL("wrong nr",  map->nr == 20);
+
+	for (cpu = 0; cpu < 20; cpu++)
+		TEST_ASSERT_VAL("wrong cpu", map->map[cpu] == cpu);
+
+	cpu_map__put(map);
+	return 0;
+}
+
+/*
+ * Callback for the synthesized cpu map event of CPUs 1 and 256: the payload
+ * must be an explicit two entry cpu list, and it must decode back into a
+ * cpu map with the same two entries and a single reference.
+ */
+static int process_event_cpus(struct perf_tool *tool __maybe_unused,
+			 union perf_event *event,
+			 struct perf_sample *sample __maybe_unused,
+			 struct machine *machine __maybe_unused)
+{
+	struct cpu_map_data *data = &event->cpu_map.data;
+	struct cpu_map_entries *entries;
+	struct cpu_map *map;
+
+	TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__CPUS);
+
+	/* raw event payload */
+	entries = (struct cpu_map_entries *)data->data;
+	TEST_ASSERT_VAL("wrong nr",   entries->nr == 2);
+	TEST_ASSERT_VAL("wrong cpu",  entries->cpu[0] == 1);
+	TEST_ASSERT_VAL("wrong cpu",  entries->cpu[1] == 256);
+
+	/* decoded map */
+	map = cpu_map__new_data(data);
+	TEST_ASSERT_VAL("wrong nr",  map->nr == 2);
+	TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1);
+	TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256);
+	TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1);
+
+	cpu_map__put(map);
+	return 0;
+}
+
+
+/*
+ * Synthesize cpu map events for two maps and let the callbacks above verify
+ * the encoding that was chosen for each of them.
+ */
+int test__cpu_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct cpu_map *cpus;
+	int err;
+
+	/* A dense range: process_event_mask() expects the mask encoding. */
+	cpus = cpu_map__new("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19");
+	err = perf_event__synthesize_cpu_map(NULL, cpus, process_event_mask, NULL);
+	TEST_ASSERT_VAL("failed to synthesize map", !err);
+	cpu_map__put(cpus);
+
+	/* Two far apart CPUs: process_event_cpus() expects the cpu list encoding. */
+	cpus = cpu_map__new("1,256");
+	err = perf_event__synthesize_cpu_map(NULL, cpus, process_event_cpus, NULL);
+	TEST_ASSERT_VAL("failed to synthesize map", !err);
+	cpu_map__put(cpus);
+
+	return 0;
+}
+
+/*
+ * Parse @str into a cpu map, print the map back into a string and compare.
+ *
+ * Returns non-zero when the printed form equals @str and 0 otherwise. A map
+ * that cannot be created also yields 0, so that callers asserting on the
+ * result see the failure instead of a truthy error code.
+ */
+static int cpu_map_print(const char *str)
+{
+	struct cpu_map *map = cpu_map__new(str);
+	char buf[100];
+
+	if (!map)
+		return 0;
+
+	cpu_map__snprint(map, buf, sizeof(buf));
+	/* buf holds its own copy of the text, the map is not needed any more */
+	cpu_map__put(map);
+	return !strcmp(buf, str);
+}
+
+/*
+ * Check that a set of cpu list strings survives a parse/print round trip
+ * unchanged.
+ */
+int test__cpu_map_print(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	/* NULL terminated; one string appears twice and is checked twice */
+	static const char * const lists[] = {
+		"1",
+		"1,5",
+		"1,3,5,7,9,11,13,15,17,19,21-40",
+		"2-5",
+		"1,3-6,8-10,24,35-37",
+		"1,3-6,8-10,24,35-37",
+		"1-10,12-20,22-30,32-40",
+		NULL,
+	};
+	const char * const *list;
+
+	for (list = lists; *list != NULL; list++)
+		TEST_ASSERT_VAL("failed to convert map", cpu_map_print(*list));
+
+	return 0;
+}
diff --git a/tests/dso-data.c b/tests/dso-data.c
new file mode 100644
index 0000000..7f6c520
--- /dev/null
+++ b/tests/dso-data.c
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <stdlib.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <api/fs/fs.h>
+#include "util.h"
+#include "machine.h"
+#include "symbol.h"
+#include "tests.h"
+#include "debug.h"
+
+/*
+ * Create a temporary file of @size bytes where byte i holds i % 10.
+ *
+ * Returns the file name, which lives in a static buffer that the next call
+ * overwrites, or NULL on failure. The caller is expected to unlink the file.
+ * On failure the temporary file is removed again so nothing is left behind.
+ */
+static char *test_file(int size)
+{
+#define TEMPL "/tmp/perf-test-XXXXXX"
+	static char buf_templ[sizeof(TEMPL)];
+	char *templ = buf_templ;
+	int fd, i;
+	unsigned char *buf;
+
+	strcpy(buf_templ, TEMPL);
+#undef TEMPL
+
+	fd = mkstemp(templ);
+	if (fd < 0) {
+		perror("mkstemp failed");
+		return NULL;
+	}
+
+	buf = malloc(size);
+	if (!buf) {
+		close(fd);
+		/* the caller only gets NULL and could never clean this up */
+		unlink(templ);
+		return NULL;
+	}
+
+	for (i = 0; i < size; i++)
+		buf[i] = (unsigned char) ((int) i % 10);
+
+	if (size != write(fd, buf, size)) {
+		/* short or failed write: drop the unusable file */
+		unlink(templ);
+		templ = NULL;
+	}
+
+	free(buf);
+	close(fd);
+	return templ;
+}
+
+#define TEST_FILE_SIZE (DSO__DATA_CACHE_SIZE * 20)
+
+/* One read issued against the test file and the result it has to produce. */
+struct test_data_offset {
+	off_t offset;	/* file offset handed to dso__data_read_offset() */
+	u8 data[10];	/* expected contents of the 10 byte read buffer */
+	int size;	/* expected return value of the read */
+};
+
+/*
+ * Read scenarios for test__dso_data(). The test file holds i % 10 at byte i
+ * and every offset below is a multiple of 10 (or 3 bytes short of one), so
+ * each full read is expected to return 0..9. Each position is read twice in
+ * a row: the first read fills the cache, the second is served from it.
+ */
+struct test_data_offset offsets[] = {
+	/* Fill first cache page. */
+	{
+		.offset = 10,
+		.data   = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+		.size   = 10,
+	},
+	/* Read first cache page. */
+	{
+		.offset = 10,
+		.data   = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+		.size   = 10,
+	},
+	/* Fill cache boundary pages. */
+	{
+		.offset = DSO__DATA_CACHE_SIZE - DSO__DATA_CACHE_SIZE % 10,
+		.data   = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+		.size   = 10,
+	},
+	/* Read cache boundary pages. */
+	{
+		.offset = DSO__DATA_CACHE_SIZE - DSO__DATA_CACHE_SIZE % 10,
+		.data   = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+		.size   = 10,
+	},
+	/* Fill final cache page. */
+	{
+		.offset = TEST_FILE_SIZE - 10,
+		.data   = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+		.size   = 10,
+	},
+	/* Read final cache page. */
+	{
+		.offset = TEST_FILE_SIZE - 10,
+		.data   = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+		.size   = 10,
+	},
+	/*
+	 * Short read at the end of the file: only 3 bytes are left, the
+	 * rest of the (pre-zeroed) buffer must stay untouched.
+	 */
+	{
+		.offset = TEST_FILE_SIZE - 3,
+		.data   = { 7, 8, 9, 0, 0, 0, 0, 0, 0, 0 },
+		.size   = 3,
+	},
+};
+
+/* move it from util/dso.c for compatibility */
+/*
+ * Local helper (moved here from util/dso.c for compatibility): get the
+ * data file descriptor of @dso and immediately hand it back with
+ * dso__data_put_fd(). Returns the descriptor, or the negative value from
+ * dso__data_get_fd() when it could not be obtained.
+ */
+static int dso__data_fd(struct dso *dso, struct machine *machine)
+{
+	int fd = dso__data_get_fd(dso, machine);
+
+	if (fd < 0)
+		return fd;
+
+	dso__data_put_fd(dso);
+	return fd;
+}
+
+/*
+ * Read a freshly written temporary file through the dso data interface:
+ * first the small reads described by offsets[], then one large read that
+ * covers (almost) the whole file, done twice so that the second pass is
+ * answered from the cache.
+ *
+ * Returns 0 when every check passes.
+ */
+int test__dso_data(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct machine machine;
+	struct dso *dso;
+	char *file = test_file(TEST_FILE_SIZE);
+	size_t i;
+
+	TEST_ASSERT_VAL("No test file", file);
+
+	memset(&machine, 0, sizeof(machine));
+
+	dso = dso__new((const char *)file);
+
+	/* Same check dsos__create() does; never pass a NULL dso on. */
+	TEST_ASSERT_VAL("failed to get dso", dso);
+
+	TEST_ASSERT_VAL("Failed to access to dso",
+			dso__data_fd(dso, &machine) >= 0);
+
+	/* Basic 10 bytes tests. */
+	for (i = 0; i < ARRAY_SIZE(offsets); i++) {
+		struct test_data_offset *data = &offsets[i];
+		ssize_t size;
+		u8 buf[10];
+
+		/* zeroed so that a short read leaves a known tail behind */
+		memset(buf, 0, 10);
+		size = dso__data_read_offset(dso, &machine, data->offset,
+				     buf, 10);
+
+		TEST_ASSERT_VAL("Wrong size", size == data->size);
+		TEST_ASSERT_VAL("Wrong data", !memcmp(buf, data->data, 10));
+	}
+
+	/* Read cross multiple cache pages. */
+	{
+		ssize_t size;
+		int c;
+		u8 *buf;
+
+		buf = malloc(TEST_FILE_SIZE);
+		TEST_ASSERT_VAL("ENOMEM\n", buf);
+
+		/* First iteration to fill caches, second one to read them. */
+		for (c = 0; c < 2; c++) {
+			memset(buf, 0, TEST_FILE_SIZE);
+			/* starting at offset 10, so 10 bytes less than asked for */
+			size = dso__data_read_offset(dso, &machine, 10,
+						     buf, TEST_FILE_SIZE);
+
+			TEST_ASSERT_VAL("Wrong size",
+				size == (TEST_FILE_SIZE - 10));
+
+			for (i = 0; i < (size_t)size; i++)
+				TEST_ASSERT_VAL("Wrong data",
+					buf[i] == (i % 10));
+		}
+
+		free(buf);
+	}
+
+	dso__put(dso);
+	unlink(file);
+	return 0;
+}
+
+/*
+ * Count the entries of the <procfs>/self/fd directory, skipping "." and
+ * "..", and return that count minus one.
+ */
+static long open_files_cnt(void)
+{
+	char path[PATH_MAX];
+	struct dirent *entry;
+	long count = 0;
+	DIR *dir;
+
+	scnprintf(path, PATH_MAX, "%s/self/fd", procfs__mountpoint());
+	pr_debug("fd path: %s\n", path);
+
+	dir = opendir(path);
+	TEST_ASSERT_VAL("failed to open fd directory", dir);
+
+	while ((entry = readdir(dir)) != NULL) {
+		if (strcmp(entry->d_name, ".") && strcmp(entry->d_name, ".."))
+			count++;
+	}
+
+	closedir(dir);
+
+	/* presumably discounts the descriptor held by 'dir' itself - TODO confirm */
+	return count - 1;
+}
+
+static struct dso **dsos;
+
+/*
+ * Fill the global dsos array with @cnt dso objects, each one backed by its
+ * own temporary file of @size bytes. Returns 0 on success.
+ */
+static int dsos__create(int cnt, int size)
+{
+	int idx;
+
+	dsos = malloc(cnt * sizeof(*dsos));
+	TEST_ASSERT_VAL("failed to alloc dsos array", dsos);
+
+	for (idx = 0; idx < cnt; idx++) {
+		char *path = test_file(size);
+
+		TEST_ASSERT_VAL("failed to get dso file", path);
+
+		dsos[idx] = dso__new(path);
+		TEST_ASSERT_VAL("failed to get dso", dsos[idx]);
+	}
+
+	return 0;
+}
+
+/*
+ * Undo dsos__create(): remove the backing file of each of the first @cnt
+ * dso objects, drop the reference on it and free the array.
+ */
+static void dsos__delete(int cnt)
+{
+	int idx;
+
+	for (idx = 0; idx < cnt; idx++) {
+		/* unlink first, the name belongs to the dso */
+		unlink(dsos[idx]->name);
+		dso__put(dsos[idx]);
+	}
+
+	free(dsos);
+}
+
+/*
+ * Set the soft RLIMIT_NOFILE limit of this process to @n, leaving the hard
+ * limit as it is. Returns 0 on success, -1 when the limits cannot be read,
+ * otherwise whatever setrlimit() returns.
+ */
+static int set_fd_limit(int n)
+{
+	struct rlimit limits;
+
+	if (getrlimit(RLIMIT_NOFILE, &limits) != 0)
+		return -1;
+
+	pr_debug("file limit %ld, new %d\n", (long) limits.rlim_cur, n);
+	limits.rlim_cur = n;
+
+	return setrlimit(RLIMIT_NOFILE, &limits);
+}
+
+/*
+ * Check that opening one dso more than the allowed number of open dso data
+ * files closes the first one that was opened, and that no file descriptor
+ * is left behind afterwards.
+ *
+ * The process file limit is set to four times the number of files that are
+ * currently open and half of that many dso objects are created.
+ */
+int test__dso_data_cache(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct machine machine;
+	long nr_end, nr = open_files_cnt();
+	int dso_cnt, limit, i, fd;
+
+	/* Reset the internal dso open counter limit. */
+	reset_fd_limit();
+
+	memset(&machine, 0, sizeof(machine));
+
+	/* set as system limit */
+	limit = nr * 4;
+	TEST_ASSERT_VAL("failed to set file limit", !set_fd_limit(limit));
+
+	/* and this is now our dso open FDs limit */
+	dso_cnt = limit / 2;
+	TEST_ASSERT_VAL("failed to create dsos\n",
+		!dsos__create(dso_cnt, TEST_FILE_SIZE));
+
+	/* Open all but the last dso; the last one is opened after the loop. */
+	for (i = 0; i < (dso_cnt - 1); i++) {
+		struct dso *dso = dsos[i];
+
+		/*
+		 * Open dsos via dso__data_fd(), it opens the data
+		 * file and keep it open (unless open file limit).
+		 */
+		fd = dso__data_fd(dso, &machine);
+		TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+		/* Additionally read from every second dso. */
+		if (i % 2) {
+			#define BUFSIZE 10
+			u8 buf[BUFSIZE];
+			ssize_t n;
+
+			n = dso__data_read_offset(dso, &machine, 0, buf, BUFSIZE);
+			TEST_ASSERT_VAL("failed to read dso", n == BUFSIZE);
+		}
+	}
+
+	/* verify the first one is already open */
+	TEST_ASSERT_VAL("dsos[0] is not open", dsos[0]->data.fd != -1);
+
+	/* open +1 dso to reach the allowed limit (i == dso_cnt - 1 here) */
+	fd = dso__data_fd(dsos[i], &machine);
+	TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+	/* should force the first one to be closed */
+	TEST_ASSERT_VAL("failed to close dsos[0]", dsos[0]->data.fd == -1);
+
+	/* cleanup everything */
+	dsos__delete(dso_cnt);
+
+	/* Make sure we did not leak any file descriptor. */
+	nr_end = open_files_cnt();
+	pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end);
+	TEST_ASSERT_VAL("failed leadking files", nr == nr_end);
+	return 0;
+}
+
+/*
+ * Check that dso data files get closed and reopened on demand once the
+ * process runs into its file descriptor limit; see the scenario description
+ * in the body.
+ */
+int test__dso_data_reopen(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct machine machine;
+	long nr_end, nr = open_files_cnt();
+	int fd, fd_extra;
+
+/* Shorthands for the three dso objects created below. */
+#define dso_0 (dsos[0])
+#define dso_1 (dsos[1])
+#define dso_2 (dsos[2])
+
+	/* Reset the internal dso open counter limit. */
+	reset_fd_limit();
+
+	memset(&machine, 0, sizeof(machine));
+
+	/*
+	 * Test scenario:
+	 * - create 3 dso objects
+	 * - set process file descriptor limit to current
+	 *   files count + 3
+	 * - test that the first dso gets closed when we
+	 *   reach the files count limit
+	 */
+
+	/* Make sure we are able to open 3 fds anyway */
+	TEST_ASSERT_VAL("failed to set file limit",
+			!set_fd_limit((nr + 3)));
+
+	TEST_ASSERT_VAL("failed to create dsos\n", !dsos__create(3, TEST_FILE_SIZE));
+
+	/* open dso_0 */
+	fd = dso__data_fd(dso_0, &machine);
+	TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+	/* open dso_1 */
+	fd = dso__data_fd(dso_1, &machine);
+	TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+	/*
+	 * open extra file descriptor and we just
+	 * reached the files count limit
+	 */
+	fd_extra = open("/dev/null", O_RDONLY);
+	TEST_ASSERT_VAL("failed to open extra fd", fd_extra > 0);
+
+	/* open dso_2 */
+	fd = dso__data_fd(dso_2, &machine);
+	TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+	/*
+	 * dso_0 should get closed, because we reached
+	 * the file descriptor limit
+	 */
+	TEST_ASSERT_VAL("failed to close dso_0", dso_0->data.fd == -1);
+
+	/* open dso_0 */
+	fd = dso__data_fd(dso_0, &machine);
+	TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+	/*
+	 * dso_1 should get closed, because we reached
+	 * the file descriptor limit
+	 */
+	TEST_ASSERT_VAL("failed to close dso_1", dso_1->data.fd == -1);
+
+	/* cleanup everything */
+	close(fd_extra);
+	dsos__delete(3);
+
+	/* Make sure we did not leak any file descriptor. */
+	nr_end = open_files_cnt();
+	pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end);
+	TEST_ASSERT_VAL("failed leadking files", nr == nr_end);
+	return 0;
+}
diff --git a/tests/dwarf-unwind.c b/tests/dwarf-unwind.c
new file mode 100644
index 0000000..2f00806
--- /dev/null
+++ b/tests/dwarf-unwind.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include "tests.h"
+#include "debug.h"
+#include "machine.h"
+#include "event.h"
+#include "../util/unwind.h"
+#include "perf_regs.h"
+#include "map.h"
+#include "thread.h"
+#include "callchain.h"
+
+#if defined (__x86_64__) || defined (__i386__) || defined (__powerpc__)
+#include "arch-tests.h"
+#endif
+
+/* For bsearch. We try to unwind functions in shared object. */
+#include <stdlib.h>
+
+/* Event callback: feed every synthesized mmap event into @machine. */
+static int mmap_handler(struct perf_tool *tool __maybe_unused,
+			union perf_event *event,
+			struct perf_sample *sample,
+			struct machine *machine)
+{
+	int ret = machine__process_mmap2_event(machine, event, sample);
+
+	return ret;
+}
+
+/*
+ * Synthesize mmap events for the current process and route them through
+ * mmap_handler() into @machine. Returns the synthesize call's result.
+ */
+static int init_live_machine(struct machine *machine)
+{
+	pid_t self = getpid();
+	union perf_event event;
+	int ret;
+
+	ret = perf_event__synthesize_mmap_events(NULL, &event, self, self,
+						 mmap_handler, machine, true, 500);
+	return ret;
+}
+
+/*
+ * We need to keep these functions global, despite the
+ * fact that they are used only locally in this object,
+ * in order to keep them around even if the binary is
+ * stripped. If they are gone, the unwind check for
+ * symbol fails.
+ */
+int test_dwarf_unwind__thread(struct thread *thread);
+int test_dwarf_unwind__compare(void *p1, void *p2);
+int test_dwarf_unwind__krava_3(struct thread *thread);
+int test_dwarf_unwind__krava_2(struct thread *thread);
+int test_dwarf_unwind__krava_1(struct thread *thread);
+
+#define MAX_STACK 8
+
+/*
+ * Callback for every unwound stack entry. @arg points to the number of
+ * entries seen so far; the symbol of the current entry is compared with
+ * the function name expected at that depth.
+ *
+ * Returns 0 when the symbol matches, non-zero on a mismatch, on an
+ * unresolved address or when more than MAX_STACK entries show up.
+ */
+static int unwind_entry(struct unwind_entry *entry, void *arg)
+{
+	/* expected call chain, innermost function first */
+	static const char *funcs[MAX_STACK] = {
+		"test__arch_unwind_sample",
+		"test_dwarf_unwind__thread",
+		"test_dwarf_unwind__compare",
+		"bsearch",
+		"test_dwarf_unwind__krava_3",
+		"test_dwarf_unwind__krava_2",
+		"test_dwarf_unwind__krava_1",
+		"test__dwarf_unwind"
+	};
+	unsigned long *cnt = (unsigned long *) arg;
+	unsigned long depth = *cnt;
+	char *symbol = NULL;
+	int idx;
+
+	if (entry->sym)
+		symbol = entry->sym->name;
+
+	if (depth >= MAX_STACK) {
+		pr_debug("failed: crossed the max stack value %d\n", MAX_STACK);
+		return -1;
+	}
+
+	if (!symbol) {
+		pr_debug("failed: got unresolved address 0x%" PRIx64 "\n",
+			 entry->ip);
+		return -1;
+	}
+
+	/* funcs[] is walked backwards when the callchain is in caller order */
+	if (callchain_param.order == ORDER_CALLER)
+		idx = MAX_STACK - depth - 1;
+	else
+		idx = depth;
+
+	*cnt = depth + 1;
+	pr_debug("got: %s 0x%" PRIx64 ", expecting %s\n",
+		 symbol, entry->ip, funcs[idx]);
+	return strcmp((const char *) symbol, funcs[idx]);
+}
+
+/*
+ * Take an unwind sample of @thread, unwind it through unwind_entry() and
+ * require exactly MAX_STACK entries. Returns 0 on success.
+ */
+noinline int test_dwarf_unwind__thread(struct thread *thread)
+{
+	struct perf_sample sample;
+	unsigned long cnt = 0;
+	int err = -1;
+
+	memset(&sample, 0, sizeof(sample));
+
+	if (test__arch_unwind_sample(&sample, thread)) {
+		pr_debug("failed to get unwind sample\n");
+		goto out_free;
+	}
+
+	err = unwind__get_entries(unwind_entry, &cnt, thread,
+				  &sample, MAX_STACK);
+	if (err) {
+		pr_debug("unwind failed\n");
+		goto out_free;
+	}
+
+	if (cnt != MAX_STACK) {
+		pr_debug("got wrong number of stack entries %lu != %d\n",
+			 cnt, MAX_STACK);
+		err = -1;
+	}
+
+out_free:
+	/* both stay NULL from the memset when sampling failed early */
+	free(sample.user_stack.data);
+	free(sample.user_regs.regs);
+	return err;
+}
+
+static int global_unwind_retval = -INT_MAX;
+
+/*
+ * Comparison callback handed to bsearch() by test_dwarf_unwind__krava_3().
+ *
+ * On its first invocation (global_unwind_retval still holds its initial
+ * -INT_MAX) it runs the unwind test for ORDER_CALLER and, if that passed,
+ * for ORDER_CALLEE, and stores the outcome in global_unwind_retval. Later
+ * invocations only compare.
+ */
+noinline int test_dwarf_unwind__compare(void *p1, void *p2)
+{
+	/* Any possible value should be 'thread' */
+	struct thread *thread = *(struct thread **)p1;
+
+	if (global_unwind_retval == -INT_MAX) {
+		/* Call unwinder twice for both callchain orders. */
+		callchain_param.order = ORDER_CALLER;
+
+		global_unwind_retval = test_dwarf_unwind__thread(thread);
+		if (!global_unwind_retval) {
+			callchain_param.order = ORDER_CALLEE;
+			global_unwind_retval = test_dwarf_unwind__thread(thread);
+		}
+	}
+
+	/* the comparison result is the distance between the two pointers */
+	return p1 - p2;
+}
+
+/*
+ * Innermost of the krava call chain: calls bsearch() with
+ * test_dwarf_unwind__compare() as comparison callback, so that the unwind
+ * runs with bsearch on the stack, and returns the result the callback left
+ * in global_unwind_retval.
+ */
+noinline int test_dwarf_unwind__krava_3(struct thread *thread)
+{
+	struct thread *array[2] = {thread, thread};
+	void *fp = &bsearch;
+	/*
+	 * make _bsearch a volatile function pointer to
+	 * prevent potential optimization, which may expand
+	 * bsearch and call compare directly from this function,
+	 * instead of libc shared object.
+	 */
+	void *(*volatile _bsearch)(void *, void *, size_t,
+			size_t, int (*)(void *, void *));
+
+	_bsearch = fp;
+	/*
+	 * NOTE(review): the element size is spelled sizeof(struct thread **)
+	 * while the array elements are struct thread *; both are pointer
+	 * sizes.
+	 */
+	_bsearch(array, &thread, 2, sizeof(struct thread **),
+		 test_dwarf_unwind__compare);
+	return global_unwind_retval;
+}
+
+/*
+ * Non-inlined pass-through to test_dwarf_unwind__krava_3(); unwind_entry()
+ * expects this function's name in the call chain.
+ */
+noinline int test_dwarf_unwind__krava_2(struct thread *thread)
+{
+	return test_dwarf_unwind__krava_3(thread);
+}
+
+/*
+ * Non-inlined pass-through to test_dwarf_unwind__krava_2(); unwind_entry()
+ * expects this function's name in the call chain.
+ */
+noinline int test_dwarf_unwind__krava_1(struct thread *thread)
+{
+	return test_dwarf_unwind__krava_2(thread);
+}
+
+/*
+ * DWARF unwind test entry point: build a host machine that describes the
+ * running process, look up the current thread and unwind it through the
+ * krava_1 -> krava_2 -> krava_3 -> bsearch -> compare call chain.
+ *
+ * Returns 0 on success and non-zero on failure. The machine is torn down
+ * on every path taken after it was created.
+ */
+int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct machine *machine;
+	struct thread *thread;
+	int err = -1;
+
+	machine = machine__new_host();
+	if (!machine) {
+		pr_err("Could not get machine\n");
+		return -1;
+	}
+
+	if (machine__create_kernel_maps(machine)) {
+		pr_err("Failed to create kernel maps\n");
+		/* the machine exists already, do not leak it */
+		goto out;
+	}
+
+	callchain_param.record_mode = CALLCHAIN_DWARF;
+	dwarf_callchain_users = true;
+
+	if (init_live_machine(machine)) {
+		pr_err("Could not init machine\n");
+		goto out;
+	}
+
+	if (verbose > 1)
+		machine__fprintf(machine, stderr);
+
+	thread = machine__find_thread(machine, getpid(), getpid());
+	if (!thread) {
+		pr_err("Could not get thread\n");
+		goto out;
+	}
+
+	err = test_dwarf_unwind__krava_1(thread);
+	thread__put(thread);
+
+ out:
+	machine__delete_threads(machine);
+	machine__delete(machine);
+	return err;
+}
diff --git a/tests/event-times.c b/tests/event-times.c
new file mode 100644
index 0000000..1a2686f
--- /dev/null
+++ b/tests/event-times.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/wait.h>
+#include "tests.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "util.h"
+#include "debug.h"
+#include "thread_map.h"
+#include "target.h"
+
+/*
+ * Attach variant: prepare a "true" workload, mark the last event of
+ * @evlist enable_on_exec, open the events and start the workload.
+ *
+ * Returns TEST_OK when the workload was started, TEST_FAIL when starting it
+ * failed, or the negative error of the step that failed before that.
+ */
+static int attach__enable_on_exec(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(evlist);
+	const char *argv[] = { "true", NULL, };
+	struct target target = {
+		.uid = UINT_MAX,
+	};
+	char sbuf[STRERR_BUFSIZE];
+	int err;
+
+	pr_debug("attaching to spawned child, enable on exec\n");
+
+	err = perf_evlist__create_maps(evlist, &target);
+	if (err < 0) {
+		pr_debug("Not enough memory to create thread/cpu maps\n");
+		return err;
+	}
+
+	err = perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
+	if (err < 0) {
+		pr_debug("Couldn't run the workload!\n");
+		return err;
+	}
+
+	/* set after the workload is prepared, before the event is opened */
+	evsel->attr.enable_on_exec = 1;
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		pr_debug("perf_evlist__open: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		return err;
+	}
+
+	if (perf_evlist__start_workload(evlist) != 1)
+		return TEST_FAIL;
+
+	return TEST_OK;
+}
+
+/* Detach variant for the spawned workload: wait for the child to exit. */
+static int detach__enable_on_exec(struct perf_evlist *evlist)
+{
+	pid_t child = evlist->workload.pid;
+
+	waitpid(child, NULL, 0);
+	return 0;
+}
+
+/*
+ * Attach variant: open the last event of @evlist on the current process in
+ * disabled state and enable it afterwards.
+ *
+ * Returns TEST_OK on success, TEST_FAIL when enabling fails, -1 when the
+ * thread map cannot be created, or the error from opening the event.
+ */
+static int attach__current_disabled(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(evlist);
+	struct thread_map *threads;
+	int err;
+
+	pr_debug("attaching to current thread as disabled\n");
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	evsel->attr.disabled = 1;
+
+	err = perf_evsel__open_per_thread(evsel, threads);
+	/*
+	 * Drop our reference whether or not the open worked, like
+	 * attach__current_enabled() does; it used to leak on failure.
+	 */
+	thread_map__put(threads);
+	if (err) {
+		pr_debug("Failed to open event cpu-clock:u\n");
+		return err;
+	}
+
+	return perf_evsel__enable(evsel) == 0 ? TEST_OK : TEST_FAIL;
+}
+
+/*
+ * Attach variant: open the last event of @evlist on the current process
+ * without touching its disabled attribute.
+ *
+ * Returns TEST_OK on success, TEST_FAIL when the open fails and -1 when
+ * the thread map cannot be created.
+ */
+static int attach__current_enabled(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(evlist);
+	struct thread_map *threads;
+	int err;
+
+	pr_debug("attaching to current thread as enabled\n");
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	if (!threads) {
+		pr_debug("failed to call thread_map__new\n");
+		return -1;
+	}
+
+	err = perf_evsel__open_per_thread(evsel, threads);
+	thread_map__put(threads);
+
+	if (err != 0)
+		return TEST_FAIL;
+
+	return TEST_OK;
+}
+
+/*
+ * Detach callback paired with the attach__current_* and attach__cpu_*
+ * variants in test__event_times().
+ *
+ * NOTE(review): despite the name this calls perf_evsel__enable() on the
+ * last event, not a disable helper - confirm that this is intended.
+ */
+static int detach__disable(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(evlist);
+
+	return perf_evsel__enable(evsel);
+}
+
+/*
+ * Attach variant: open the last event of @evlist on CPU 0 in disabled
+ * state and enable it afterwards.
+ *
+ * Returns the result of perf_evsel__enable() on success, TEST_SKIP when
+ * opening fails with -EACCES, -1 when the cpu map cannot be created, or
+ * the error from opening the event.
+ */
+static int attach__cpu_disabled(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(evlist);
+	struct cpu_map *cpus;
+	int err;
+
+	pr_debug("attaching to CPU 0 as disabled\n");
+
+	cpus = cpu_map__new("0");
+	if (cpus == NULL) {
+		pr_debug("failed to call cpu_map__new\n");
+		return -1;
+	}
+
+	evsel->attr.disabled = 1;
+
+	err = perf_evsel__open_per_cpu(evsel, cpus);
+	/* our reference is not needed past the open; it leaked on errors */
+	cpu_map__put(cpus);
+	if (err) {
+		if (err == -EACCES)
+			return TEST_SKIP;
+
+		pr_debug("Failed to open event cpu-clock:u\n");
+		return err;
+	}
+
+	return perf_evsel__enable(evsel);
+}
+
+/*
+ * Attach variant: open the last event of @evlist on CPU 0 without touching
+ * its disabled attribute.
+ *
+ * Returns TEST_OK on success, TEST_SKIP when opening fails with -EACCES,
+ * TEST_FAIL on any other open error and -1 when the cpu map cannot be
+ * created.
+ */
+static int attach__cpu_enabled(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(evlist);
+	struct cpu_map *cpus;
+	int err;
+
+	pr_debug("attaching to CPU 0 as enabled\n");
+
+	cpus = cpu_map__new("0");
+	if (cpus == NULL) {
+		pr_debug("failed to call cpu_map__new\n");
+		return -1;
+	}
+
+	err = perf_evsel__open_per_cpu(evsel, cpus);
+	/* release before any return; the -EACCES path used to leak the map */
+	cpu_map__put(cpus);
+	if (err == -EACCES)
+		return TEST_SKIP;
+
+	return err ? TEST_FAIL : TEST_OK;
+}
+
+/*
+ * Run one enabled/running time check.
+ *
+ * Creates an event list with a single cpu-clock:u event that also reports
+ * its total enabled and running times, attaches it with @attach, burns some
+ * cycles, detaches it with @detach and compares the two times.
+ *
+ * Returns TEST_OK when both times match, TEST_SKIP when @attach asked to
+ * skip, and TEST_FAIL otherwise.
+ */
+static int test_times(int (attach)(struct perf_evlist *),
+		      int (detach)(struct perf_evlist *))
+{
+	struct perf_counts_values count;
+	struct perf_evlist *evlist = NULL;
+	struct perf_evsel *evsel;
+	int err = -1, i;
+
+	evlist = perf_evlist__new();
+	if (!evlist) {
+		pr_debug("failed to create event list\n");
+		goto out_err;
+	}
+
+	err = parse_events(evlist, "cpu-clock:u", NULL);
+	if (err) {
+		pr_debug("failed to parse event cpu-clock:u\n");
+		goto out_err;
+	}
+
+	evsel = perf_evlist__last(evlist);
+	evsel->attr.read_format |=
+		PERF_FORMAT_TOTAL_TIME_ENABLED |
+		PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+	err = attach(evlist);
+	if (err == TEST_SKIP) {
+		pr_debug("  SKIP  : not enough rights\n");
+		/* skipping must not leak the event list */
+		perf_evlist__delete(evlist);
+		return err;
+	}
+
+	TEST_ASSERT_VAL("failed to attach", !err);
+
+	/* give the event some time to count */
+	for (i = 0; i < 100000000; i++) { }
+
+	TEST_ASSERT_VAL("failed to detach", !detach(evlist));
+
+	perf_evsel__read(evsel, 0, 0, &count);
+
+	/* the event was never scheduled out, so both times have to agree */
+	err = !(count.ena == count.run);
+
+	pr_debug("  %s: ena %" PRIu64", run %" PRIu64"\n",
+		 !err ? "OK    " : "FAILED",
+		 count.ena, count.run);
+
+out_err:
+	perf_evlist__delete(evlist);
+	return !err ? TEST_OK : TEST_FAIL;
+}
+
+/*
+ * This test creates software event 'cpu-clock'
+ * attaches it in several ways (explained below)
+ * and checks that enabled and running times
+ * match.
+ */
+int test__event_times(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err, ret = 0;
+
+/*
+ * Run test_times() for one attach/detach pair. A non-zero result replaces
+ * 'ret' only while 'ret' is still TEST_OK or TEST_SKIP, so any other
+ * result, once recorded, is kept until the end.
+ */
+#define _T(attach, detach)			\
+	err = test_times(attach, detach);	\
+	if (err && (ret == TEST_OK || ret == TEST_SKIP))	\
+		ret = err;
+
+	/* attach on newly spawned process after exec */
+	_T(attach__enable_on_exec,   detach__enable_on_exec)
+	/* attach on current process as enabled */
+	_T(attach__current_enabled,  detach__disable)
+	/* attach on current process as disabled */
+	_T(attach__current_disabled, detach__disable)
+	/* attach on cpu as disabled */
+	_T(attach__cpu_disabled,     detach__disable)
+	/* attach on cpu as enabled */
+	_T(attach__cpu_enabled,      detach__disable)
+
+#undef _T
+	return ret;
+}
diff --git a/tests/event_update.c b/tests/event_update.c
new file mode 100644
index 0000000..f14dcd6
--- /dev/null
+++ b/tests/event_update.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "machine.h"
+#include "tests.h"
+#include "debug.h"
+
+/* Callback: the event has to be the unit update "KRAVA" for event id 123. */
+static int process_event_unit(struct perf_tool *tool __maybe_unused,
+			      union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	struct event_update_event *update;
+
+	update = (struct event_update_event *) event;
+
+	TEST_ASSERT_VAL("wrong id", update->id == 123);
+	TEST_ASSERT_VAL("wrong id", update->type == PERF_EVENT_UPDATE__UNIT);
+	TEST_ASSERT_VAL("wrong unit", !strcmp(update->data, "KRAVA"));
+
+	return 0;
+}
+
+/* Callback: the event has to be the scale update 0.123 for event id 123. */
+static int process_event_scale(struct perf_tool *tool __maybe_unused,
+			       union perf_event *event,
+			       struct perf_sample *sample __maybe_unused,
+			       struct machine *machine __maybe_unused)
+{
+	struct event_update_event *update = (struct event_update_event *) event;
+	struct event_update_event_scale *payload;
+
+	/* the update specific payload follows the common header */
+	payload = (struct event_update_event_scale *) update->data;
+
+	TEST_ASSERT_VAL("wrong id", update->id == 123);
+	TEST_ASSERT_VAL("wrong id", update->type == PERF_EVENT_UPDATE__SCALE);
+	TEST_ASSERT_VAL("wrong scale", payload->scale == 0.123);
+
+	return 0;
+}
+
+/*
+ * Wraps a perf_tool so that process_event_name() can get from the tool
+ * pointer it is called with to the expected event name (via container_of).
+ */
+struct event_name {
+	struct perf_tool tool;	/* handed to the synthesize call */
+	const char *name;	/* name the synthesized update must carry */
+};
+
+/*
+ * Callback: the event has to be the name update for event id 123 and carry
+ * the name stored in the struct event_name that embeds @tool.
+ */
+static int process_event_name(struct perf_tool *tool,
+			      union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	struct event_update_event *update = (struct event_update_event*) event;
+	struct event_name *expected;
+
+	expected = container_of(tool, struct event_name, tool);
+
+	TEST_ASSERT_VAL("wrong id", update->id == 123);
+	TEST_ASSERT_VAL("wrong id", update->type == PERF_EVENT_UPDATE__NAME);
+	TEST_ASSERT_VAL("wrong name", !strcmp(update->data, expected->name));
+
+	return 0;
+}
+
+/*
+ * Callback: the event has to be the cpus update for event id 123 and its
+ * payload has to decode into the cpu map 1,2,3.
+ */
+static int process_event_cpus(struct perf_tool *tool __maybe_unused,
+			      union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	struct event_update_event *update = (struct event_update_event*) event;
+	struct event_update_event_cpus *payload;
+	struct cpu_map *map;
+
+	payload = (struct event_update_event_cpus*) update->data;
+	map = cpu_map__new_data(&payload->cpus);
+
+	TEST_ASSERT_VAL("wrong id", update->id == 123);
+	TEST_ASSERT_VAL("wrong type", update->type == PERF_EVENT_UPDATE__CPUS);
+
+	TEST_ASSERT_VAL("wrong cpus", map->nr == 3);
+	TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1);
+	TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2);
+	TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3);
+
+	cpu_map__put(map);
+	return 0;
+}
+
+/*
+ * Synthesize the four event update records (unit, scale, name, cpus) for
+ * the first event of a default event list, which is given the id 123, and
+ * let the process_event_*() callbacks verify each of them.
+ *
+ * Returns 0 when all four records were synthesized and verified.
+ */
+int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct event_name tmp;
+
+	evlist = perf_evlist__new_default();
+	TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+	evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("failed to allos ids",
+			!perf_evsel__alloc_id(evsel, 1, 1));
+
+	perf_evlist__id_add(evlist, evsel, 0, 0, 123);
+
+	evsel->unit = strdup("KRAVA");
+
+	TEST_ASSERT_VAL("failed to synthesize attr update unit",
+			!perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit));
+
+	evsel->scale = 0.123;
+
+	TEST_ASSERT_VAL("failed to synthesize attr update scale",
+			!perf_event__synthesize_event_update_scale(NULL, evsel, process_event_scale));
+
+	/* process_event_name() finds the expected name through &tmp.tool */
+	tmp.name = perf_evsel__name(evsel);
+
+	TEST_ASSERT_VAL("failed to synthesize attr update name",
+			!perf_event__synthesize_event_update_name(&tmp.tool, evsel, process_event_name));
+
+	evsel->own_cpus = cpu_map__new("1,2,3");
+
+	TEST_ASSERT_VAL("failed to synthesize attr update cpus",
+			!perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus));
+
+	cpu_map__put(evsel->own_cpus);
+	/* cleared so that tearing down the evsel cannot drop it a second time */
+	evsel->own_cpus = NULL;
+
+	/* the event list used to be leaked */
+	perf_evlist__delete(evlist);
+	return 0;
+}
diff --git a/tests/evsel-roundtrip-name.c b/tests/evsel-roundtrip-name.c
new file mode 100644
index 0000000..a104728
--- /dev/null
+++ b/tests/evsel-roundtrip-name.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "evlist.h"
+#include "evsel.h"
+#include "parse-events.h"
+#include "tests.h"
+#include "debug.h"
+#include <errno.h>
+#include <linux/kernel.h>
+
+/*
+ * Round trip test for hardware cache event names: generate the name of
+ * every valid cache type/op/result combination, parse all of them into one
+ * event list, then walk the list and check that each event reports the
+ * name it was created from.
+ *
+ * Returns 0 on success, -ENOMEM when the event list cannot be allocated,
+ * the last parse error, or -1 on a name mismatch.
+ */
+static int perf_evsel__roundtrip_cache_name_test(void)
+{
+	struct perf_evlist *evlist = perf_evlist__new();
+	struct perf_evsel *evsel;
+	char name[128];
+	int cache, op, res, idx, ret = 0;
+
+	if (!evlist)
+		return -ENOMEM;
+
+	/* Pass 1: parse every valid name into the list. */
+	for (cache = 0; cache < PERF_COUNT_HW_CACHE_MAX; cache++) {
+		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+			/* skip invalid cache type */
+			if (!perf_evsel__is_cache_op_valid(cache, op))
+				continue;
+
+			for (res = 0; res < PERF_COUNT_HW_CACHE_RESULT_MAX; res++) {
+				int err;
+
+				__perf_evsel__hw_cache_type_op_res_name(cache, op, res,
+									name, sizeof(name));
+				err = parse_events(evlist, name, NULL);
+				if (err)
+					ret = err;
+			}
+		}
+	}
+
+	/* Pass 2: regenerate the names in the same order and compare. */
+	idx = 0;
+	evsel = perf_evlist__first(evlist);
+
+	for (cache = 0; cache < PERF_COUNT_HW_CACHE_MAX; cache++) {
+		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+			/* skip invalid cache type */
+			if (!perf_evsel__is_cache_op_valid(cache, op))
+				continue;
+
+			for (res = 0; res < PERF_COUNT_HW_CACHE_RESULT_MAX; res++) {
+				__perf_evsel__hw_cache_type_op_res_name(cache, op, res,
+									name, sizeof(name));
+				/* only compare while list and counter are in step */
+				if (evsel->idx != idx)
+					continue;
+
+				idx++;
+
+				if (strcmp(perf_evsel__name(evsel), name) != 0) {
+					pr_debug("%s != %s\n", perf_evsel__name(evsel), name);
+					ret = -1;
+				}
+
+				evsel = perf_evsel__next(evsel);
+			}
+		}
+	}
+
+	perf_evlist__delete(evlist);
+	return ret;
+}
+
+/*
+ * Parse the first @nr_names entries of @names into one event list and
+ * check that every resulting event reports the name found at its own index.
+ *
+ * Returns 0 on success, -ENOMEM when the event list cannot be allocated,
+ * the parse error of the first name that fails to parse, or minus the
+ * number of mismatching names.
+ */
+static int __perf_evsel__name_array_test(const char *names[], int nr_names)
+{
+	struct perf_evlist *evlist = perf_evlist__new();
+	struct perf_evsel *evsel;
+	int n, err;
+
+	if (!evlist)
+		return -ENOMEM;
+
+	for (n = 0; n < nr_names; ++n) {
+		err = parse_events(evlist, names[n], NULL);
+		if (!err)
+			continue;
+
+		pr_debug("failed to parse event '%s', err %d\n",
+			 names[n], err);
+		goto out_delete_evlist;
+	}
+
+	err = 0;
+	evlist__for_each_entry(evlist, evsel) {
+		const char *expected = names[evsel->idx];
+
+		if (strcmp(perf_evsel__name(evsel), expected) != 0) {
+			--err;
+			pr_debug("%s != %s\n", perf_evsel__name(evsel), expected);
+		}
+	}
+
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+	return err;
+}
+
+#define perf_evsel__name_array_test(names) \
+	__perf_evsel__name_array_test(names, ARRAY_SIZE(names))
+
+/*
+ * Run the name round trip checks for hardware, software and hardware cache
+ * events. All three are always run; the result of the last one that failed
+ * is returned, or 0 when none failed.
+ */
+int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int hw_err, sw_err, cache_err;
+	int ret = 0;
+
+	hw_err = perf_evsel__name_array_test(perf_evsel__hw_names);
+	sw_err = __perf_evsel__name_array_test(perf_evsel__sw_names,
+					       PERF_COUNT_SW_DUMMY + 1);
+	cache_err = perf_evsel__roundtrip_cache_name_test();
+
+	if (hw_err)
+		ret = hw_err;
+	if (sw_err)
+		ret = sw_err;
+	if (cache_err)
+		ret = cache_err;
+
+	return ret;
+}
diff --git a/tests/evsel-tp-sched.c b/tests/evsel-tp-sched.c
new file mode 100644
index 0000000..699561f
--- /dev/null
+++ b/tests/evsel-tp-sched.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/err.h>
+#include <traceevent/event-parse.h>
+#include "evsel.h"
+#include "tests.h"
+#include "debug.h"
+
+static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
+				  int size, bool should_be_signed)
+{
+	struct format_field *field = perf_evsel__field(evsel, name);
+	int is_signed;
+	int ret = 0;	/* 0 while every check passes, -1 once any of them fails */
+
+	if (field == NULL) {
+		pr_debug("%s: \"%s\" field not found!\n", evsel->name, name);
+		return -1;
+	}
+
+	is_signed = !!(field->flags & FIELD_IS_SIGNED);	/* test the bit; '|' was always true */
+	if (should_be_signed && !is_signed) {	/* signedness is only enforced when requested */
+		pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n",
+			 evsel->name, name, is_signed, should_be_signed);
+		ret = -1;
+	}
+
+	if (field->size != size) {
+		pr_debug("%s: \"%s\" size (%d) should be %d!\n",
+			 evsel->name, name, field->size, size);
+		ret = -1;
+	}
+
+	return ret;
+}
+
+int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch");
+	int ret = 0;	/* set to -1 on any field mismatch; remaining fields are still checked */
+
+	if (IS_ERR(evsel)) {
+		pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel));
+		return -1;
+	}
+
+	if (perf_evsel__test_field(evsel, "prev_comm", 16, false))	/* char signedness is implementation-defined */
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "prev_pid", 4, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "prev_prio", 4, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "prev_state", sizeof(long), true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "next_comm", 16, false))	/* char array: size check only */
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "next_pid", 4, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "next_prio", 4, true))
+		ret = -1;
+
+	perf_evsel__delete(evsel);
+
+	evsel = perf_evsel__newtp("sched", "sched_wakeup");
+	if (IS_ERR(evsel)) {
+		pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel));
+		return -1;
+	}
+
+	if (perf_evsel__test_field(evsel, "comm", 16, false))	/* char array: size check only */
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "pid", 4, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "prio", 4, true))
+		ret = -1;
+
+	if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
+		ret = -1;
+
+	perf_evsel__delete(evsel);	/* the second evsel used to be leaked here */
+	return ret;
+}
diff --git a/tests/expr.c b/tests/expr.c
new file mode 100644
index 0000000..01f0706
--- /dev/null
+++ b/tests/expr.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/debug.h"
+#include "util/expr.h"
+#include "tests.h"
+#include <stdlib.h>
+
+static int test(struct parse_ctx *ctx, const char *e, double val2)
+{
+	double val;
+
+	if (expr__parse(&val, ctx, &e))	/* a non-zero return is treated as a parse failure */
+		TEST_ASSERT_VAL("parse test failed", 0);	/* condition 0: this assertion always trips */
+	TEST_ASSERT_VAL("unexpected value", val == val2);	/* exact floating-point comparison against val2 */
+	return 0;
+}
+
+int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
+{
+	const char *p;
+	const char **other;
+	double val;
+	int ret;
+	struct parse_ctx ctx;
+	int num_other;
+
+	expr__ctx_init(&ctx);
+	expr__add_id(&ctx, "FOO", 1);	/* FOO and BAR are the identifiers used by the expressions below */
+	expr__add_id(&ctx, "BAR", 2);
+
+	ret = test(&ctx, "1+1", 2);
+	ret |= test(&ctx, "FOO+BAR", 3);	/* results are OR-ed, so any failure leaves ret non-zero */
+	ret |= test(&ctx, "(BAR/2)%2", 1);
+	ret |= test(&ctx, "1 - -4",  5);
+	ret |= test(&ctx, "(FOO-1)*2 + (BAR/2)%2 - -4",  5);
+	ret |= test(&ctx, "1-1 | 1", 1);
+	ret |= test(&ctx, "1-1 & 1", 0);
+	ret |= test(&ctx, "min(1,2) + 1", 2);
+	ret |= test(&ctx, "max(1,2) + 1", 3);
+	ret |= test(&ctx, "1+1 if 3*4 else 0", 2);
+
+	if (ret)
+		return ret;
+
+	p = "FOO/0";
+	ret = expr__parse(&val, &ctx, &p);
+	TEST_ASSERT_VAL("division by zero", ret == 1);	/* malformed input must make expr__parse() return exactly 1 */
+
+	p = "BAR/";
+	ret = expr__parse(&val, &ctx, &p);
+	TEST_ASSERT_VAL("missing operand", ret == 1);
+
+	TEST_ASSERT_VAL("find other",
+			expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0);
+	TEST_ASSERT_VAL("find other", num_other == 3);	/* every identifier except the "FOO" passed in */
+	TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR"));
+	TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ"));
+	TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO"));
+	TEST_ASSERT_VAL("find other", other[3] == NULL);	/* the array is expected to be NULL-terminated */
+	free((void *)other);	/* NOTE(review): only the array is freed; if expr__find_other() also allocates the strings they leak - confirm */
+
+	return 0;
+}
diff --git a/tests/fdarray.c b/tests/fdarray.c
new file mode 100644
index 0000000..c7c81c4
--- /dev/null
+++ b/tests/fdarray.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <api/fd/array.h>
+#include <poll.h>
+#include "util/debug.h"
+#include "tests/tests.h"
+
+static void fdarray__init_revents(struct fdarray *fda, short revents)
+{
+	int slot, fake_fd;	/* fake_fd counts down from nr to 1 as slot counts up */
+
+	fda->nr = fda->nr_alloc;
+
+	for (slot = 0, fake_fd = fda->nr; slot < fda->nr; slot++, fake_fd--) {
+		fda->entries[slot].revents = revents;
+		fda->entries[slot].fd	   = fake_fd;
+	}
+}
+
+static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE *fp)
+{
+	int nr_chars;	/* what the prefix fprintf() reported */
+
+	if (!(verbose > 0))
+		return 0;	/* nothing is written unless verbose is positive */
+
+	nr_chars = fprintf(fp, "\n%s: ", prefix);
+	return nr_chars + fdarray__fprintf(fda, fp);
+}
+
+int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int nr_fds, expected_fd[2], fd, err = TEST_FAIL;	/* err stays TEST_FAIL until every check has passed */
+	struct fdarray *fda = fdarray__new(5, 5);
+
+	if (fda == NULL) {
+		pr_debug("\nfdarray__new() failed!");
+		goto out;
+	}
+
+	fdarray__init_revents(fda, POLLIN);	/* no entry has POLLHUP set, so nothing may be dropped */
+	nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
+	if (nr_fds != fda->nr_alloc) {
+		pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything",
+			 nr_fds, fda->nr_alloc);
+		goto out_delete;
+	}
+
+	fdarray__init_revents(fda, POLLHUP);	/* every entry has POLLHUP set, so all must be dropped */
+	nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
+	if (nr_fds != 0) {
+		pr_debug("\nfdarray__filter()=%d != 0, should have filtered all fds",
+			 nr_fds);	/* the expected count here is 0, not nr_alloc */
+		goto out_delete;
+	}
+
+	fdarray__init_revents(fda, POLLHUP);
+	fda->entries[2].revents = POLLIN;
+	expected_fd[0] = fda->entries[2].fd;	/* the lone survivor must end up in slot 0 */
+
+	pr_debug("\nfiltering all but fda->entries[2]:");
+	fdarray__fprintf_prefix(fda, "before", stderr);
+	nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
+	fdarray__fprintf_prefix(fda, " after", stderr);
+	if (nr_fds != 1) {
+		pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds);
+		goto out_delete;
+	}
+
+	if (fda->entries[0].fd != expected_fd[0]) {
+		pr_debug("\nfda->entries[0].fd=%d != %d\n",
+			 fda->entries[0].fd, expected_fd[0]);
+		goto out_delete;
+	}
+
+	fdarray__init_revents(fda, POLLHUP);
+	fda->entries[0].revents = POLLIN;
+	expected_fd[0] = fda->entries[0].fd;
+	fda->entries[3].revents = POLLIN;
+	expected_fd[1] = fda->entries[3].fd;	/* survivors must keep their relative order */
+
+	pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
+	fdarray__fprintf_prefix(fda, "before", stderr);
+	nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
+	fdarray__fprintf_prefix(fda, " after", stderr);
+	if (nr_fds != 2) {
+		pr_debug("\nfdarray__filter()=%d != 2, should have left just two events",
+			 nr_fds);
+		goto out_delete;
+	}
+
+	for (fd = 0; fd < 2; ++fd) {
+		if (fda->entries[fd].fd != expected_fd[fd]) {
+			pr_debug("\nfda->entries[%d].fd=%d != %d\n", fd,
+				 fda->entries[fd].fd, expected_fd[fd]);
+			goto out_delete;
+		}
+	}
+
+	pr_debug("\n");
+
+	err = 0;
+out_delete:
+	fdarray__delete(fda);
+out:
+	return err;
+}
+
+int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err = TEST_FAIL;	/* only cleared to 0 after every add and check succeeded */
+	struct fdarray *fda = fdarray__new(2, 2);
+
+	if (fda == NULL) {
+		pr_debug("\nfdarray__new() failed!");
+		goto out;
+	}
+
+#define FDA_CHECK(_idx, _fd, _revents)					   \
+	if (fda->entries[_idx].fd != _fd) {				   \
+		pr_debug("\n%d: fda->entries[%d](%d) != %d!",		   \
+			 __LINE__, _idx, fda->entries[_idx].fd, _fd);	   \
+		goto out_delete;					   \
+	}								   \
+	if (fda->entries[_idx].events != (_revents)) {			   \
+		pr_debug("\n%d: fda->entries[%d].events(%d) != %d!",	   \
+			 __LINE__, _idx, fda->entries[_idx].events, _revents); \
+		goto out_delete;					   \
+	}
+
+#define FDA_ADD(_idx, _fd, _revents, _nr)				   \
+	if (fdarray__add(fda, _fd, _revents) < 0) {			   \
+		pr_debug("\n%d: fdarray__add(fda, %d, %d) failed!",	   \
+			 __LINE__,_fd, _revents);			   \
+		goto out_delete;					   \
+	}								   \
+	if (fda->nr != _nr) {						   \
+		pr_debug("\n%d: fdarray__add(fda, %d, %d)=%d != %d",	   \
+			 __LINE__,_fd, _revents, fda->nr, _nr);		   \
+		goto out_delete;					   \
+	}								   \
+	FDA_CHECK(_idx, _fd, _revents)
+
+	FDA_ADD(0, 1, POLLIN, 1);	/* arguments: slot, fd, events, expected fda->nr afterwards */
+	FDA_ADD(1, 2, POLLERR, 2);
+
+	fdarray__fprintf_prefix(fda, "before growing array", stderr);
+
+	FDA_ADD(2, 35, POLLHUP, 3);	/* per the message above, this add is expected to grow the array */
+
+	if (fda->entries == NULL) {
+		pr_debug("\nfdarray__add(fda, 35, POLLHUP) should have allocated fda->pollfd!");
+		goto out_delete;
+	}
+
+	fdarray__fprintf_prefix(fda, "after 3rd add", stderr);
+
+	FDA_ADD(3, 88, POLLIN | POLLOUT, 4);
+
+	fdarray__fprintf_prefix(fda, "after 4th add", stderr);
+
+	FDA_CHECK(0, 1, POLLIN);	/* re-check all slots: earlier entries must be unchanged by later adds */
+	FDA_CHECK(1, 2, POLLERR);
+	FDA_CHECK(2, 35, POLLHUP);
+	FDA_CHECK(3, 88, POLLIN | POLLOUT);
+
+#undef FDA_ADD
+#undef FDA_CHECK
+
+	pr_debug("\n");
+
+	err = 0;
+out_delete:
+	fdarray__delete(fda);
+out:
+	return err;
+}
diff --git a/tests/hists_common.c b/tests/hists_common.c
new file mode 100644
index 0000000..f7c5b61
--- /dev/null
+++ b/tests/hists_common.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include "perf.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "tests/hists_common.h"
+#include <linux/kernel.h>
+
+static struct {
+	u32 pid;
+	const char *comm;
+} fake_threads[] = {	/* one fake thread per entry; setup_fake_machine() uses pid as the tid too */
+	{ FAKE_PID_PERF1, "perf" },
+	{ FAKE_PID_PERF2, "perf" },
+	{ FAKE_PID_BASH,  "bash" },
+};
+
+static struct {
+	u32 pid;
+	u64 start;
+	const char *filename;
+} fake_mmap_info[] = {	/* three mappings per fake process; each becomes one synthetic mmap event */
+	{ FAKE_PID_PERF1, FAKE_MAP_PERF,   "perf" },
+	{ FAKE_PID_PERF1, FAKE_MAP_LIBC,   "libc" },
+	{ FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" },
+	{ FAKE_PID_PERF2, FAKE_MAP_PERF,   "perf" },
+	{ FAKE_PID_PERF2, FAKE_MAP_LIBC,   "libc" },
+	{ FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" },
+	{ FAKE_PID_BASH,  FAKE_MAP_BASH,   "bash" },
+	{ FAKE_PID_BASH,  FAKE_MAP_LIBC,   "libc" },
+	{ FAKE_PID_BASH,  FAKE_MAP_KERNEL, "[kernel]" },
+};
+
+struct fake_sym {	/* description of one symbol handed to symbol__new() */
+	u64 start;
+	u64 length;
+	const char *name;
+};
+
+static struct fake_sym perf_syms[] = {	/* symbols of the fake "perf" dso */
+	{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
+	{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" },
+	{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" },
+};
+
+static struct fake_sym bash_syms[] = {	/* symbols of the fake "bash" dso */
+	{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
+	{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" },
+	{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" },
+};
+
+static struct fake_sym libc_syms[] = {	/* symbols of the fake "libc" dso */
+	{ 700, 100, "malloc" },	/* NOTE(review): these three literal rows equal the FAKE_SYM_* rows below (700/800/900, length 100) */
+	{ 800, 100, "free" },
+	{ 900, 100, "realloc" },
+	{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" },
+	{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" },
+	{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" },
+};
+
+static struct fake_sym kernel_syms[] = {	/* symbols of the fake "[kernel]" dso */
+	{ FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" },
+	{ FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" },
+	{ FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" },
+};
+
+static struct {
+	const char *dso_name;
+	struct fake_sym *syms;
+	size_t nr_syms;
+} fake_symbols[] = {	/* maps each dso name to its symbol table and entry count */
+	{ "perf", perf_syms, ARRAY_SIZE(perf_syms) },
+	{ "bash", bash_syms, ARRAY_SIZE(bash_syms) },
+	{ "libc", libc_syms, ARRAY_SIZE(libc_syms) },
+	{ "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) },
+};
+
+struct machine *setup_fake_machine(struct machines *machines)
+{
+	struct machine *machine = machines__find(machines, HOST_KERNEL_ID);
+	size_t i;
+
+	if (machine == NULL) {
+		pr_debug("Not enough memory for machine setup\n");	/* NOTE(review): this path is a failed lookup, not an allocation */
+		return NULL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {	/* step 1: create the fake threads and name them */
+		struct thread *thread;
+
+		thread = machine__findnew_thread(machine, fake_threads[i].pid,
+						 fake_threads[i].pid);
+		if (thread == NULL)
+			goto out;
+
+		thread__set_comm(thread, fake_threads[i].comm, 0);
+		thread__put(thread);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {	/* step 2: feed one synthetic mmap event per mapping */
+		struct perf_sample sample = {
+			.cpumode = PERF_RECORD_MISC_USER,
+		};
+		union perf_event fake_mmap_event = {
+			.mmap = {
+				.pid = fake_mmap_info[i].pid,
+				.tid = fake_mmap_info[i].pid,
+				.start = fake_mmap_info[i].start,
+				.len = FAKE_MAP_LENGTH,
+				.pgoff = 0ULL,
+			},
+		};
+
+		strcpy(fake_mmap_event.mmap.filename,
+		       fake_mmap_info[i].filename);
+
+		machine__process_mmap_event(machine, &fake_mmap_event, &sample);	/* return value is not checked */
+	}
+
+	for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {	/* step 3: populate each dso with its fake symbols */
+		size_t k;
+		struct dso *dso;
+
+		dso = machine__findnew_dso(machine, fake_symbols[i].dso_name);
+		if (dso == NULL)
+			goto out;
+
+		/* emulate dso__load() */
+		dso__set_loaded(dso, MAP__FUNCTION);
+
+		for (k = 0; k < fake_symbols[i].nr_syms; k++) {
+			struct symbol *sym;
+			struct fake_sym *fsym = &fake_symbols[i].syms[k];
+
+			sym = symbol__new(fsym->start, fsym->length,
+					  STB_GLOBAL, fsym->name);
+			if (sym == NULL) {
+				dso__put(dso);
+				goto out;
+			}
+
+			symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
+		}
+
+		dso__put(dso);
+	}
+
+	return machine;
+
+out:
+	pr_debug("Not enough memory for machine setup\n");
+	machine__delete_threads(machine);	/* failure path: drop the threads added so far and report NULL */
+	return NULL;
+}
+
+void print_hists_in(struct hists *hists)
+{
+	struct rb_root *tree;
+	struct rb_node *nd;
+	int idx;
+
+	/* Collapsed entries have their own tree; otherwise walk the input tree. */
+	if (hists__has(hists, need_collapse))
+		tree = &hists->entries_collapsed;
+	else
+		tree = hists->entries_in;
+
+	pr_info("----- %s --------\n", __func__);
+
+	/* idx counts filtered entries too, so it is the position in the tree. */
+	for (idx = 0, nd = rb_first(tree); nd; idx++, nd = rb_next(nd)) {
+		struct hist_entry *entry;
+
+		entry = rb_entry(nd, struct hist_entry, rb_node_in);
+
+		if (entry->filtered)
+			continue;
+
+		pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
+			idx, thread__comm_str(entry->thread),
+			entry->ms.map->dso->short_name,
+			entry->ms.sym->name, entry->stat.period);
+	}
+}
+
+void print_hists_out(struct hists *hists)
+{
+	struct rb_root *tree;
+	struct rb_node *nd;
+	int idx;
+
+	tree = &hists->entries;
+
+	pr_info("----- %s --------\n", __func__);
+
+	/* Walk the output tree in order; idx also advances past filtered entries. */
+	for (idx = 0, nd = rb_first(tree); nd; idx++, nd = rb_next(nd)) {
+		struct hist_entry *entry;
+
+		entry = rb_entry(nd, struct hist_entry, rb_node);
+
+		/* Entries marked as filtered are not printed. */
+		if (entry->filtered)
+			continue;
+
+		pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n",
+			idx, thread__comm_str(entry->thread), entry->thread->tid,
+			entry->ms.map->dso->short_name,
+			entry->ms.sym->name, entry->stat.period,
+			entry->stat_acc ? entry->stat_acc->period : 0);
+	}
+}
diff --git a/tests/hists_common.h b/tests/hists_common.h
new file mode 100644
index 0000000..a2de0ff
--- /dev/null
+++ b/tests/hists_common.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TESTS__HISTS_COMMON_H__
+#define __PERF_TESTS__HISTS_COMMON_H__
+
+struct machine;
+struct machines;
+
+#define FAKE_PID_PERF1  100	/* pids of the three fake processes */
+#define FAKE_PID_PERF2  200
+#define FAKE_PID_BASH   300
+
+#define FAKE_MAP_PERF    0x400000	/* start addresses of the fake mappings */
+#define FAKE_MAP_BASH    0x400000	/* same start address as FAKE_MAP_PERF */
+#define FAKE_MAP_LIBC    0x500000
+#define FAKE_MAP_KERNEL  0xf00000
+#define FAKE_MAP_LENGTH  0x100000	/* length used for every fake mapping */
+
+#define FAKE_SYM_OFFSET1  700	/* symbol start values, used as offsets from a mapping start below */
+#define FAKE_SYM_OFFSET2  800
+#define FAKE_SYM_OFFSET3  900
+#define FAKE_SYM_LENGTH   100
+
+#define FAKE_IP_PERF_MAIN  (FAKE_MAP_PERF + FAKE_SYM_OFFSET1)	/* parenthesized so the sum survives any surrounding operator */
+#define FAKE_IP_PERF_RUN_COMMAND  (FAKE_MAP_PERF + FAKE_SYM_OFFSET2)
+#define FAKE_IP_PERF_CMD_RECORD  (FAKE_MAP_PERF + FAKE_SYM_OFFSET3)
+#define FAKE_IP_BASH_MAIN  (FAKE_MAP_BASH + FAKE_SYM_OFFSET1)
+#define FAKE_IP_BASH_XMALLOC  (FAKE_MAP_BASH + FAKE_SYM_OFFSET2)
+#define FAKE_IP_BASH_XFREE  (FAKE_MAP_BASH + FAKE_SYM_OFFSET3)
+#define FAKE_IP_LIBC_MALLOC  (FAKE_MAP_LIBC + FAKE_SYM_OFFSET1)
+#define FAKE_IP_LIBC_FREE  (FAKE_MAP_LIBC + FAKE_SYM_OFFSET2)
+#define FAKE_IP_LIBC_REALLOC  (FAKE_MAP_LIBC + FAKE_SYM_OFFSET3)
+#define FAKE_IP_KERNEL_SCHEDULE  (FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1)
+#define FAKE_IP_KERNEL_PAGE_FAULT  (FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2)
+#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN  (FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3)
+
+/*
+ * setup_fake_machine() provides a test environment consisting of
+ * 3 processes, each with 3 mappings, each of which in turn has
+ * 3 symbols.  See the table below:
+ *
+ * Command:  Pid  Shared Object               Symbol
+ * .............  .............  ...................
+ *    perf:  100           perf  main
+ *    perf:  100           perf  run_command
+ *    perf:  100           perf  cmd_record
+ *    perf:  100           libc  malloc
+ *    perf:  100           libc  free
+ *    perf:  100           libc  realloc
+ *    perf:  100       [kernel]  schedule
+ *    perf:  100       [kernel]  page_fault
+ *    perf:  100       [kernel]  sys_perf_event_open
+ *    perf:  200           perf  main
+ *    perf:  200           perf  run_command
+ *    perf:  200           perf  cmd_record
+ *    perf:  200           libc  malloc
+ *    perf:  200           libc  free
+ *    perf:  200           libc  realloc
+ *    perf:  200       [kernel]  schedule
+ *    perf:  200       [kernel]  page_fault
+ *    perf:  200       [kernel]  sys_perf_event_open
+ *    bash:  300           bash  main
+ *    bash:  300           bash  xmalloc
+ *    bash:  300           bash  xfree
+ *    bash:  300           libc  malloc
+ *    bash:  300           libc  free
+ *    bash:  300           libc  realloc
+ *    bash:  300       [kernel]  schedule
+ *    bash:  300       [kernel]  page_fault
+ *    bash:  300       [kernel]  sys_perf_event_open
+ */
+struct machine *setup_fake_machine(struct machines *machines);
+
+void print_hists_in(struct hists *hists);
+void print_hists_out(struct hists *hists);
+
+#endif /* __PERF_TESTS__HISTS_COMMON_H__ */
diff --git a/tests/hists_cumulate.c b/tests/hists_cumulate.c
new file mode 100644
index 0000000..65fe02b
--- /dev/null
+++ b/tests/hists_cumulate.c
@@ -0,0 +1,737 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "perf.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "util/parse-events.h"
+#include "tests/tests.h"
+#include "tests/hists_common.h"
+#include <linux/kernel.h>
+
+struct sample {
+	u32 pid;	/* also used as the tid of the generated sample */
+	u64 ip;
+	struct thread *thread;	/* thread, map and sym are filled in by add_hist_entries() after resolving */
+	struct map *map;
+	struct symbol *sym;
+};
+
+/* For the numbers, see hists_common.c */
+static struct sample fake_samples[] = {	/* entry i is paired with fake_callchains[i] */
+	/* perf [kernel] schedule() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
+	/* perf [perf]   main() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
+	/* perf [perf]   cmd_record() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
+	/* perf [libc]   malloc() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
+	/* perf [libc]   free() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
+	/* perf [perf]   main() */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
+	/* perf [kernel] page_fault() */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+	/* bash [bash]   main() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_MAIN, },
+	/* bash [bash]   xmalloc() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
+	/* bash [kernel] page_fault() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+};
+
+/*
+ * Will be cast to struct ip_callchain, whose nr and ips[] members
+ * are all 64-bit entries.
+ */
+static u64 fake_callchains[][10] = {	/* row layout: entry count first, then that many ips */
+	/*   schedule => run_command => main */
+	{ 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+	/*   main  */
+	{ 1, FAKE_IP_PERF_MAIN, },
+	/*   cmd_record => run_command => main */
+	{ 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+	/*   malloc => cmd_record => run_command => main */
+	{ 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
+	     FAKE_IP_PERF_MAIN, },
+	/*   free => cmd_record => run_command => main */
+	{ 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
+	     FAKE_IP_PERF_MAIN, },
+	/*   main */
+	{ 1, FAKE_IP_PERF_MAIN, },
+	/*   page_fault => sys_perf_event_open => run_command => main */
+	{ 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN,
+	     FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+	/*   main */
+	{ 1, FAKE_IP_BASH_MAIN, },
+	/*   xmalloc => malloc => xmalloc => malloc => xmalloc => main */
+	{ 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC,
+	     FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, },
+	/*   page_fault => malloc => main */
+	{ 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, },
+};
+
+static int add_hist_entries(struct hists *hists, struct machine *machine)
+{
+	struct addr_location al;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
+	struct perf_sample sample = { .period = 1000, };	/* every fake sample carries the same period */
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+		struct hist_entry_iter iter = {
+			.evsel = evsel,
+			.sample	= &sample,
+			.hide_unresolved = false,
+		};
+
+		if (symbol_conf.cumulate_callchain)	/* iterator ops follow the mode selected by the calling test */
+			iter.ops = &hist_iter_cumulative;
+		else
+			iter.ops = &hist_iter_normal;
+
+		sample.cpumode = PERF_RECORD_MISC_USER;
+		sample.pid = fake_samples[i].pid;
+		sample.tid = fake_samples[i].pid;
+		sample.ip = fake_samples[i].ip;
+		sample.callchain = (struct ip_callchain *)fake_callchains[i];	/* row i pairs with sample i */
+
+		if (machine__resolve(machine, &al, &sample) < 0)
+			goto out;
+
+		if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
+					 NULL) < 0) {
+			addr_location__put(&al);
+			goto out;
+		}
+
+		fake_samples[i].thread = al.thread;	/* remember what the sample resolved to */
+		fake_samples[i].map = al.map;
+		fake_samples[i].sym = al.sym;
+	}
+
+	return TEST_OK;
+
+out:
+	pr_debug("Not enough memory for adding a hist entry\n");	/* printed for either failure above, whatever its cause */
+	return TEST_FAIL;
+}
+
+static void del_hist_entries(struct hists *hists)
+{
+	struct rb_root *out_tree = &hists->entries;
+	struct rb_root *in_tree;
+	struct rb_node *nd;
+
+	/* Every entry is linked into two trees; pick the input-side one in use. */
+	if (hists__has(hists, need_collapse))
+		in_tree = &hists->entries_collapsed;
+	else
+		in_tree = hists->entries_in;
+
+	while (!RB_EMPTY_ROOT(out_tree)) {
+		struct hist_entry *entry;
+
+		nd = rb_first(out_tree);
+		entry = rb_entry(nd, struct hist_entry, rb_node);
+
+		rb_erase(nd, out_tree);
+		rb_erase(&entry->rb_node_in, in_tree);
+		hist_entry__delete(entry);
+	}
+}
+
+typedef int (*test_fn_t)(struct perf_evsel *, struct machine *);	/* signature shared by test1..test4 in this file */
+
+#define COMM(he)  (thread__comm_str(he->thread))	/* accessors taking a struct hist_entry pointer */
+#define DSO(he)   (he->ms.map->dso->short_name)
+#define SYM(he)   (he->ms.sym->name)
+#define CPU(he)   (he->cpu)
+#define PID(he)   (he->thread->tid)
+#define DEPTH(he) (he->callchain->max_depth)
+#define CDSO(cl)  (cl->ms.map->dso->short_name)	/* accessors taking a struct callchain_list pointer */
+#define CSYM(cl)  (cl->ms.sym->name)
+
+struct result {	/* one expected hist entry, compared by do_test() */
+	u64 children;	/* compared with stat_acc->period, only when cumulate_callchain is set */
+	u64 self;	/* compared with stat.period */
+	const char *comm;
+	const char *dso;
+	const char *sym;
+};
+
+struct callchain_result {	/* expected callchain of one hist entry */
+	u64 nr;	/* number of node[] slots that are filled in */
+	struct {
+		const char *dso;
+		const char *sym;
+	} node[10];
+};
+
+static int do_test(struct hists *hists, struct result *expected, size_t nr_expected,
+		   struct callchain_result *expected_callchain, size_t nr_callchain)
+{
+	char buf[32];	/* holds the per-entry failure message built with scnprintf() */
+	size_t i, c;
+	struct hist_entry *he;
+	struct rb_root *root;
+	struct rb_node *node;
+	struct callchain_node *cnode;
+	struct callchain_list *clist;
+
+	/*
+	 * adding and deleting hist entries must be done outside of this
+	 * function since TEST_ASSERT_VAL() returns in case of failure.
+	 */
+	hists__collapse_resort(hists, NULL);
+	perf_evsel__output_resort(hists_to_evsel(hists), NULL);
+
+	if (verbose > 2) {
+		pr_info("use callchain: %d, cumulate callchain: %d\n",
+			symbol_conf.use_callchain,
+			symbol_conf.cumulate_callchain);
+		print_hists_out(hists);
+	}
+
+	root = &hists->entries;
+	for (node = rb_first(root), i = 0;
+	     node && (he = rb_entry(node, struct hist_entry, rb_node));
+	     node = rb_next(node), i++) {	/* entry i of the output tree is matched against expected[i] */
+		scnprintf(buf, sizeof(buf), "Invalid hist entry #%zd", i);
+
+		TEST_ASSERT_VAL("Incorrect number of hist entry",
+				i < nr_expected);
+		TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self &&
+				!strcmp(COMM(he), expected[i].comm) &&
+				!strcmp(DSO(he), expected[i].dso) &&
+				!strcmp(SYM(he), expected[i].sym));
+
+		if (symbol_conf.cumulate_callchain)
+			TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children);
+
+		if (!symbol_conf.use_callchain)
+			continue;
+
+		/* check callchain entries */
+		root = &he->callchain->node.rb_root;	/* root is reused here; the outer loop no longer reads it */
+
+		TEST_ASSERT_VAL("callchains expected", !RB_EMPTY_ROOT(root));
+		cnode = rb_entry(rb_first(root), struct callchain_node, rb_node);	/* only the first callchain node is inspected */
+
+		c = 0;
+		list_for_each_entry(clist, &cnode->val, list) {
+			scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zd/%zd", i, c);
+
+			TEST_ASSERT_VAL("Incorrect number of callchain entry",
+					c < expected_callchain[i].nr);
+			TEST_ASSERT_VAL(buf,
+				!strcmp(CDSO(clist), expected_callchain[i].node[c].dso) &&
+				!strcmp(CSYM(clist), expected_callchain[i].node[c].sym));
+			c++;
+		}
+		/* TODO: handle multiple child nodes properly */
+		TEST_ASSERT_VAL("Incorrect number of callchain entry",
+				c <= expected_callchain[i].nr);
+	}
+	TEST_ASSERT_VAL("Incorrect number of hist entry",
+			i == nr_expected);
+	TEST_ASSERT_VAL("Incorrect number of callchain entry",
+			!symbol_conf.use_callchain || nr_expected == nr_callchain);	/* table sizes must agree whenever callchains are checked */
+	return 0;
+}
+
+/* NO callchain + NO children */
+static int test1(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = evsel__hists(evsel);
+	/*
+	 * expected output:
+	 *
+	 * Overhead  Command  Shared Object          Symbol
+	 * ========  =======  =============  ==============
+	 *   20.00%     perf  perf           [.] main
+	 *   10.00%     bash  [kernel]       [k] page_fault
+	 *   10.00%     bash  bash           [.] main
+	 *   10.00%     bash  bash           [.] xmalloc
+	 *   10.00%     perf  [kernel]       [k] page_fault
+	 *   10.00%     perf  [kernel]       [k] schedule
+	 *   10.00%     perf  libc           [.] free
+	 *   10.00%     perf  libc           [.] malloc
+	 *   10.00%     perf  perf           [.] cmd_record
+	 */
+	struct result expected[] = {	/* columns: children, self, comm, dso, sym */
+		{ 0, 2000, "perf", "perf",     "main" },
+		{ 0, 1000, "bash", "[kernel]", "page_fault" },
+		{ 0, 1000, "bash", "bash",     "main" },
+		{ 0, 1000, "bash", "bash",     "xmalloc" },
+		{ 0, 1000, "perf", "[kernel]", "page_fault" },
+		{ 0, 1000, "perf", "[kernel]", "schedule" },
+		{ 0, 1000, "perf", "libc",     "free" },
+		{ 0, 1000, "perf", "libc",     "malloc" },
+		{ 0, 1000, "perf", "perf",     "cmd_record" },
+	};
+
+	symbol_conf.use_callchain = false;	/* global mode flags read by add_hist_entries() and do_test() */
+	symbol_conf.cumulate_callchain = false;
+	perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+
+	setup_sorting(NULL);
+	callchain_register_param(&callchain_param);
+
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
+
+out:
+	del_hist_entries(hists);	/* cleanup runs on success and failure alike */
+	reset_output_field();
+	return err;
+}
+
+/* callchain + NO children */
+static int test2(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = evsel__hists(evsel);
+	/*
+	 * expected output:
+	 *
+	 * Overhead  Command  Shared Object          Symbol
+	 * ========  =======  =============  ==============
+	 *   20.00%     perf  perf           [.] main
+	 *              |
+	 *              --- main
+	 *
+	 *   10.00%     bash  [kernel]       [k] page_fault
+	 *              |
+	 *              --- page_fault
+	 *                  malloc
+	 *                  main
+	 *
+	 *   10.00%     bash  bash           [.] main
+	 *              |
+	 *              --- main
+	 *
+	 *   10.00%     bash  bash           [.] xmalloc
+	 *              |
+	 *              --- xmalloc
+	 *                  malloc
+	 *                  xmalloc     <--- NOTE: there's a cycle
+	 *                  malloc
+	 *                  xmalloc
+	 *                  main
+	 *
+	 *   10.00%     perf  [kernel]       [k] page_fault
+	 *              |
+	 *              --- page_fault
+	 *                  sys_perf_event_open
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     perf  [kernel]       [k] schedule
+	 *              |
+	 *              --- schedule
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     perf  libc           [.] free
+	 *              |
+	 *              --- free
+	 *                  cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     perf  libc           [.] malloc
+	 *              |
+	 *              --- malloc
+	 *                  cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%     perf  perf           [.] cmd_record
+	 *              |
+	 *              --- cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 */
+	struct result expected[] = {	/* columns: children, self, comm, dso, sym */
+		{ 0, 2000, "perf", "perf",     "main" },
+		{ 0, 1000, "bash", "[kernel]", "page_fault" },
+		{ 0, 1000, "bash", "bash",     "main" },
+		{ 0, 1000, "bash", "bash",     "xmalloc" },
+		{ 0, 1000, "perf", "[kernel]", "page_fault" },
+		{ 0, 1000, "perf", "[kernel]", "schedule" },
+		{ 0, 1000, "perf", "libc",     "free" },
+		{ 0, 1000, "perf", "libc",     "malloc" },
+		{ 0, 1000, "perf", "perf",     "cmd_record" },
+	};
+	struct callchain_result expected_callchain[] = {	/* one element per row of expected[], in the same order */
+		{
+			1, {	{ "perf",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "page_fault" },
+				{ "libc",     "malloc" },
+				{ "bash",     "main" }, },
+		},
+		{
+			1, {	{ "bash",     "main" }, },
+		},
+		{
+			6, {	{ "bash",     "xmalloc" },
+				{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "bash",     "main" }, },
+		},
+		{
+			4, {	{ "[kernel]", "page_fault" },
+				{ "[kernel]", "sys_perf_event_open" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "schedule" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "free" },
+				{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "malloc" },
+				{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			3, {	{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+	};
+
+	symbol_conf.use_callchain = true;	/* global mode flags read by add_hist_entries() and do_test() */
+	symbol_conf.cumulate_callchain = false;
+	perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+
+	setup_sorting(NULL);
+	callchain_register_param(&callchain_param);
+
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	err = do_test(hists, expected, ARRAY_SIZE(expected),
+		      expected_callchain, ARRAY_SIZE(expected_callchain));
+
+out:
+	del_hist_entries(hists);	/* cleanup runs on success and failure alike */
+	reset_output_field();
+	return err;
+}
+
+/* NO callchain + children */
+static int test3(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = evsel__hists(evsel);
+	/*
+	 * expected output:
+	 *
+	 * Children      Self  Command  Shared Object                   Symbol
+	 * ========  ========  =======  =============  =======================
+	 *   70.00%    20.00%     perf  perf           [.] main
+	 *   50.00%     0.00%     perf  perf           [.] run_command
+	 *   30.00%    10.00%     bash  bash           [.] main
+	 *   30.00%    10.00%     perf  perf           [.] cmd_record
+	 *   20.00%     0.00%     bash  libc           [.] malloc
+	 *   10.00%    10.00%     bash  [kernel]       [k] page_fault
+	 *   10.00%    10.00%     bash  bash           [.] xmalloc
+	 *   10.00%    10.00%     perf  [kernel]       [k] page_fault
+	 *   10.00%    10.00%     perf  [kernel]       [k] schedule
+	 *   10.00%    10.00%     perf  libc           [.] free
+	 *   10.00%    10.00%     perf  libc           [.] malloc
+	 *   10.00%     0.00%     perf  [kernel]       [k] sys_perf_event_open
+	 */
+	struct result expected[] = {	/* columns: children, self, comm, dso, sym */
+		{ 7000, 2000, "perf", "perf",     "main" },
+		{ 5000,    0, "perf", "perf",     "run_command" },
+		{ 3000, 1000, "bash", "bash",     "main" },
+		{ 3000, 1000, "perf", "perf",     "cmd_record" },
+		{ 2000,    0, "bash", "libc",     "malloc" },
+		{ 1000, 1000, "bash", "[kernel]", "page_fault" },
+		{ 1000, 1000, "bash", "bash",     "xmalloc" },
+		{ 1000, 1000, "perf", "[kernel]", "page_fault" },
+		{ 1000, 1000, "perf", "[kernel]", "schedule" },
+		{ 1000, 1000, "perf", "libc",     "free" },
+		{ 1000, 1000, "perf", "libc",     "malloc" },
+		{ 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
+	};
+
+	symbol_conf.use_callchain = false;	/* global mode flags read by add_hist_entries() and do_test() */
+	symbol_conf.cumulate_callchain = true;
+	perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+
+	setup_sorting(NULL);
+	callchain_register_param(&callchain_param);
+
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
+
+out:
+	del_hist_entries(hists);	/* cleanup runs on success and failure alike */
+	reset_output_field();
+	return err;
+}
+
+/* Test 4: callchain + children (use_callchain and cumulate_callchain both on) */
+static int test4(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = evsel__hists(evsel);
+	/*
+	 * expected output:
+	 *
+	 * Children      Self  Command  Shared Object                   Symbol
+	 * ========  ========  =======  =============  =======================
+	 *   70.00%    20.00%     perf  perf           [.] main
+	 *              |
+	 *              --- main
+	 *
+	 *   50.00%     0.00%     perf  perf           [.] run_command
+	 *              |
+	 *              --- run_command
+	 *                  main
+	 *
+	 *   30.00%    10.00%     bash  bash           [.] main
+	 *              |
+	 *              --- main
+	 *
+	 *   30.00%    10.00%     perf  perf           [.] cmd_record
+	 *              |
+	 *              --- cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   20.00%     0.00%     bash  libc           [.] malloc
+	 *              |
+	 *              --- malloc
+	 *                 |
+	 *                 |--50.00%-- xmalloc
+	 *                 |           main
+	 *                  --50.00%-- main
+	 *
+	 *   10.00%    10.00%     bash  [kernel]       [k] page_fault
+	 *              |
+	 *              --- page_fault
+	 *                  malloc
+	 *                  main
+	 *
+	 *   10.00%    10.00%     bash  bash           [.] xmalloc
+	 *              |
+	 *              --- xmalloc
+	 *                  malloc
+	 *                  xmalloc     <--- NOTE: there's a cycle
+	 *                  malloc
+	 *                  xmalloc
+	 *                  main
+	 *
+	 *   10.00%     0.00%     perf  [kernel]       [k] sys_perf_event_open
+	 *              |
+	 *              --- sys_perf_event_open
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%    10.00%     perf  [kernel]       [k] page_fault
+	 *              |
+	 *              --- page_fault
+	 *                  sys_perf_event_open
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%    10.00%     perf  [kernel]       [k] schedule
+	 *              |
+	 *              --- schedule
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%    10.00%     perf  libc           [.] free
+	 *              |
+	 *              --- free
+	 *                  cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 *   10.00%    10.00%     perf  libc           [.] malloc
+	 *              |
+	 *              --- malloc
+	 *                  cmd_record
+	 *                  run_command
+	 *                  main
+	 *
+	 */
+	struct result expected[] = {	/* same row order as the table above */
+		{ 7000, 2000, "perf", "perf",     "main" },
+		{ 5000,    0, "perf", "perf",     "run_command" },
+		{ 3000, 1000, "bash", "bash",     "main" },
+		{ 3000, 1000, "perf", "perf",     "cmd_record" },
+		{ 2000,    0, "bash", "libc",     "malloc" },
+		{ 1000, 1000, "bash", "[kernel]", "page_fault" },
+		{ 1000, 1000, "bash", "bash",     "xmalloc" },
+		{ 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
+		{ 1000, 1000, "perf", "[kernel]", "page_fault" },
+		{ 1000, 1000, "perf", "[kernel]", "schedule" },
+		{ 1000, 1000, "perf", "libc",     "free" },
+		{ 1000, 1000, "perf", "libc",     "malloc" },
+	};
+	struct callchain_result expected_callchain[] = {	/* per row: chain length, then that many { dso, symbol } pairs */
+		{
+			1, {	{ "perf",     "main" }, },
+		},
+		{
+			2, {	{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			1, {	{ "bash",     "main" }, },
+		},
+		{
+			3, {	{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{	/* NOTE(review): looks like malloc followed by both branches back to back - confirm */
+			4, {	{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "bash",     "main" },
+				{ "bash",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "page_fault" },
+				{ "libc",     "malloc" },
+				{ "bash",     "main" }, },
+		},
+		{	/* the chain with the xmalloc/malloc cycle noted above */
+			6, {	{ "bash",     "xmalloc" },
+				{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "libc",     "malloc" },
+				{ "bash",     "xmalloc" },
+				{ "bash",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "sys_perf_event_open" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "[kernel]", "page_fault" },
+				{ "[kernel]", "sys_perf_event_open" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			3, {	{ "[kernel]", "schedule" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "free" },
+				{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+		{
+			4, {	{ "libc",     "malloc" },
+				{ "perf",     "cmd_record" },
+				{ "perf",     "run_command" },
+				{ "perf",     "main" }, },
+		},
+	};
+
+	symbol_conf.use_callchain = true;	/* the "callchain" part */
+	symbol_conf.cumulate_callchain = true;	/* the "children" part */
+	perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+
+	setup_sorting(NULL);	/* NULL: default sort order */
+
+	callchain_param = callchain_param_default;	/* start from the default callchain settings */
+	callchain_register_param(&callchain_param);
+
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	err = do_test(hists, expected, ARRAY_SIZE(expected),
+		      expected_callchain, ARRAY_SIZE(expected_callchain));
+
+out:	/* shared exit: reached after success and failure alike */
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+/*
+ * Entry point: run test1..test4 in order on one fake machine and the single
+ * "cpu-clock" evsel, stopping at the first test that returns a negative value.
+ */
+int test__hists_cumulate(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err = TEST_FAIL;
+	struct machines machines;
+	struct machine *machine;
+	struct perf_evsel *evsel;
+	struct perf_evlist *evlist = perf_evlist__new();
+	size_t i;
+	test_fn_t testcases[] = { test1, test2, test3, test4 };
+
+	TEST_ASSERT_VAL("No memory", evlist);
+
+	/* init before any "goto out": the exit path calls machines__exit() */
+	machines__init(&machines);
+
+	err = parse_events(evlist, "cpu-clock", NULL);
+	if (err)
+		goto out;
+	err = TEST_FAIL;
+
+	/* setup threads/dso/map/symbols also */
+	machine = setup_fake_machine(&machines);
+	if (!machine)
+		goto out;
+
+	if (verbose > 1)
+		machine__fprintf(machine, stderr);
+
+	evsel = perf_evlist__first(evlist);
+
+	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+		err = testcases[i](evsel, machine);
+		if (err < 0)
+			break;
+	}
+
+out:
+	/* tear down everything */
+	perf_evlist__delete(evlist);
+	machines__exit(&machines);
+
+	return err;
+}
diff --git a/tests/hists_filter.c b/tests/hists_filter.c
new file mode 100644
index 0000000..1c5beda
--- /dev/null
+++ b/tests/hists_filter.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "perf.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "util/parse-events.h"
+#include "tests/tests.h"
+#include "tests/hists_common.h"
+#include <linux/kernel.h>
+
+struct sample {
+	u32 pid;	/* copied into both sample.pid and sample.tid */
+	u64 ip;	/* copied into sample.ip before machine__resolve() */
+	struct thread *thread;	/* this and the next two: saved from the resolved addr_location */
+	struct map *map;
+	struct symbol *sym;
+	int socket;	/* assigned to al.socket before the entry is added */
+};
+
+/* One row per fake sample; for the numbers, see hists_common.c */
+static struct sample fake_samples[] = {
+	/* perf [kernel] schedule() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, .socket = 0 },
+	/* perf [perf]   main() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, .socket = 0 },
+	/* perf [libc]   malloc() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, .socket = 0 },
+	/* perf [perf]   main() - same comm/dso/sym as the second row */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, .socket = 0 }, /* will be merged */
+	/* perf [perf]   cmd_record() */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, .socket = 1 },
+	/* perf [kernel] page_fault() */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, .socket = 1 },
+	/* bash [bash]   main() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_MAIN, .socket = 2 },
+	/* bash [bash]   xmalloc() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, .socket = 2 },
+	/* bash [libc]   malloc() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_MALLOC, .socket = 3 },
+	/* bash [kernel] page_fault() - the test takes its 'bash' thread filter from this row */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, .socket = 3 },
+};
+
+static int add_hist_entries(struct perf_evlist *evlist,
+			    struct machine *machine)
+{
+	struct perf_evsel *evsel;
+	struct addr_location al;
+	struct perf_sample sample = { .period = 100, };	/* every fake sample has period 100 */
+	size_t i;
+
+	/*
+	 * Feed every fake sample to each evsel in @evlist.  Each evsel gets
+	 * 10 samples but the 4th one (perf [perf] main) will be collapsed to
+	 * an existing entry so total 9 entries will be in the tree.
+	 */
+	evlist__for_each_entry(evlist, evsel) {
+		for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+			struct hist_entry_iter iter = {
+				.evsel = evsel,
+				.sample = &sample,
+				.ops = &hist_iter_normal,
+				.hide_unresolved = false,
+			};
+			struct hists *hists = evsel__hists(evsel);
+
+			/* make sure it has no filter at first */
+			hists->thread_filter = NULL;
+			hists->dso_filter = NULL;
+			hists->symbol_filter_str = NULL;
+
+			sample.cpumode = PERF_RECORD_MISC_USER;
+			sample.pid = fake_samples[i].pid;
+			sample.tid = fake_samples[i].pid;	/* tid is taken from .pid as well */
+			sample.ip = fake_samples[i].ip;
+
+			if (machine__resolve(machine, &al, &sample) < 0)
+				goto out;	/* NOTE(review): "out" then prints an out-of-memory message */
+
+			al.socket = fake_samples[i].socket;
+			if (hist_entry_iter__add(&iter, &al,
+						 sysctl_perf_event_max_stack, NULL) < 0) {
+				addr_location__put(&al);
+				goto out;
+			}
+
+			/* remember what was resolved; the test picks its filters from these */
+			fake_samples[i].thread = al.thread;
+			fake_samples[i].map = al.map;
+			fake_samples[i].sym = al.sym;
+		}
+	}
+
+	return 0;
+
+out:
+	pr_debug("Not enough memory for adding a hist entry\n");
+	return TEST_FAIL;
+}
+
+/* Entry point: check hists stats under thread, dso, symbol and socket filters. */
+int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err = TEST_FAIL;
+	struct machines machines;
+	struct machine *machine;
+	struct perf_evsel *evsel;
+	struct perf_evlist *evlist = perf_evlist__new();
+
+	TEST_ASSERT_VAL("No memory", evlist);
+
+	machines__init(&machines);	/* first: "out" always calls machines__exit() */
+
+	err = parse_events(evlist, "cpu-clock", NULL);
+	if (err)
+		goto out;
+	err = parse_events(evlist, "task-clock", NULL);
+	if (err)
+		goto out;
+	err = TEST_FAIL;
+
+	/* default sort order (comm,dso,sym) will be used */
+	if (setup_sorting(NULL) < 0)
+		goto out;
+
+	/* setup threads/dso/map/symbols also */
+	machine = setup_fake_machine(&machines);
+	if (!machine)
+		goto out;
+
+	if (verbose > 1)
+		machine__fprintf(machine, stderr);
+
+	/* process sample events */
+	err = add_hist_entries(evlist, machine);
+	if (err < 0)
+		goto out;
+
+	evlist__for_each_entry(evlist, evsel) {
+		struct hists *hists = evsel__hists(evsel);
+
+		hists__collapse_resort(hists, NULL);
+		perf_evsel__output_resort(evsel, NULL);
+
+		if (verbose > 2) {
+			pr_info("Normal histogram\n");
+			print_hists_out(hists);
+		}
+
+		TEST_ASSERT_VAL("Invalid nr samples",
+				hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+		TEST_ASSERT_VAL("Invalid nr hist entries",
+				hists->nr_entries == 9);
+		TEST_ASSERT_VAL("Invalid total period",
+				hists->stats.total_period == 1000);
+		TEST_ASSERT_VAL("Unmatched nr samples",
+				hists->stats.nr_events[PERF_RECORD_SAMPLE] ==
+				hists->stats.nr_non_filtered_samples);
+		TEST_ASSERT_VAL("Unmatched nr hist entries",
+				hists->nr_entries == hists->nr_non_filtered_entries);
+		TEST_ASSERT_VAL("Unmatched total period",
+				hists->stats.total_period ==
+				hists->stats.total_non_filtered_period);
+
+		/* now applying thread filter for 'bash' */
+		hists->thread_filter = fake_samples[9].thread;
+		hists__filter_by_thread(hists);
+
+		if (verbose > 2) {
+			pr_info("Histogram for thread filter\n");
+			print_hists_out(hists);
+		}
+
+		/* normal stats should be invariant */
+		TEST_ASSERT_VAL("Invalid nr samples",
+				hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+		TEST_ASSERT_VAL("Invalid nr hist entries",
+				hists->nr_entries == 9);
+		TEST_ASSERT_VAL("Invalid total period",
+				hists->stats.total_period == 1000);
+
+		/* but filter stats are changed */
+		TEST_ASSERT_VAL("Unmatched nr samples for thread filter",
+				hists->stats.nr_non_filtered_samples == 4);
+		TEST_ASSERT_VAL("Unmatched nr hist entries for thread filter",
+				hists->nr_non_filtered_entries == 4);
+		TEST_ASSERT_VAL("Unmatched total period for thread filter",
+				hists->stats.total_non_filtered_period == 400);
+
+		/* remove thread filter first */
+		hists->thread_filter = NULL;
+		hists__filter_by_thread(hists);
+
+		/* now applying dso filter for 'kernel' */
+		hists->dso_filter = fake_samples[0].map->dso;
+		hists__filter_by_dso(hists);
+
+		if (verbose > 2) {
+			pr_info("Histogram for dso filter\n");
+			print_hists_out(hists);
+		}
+
+		/* normal stats should be invariant */
+		TEST_ASSERT_VAL("Invalid nr samples",
+				hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+		TEST_ASSERT_VAL("Invalid nr hist entries",
+				hists->nr_entries == 9);
+		TEST_ASSERT_VAL("Invalid total period",
+				hists->stats.total_period == 1000);
+
+		/* but filter stats are changed */
+		TEST_ASSERT_VAL("Unmatched nr samples for dso filter",
+				hists->stats.nr_non_filtered_samples == 3);
+		TEST_ASSERT_VAL("Unmatched nr hist entries for dso filter",
+				hists->nr_non_filtered_entries == 3);
+		TEST_ASSERT_VAL("Unmatched total period for dso filter",
+				hists->stats.total_non_filtered_period == 300);
+
+		/* remove dso filter first */
+		hists->dso_filter = NULL;
+		hists__filter_by_dso(hists);
+
+		/*
+		 * now applying symbol filter for 'main'.  Also note that
+		 * there are 3 samples that have 'main' symbol but the 4th
+		 * entry of fake_samples was collapsed already so it won't
+		 * be counted as a separate entry; the sample count and
+		 * total period still include it.
+		 */
+		hists->symbol_filter_str = "main";
+		hists__filter_by_symbol(hists);
+
+		if (verbose > 2) {
+			pr_info("Histogram for symbol filter\n");
+			print_hists_out(hists);
+		}
+
+		/* normal stats should be invariant */
+		TEST_ASSERT_VAL("Invalid nr samples",
+				hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+		TEST_ASSERT_VAL("Invalid nr hist entries",
+				hists->nr_entries == 9);
+		TEST_ASSERT_VAL("Invalid total period",
+				hists->stats.total_period == 1000);
+
+		/* but filter stats are changed */
+		TEST_ASSERT_VAL("Unmatched nr samples for symbol filter",
+				hists->stats.nr_non_filtered_samples == 3);
+		TEST_ASSERT_VAL("Unmatched nr hist entries for symbol filter",
+				hists->nr_non_filtered_entries == 2);
+		TEST_ASSERT_VAL("Unmatched total period for symbol filter",
+				hists->stats.total_non_filtered_period == 300);
+
+		/* remove symbol filter first */
+		hists->symbol_filter_str = NULL;
+		hists__filter_by_symbol(hists);
+
+		/* now applying socket filters */
+		hists->socket_filter = 2;
+		hists__filter_by_socket(hists);
+
+		if (verbose > 2) {
+			pr_info("Histogram for socket filters\n");
+			print_hists_out(hists);
+		}
+
+		/* normal stats should be invariant */
+		TEST_ASSERT_VAL("Invalid nr samples",
+				hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+		TEST_ASSERT_VAL("Invalid nr hist entries",
+				hists->nr_entries == 9);
+		TEST_ASSERT_VAL("Invalid total period",
+				hists->stats.total_period == 1000);
+
+		/* but filter stats are changed */
+		TEST_ASSERT_VAL("Unmatched nr samples for socket filter",
+				hists->stats.nr_non_filtered_samples == 2);
+		TEST_ASSERT_VAL("Unmatched nr hist entries for socket filter",
+				hists->nr_non_filtered_entries == 2);
+		TEST_ASSERT_VAL("Unmatched total period for socket filter",
+				hists->stats.total_non_filtered_period == 200);
+
+		/* remove socket filter first */
+		hists->socket_filter = -1;
+		hists__filter_by_socket(hists);
+
+		/* now applying several filters (thread and dso) at once. */
+		hists->thread_filter = fake_samples[1].thread;
+		hists->dso_filter = fake_samples[1].map->dso;
+		hists__filter_by_thread(hists);
+		hists__filter_by_dso(hists);
+
+		if (verbose > 2) {
+			pr_info("Histogram for all filters\n");
+			print_hists_out(hists);
+		}
+
+		/* normal stats should be invariant */
+		TEST_ASSERT_VAL("Invalid nr samples",
+				hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+		TEST_ASSERT_VAL("Invalid nr hist entries",
+				hists->nr_entries == 9);
+		TEST_ASSERT_VAL("Invalid total period",
+				hists->stats.total_period == 1000);
+
+		/* but filter stats are changed */
+		TEST_ASSERT_VAL("Unmatched nr samples for all filter",
+				hists->stats.nr_non_filtered_samples == 2);
+		TEST_ASSERT_VAL("Unmatched nr hist entries for all filter",
+				hists->nr_non_filtered_entries == 1);
+		TEST_ASSERT_VAL("Unmatched total period for all filter",
+				hists->stats.total_non_filtered_period == 200);
+	}
+
+	err = TEST_OK;
+
+out:
+	/* tear down everything */
+	perf_evlist__delete(evlist);
+	reset_output_field();
+	machines__exit(&machines);
+
+	return err;
+}
diff --git a/tests/hists_link.c b/tests/hists_link.c
new file mode 100644
index 0000000..9a9d06c
--- /dev/null
+++ b/tests/hists_link.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "perf.h"
+#include "tests.h"
+#include "debug.h"
+#include "symbol.h"
+#include "sort.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "thread.h"
+#include "parse-events.h"
+#include "hists_common.h"
+#include <errno.h>
+#include <linux/kernel.h>
+
+struct sample {
+	u32 pid;	/* copied into both sample.pid and sample.tid */
+	u64 ip;	/* copied into sample.ip before machine__resolve() */
+	struct thread *thread;	/* this and the next two: saved from the resolved addr_location */
+	struct map *map;
+	struct symbol *sym;	/* find_sample() matches entries on thread, map and sym */
+};
+
+/* Samples added to every evsel; for the numbers, see hists_common.c */
+static struct sample fake_common_samples[] = {
+	/* perf [kernel] schedule() */
+	{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
+	/* perf [perf]   main() */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
+	/* perf [perf]   cmd_record() */
+	{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
+	/* bash [bash]   xmalloc() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
+	/* bash [libc]   malloc() */
+	{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_MALLOC, },
+};
+
+static struct sample fake_samples[][5] = {	/* one row of extra samples per evsel */
+	{	/* row 0: added to the first evsel only */
+		/* perf [perf]   run_command() */
+		{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, },
+		/* perf [libc]   malloc() */
+		{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
+		/* perf [kernel] page_fault() */
+		{ .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+		/* perf [kernel] sys_perf_event_open() */
+		{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, },
+		/* bash [libc]   free() */
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_FREE, },
+	},
+	{	/* row 1: added to the second evsel only */
+		/* perf [libc]   free() */
+		{ .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, },
+		/* bash [libc]   malloc() - also present in fake_common_samples */
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */
+		/* bash [bash]   xfree() */
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XFREE, },
+		/* bash [libc]   realloc() */
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_LIBC_REALLOC, },
+		/* bash [kernel] page_fault() */
+		{ .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+	},
+};
+
+static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
+{
+	struct perf_evsel *evsel;
+	struct addr_location al;
+	struct hist_entry *he;
+	struct perf_sample sample = { .period = 1, .weight = 1, };
+	size_t i = 0, k;	/* i: index of the current evsel, k: sample index */
+
+	/*
+	 * Each evsel gets 10 samples: the 5 common ones plus its own row of
+	 * fake_samples[] (picked by i).  The second evsel also has a collapsed
+	 * entry for "bash [libc] malloc" so 9 entries will be in its tree.
+	 */
+	evlist__for_each_entry(evlist, evsel) {
+		struct hists *hists = evsel__hists(evsel);
+
+		for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) {
+			sample.cpumode = PERF_RECORD_MISC_USER;
+			sample.pid = fake_common_samples[k].pid;
+			sample.tid = fake_common_samples[k].pid;	/* tid is taken from .pid as well */
+			sample.ip = fake_common_samples[k].ip;
+
+			if (machine__resolve(machine, &al, &sample) < 0)
+				goto out;	/* NOTE(review): "out" then prints an out-of-memory message */
+
+			he = hists__add_entry(hists, &al, NULL,
+						NULL, NULL, &sample, true);
+			if (he == NULL) {
+				addr_location__put(&al);
+				goto out;
+			}
+
+			fake_common_samples[k].thread = al.thread;	/* kept for find_sample() */
+			fake_common_samples[k].map = al.map;
+			fake_common_samples[k].sym = al.sym;
+		}
+
+		for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) {	/* NOTE(review): i is not bounded by the row count here */
+			sample.pid = fake_samples[i][k].pid;	/* sample.cpumode keeps the value set in the loop above */
+			sample.tid = fake_samples[i][k].pid;
+			sample.ip = fake_samples[i][k].ip;
+			if (machine__resolve(machine, &al, &sample) < 0)
+				goto out;
+
+			he = hists__add_entry(hists, &al, NULL,
+						NULL, NULL, &sample, true);
+			if (he == NULL) {
+				addr_location__put(&al);
+				goto out;
+			}
+
+			fake_samples[i][k].thread = al.thread;	/* kept for find_sample() */
+			fake_samples[i][k].map = al.map;
+			fake_samples[i][k].sym = al.sym;
+		}
+		i++;	/* next evsel uses the next row of fake_samples[] */
+	}
+
+	return 0;
+
+out:
+	pr_debug("Not enough memory for adding a hist entry\n");
+	return -1;
+}
+
+static int find_sample(struct sample *samples, size_t nr_samples,
+		       struct thread *t, struct map *m, struct symbol *s)
+{
+	struct sample *cur, *end = samples + nr_samples;
+
+	/* Linear scan: returns 1 on the first sample matching all of t, m, s. */
+	for (cur = samples; cur != end; cur++)
+		if (cur->thread == t && cur->map == m && cur->sym == s)
+			return 1;
+	return 0;
+}
+
+/*
+ * Check @hists after hists__match(); returns 0 if it is as expected, else -1.
+ */
+static int __validate_match(struct hists *hists)
+{
+	size_t count = 0;
+	struct rb_root *root;
+	struct rb_node *node;
+
+	/*
+	 * Only entries from fake_common_samples should have a pair.
+	 */
+	if (hists__has(hists, need_collapse))
+		root = &hists->entries_collapsed;
+	else
+		root = hists->entries_in;
+
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		struct hist_entry *he;
+
+		he = rb_entry(node, struct hist_entry, rb_node_in);
+
+		if (hist_entry__has_pairs(he)) {
+			if (find_sample(fake_common_samples,
+					ARRAY_SIZE(fake_common_samples),
+					he->thread, he->ms.map, he->ms.sym)) {
+				count++;
+			} else {
+				pr_debug("Can't find the matched entry\n");
+				return -1;
+			}
+		}
+	}
+
+	if (count != ARRAY_SIZE(fake_common_samples)) {
+		pr_debug("Invalid count for matched entries: %zu of %zu\n",
+			 count, ARRAY_SIZE(fake_common_samples));
+		return -1;
+	}
+
+	return 0;
+}
+
+static int validate_match(struct hists *leader, struct hists *other)
+{
+	return __validate_match(leader) || __validate_match(other);	/* 0 if both pass, else 1; || skips 'other' once 'leader' fails */
+}
+
+/*
+ * Check @hists after hists__link(): @idx 0 is the leader, 1 the other hists.
+ * Returns 0 when the pair and dummy counts are as expected, -1 otherwise.
+ */
+static int __validate_link(struct hists *hists, int idx)
+{
+	size_t count = 0, count_pair = 0, count_dummy = 0;
+	struct rb_root *root;
+	struct rb_node *node;
+
+	/*
+	 * Leader hists (idx = 0) will have dummy entries from other,
+	 * and some entries will have no pair.  However every entry
+	 * in other hists should have (dummy) pair.
+	 */
+	if (hists__has(hists, need_collapse))
+		root = &hists->entries_collapsed;
+	else
+		root = hists->entries_in;
+
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		struct hist_entry *he;
+
+		he = rb_entry(node, struct hist_entry, rb_node_in);
+
+		if (hist_entry__has_pairs(he)) {
+			if (!find_sample(fake_common_samples,
+					 ARRAY_SIZE(fake_common_samples),
+					 he->thread, he->ms.map, he->ms.sym) &&
+			    !find_sample(fake_samples[idx],
+					 ARRAY_SIZE(fake_samples[idx]),
+					 he->thread, he->ms.map, he->ms.sym)) {
+				count_dummy++;
+			}
+			count_pair++;
+		} else if (idx) {
+			pr_debug("A entry from the other hists should have pair\n");
+			return -1;
+		}
+
+		count++;
+	}
+
+	/*
+	 * Note that we have an entry collapsed in the other (idx = 1) hists.
+	 */
+	if (idx == 0) {
+		if (count_dummy != ARRAY_SIZE(fake_samples[1]) - 1) {
+			pr_debug("Invalid count of dummy entries: %zu of %zu\n",
+				 count_dummy, ARRAY_SIZE(fake_samples[1]) - 1);
+			return -1;
+		}
+		if (count != count_pair + ARRAY_SIZE(fake_samples[0])) {
+			pr_debug("Invalid count of total leader entries: %zu of %zu\n",
+				 count, count_pair + ARRAY_SIZE(fake_samples[0]));
+			return -1;
+		}
+	} else {
+		if (count != count_pair) {
+			pr_debug("Invalid count of total other entries: %zu of %zu\n",
+				 count, count_pair);
+			return -1;
+		}
+		if (count_dummy > 0) {
+			pr_debug("Other hists should not have dummy entries: %zu\n",
+				 count_dummy);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int validate_link(struct hists *leader, struct hists *other)
+{
+	return __validate_link(leader, 0) || __validate_link(other, 1);	/* 0 if both pass, else 1; idx 0 = leader, 1 = other */
+}
+
+/*
+ * Entry point: fill the hists of two evsels with fake samples, then validate
+ * the results of hists__match() and hists__link() between the two.
+ */
+int test__hists_link(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err = -1;
+	struct hists *hists, *first_hists;
+	struct machines machines;
+	struct machine *machine = NULL;
+	struct perf_evsel *evsel, *first;
+	struct perf_evlist *evlist = perf_evlist__new();
+
+	if (evlist == NULL)
+		return -ENOMEM;
+
+	machines__init(&machines);	/* first: "out" always calls machines__exit() */
+
+	err = parse_events(evlist, "cpu-clock", NULL);
+	if (err)
+		goto out;
+	err = parse_events(evlist, "task-clock", NULL);
+	if (err)
+		goto out;
+
+	err = TEST_FAIL;
+	/* default sort order (comm,dso,sym) will be used */
+	if (setup_sorting(NULL) < 0)
+		goto out;
+
+	/* setup threads/dso/map/symbols also */
+	machine = setup_fake_machine(&machines);
+	if (!machine)
+		goto out;
+
+	if (verbose > 1)
+		machine__fprintf(machine, stderr);
+
+	/* process sample events */
+	err = add_hist_entries(evlist, machine);
+	if (err < 0)
+		goto out;
+
+	evlist__for_each_entry(evlist, evsel) {
+		hists = evsel__hists(evsel);
+		hists__collapse_resort(hists, NULL);
+
+		if (verbose > 2)
+			print_hists_in(hists);
+	}
+
+	first = perf_evlist__first(evlist);
+	evsel = perf_evlist__last(evlist);
+
+	first_hists = evsel__hists(first);
+	hists = evsel__hists(evsel);
+
+	/* match common entries */
+	hists__match(first_hists, hists);
+	err = validate_match(first_hists, hists);
+	if (err)
+		goto out;
+
+	/* link common and/or dummy entries; this result is the test result */
+	hists__link(first_hists, hists);
+	err = validate_link(first_hists, hists);
+
+out:
+	/* tear down everything */
+	perf_evlist__delete(evlist);
+	reset_output_field();
+	machines__exit(&machines);
+
+	return err;
+}
diff --git a/tests/hists_output.c b/tests/hists_output.c
new file mode 100644
index 0000000..faacb4f
--- /dev/null
+++ b/tests/hists_output.c
@@ -0,0 +1,624 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "perf.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "util/parse-events.h"
+#include "tests/tests.h"
+#include "tests/hists_common.h"
+#include <linux/kernel.h>
+
+struct sample {
+	u32 cpu;		/* copied into perf_sample.cpu */
+	u32 pid;		/* used as both pid and tid of the sample */
+	u64 ip;			/* copied into perf_sample.ip */
+	struct thread *thread;	/* filled in by add_hist_entries() */
+	struct map *map;	/* filled in by add_hist_entries() */
+	struct symbol *sym;	/* filled in by add_hist_entries() */
+};
+
+/* Input samples (thread/map/sym start out unset). For the numbers, see hists_common.c */
+static struct sample fake_samples[] = {
+	/* perf [kernel] schedule() */
+	{ .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
+	/* perf [perf]   main() */
+	{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
+	/* perf [perf]   cmd_record() */
+	{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
+	/* perf [libc]   malloc() */
+	{ .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
+	/* perf [libc]   free() */
+	{ .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
+	/* perf [perf]   main() */
+	{ .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
+	/* perf [kernel] page_fault() */
+	{ .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+	/* bash [bash]   main() */
+	{ .cpu = 3, .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_MAIN, },
+	/* bash [bash]   xmalloc() */
+	{ .cpu = 0, .pid = FAKE_PID_BASH,  .ip = FAKE_IP_BASH_XMALLOC, },
+	/* bash [kernel] page_fault() */
+	{ .cpu = 1, .pid = FAKE_PID_BASH,  .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+};
+
+static int add_hist_entries(struct hists *hists, struct machine *machine)
+{
+	struct addr_location al;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
+	struct perf_sample sample = { .period = 100, };	/* every sample weighs 100 */
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+		struct hist_entry_iter iter = {
+			.evsel = evsel,
+			.sample = &sample,
+			.ops = &hist_iter_normal,
+			.hide_unresolved = false,
+		};
+		/* every fake sample is fed in as a user-mode sample; pid doubles as tid */
+		sample.cpumode = PERF_RECORD_MISC_USER;
+		sample.cpu = fake_samples[i].cpu;
+		sample.pid = fake_samples[i].pid;
+		sample.tid = fake_samples[i].pid;
+		sample.ip = fake_samples[i].ip;
+
+		if (machine__resolve(machine, &al, &sample) < 0)
+			goto out;
+
+		if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
+					 NULL) < 0) {
+			addr_location__put(&al);
+			goto out;
+		}
+		/* remember the resolved thread/map/sym (no addr_location__put() on success) */
+		fake_samples[i].thread = al.thread;
+		fake_samples[i].map = al.map;
+		fake_samples[i].sym = al.sym;
+	}
+
+	return TEST_OK;
+	/* also reached when machine__resolve() fails, whatever the message below says */
+out:
+	pr_debug("Not enough memory for adding a hist entry\n");
+	return TEST_FAIL;
+}
+
+static void del_hist_entries(struct hists *hists)
+{
+	struct hist_entry *he;
+	struct rb_root *root_in;
+	struct rb_root *root_out;
+	struct rb_node *node;
+
+	if (hists__has(hists, need_collapse))
+		root_in = &hists->entries_collapsed;
+	else
+		root_in = hists->entries_in;
+	/* the output tree drives the loop; each entry is also unlinked from root_in */
+	root_out = &hists->entries;
+
+	while (!RB_EMPTY_ROOT(root_out)) {
+		node = rb_first(root_out);
+		/* unlink from both trees, then free the entry */
+		he = rb_entry(node, struct hist_entry, rb_node);
+		rb_erase(node, root_out);
+		rb_erase(&he->rb_node_in, root_in);
+		hist_entry__delete(he);
+	}
+}
+
+typedef int (*test_fn_t)(struct perf_evsel *, struct machine *);
+/* field accessors used by the assertions in the tests below */
+#define COMM(he)  (thread__comm_str(he->thread))
+#define DSO(he)   (he->ms.map->dso->short_name)
+#define SYM(he)   (he->ms.sym->name)
+#define CPU(he)   (he->cpu)
+#define PID(he)   (he->thread->tid)
+
+/* default sort keys (no field): all nine expected rows are asserted in output order */
+static int test1(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = evsel__hists(evsel);
+	struct hist_entry *he;
+	struct rb_root *root;
+	struct rb_node *node;
+
+	field_order = NULL;
+	sort_order = NULL; /* equivalent to sort_order = "comm,dso,sym" */
+
+	setup_sorting(NULL);
+
+	/*
+	 * expected output:
+	 *
+	 * Overhead  Command  Shared Object          Symbol
+	 * ========  =======  =============  ==============
+	 *   20.00%     perf  perf           [.] main
+	 *   10.00%     bash  [kernel]       [k] page_fault
+	 *   10.00%     bash  bash           [.] main
+	 *   10.00%     bash  bash           [.] xmalloc
+	 *   10.00%     perf  [kernel]       [k] page_fault
+	 *   10.00%     perf  [kernel]       [k] schedule
+	 *   10.00%     perf  libc           [.] free
+	 *   10.00%     perf  libc           [.] malloc
+	 *   10.00%     perf  perf           [.] cmd_record
+	 */
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	hists__collapse_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
+
+	if (verbose > 2) {
+		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+		print_hists_out(hists);
+	}
+	/* walk the output tree in order, one assertion per expected row */
+	root = &hists->entries;
+	node = rb_first(root);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+			!strcmp(SYM(he), "main") && he->stat.period == 200);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "bash") && !strcmp(DSO(he), "[kernel]") &&
+			!strcmp(SYM(he), "page_fault") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+			!strcmp(SYM(he), "main") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+			!strcmp(SYM(he), "xmalloc") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+			!strcmp(SYM(he), "page_fault") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+			!strcmp(SYM(he), "schedule") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+			!strcmp(SYM(he), "free") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+			!strcmp(SYM(he), "malloc") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+			!strcmp(SYM(he), "cmd_record") && he->stat.period == 100);
+	/* NOTE(review): if TEST_ASSERT_VAL() returns on failure, the cleanup below is skipped - confirm */
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+/* mixed fields and sort keys (fields "overhead,cpu", sort "pid") */
+static int test2(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = evsel__hists(evsel);
+	struct hist_entry *he;
+	struct rb_root *root;
+	struct rb_node *node;
+
+	field_order = "overhead,cpu";
+	sort_order = "pid";
+
+	setup_sorting(NULL);
+
+	/*
+	 * expected output:
+	 *
+	 * Overhead  CPU  Command:  Pid
+	 * ========  ===  =============
+	 *   30.00%    1  perf   :  100
+	 *   10.00%    0  perf   :  100
+	 *   10.00%    2  perf   :  100
+	 *   20.00%    2  perf   :  200
+	 *   10.00%    0  bash   :  300
+	 *   10.00%    1  bash   :  300
+	 *   10.00%    3  bash   :  300
+	 */
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	hists__collapse_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
+
+	if (verbose > 2) {
+		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+		print_hists_out(hists);
+	}
+	/* only the first two rows of the table above are asserted */
+	root = &hists->entries;
+	node = rb_first(root);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			CPU(he) == 1 && PID(he) == 100 && he->stat.period == 300);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			CPU(he) == 0 && PID(he) == 100 && he->stat.period == 100);
+
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+/* fields only (no sort key): rows are asserted by comm, dso and period */
+static int test3(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = evsel__hists(evsel);
+	struct hist_entry *he;
+	struct rb_root *root;
+	struct rb_node *node;
+
+	field_order = "comm,overhead,dso";
+	sort_order = NULL;
+
+	setup_sorting(NULL);
+
+	/*
+	 * expected output:
+	 *
+	 * Command  Overhead  Shared Object
+	 * =======  ========  =============
+	 *    bash    20.00%  bash
+	 *    bash    10.00%  [kernel]
+	 *    perf    30.00%  perf
+	 *    perf    20.00%  [kernel]
+	 *    perf    20.00%  libc
+	 */
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	hists__collapse_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
+
+	if (verbose > 2) {
+		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+		print_hists_out(hists);
+	}
+	/* all five expected rows are asserted in output order */
+	root = &hists->entries;
+	node = rb_first(root);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+			he->stat.period == 200);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "bash") && !strcmp(DSO(he), "[kernel]") &&
+			he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+			he->stat.period == 300);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+			he->stat.period == 200);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+			he->stat.period == 200);
+
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+/* handle duplicate 'dso' field: it is listed twice in field_order below */
+static int test4(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = evsel__hists(evsel);
+	struct hist_entry *he;
+	struct rb_root *root;
+	struct rb_node *node;
+
+	field_order = "dso,sym,comm,overhead,dso";
+	sort_order = "sym";
+
+	setup_sorting(NULL);
+
+	/*
+	 * expected output:
+	 *
+	 * Shared Object          Symbol  Command  Overhead
+	 * =============  ==============  =======  ========
+	 *          perf  [.] cmd_record     perf    10.00%
+	 *          libc  [.] free           perf    10.00%
+	 *          bash  [.] main           bash    10.00%
+	 *          perf  [.] main           perf    20.00%
+	 *          libc  [.] malloc         perf    10.00%
+	 *      [kernel]  [k] page_fault     bash    10.00%
+	 *      [kernel]  [k] page_fault     perf    10.00%
+	 *      [kernel]  [k] schedule       perf    10.00%
+	 *          bash  [.] xmalloc        bash    10.00%
+	 */
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	hists__collapse_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
+
+	if (verbose > 2) {
+		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+		print_hists_out(hists);
+	}
+	/* all nine expected rows are asserted in output order */
+	root = &hists->entries;
+	node = rb_first(root);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(DSO(he), "perf") && !strcmp(SYM(he), "cmd_record") &&
+			!strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(DSO(he), "libc") && !strcmp(SYM(he), "free") &&
+			!strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(DSO(he), "bash") && !strcmp(SYM(he), "main") &&
+			!strcmp(COMM(he), "bash") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(DSO(he), "perf") && !strcmp(SYM(he), "main") &&
+			!strcmp(COMM(he), "perf") && he->stat.period == 200);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(DSO(he), "libc") && !strcmp(SYM(he), "malloc") &&
+			!strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(DSO(he), "[kernel]") && !strcmp(SYM(he), "page_fault") &&
+			!strcmp(COMM(he), "bash") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(DSO(he), "[kernel]") && !strcmp(SYM(he), "page_fault") &&
+			!strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(DSO(he), "[kernel]") && !strcmp(SYM(he), "schedule") &&
+			!strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			!strcmp(DSO(he), "bash") && !strcmp(SYM(he), "xmalloc") &&
+			!strcmp(COMM(he), "bash") && he->stat.period == 100);
+
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+/* full sort keys w/o overhead field (fields "cpu,pid,comm,dso,sym", sort "dso,pid") */
+static int test5(struct perf_evsel *evsel, struct machine *machine)
+{
+	int err;
+	struct hists *hists = evsel__hists(evsel);
+	struct hist_entry *he;
+	struct rb_root *root;
+	struct rb_node *node;
+
+	field_order = "cpu,pid,comm,dso,sym";
+	sort_order = "dso,pid";
+
+	setup_sorting(NULL);
+
+	/*
+	 * expected output:
+	 *
+	 * CPU  Command:  Pid  Command  Shared Object          Symbol
+	 * ===  =============  =======  =============  ==============
+	 *   0     perf:  100     perf       [kernel]  [k] schedule
+	 *   2     perf:  200     perf       [kernel]  [k] page_fault
+	 *   1     bash:  300     bash       [kernel]  [k] page_fault
+	 *   0     bash:  300     bash           bash  [.] xmalloc
+	 *   3     bash:  300     bash           bash  [.] main
+	 *   1     perf:  100     perf           libc  [.] malloc
+	 *   2     perf:  100     perf           libc  [.] free
+	 *   1     perf:  100     perf           perf  [.] cmd_record
+	 *   1     perf:  100     perf           perf  [.] main
+	 *   2     perf:  200     perf           perf  [.] main
+	 */
+	err = add_hist_entries(hists, machine);
+	if (err < 0)
+		goto out;
+
+	hists__collapse_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
+
+	if (verbose > 2) {
+		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+		print_hists_out(hists);
+	}
+	/* all ten expected rows are asserted in output order, each with period 100 */
+	root = &hists->entries;
+	node = rb_first(root);
+	he = rb_entry(node, struct hist_entry, rb_node);
+
+	TEST_ASSERT_VAL("Invalid hist entry",
+			CPU(he) == 0 && PID(he) == 100 &&
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+			!strcmp(SYM(he), "schedule") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			CPU(he) == 2 && PID(he) == 200 &&
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+			!strcmp(SYM(he), "page_fault") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			CPU(he) == 1 && PID(he) == 300 &&
+			!strcmp(COMM(he), "bash") && !strcmp(DSO(he), "[kernel]") &&
+			!strcmp(SYM(he), "page_fault") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			CPU(he) == 0 && PID(he) == 300 &&
+			!strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+			!strcmp(SYM(he), "xmalloc") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			CPU(he) == 3 && PID(he) == 300 &&
+			!strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+			!strcmp(SYM(he), "main") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			CPU(he) == 1 && PID(he) == 100 &&
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+			!strcmp(SYM(he), "malloc") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			CPU(he) == 2 && PID(he) == 100 &&
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+			!strcmp(SYM(he), "free") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			CPU(he) == 1 && PID(he) == 100 &&
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+			!strcmp(SYM(he), "cmd_record") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			CPU(he) == 1 && PID(he) == 100 &&
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+			!strcmp(SYM(he), "main") && he->stat.period == 100);
+
+	node = rb_next(node);
+	he = rb_entry(node, struct hist_entry, rb_node);
+	TEST_ASSERT_VAL("Invalid hist entry",
+			CPU(he) == 2 && PID(he) == 200 &&
+			!strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+			!strcmp(SYM(he), "main") && he->stat.period == 100);
+
+out:
+	del_hist_entries(hists);
+	reset_output_field();
+	return err;
+}
+
+/* Run test1..test5 in turn against the first event of a one-event evlist on a fake machine. */
+int test__hists_output(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err = TEST_FAIL;
+	struct machines machines;
+	struct machine *machine;
+	struct perf_evsel *evsel;
+	struct perf_evlist *evlist = perf_evlist__new();
+	size_t i;
+	test_fn_t testcases[] = {
+		test1,
+		test2,
+		test3,
+		test4,
+		test5,
+	};
+
+	TEST_ASSERT_VAL("No memory", evlist);
+
+	machines__init(&machines);	/* 'out' always runs machines__exit() */
+
+	err = parse_events(evlist, "cpu-clock", NULL);
+	if (err)
+		goto out;
+	err = TEST_FAIL;
+
+	/* setup threads/dso/map/symbols also */
+	machine = setup_fake_machine(&machines);
+	if (!machine)
+		goto out;
+
+	if (verbose > 1)
+		machine__fprintf(machine, stderr);
+
+	evsel = perf_evlist__first(evlist);
+	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+		err = testcases[i](evsel, machine);
+		if (err < 0)
+			break;
+	}
+
+out:
+	/* tear down everything */
+	perf_evlist__delete(evlist);
+	machines__exit(&machines);
+
+	return err;
+}
diff --git a/tests/is_printable_array.c b/tests/is_printable_array.c
new file mode 100644
index 0000000..9c7b3ba
--- /dev/null
+++ b/tests/is_printable_array.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include "tests.h"
+#include "debug.h"
+#include "print_binary.h"
+
+/* Table-driven check of is_printable_array(): each row is {buffer, length, expected result}. */
+int test__is_printable_array(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	char ctrl_mid[] = { 'k', 'r', 4, 'v', 'a', 0 };
+	char ctrl_end[] = { 'k', 'r', 'a', 'v', 4, 0 };
+	struct {
+		char		*buf;
+		unsigned int	 len;
+		int		 ret;
+	} cases[] = {
+		{ (char *) "krava",	sizeof("krava"),	1 },
+		{ (char *) "krava",	sizeof("krava") - 1,	0 },
+		{ (char *) "",		sizeof(""),		1 },
+		{ (char *) "",		0,			0 },
+		{ NULL,			0,			0 },
+		{ ctrl_mid,		sizeof(ctrl_mid),	0 },
+		{ ctrl_end,		sizeof(ctrl_end),	0 },
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(cases); idx++) {
+		int got = is_printable_array((char *) cases[idx].buf, cases[idx].len);
+
+		if (got == cases[idx].ret)
+			continue;
+		pr_err("failed: test %u\n", idx);
+		return TEST_FAIL;
+	}
+
+	return TEST_OK;
+}
diff --git a/tests/keep-tracking.c b/tests/keep-tracking.c
new file mode 100644
index 0000000..17c46f3
--- /dev/null
+++ b/tests/keep-tracking.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <unistd.h>
+#include <sys/prctl.h>
+
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tests.h"
+
+/* Log the failing expression and jump to the enclosing function's out_err label. */
+#define CHECK__(x) do {				\
+	if ((x) < 0) {				\
+		pr_debug(#x " failed!\n");	\
+		goto out_err;			\
+	}					\
+} while (0)
+#define CHECK_NOT_NULL__(x) do {		\
+	if ((x) == NULL) {			\
+		pr_debug(#x " failed!\n");	\
+		goto out_err;			\
+	}					\
+} while (0)
+
+/* Count PERF_RECORD_COMM events whose pid and tid equal getpid() and whose comm equals @comm. */
+static int find_comm(struct perf_evlist *evlist, const char *comm)
+{
+	int idx, nr_found = 0;
+
+	for (idx = 0; idx < evlist->nr_mmaps; idx++) {
+		struct perf_mmap *ring = &evlist->mmap[idx];
+		union perf_event *ev;
+
+		if (perf_mmap__read_init(ring) < 0)
+			continue;
+		while ((ev = perf_mmap__read_event(ring)) != NULL) {
+			if (ev->header.type == PERF_RECORD_COMM &&
+			    (pid_t)ev->comm.pid == getpid() &&
+			    (pid_t)ev->comm.tid == getpid() &&
+			    strcmp(ev->comm.comm, comm) == 0)
+				nr_found += 1;
+			perf_mmap__consume(ring);
+		}
+		perf_mmap__read_done(ring);
+	}
+	return nr_found;
+}
+
+/**
+ * test__keep_tracking - test using a dummy software event to keep tracking.
+ *
+ * This function implements a test that checks that tracking events continue
+ * when an event is disabled but a dummy software event is not disabled.  Returns
+ * %0 on success, %TEST_SKIP if the events cannot be opened, otherwise %-1.
+ */
+int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct record_opts opts = {
+		.mmap_pages	     = UINT_MAX,
+		.user_freq	     = UINT_MAX,
+		.user_interval	     = ULLONG_MAX,
+		.target		     = {
+			.uses_mmap   = true,
+		},
+	};
+	struct thread_map *threads = NULL;
+	struct cpu_map *cpus = NULL;
+	struct perf_evlist *evlist = NULL;
+	struct perf_evsel *evsel = NULL;
+	int found, err = -1;
+	const char *comm;
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	CHECK_NOT_NULL__(threads);
+
+	cpus = cpu_map__new(NULL);
+	CHECK_NOT_NULL__(cpus);
+
+	evlist = perf_evlist__new();
+	CHECK_NOT_NULL__(evlist);
+
+	perf_evlist__set_maps(evlist, cpus, threads);
+
+	CHECK__(parse_events(evlist, "dummy:u", NULL));
+	CHECK__(parse_events(evlist, "cycles:u", NULL));
+
+	perf_evlist__config(evlist, &opts, NULL);
+
+	evsel = perf_evlist__first(evlist);
+
+	evsel->attr.comm = 1;
+	evsel->attr.disabled = 1;
+	evsel->attr.enable_on_exec = 0;
+
+	if (perf_evlist__open(evlist) < 0) {
+		pr_debug("Unable to open dummy and cycles event\n");
+		err = TEST_SKIP;
+		goto out_err;
+	}
+
+	CHECK__(perf_evlist__mmap(evlist, UINT_MAX));
+
+	/*
+	 * First, test that a 'comm' event can be found when the event is
+	 * enabled.
+	 */
+
+	perf_evlist__enable(evlist);
+
+	comm = "Test COMM 1";
+	CHECK__(prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0));
+
+	perf_evlist__disable(evlist);
+
+	found = find_comm(evlist, comm);
+	if (found != 1) {
+		pr_debug("First time, failed to find tracking event.\n");
+		goto out_err;
+	}
+
+	/*
+	 * Secondly, test that a 'comm' event can be found when the event is
+	 * disabled with the dummy event still enabled.
+	 */
+
+	perf_evlist__enable(evlist);
+
+	evsel = perf_evlist__last(evlist);
+
+	CHECK__(perf_evsel__disable(evsel));
+
+	comm = "Test COMM 2";
+	CHECK__(prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0));
+
+	perf_evlist__disable(evlist);
+
+	found = find_comm(evlist, comm);
+	if (found != 1) {
+		pr_debug("Second time, failed to find tracking event.\n");
+		goto out_err;
+	}
+
+	err = 0;
+
+out_err:
+	if (evlist) {
+		perf_evlist__disable(evlist);
+		perf_evlist__delete(evlist);
+	} else {
+		cpu_map__put(cpus);
+		thread_map__put(threads);
+	}
+
+	return err;
+}
diff --git a/tests/kmod-path.c b/tests/kmod-path.c
new file mode 100644
index 0000000..8e57d46
--- /dev/null
+++ b/tests/kmod-path.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <stdlib.h>
+#include "tests.h"
+#include "dso.h"
+#include "debug.h"
+
+/* Parse @path and compare the resulting kmod/comp flags and name/ext strings with the expected ones. */
+static int test(const char *path, bool alloc_name, bool alloc_ext,
+		bool kmod, bool comp, const char *name, const char *ext)
+{
+	struct kmod_path m;
+
+	memset(&m, 0x0, sizeof(m));
+	TEST_ASSERT_VAL("kmod_path__parse",
+			!__kmod_path__parse(&m, path, alloc_name, alloc_ext));
+
+	/* name/ext are NULL in several passing cases, and NULL must not reach %s */
+	pr_debug("%s - alloc name %d, alloc ext %d, kmod %d, comp %d, name '%s', ext '%s'\n",
+		 path, alloc_name, alloc_ext, m.kmod, m.comp,
+		 m.name ? m.name : "(null)", m.ext ? m.ext : "(null)");
+
+	TEST_ASSERT_VAL("wrong kmod", m.kmod == kmod);
+	TEST_ASSERT_VAL("wrong comp", m.comp == comp);
+	if (ext)
+		TEST_ASSERT_VAL("wrong ext", m.ext && !strcmp(ext, m.ext));
+	else
+		TEST_ASSERT_VAL("wrong ext", !m.ext);
+	if (name)
+		TEST_ASSERT_VAL("wrong name", m.name && !strcmp(name, m.name));
+	else
+		TEST_ASSERT_VAL("wrong name", !m.name);
+
+	free(m.name);
+	free(m.ext);
+	return 0;
+}
+
+/* Assert that is_kernel_module(@path, @cpumode) agrees with @expect, then log the expectation. */
+static int test_is_kernel_module(const char *path, int cpumode, bool expect)
+{
+	TEST_ASSERT_VAL("is_kernel_module", (!is_kernel_module(path, cpumode)) == (!expect));
+	pr_debug("%s (cpumode: %d) - is_kernel_module: %s\n",
+			path, cpumode, expect ? "true" : "false");
+	return 0;
+}
+
+#define T(path, an, ae, k, c, n, e) \
+	TEST_ASSERT_VAL("failed", !test(path, an, ae, k, c, n, e))
+/* M(): assert that test_is_kernel_module(path, cpumode, expect) returns 0 */
+#define M(path, c, e) \
+	TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
+
+int test__kmod_path__parse(struct test *t __maybe_unused, int subtest __maybe_unused)
+{
+	/* path                alloc_name  alloc_ext   kmod  comp   name     ext */
+	T("/xxxx/xxxx/x-x.ko", true      , true      , true, false, "[x_x]", NULL);
+	T("/xxxx/xxxx/x-x.ko", false     , true      , true, false, NULL   , NULL);
+	T("/xxxx/xxxx/x-x.ko", true      , false     , true, false, "[x_x]", NULL);
+	T("/xxxx/xxxx/x-x.ko", false     , false     , true, false, NULL   , NULL);
+	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true); /* path, cpumode, expect */
+	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true);
+	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false);
+
+#ifdef HAVE_ZLIB_SUPPORT
+	/* path                alloc_name  alloc_ext   kmod  comp  name   ext */
+	T("/xxxx/xxxx/x.ko.gz", true     , true      , true, true, "[x]", "gz");
+	T("/xxxx/xxxx/x.ko.gz", false    , true      , true, true, NULL , "gz");
+	T("/xxxx/xxxx/x.ko.gz", true     , false     , true, true, "[x]", NULL);
+	T("/xxxx/xxxx/x.ko.gz", false    , false     , true, true, NULL , NULL);
+	M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+	M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
+	M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_USER, false);
+
+	/* path              alloc_name  alloc_ext  kmod   comp  name    ext */
+	T("/xxxx/xxxx/x.gz", true      , true     , false, true, "x.gz" ,"gz");
+	T("/xxxx/xxxx/x.gz", false     , true     , false, true, NULL   ,"gz");
+	T("/xxxx/xxxx/x.gz", true      , false    , false, true, "x.gz" , NULL);
+	T("/xxxx/xxxx/x.gz", false     , false    , false, true, NULL   , NULL);
+	M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+	M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_KERNEL, false);
+	M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_USER, false);
+
+	/* path   alloc_name  alloc_ext  kmod   comp  name     ext */
+	T("x.gz", true      , true     , false, true, "x.gz", "gz");
+	T("x.gz", false     , true     , false, true, NULL  , "gz");
+	T("x.gz", true      , false    , false, true, "x.gz", NULL);
+	T("x.gz", false     , false    , false, true, NULL  , NULL);
+	M("x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+	M("x.gz", PERF_RECORD_MISC_KERNEL, false);
+	M("x.gz", PERF_RECORD_MISC_USER, false);
+
+	/* path      alloc_name  alloc_ext  kmod  comp  name  ext */
+	T("x.ko.gz", true      , true     , true, true, "[x]", "gz");
+	T("x.ko.gz", false     , true     , true, true, NULL , "gz");
+	T("x.ko.gz", true      , false    , true, true, "[x]", NULL);
+	T("x.ko.gz", false     , false    , true, true, NULL , NULL);
+	M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+	M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
+	M("x.ko.gz", PERF_RECORD_MISC_USER, false);
+#endif
+
+	/* path            alloc_name  alloc_ext  kmod  comp   name             ext */
+	T("[test_module]", true      , true     , true, false, "[test_module]", NULL);
+	T("[test_module]", false     , true     , true, false, NULL           , NULL);
+	T("[test_module]", true      , false    , true, false, "[test_module]", NULL);
+	T("[test_module]", false     , false    , true, false, NULL           , NULL);
+	M("[test_module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+	M("[test_module]", PERF_RECORD_MISC_KERNEL, true);
+	M("[test_module]", PERF_RECORD_MISC_USER, false);
+
+	/* path            alloc_name  alloc_ext  kmod  comp   name             ext */
+	T("[test.module]", true      , true     , true, false, "[test.module]", NULL);
+	T("[test.module]", false     , true     , true, false, NULL           , NULL);
+	T("[test.module]", true      , false    , true, false, "[test.module]", NULL);
+	T("[test.module]", false     , false    , true, false, NULL           , NULL);
+	M("[test.module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+	M("[test.module]", PERF_RECORD_MISC_KERNEL, true);
+	M("[test.module]", PERF_RECORD_MISC_USER, false);
+
+	/* path     alloc_name  alloc_ext  kmod   comp   name      ext */
+	T("[vdso]", true      , true     , false, false, "[vdso]", NULL);
+	T("[vdso]", false     , true     , false, false, NULL    , NULL);
+	T("[vdso]", true      , false    , false, false, "[vdso]", NULL);
+	T("[vdso]", false     , false    , false, false, NULL    , NULL);
+	M("[vdso]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+	M("[vdso]", PERF_RECORD_MISC_KERNEL, false);
+	M("[vdso]", PERF_RECORD_MISC_USER, false);
+
+	/* path         alloc_name  alloc_ext  kmod   comp   name          ext */
+	T("[vsyscall]", true      , true     , false, false, "[vsyscall]", NULL);
+	T("[vsyscall]", false     , true     , false, false, NULL        , NULL);
+	T("[vsyscall]", true      , false    , false, false, "[vsyscall]", NULL);
+	T("[vsyscall]", false     , false    , false, false, NULL        , NULL);
+	M("[vsyscall]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+	M("[vsyscall]", PERF_RECORD_MISC_KERNEL, false);
+	M("[vsyscall]", PERF_RECORD_MISC_USER, false);
+
+	/* path                alloc_name  alloc_ext  kmod   comp   name                 ext */
+	T("[kernel.kallsyms]", true      , true     , false, false, "[kernel.kallsyms]", NULL);
+	T("[kernel.kallsyms]", false     , true     , false, false, NULL               , NULL);
+	T("[kernel.kallsyms]", true      , false    , false, false, "[kernel.kallsyms]", NULL);
+	T("[kernel.kallsyms]", false     , false    , false, false, NULL               , NULL);
+	M("[kernel.kallsyms]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+	M("[kernel.kallsyms]", PERF_RECORD_MISC_KERNEL, false);
+	M("[kernel.kallsyms]", PERF_RECORD_MISC_USER, false);
+
+	return 0;
+}
diff --git a/tests/llvm.c b/tests/llvm.c
new file mode 100644
index 0000000..a039f93
--- /dev/null
+++ b/tests/llvm.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <bpf/libbpf.h>
+#include <util/llvm-utils.h>
+#include <util/cache.h>
+#include "llvm.h"
+#include "tests.h"
+#include "debug.h"
+#include "util.h"
+
+/* Parse check: open the object with libbpf when built in, otherwise just pass. */
+#ifdef HAVE_LIBBPF_SUPPORT
+static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
+{
+	struct bpf_object *bpf_obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL);
+
+	if (libbpf_get_error(bpf_obj))
+		return TEST_FAIL;
+	bpf_object__close(bpf_obj);
+	return TEST_OK;
+}
+#else
+static int test__bpf_parsing(void *obj_buf __maybe_unused,
+			     size_t obj_buf_sz __maybe_unused)
+{
+	pr_debug("Skip bpf parsing\n");
+	return TEST_OK;
+}
+#endif
+
+static struct {
+	const char *source;		/* C source text that is piped into clang */
+	const char *desc;		/* subtest description, also used in failure messages */
+	bool should_load_fail;		/* handed back through *should_load_fail */
+} bpf_source_table[__LLVM_TESTCASE_MAX] = {
+	[LLVM_TESTCASE_BASE] = {
+		.source = test_llvm__bpf_base_prog,
+		.desc = "Basic BPF llvm compile",
+	},
+	[LLVM_TESTCASE_KBUILD] = {
+		.source = test_llvm__bpf_test_kbuild_prog,
+		.desc = "kbuild searching",
+	},
+	[LLVM_TESTCASE_BPF_PROLOGUE] = {
+		.source = test_llvm__bpf_test_prologue_prog,
+		.desc = "Compile source for BPF prologue generation",
+	},
+	[LLVM_TESTCASE_BPF_RELOCATION] = {
+		.source = test_llvm__bpf_test_relocation,
+		.desc = "Compile source for BPF relocation",
+		.should_load_fail = true,	/* test__llvm() skips the parse step for it */
+	},
+};
+
+/*
+ * Compile the BPF test source 'idx' with clang and return the object through
+ * p_obj_buf/p_obj_buf_sz (caller frees).  'should_load_fail' may be NULL.
+ */
+int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz,
+			     enum test_llvm__testcase idx, bool force,
+			     bool *should_load_fail)
+{
+	const char *source, *desc;
+	const char *tmpl_old, *clang_opt_old;
+	char *tmpl_new = NULL, *clang_opt_new = NULL;
+	int err, old_verbose, ret = TEST_FAIL;
+
+	if (idx >= __LLVM_TESTCASE_MAX)
+		return TEST_FAIL;
+
+	source = bpf_source_table[idx].source;
+	desc = bpf_source_table[idx].desc;
+	if (should_load_fail)
+		*should_load_fail = bpf_source_table[idx].should_load_fail;
+
+	/*
+	 * Skip this test if user's .perfconfig doesn't set [llvm] section
+	 * and clang is not found in $PATH, and this is not perf test -v
+	 */
+	if (!force && (verbose <= 0 &&
+		       !llvm_param.user_set_param &&
+		       llvm__search_clang())) {
+		pr_debug("No clang and no verbosive, skip this test\n");
+		return TEST_SKIP;
+	}
+
+	/*
+	 * llvm is verbose on errors. Suppress all error output if not
+	 * 'perf test -v'; every exit below goes through 'out', which undoes it.
+	 */
+	old_verbose = verbose;
+	if (verbose == 0)
+		verbose = -1;
+
+	*p_obj_buf = NULL;
+	*p_obj_buf_sz = 0;
+
+	if (!llvm_param.clang_bpf_cmd_template)
+		goto out;
+
+	if (!llvm_param.clang_opt)
+		llvm_param.clang_opt = strdup("");
+	if (!llvm_param.clang_opt)
+		goto out;
+
+	/* A failed asprintf() leaves its pointer undefined: reset it for free(). */
+	if (asprintf(&tmpl_new, "echo '%s' | %s%s", source,
+		     llvm_param.clang_bpf_cmd_template,
+		     old_verbose ? "" : " 2>/dev/null") < 0)
+		tmpl_new = NULL;
+	if (tmpl_new && asprintf(&clang_opt_new, "-xc %s", llvm_param.clang_opt) < 0)
+		clang_opt_new = NULL;
+	if (!tmpl_new || !clang_opt_new)
+		goto out;
+
+	tmpl_old = llvm_param.clang_bpf_cmd_template;
+	llvm_param.clang_bpf_cmd_template = tmpl_new;
+	clang_opt_old = llvm_param.clang_opt;
+	llvm_param.clang_opt = clang_opt_new;
+
+	err = llvm__compile_bpf("-", p_obj_buf, p_obj_buf_sz);
+
+	llvm_param.clang_bpf_cmd_template = tmpl_old;
+	llvm_param.clang_opt = clang_opt_old;
+	if (!err)
+		ret = TEST_OK;
+out:
+	verbose = old_verbose;
+	free(tmpl_new);
+	free(clang_opt_new);
+	if (ret != TEST_OK)
+		pr_debug("Failed to compile test case: '%s'\n", desc);
+	return ret;
+}
+
+/* Compile the subtest's BPF source; parse it unless its load must fail. */
+int test__llvm(struct test *test __maybe_unused, int subtest)
+{
+	bool should_load_fail = false;
+	size_t obj_buf_sz = 0;
+	void *obj_buf = NULL;
+	int ret;
+
+	if (subtest < 0 || subtest >= __LLVM_TESTCASE_MAX)
+		return TEST_FAIL;
+
+	ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, subtest, false,
+				       &should_load_fail);
+	if (ret != TEST_OK || should_load_fail)
+		goto out;
+
+	ret = test__bpf_parsing(obj_buf, obj_buf_sz);
+	if (ret != TEST_OK)
+		pr_debug("Failed to parse test case '%s'\n",
+			 bpf_source_table[subtest].desc);
+out:
+	free(obj_buf);
+	return ret;
+}
+
+int test__llvm_subtest_get_nr(void)
+{
+	return __LLVM_TESTCASE_MAX;	/* bpf_source_table has exactly this many slots */
+}
+
+/* Description of 'subtest' from bpf_source_table, NULL when out of range. */
+const char *test__llvm_subtest_get_desc(int subtest)
+{
+	if (subtest >= 0 && subtest < __LLVM_TESTCASE_MAX)
+		return bpf_source_table[subtest].desc;
+	return NULL;
+}
diff --git a/tests/llvm.h b/tests/llvm.h
new file mode 100644
index 0000000..f68b0d9
--- /dev/null
+++ b/tests/llvm.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_TEST_LLVM_H
+#define PERF_TEST_LLVM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h> /* for size_t */
+#include <stdbool.h> /* for bool */
+
+extern const char test_llvm__bpf_base_prog[];
+extern const char test_llvm__bpf_test_kbuild_prog[];
+extern const char test_llvm__bpf_test_prologue_prog[];
+extern const char test_llvm__bpf_test_relocation[];
+
+enum test_llvm__testcase {
+	LLVM_TESTCASE_BASE,
+	LLVM_TESTCASE_KBUILD,
+	LLVM_TESTCASE_BPF_PROLOGUE,
+	LLVM_TESTCASE_BPF_RELOCATION,
+	__LLVM_TESTCASE_MAX,		/* number of test cases; not a valid index */
+};
+
+int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz,
+			     enum test_llvm__testcase index, bool force,
+			     bool *should_load_fail);	/* should_load_fail may be NULL */
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/tests/make b/tests/make
new file mode 100644
index 0000000..e467235
--- /dev/null
+++ b/tests/make
@@ -0,0 +1,368 @@
+include ../scripts/Makefile.include
+
+ifndef MK
+ifeq ($(MAKECMDGOALS),)
+# no target specified, trigger the whole suite
+all:
+	@echo "Testing Makefile";      $(MAKE) -sf tests/make MK=Makefile
+	@echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1 SET_O=1
+else
+# run only specific test over 'Makefile'
+%:
+	@echo "Testing Makefile";      $(MAKE) -sf tests/make MK=Makefile $@
+endif
+else
+PERF := .
+PERF_O := $(PERF)
+O_OPT :=
+FULL_O := $(shell readlink -f $(PERF_O) || echo $(PERF_O))
+
+ifneq ($(O),)
+  FULL_O := $(shell readlink -f $(O) || echo $(O))
+  PERF_O := $(FULL_O)
+  ifeq ($(SET_O),1)
+    O_OPT := 'O=$(FULL_O)'
+  endif
+  K_O_OPT := 'O=$(FULL_O)'
+endif
+
+PARALLEL_OPT=
+ifeq ($(SET_PARALLEL),1)
+  cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+  ifeq ($(filter-out 0,$(cores)),) # 0 or nothing detected: a bare -j would mean unlimited jobs
+    cores := 1
+  endif
+  PARALLEL_OPT="-j$(cores)"
+endif
+
+# As per kernel Makefile, avoid funny character set dependencies
+unexport LC_ALL
+LC_COLLATE=C
+LC_NUMERIC=C
+export LC_COLLATE LC_NUMERIC
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+include $(srctree)/tools/scripts/Makefile.arch
+
+# FIXME looks like x86 is the only arch running tests ;-)
+# we need some IS_(32/64) flag to make this generic
+ifeq ($(ARCH)$(IS_64_BIT), x861)
+lib = lib64
+else
+lib = lib
+endif
+
+has = $(shell which $1 2>/dev/null)
+
+# standard single make variable specified
+make_clean_all      := clean all
+make_python_perf_so := python/perf.so
+make_debug          := DEBUG=1
+make_no_libperl     := NO_LIBPERL=1
+make_no_libpython   := NO_LIBPYTHON=1
+make_no_scripts     := NO_LIBPYTHON=1 NO_LIBPERL=1
+make_no_newt        := NO_NEWT=1
+make_no_slang       := NO_SLANG=1
+make_no_gtk2        := NO_GTK2=1
+make_no_ui          := NO_NEWT=1 NO_SLANG=1 NO_GTK2=1
+make_no_demangle    := NO_DEMANGLE=1
+make_no_libelf      := NO_LIBELF=1
+make_no_libunwind   := NO_LIBUNWIND=1
+make_no_libdw_dwarf_unwind := NO_LIBDW_DWARF_UNWIND=1
+make_no_backtrace   := NO_BACKTRACE=1
+make_no_libnuma     := NO_LIBNUMA=1
+make_no_libaudit    := NO_LIBAUDIT=1
+make_no_libbionic   := NO_LIBBIONIC=1
+make_no_auxtrace    := NO_AUXTRACE=1
+make_no_libbpf	    := NO_LIBBPF=1
+make_no_libcrypto   := NO_LIBCRYPTO=1
+make_with_babeltrace:= LIBBABELTRACE=1
+make_no_sdt	    := NO_SDT=1
+make_with_clangllvm := LIBCLANGLLVM=1
+make_tags           := tags
+make_cscope         := cscope
+make_help           := help
+make_doc            := doc
+make_perf_o           := perf.o
+make_util_map_o       := util/map.o
+make_util_pmu_bison_o := util/pmu-bison.o
+make_install        := install
+make_install_bin    := install-bin
+make_install_doc    := install-doc
+make_install_man    := install-man
+make_install_html   := install-html
+make_install_info   := install-info
+make_install_pdf    := install-pdf
+make_install_prefix       := install prefix=/tmp/krava
+make_install_prefix_slash := install prefix=/tmp/krava/
+make_static         := LDFLAGS=-static
+
+# all the NO_* variables combined
+make_minimal        := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
+make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
+make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
+make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
+make_minimal        += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1
+
+# $(run) contains all available tests
+run := make_pure
+# Targets 'clean all' can be run together only through top level
+# Makefile because we detect clean target in Makefile.perf and
+# disable features detection
+ifeq ($(MK),Makefile)
+run += make_clean_all
+MAKE_F := $(MAKE)
+else
+MAKE_F := $(MAKE) -f $(MK)
+endif
+run += make_python_perf_so
+run += make_debug
+run += make_no_libperl
+run += make_no_libpython
+run += make_no_scripts
+run += make_no_newt
+run += make_no_slang
+run += make_no_gtk2
+run += make_no_ui
+run += make_no_demangle
+run += make_no_libelf
+run += make_no_libunwind
+run += make_no_libdw_dwarf_unwind
+run += make_no_backtrace
+run += make_no_libnuma
+run += make_no_libaudit
+run += make_no_libbionic
+run += make_no_auxtrace
+run += make_no_libbpf
+run += make_with_babeltrace
+run += make_with_clangllvm
+run += make_help
+run += make_doc
+run += make_perf_o
+run += make_util_map_o
+run += make_util_pmu_bison_o
+run += make_install
+run += make_install_bin
+run += make_install_prefix
+run += make_install_prefix_slash
+# FIXME 'install-*' commented out till they're fixed
+# run += make_install_doc
+# run += make_install_man
+# run += make_install_html
+# run += make_install_info
+# run += make_install_pdf
+run += make_minimal
+run += make_static
+
+ifneq ($(call has,ctags),)
+run += make_tags
+endif
+ifneq ($(call has,cscope),)
+run += make_cscope
+endif
+
+# $(run_O) contains same portion of $(run) tests with '_O' attached
+# to distinguish O=... tests
+run_O := $(addsuffix _O,$(run))
+
+# disable some tests for O=...
+run_O := $(filter-out make_python_perf_so_O,$(run_O))
+
+# define test for each compile as 'test_NAME' variable
+# with the test itself as a value
+test_make_tags   = test -f tags
+test_make_cscope = test -f cscope.out
+
+test_make_tags_O   := $(test_make_tags)
+test_make_cscope_O := $(test_make_cscope)
+
+test_ok          := true
+test_make_help   := $(test_ok)
+test_make_doc    := $(test_ok)
+test_make_help_O := $(test_ok)
+test_make_doc_O  := $(test_ok)
+
+test_make_python_perf_so := test -f $(PERF_O)/python/perf.so
+
+test_make_perf_o           := test -f $(PERF_O)/perf.o
+test_make_util_map_o       := test -f $(PERF_O)/util/map.o
+test_make_util_pmu_bison_o := test -f $(PERF_O)/util/pmu-bison.o
+
+define test_dest_files
+  for file in $(1); do				\
+    if [ ! -x $$TMP_DEST/$$file ]; then		\
+      echo "  failed to find: $$file";		\
+    fi						\
+  done
+endef
+
+installed_files_bin := bin/perf
+installed_files_bin += etc/bash_completion.d/perf
+installed_files_bin += libexec/perf-core/perf-archive
+
+installed_files_plugins := $(lib)/traceevent/plugins/plugin_cfg80211.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_scsi.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_xen.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_function.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_sched_switch.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_mac80211.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_kvm.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_kmem.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_hrtimer.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_jbd2.so
+
+installed_files_all := $(installed_files_bin)
+installed_files_all += $(installed_files_plugins)
+
+test_make_install       := $(call test_dest_files,$(installed_files_all))
+test_make_install_O     := $(call test_dest_files,$(installed_files_all))
+test_make_install_bin   := $(call test_dest_files,$(installed_files_bin))
+test_make_install_bin_O := $(call test_dest_files,$(installed_files_bin))
+
+# We prefix all installed files for make_install_prefix(_slash)
+# with '/tmp/krava' to match installed/prefix-ed files.
+installed_files_all_prefix := $(addprefix /tmp/krava/,$(installed_files_all))
+test_make_install_prefix   :=  $(call test_dest_files,$(installed_files_all_prefix))
+test_make_install_prefix_O :=  $(call test_dest_files,$(installed_files_all_prefix))
+
+test_make_install_prefix_slash   := $(test_make_install_prefix)
+test_make_install_prefix_slash_O := $(test_make_install_prefix_O)
+
+# FIXME nothing gets installed
+test_make_install_man    := test -f $$TMP_DEST/share/man/man1/perf.1
+test_make_install_man_O  := $(test_make_install_man)
+
+# FIXME nothing gets installed
+test_make_install_doc    := $(test_ok)
+test_make_install_doc_O  := $(test_ok)
+
+# FIXME nothing gets installed
+test_make_install_html   := $(test_ok)
+test_make_install_html_O := $(test_ok)
+
+# FIXME nothing gets installed
+test_make_install_info   := $(test_ok)
+test_make_install_info_O := $(test_ok)
+
+# FIXME nothing gets installed
+test_make_install_pdf    := $(test_ok)
+test_make_install_pdf_O  := $(test_ok)
+
+test_make_python_perf_so_O    := test -f $$TMP_O/python/perf.so
+test_make_perf_o_O            := test -f $$TMP_O/perf.o
+test_make_util_map_o_O        := test -f $$TMP_O/util/map.o
+test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o
+
+test_default = test -x $(PERF_O)/perf
+test = $(if $(test_$1),$(test_$1),$(test_default))
+
+test_default_O = test -x $$TMP_O/perf
+test_O = $(if $(test_$1),$(test_$1),$(test_default_O))
+
+all:
+
+ifdef SHUF
+run := $(shell shuf -e $(run))
+run_O := $(shell shuf -e $(run_O))
+endif
+
+max_width := $(shell echo $(run_O) | sed 's/ /\n/g' | wc -L)
+
+ifdef DEBUG
+d := $(info run   $(run))
+d := $(info run_O $(run_O))
+endif
+
+MAKEFLAGS := --no-print-directory
+
+clean := @(cd $(PERF); $(MAKE_F) -s $(O_OPT) clean >/dev/null && $(MAKE) -s $(O_OPT) -C ../build clean >/dev/null)
+
+$(run):
+	$(call clean)
+	@TMP_DEST=$$(mktemp -d); \
+	cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \
+	printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \
+	( eval $$cmd ) >> $@ 2>&1; \
+	echo "  test: $(call test,$@)" >> $@ 2>&1; \
+	$(call test,$@) && \
+	rm -rf $@ $$TMP_DEST || (cat $@ ; false)
+
+$(run_O):
+	$(call clean)
+	@TMP_O=$$(mktemp -d); \
+	TMP_DEST=$$(mktemp -d); \
+	cmd="cd $(PERF) && $(MAKE_F) $($(patsubst %_O,%,$@)) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST"; \
+	printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \
+	( eval $$cmd ) >> $@ 2>&1 && \
+	echo "  test: $(call test_O,$@)" >> $@ 2>&1; \
+	$(call test_O,$@) && \
+	rm -rf $@ $$TMP_O $$TMP_DEST || (cat $@ ; false)
+
+tarpkg:
+	@cmd="$(PERF)/tests/perf-targz-src-pkg $(PERF)"; \
+	echo "- $@: $$cmd" && echo $$cmd > $@ && \
+	( eval $$cmd ) >> $@ 2>&1 && \
+	rm -f $@
+
+KERNEL_O := ../..
+ifneq ($(O),)
+  KERNEL_O := $(O)
+endif
+
+make_kernelsrc:
+	@echo "- make -C <kernelsrc> $(PARALLEL_OPT) $(K_O_OPT) tools/perf"
+	$(call clean); \
+	(make -C ../.. $(PARALLEL_OPT) $(K_O_OPT) tools/perf) > $@ 2>&1 && \
+	test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
+
+make_kernelsrc_tools:
+	@echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) $(K_O_OPT) perf"
+	$(call clean); \
+	(make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
+	test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
+
+FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP
+FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC
+
+all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
+	@echo OK
+	@rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC)
+
+out: $(run_O)
+	@echo OK
+	@rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC)
+
+ifeq ($(REUSE_FEATURES_DUMP),1)
+$(FEATURES_DUMP_FILE):
+	$(call clean)
+	@cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) feature-dump"; \
+	echo "- $@: $$cmd" && echo $$cmd && \
+	( eval $$cmd ) > /dev/null 2>&1
+
+$(FEATURES_DUMP_FILE_STATIC):
+	$(call clean)
+	@cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) LDFLAGS='-static' feature-dump"; \
+	echo "- $@: $$cmd" && echo $$cmd && \
+	( eval $$cmd ) > /dev/null 2>&1
+
+# Add feature dump dependency for run/run_O targets
+$(foreach t,$(run) $(run_O),$(eval \
+	$(t): $(if $(findstring make_static,$(t)),\
+		$(FEATURES_DUMP_FILE_STATIC),\
+		$(FEATURES_DUMP_FILE))))
+
+# Append 'FEATURES_DUMP=' option to all test cases. For example:
+# make_no_libbpf: NO_LIBBPF=1  --> NO_LIBBPF=1 FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP
+# make_static: LDFLAGS=-static --> LDFLAGS=-static FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP_STATIC
+$(foreach t,$(run),$(if $(findstring make_static,$(t)),\
+			$(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE_STATIC)),\
+			$(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE))))
+endif
+
+.PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools
+endif # ifndef MK
diff --git a/tests/mem.c b/tests/mem.c
new file mode 100644
index 0000000..0f82ee9
--- /dev/null
+++ b/tests/mem.c
@@ -0,0 +1,56 @@
+#include "util/mem-events.h"
+#include "util/symbol.h"
+#include "linux/perf_event.h"
+#include "util/debug.h"
+#include "tests.h"
+#include <string.h>
+
+/* Format data_src (snoop text, then level text) and compare it with 'string'. */
+static int check(union perf_mem_data_src data_src, const char *string)
+{
+	struct mem_info mi = { .data_src = data_src };
+	char failure[100];
+	char out[100];
+	int len;
+
+	/* The level text is appended right behind the snoop text. */
+	len = perf_mem__snp_scnprintf(out, sizeof(out), &mi);
+	len += perf_mem__lvl_scnprintf(out + len, sizeof(out) - len, &mi);
+	scnprintf(failure, sizeof(failure), "unexpected %s", out);
+	TEST_ASSERT_VAL(failure, strcmp(string, out) == 0);
+	return 0;
+}
+
+/* Feed a series of data_src encodings to check() and OR the results together. */
+int test__mem(struct test *text __maybe_unused, int subtest __maybe_unused)
+{
+	union perf_mem_data_src src;
+	int failed = 0;
+
+	memset(&src, 0, sizeof(src));
+	/* Hit at level number 4, not remote. */
+	src.mem_lvl_num = 4;
+	src.mem_lvl = PERF_MEM_LVL_HIT;
+	failed |= check(src, "N/AL4 hit");
+
+	/* Same access with the remote flag set. */
+	src.mem_remote = 1;
+	failed |= check(src, "N/ARemote L4 hit");
+
+	/* Miss in PMEM, not remote. */
+	src.mem_remote = 0;
+	src.mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
+	src.mem_lvl = PERF_MEM_LVL_MISS;
+	failed |= check(src, "N/APMEM miss");
+
+	/* Same access with the remote flag set. */
+	src.mem_remote = 1;
+	failed |= check(src, "N/ARemote PMEM miss");
+
+	/* Remote RAM miss with the forward snoop flag set. */
+	src.mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+	src.mem_snoopx = PERF_MEM_SNOOPX_FWD;
+	failed |= check(src, "FwdRemote RAM miss");
+
+	return failed;
+}
diff --git a/tests/mem2node.c b/tests/mem2node.c
new file mode 100644
index 0000000..0c3c87f
--- /dev/null
+++ b/tests/mem2node.c
@@ -0,0 +1,75 @@
+#include <linux/compiler.h>
+#include <linux/bitmap.h>
+#include "cpumap.h"
+#include "mem2node.h"
+#include "tests.h"
+
+static struct node {
+	int		 node;	/* node number copied into memory_node.node */
+	const char 	*map;	/* list string handed to get_bitmap() for memory_node.set */
+} test_nodes[] = {
+	{ .node = 0, .map = "0"     },
+	{ .node = 1, .map = "1-2"   },
+	{ .node = 3, .map = "5-7,9" },
+};
+
+#define T TEST_ASSERT_VAL
+
+/*
+ * Bitmap of 'nbits' bits with every entry of the list string 'str' set; NULL on failure.
+ */
+static unsigned long *get_bitmap(const char *str, int nbits)
+{
+	struct cpu_map *cpus = cpu_map__new(str);
+	unsigned long *bits = bitmap_alloc(nbits);
+	int idx;
+
+	if (!cpus) {
+		free(bits);
+		return NULL;
+	}
+
+	if (bits) {
+		bitmap_zero(bits, nbits);
+		for (idx = 0; idx < cpus->nr; idx++)
+			set_bit(cpus->map[idx], bits);
+	}
+
+	cpu_map__put(cpus);
+	return bits;
+}
+
+/* Describe three memory nodes to mem2node and check address-to-node lookups. */
+int test__mem2node(struct test *t __maybe_unused, int subtest __maybe_unused)
+{
+	struct memory_node nodes[3];
+	struct mem2node map;
+	struct perf_env env = {
+		.memory_bsize    = 0x100,
+		.nr_memory_nodes = ARRAY_SIZE(nodes),
+		.memory_nodes    = (struct memory_node *) &nodes[0],
+	};
+	unsigned int n;
+
+	for (n = 0; n < ARRAY_SIZE(nodes); n++) {
+		nodes[n].size = 10;
+		nodes[n].node = test_nodes[n].node;
+		nodes[n].set  = get_bitmap(test_nodes[n].map, 10);
+		T("failed: alloc bitmap", nodes[n].set);
+	}
+
+	T("failed: mem2node__init", !mem2node__init(&map, &env));
+	T("failed: mem2node__node", mem2node__node(&map,   0x50) ==  0);
+	T("failed: mem2node__node", mem2node__node(&map,  0x100) ==  1);
+	T("failed: mem2node__node", mem2node__node(&map,  0x250) ==  1);
+	T("failed: mem2node__node", mem2node__node(&map,  0x500) ==  3);
+	T("failed: mem2node__node", mem2node__node(&map,  0x650) ==  3);
+	T("failed: mem2node__node", mem2node__node(&map,  0x450) == -1);
+	T("failed: mem2node__node", mem2node__node(&map, 0x1050) == -1);
+
+	for (n = 0; n < ARRAY_SIZE(nodes); n++)
+		free(nodes[n].set);
+
+	mem2node__exit(&map);
+	return 0;
+}
diff --git a/tests/mmap-basic.c b/tests/mmap-basic.c
new file mode 100644
index 0000000..0919b07
--- /dev/null
+++ b/tests/mmap-basic.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+/* For the CPU_* macros */
+#include <pthread.h>
+
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tests.h"
+#include <linux/err.h>
+#include <linux/kernel.h>
+
+/*
+ * This test opens one tracepoint event per syscall of a small set of
+ * getpid-like syscalls (getsid, getppid, getpgid), mmaps the events and
+ * then calls each syscall a random number of times.
+ *
+ * It will receive the events, using mmap, use its PERF_SAMPLE_ID generated
+ * sample.id field to map back to its respective perf_evsel instance.
+ *
+ * Then it checks if the number of syscalls reported as perf events by
+ * the kernel corresponds to the number of syscalls made.
+ */
+int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err = -1;
+	union perf_event *event;
+	struct thread_map *threads;
+	struct cpu_map *cpus;
+	struct perf_evlist *evlist;
+	cpu_set_t cpu_set;
+	const char *syscall_names[] = { "getsid", "getppid", "getpgid", };
+	pid_t (*syscalls[])(void) = { (void *)getsid, getppid, (void*)getpgid };
+#define nsyscalls ARRAY_SIZE(syscall_names)
+	unsigned int nr_events[nsyscalls],
+		     expected_nr_events[nsyscalls], i, j;
+	struct perf_evsel *evsels[nsyscalls], *evsel;
+	char sbuf[STRERR_BUFSIZE];
+	struct perf_mmap *md;
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (cpus == NULL) {
+		pr_debug("cpu_map__new\n");
+		goto out_free_threads;
+	}
+
+	CPU_ZERO(&cpu_set);
+	CPU_SET(cpus->map[0], &cpu_set);
+	/* Run on the first CPU of the map; only evlist->mmap[0] is read below. */
+	if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
+		pr_debug("sched_setaffinity() failed on CPU %d: %s ",
+			 cpus->map[0], str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_free_cpus;
+	}
+
+	evlist = perf_evlist__new();
+	if (evlist == NULL) {
+		pr_debug("perf_evlist__new\n");
+		goto out_free_cpus;
+	}
+
+	perf_evlist__set_maps(evlist, cpus, threads);
+
+	for (i = 0; i < nsyscalls; ++i) {
+		char name[64];
+
+		snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
+		evsels[i] = perf_evsel__newtp("syscalls", name);
+		if (IS_ERR(evsels[i])) {
+			pr_debug("perf_evsel__new(%s)\n", name);
+			goto out_delete_evlist;
+		}
+
+		evsels[i]->attr.wakeup_events = 1;
+		perf_evsel__set_sample_id(evsels[i], false);
+
+		perf_evlist__add(evlist, evsels[i]);
+
+		if (perf_evsel__open(evsels[i], cpus, threads) < 0) {
+			pr_debug("failed to open counter: %s, "
+				 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+				 str_error_r(errno, sbuf, sizeof(sbuf)));
+			goto out_delete_evlist;
+		}
+
+		nr_events[i] = 0;
+		expected_nr_events[i] = 1 + rand() % 127;
+	}
+
+	if (perf_evlist__mmap(evlist, 128) < 0) {
+		pr_debug("failed to mmap events: %d (%s)\n", errno,
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist;
+	}
+
+	for (i = 0; i < nsyscalls; ++i)
+		for (j = 0; j < expected_nr_events[i]; ++j) {
+			int foo = syscalls[i]();
+			++foo;
+		}
+
+	md = &evlist->mmap[0];
+	if (perf_mmap__read_init(md) < 0)
+		goto out_init;
+
+	while ((event = perf_mmap__read_event(md)) != NULL) {
+		struct perf_sample sample;
+
+		if (event->header.type != PERF_RECORD_SAMPLE) {
+			pr_debug("unexpected %s event\n",
+				 perf_event__name(event->header.type));
+			goto out_delete_evlist;
+		}
+
+		err = perf_evlist__parse_sample(evlist, event, &sample);
+		if (err) {
+			pr_err("Can't parse sample, err = %d\n", err);
+			goto out_delete_evlist;
+		}
+
+		err = -1;
+		evsel = perf_evlist__id2evsel(evlist, sample.id);
+		if (evsel == NULL) {
+			pr_debug("event with id %" PRIu64
+				 " doesn't map to an evsel\n", sample.id);
+			goto out_delete_evlist;
+		}
+		nr_events[evsel->idx]++;
+		perf_mmap__consume(md);
+	}
+	perf_mmap__read_done(md);
+
+out_init:
+	err = 0;
+	evlist__for_each_entry(evlist, evsel) {
+		if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
+			pr_debug("expected %d %s events, got %d\n",
+				 expected_nr_events[evsel->idx],
+				 perf_evsel__name(evsel), nr_events[evsel->idx]);
+			err = -1;
+			goto out_delete_evlist;
+		}
+	}
+
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+	cpus	= NULL;
+	threads = NULL;
+out_free_cpus:
+	cpu_map__put(cpus);
+out_free_threads:
+	thread_map__put(threads);
+	return err;
+}
diff --git a/tests/mmap-thread-lookup.c b/tests/mmap-thread-lookup.c
new file mode 100644
index 0000000..868d82b
--- /dev/null
+++ b/tests/mmap-thread-lookup.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "debug.h"
+#include "tests.h"
+#include "machine.h"
+#include "thread_map.h"
+#include "symbol.h"
+#include "thread.h"
+#include "util.h"
+
+#define THREADS 4
+
+static int go_away;
+
+struct thread_data {
+	pthread_t	pt;		/* handle that threads_destroy() joins */
+	pid_t		tid;		/* SYS_gettid value, set in thread_init() */
+	void		*map;		/* one-page mapping created in thread_init() */
+	int		ready[2];	/* pipe: thread writes [1], creator reads [0] */
+};
+
+static struct thread_data threads[THREADS];	/* slot 0 is the main thread */
+
+/* Record the calling thread's tid and a fresh one-page mapping in 'td'. */
+static int thread_init(struct thread_data *td)
+{
+	const int prot  = PROT_READ | PROT_WRITE | PROT_EXEC;
+	const int flags = MAP_SHARED | MAP_ANONYMOUS;
+	void *addr;
+
+	addr = mmap(NULL, page_size, prot, flags, -1, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap failed");
+		return -1;
+	}
+
+	td->map = addr;
+	td->tid = syscall(SYS_gettid);
+
+	pr_debug("tid = %d, map = %p\n", td->tid, addr);
+	return 0;
+}
+
+/* Thread body: map a page, notify thread_create(), idle until go_away is set. */
+static void *thread_fn(void *arg)
+{
+	struct thread_data *td = arg;
+	int go = 0;	/* the reader ignores the value, but it must be initialized */
+
+	/* NOTE(review): on failure the creator is never notified - confirm read() cannot hang. */
+	if (thread_init(td))
+		return NULL;
+
+	/* Signal thread_create thread is initialized. */
+	if (write(td->ready[1], &go, sizeof(go)) != sizeof(go)) {
+		pr_err("failed to notify\n");
+		munmap(td->map, page_size);
+		return NULL;
+	}
+
+	/* Waiting for main thread to kill us. */
+	while (!go_away)
+		usleep(100);
+
+	munmap(td->map, page_size);
+	return NULL;
+}
+
+/* Start thread 'i' and block until it reports through its 'ready' pipe. */
+static int thread_create(int i)
+{
+	struct thread_data *td = &threads[i];
+	int token, rc;
+
+	if (pipe(td->ready) != 0)
+		return -1;
+
+	rc = pthread_create(&td->pt, NULL, thread_fn, td);
+
+	/* On success, wait for the notification; anything but a full int is an error. */
+	if (rc == 0)
+		rc = read(td->ready[0], &token, sizeof(int)) != sizeof(int);
+
+	close(td->ready[0]);
+	close(td->ready[1]);
+	return rc;
+}
+
+/* Initialize slot 0 for the caller and spawn threads 1..THREADS-1. */
+static int threads_create(void)
+{
+	int idx, rc = 0;
+
+	go_away = 0;
+
+	/* 0 is main thread */
+	if (thread_init(&threads[0]))
+		return -1;
+
+	for (idx = 1; idx < THREADS && rc == 0; idx++)
+		rc = thread_create(idx);
+
+	return rc;
+}
+
+/* Unmap the main thread's page, raise go_away and join the other threads. */
+static int threads_destroy(void)
+{
+	int idx, rc = 0;
+
+	/* cleanup the main thread */
+	munmap(threads[0].map, page_size);
+
+	go_away = 1;
+
+	for (idx = 1; idx < THREADS && rc == 0; idx++)
+		rc = pthread_join(threads[idx].pt, NULL);
+
+	return rc;
+}
+
+typedef int (*synth_cb)(struct machine *machine);
+
+/* synth_cb for the global case: perf_event__synthesize_threads(). */
+static int synth_all(struct machine *machine)
+{
+	return perf_event__synthesize_threads(NULL, perf_event__process,
+					      machine, 0, 500, 1);
+}
+
+/* synth_cb for the process case; -1 when no thread map could be created. */
+static int synth_process(struct machine *machine)
+{
+	struct thread_map *map = thread_map__new_by_pid(getpid());
+	int err;
+
+	if (!map)
+		return -1;
+
+	err = perf_event__synthesize_thread_map(NULL, map, perf_event__process,
+						machine, 0, 500);
+	thread_map__put(map);
+	return err;
+}
+
+/* Spawn the threads, run 'synth' on a new host machine, look up every map. */
+static int mmap_events(synth_cb synth)
+{
+	struct machine *machine;
+	int err, i;
+
+	/*
+	 * threads_create() does not return before all threads are spawned and
+	 * have created their memory map.  They loop until threads_destroy()
+	 * is called, so we can safely run the synthesizing function.
+	 */
+	TEST_ASSERT_VAL("failed to create threads", !threads_create());
+
+	machine = machine__new_host();
+	if (!machine) {
+		pr_debug("failed to create host machine\n");
+		threads_destroy();
+		return -1;
+	}
+
+	dump_trace = verbose > 1 ? 1 : 0;
+	err = synth(machine);
+	dump_trace = 0;
+
+	TEST_ASSERT_VAL("failed to destroy threads", !threads_destroy());
+	TEST_ASSERT_VAL("failed to synthesize maps", !err);
+
+	/* All data is synthesized, try to find map for each thread object. */
+	for (i = 0; i < THREADS; i++) {
+		struct thread_data *td = &threads[i];
+		struct addr_location al;
+		struct thread *thread;
+
+		thread = machine__findnew_thread(machine, getpid(), td->tid);
+		if (!thread) {
+			pr_debug("failed, couldn't find thread\n");
+			err = -1;
+			break;
+		}
+
+		pr_debug("looking for map %p\n", td->map);
+		thread__find_addr_map(thread, PERF_RECORD_MISC_USER, MAP__FUNCTION,
+				      (unsigned long) (td->map + 1), &al);
+		thread__put(thread);
+
+		if (!al.map) {
+			pr_debug("failed, couldn't find map\n");
+			err = -1;
+			break;
+		}
+		pr_debug("map %p, addr %" PRIx64 "\n", al.map, al.map->start);
+	}
+
+	machine__delete_threads(machine);
+	machine__delete(machine);
+	return err;
+}
+
+/*
+ * This test creates 'THREADS' number of threads (including
+ * main thread) and each thread creates memory map.
+ *
+ * When threads are created, we synthesize them with both
+ * (separate tests):
+ *   perf_event__synthesize_thread_map (process based)
+ *   perf_event__synthesize_threads    (global)
+ *
+ * We test we can find all memory maps via:
+ *   thread__find_addr_map
+ *
+ * by using all thread objects.
+ */
+int test__mmap_thread_lookup(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	/* global pass: synth_all() wraps perf_event__synthesize_threads */
+	TEST_ASSERT_VAL("failed with sythesizing all",
+			!mmap_events(synth_all));
+
+	/* process pass: synth_process() wraps perf_event__synthesize_thread_map */
+	TEST_ASSERT_VAL("failed with sythesizing process",
+			!mmap_events(synth_process));
+
+	return 0;
+}
diff --git a/tests/openat-syscall-all-cpus.c b/tests/openat-syscall-all-cpus.c
new file mode 100644
index 0000000..c531e6d
--- /dev/null
+++ b/tests/openat-syscall-all-cpus.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+/* For the CPU_* macros */
+#include <pthread.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <api/fs/fs.h>
+#include <linux/err.h>
+#include <api/fs/tracing_path.h>
+#include "evsel.h"
+#include "tests.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "debug.h"
+#include "stat.h"
+
+/*
+ * Count syscalls:sys_enter_openat tracepoint hits on every CPU in the
+ * cpu map.
+ *
+ * The task is pinned to each CPU in turn and issues nr_openat_calls + idx
+ * openat() calls there (idx being the position in the cpu map), so each
+ * CPU gets a distinct expected count.  The per-CPU counters are then read
+ * back and compared against those expectations.
+ *
+ * CPUs numbered CPU_SETSIZE or higher are skipped in both passes.
+ *
+ * Returns 0 when every checked CPU reports the expected count and -1 on
+ * any setup failure, read failure or count mismatch.
+ */
+int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err = -1, fd, cpu;
+	struct cpu_map *cpus;
+	struct perf_evsel *evsel;
+	unsigned int nr_openat_calls = 111, i;
+	cpu_set_t cpu_set;
+	struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
+	char sbuf[STRERR_BUFSIZE];
+	char errbuf[BUFSIZ];
+
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (cpus == NULL) {
+		pr_debug("cpu_map__new\n");
+		goto out_thread_map_delete;
+	}
+
+	CPU_ZERO(&cpu_set);
+
+	evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+	if (IS_ERR(evsel)) {
+		tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat");
+		pr_debug("%s\n", errbuf);
+		/* cpus is already allocated here, so it must be released too */
+		goto out_cpu_map_delete;
+	}
+
+	if (perf_evsel__open(evsel, cpus, threads) < 0) {
+		pr_debug("failed to open counter: %s, "
+			 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_evsel_delete;
+	}
+
+	for (cpu = 0; cpu < cpus->nr; ++cpu) {
+		unsigned int ncalls = nr_openat_calls + cpu;
+		/*
+		 * XXX eventually lift this restriction in a way that
+		 * keeps perf building on older glibc installations
+		 * without CPU_ALLOC. 1024 cpus in 2010 still seems
+		 * a reasonable upper limit tho :-)
+		 */
+		if (cpus->map[cpu] >= CPU_SETSIZE) {
+			pr_debug("Ignoring CPU %d\n", cpus->map[cpu]);
+			continue;
+		}
+
+		/* Pin to exactly this CPU; the bit is cleared again below. */
+		CPU_SET(cpus->map[cpu], &cpu_set);
+		if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
+			pr_debug("sched_setaffinity() failed on CPU %d: %s\n",
+				 cpus->map[cpu],
+				 str_error_r(errno, sbuf, sizeof(sbuf)));
+			goto out_close_fd;
+		}
+		for (i = 0; i < ncalls; ++i) {
+			fd = openat(0, "/etc/passwd", O_RDONLY);
+			close(fd);
+		}
+		CPU_CLR(cpus->map[cpu], &cpu_set);
+	}
+
+	/*
+	 * Here we need to explicitly preallocate the counts, as if
+	 * we use the auto allocation it will allocate just for 1 cpu,
+	 * as we start by cpu 0.
+	 */
+	if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
+		pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
+		goto out_close_fd;
+	}
+
+	err = 0;
+
+	for (cpu = 0; cpu < cpus->nr; ++cpu) {
+		unsigned int expected;
+
+		if (cpus->map[cpu] >= CPU_SETSIZE)
+			continue;
+
+		if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+			pr_debug("perf_evsel__read_on_cpu\n");
+			err = -1;
+			break;
+		}
+
+		/* Same per-index count that the first loop generated. */
+		expected = nr_openat_calls + cpu;
+		if (perf_counts(evsel->counts, cpu, 0)->val != expected) {
+			pr_debug("perf_evsel__read_on_cpu: expected to intercept %u calls on cpu %d, got %" PRIu64 "\n",
+				 expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val);
+			err = -1;
+		}
+	}
+
+	perf_evsel__free_counts(evsel);
+out_close_fd:
+	perf_evsel__close_fd(evsel);
+out_evsel_delete:
+	perf_evsel__delete(evsel);
+out_cpu_map_delete:
+	/* Drop the reference taken by cpu_map__new() above. */
+	cpu_map__put(cpus);
+out_thread_map_delete:
+	thread_map__put(threads);
+	return err;
+}
diff --git a/tests/openat-syscall-tp-fields.c b/tests/openat-syscall-tp-fields.c
new file mode 100644
index 0000000..344dc3a
--- /dev/null
+++ b/tests/openat-syscall-tp-fields.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/err.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "perf.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "tests.h"
+#include "debug.h"
+#include <errno.h>
+
+#ifndef O_DIRECTORY
+#define O_DIRECTORY    00200000
+#endif
+#ifndef AT_FDCWD
+#define AT_FDCWD       -100
+#endif
+
+/*
+ * Check that the "flags" field of the syscalls:sys_enter_openat
+ * tracepoint carries the flags that were passed to openat().
+ *
+ * The event is opened for the current process, one openat() call is
+ * made with O_RDONLY | O_DIRECTORY, and the mmap'ed buffers are scanned
+ * for the first PERF_RECORD_SAMPLE, whose "flags" value must match.
+ *
+ * Returns 0 on a match.  Returns non-zero on setup failure, sample
+ * parse failure, a flags mismatch, or when no sample shows up within
+ * the polling limit.
+ */
+int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct record_opts opts = {
+		.target = {
+			.uid = UINT_MAX,
+			.uses_mmap = true,
+		},
+		.no_buffering = true,
+		.freq	      = 1,
+		.mmap_pages   = 256,
+		.raw_samples  = true,
+	};
+	const char *filename = "/etc/passwd";
+	int flags = O_RDONLY | O_DIRECTORY;
+	struct perf_evlist *evlist = perf_evlist__new();
+	struct perf_evsel *evsel;
+	int err = -1, i, nr_events = 0, nr_polls = 0;
+	char sbuf[STRERR_BUFSIZE];
+
+	if (evlist == NULL) {
+		pr_debug("%s: perf_evlist__new\n", __func__);
+		goto out;
+	}
+
+	evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+	if (IS_ERR(evsel)) {
+		pr_debug("%s: perf_evsel__newtp\n", __func__);
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__add(evlist, evsel);
+
+	err = perf_evlist__create_maps(evlist, &opts.target);
+	if (err < 0) {
+		pr_debug("%s: perf_evlist__create_maps\n", __func__);
+		goto out_delete_evlist;
+	}
+
+	perf_evsel__config(evsel, &opts, NULL);
+
+	thread_map__set_pid(evlist->threads, 0, getpid());
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		pr_debug("perf_evlist__open: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist;
+	}
+
+	err = perf_evlist__mmap(evlist, UINT_MAX);
+	if (err < 0) {
+		pr_debug("perf_evlist__mmap: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__enable(evlist);
+
+	/*
+	 * Generate the event: only the syscall entry is traced, so the
+	 * result of the call itself is not looked at.
+	 */
+	openat(AT_FDCWD, filename, flags);
+
+	while (1) {
+		int before = nr_events;
+
+		for (i = 0; i < evlist->nr_mmaps; i++) {
+			union perf_event *event;
+			struct perf_mmap *md;
+
+			md = &evlist->mmap[i];
+			if (perf_mmap__read_init(md) < 0)
+				continue;
+
+			while ((event = perf_mmap__read_event(md)) != NULL) {
+				const u32 type = event->header.type;
+				int tp_flags;
+				struct perf_sample sample;
+
+				++nr_events;
+
+				if (type != PERF_RECORD_SAMPLE) {
+					perf_mmap__consume(md);
+					continue;
+				}
+
+				err = perf_evsel__parse_sample(evsel, event, &sample);
+				if (err) {
+					pr_debug("Can't parse sample, err = %d\n", err);
+					goto out_delete_evlist;
+				}
+
+				tp_flags = perf_evsel__intval(evsel, &sample, "flags");
+
+				if (flags != tp_flags) {
+					pr_debug("%s: Expected flags=%#x, got %#x\n",
+						 __func__, flags, tp_flags);
+					/*
+					 * err is 0 after the successful parse
+					 * above; make the mismatch a failure.
+					 */
+					err = -1;
+					goto out_delete_evlist;
+				}
+
+				goto out_ok;
+			}
+			perf_mmap__read_done(md);
+		}
+
+		/* Nothing new in any buffer: wait for more data. */
+		if (nr_events == before)
+			perf_evlist__poll(evlist, 10);
+
+		if (++nr_polls > 5) {
+			pr_debug("%s: no events!\n", __func__);
+			/* err still holds 0 from the setup calls above. */
+			err = -1;
+			goto out_delete_evlist;
+		}
+	}
+out_ok:
+	err = 0;
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+out:
+	return err;
+}
diff --git a/tests/openat-syscall.c b/tests/openat-syscall.c
new file mode 100644
index 0000000..00cd63f
--- /dev/null
+++ b/tests/openat-syscall.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <api/fs/tracing_path.h>
+#include <linux/err.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "thread_map.h"
+#include "evsel.h"
+#include "debug.h"
+#include "tests.h"
+
+/*
+ * Count syscalls:sys_enter_openat tracepoint hits for the current
+ * process: open a per-thread counter, issue nr_openat_calls openat()
+ * calls and check that the counter read back equals that number.
+ *
+ * Returns 0 when the count matches and -1 on any setup failure, read
+ * failure or mismatch.
+ */
+int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err = -1, fd;
+	struct perf_evsel *evsel;
+	unsigned int nr_openat_calls = 111, i;
+	struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
+	char sbuf[STRERR_BUFSIZE];
+	char errbuf[BUFSIZ];
+
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+	if (IS_ERR(evsel)) {
+		tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat");
+		pr_debug("%s\n", errbuf);
+		goto out_thread_map_delete;
+	}
+
+	if (perf_evsel__open_per_thread(evsel, threads) < 0) {
+		pr_debug("failed to open counter: %s, "
+			 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_evsel_delete;
+	}
+
+	for (i = 0; i < nr_openat_calls; ++i) {
+		fd = openat(0, "/etc/passwd", O_RDONLY);
+		close(fd);
+	}
+
+	if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) {
+		pr_debug("perf_evsel__read_on_cpu\n");
+		goto out_close_fd;
+	}
+
+	if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) {
+		/* nr_openat_calls is unsigned, hence %u */
+		pr_debug("perf_evsel__read_on_cpu: expected to intercept %u calls, got %" PRIu64 "\n",
+			 nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val);
+		goto out_close_fd;
+	}
+
+	err = 0;
+out_close_fd:
+	perf_evsel__close_fd(evsel);
+out_evsel_delete:
+	perf_evsel__delete(evsel);
+out_thread_map_delete:
+	thread_map__put(threads);
+	return err;
+}
diff --git a/tests/parse-events.c b/tests/parse-events.c
new file mode 100644
index 0000000..18b0644
--- /dev/null
+++ b/tests/parse-events.c
@@ -0,0 +1,1840 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "parse-events.h"
+#include "evsel.h"
+#include "evlist.h"
+#include <api/fs/fs.h>
+#include "tests.h"
+#include "debug.h"
+#include "util.h"
+#include <dirent.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/kernel.h>
+#include <linux/hw_breakpoint.h>
+#include <api/fs/tracing_path.h>
+
+/* Sample type bits the tracepoint checks below compare attr.sample_type against. */
+#define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \
+			     PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD)
+
+/*
+ * Check a single tracepoint event: one entry, no groups, type
+ * PERF_TYPE_TRACEPOINT, sample_type PERF_TP_SAMPLE_TYPE and a
+ * sample_period of 1.  Returns 0 when all checks hold.
+ */
+static int test__checkevent_tracepoint(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong sample_type",
+		PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type);
+	TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period);
+	return 0;
+}
+
+/*
+ * Check an evlist with more than one entry and no groups, in which
+ * every entry is a tracepoint with sample_type PERF_TP_SAMPLE_TYPE and
+ * sample_period 1.  Returns 0 when all checks hold.
+ */
+static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
+	TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
+
+	evlist__for_each_entry(evlist, evsel) {
+		TEST_ASSERT_VAL("wrong type",
+			PERF_TYPE_TRACEPOINT == evsel->attr.type);
+		TEST_ASSERT_VAL("wrong sample_type",
+			PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type);
+		TEST_ASSERT_VAL("wrong sample_period",
+			1 == evsel->attr.sample_period);
+	}
+	return 0;
+}
+
+/*
+ * Check a single PERF_TYPE_RAW event with config 0x1a.
+ * Returns 0 when all checks hold.
+ */
+static int test__checkevent_raw(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config", 0x1a == evsel->attr.config);
+	return 0;
+}
+
+/*
+ * Check a single event whose attr.type and attr.config are both 1.
+ * Returns 0 when all checks hold.
+ */
+static int test__checkevent_numeric(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+	return 0;
+}
+
+/*
+ * Check a single PERF_TYPE_HARDWARE event with config
+ * PERF_COUNT_HW_INSTRUCTIONS.  Returns 0 when all checks hold.
+ */
+static int test__checkevent_symbolic_name(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+	return 0;
+}
+
+/*
+ * Check a single PERF_TYPE_HARDWARE event with config
+ * PERF_COUNT_HW_CPU_CYCLES, sample_period 0, config1 0 and config2 1.
+ * Returns 0 when all checks hold.
+ */
+static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	/*
+	 * The period value gets configured within perf_evlist__config,
+	 * while this test executes only parse events method.
+	 */
+	TEST_ASSERT_VAL("wrong period",
+			0 == evsel->attr.sample_period);
+	TEST_ASSERT_VAL("wrong config1",
+			0 == evsel->attr.config1);
+	TEST_ASSERT_VAL("wrong config2",
+			1 == evsel->attr.config2);
+	return 0;
+}
+
+/*
+ * Check a single PERF_TYPE_SOFTWARE event with config
+ * PERF_COUNT_SW_PAGE_FAULTS.  Returns 0 when all checks hold.
+ */
+static int test__checkevent_symbolic_alias(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_SW_PAGE_FAULTS == evsel->attr.config);
+	return 0;
+}
+
+/*
+ * Check a single PERF_TYPE_HW_CACHE event with config (1 << 16).
+ * Returns 0 when all checks hold.
+ */
+static int test__checkevent_genhw(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config", (1 << 16) == evsel->attr.config);
+	return 0;
+}
+
+/*
+ * Check a single PERF_TYPE_BREAKPOINT event with config 0, bp_type
+ * HW_BREAKPOINT_R | HW_BREAKPOINT_W and bp_len HW_BREAKPOINT_LEN_4.
+ * Returns 0 when all checks hold.
+ */
+static int test__checkevent_breakpoint(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) ==
+					 evsel->attr.bp_type);
+	TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_4 ==
+					evsel->attr.bp_len);
+	return 0;
+}
+
+/*
+ * Check a single PERF_TYPE_BREAKPOINT event with config 0, bp_type
+ * HW_BREAKPOINT_X and bp_len equal to sizeof(long).
+ * Returns 0 when all checks hold.
+ */
+static int test__checkevent_breakpoint_x(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong bp_type",
+			HW_BREAKPOINT_X == evsel->attr.bp_type);
+	TEST_ASSERT_VAL("wrong bp_len", sizeof(long) == evsel->attr.bp_len);
+	return 0;
+}
+
+/*
+ * Check a single PERF_TYPE_BREAKPOINT event with config 0, bp_type
+ * HW_BREAKPOINT_R and bp_len HW_BREAKPOINT_LEN_4.
+ * Returns 0 when all checks hold.
+ */
+static int test__checkevent_breakpoint_r(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type",
+			PERF_TYPE_BREAKPOINT == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong bp_type",
+			HW_BREAKPOINT_R == evsel->attr.bp_type);
+	TEST_ASSERT_VAL("wrong bp_len",
+			HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len);
+	return 0;
+}
+
+/*
+ * Check a single PERF_TYPE_BREAKPOINT event with config 0, bp_type
+ * HW_BREAKPOINT_W and bp_len HW_BREAKPOINT_LEN_4.
+ * Returns 0 when all checks hold.
+ */
+static int test__checkevent_breakpoint_w(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type",
+			PERF_TYPE_BREAKPOINT == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong bp_type",
+			HW_BREAKPOINT_W == evsel->attr.bp_type);
+	TEST_ASSERT_VAL("wrong bp_len",
+			HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len);
+	return 0;
+}
+
+/*
+ * Check a single PERF_TYPE_BREAKPOINT event with config 0, bp_type
+ * HW_BREAKPOINT_R | HW_BREAKPOINT_W and bp_len HW_BREAKPOINT_LEN_4.
+ * Returns 0 when all checks hold.
+ */
+static int test__checkevent_breakpoint_rw(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type",
+			PERF_TYPE_BREAKPOINT == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong bp_type",
+		(HW_BREAKPOINT_R|HW_BREAKPOINT_W) == evsel->attr.bp_type);
+	TEST_ASSERT_VAL("wrong bp_len",
+			HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len);
+	return 0;
+}
+
+/*
+ * Check the first event has exclude_user and exclude_hv set, with
+ * exclude_kernel and precise_ip clear, then apply the checks of
+ * test__checkevent_tracepoint() and return its result.
+ */
+static int test__checkevent_tracepoint_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+	return test__checkevent_tracepoint(evlist);
+}
+
+/*
+ * Check that the evlist has more than one entry and that every entry
+ * has exclude_kernel and exclude_hv set, with exclude_user and
+ * precise_ip clear, then apply the checks of
+ * test__checkevent_tracepoint_multi() and return its result.
+ */
+static int
+test__checkevent_tracepoint_multi_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
+
+	evlist__for_each_entry(evlist, evsel) {
+		TEST_ASSERT_VAL("wrong exclude_user",
+				!evsel->attr.exclude_user);
+		TEST_ASSERT_VAL("wrong exclude_kernel",
+				evsel->attr.exclude_kernel);
+		TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+		TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	}
+
+	return test__checkevent_tracepoint_multi(evlist);
+}
+
+/*
+ * Check the first event has exclude_user, exclude_hv and precise_ip
+ * set, with exclude_kernel clear, then apply the checks of
+ * test__checkevent_raw() and return its result.
+ */
+static int test__checkevent_raw_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+
+	return test__checkevent_raw(evlist);
+}
+
+/*
+ * Check the first event has exclude_user, exclude_kernel and
+ * precise_ip set, with exclude_hv clear, then apply the checks of
+ * test__checkevent_numeric() and return its result.
+ */
+static int test__checkevent_numeric_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+
+	return test__checkevent_numeric(evlist);
+}
+
+/*
+ * Check the first event has exclude_user and exclude_kernel set, with
+ * exclude_hv and precise_ip clear, then apply the checks of
+ * test__checkevent_symbolic_name() and return its result.
+ */
+static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+	return test__checkevent_symbolic_name(evlist);
+}
+
+/*
+ * Check the first event has exclude_host set and exclude_guest clear,
+ * then apply the checks of test__checkevent_symbolic_name() and return
+ * its result.
+ */
+static int test__checkevent_exclude_host_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+
+	return test__checkevent_symbolic_name(evlist);
+}
+
+/*
+ * Check the first event has exclude_guest set and exclude_host clear,
+ * then apply the checks of test__checkevent_symbolic_name() and return
+ * its result.
+ */
+static int test__checkevent_exclude_guest_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+
+	return test__checkevent_symbolic_name(evlist);
+}
+
+/*
+ * Check the first event has exclude_kernel and exclude_hv set, with
+ * exclude_user and precise_ip clear, then apply the checks of
+ * test__checkevent_symbolic_alias() and return its result.
+ */
+static int test__checkevent_symbolic_alias_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+	return test__checkevent_symbolic_alias(evlist);
+}
+
+/*
+ * Check the first event has exclude_user, exclude_hv and precise_ip
+ * set, with exclude_kernel clear, then apply the checks of
+ * test__checkevent_genhw() and return its result.
+ */
+static int test__checkevent_genhw_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+
+	return test__checkevent_genhw(evlist);
+}
+
+/*
+ * Check the first event has exclude_idle set and every other checked
+ * flag (exclude_guest, exclude_host, exclude_user, exclude_kernel,
+ * exclude_hv, precise_ip) clear, then apply the checks of
+ * test__checkevent_symbolic_name() and return its result.
+ */
+static int test__checkevent_exclude_idle_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude idle", evsel->attr.exclude_idle);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+	return test__checkevent_symbolic_name(evlist);
+}
+
+/*
+ * Check the first event has exclude_idle, exclude_host, exclude_user
+ * and exclude_hv set, with exclude_guest, exclude_kernel and
+ * precise_ip clear, then apply the checks of
+ * test__checkevent_symbolic_name() and return its result.
+ */
+static int test__checkevent_exclude_idle_modifier_1(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude idle", evsel->attr.exclude_idle);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+	return test__checkevent_symbolic_name(evlist);
+}
+
+/*
+ * Check the first event has exclude_kernel and exclude_hv set, with
+ * exclude_user and precise_ip clear, and is named "mem:0:u"; then
+ * apply the checks of test__checkevent_breakpoint() and return its
+ * result.
+ */
+static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "mem:0:u"));
+
+	return test__checkevent_breakpoint(evlist);
+}
+
+/*
+ * Check the first event has exclude_user and exclude_hv set, with
+ * exclude_kernel and precise_ip clear, and is named "mem:0:x:k"; then
+ * apply the checks of test__checkevent_breakpoint_x() and return its
+ * result.
+ */
+static int test__checkevent_breakpoint_x_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "mem:0:x:k"));
+
+	return test__checkevent_breakpoint_x(evlist);
+}
+
+/*
+ * Check the first event has exclude_user, exclude_kernel and
+ * precise_ip set, with exclude_hv clear, and is named "mem:0:r:hp";
+ * then apply the checks of test__checkevent_breakpoint_r() and return
+ * its result.
+ */
+static int test__checkevent_breakpoint_r_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "mem:0:r:hp"));
+
+	return test__checkevent_breakpoint_r(evlist);
+}
+
+/*
+ * Check the first event has exclude_kernel, exclude_hv and precise_ip
+ * set, with exclude_user clear, and is named "mem:0:w:up"; then apply
+ * the checks of test__checkevent_breakpoint_w() and return its result.
+ */
+static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "mem:0:w:up"));
+
+	return test__checkevent_breakpoint_w(evlist);
+}
+
+/*
+ * Check the first event has exclude_user, exclude_hv and precise_ip
+ * set, with exclude_kernel clear, and is named "mem:0:rw:kp"; then
+ * apply the checks of test__checkevent_breakpoint_rw() and return its
+ * result.
+ */
+static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "mem:0:rw:kp"));
+
+	return test__checkevent_breakpoint_rw(evlist);
+}
+
+/*
+ * Check a single PERF_TYPE_RAW event with config 10, config1 1,
+ * config2 3 and sample_period 0.  Returns 0 when all checks hold.
+ */
+static int test__checkevent_pmu(struct perf_evlist *evlist)
+{
+
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",    10 == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong config1",    1 == evsel->attr.config1);
+	TEST_ASSERT_VAL("wrong config2",    3 == evsel->attr.config2);
+	/*
+	 * The period value gets configured within perf_evlist__config,
+	 * while this test executes only parse events method.
+	 */
+	TEST_ASSERT_VAL("wrong period",     0 == evsel->attr.sample_period);
+
+	return 0;
+}
+
+/*
+ * Check a three-entry evlist, walking the entries in order: a raw
+ * event with no modifier flags, a tracepoint with exclude_user and
+ * exclude_hv set, and a type 1 / config 1 event with exclude_user,
+ * exclude_kernel and precise_ip set.  Returns 0 when all checks hold.
+ */
+static int test__checkevent_list(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries);
+
+	/* r1 */
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong config1", 0 == evsel->attr.config1);
+	TEST_ASSERT_VAL("wrong config2", 0 == evsel->attr.config2);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+	/* syscalls:sys_enter_openat:k */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong sample_type",
+		PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type);
+	TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period);
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+	/* 1:1:hp */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+
+	return 0;
+}
+
+/*
+ * Check a two-entry evlist of PERF_TYPE_RAW events: the first with
+ * config 1 and name "krava", the second with config 2 and name
+ * "cpu/config=2/u".  Returns 0 when all checks hold.
+ */
+static int test__checkevent_pmu_name(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	/* cpu/config=1,name=krava/u */
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",  1 == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava"));
+
+	/* cpu/config=2/u */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",  2 == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "cpu/config=2/u"));
+
+	return 0;
+}
+
+/*
+ * Check a two-entry evlist of PERF_TYPE_RAW events with config 1 and
+ * config 2.  For both, sample_period must be 0 and neither
+ * PERF_SAMPLE_CALLCHAIN nor PERF_SAMPLE_TIME may be set in sample_type.
+ * Returns 0 when all checks hold.
+ */
+static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	/* cpu/config=1,call-graph=fp,time,period=100000/ */
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",  1 == evsel->attr.config);
+	/*
+	 * The period, time and callgraph value gets configured
+	 * within perf_evlist__config,
+	 * while this test executes only parse events method.
+	 */
+	TEST_ASSERT_VAL("wrong period",     0 == evsel->attr.sample_period);
+	TEST_ASSERT_VAL("wrong callgraph",  !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+	TEST_ASSERT_VAL("wrong time",  !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+	/* cpu/config=2,call-graph=no,time=0,period=2000/ */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",  2 == evsel->attr.config);
+	/*
+	 * The period, time and callgraph value gets configured
+	 * within perf_evlist__config,
+	 * while this test executes only parse events method.
+	 */
+	TEST_ASSERT_VAL("wrong period",     0 == evsel->attr.sample_period);
+	TEST_ASSERT_VAL("wrong callgraph",  !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type));
+	TEST_ASSERT_VAL("wrong time",  !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+	return 0;
+}
+
+/*
+ * Check a single PERF_TYPE_RAW event with exclude_kernel and
+ * exclude_hv set, and exclude_user, precise_ip and pinned clear.
+ * Returns 0 when all checks hold.
+ */
+static int test__checkevent_pmu_events(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong exclude_user",
+			!evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel",
+			evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
+
+	return 0;
+}
+
+
+/*
+ * Check a two-entry evlist in which both events have exclude_kernel
+ * and exclude_hv set, with exclude_user, precise_ip and pinned clear.
+ * Only the second entry is additionally required to be PERF_TYPE_RAW.
+ * Returns 0 when all checks hold.
+ */
+static int test__checkevent_pmu_events_mix(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	/* pmu-event:u */
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong exclude_user",
+			!evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel",
+			evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
+
+	/* cpu/pmu-event/u */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong exclude_user",
+			!evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel",
+			evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
+
+	return 0;
+}
+
+/*
+ * Check the first four entries of a parsed term list, in order:
+ * CONFIG with value 10, CONFIG1 with value 1, CONFIG2 with value 3
+ * (all numeric, with a NULL config string), and a numeric USER term
+ * with value 1 whose config string is "umask".
+ * Returns 0 when all checks hold.
+ */
+static int test__checkterms_simple(struct list_head *terms)
+{
+	struct parse_events_term *term;
+
+	/* config=10 */
+	term = list_entry(terms->next, struct parse_events_term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 10);
+	TEST_ASSERT_VAL("wrong config", !term->config);
+
+	/* config1 */
+	term = list_entry(term->list.next, struct parse_events_term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG1);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 1);
+	TEST_ASSERT_VAL("wrong config", !term->config);
+
+	/* config2=3 */
+	term = list_entry(term->list.next, struct parse_events_term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG2);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 3);
+	TEST_ASSERT_VAL("wrong config", !term->config);
+
+	/* umask=1 */
+	term = list_entry(term->list.next, struct parse_events_term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_USER);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 1);
+	TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask"));
+
+	return 0;
+}
+
+/*
+ * Check a single group of two hardware events.
+ *
+ * The first entry (instructions) must be the group leader with
+ * nr_members 2 and group index 0, counting kernel only.  The second
+ * entry (cycles) must point at that leader, have group index 1, count
+ * user only, and have precise_ip 2 with exclude_guest set.  Neither
+ * entry may have sample_read set.  Returns 0 when all checks hold.
+ */
+static int test__group1(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+	/* instructions:k */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+	/* cycles:upp */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	/* use of precise requires exclude_guest */
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+	return 0;
+}
+
+/*
+ * Verify three events in one group: "faults" (with :ku applied) leading
+ * "cache-references" (with :u applied), followed by a standalone "cycles:k".
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__group2(struct perf_evlist *evlist)
+{
+	struct perf_evsel *ev, *lead;
+
+	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries == 3);
+	TEST_ASSERT_VAL("wrong number of groups", evlist->nr_groups == 1);
+
+	/* group leader: faults + :ku modifier */
+	lead = perf_evlist__first(evlist);
+	ev = lead;
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_SOFTWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_SW_PAGE_FAULTS);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(ev));
+	TEST_ASSERT_VAL("wrong nr_members", ev->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(ev) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !ev->sample_read);
+
+	/* group member: cache-references + :u modifier */
+	ev = perf_evsel__next(ev);
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CACHE_REFERENCES);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", ev->leader == lead);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(ev) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !ev->sample_read);
+
+	/* outside the group: cycles:k */
+	ev = perf_evsel__next(ev);
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CPU_CYCLES);
+	TEST_ASSERT_VAL("wrong exclude_user", ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(ev));
+	TEST_ASSERT_VAL("wrong sample_read", !ev->sample_read);
+
+	return 0;
+}
+
+/*
+ * Verify five events in two named groups plus one standalone event:
+ * "group1" (syscalls:sys_enter_openat:H, cycles:kppp), "group2"
+ * (cycles and raw 1:3, both with the G modifier) and a trailing
+ * "instructions:u".
+ *
+ * @evlist: parsed event list to inspect (always used, so it carries no
+ *          __maybe_unused annotation).
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__group3(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
+
+	/* group1 syscalls:sys_enter_openat:H */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong sample_type",
+		PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type);
+	TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	/* A missing group name must fail the check, not crash in strcmp(). */
+	TEST_ASSERT_VAL("wrong group name",
+		leader->group_name && !strcmp(leader->group_name, "group1"));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+	/* group1 cycles:kppp */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	/* use of precise requires exclude_guest */
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 3);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+	/* group2 cycles + G modifier */
+	evsel = leader = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	/* Same NULL guard as for group1 above. */
+	TEST_ASSERT_VAL("wrong group name",
+		leader->group_name && !strcmp(leader->group_name, "group2"));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+	/* group2 1:3 + G modifier */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config", 3 == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+	/* instructions:u */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+	return 0;
+}
+
+/*
+ * Verify an unnamed two-event group where "cycles:u" and "instructions:kp"
+ * each receive an extra "p" modifier (precise_ip 1 and 2 respectively).
+ *
+ * @evlist: parsed event list to inspect (always used, so it carries no
+ *          __maybe_unused annotation).
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__group4(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+	/* cycles:u + p */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	/* use of precise requires exclude_guest */
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 1);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+	/* instructions:kp + p */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	/* use of precise requires exclude_guest */
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+	return 0;
+}
+
+/*
+ * Verify five events: two unnamed two-member groups of cycles and
+ * instructions carrying the G modifier (applied per group in the first,
+ * per event in the second), followed by a standalone plain "cycles".
+ *
+ * @evlist: parsed event list to inspect (always used, so it carries no
+ *          __maybe_unused annotation).
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__group5(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
+
+	/* cycles + G */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+	/* instructions + G */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+	/* cycles:G */
+	evsel = leader = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+	/* instructions:G */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+
+	/* cycles */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+
+	return 0;
+}
+
+/*
+ * Verify a two-event group with an :H group modifier: "cycles" as leader
+ * and "cache-misses:G" as member.
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__group_gh1(struct perf_evlist *evlist)
+{
+	struct perf_evsel *ev, *lead;
+
+	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries == 2);
+	TEST_ASSERT_VAL("wrong number of groups", evlist->nr_groups == 1);
+
+	/* group leader: cycles + :H group modifier */
+	lead = perf_evlist__first(evlist);
+	ev = lead;
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CPU_CYCLES);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !ev->group_name);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(ev));
+	TEST_ASSERT_VAL("wrong nr_members", ev->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(ev) == 0);
+
+	/* group member: cache-misses:G + :H group modifier */
+	ev = perf_evsel__next(ev);
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CACHE_MISSES);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", ev->leader == lead);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(ev) == 1);
+
+	return 0;
+}
+
+/*
+ * Verify a two-event group with a :G group modifier: "cycles" as leader
+ * and "cache-misses:H" as member.
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__group_gh2(struct perf_evlist *evlist)
+{
+	struct perf_evsel *ev, *lead;
+
+	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries == 2);
+	TEST_ASSERT_VAL("wrong number of groups", evlist->nr_groups == 1);
+
+	/* group leader: cycles + :G group modifier */
+	lead = perf_evlist__first(evlist);
+	ev = lead;
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CPU_CYCLES);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !ev->group_name);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(ev));
+	TEST_ASSERT_VAL("wrong nr_members", ev->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(ev) == 0);
+
+	/* group member: cache-misses:H + :G group modifier */
+	ev = perf_evsel__next(ev);
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CACHE_MISSES);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", ev->leader == lead);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(ev) == 1);
+
+	return 0;
+}
+
+/*
+ * Verify a two-event group with a :u group modifier: "cycles:G" as leader
+ * and "cache-misses:H" as member.
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__group_gh3(struct perf_evlist *evlist)
+{
+	struct perf_evsel *ev, *lead;
+
+	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries == 2);
+	TEST_ASSERT_VAL("wrong number of groups", evlist->nr_groups == 1);
+
+	/* group leader: cycles:G + :u group modifier */
+	lead = perf_evlist__first(evlist);
+	ev = lead;
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CPU_CYCLES);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !ev->group_name);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(ev));
+	TEST_ASSERT_VAL("wrong nr_members", ev->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(ev) == 0);
+
+	/* group member: cache-misses:H + :u group modifier */
+	ev = perf_evsel__next(ev);
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CACHE_MISSES);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", ev->leader == lead);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(ev) == 1);
+
+	return 0;
+}
+
+/*
+ * Verify a two-event group with a :uG group modifier: "cycles:G" as
+ * leader and "cache-misses:H" as member.
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__group_gh4(struct perf_evlist *evlist)
+{
+	struct perf_evsel *ev, *lead;
+
+	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries == 2);
+	TEST_ASSERT_VAL("wrong number of groups", evlist->nr_groups == 1);
+
+	/* group leader: cycles:G + :uG group modifier */
+	lead = perf_evlist__first(evlist);
+	ev = lead;
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CPU_CYCLES);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !ev->group_name);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(ev));
+	TEST_ASSERT_VAL("wrong nr_members", ev->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(ev) == 0);
+
+	/* group member: cache-misses:H + :uG group modifier */
+	ev = perf_evsel__next(ev);
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CACHE_MISSES);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", ev->leader == lead);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(ev) == 1);
+
+	return 0;
+}
+
+/*
+ * Verify a three-event leader-sampling group: "cycles" as the sampling
+ * leader, then "cache-misses" and "branch-misses". All three events are
+ * expected to have sample_read set and to point at the same leader.
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__leader_sample1(struct perf_evlist *evlist)
+{
+	struct perf_evsel *ev, *lead;
+
+	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries == 3);
+
+	/* cycles - sampling group leader */
+	lead = perf_evlist__first(evlist);
+	ev = lead;
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CPU_CYCLES);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !ev->group_name);
+	TEST_ASSERT_VAL("wrong leader", ev->leader == lead);
+	TEST_ASSERT_VAL("wrong sample_read", ev->sample_read);
+
+	/* cache-misses - not sampling */
+	ev = perf_evsel__next(ev);
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CACHE_MISSES);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", ev->leader == lead);
+	TEST_ASSERT_VAL("wrong sample_read", ev->sample_read);
+
+	/* branch-misses - not sampling */
+	ev = perf_evsel__next(ev);
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_BRANCH_MISSES);
+	TEST_ASSERT_VAL("wrong exclude_user", !ev->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !ev->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !ev->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", ev->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !ev->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !ev->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !ev->group_name);
+	TEST_ASSERT_VAL("wrong leader", ev->leader == lead);
+	TEST_ASSERT_VAL("wrong sample_read", ev->sample_read);
+
+	return 0;
+}
+
+/*
+ * Verify a two-event leader-sampling group restricted to user space:
+ * "instructions" as the sampling leader and "branch-misses" as member.
+ * Both events are expected to have sample_read set.
+ *
+ * @evlist: parsed event list to inspect (always used, so it carries no
+ *          __maybe_unused annotation).
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__leader_sample2(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+
+	/* instructions - sampling group leader */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+	/* branch-misses - not sampling */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+	return 0;
+}
+
+/*
+ * Verify the modifier-related attributes of the first event (user-only,
+ * precise, pinned), then delegate the remaining checks to
+ * test__checkevent_symbolic_name().
+ *
+ * Returns the result of test__checkevent_symbolic_name() if every local
+ * assertion holds; failures are handled by TEST_ASSERT_VAL() (defined
+ * outside this hunk).
+ */
+static int test__checkevent_pinned_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *first = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", !first->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", first->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", first->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", first->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong pinned", first->attr.pinned);
+
+	return test__checkevent_symbolic_name(evlist);
+}
+
+/*
+ * Verify a three-event group in which only the leader ("cycles") has the
+ * pinned attribute set; "cache-misses" and "branch-misses" must not.
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__pinned_group(struct perf_evlist *evlist)
+{
+	struct perf_evsel *ev, *lead;
+
+	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries == 3);
+
+	/* cycles - group leader */
+	lead = perf_evlist__first(evlist);
+	ev = lead;
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CPU_CYCLES);
+	TEST_ASSERT_VAL("wrong group name", !ev->group_name);
+	TEST_ASSERT_VAL("wrong leader", ev->leader == lead);
+	TEST_ASSERT_VAL("wrong pinned", ev->attr.pinned);
+
+	/* cache-misses - can not be pinned, but will go on with the leader */
+	ev = perf_evsel__next(ev);
+	TEST_ASSERT_VAL("wrong type", ev->attr.type == PERF_TYPE_HARDWARE);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_CACHE_MISSES);
+	TEST_ASSERT_VAL("wrong pinned", !ev->attr.pinned);
+
+	/* branch-misses - ditto */
+	ev = perf_evsel__next(ev);
+	TEST_ASSERT_VAL("wrong config",
+			ev->attr.config == PERF_COUNT_HW_BRANCH_MISSES);
+	TEST_ASSERT_VAL("wrong pinned", !ev->attr.pinned);
+
+	return 0;
+}
+
+/*
+ * Verify a single breakpoint event: read/write type, config 0 and a
+ * length of HW_BREAKPOINT_LEN_1.
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__checkevent_breakpoint_len(struct perf_evlist *evlist)
+{
+	struct perf_evsel *bp = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries == 1);
+	TEST_ASSERT_VAL("wrong type", bp->attr.type == PERF_TYPE_BREAKPOINT);
+	TEST_ASSERT_VAL("wrong config", bp->attr.config == 0);
+	TEST_ASSERT_VAL("wrong bp_type",
+			bp->attr.bp_type == (HW_BREAKPOINT_R | HW_BREAKPOINT_W));
+	TEST_ASSERT_VAL("wrong bp_len",
+			bp->attr.bp_len == HW_BREAKPOINT_LEN_1);
+
+	return 0;
+}
+
+/*
+ * Verify a single breakpoint event: write-only type, config 0 and a
+ * length of HW_BREAKPOINT_LEN_2.
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__checkevent_breakpoint_len_w(struct perf_evlist *evlist)
+{
+	struct perf_evsel *bp = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries == 1);
+	TEST_ASSERT_VAL("wrong type", bp->attr.type == PERF_TYPE_BREAKPOINT);
+	TEST_ASSERT_VAL("wrong config", bp->attr.config == 0);
+	TEST_ASSERT_VAL("wrong bp_type",
+			bp->attr.bp_type == HW_BREAKPOINT_W);
+	TEST_ASSERT_VAL("wrong bp_len",
+			bp->attr.bp_len == HW_BREAKPOINT_LEN_2);
+
+	return 0;
+}
+
+/*
+ * Verify the modifier-related attributes of the first event (user-only,
+ * not precise), then delegate the remaining checks to
+ * test__checkevent_breakpoint_rw().
+ *
+ * Returns the result of test__checkevent_breakpoint_rw() if every local
+ * assertion holds; failures are handled by TEST_ASSERT_VAL() (defined
+ * outside this hunk).
+ */
+static int
+test__checkevent_breakpoint_len_rw_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *bp = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", !bp->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", bp->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", bp->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", !bp->attr.precise_ip);
+
+	return test__checkevent_breakpoint_rw(evlist);
+}
+
+/*
+ * Verify that the list holds two entries and that the first one is the
+ * software task-clock event.
+ *
+ * Returns 0 if every assertion holds; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__checkevent_precise_max_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *first = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries == 2);
+	TEST_ASSERT_VAL("wrong type", first->attr.type == PERF_TYPE_SOFTWARE);
+	TEST_ASSERT_VAL("wrong config",
+			first->attr.config == PERF_COUNT_SW_TASK_CLOCK);
+
+	return 0;
+}
+
+/*
+ * Verify that the first event in @evlist is named "insn".
+ *
+ * Returns 0 if the name matches; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__checkevent_config_symbol(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	/* An unset name must fail the check, not crash in strcmp(). */
+	TEST_ASSERT_VAL("wrong name setting",
+			evsel->name && strcmp(evsel->name, "insn") == 0);
+	return 0;
+}
+
+/*
+ * Verify that the first event in @evlist is named "rawpmu".
+ *
+ * Returns 0 if the name matches; failures are handled by
+ * TEST_ASSERT_VAL() (defined outside this hunk).
+ */
+static int test__checkevent_config_raw(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	/* An unset name must fail the check, not crash in strcmp(). */
+	TEST_ASSERT_VAL("wrong name setting",
+			evsel->name && strcmp(evsel->name, "rawpmu") == 0);
+	return 0;
+}
+/* Checker for "4:0x6530160/name=numpmu/": the first evsel must be named "numpmu". */
+static int test__checkevent_config_num(struct perf_evlist *evlist)
+{
+	struct perf_evsel *first = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong name setting", !strcmp(first->name, "numpmu"));
+	return 0;
+}
+/* Checker for "L1-dcache-misses/name=cachepmu/": first evsel must be named "cachepmu". */
+static int test__checkevent_config_cache(struct perf_evlist *evlist)
+{
+	struct perf_evsel *first = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong name setting", !strcmp(first->name, "cachepmu"));
+	return 0;
+}
+/* Count the entries found under every system directory of tracing_events_path. */
+static int count_tracepoints(void)
+{
+	struct dirent *events_ent;
+	DIR *events_dir;
+	int cnt = 0;
+
+	events_dir = opendir(tracing_events_path);
+	TEST_ASSERT_VAL("Can't open events dir", events_dir);
+
+	while ((events_ent = readdir(events_dir))) {
+		char sys_path[PATH_MAX];
+		struct dirent *sys_ent;
+		DIR *sys_dir;
+
+		/* Skip the self/parent links and the top-level names listed here. */
+		if (!strcmp(events_ent->d_name, ".")
+		    || !strcmp(events_ent->d_name, "..")
+		    || !strcmp(events_ent->d_name, "enable")
+		    || !strcmp(events_ent->d_name, "header_event")
+		    || !strcmp(events_ent->d_name, "header_page"))
+			continue;
+
+		scnprintf(sys_path, PATH_MAX, "%s/%s",
+			  tracing_events_path, events_ent->d_name);
+
+		sys_dir = opendir(sys_path);
+		if (!sys_dir)	/* the assert below leaves; don't leak the outer handle */
+			closedir(events_dir);
+		TEST_ASSERT_VAL("Can't open sys dir", sys_dir);
+
+		while ((sys_ent = readdir(sys_dir))) {
+			if (!strcmp(sys_ent->d_name, ".")
+			    || !strcmp(sys_ent->d_name, "..")
+			    || !strcmp(sys_ent->d_name, "enable")
+			    || !strcmp(sys_ent->d_name, "filter"))
+				continue;
+			cnt++;
+		}
+		closedir(sys_dir);
+	}
+
+	closedir(events_dir);
+	return cnt;
+}
+/* Checker for "*:*": one evlist entry per tracepoint counted by count_tracepoints(). */
+static int test__all_tracepoints(struct perf_evlist *evlist)
+{
+	int expected = count_tracepoints();
+
+	TEST_ASSERT_VAL("wrong events count", expected == evlist->nr_entries);
+	return test__checkevent_tracepoint_multi(evlist);
+}
+/* One parse_events() test case: an event string plus the function checking the result. */
+struct evlist_test {
+	const char *name;	/* event string handed to parse_events() */
+	__u32 type;		/* not referenced by the code visible here */
+	const int id;		/* number printed by test_events() */
+	int (*check)(struct perf_evlist *evlist);	/* run on the parsed evlist */
+};
+/* Generic event strings; each is parsed by test_event() and verified by its .check. */
+static struct evlist_test test__events[] = {
+	{
+		.name  = "syscalls:sys_enter_openat",
+		.check = test__checkevent_tracepoint,
+		.id    = 0,
+	},
+	{
+		.name  = "syscalls:*",
+		.check = test__checkevent_tracepoint_multi,
+		.id    = 1,
+	},
+	{
+		.name  = "r1a",
+		.check = test__checkevent_raw,
+		.id    = 2,
+	},
+	{
+		.name  = "1:1",
+		.check = test__checkevent_numeric,
+		.id    = 3,
+	},
+	{
+		.name  = "instructions",
+		.check = test__checkevent_symbolic_name,
+		.id    = 4,
+	},
+	{
+		.name  = "cycles/period=100000,config2/",
+		.check = test__checkevent_symbolic_name_config,
+		.id    = 5,
+	},
+	{
+		.name  = "faults",
+		.check = test__checkevent_symbolic_alias,
+		.id    = 6,
+	},
+	{
+		.name  = "L1-dcache-load-miss",
+		.check = test__checkevent_genhw,
+		.id    = 7,
+	},
+	{
+		.name  = "mem:0",
+		.check = test__checkevent_breakpoint,
+		.id    = 8,
+	},
+	{
+		.name  = "mem:0:x",
+		.check = test__checkevent_breakpoint_x,
+		.id    = 9,
+	},
+	{
+		.name  = "mem:0:r",
+		.check = test__checkevent_breakpoint_r,
+		.id    = 10,
+	},
+	{
+		.name  = "mem:0:w",
+		.check = test__checkevent_breakpoint_w,
+		.id    = 11,
+	},
+	{
+		.name  = "syscalls:sys_enter_openat:k",
+		.check = test__checkevent_tracepoint_modifier,
+		.id    = 12,
+	},
+	{
+		.name  = "syscalls:*:u",
+		.check = test__checkevent_tracepoint_multi_modifier,
+		.id    = 13,
+	},
+	{
+		.name  = "r1a:kp",
+		.check = test__checkevent_raw_modifier,
+		.id    = 14,
+	},
+	{
+		.name  = "1:1:hp",
+		.check = test__checkevent_numeric_modifier,
+		.id    = 15,
+	},
+	{
+		.name  = "instructions:h",
+		.check = test__checkevent_symbolic_name_modifier,
+		.id    = 16,
+	},
+	{
+		.name  = "faults:u",
+		.check = test__checkevent_symbolic_alias_modifier,
+		.id    = 17,
+	},
+	{
+		.name  = "L1-dcache-load-miss:kp",
+		.check = test__checkevent_genhw_modifier,
+		.id    = 18,
+	},
+	{
+		.name  = "mem:0:u",
+		.check = test__checkevent_breakpoint_modifier,
+		.id    = 19,
+	},
+	{
+		.name  = "mem:0:x:k",
+		.check = test__checkevent_breakpoint_x_modifier,
+		.id    = 20,
+	},
+	{
+		.name  = "mem:0:r:hp",
+		.check = test__checkevent_breakpoint_r_modifier,
+		.id    = 21,
+	},
+	{
+		.name  = "mem:0:w:up",
+		.check = test__checkevent_breakpoint_w_modifier,
+		.id    = 22,
+	},
+	{
+		.name  = "r1,syscalls:sys_enter_openat:k,1:1:hp",
+		.check = test__checkevent_list,
+		.id    = 23,
+	},
+	{
+		.name  = "instructions:G",
+		.check = test__checkevent_exclude_host_modifier,
+		.id    = 24,
+	},
+	{
+		.name  = "instructions:H",
+		.check = test__checkevent_exclude_guest_modifier,
+		.id    = 25,
+	},
+	{
+		.name  = "mem:0:rw",
+		.check = test__checkevent_breakpoint_rw,
+		.id    = 26,
+	},
+	{
+		.name  = "mem:0:rw:kp",
+		.check = test__checkevent_breakpoint_rw_modifier,
+		.id    = 27,
+	},
+	{
+		.name  = "{instructions:k,cycles:upp}",
+		.check = test__group1,
+		.id    = 28,
+	},
+	{
+		.name  = "{faults:k,cache-references}:u,cycles:k",
+		.check = test__group2,
+		.id    = 29,
+	},
+	{
+		.name  = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
+		.check = test__group3,
+		.id    = 30,
+	},
+	{
+		.name  = "{cycles:u,instructions:kp}:p",
+		.check = test__group4,
+		.id    = 31,
+	},
+	{
+		.name  = "{cycles,instructions}:G,{cycles:G,instructions:G},cycles",
+		.check = test__group5,
+		.id    = 32,
+	},
+	{
+		.name  = "*:*",
+		.check = test__all_tracepoints,
+		.id    = 33,
+	},
+	{
+		.name  = "{cycles,cache-misses:G}:H",
+		.check = test__group_gh1,
+		.id    = 34,
+	},
+	{
+		.name  = "{cycles,cache-misses:H}:G",
+		.check = test__group_gh2,
+		.id    = 35,
+	},
+	{
+		.name  = "{cycles:G,cache-misses:H}:u",
+		.check = test__group_gh3,
+		.id    = 36,
+	},
+	{
+		.name  = "{cycles:G,cache-misses:H}:uG",
+		.check = test__group_gh4,
+		.id    = 37,
+	},
+	{
+		.name  = "{cycles,cache-misses,branch-misses}:S",
+		.check = test__leader_sample1,
+		.id    = 38,
+	},
+	{
+		.name  = "{instructions,branch-misses}:Su",
+		.check = test__leader_sample2,
+		.id    = 39,
+	},
+	{
+		.name  = "instructions:uDp",
+		.check = test__checkevent_pinned_modifier,
+		.id    = 40,
+	},
+	{
+		.name  = "{cycles,cache-misses,branch-misses}:D",
+		.check = test__pinned_group,
+		.id    = 41,
+	},
+	{
+		.name  = "mem:0/1",
+		.check = test__checkevent_breakpoint_len,
+		.id    = 42,
+	},
+	{
+		.name  = "mem:0/2:w",
+		.check = test__checkevent_breakpoint_len_w,
+		.id    = 43,
+	},
+	{
+		.name  = "mem:0/4:rw:u",
+		.check = test__checkevent_breakpoint_len_rw_modifier,
+		.id    = 44
+	},
+#if defined(__s390x__)
+	{
+		.name  = "kvm-s390:kvm_s390_create_vm",
+		.check = test__checkevent_tracepoint,
+		.id    = 100,
+	},
+#endif
+	{
+		.name  = "instructions:I",
+		.check = test__checkevent_exclude_idle_modifier,
+		.id    = 45,
+	},
+	{
+		.name  = "instructions:kIG",
+		.check = test__checkevent_exclude_idle_modifier_1,
+		.id    = 46,
+	},
+	{
+		.name  = "task-clock:P,cycles",
+		.check = test__checkevent_precise_max_modifier,
+		.id    = 47,
+	},
+	{
+		.name  = "instructions/name=insn/",
+		.check = test__checkevent_config_symbol,
+		.id    = 48,
+	},
+	{
+		.name  = "r1234/name=rawpmu/",
+		.check = test__checkevent_config_raw,
+		.id    = 49,
+	},
+	{
+		.name  = "4:0x6530160/name=numpmu/",
+		.check = test__checkevent_config_num,
+		.id    = 50,
+	},
+	{
+		.name  = "L1-dcache-misses/name=cachepmu/",
+		.check = test__checkevent_config_cache,
+		.id    = 51,
+	},
+};
+/* "cpu/.../" event strings; test__parse_events() runs them only when test_pmu() is true. */
+static struct evlist_test test__events_pmu[] = {
+	{
+		.name  = "cpu/config=10,config1,config2=3,period=1000/u",
+		.check = test__checkevent_pmu,
+		.id    = 0,
+	},
+	{
+		.name  = "cpu/config=1,name=krava/u,cpu/config=2/u",
+		.check = test__checkevent_pmu_name,
+		.id    = 1,
+	},
+	{
+		.name  = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/",
+		.check = test__checkevent_pmu_partial_time_callgraph,
+		.id    = 2,
+	},
+};
+/* One parse_events_terms() test case. */
+struct terms_test {
+	const char *str;	/* term string handed to parse_events_terms() */
+	__u32 type;		/* not referenced by the code visible here */
+	int (*check)(struct list_head *terms);	/* run on the parsed term list */
+};
+/* Term strings run by test_terms(); currently a single case. */
+static struct terms_test test__terms[] = {
+	[0] = {
+		.str   = "config=10,config1,config2=3,umask=1",
+		.check = test__checkterms_simple,
+	},
+};
+/* Parse e->name into a fresh evlist and, if parsing succeeds, run e->check on it. */
+static int test_event(struct evlist_test *e)
+{
+	struct perf_evlist *parsed = perf_evlist__new();
+	int err;
+
+	if (!parsed)
+		return -ENOMEM;
+
+	err = parse_events(parsed, e->name, NULL);
+	if (err) {
+		pr_debug("failed to parse event '%s', err %d\n",
+			 e->name, err);
+	} else {
+		err = e->check(parsed);
+	}
+
+	/* The evlist is released on both the failure and the success path. */
+	perf_evlist__delete(parsed);
+
+	return err;
+}
+/* Run every test in events[]; returns 0 or the status of the last failing one. */
+static int test_events(struct evlist_test *events, unsigned cnt)
+{
+	struct evlist_test *cur, *end = events + cnt;
+	int last_err = 0;
+
+	for (cur = events; cur != end; cur++) {
+		int err;
+
+		pr_debug("running test %d '%s'\n", cur->id, cur->name);
+		err = test_event(cur);
+		/* Keep going after a failure so the remaining tests still run. */
+		if (err)
+			last_err = err;
+	}
+	return last_err;
+}
+/*
+ * Parse t->str into a term list and hand the list to t->check.  The list is
+ * purged only when parsing succeeded; a parse error is logged and returned.
+ */
+static int test_term(struct terms_test *t)
+{
+	LIST_HEAD(parsed);
+	int err;
+
+	err = parse_events_terms(&parsed, t->str);
+	if (err) {
+		pr_debug("failed to parse terms '%s', err %d\n",
+			 t->str , err);
+	} else {
+		err = t->check(&parsed);
+		parse_events_terms__purge(&parsed);
+	}
+
+	return err;
+}
+/* Run the term tests in order, stopping at (and returning) the first failure. */
+static int test_terms(struct terms_test *terms, unsigned cnt)
+{
+	unsigned idx;
+
+	for (idx = 0; idx < cnt; idx++) {
+		struct terms_test *cur = &terms[idx];
+		int err;
+
+		pr_debug("running test %d '%s'\n", idx, cur->str);
+		err = test_term(cur);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+/* Returns 1 if <sysfs>/bus/event_source/devices/cpu/format/ can be stat()ed, else 0. */
+static int test_pmu(void)
+{
+	char format_dir[PATH_MAX];
+	struct stat sb;
+
+	snprintf(format_dir, sizeof(format_dir),
+		 "%s/bus/event_source/devices/cpu/format/",
+		 sysfs__mountpoint());
+
+	if (stat(format_dir, &sb) == 0)
+		return 1;
+	pr_debug("omitting PMU cpu tests\n");
+	return 0;
+}
+/* Parse each cpu PMU sysfs event as "cpu/event=NAME/u" and "NAME:u,cpu/event=NAME/u". */
+static int test_pmu_events(void)
+{
+	struct stat st;
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+	int ret;
+
+	snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/events/",
+		 sysfs__mountpoint());
+
+	ret = stat(path, &st);
+	if (ret) {
+		pr_debug("omitting PMU cpu events tests\n");
+		return 0;
+	}
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_debug("can't open pmu event dir\n");
+		return -1;
+	}
+
+	while (!ret && (ent = readdir(dir))) {
+		/* Initialise everything: .id is const and cannot be assigned later. */
+		struct evlist_test e = { .id = 0, };
+		char name[2 * NAME_MAX + 1 + 12 + 3];
+
+		/* Names containing . are special and cannot be used directly */
+		if (strchr(ent->d_name, '.'))
+			continue;
+
+		snprintf(name, sizeof(name), "cpu/event=%s/u", ent->d_name);
+		e.name  = name;
+		e.check = test__checkevent_pmu_events;
+
+		ret = test_event(&e);
+		if (ret)
+			break;
+		snprintf(name, sizeof(name), "%s:u,cpu/event=%s/u", ent->d_name, ent->d_name);
+		e.name  = name;
+		e.check = test__checkevent_pmu_events_mix;
+		ret = test_event(&e);
+	}
+
+	closedir(dir);
+	return ret;
+}
+/* Entry point: generic event tests, cpu PMU tests when available, then term tests. */
+int test__parse_events(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err, first_err = 0;
+
+/* Run one table and remember the first non-zero status seen so far. */
+#define TEST_EVENTS(tests)				\
+do {							\
+	err = test_events(tests, ARRAY_SIZE(tests));	\
+	if (first_err == 0)				\
+		first_err = err;			\
+} while (0)
+
+	TEST_EVENTS(test__events);
+
+	if (test_pmu())
+		TEST_EVENTS(test__events_pmu);
+
+	/* A failure in the sysfs event tests ends the run with its own status. */
+	if (test_pmu()) {
+		err = test_pmu_events();
+		if (err)
+			return err;
+	}
+
+	err = test_terms(test__terms, ARRAY_SIZE(test__terms));
+
+	return first_err ? first_err : err;
+}
diff --git a/tests/parse-no-sample-id-all.c b/tests/parse-no-sample-id-all.c
new file mode 100644
index 0000000..2196d14
--- /dev/null
+++ b/tests/parse-no-sample-id-all.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <stddef.h>
+
+#include "tests.h"
+
+#include "event.h"
+#include "evlist.h"
+#include "header.h"
+#include "util.h"
+#include "debug.h"
+/*
+ * Handle one synthetic event: a HEADER_ATTR record goes to
+ * perf_event__process_attr(); any other type below PERF_RECORD_USER_TYPE_START
+ * goes to perf_evlist__parse_sample().  Returns 0 on success, -1 otherwise.
+ */
+static int process_event(struct perf_evlist **pevlist, union perf_event *event)
+{
+	struct perf_sample parsed;
+
+	if (event->header.type == PERF_RECORD_HEADER_ATTR) {
+		if (!perf_event__process_attr(NULL, event, pevlist))
+			return 0;
+		pr_debug("perf_event__process_attr failed\n");
+		return -1;
+	}
+
+	/* Unexpected user-range type, or no evlist to parse against yet. */
+	if (event->header.type >= PERF_RECORD_USER_TYPE_START || !*pevlist)
+		return -1;
+
+	if (!perf_evlist__parse_sample(*pevlist, event, &parsed))
+		return 0;
+
+	pr_debug("perf_evlist__parse_sample failed\n");
+	return -1;
+}
+/* Process events[] in order until one fails, then delete the evlist that was built. */
+static int process_events(union perf_event **events, size_t count)
+{
+	struct perf_evlist *evlist = NULL;
+	size_t idx = 0;
+	int err = 0;
+
+	while (idx < count && err == 0) {
+		err = process_event(&evlist, events[idx]);
+		idx++;
+	}
+	perf_evlist__delete(evlist);
+	return err;
+}
+/* Layout of the synthetic PERF_RECORD_HEADER_ATTR events built by the test below. */
+struct test_attr_event {
+	struct perf_event_header header;
+	struct perf_event_attr	 attr;
+	u64 id;
+};
+
+/**
+ * test__parse_no_sample_id_all - test parsing with no sample_id_all bit set.
+ *
+ * This function tests parsing data produced on kernels that do not support the
+ * sample_id_all bit.  Without the sample_id_all bit, non-sample events (such as
+ * mmap events) do not have an id sample appended, and consequently logic
+ * designed to determine the id will not work.  That case happens when there is
+ * more than one selected event, so this test processes three events: 2
+ * attributes representing the selected events and one mmap event.
+ *
+ * Return: %0 on success, %-1 if the test fails.
+ */
+int test__parse_no_sample_id_all(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	/* Two attr events carrying ids 1 and 2: the "more than one event" case. */
+	struct test_attr_event attr_one = {
+		.header = {
+			.type = PERF_RECORD_HEADER_ATTR,
+			.size = sizeof(attr_one),
+		},
+		.id = 1,
+	};
+	struct test_attr_event attr_two = {
+		.header = {
+			.type = PERF_RECORD_HEADER_ATTR,
+			.size = sizeof(attr_two),
+		},
+		.id = 2,
+	};
+	/* A non-sample event; every field other than the header stays zero. */
+	struct mmap_event mmap_ev = {
+		.header = {
+			.type = PERF_RECORD_MMAP,
+			.size = sizeof(mmap_ev),
+		},
+	};
+	union perf_event *events[] = {
+		(union perf_event *)&attr_one,
+		(union perf_event *)&attr_two,
+		(union perf_event *)&mmap_ev,
+	};
+
+	/* Collapse any failure into the -1 documented above. */
+	if (process_events(events, ARRAY_SIZE(events)) != 0)
+		return -1;
+
+	return 0;
+}
diff --git a/tests/perf-hooks.c b/tests/perf-hooks.c
new file mode 100644
index 0000000..a693bcf
--- /dev/null
+++ b/tests/perf-hooks.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <signal.h>
+#include <stdlib.h>
+
+#include "tests.h"
+#include "debug.h"
+#include "util.h"
+#include "perf-hooks.h"
+/* SIGSEGV handler for the hook test: try perf_hooks__recover(), else die by SIGSEGV. */
+static void sigsegv_handler(int sig __maybe_unused)
+{
+	pr_debug("SIGSEGV is observed as expected, try to recover.\n");
+	perf_hooks__recover();
+	signal(SIGSEGV, SIG_DFL);	/* only reached if the call above returned */
+	raise(SIGSEGV);
+	exit(-1);
+}
+
+/* Hook body: stores 1234 through _hook_flags to prove it ran, then faults on purpose. */
+static void the_hook(void *_hook_flags)
+{
+	int *hook_flags = _hook_flags;
+	int *p = NULL;
+
+	*hook_flags = 1234;
+
+	/* Generate a segfault, test perf_hooks__recover */
+	*p = 0;
+}
+/* Install a hook that faults, invoke it, then verify it ran and was dropped. */
+int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int hook_flags = 0;
+
+	signal(SIGSEGV, sigsegv_handler);
+	perf_hooks__set_hook("test", the_hook, &hook_flags);
+	perf_hooks__invoke_test();
+
+	/* hook is triggered? */
+	if (hook_flags == 1234) {
+		/* the buggy hook is removed? */
+		if (perf_hooks__get_hook("test"))
+			return TEST_FAIL;
+		return TEST_OK;
+	}
+
+	pr_debug("Setting failed: %d (%p)\n", hook_flags, &hook_flags);
+	return TEST_FAIL;
+}
diff --git a/tests/perf-record.c b/tests/perf-record.c
new file mode 100644
index 0000000..34394cc
--- /dev/null
+++ b/tests/perf-record.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+/* For the CLR_() macros */
+#include <pthread.h>
+
+#include <sched.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "perf.h"
+#include "debug.h"
+#include "tests.h"
+/* Return the first CPU set in pid's affinity mask (-1 if none or on error); clears the others in *maskp. */
+static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
+{
+	int i, cpu = -1, nrcpus = 1024;
+realloc:
+	CPU_ZERO(maskp);
+	/* NOTE(review): on EINVAL the call is retried with the same size and buffer; only nrcpus grows. */
+	if (sched_getaffinity(pid, sizeof(*maskp), maskp) == -1) {
+		if (errno == EINVAL && nrcpus < (1024 << 8)) {
+			nrcpus = nrcpus << 2;
+			goto realloc;
+		}
+		perror("sched_getaffinity");
+			return -1;
+	}
+	/* Remember the lowest set CPU and clear every later one from the mask. */
+	for (i = 0; i < nrcpus; i++) {
+		if (CPU_ISSET(i, maskp)) {
+			if (cpu == -1)
+				cpu = i;
+			else
+				CPU_CLR(i, maskp);
+		}
+	}
+
+	return cpu;
+}
+/* Record a forked "sleep 1" pinned to one CPU and sanity-check every event read back from the mmaps. */
+int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct record_opts opts = {
+		.target = {
+			.uid = UINT_MAX,
+			.uses_mmap = true,
+		},
+		.no_buffering = true,
+		.mmap_pages   = 256,
+	};
+	cpu_set_t cpu_mask;
+	size_t cpu_mask_size = sizeof(cpu_mask);
+	struct perf_evlist *evlist = perf_evlist__new_dummy();
+	struct perf_evsel *evsel;
+	struct perf_sample sample;
+	const char *cmd = "sleep";
+	const char *argv[] = { cmd, "1", NULL, };
+	char *bname, *mmap_filename;
+	u64 prev_time = 0;
+	bool found_cmd_mmap = false,
+	     found_libc_mmap = false,
+	     found_vdso_mmap = false,
+	     found_ld_mmap = false;
+	int err = -1, errs = 0, i, wakeups = 0;
+	u32 cpu;
+	int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
+	char sbuf[STRERR_BUFSIZE];
+
+	if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
+		evlist = perf_evlist__new_default();
+
+	if (evlist == NULL) {
+		pr_debug("Not enough memory to create evlist\n");
+		goto out;
+	}
+
+	/*
+	 * Create maps of threads and cpus to monitor. In this case
+	 * we start with all threads and cpus (-1, -1) but then in
+	 * perf_evlist__prepare_workload we'll fill in the only thread
+	 * we're monitoring, the one forked there.
+	 */
+	err = perf_evlist__create_maps(evlist, &opts.target);
+	if (err < 0) {
+		pr_debug("Not enough memory to create thread/cpu maps\n");
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * Prepare the workload in argv[] to run, it'll fork it, and then wait
+	 * for perf_evlist__start_workload() to exec it. This is done this way
+	 * so that we have time to open the evlist (calling sys_perf_event_open
+	 * on all the fds) and then mmap them.
+	 */
+	err = perf_evlist__prepare_workload(evlist, &opts.target, argv, false, NULL);
+	if (err < 0) {
+		pr_debug("Couldn't run the workload!\n");
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * Config the evsels, setting attr->comm on the first one, etc.
+	 */
+	evsel = perf_evlist__first(evlist);
+	perf_evsel__set_sample_bit(evsel, CPU);
+	perf_evsel__set_sample_bit(evsel, TID);
+	perf_evsel__set_sample_bit(evsel, TIME);
+	perf_evlist__config(evlist, &opts, NULL);
+
+	err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
+	if (err < 0) {
+		pr_debug("sched__get_first_possible_cpu: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist;
+	}
+
+	cpu = err;
+
+	/*
+	 * So that we can check perf_sample.cpu on all the samples.
+	 */
+	if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
+		pr_debug("sched_setaffinity: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * Call sys_perf_event_open on all the fds on all the evsels,
+	 * grouping them if asked to.
+	 */
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		pr_debug("perf_evlist__open: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * mmap the first fd on a given CPU and ask for events for the other
+	 * fds in the same CPU to be injected in the same mmap ring buffer
+	 * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)).
+	 */
+	err = perf_evlist__mmap(evlist, opts.mmap_pages);
+	if (err < 0) {
+		pr_debug("perf_evlist__mmap: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * Now that all is properly set up, enable the events, they will
+	 * count just on workload.pid, which will start...
+	 */
+	perf_evlist__enable(evlist);
+
+	/*
+	 * Now!
+	 */
+	perf_evlist__start_workload(evlist);
+
+	while (1) {
+		int before = total_events;
+
+		for (i = 0; i < evlist->nr_mmaps; i++) {
+			union perf_event *event;
+			struct perf_mmap *md;
+
+			md = &evlist->mmap[i];
+			if (perf_mmap__read_init(md) < 0)
+				continue;
+
+			while ((event = perf_mmap__read_event(md)) != NULL) {
+				const u32 type = event->header.type;
+				const char *name = perf_event__name(type);
+
+				++total_events;
+				if (type < PERF_RECORD_MAX)
+					nr_events[type]++;
+
+				err = perf_evlist__parse_sample(evlist, event, &sample);
+				if (err < 0) {
+					if (verbose > 0)
+						perf_event__fprintf(event, stderr);
+					pr_debug("Couldn't parse sample\n");
+					goto out_delete_evlist;
+				}
+
+				if (verbose > 0) {
+					pr_info("%" PRIu64" %d ", sample.time, sample.cpu);
+					perf_event__fprintf(event, stderr);
+				}
+
+				if (prev_time > sample.time) {
+					pr_debug("%s going backwards in time, prev=%" PRIu64 ", curr=%" PRIu64 "\n",
+						 name, prev_time, sample.time);
+					++errs;
+				}
+
+				prev_time = sample.time;
+
+				if (sample.cpu != cpu) {
+					pr_debug("%s with unexpected cpu, expected %d, got %d\n",
+						 name, cpu, sample.cpu);
+					++errs;
+				}
+
+				if ((pid_t)sample.pid != evlist->workload.pid) {
+					pr_debug("%s with unexpected pid, expected %d, got %d\n",
+						 name, evlist->workload.pid, sample.pid);
+					++errs;
+				}
+
+				if ((pid_t)sample.tid != evlist->workload.pid) {
+					pr_debug("%s with unexpected tid, expected %d, got %d\n",
+						 name, evlist->workload.pid, sample.tid);
+					++errs;
+				}
+
+				if ((type == PERF_RECORD_COMM ||
+				     type == PERF_RECORD_MMAP ||
+				     type == PERF_RECORD_MMAP2 ||
+				     type == PERF_RECORD_FORK ||
+				     type == PERF_RECORD_EXIT) &&
+				     (pid_t)event->comm.pid != evlist->workload.pid) {
+					pr_debug("%s with unexpected pid/tid\n", name);
+					++errs;
+				}
+
+				if ((type == PERF_RECORD_COMM ||
+				     type == PERF_RECORD_MMAP ||
+				     type == PERF_RECORD_MMAP2) &&
+				     event->comm.pid != event->comm.tid) {
+					pr_debug("%s with different pid/tid!\n", name);
+					++errs;
+				}
+
+				switch (type) {
+				case PERF_RECORD_COMM:
+					if (strcmp(event->comm.comm, cmd)) {
+						pr_debug("%s with unexpected comm!\n", name);
+						++errs;
+					}
+					break;
+				case PERF_RECORD_EXIT:
+					goto found_exit;
+				case PERF_RECORD_MMAP:
+					mmap_filename = event->mmap.filename;
+					goto check_bname;
+				case PERF_RECORD_MMAP2:
+					mmap_filename = event->mmap2.filename;
+				check_bname:
+					bname = strrchr(mmap_filename, '/');
+					if (bname != NULL) {
+						if (!found_cmd_mmap)
+							found_cmd_mmap = !strcmp(bname + 1, cmd);
+						if (!found_libc_mmap)
+							found_libc_mmap = !strncmp(bname + 1, "libc", 4);
+						if (!found_ld_mmap)
+							found_ld_mmap = !strncmp(bname + 1, "ld", 2);
+					} else if (!found_vdso_mmap)
+						found_vdso_mmap = !strcmp(mmap_filename, "[vdso]");
+					break;
+
+				case PERF_RECORD_SAMPLE:
+					/* Just ignore samples for now */
+					break;
+				default:
+					pr_debug("Unexpected perf_event->header.type %d!\n",
+						 type);
+					++errs;
+				}
+
+				perf_mmap__consume(md);
+			}
+			perf_mmap__read_done(md);
+		}
+
+		/*
+		 * We don't use poll here because at least at 3.1 times the
+		 * PERF_RECORD_{!SAMPLE} events don't honour
+		 * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does.
+		 */
+		if (total_events == before && false)	/* "&& false": the poll is never reached */
+			perf_evlist__poll(evlist, -1);
+
+		sleep(1);	/* retry interval; the loop gives up once wakeups exceeds 5 */
+		if (++wakeups > 5) {
+			pr_debug("No PERF_RECORD_EXIT event!\n");
+			break;
+		}
+	}
+
+found_exit:
+	if (nr_events[PERF_RECORD_COMM] > 1) {
+		pr_debug("Excessive number of PERF_RECORD_COMM events!\n");
+		++errs;
+	}
+
+	if (nr_events[PERF_RECORD_COMM] == 0) {
+		pr_debug("Missing PERF_RECORD_COMM for %s!\n", cmd);
+		++errs;
+	}
+
+	if (!found_cmd_mmap) {
+		pr_debug("PERF_RECORD_MMAP for %s missing!\n", cmd);
+		++errs;
+	}
+
+	if (!found_libc_mmap) {
+		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "libc");
+		++errs;
+	}
+
+	if (!found_ld_mmap) {
+		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "ld");
+		++errs;
+	}
+
+	if (!found_vdso_mmap) {
+		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "[vdso]");
+		++errs;
+	}
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+out:
+	return (err < 0 || errs > 0) ? -1 : 0;
+}
diff --git a/tests/perf-targz-src-pkg b/tests/perf-targz-src-pkg
new file mode 100755
index 0000000..fae26b1
--- /dev/null
+++ b/tests/perf-targz-src-pkg
@@ -0,0 +1,22 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Test one of the main kernel Makefile targets to generate a perf sources tarball
+# suitable for build outside the full kernel sources.
+#
+# This is to test that the tools/perf/MANIFEST file lists all the files needed to
+# be in such tarball, which sometimes gets broken when we move files around,
+# like when we made some files that were in tools/perf/ available to other tools/
+# codebases by moving it to tools/include/, etc.
+
+PERF=$1
+cd "${PERF}/../.." || exit 1	# never run make/rm from the wrong directory
+make perf-targz-src-pkg > /dev/null
+TARBALL=$(ls -rt perf-*.tar.gz)
+TMP_DEST=$(mktemp -d) || exit 1	# an empty TMP_DEST would misdirect tar and make
+tar xf ${TARBALL} -C "$TMP_DEST"
+rm -f ${TARBALL}
+cd - > /dev/null
+make -C "$TMP_DEST"/perf*/tools/perf > /dev/null
+RC=$?
+rm -rf "${TMP_DEST}"
+exit $RC
diff --git a/tests/pmu.c b/tests/pmu.c
new file mode 100644
index 0000000..7bedf86
--- /dev/null
+++ b/tests/pmu.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "parse-events.h"
+#include "pmu.h"
+#include "util.h"
+#include "tests.h"
+#include <errno.h>
+#include <linux/kernel.h>
+
+/* Simulated format definitions: file name and contents written by test_format_dir_get(). */
+static struct test_format {
+	const char *name;
+	const char *value;
+} test_formats[] = {
+	{ "krava01", "config:0-1,62-63\n", },
+	{ "krava02", "config:10-17\n", },
+	{ "krava03", "config:5\n", },
+	{ "krava11", "config1:0,2,4,6,8,20-28\n", },
+	{ "krava12", "config1:63\n", },
+	{ "krava13", "config1:45-47\n", },
+	{ "krava21", "config2:0-3,10-13,20-23,30-33,40-43,50-53,60-63\n", },
+	{ "krava22", "config2:8,18,48,58\n", },
+	{ "krava23", "config2:28-29,38\n", },
+};
+
+/* Simulated user input: one numeric term per test_formats[] name; test__pmu() checks the result. */
+static struct parse_events_term test_terms[] = {
+	{
+		.config    = (char *) "krava01",
+		.val.num   = 15,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava02",
+		.val.num   = 170,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava03",
+		.val.num   = 1,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava11",
+		.val.num   = 27,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava12",
+		.val.num   = 1,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava13",
+		.val.num   = 2,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava21",
+		.val.num   = 119,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava22",
+		.val.num   = 11,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+	{
+		.config    = (char *) "krava23",
+		.val.num   = 2,
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+	},
+};
+
+/*
+ * Populate a temp dir with one file per test_formats[] entry, mimicking
+ * /sys/bus/event_source/devices/<dev>/format; returns NULL on any failure.
+ */
+static char *test_format_dir_get(void)
+{
+	static char dir[PATH_MAX];
+	unsigned int i;
+
+	snprintf(dir, PATH_MAX, "/tmp/perf-pmu-test-format-XXXXXX");
+	if (!mkdtemp(dir))
+		return NULL;
+
+	for (i = 0; i < ARRAY_SIZE(test_formats); i++) {
+		static char name[PATH_MAX];
+		struct test_format *format = &test_formats[i];
+		size_t written;
+		FILE *file;
+
+		scnprintf(name, PATH_MAX, "%s/%s", dir, format->name);
+		file = fopen(name, "w");
+		if (!file)
+			return NULL;
+
+		written = fwrite(format->value, strlen(format->value), 1, file);
+		/* Close on every path; a short or unflushed write is a failure. */
+		if (fclose(file) || written != 1)
+			return NULL;
+	}
+
+	return dir;
+}
+
+/* Remove the files in @dir, then @dir itself, by shelling out to rm and rmdir. */
+static int test_format_dir_put(char *dir)
+{
+	char cmd[PATH_MAX];
+
+	snprintf(cmd, sizeof(cmd), "rm -f %s/*\n", dir);
+	if (system(cmd) != 0)
+		return -1;
+	snprintf(cmd, sizeof(cmd), "rmdir %s\n", dir);
+	return system(cmd);
+}
+/* Chain every test_terms[] entry onto a static list, in array order, and return it. */
+static struct list_head *test_terms_list(void)
+{
+	static LIST_HEAD(terms);
+	struct parse_events_term *term = test_terms;
+	struct parse_events_term *end = test_terms + ARRAY_SIZE(test_terms);
+
+	for (; term != end; term++)
+		list_add_tail(&term->list, &terms);
+	return &terms;
+}
+/*
+ * Write the simulated format files, parse them back, apply test_terms[] and
+ * compare the three resulting config words with the expected constants.
+ */
+int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	char *format = test_format_dir_get();
+	LIST_HEAD(formats);
+	struct list_head *terms = test_terms_list();
+	struct perf_event_attr attr;
+	int ret;
+
+	if (!format)
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+
+	ret = perf_pmu__format_parse(format, &formats);
+	if (ret)
+		goto out_put;
+
+	ret = perf_pmu__config_terms(&formats, &attr, terms,
+				     false, NULL);
+	if (ret)
+		goto out_put;
+
+	/* Any mismatch in config, config1 or config2 fails the test. */
+	if (attr.config  == 0xc00000000002a823 &&
+	    attr.config1 == 0x8000400000000145 &&
+	    attr.config2 == 0x0400000020041d07)
+		ret = 0;
+	else
+		ret = -EINVAL;
+
+out_put:
+	/* The temporary directory is removed whatever the outcome. */
+	test_format_dir_put(format);
+
+	return ret;
+}
diff --git a/tests/python-use.c b/tests/python-use.c
new file mode 100644
index 0000000..5d2df65
--- /dev/null
+++ b/tests/python-use.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Just test if we can load the python binding.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include "tests.h"
+
+extern int verbose;
+/* Pipe an "import perf" snippet into PYTHON; returns 0 if the shell command succeeds. */
+int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	const char *quiet = verbose > 0 ? "" : "2> /dev/null";
+	char *script;
+	int status;
+
+	if (asprintf(&script, "echo \"import sys ; sys.path.append('%s'); import perf\" | %s %s",
+		     PYTHONPATH, PYTHON, quiet) < 0)
+		return -1;
+	status = system(script);
+	free(script);
+	return status ? -1 : 0;
+}
diff --git a/tests/sample-parsing.c b/tests/sample-parsing.c
new file mode 100644
index 0000000..0e2d00d
--- /dev/null
+++ b/tests/sample-parsing.c
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "util.h"
+#include "event.h"
+#include "evsel.h"
+#include "debug.h"
+
+#include "tests.h"
+
+/*
+ * Compare member 'm' of the in-scope sample pointers s1 and s2 with '!='.
+ * On a mismatch, log the member name and make the enclosing function
+ * return false.
+ */
+#define COMP(m) do {					\
+	if (s1->m != s2->m) {				\
+		pr_debug("Samples differ at '"#m"'\n");	\
+		return false;				\
+	}						\
+} while (0)
+
+/*
+ * Like COMP(), but compares the raw bytes of member 'm' with memcmp(), which
+ * also works for members that cannot be compared with '!='.
+ */
+#define MCOMP(m) do {					\
+	if (memcmp(&s1->m, &s2->m, sizeof(s1->m))) {	\
+		pr_debug("Samples differ at '"#m"'\n");	\
+		return false;				\
+	}						\
+} while (0)
+
+/*
+ * Compare the fields of two samples that are selected by the bits set in
+ * 'type' (and, for PERF_SAMPLE_READ, by the bits set in 'read_format').
+ * Fields whose bit is not set are not looked at.
+ *
+ * Returns true if every selected field matches.  Otherwise the COMP()/MCOMP()
+ * macros or the explicit checks below log the first differing field and
+ * return false.
+ */
+static bool samples_same(const struct perf_sample *s1,
+			 const struct perf_sample *s2,
+			 u64 type, u64 read_format)
+{
+	size_t i;
+
+	if (type & PERF_SAMPLE_IDENTIFIER)
+		COMP(id);
+
+	if (type & PERF_SAMPLE_IP)
+		COMP(ip);
+
+	if (type & PERF_SAMPLE_TID) {
+		COMP(pid);
+		COMP(tid);
+	}
+
+	if (type & PERF_SAMPLE_TIME)
+		COMP(time);
+
+	if (type & PERF_SAMPLE_ADDR)
+		COMP(addr);
+
+	if (type & PERF_SAMPLE_ID)
+		COMP(id);
+
+	if (type & PERF_SAMPLE_STREAM_ID)
+		COMP(stream_id);
+
+	if (type & PERF_SAMPLE_CPU)
+		COMP(cpu);
+
+	if (type & PERF_SAMPLE_PERIOD)
+		COMP(period);
+
+	if (type & PERF_SAMPLE_READ) {
+		/* The group count or the single value comes first ... */
+		if (read_format & PERF_FORMAT_GROUP)
+			COMP(read.group.nr);
+		else
+			COMP(read.one.value);
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+			COMP(read.time_enabled);
+		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+			COMP(read.time_running);
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		if (read_format & PERF_FORMAT_GROUP) {
+			/* ... then each group entry is compared byte-wise */
+			for (i = 0; i < s1->read.group.nr; i++)
+				MCOMP(read.group.values[i]);
+		} else {
+			COMP(read.one.id);
+		}
+	}
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		/* Counts are known to be equal before the entries are walked */
+		COMP(callchain->nr);
+		for (i = 0; i < s1->callchain->nr; i++)
+			COMP(callchain->ips[i]);
+	}
+
+	if (type & PERF_SAMPLE_RAW) {
+		COMP(raw_size);
+		if (memcmp(s1->raw_data, s2->raw_data, s1->raw_size)) {
+			pr_debug("Samples differ at 'raw_data'\n");
+			return false;
+		}
+	}
+
+	if (type & PERF_SAMPLE_BRANCH_STACK) {
+		COMP(branch_stack->nr);
+		for (i = 0; i < s1->branch_stack->nr; i++)
+			MCOMP(branch_stack->entries[i]);
+	}
+
+	if (type & PERF_SAMPLE_REGS_USER) {
+		/* One u64 is compared per bit set in the register mask */
+		size_t sz = hweight_long(s1->user_regs.mask) * sizeof(u64);
+
+		COMP(user_regs.mask);
+		COMP(user_regs.abi);
+		/* Register values are only checked when the ABI is non-zero */
+		if (s1->user_regs.abi &&
+		    (!s1->user_regs.regs || !s2->user_regs.regs ||
+		     memcmp(s1->user_regs.regs, s2->user_regs.regs, sz))) {
+			pr_debug("Samples differ at 'user_regs'\n");
+			return false;
+		}
+	}
+
+	if (type & PERF_SAMPLE_STACK_USER) {
+		COMP(user_stack.size);
+		if (memcmp(s1->user_stack.data, s2->user_stack.data,
+			   s1->user_stack.size)) {
+			pr_debug("Samples differ at 'user_stack'\n");
+			return false;
+		}
+	}
+
+	if (type & PERF_SAMPLE_WEIGHT)
+		COMP(weight);
+
+	if (type & PERF_SAMPLE_DATA_SRC)
+		COMP(data_src);
+
+	if (type & PERF_SAMPLE_TRANSACTION)
+		COMP(transaction);
+
+	if (type & PERF_SAMPLE_REGS_INTR) {
+		/* Same scheme as for the user registers above */
+		size_t sz = hweight_long(s1->intr_regs.mask) * sizeof(u64);
+
+		COMP(intr_regs.mask);
+		COMP(intr_regs.abi);
+		if (s1->intr_regs.abi &&
+		    (!s1->intr_regs.regs || !s2->intr_regs.regs ||
+		     memcmp(s1->intr_regs.regs, s2->intr_regs.regs, sz))) {
+			pr_debug("Samples differ at 'intr_regs'\n");
+			return false;
+		}
+	}
+
+	if (type & PERF_SAMPLE_PHYS_ADDR)
+		COMP(phys_addr);
+
+	return true;
+}
+
+/*
+ * Synthesize a PERF_RECORD_SAMPLE event from a fixed sample for the given
+ * sample_type/read_format, check that exactly the expected number of bytes
+ * was written, parse the event back and compare the parsed sample with the
+ * original one.
+ *
+ * sample_regs is used as the register mask of both user_regs and intr_regs.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
+{
+	struct perf_evsel evsel = {
+		.needs_swap = false,
+		.attr = {
+			.sample_type = sample_type,
+			.read_format = read_format,
+		},
+	};
+	union perf_event *event;
+	/* The unions below reserve backing storage for the variable-size tails */
+	union {
+		struct ip_callchain callchain;
+		u64 data[64];
+	} callchain = {
+		/* 3 ips */
+		.data = {3, 201, 202, 203},
+	};
+	union {
+		struct branch_stack branch_stack;
+		u64 data[64];
+	} branch_stack = {
+		/* 1 branch_entry */
+		.data = {1, 211, 212, 213},
+	};
+	u64 regs[64];
+	const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL};
+	const u64 data[] = {0x2211443366558877ULL, 0, 0xaabbccddeeff4321ULL};
+	struct perf_sample sample = {
+		.ip		= 101,
+		.pid		= 102,
+		.tid		= 103,
+		.time		= 104,
+		.addr		= 105,
+		.id		= 106,
+		.stream_id	= 107,
+		.period		= 108,
+		.weight		= 109,
+		.cpu		= 110,
+		.raw_size	= sizeof(raw_data),
+		.data_src	= 111,
+		.transaction	= 112,
+		.raw_data	= (void *)raw_data,
+		.callchain	= &callchain.callchain,
+		.branch_stack	= &branch_stack.branch_stack,
+		.user_regs	= {
+			.abi	= PERF_SAMPLE_REGS_ABI_64,
+			.mask	= sample_regs,
+			.regs	= regs,
+		},
+		.user_stack	= {
+			.size	= sizeof(data),
+			.data	= (void *)data,
+		},
+		.read		= {
+			.time_enabled = 0x030a59d664fca7deULL,
+			.time_running = 0x011b6ae553eb98edULL,
+		},
+		.intr_regs	= {
+			.abi	= PERF_SAMPLE_REGS_ABI_64,
+			.mask	= sample_regs,
+			.regs	= regs,
+		},
+		.phys_addr	= 113,
+	};
+	struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},};
+	struct perf_sample sample_out;
+	size_t i, sz, bufsz;
+	int err, ret = -1;
+
+	if (sample_type & PERF_SAMPLE_REGS_USER)
+		evsel.attr.sample_regs_user = sample_regs;
+
+	if (sample_type & PERF_SAMPLE_REGS_INTR)
+		evsel.attr.sample_regs_intr = sample_regs;
+
+	/* Masking with 0xfe keeps every register byte different from 0xff */
+	for (i = 0; i < sizeof(regs); i++)
+		*(i + (u8 *)regs) = i & 0xfe;
+
+	if (read_format & PERF_FORMAT_GROUP) {
+		sample.read.group.nr     = 4;
+		sample.read.group.values = values;
+	} else {
+		sample.read.one.value = 0x08789faeb786aa87ULL;
+		sample.read.one.id    = 99;
+	}
+
+	sz = perf_event__sample_event_size(&sample, sample_type, read_format);
+	bufsz = sz + 4096; /* Add a bit for overrun checking */
+	event = malloc(bufsz);
+	if (!event) {
+		pr_debug("malloc failed\n");
+		return -1;
+	}
+
+	/* Pre-fill with 0xff so that untouched bytes can be recognised below */
+	memset(event, 0xff, bufsz);
+	event->header.type = PERF_RECORD_SAMPLE;
+	event->header.misc = 0;
+	event->header.size = sz;
+
+	err = perf_event__synthesize_sample(event, sample_type, read_format,
+					    &sample);
+	if (err) {
+		pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+			 "perf_event__synthesize_sample", sample_type, err);
+		goto out_free;
+	}
+
+	/* The data does not contain 0xff so we use that to check the size */
+	for (i = bufsz; i > 0; i--) {
+		if (*(i - 1 + (u8 *)event) != 0xff)
+			break;
+	}
+	/* i is now one past the last byte that was written */
+	if (i != sz) {
+		pr_debug("Event size mismatch: actual %zu vs expected %zu\n",
+			 i, sz);
+		goto out_free;
+	}
+
+	evsel.sample_size = __perf_evsel__sample_size(sample_type);
+
+	err = perf_evsel__parse_sample(&evsel, event, &sample_out);
+	if (err) {
+		pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+			 "perf_evsel__parse_sample", sample_type, err);
+		goto out_free;
+	}
+
+	if (!samples_same(&sample, &sample_out, sample_type, read_format)) {
+		pr_debug("parsing failed for sample_type %#"PRIx64"\n",
+			 sample_type);
+		goto out_free;
+	}
+
+	ret = 0;
+out_free:
+	free(event);
+	/* On failure also report which read_format was in use, if any */
+	if (ret && read_format)
+		pr_debug("read_format %#"PRIx64"\n", read_format);
+	return ret;
+}
+
+/**
+ * test__sample_parsing - test sample parsing.
+ *
+ * This function implements a test that synthesizes a sample event, parses it
+ * and then checks that the parsed sample matches the original sample.  The test
+ * checks sample format bits separately and together.  If the test passes %0 is
+ * returned, otherwise %-1 is returned.
+ */
+int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	/*
+	 * read_format values to try.  Every entry has the bit with value 4 set
+	 * (presumably PERF_FORMAT_ID, which samples_same() notes is forced for
+	 * PERF_SAMPLE_READ), combined with all settings of bits 1, 2 and 8.
+	 */
+	const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15};
+	u64 sample_type;
+	u64 sample_regs;
+	size_t i;
+	int err;
+
+	/*
+	 * Fail the test if it has not been updated when new sample format bits
+	 * were added.  Please actually update the test rather than just change
+	 * the condition below.
+	 */
+	if (PERF_SAMPLE_MAX > PERF_SAMPLE_PHYS_ADDR << 1) {
+		pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
+		return -1;
+	}
+
+	/* Test each sample format bit separately */
+	for (sample_type = 1; sample_type != PERF_SAMPLE_MAX;
+	     sample_type <<= 1) {
+		/* Test read_format variations */
+		if (sample_type == PERF_SAMPLE_READ) {
+			for (i = 0; i < ARRAY_SIZE(rf); i++) {
+				err = do_test(sample_type, 0, rf[i]);
+				if (err)
+					return err;
+			}
+			continue;
+		}
+		sample_regs = 0;
+
+		/* Only the two register sample types get a non-zero mask */
+		if (sample_type == PERF_SAMPLE_REGS_USER)
+			sample_regs = 0x3fff;
+
+		if (sample_type == PERF_SAMPLE_REGS_INTR)
+			sample_regs = 0xff0fff;
+
+		err = do_test(sample_type, sample_regs, 0);
+		if (err)
+			return err;
+	}
+
+	/* Test all sample format bits together */
+	sample_type = PERF_SAMPLE_MAX - 1;
+	sample_regs = 0x3fff; /* shared by intr and user regs */
+	for (i = 0; i < ARRAY_SIZE(rf); i++) {
+		err = do_test(sample_type, sample_regs, rf[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
diff --git a/tests/sdt.c b/tests/sdt.c
new file mode 100644
index 0000000..5059452
--- /dev/null
+++ b/tests/sdt.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdio.h>
+#include <sys/epoll.h>
+#include <util/evlist.h>
+#include <linux/filter.h>
+#include "tests.h"
+#include "debug.h"
+#include "probe-file.h"
+#include "build-id.h"
+
+/* To test SDT event, we need libelf support to scan elf binary */
+#if defined(HAVE_SDT_EVENT) && defined(HAVE_LIBELF_SUPPORT)
+
+#include <sys/sdt.h>
+
+/*
+ * Carries the SDT marker with provider "perf" and name "test_target" that
+ * test__sdt_event() looks up in the probe cache.  Always returns TEST_OK.
+ */
+static int target_function(void)
+{
+	DTRACE_PROBE(perf, test_target);
+	return TEST_OK;
+}
+
+/*
+ * Copied from builtin-buildid-cache.c
+ *
+ * Read the build id of @filename, convert it to its string form and add the
+ * file to the build-id cache under that id.  Returns the negative result of
+ * the failing step, otherwise the result of build_id_cache__add_s().
+ */
+static int build_id_cache__add_file(const char *filename)
+{
+	u8 build_id[BUILD_ID_SIZE];
+	char sbuild_id[SBUILD_ID_SIZE];
+	int ret = filename__read_build_id(filename, &build_id, sizeof(build_id));
+
+	if (ret < 0) {
+		pr_debug("Failed to read build id of %s\n", filename);
+		return ret;
+	}
+
+	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+
+	ret = build_id_cache__add_s(sbuild_id, filename, NULL, false, false);
+	if (ret < 0)
+		pr_debug("Failed to add build id cache of %s\n", filename);
+
+	return ret;
+}
+
+/*
+ * Return a newly allocated string holding the target of /proc/self/exe, or
+ * NULL if the allocation or the readlink() fails.  The caller frees it.
+ */
+static char *get_self_path(void)
+{
+	char *path = calloc(PATH_MAX, sizeof(char));
+
+	if (path == NULL)
+		return NULL;
+
+	/*
+	 * readlink() does not terminate the string; the zero-filled buffer
+	 * and the PATH_MAX - 1 limit guarantee a trailing NUL.
+	 */
+	if (readlink("/proc/self/exe", path, PATH_MAX - 1) >= 0)
+		return path;
+
+	pr_debug("Failed to get correct path of perf\n");
+	free(path);
+	return NULL;
+}
+
+/*
+ * Open the probe cache of @target and look for the entry @group:@event.
+ *
+ * Returns 0 if the entry exists, -ENOENT if it does not and -EINVAL if the
+ * cache could not be opened.
+ */
+static int search_cached_probe(const char *target,
+			       const char *group, const char *event)
+{
+	struct probe_cache *pcache = probe_cache__new(target, NULL);
+	int err;
+
+	if (pcache == NULL) {
+		pr_debug("Failed to open probe cache of %s\n", target);
+		return -EINVAL;
+	}
+
+	if (probe_cache__find_by_name(pcache, group, event)) {
+		err = 0;
+	} else {
+		pr_debug("Failed to find %s:%s in the cache\n", group, event);
+		err = -ENOENT;
+	}
+
+	probe_cache__delete(pcache);
+	return err;
+}
+
+/*
+ * Add the running binary to a build-id cache kept in a temporary directory
+ * and check that the SDT marker from target_function() can be found in the
+ * probe cache as "sdt_perf:test_target".
+ *
+ * Returns the result of target_function() (TEST_OK) when every step
+ * succeeds and TEST_FAIL otherwise.
+ */
+int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused)
+{
+	int ret = TEST_FAIL;
+	char __tempdir[] = "./test-buildid-XXXXXX";
+	char *tempdir = NULL, *myself = get_self_path();
+
+	/* Note: the same message is logged when get_self_path() failed */
+	if (myself == NULL || mkdtemp(__tempdir) == NULL) {
+		pr_debug("Failed to make a tempdir for build-id cache\n");
+		goto error;
+	}
+	/* Note that buildid_dir must be an absolute path */
+	tempdir = realpath(__tempdir, NULL);
+	if (tempdir == NULL)
+		goto error_rmdir;
+
+	/* At first, scan itself */
+	set_buildid_dir(tempdir);
+	if (build_id_cache__add_file(myself) < 0)
+		goto error_rmdir;
+
+	/* Open a cache and make sure the SDT is stored */
+	if (search_cached_probe(myself, "sdt_perf", "test_target") < 0)
+		goto error_rmdir;
+
+	/* TBD: probing on the SDT event and collect logs */
+
+	/* Call the target and get an event */
+	ret = target_function();
+
+error_rmdir:
+	/* Cleanup temporary buildid dir */
+	rm_rf(__tempdir);
+error:
+	/* free(NULL) is a no-op, so both frees are safe on every path */
+	free(tempdir);
+	free(myself);
+	return ret;
+}
+#else
+/*
+ * Fallback used when HAVE_SDT_EVENT or HAVE_LIBELF_SUPPORT is not defined:
+ * log why the test cannot run and report TEST_SKIP.
+ */
+int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused)
+{
+	pr_debug("Skip SDT event test because SDT support is not compiled\n");
+	return TEST_SKIP;
+}
+#endif
diff --git a/tests/shell/lib/probe.sh b/tests/shell/lib/probe.sh
new file mode 100644
index 0000000..6293cc6
--- /dev/null
+++ b/tests/shell/lib/probe.sh
@@ -0,0 +1,6 @@
+# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
+
+# Return 2 if running 'perf probe' prints "is not a perf-command",
+# otherwise return 0.
+skip_if_no_perf_probe() {
+	perf probe 2>&1 | grep -q 'is not a perf-command' && return 2
+	return 0
+}
diff --git a/tests/shell/lib/probe_vfs_getname.sh b/tests/shell/lib/probe_vfs_getname.sh
new file mode 100644
index 0000000..1c16e56
--- /dev/null
+++ b/tests/shell/lib/probe_vfs_getname.sh
@@ -0,0 +1,23 @@
+# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
+
+perf probe -l 2>&1 | grep -q probe:vfs_getname
+had_vfs_getname=$?
+
+# Delete the probe:vfs_getname* probes, but only when had_vfs_getname is 1,
+# i.e. when the probe was not listed at the time this file was sourced.
+cleanup_probe_vfs_getname() {
+	if [ $had_vfs_getname -eq 1 ] ; then
+		perf probe -q -d probe:vfs_getname*
+	fi
+}
+
+# When had_vfs_getname is 1, find the line number of getname_flags whose
+# source matches 'result.*=.*filename;' and add a vfs_getname probe on it
+# that records result->name as the string argument 'pathname'.
+# $1: optional extra argument for 'perf probe' (skip_if_no_debuginfo uses -v).
+add_probe_vfs_getname() {
+	local verbose=$1
+	if [ $had_vfs_getname -eq 1 ] ; then
+		line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
+		perf probe $verbose "vfs_getname=getname_flags:${line} pathname=result->name:string"
+	fi
+}
+
+# Run add_probe_vfs_getname again with -v.  Return 2 if its output has a
+# line starting with one of the two messages in the regexp below; otherwise
+# return 1.
+skip_if_no_debuginfo() {
+	add_probe_vfs_getname -v 2>&1 | egrep -q "^(Failed to find the path for kernel|Debuginfo-analysis is not supported)" && return 2
+	return 1
+}
diff --git a/tests/shell/probe_vfs_getname.sh b/tests/shell/probe_vfs_getname.sh
new file mode 100755
index 0000000..9b76351
--- /dev/null
+++ b/tests/shell/probe_vfs_getname.sh
@@ -0,0 +1,14 @@
+# Add vfs_getname probe to get syscall args filenames
+#
+# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
+
+. $(dirname $0)/lib/probe.sh
+
+skip_if_no_perf_probe || exit 2
+
+. $(dirname $0)/lib/probe_vfs_getname.sh
+
+add_probe_vfs_getname || skip_if_no_debuginfo
+err=$?
+cleanup_probe_vfs_getname
+exit $err
diff --git a/tests/shell/record+probe_libc_inet_pton.sh b/tests/shell/record+probe_libc_inet_pton.sh
new file mode 100755
index 0000000..016882d
--- /dev/null
+++ b/tests/shell/record+probe_libc_inet_pton.sh
@@ -0,0 +1,63 @@
+# probe libc's inet_pton & backtrace it with ping
+
+# Installs a probe on libc's inet_pton function, that will use uprobes,
+# then use 'perf trace' on a ping to localhost asking for just one packet
+# with a backtrace 3 levels deep, check that it is what we expect.
+# This needs no debuginfo package, all is done using the libc ELF symtab
+# and the CFI info in the binaries.
+
+# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
+
+. $(dirname $0)/lib/probe.sh
+
+libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g')
+nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254
+
+# Record one 'ping -6 -c 1 ::1' with the probe_libc:inet_pton event and then
+# match the lines of 'perf script' output, in order, against the expected[]
+# regexps.  The event attributes and the expected entries depend on the
+# machine type reported by 'uname -m'.  On the first line that does not match,
+# a FAIL message is printed and 'exit 1' is run; the loop stops normally once
+# there is no further expected entry.
+trace_libc_inet_pton_backtrace() {
+	idx=0
+	expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)"
+	expected[1]=".*inet_pton[[:space:]]\($libc\)$"
+	case "$(uname -m)" in
+	s390x)
+		eventattr='call-graph=dwarf,max-stack=4'
+		expected[2]="gaih_inet.*[[:space:]]\($libc|inlined\)$"
+		expected[3]="(__GI_)?getaddrinfo[[:space:]]\($libc|inlined\)$"
+		expected[4]="main[[:space:]]\(.*/bin/ping.*\)$"
+		;;
+	*)
+		eventattr='max-stack=3'
+		expected[2]="getaddrinfo[[:space:]]\($libc\)$"
+		expected[3]=".*\(.*/bin/ping.*\)$"
+		;;
+	esac
+
+	# mktemp -u only generates a name; perf record creates the file
+	file=`mktemp -u /tmp/perf.data.XXX`
+
+	perf record -e probe_libc:inet_pton/$eventattr/ -o $file ping -6 -c 1 ::1 > /dev/null 2>&1
+	perf script -i $file | while read line ; do
+		echo $line
+		echo "$line" | egrep -q "${expected[$idx]}"
+		if [ $? -ne 0 ] ; then
+			printf "FAIL: expected backtrace entry %d \"%s\" got \"%s\"\n" $idx "${expected[$idx]}" "$line"
+			exit 1
+		fi
+		let idx+=1
+		[ -z "${expected[$idx]}" ] && break
+	done
+
+	# If any statements are executed from this point onwards,
+	# the exit code of the last among these will be reflected
+	# in err below. If the exit code is 0, the test will pass
+	# even if the perf script output does not match.
+}
+
+# Check for IPv6 interface existence
+ip a sh lo | fgrep -q inet6 || exit 2
+
+skip_if_no_perf_probe && \
+perf probe -q $libc inet_pton && \
+trace_libc_inet_pton_backtrace
+err=$?
+rm -f ${file}
+perf probe -q -d probe_libc:inet_pton
+exit $err
diff --git a/tests/shell/record+script_probe_vfs_getname.sh b/tests/shell/record+script_probe_vfs_getname.sh
new file mode 100755
index 0000000..ba29535
--- /dev/null
+++ b/tests/shell/record+script_probe_vfs_getname.sh
@@ -0,0 +1,41 @@
+# Use vfs_getname probe to get syscall args filenames
+
+# Uses the 'perf test shell' library to add probe:vfs_getname to the system
+# then use it with 'perf record' using 'touch' to write to a temp file, then
+# checks that that was captured by the vfs_getname probe in the generated
+# perf.data file, with the temp file name as the pathname argument.
+
+# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
+
+. $(dirname $0)/lib/probe.sh
+
+skip_if_no_perf_probe || exit 2
+
+. $(dirname $0)/lib/probe_vfs_getname.sh
+
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+file=$(mktemp /tmp/temporary_file.XXXXX)
+
+# Record 'touch $file' with the probe:vfs_getname event into ${perfdata}.
+record_open_file() {
+	echo "Recording open file:"
+	perf record -o ${perfdata} -e probe:vfs_getname touch $file
+}
+
+# Succeed only if 'perf script' on ${perfdata} prints a probe:vfs_getname
+# line from 'touch' whose pathname argument is ${file}.
+perf_script_filenames() {
+	echo "Looking at perf.data file for vfs_getname records for the file we touched:"
+	perf script -i ${perfdata} | \
+	egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname: +\([[:xdigit:]]+\) +pathname=\"${file}\""
+}
+
+# If the probe cannot be added, exit with the status of skip_if_no_debuginfo
+add_probe_vfs_getname || skip_if_no_debuginfo
+err=$?
+if [ $err -ne 0 ] ; then
+	exit $err
+fi
+
+record_open_file && perf_script_filenames
+err=$?
+# Remove the temporary files and the probe before reporting the result
+rm -f ${perfdata}
+rm -f ${file}
+cleanup_probe_vfs_getname
+exit $err
diff --git a/tests/shell/trace+probe_vfs_getname.sh b/tests/shell/trace+probe_vfs_getname.sh
new file mode 100755
index 0000000..55ad979
--- /dev/null
+++ b/tests/shell/trace+probe_vfs_getname.sh
@@ -0,0 +1,36 @@
+# Check open filename arg using perf trace + vfs_getname
+
+# Uses the 'perf test shell' library to add probe:vfs_getname to the system
+# then use it with 'perf trace' using 'touch' to write to a temp file, then
+# checks that the filename captured by the vfs_getname probe was used by
+# 'perf trace', which already handles "probe:vfs_getname" if present, in the
+# "open" syscall "filename" argument beautifier.
+
+# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
+
+. $(dirname $0)/lib/probe.sh
+
+skip_if_no_perf_probe || exit 2
+
+. $(dirname $0)/lib/probe_vfs_getname.sh
+
+file=$(mktemp /tmp/temporary_file.XXXXX)
+
+# Build a comma separated list of the open/openat syscall names taken from
+# 'perf list', trace 'touch $file' for those syscalls and succeed only if the
+# output has an open(at) line whose filename argument is ${file}.
+trace_open_vfs_getname() {
+	evts=$(echo $(perf list syscalls:sys_enter_open* |& egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
+	perf trace -e $evts touch $file 2>&1 | \
+	egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
+}
+
+
+# If the probe cannot be added, exit with the status of skip_if_no_debuginfo
+add_probe_vfs_getname || skip_if_no_debuginfo
+err=$?
+if [ $err -ne 0 ] ; then
+	exit $err
+fi
+
+trace_open_vfs_getname
+err=$?
+# Remove the temporary file and the probe before reporting the result
+rm -f ${file}
+cleanup_probe_vfs_getname
+exit $err
diff --git a/tests/stat.c b/tests/stat.c
new file mode 100644
index 0000000..9425002
--- /dev/null
+++ b/tests/stat.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include "event.h"
+#include "tests.h"
+#include "stat.h"
+#include "counts.h"
+#include "debug.h"
+
+/*
+ * Return true if one of the config->nr entries of config->data carries
+ * both the given tag and the given value, false otherwise.
+ */
+static bool has_term(struct stat_config_event *config,
+		     u64 tag, u64 val)
+{
+	unsigned idx = 0;
+
+	while (idx < config->nr) {
+		if (config->data[idx].tag == tag &&
+		    config->data[idx].val == val)
+			return true;
+		idx++;
+	}
+
+	return false;
+}
+
+/*
+ * Callback handed to perf_event__synthesize_stat_config(): check the
+ * synthesized stat_config event twice, first by looking at its raw tag/value
+ * terms and then by reading it back into a struct perf_stat_config.
+ *
+ * Returns 0 when all checks hold.  TEST_ASSERT_VAL() is defined outside this
+ * file; presumably it reports the text and fails the callback otherwise.
+ */
+static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	struct stat_config_event *config = &event->stat_config;
+	struct perf_stat_config stat_config;
+
+/* HAS(X, v): the event has a PERF_STAT_CONFIG_TERM__X term with value v */
+#define HAS(term, val) \
+	has_term(config, PERF_STAT_CONFIG_TERM__##term, val)
+
+	TEST_ASSERT_VAL("wrong nr",        config->nr == PERF_STAT_CONFIG_TERM__MAX);
+	TEST_ASSERT_VAL("wrong aggr_mode", HAS(AGGR_MODE, AGGR_CORE));
+	TEST_ASSERT_VAL("wrong scale",     HAS(SCALE, 1));
+	TEST_ASSERT_VAL("wrong interval",  HAS(INTERVAL, 1));
+
+#undef HAS
+
+	/* Decode the event and compare with what the test put in */
+	perf_event__read_stat_config(&stat_config, config);
+
+	TEST_ASSERT_VAL("wrong aggr_mode", stat_config.aggr_mode == AGGR_CORE);
+	TEST_ASSERT_VAL("wrong scale",     stat_config.scale == 1);
+	TEST_ASSERT_VAL("wrong interval",  stat_config.interval == 1);
+	return 0;
+}
+
+/*
+ * Synthesize a stat_config event from a known configuration and let
+ * process_stat_config_event() verify it.  Returns 0 when the synthesize
+ * call returns 0.
+ */
+int test__synthesize_stat_config(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	/* These are the values process_stat_config_event() checks for */
+	struct perf_stat_config stat_config = {
+		.aggr_mode	= AGGR_CORE,
+		.scale		= 1,
+		.interval	= 1,
+	};
+
+	TEST_ASSERT_VAL("failed to synthesize stat_config",
+		!perf_event__synthesize_stat_config(NULL, &stat_config, process_stat_config_event, NULL));
+
+	return 0;
+}
+
+/*
+ * Callback handed to perf_event__synthesize_stat(): check that the
+ * synthesized stat event carries cpu 1, thread 2, id 3 and the counter values
+ * val 100, ena 200 and run 300.
+ *
+ * Returns 0 when every field matches.  TEST_ASSERT_VAL() is defined outside
+ * this file; presumably it reports the text and fails the callback otherwise.
+ */
+static int process_stat_event(struct perf_tool *tool __maybe_unused,
+			      union perf_event *event,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	struct stat_event *st = &event->stat;
+
+	TEST_ASSERT_VAL("wrong cpu",    st->cpu    == 1);
+	TEST_ASSERT_VAL("wrong thread", st->thread == 2);
+	TEST_ASSERT_VAL("wrong id",     st->id     == 3);
+	TEST_ASSERT_VAL("wrong val",    st->val    == 100);
+	/* Each message names the field its condition actually checks */
+	TEST_ASSERT_VAL("wrong ena",    st->ena    == 200);
+	TEST_ASSERT_VAL("wrong run",    st->run    == 300);
+	return 0;
+}
+
+/*
+ * Synthesize a stat event for cpu 1, thread 2, id 3 with known counter
+ * values and let process_stat_event() verify it.  Returns 0 when the
+ * synthesize call returns 0.
+ */
+int test__synthesize_stat(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct perf_counts_values count;
+
+	count.val = 100;
+	count.ena = 200;
+	count.run = 300;
+
+	/* The message names the event type this test synthesizes */
+	TEST_ASSERT_VAL("failed to synthesize stat",
+		!perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL));
+
+	return 0;
+}
+
+/*
+ * Callback handed to perf_event__synthesize_stat_round(): check that the
+ * synthesized stat_round event has time 0xdeadbeef and the interval round
+ * type.  Returns 0 when both fields match.
+ */
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+				    union perf_event *event,
+				    struct perf_sample *sample __maybe_unused,
+				    struct machine *machine __maybe_unused)
+{
+	struct stat_round_event *stat_round = &event->stat_round;
+
+	TEST_ASSERT_VAL("wrong time", stat_round->time == 0xdeadbeef);
+	TEST_ASSERT_VAL("wrong type", stat_round->type == PERF_STAT_ROUND_TYPE__INTERVAL);
+	return 0;
+}
+
+/*
+ * Synthesize a stat_round event with time 0xdeadbeef and the interval round
+ * type and let process_stat_round_event() verify it.  Returns 0 when the
+ * synthesize call returns 0.
+ */
+int test__synthesize_stat_round(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	/* The message names the event type this test synthesizes */
+	TEST_ASSERT_VAL("failed to synthesize stat_round",
+		!perf_event__synthesize_stat_round(NULL, 0xdeadbeef, PERF_STAT_ROUND_TYPE__INTERVAL,
+						   process_stat_round_event, NULL));
+
+	return 0;
+}
diff --git a/tests/sw-clock.c b/tests/sw-clock.c
new file mode 100644
index 0000000..f9490b2
--- /dev/null
+++ b/tests/sw-clock.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/mman.h>
+
+#include "tests.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+
+#define NR_LOOPS  10000000
+
+/*
+ * This test will open software clock events (cpu-clock, task-clock)
+ * then check their frequency -> period conversion has no artifact of
+ * setting period to 1 forcefully.
+ *
+ * @clock_id selects the software clock event to open.  Returns 0 on
+ * success and a negative value on failure.
+ */
+static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
+{
+	int i, err = -1;
+	volatile int tmp = 0;
+	u64 total_periods = 0;
+	int nr_samples = 0;
+	char sbuf[STRERR_BUFSIZE];
+	union perf_event *event;
+	struct perf_evsel *evsel;
+	struct perf_evlist *evlist;
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_SOFTWARE,
+		.config = clock_id,
+		.sample_type = PERF_SAMPLE_PERIOD,
+		.exclude_kernel = 1,
+		.disabled = 1,
+		.freq = 1,
+	};
+	struct cpu_map *cpus;
+	struct thread_map *threads;
+	struct perf_mmap *md;
+
+	attr.sample_freq = 500;
+
+	evlist = perf_evlist__new();
+	if (evlist == NULL) {
+		pr_debug("perf_evlist__new\n");
+		return -1;
+	}
+
+	evsel = perf_evsel__new(&attr);
+	if (evsel == NULL) {
+		pr_debug("perf_evsel__new\n");
+		goto out_delete_evlist;
+	}
+	perf_evlist__add(evlist, evsel);
+
+	/* Measure only this process */
+	cpus = cpu_map__dummy_new();
+	threads = thread_map__new_by_tid(getpid());
+	if (!cpus || !threads) {
+		err = -ENOMEM;
+		pr_debug("Not enough memory to create thread/cpu maps\n");
+		goto out_free_maps;
+	}
+
+	perf_evlist__set_maps(evlist, cpus, threads);
+
+	/*
+	 * The locals are cleared so that the puts at out_free_maps no longer
+	 * refer to the maps handed to the evlist (presumably the evlist holds
+	 * them from here on - confirm against perf_evlist__set_maps()).
+	 */
+	cpus	= NULL;
+	threads = NULL;
+
+	if (perf_evlist__open(evlist)) {
+		const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate";
+
+		err = -errno;
+		pr_debug("Couldn't open evlist: %s\nHint: check %s, using %" PRIu64 " in this test.\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)),
+			 knob, (u64)attr.sample_freq);
+		goto out_delete_evlist;
+	}
+
+	err = perf_evlist__mmap(evlist, 128);
+	if (err < 0) {
+		pr_debug("failed to mmap event: %d (%s)\n", errno,
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__enable(evlist);
+
+	/* collect samples */
+	for (i = 0; i < NR_LOOPS; i++)
+		tmp++;
+
+	perf_evlist__disable(evlist);
+
+	md = &evlist->mmap[0];
+	if (perf_mmap__read_init(md) < 0)
+		goto out_init;
+
+	/* Sum the period of every sample event; other event types are skipped */
+	while ((event = perf_mmap__read_event(md)) != NULL) {
+		struct perf_sample sample;
+
+		if (event->header.type != PERF_RECORD_SAMPLE)
+			goto next_event;
+
+		err = perf_evlist__parse_sample(evlist, event, &sample);
+		if (err < 0) {
+			pr_debug("Error during parse sample\n");
+			goto out_delete_evlist;
+		}
+
+		total_periods += sample.period;
+		nr_samples++;
+next_event:
+		perf_mmap__consume(md);
+	}
+	perf_mmap__read_done(md);
+
+out_init:
+	/*
+	 * The sum equals the sample count when every period is 1; this also
+	 * holds when no sample was read at all, since both counters start at 0.
+	 */
+	if ((u64) nr_samples == total_periods) {
+		pr_debug("All (%d) samples have period value of 1!\n",
+			 nr_samples);
+		err = -1;
+	}
+
+out_free_maps:
+	cpu_map__put(cpus);
+	thread_map__put(threads);
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+	return err;
+}
+
+/*
+ * Run the frequency check for the cpu-clock event and, only if that
+ * succeeds, for the task-clock event.  Returns the first non-zero result,
+ * or 0 when both pass.
+ */
+int test__sw_clock_freq(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err = __test__sw_clock_freq(PERF_COUNT_SW_CPU_CLOCK);
+
+	if (err)
+		return err;
+
+	return __test__sw_clock_freq(PERF_COUNT_SW_TASK_CLOCK);
+}
diff --git a/tests/switch-tracking.c b/tests/switch-tracking.c
new file mode 100644
index 0000000..9b5be51
--- /dev/null
+++ b/tests/switch-tracking.c
@@ -0,0 +1,580 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/time.h>
+#include <sys/prctl.h>
+#include <errno.h>
+#include <time.h>
+#include <stdlib.h>
+
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tests.h"
+
+/*
+ * Busy-spin for 50ms and then sleep for 50ms.
+ *
+ * Returns 0 on success; a sleep that was merely interrupted by a signal also
+ * counts as success.  Otherwise returns the non-zero result of the failing
+ * gettimeofday() or nanosleep() call.
+ */
+static int spin_sleep(void)
+{
+	struct timeval start, now, diff, maxtime;
+	struct timespec ts;
+	int err, i;
+
+	maxtime.tv_sec = 0;
+	maxtime.tv_usec = 50000;
+
+	err = gettimeofday(&start, NULL);
+	if (err)
+		return err;
+
+	/* Spin for 50ms */
+	while (1) {
+		for (i = 0; i < 1000; i++)
+			barrier();
+
+		err = gettimeofday(&now, NULL);
+		if (err)
+			return err;
+
+		timersub(&now, &start, &diff);
+		if (timercmp(&diff, &maxtime, > /* For checkpatch */))
+			break;
+	}
+
+	ts.tv_nsec = 50 * 1000 * 1000;
+	ts.tv_sec = 0;
+
+	/*
+	 * Sleep for 50ms.  nanosleep() returns -1 and reports the reason in
+	 * errno, so EINTR has to be looked for there, not in the return value.
+	 */
+	err = nanosleep(&ts, NULL);
+	if (err && errno == EINTR)
+		err = 0;
+
+	return err;
+}
+
+/* State collected while the recorded events are processed. */
+struct switch_tracking {
+	/* Samples from this evsel are treated as sched_switch events */
+	struct perf_evsel *switch_evsel;
+	/* Samples from this evsel are treated as cycles events */
+	struct perf_evsel *cycles_evsel;
+	/* Indexed by cpu: next_tid of the last switch seen there, -1 if none */
+	pid_t *tids;
+	/* Number of entries allocated in tids */
+	int nr_tids;
+	/* comm_seen[n] is set once the comm event with index n has been seen */
+	int comm_seen[4];
+	/* Set when a cycles sample is processed before comm_seen[0] is set */
+	int cycles_before_comm_1;
+	/* Set for a cycles sample with comm_seen[1] set but comm_seen[2] not */
+	int cycles_between_comm_2_and_comm_3;
+	/* Set when a cycles sample is processed after comm_seen[3] is set */
+	int cycles_after_comm_4;
+};
+
+/*
+ * Check whether @event is a comm event of the current process (pid and tid
+ * both equal to getpid()) that carries the name @comm.
+ *
+ * Returns 0 if it is not, 1 if it is and comm_seen[@nr] was newly set, and
+ * -1 if comm_seen[@nr] had already been set.
+ */
+static int check_comm(struct switch_tracking *switch_tracking,
+		      union perf_event *event, const char *comm, int nr)
+{
+	if (event->header.type != PERF_RECORD_COMM)
+		return 0;
+	if ((pid_t)event->comm.pid != getpid())
+		return 0;
+	if ((pid_t)event->comm.tid != getpid())
+		return 0;
+	if (strcmp(event->comm.comm, comm) != 0)
+		return 0;
+
+	if (switch_tracking->comm_seen[nr]) {
+		pr_debug("Duplicate comm event\n");
+		return -1;
+	}
+
+	switch_tracking->comm_seen[nr] = 1;
+	pr_debug3("comm event: %s nr: %d\n", event->comm.comm, nr);
+	return 1;
+}
+
+/*
+ * Make sure switch_tracking->tids has an entry for @cpu, allocating or
+ * growing the array as needed.  Newly added entries are set to -1.
+ *
+ * Returns 0 on success and -1 if @cpu is negative or memory runs out.
+ */
+static int check_cpu(struct switch_tracking *switch_tracking, int cpu)
+{
+	int i, nr = cpu + 1;
+
+	if (cpu < 0)
+		return -1;
+
+	/* First use: allocate entries 0..cpu */
+	if (!switch_tracking->tids) {
+		switch_tracking->tids = calloc(nr, sizeof(pid_t));
+		if (!switch_tracking->tids)
+			return -1;
+		for (i = 0; i < nr; i++)
+			switch_tracking->tids[i] = -1;
+		switch_tracking->nr_tids = nr;
+		return 0;
+	}
+
+	/* Array too small: grow it and initialise only the new tail */
+	if (cpu >= switch_tracking->nr_tids) {
+		void *addr;
+
+		/* Keep the old pointer until realloc() is known to have worked */
+		addr = realloc(switch_tracking->tids, nr * sizeof(pid_t));
+		if (!addr)
+			return -1;
+		switch_tracking->tids = addr;
+		for (i = switch_tracking->nr_tids; i < nr; i++)
+			switch_tracking->tids[i] = -1;
+		switch_tracking->nr_tids = nr;
+		return 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Process one sample event.  A sample of switch_evsel is checked against the
+ * previous switch recorded for the same cpu; a sample of cycles_evsel updates
+ * the cycles_* flags depending on which comm events have been seen so far.
+ *
+ * Returns 0 on success and a non-zero value if the sample cannot be parsed,
+ * check_cpu() fails or a sched_switch event appears to be missing.
+ */
+static int process_sample_event(struct perf_evlist *evlist,
+				union perf_event *event,
+				struct switch_tracking *switch_tracking)
+{
+	struct perf_sample sample;
+	struct perf_evsel *evsel;
+	pid_t next_tid, prev_tid;
+	int cpu, err;
+
+	if (perf_evlist__parse_sample(evlist, event, &sample)) {
+		pr_debug("perf_evlist__parse_sample failed\n");
+		return -1;
+	}
+
+	/* Find out which evsel the sample belongs to */
+	evsel = perf_evlist__id2evsel(evlist, sample.id);
+	if (evsel == switch_tracking->switch_evsel) {
+		next_tid = perf_evsel__intval(evsel, &sample, "next_pid");
+		prev_tid = perf_evsel__intval(evsel, &sample, "prev_pid");
+		cpu = sample.cpu;
+		pr_debug3("sched_switch: cpu: %d prev_tid %d next_tid %d\n",
+			  cpu, prev_tid, next_tid);
+		err = check_cpu(switch_tracking, cpu);
+		if (err)
+			return err;
+		/*
+		 * Check for no missing sched_switch events i.e. that the
+		 * evsel->system_wide flag has worked.
+		 */
+		if (switch_tracking->tids[cpu] != -1 &&
+		    switch_tracking->tids[cpu] != prev_tid) {
+			pr_debug("Missing sched_switch events\n");
+			return -1;
+		}
+		/* Remember who runs next; the following switch must leave it */
+		switch_tracking->tids[cpu] = next_tid;
+	}
+
+	if (evsel == switch_tracking->cycles_evsel) {
+		pr_debug3("cycles event\n");
+		if (!switch_tracking->comm_seen[0])
+			switch_tracking->cycles_before_comm_1 = 1;
+		if (switch_tracking->comm_seen[1] &&
+		    !switch_tracking->comm_seen[2])
+			switch_tracking->cycles_between_comm_2_and_comm_3 = 1;
+		if (switch_tracking->comm_seen[3])
+			switch_tracking->cycles_after_comm_4 = 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Dispatch one recorded event.  Sample events go to process_sample_event().
+ * A comm event is compared against the four test comm names and must match
+ * exactly one of them.  All other event types are ignored.
+ *
+ * Returns 0 on success, -1 for a duplicate or unexpected comm event, or the
+ * result of process_sample_event() for a sample.
+ */
+static int process_event(struct perf_evlist *evlist, union perf_event *event,
+			 struct switch_tracking *switch_tracking)
+{
+	static const char * const comms[] = {
+		"Test COMM 1",
+		"Test COMM 2",
+		"Test COMM 3",
+		"Test COMM 4",
+	};
+	int matched = 0;
+	int nr;
+
+	switch (event->header.type) {
+	case PERF_RECORD_SAMPLE:
+		return process_sample_event(evlist, event, switch_tracking);
+	case PERF_RECORD_COMM:
+		break;
+	default:
+		return 0;
+	}
+
+	for (nr = 0; nr < (int)(sizeof(comms) / sizeof(comms[0])); nr++) {
+		int res = check_comm(switch_tracking, event, comms[nr], nr);
+
+		if (res < 0)
+			return -1;
+		matched += res;
+	}
+
+	if (matched != 1) {
+		pr_debug("Unexpected comm event\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* One recorded event together with the timestamp used to sort it. */
+struct event_node {
+	/* Links the node into the list built by add_event() */
+	struct list_head list;
+	/* The event as returned by perf_mmap__read_event() */
+	union perf_event *event;
+	/* sample.time of the event, as parsed in add_event() */
+	u64 event_time;
+};
+
+/*
+ * Allocate an event_node for @event, put it on @events and record the
+ * event's sample time in it.
+ *
+ * Returns 0 on success and -1 if the allocation fails, the sample cannot be
+ * parsed or the event has no time.
+ */
+static int add_event(struct perf_evlist *evlist, struct list_head *events,
+		     union perf_event *event)
+{
+	struct perf_sample sample;
+	struct event_node *node;
+
+	node = malloc(sizeof(struct event_node));
+	if (!node) {
+		pr_debug("malloc failed\n");
+		return -1;
+	}
+	node->event = event;
+	/*
+	 * The node is linked before parsing, so on the failures below it is
+	 * still on the list and gets released by free_event_nodes().
+	 */
+	list_add(&node->list, events);
+
+	if (perf_evlist__parse_sample(evlist, event, &sample)) {
+		pr_debug("perf_evlist__parse_sample failed\n");
+		return -1;
+	}
+
+	if (!sample.time) {
+		pr_debug("event with no time\n");
+		return -1;
+	}
+
+	node->event_time = sample.time;
+
+	return 0;
+}
+
+/*
+ * Unlink and free every event_node on @events, leaving the list empty.
+ */
+static void free_event_nodes(struct list_head *events)
+{
+	struct list_head *pos;
+
+	/* Always take the current first entry until only the head is left */
+	for (pos = events->next; pos != events; pos = events->next) {
+		struct event_node *node;
+
+		node = list_entry(pos, struct event_node, list);
+		list_del(pos);
+		free(node);
+	}
+}
+
+/*
+ * qsort() comparison function ordering event_nodes by ascending event_time.
+ *
+ * The timestamps are compared directly instead of returning their
+ * difference: a 64-bit difference does not fit the int return value and,
+ * once truncated, can have the wrong sign or become 0 for unequal times.
+ */
+static int compar(const void *a, const void *b)
+{
+	const struct event_node *nodea = a;
+	const struct event_node *nodeb = b;
+
+	if (nodea->event_time < nodeb->event_time)
+		return -1;
+	if (nodea->event_time > nodeb->event_time)
+		return 1;
+	return 0;
+}
+
+static int process_events(struct perf_evlist *evlist,
+			  struct switch_tracking *switch_tracking)
+{
+	struct event_node *sorted, *node;
+	unsigned idx, nr_events = 0;
+	union perf_event *event;
+	struct perf_mmap *md;
+	LIST_HEAD(events);
+	int i, ret;
+
+	/* Pass 1: drain every readable ring buffer into the @events list. */
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		md = &evlist->mmap[i];
+		if (perf_mmap__read_init(md) < 0)
+			continue;
+
+		while ((event = perf_mmap__read_event(md)) != NULL) {
+			nr_events++;
+			ret = add_event(evlist, &events, event);
+			perf_mmap__consume(md);
+			if (ret < 0)
+				goto out_free_nodes;
+		}
+		perf_mmap__read_done(md);
+	}
+
+	sorted = calloc(nr_events, sizeof(*sorted));
+	if (sorted == NULL) {
+		pr_debug("calloc failed\n");
+		ret = -1;
+		goto out_free_nodes;
+	}
+
+	/* Pass 2: flatten the list into an array and order it with compar(). */
+	idx = 0;
+	list_for_each_entry(node, &events, list)
+		sorted[idx++] = *node;
+	qsort(sorted, nr_events, sizeof(*sorted), compar);
+
+	/* Pass 3: replay in sorted order; the first failure ends the replay. */
+	for (idx = 0; idx < nr_events; idx++) {
+		ret = process_event(evlist, sorted[idx].event, switch_tracking);
+		if (ret < 0)
+			goto out_free;
+	}
+	ret = 0;
+out_free:
+	pr_debug("%u events recorded\n", nr_events);
+	free(sorted);
+out_free_nodes:
+	free_event_nodes(&events);
+	return ret;
+}
+
+/**
+ * test__switch_tracking - test using sched_switch and tracking events.
+ *
+ * Checks that sched_switch events and tracking (comm) events are recorded for
+ * the current process via evsel->system_wide and evsel->tracking while other
+ * events are enabled and disabled. Returns 0 on success, and also when
+ * sched_switch cannot be selected or the events cannot be opened; -1 on failure.
+ */
+int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	const char *sched_switch = "sched:sched_switch";
+	struct switch_tracking switch_tracking = { .tids = NULL, };
+	struct record_opts opts = {
+		.mmap_pages	     = UINT_MAX,
+		.user_freq	     = UINT_MAX,
+		.user_interval	     = ULLONG_MAX,
+		.freq		     = 4000,
+		.target		     = {
+			.uses_mmap   = true,
+		},
+	};
+	struct thread_map *threads = NULL;
+	struct cpu_map *cpus = NULL;
+	struct perf_evlist *evlist = NULL;
+	struct perf_evsel *evsel, *cpu_clocks_evsel, *cycles_evsel;
+	struct perf_evsel *switch_evsel, *tracking_evsel;
+	const char *comm;
+	int err = -1;
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	if (!threads) {
+		pr_debug("thread_map__new failed!\n");
+		goto out_err;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (!cpus) {
+		pr_debug("cpu_map__new failed!\n");
+		goto out_err;
+	}
+
+	evlist = perf_evlist__new();
+	if (!evlist) {
+		pr_debug("perf_evlist__new failed!\n");
+		goto out_err;
+	}
+
+	perf_evlist__set_maps(evlist, cpus, threads);
+
+	/* First event */
+	err = parse_events(evlist, "cpu-clock:u", NULL);
+	if (err) {
+		pr_debug("Failed to parse event cpu-clock:u\n");
+		goto out_err;
+	}
+
+	cpu_clocks_evsel = perf_evlist__last(evlist);
+
+	/* Second event */
+	err = parse_events(evlist, "cycles:u", NULL);
+	if (err) {
+		pr_debug("Failed to parse event cycles:u\n");
+		goto out_err;
+	}
+
+	cycles_evsel = perf_evlist__last(evlist);
+
+	/* Third event */
+	if (!perf_evlist__can_select_event(evlist, sched_switch)) {
+		pr_debug("No sched_switch\n");
+		err = 0;
+		goto out;
+	}
+
+	err = parse_events(evlist, sched_switch, NULL);
+	if (err) {
+		pr_debug("Failed to parse event %s\n", sched_switch);
+		goto out_err;
+	}
+
+	switch_evsel = perf_evlist__last(evlist);
+
+	perf_evsel__set_sample_bit(switch_evsel, CPU);
+	perf_evsel__set_sample_bit(switch_evsel, TIME);
+
+	switch_evsel->system_wide = true;
+	switch_evsel->no_aux_samples = true;
+	switch_evsel->immediate = true;
+
+	/* Test moving an event to the front */
+	if (cycles_evsel == perf_evlist__first(evlist)) {
+		pr_debug("cycles event already at front\n");
+		goto out_err;
+	}
+	perf_evlist__to_front(evlist, cycles_evsel);
+	if (cycles_evsel != perf_evlist__first(evlist)) {
+		pr_debug("Failed to move cycles event to front\n");
+		goto out_err;
+	}
+
+	perf_evsel__set_sample_bit(cycles_evsel, CPU);
+	perf_evsel__set_sample_bit(cycles_evsel, TIME);
+
+	/* Fourth event */
+	err = parse_events(evlist, "dummy:u", NULL);
+	if (err) {
+		pr_debug("Failed to parse event dummy:u\n");
+		goto out_err;
+	}
+
+	tracking_evsel = perf_evlist__last(evlist);
+
+	perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+	tracking_evsel->attr.freq = 0;
+	tracking_evsel->attr.sample_period = 1;
+
+	perf_evsel__set_sample_bit(tracking_evsel, TIME);
+
+	/* Config events */
+	perf_evlist__config(evlist, &opts, NULL);
+
+	/* Check moved event is still at the front */
+	if (cycles_evsel != perf_evlist__first(evlist)) {
+		pr_debug("Front event no longer at front\n");
+		goto out_err;
+	}
+
+	/* Check tracking event is tracking */
+	if (!tracking_evsel->attr.mmap || !tracking_evsel->attr.comm) {
+		pr_debug("Tracking event not tracking\n");
+		goto out_err;
+	}
+
+	/* Check non-tracking events are not tracking */
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel != tracking_evsel) {
+			if (evsel->attr.mmap || evsel->attr.comm) {
+				pr_debug("Non-tracking event is tracking\n");
+				goto out_err;
+			}
+		}
+	}
+
+	if (perf_evlist__open(evlist) < 0) {
+		pr_debug("Not supported\n");
+		err = 0;
+		goto out;
+	}
+
+	err = perf_evlist__mmap(evlist, UINT_MAX);
+	if (err) {
+		pr_debug("perf_evlist__mmap failed!\n");
+		goto out_err;
+	}
+
+	perf_evlist__enable(evlist);
+
+	err = perf_evsel__disable(cpu_clocks_evsel);
+	if (err) {
+		pr_debug("perf_evsel__disable failed!\n");
+		goto out_err;
+	}
+
+	err = spin_sleep();
+	if (err) {
+		pr_debug("spin_sleep failed!\n");
+		goto out_err;
+	}
+
+	/* Each comm change below marks a boundary checked after processing. */
+	comm = "Test COMM 1";
+	err = prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
+	if (err) {
+		pr_debug("PR_SET_NAME failed!\n");
+		goto out_err;
+	}
+
+	err = perf_evsel__disable(cycles_evsel);
+	if (err) {
+		pr_debug("perf_evsel__disable failed!\n");
+		goto out_err;
+	}
+
+	comm = "Test COMM 2";
+	err = prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
+	if (err) {
+		pr_debug("PR_SET_NAME failed!\n");
+		goto out_err;
+	}
+
+	err = spin_sleep();
+	if (err) {
+		pr_debug("spin_sleep failed!\n");
+		goto out_err;
+	}
+
+	comm = "Test COMM 3";
+	err = prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
+	if (err) {
+		pr_debug("PR_SET_NAME failed!\n");
+		goto out_err;
+	}
+
+	err = perf_evsel__enable(cycles_evsel);
+	if (err) {
+		pr_debug("perf_evsel__enable failed!\n");
+		goto out_err;
+	}
+
+	comm = "Test COMM 4";
+	err = prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
+	if (err) {
+		pr_debug("PR_SET_NAME failed!\n");
+		goto out_err;
+	}
+
+	err = spin_sleep();
+	if (err) {
+		pr_debug("spin_sleep failed!\n");
+		goto out_err;
+	}
+
+	perf_evlist__disable(evlist);
+
+	switch_tracking.switch_evsel = switch_evsel;
+	switch_tracking.cycles_evsel = cycles_evsel;
+
+	err = process_events(evlist, &switch_tracking);
+
+	zfree(&switch_tracking.tids);
+
+	if (err)
+		goto out_err;
+
+	/* Check all 4 comm events were seen i.e. that evsel->tracking works */
+	if (!switch_tracking.comm_seen[0] || !switch_tracking.comm_seen[1] ||
+	    !switch_tracking.comm_seen[2] || !switch_tracking.comm_seen[3]) {
+		pr_debug("Missing comm events\n");
+		goto out_err;
+	}
+
+	/* Check cycles event got enabled */
+	if (!switch_tracking.cycles_before_comm_1) {
+		pr_debug("Missing cycles events\n");
+		goto out_err;
+	}
+
+	/* Check cycles event got disabled */
+	if (switch_tracking.cycles_between_comm_2_and_comm_3) {
+		pr_debug("cycles events even though event was disabled\n");
+		goto out_err;
+	}
+
+	/* Check cycles event got enabled again */
+	if (!switch_tracking.cycles_after_comm_4) {
+		pr_debug("Missing cycles events\n");
+		goto out_err;
+	}
+out:
+	if (evlist) {
+		perf_evlist__disable(evlist);
+		perf_evlist__delete(evlist);
+	} else {
+		cpu_map__put(cpus);
+		thread_map__put(threads);
+	}
+
+	return err;
+
+out_err:
+	err = -1;
+	goto out;
+}
diff --git a/tests/task-exit.c b/tests/task-exit.c
new file mode 100644
index 0000000..e92fa60
--- /dev/null
+++ b/tests/task-exit.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tests.h"
+
+#include <errno.h>
+#include <signal.h>
+
+static int exited;	/* set to 1 by both signal handlers below. NOTE(review): not volatile sig_atomic_t */
+static int nr_exit;	/* PERF_RECORD_EXIT count; forced to -1 by workload_exec_failed_signal() */
+
+static void sig_handler(int sig __maybe_unused)
+{	/* installed for SIGCHLD by test__task_exit(); only records that it fired */
+	exited = 1;
+}
+
+/*
+ * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
+ * we asked by setting its exec_error to this handler.
+ */
+static void workload_exec_failed_signal(int signo __maybe_unused,
+					siginfo_t *info __maybe_unused,
+					void *ucontext __maybe_unused)
+{
+	exited	= 1;	/* lets the retry loop in test__task_exit() stop polling */
+	nr_exit = -1;	/* non-zero and != 1, so the final EXIT-count check fails */
+}
+
+/*
+ * Start a do-nothing workload ("true") and check that exactly one
+ * PERF_RECORD_EXIT is read back for it. Returns a negative value on any
+ * setup failure or when the number of EXIT records is not 1.
+ */
+int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err = -1;
+	union perf_event *event;
+	struct perf_evsel *evsel;
+	struct perf_evlist *evlist;
+	struct target target = {
+		.uid		= UINT_MAX,
+		.uses_mmap	= true,
+	};
+	const char *argv[] = { "true", NULL };
+	char sbuf[STRERR_BUFSIZE];
+	struct cpu_map *cpus;
+	struct thread_map *threads;
+	struct perf_mmap *md;
+
+	signal(SIGCHLD, sig_handler);
+
+	evlist = perf_evlist__new_default();
+	if (evlist == NULL) {
+		pr_debug("perf_evlist__new_default\n");
+		return -1;
+	}
+
+	/*
+	 * Create maps of threads and cpus to monitor. In this case
+	 * we start with all threads and cpus (-1, -1) but then in
+	 * perf_evlist__prepare_workload we'll fill in the only thread
+	 * we're monitoring, the one forked there.
+	 */
+	cpus = cpu_map__dummy_new();
+	threads = thread_map__new_by_tid(-1);
+	if (!cpus || !threads) {
+		err = -ENOMEM;
+		pr_debug("Not enough memory to create thread/cpu maps\n");
+		goto out_free_maps;
+	}
+
+	perf_evlist__set_maps(evlist, cpus, threads);
+	cpus	= NULL;
+	threads = NULL;
+
+	err = perf_evlist__prepare_workload(evlist, &target, argv, false,
+					    workload_exec_failed_signal);
+	if (err < 0) {
+		pr_debug("Couldn't run the workload!\n");
+		goto out_delete_evlist;
+	}
+
+	evsel = perf_evlist__first(evlist);
+	evsel->attr.task = 1;
+#ifdef __s390x__
+	evsel->attr.sample_freq = 1000000;
+#else
+	evsel->attr.sample_freq = 1;
+#endif
+	evsel->attr.inherit = 0;
+	evsel->attr.watermark = 0;
+	evsel->attr.wakeup_events = 1;
+	evsel->attr.exclude_kernel = 1;
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		pr_debug("Couldn't open the evlist: %s\n",
+			 str_error_r(-err, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist;
+	}
+
+	if (perf_evlist__mmap(evlist, 128) < 0) {
+		pr_debug("failed to mmap events: %d (%s)\n", errno,
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		err = -1;	/* err is still >= 0 from the successful open above */
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__start_workload(evlist);
+
+retry:
+	md = &evlist->mmap[0];
+	if (perf_mmap__read_init(md) < 0)
+		goto out_init;
+
+	while ((event = perf_mmap__read_event(md)) != NULL) {
+		if (event->header.type == PERF_RECORD_EXIT)
+			nr_exit++;
+
+		perf_mmap__consume(md);
+	}
+	perf_mmap__read_done(md);
+
+out_init:
+	if (!exited || !nr_exit) {
+		perf_evlist__poll(evlist, -1);
+		goto retry;
+	}
+
+	if (nr_exit != 1) {
+		pr_debug("received %d EXIT records\n", nr_exit);
+		err = -1;
+	}
+
+out_free_maps:
+	cpu_map__put(cpus);
+	thread_map__put(threads);
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+	return err;
+}
diff --git a/tests/tests.h b/tests/tests.h
new file mode 100644
index 0000000..a9760e7
--- /dev/null
+++ b/tests/tests.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TESTS_H
+#define TESTS_H
+
+#include <stdbool.h>
+
+#define TEST_ASSERT_VAL(text, cond)					 \
+do {	/* if cond is false: log file:line and text, return -1 from the caller */ \
+	if (!(cond)) {							 \
+		pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \
+		return -1;						 \
+	}								 \
+} while (0)
+
+#define TEST_ASSERT_EQUAL(text, val, expected)				 \
+do {	/* args are parenthesized; val/expected are evaluated twice on failure */ \
+	if ((val) != (expected)) {					 \
+		pr_debug("FAILED %s:%d %s (%d != %d)\n",		 \
+			 __FILE__, __LINE__, (text), (val), (expected)); \
+		return -1;						 \
+	}								 \
+} while (0)
+
+enum {
+	TEST_OK   =  0,	/* test passed */
+	TEST_FAIL = -1,	/* test failed */
+	TEST_SKIP = -2,	/* test was skipped */
+};
+
+struct test {
+	const char *desc;	/* description string for the test */
+	int (*func)(struct test *test, int subtest);	/* test entry point, e.g. one of the test__*() below */
+	struct {
+		bool skip_if_fail;	/* NOTE(review): presumably skips later subtests after a failure - confirm */
+		int (*get_nr)(void);	/* number of subtests (cf. test__llvm_subtest_get_nr) */
+		const char *(*get_desc)(int subtest);	/* description of one subtest */
+	} subtest;
+	bool (*is_supported)(void);	/* support probe (cf. test__bp_signal_is_supported) */
+	void *priv;	/* opaque per-test data; its use is not visible in this header */
+};
+
+/* Tests */
+int test__vmlinux_matches_kallsyms(struct test *test, int subtest);
+int test__openat_syscall_event(struct test *test, int subtest);
+int test__openat_syscall_event_on_all_cpus(struct test *test, int subtest);
+int test__basic_mmap(struct test *test, int subtest);
+int test__PERF_RECORD(struct test *test, int subtest);
+int test__perf_evsel__roundtrip_name_test(struct test *test, int subtest);
+int test__perf_evsel__tp_sched_test(struct test *test, int subtest);
+int test__syscall_openat_tp_fields(struct test *test, int subtest);
+int test__pmu(struct test *test, int subtest);
+int test__attr(struct test *test, int subtest);
+int test__dso_data(struct test *test, int subtest);
+int test__dso_data_cache(struct test *test, int subtest);
+int test__dso_data_reopen(struct test *test, int subtest);
+int test__parse_events(struct test *test, int subtest);
+int test__hists_link(struct test *test, int subtest);
+int test__python_use(struct test *test, int subtest);
+int test__bp_signal(struct test *test, int subtest);
+int test__bp_signal_overflow(struct test *test, int subtest);
+int test__bp_accounting(struct test *test, int subtest);
+int test__task_exit(struct test *test, int subtest);
+int test__mem(struct test *test, int subtest);
+int test__sw_clock_freq(struct test *test, int subtest);
+int test__code_reading(struct test *test, int subtest);
+int test__sample_parsing(struct test *test, int subtest);
+int test__keep_tracking(struct test *test, int subtest);
+int test__parse_no_sample_id_all(struct test *test, int subtest);
+int test__dwarf_unwind(struct test *test, int subtest);
+int test__expr(struct test *test, int subtest);
+int test__hists_filter(struct test *test, int subtest);
+int test__mmap_thread_lookup(struct test *test, int subtest);
+int test__thread_mg_share(struct test *test, int subtest);
+int test__hists_output(struct test *test, int subtest);
+int test__hists_cumulate(struct test *test, int subtest);
+int test__switch_tracking(struct test *test, int subtest);
+int test__fdarray__filter(struct test *test, int subtest);
+int test__fdarray__add(struct test *test, int subtest);
+int test__kmod_path__parse(struct test *test, int subtest);
+int test__thread_map(struct test *test, int subtest);
+int test__llvm(struct test *test, int subtest);
+const char *test__llvm_subtest_get_desc(int subtest);
+int test__llvm_subtest_get_nr(void);
+int test__bpf(struct test *test, int subtest);
+const char *test__bpf_subtest_get_desc(int subtest);
+int test__bpf_subtest_get_nr(void);
+int test__session_topology(struct test *test, int subtest);
+int test__thread_map_synthesize(struct test *test, int subtest);
+int test__thread_map_remove(struct test *test, int subtest);
+int test__cpu_map_synthesize(struct test *test, int subtest);
+int test__synthesize_stat_config(struct test *test, int subtest);
+int test__synthesize_stat(struct test *test, int subtest);
+int test__synthesize_stat_round(struct test *test, int subtest);
+int test__event_update(struct test *test, int subtest);
+int test__event_times(struct test *test, int subtest);
+int test__backward_ring_buffer(struct test *test, int subtest);
+int test__cpu_map_print(struct test *test, int subtest);
+int test__sdt_event(struct test *test, int subtest);
+int test__is_printable_array(struct test *test, int subtest);
+int test__bitmap_print(struct test *test, int subtest);
+int test__perf_hooks(struct test *test, int subtest);
+int test__clang(struct test *test, int subtest);
+const char *test__clang_subtest_get_desc(int subtest);
+int test__clang_subtest_get_nr(void);
+int test__unit_number__scnprint(struct test *test, int subtest);
+int test__mem2node(struct test *t, int subtest);
+
+bool test__bp_signal_is_supported(void);
+
+#if defined(__arm__) || defined(__aarch64__)
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+int test__arch_unwind_sample(struct perf_sample *sample,
+			     struct thread *thread);
+#endif
+#endif
+#endif /* TESTS_H */
diff --git a/tests/thread-map.c b/tests/thread-map.c
new file mode 100644
index 0000000..4de1939
--- /dev/null
+++ b/tests/thread-map.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/prctl.h>
+#include "tests.h"
+#include "thread_map.h"
+#include "debug.h"
+
+#define NAME	(const char *) "perf"
+#define NAMEUL	(unsigned long) NAME
+
+int test__thread_map(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct thread_map *map;
+	const char *comm;
+
+	/* Set a known process name so the comm checks below are predictable. */
+	TEST_ASSERT_VAL("failed to set process name",
+			prctl(PR_SET_NAME, NAMEUL, 0, 0, 0) == 0);
+
+	/* test map on current pid */
+	map = thread_map__new_by_pid(getpid());
+	TEST_ASSERT_VAL("failed to alloc map", map != NULL);
+
+	thread_map__read_comms(map);
+
+	/* Expect a single entry: our pid, our comm, one reference held. */
+	TEST_ASSERT_VAL("wrong nr", map->nr == 1);
+	TEST_ASSERT_VAL("wrong pid", thread_map__pid(map, 0) == getpid());
+	comm = thread_map__comm(map, 0);
+	TEST_ASSERT_VAL("wrong comm", comm && strcmp(comm, NAME) == 0);
+	TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1);
+	thread_map__put(map);
+
+	/* test dummy pid */
+	map = thread_map__new_dummy();
+	TEST_ASSERT_VAL("failed to alloc map", map != NULL);
+
+	thread_map__read_comms(map);
+
+	/* Expect a single entry: pid -1, comm "dummy", one reference held. */
+	TEST_ASSERT_VAL("wrong nr", map->nr == 1);
+	TEST_ASSERT_VAL("wrong pid", thread_map__pid(map, 0) == -1);
+	comm = thread_map__comm(map, 0);
+	TEST_ASSERT_VAL("wrong comm", comm && strcmp(comm, "dummy") == 0);
+	TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1);
+	thread_map__put(map);
+
+	return 0;
+}
+
+static int process_event(struct perf_tool *tool __maybe_unused,
+			 union perf_event *event,
+			 struct perf_sample *sample __maybe_unused,
+			 struct machine *machine __maybe_unused)
+{
+	struct thread_map_event *ev = &event->thread_map;
+	struct thread_map *threads;
+	const char *comm;
+
+	/* First validate the raw thread_map record carried by the event ... */
+	TEST_ASSERT_VAL("wrong nr", ev->nr == 1);
+	TEST_ASSERT_VAL("wrong pid", ev->entries[0].pid == (u64) getpid());
+	TEST_ASSERT_VAL("wrong comm", strcmp(ev->entries[0].comm, NAME) == 0);
+
+	/* ... then the thread_map rebuilt from that record. */
+	threads = thread_map__new_event(ev);
+	TEST_ASSERT_VAL("failed to alloc map", threads != NULL);
+
+	TEST_ASSERT_VAL("wrong nr", threads->nr == 1);
+	TEST_ASSERT_VAL("wrong pid", thread_map__pid(threads, 0) == getpid());
+	comm = thread_map__comm(threads, 0);
+	TEST_ASSERT_VAL("wrong comm", comm && strcmp(comm, NAME) == 0);
+	TEST_ASSERT_VAL("wrong refcnt", refcount_read(&threads->refcnt) == 1);
+	thread_map__put(threads);
+	return 0;
+}
+
+int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct thread_map *threads;
+	int err;
+
+	TEST_ASSERT_VAL("failed to set process name",
+			!prctl(PR_SET_NAME, NAMEUL, 0, 0, 0));
+
+	/* test map on current pid */
+	threads = thread_map__new_by_pid(getpid());
+	TEST_ASSERT_VAL("failed to alloc map", threads);
+	thread_map__read_comms(threads);
+	err = perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL);
+	/* Drop our reference before asserting so neither outcome leaks the map. */
+	thread_map__put(threads);
+	TEST_ASSERT_VAL("failed to synthesize map", !err);
+	return 0;
+}
+
+int test__thread_map_remove(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct thread_map *threads;
+	char *str;
+	int i;
+
+	TEST_ASSERT_VAL("failed to allocate map string",
+			asprintf(&str, "%d,%d", getpid(), getppid()) >= 0);
+
+	threads = thread_map__new_str(str, NULL, 0, false);
+	free(str);	/* not used again in this test; it was leaked before */
+	TEST_ASSERT_VAL("failed to allocate thread_map",
+			threads);
+
+	if (verbose > 0)
+		thread_map__fprintf(threads, stderr);
+
+	TEST_ASSERT_VAL("failed to remove thread",
+			!thread_map__remove(threads, 0));
+
+	TEST_ASSERT_VAL("thread_map count != 1", threads->nr == 1);
+
+	if (verbose > 0)
+		thread_map__fprintf(threads, stderr);
+
+	TEST_ASSERT_VAL("failed to remove thread",
+			!thread_map__remove(threads, 0));
+
+	TEST_ASSERT_VAL("thread_map count != 0", threads->nr == 0);
+
+	if (verbose > 0)
+		thread_map__fprintf(threads, stderr);
+
+	TEST_ASSERT_VAL("failed to not remove thread",
+			thread_map__remove(threads, 0));
+
+	for (i = 0; i < threads->nr; i++)
+		free(threads->map[i].comm);
+
+	free(threads);
+	return 0;
+}
diff --git a/tests/thread-mg-share.c b/tests/thread-mg-share.c
new file mode 100644
index 0000000..b1d1bba
--- /dev/null
+++ b/tests/thread-mg-share.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tests.h"
+#include "machine.h"
+#include "thread.h"
+#include "map.h"
+#include "debug.h"
+
+int test__thread_mg_share(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct machines machines;
+	struct machine *machine;
+
+	/* thread group */
+	struct thread *leader;
+	struct thread *t1, *t2, *t3;
+	struct map_groups *mg;
+
+	/* other process */
+	struct thread *other, *other_leader;
+	struct map_groups *other_mg;
+
+	/*
+	 * This test creates 2 process abstractions (struct thread)
+	 * with several threads and checks they properly share and
+	 * maintain map groups info (struct map_groups).
+	 *
+	 * thread group (pid: 0, tids: 0, 1, 2, 3)
+	 * other  group (pid: 4, tids: 4, 5)
+	*/
+
+	machines__init(&machines);
+	machine = &machines.host;
+
+	/* create process with 4 threads */
+	leader = machine__findnew_thread(machine, 0, 0);
+	t1     = machine__findnew_thread(machine, 0, 1);
+	t2     = machine__findnew_thread(machine, 0, 2);
+	t3     = machine__findnew_thread(machine, 0, 3);
+
+	/* and create 1 separated process, without thread leader */
+	other  = machine__findnew_thread(machine, 4, 5);
+
+	TEST_ASSERT_VAL("failed to create threads",
+			leader && t1 && t2 && t3 && other);
+
+	/* one map_groups reference is expected per thread of the group (4) */
+	mg = leader->mg;
+	TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 4);
+
+	/* test the map groups pointer is shared */
+	TEST_ASSERT_VAL("map groups don't match", mg == t1->mg);
+	TEST_ASSERT_VAL("map groups don't match", mg == t2->mg);
+	TEST_ASSERT_VAL("map groups don't match", mg == t3->mg);
+
+	/*
+	 * Verify the other leader was created by previous call.
+	 * It should have shared map groups with no change in
+	 * refcnt.
+	 */
+	other_leader = machine__find_thread(machine, 4, 4);
+	TEST_ASSERT_VAL("failed to find other leader", other_leader);
+
+	/*
+	 * Ok, now that all the rbtree related operations were done,
+	 * lets remove all of them from there so that we can do the
+	 * refcounting tests.
+	 */
+	machine__remove_thread(machine, leader);
+	machine__remove_thread(machine, t1);
+	machine__remove_thread(machine, t2);
+	machine__remove_thread(machine, t3);
+	machine__remove_thread(machine, other);
+	machine__remove_thread(machine, other_leader);
+
+	/* 'other' and its leader are expected to share one map_groups (refcnt 2) */
+	other_mg = other->mg;
+	TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_mg->refcnt), 2);
+
+	TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg);
+
+	/* release thread group */
+	thread__put(leader);
+	TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 3);
+
+	thread__put(t1);
+	TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 2);
+
+	thread__put(t2);
+	TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 1);
+
+	thread__put(t3);	/* not followed by a refcnt check on mg, unlike the puts above */
+
+	/* release other group  */
+	thread__put(other_leader);
+	TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_mg->refcnt), 1);
+
+	thread__put(other);
+
+	machines__exit(&machines);
+	return 0;
+}
diff --git a/tests/topology.c b/tests/topology.c
new file mode 100644
index 0000000..17cb1bb
--- /dev/null
+++ b/tests/topology.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "tests.h"
+#include "util.h"
+#include "session.h"
+#include "evlist.h"
+#include "debug.h"
+
+#define TEMPL "/tmp/perf-test-XXXXXX"
+#define DATA_SIZE	10
+
+static int get_temp(char *path)
+{
+	int tmp_fd;
+
+	/* mkstemp() rewrites the XXXXXX suffix in place, so copy the template. */
+	strcpy(path, TEMPL);
+	tmp_fd = mkstemp(path);
+	if (tmp_fd >= 0) {
+		/* Only the generated path is handed back; the fd is not kept. */
+		close(tmp_fd);
+		return 0;
+	}
+	perror("mkstemp failed");
+	return -1;
+}
+
+static int session_write_header(char *path)
+{
+	struct perf_data data = {
+		.file.path = path,
+		.mode	   = PERF_DATA_MODE_WRITE,
+	};
+	struct perf_session *session = perf_session__new(&data, false, NULL);
+	int err;
+
+	TEST_ASSERT_VAL("can't get session", session != NULL);
+
+	session->evlist = perf_evlist__new_default();
+	TEST_ASSERT_VAL("can't get evlist", session->evlist != NULL);
+
+	/* Request the CPU_TOPOLOGY and NRCPUS header features. */
+	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
+	perf_header__set_feat(&session->header, HEADER_NRCPUS);
+
+	/* Bump the recorded data size by DATA_SIZE before writing. */
+	session->header.data_size += DATA_SIZE;
+
+	err = perf_session__write_header(session, session->evlist,
+					 data.file.fd, true);
+	TEST_ASSERT_VAL("failed to write header", err == 0);
+
+	perf_session__delete(session);
+	return 0;
+}
+
+static int check_cpu_topology(char *path, struct cpu_map *map)
+{
+	struct perf_session *session;
+	struct perf_data data = {
+		.file      = {
+			.path = path,
+		},
+		.mode      = PERF_DATA_MODE_READ,
+	};
+	int i;
+
+	session = perf_session__new(&data, false, NULL);
+	TEST_ASSERT_VAL("can't get session", session);
+	/* Log core/socket ids from the header for each available CPU present in @map. */
+	for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
+		if (!cpu_map__has(map, i))
+			continue;
+		pr_debug("CPU %d, core %d, socket %d\n", i,
+			 session->header.env.cpu[i].core_id,
+			 session->header.env.cpu[i].socket_id);
+	}
+	/* Compare the header ids of every mapped CPU with cpu_map__get_core()/_socket(). */
+	for (i = 0; i < map->nr; i++) {
+		TEST_ASSERT_VAL("Core ID doesn't match",
+			(session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i, NULL) & 0xffff)));
+
+		TEST_ASSERT_VAL("Socket ID doesn't match",
+			(session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i, NULL)));
+	}
+	/* NOTE(review): a failed assert above returns -1 without deleting session. */
+	perf_session__delete(session);
+
+	return 0;
+}
+
+int test__session_topology(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct cpu_map *cpus;
+	char path[PATH_MAX];
+	int ret = -1;
+
+	TEST_ASSERT_VAL("can't get templ file", get_temp(path) == 0);
+
+	pr_debug("templ file: %s\n", path);
+
+	/* Write a perf.data header to the temp file, then check its topology. */
+	if (session_write_header(path) != 0)
+		goto out_unlink;
+
+	cpus = cpu_map__new(NULL);
+	if (!cpus) {
+		pr_debug("failed to get system cpumap\n");
+		goto out_unlink;
+	}
+
+	if (check_cpu_topology(path, cpus) == 0)
+		ret = 0;
+	cpu_map__put(cpus);
+
+out_unlink:
+	/* The temporary file is removed whether or not the checks passed. */
+	unlink(path);
+	return ret;
+}
diff --git a/tests/unit_number__scnprintf.c b/tests/unit_number__scnprintf.c
new file mode 100644
index 0000000..2bb8cb0
--- /dev/null
+++ b/tests/unit_number__scnprintf.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include "tests.h"
+#include "units.h"
+#include "debug.h"
+
+int test__unit_number__scnprint(struct test *t __maybe_unused, int subtest __maybe_unused)
+{
+	/* Value / expected rendering pairs; the NULL string ends the table. */
+	const struct {
+		u64		 n;
+		const char	*str;
+	} cases[] = {
+		{ 1,			"1B"	},
+		{ 10*1024,		"10K"	},
+		{ 20*1024*1024,		"20M"	},
+		{ 30*1024*1024*1024ULL,	"30G"	},
+		{ 0,			"0B"	},
+		{ 0,			NULL	},
+	};
+	unsigned idx;
+
+	for (idx = 0; cases[idx].str != NULL; idx++) {
+		const char *expected = cases[idx].str;
+		char buf[100];
+
+		unit_number__scnprintf(buf, sizeof(buf), cases[idx].n);
+		pr_debug("n %" PRIu64 ", str '%s', buf '%s'\n",
+			 cases[idx].n, expected, buf);
+
+		/* The first rendering that differs fails the whole test. */
+		if (strcmp(expected, buf) != 0)
+			return TEST_FAIL;
+	}
+
+	return TEST_OK;
+}
diff --git a/tests/vmlinux-kallsyms.c b/tests/vmlinux-kallsyms.c
new file mode 100644
index 0000000..1e5adb6
--- /dev/null
+++ b/tests/vmlinux-kallsyms.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+#include <inttypes.h>
+#include <string.h>
+#include "map.h"
+#include "symbol.h"
+#include "util.h"
+#include "tests.h"
+#include "debug.h"
+#include "machine.h"
+
+#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))	/* needs a 'kallsyms_map' in scope at the use site */
+
+int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int err = -1;
+	struct rb_node *nd;
+	struct symbol *sym;
+	struct map *kallsyms_map, *vmlinux_map, *map;
+	struct machine kallsyms, vmlinux;
+	enum map_type type = MAP__FUNCTION;
+	struct maps *maps = &vmlinux.kmaps.maps[type];
+	u64 mem_start, mem_end;
+	bool header_printed;
+	/* Returns 0 if all symbols matched, -1 on mismatch or setup failure, TEST_SKIP without a vmlinux. */
+	/*
+	 * Step 1:
+	 *
+	 * Init the machines that will hold kernel, modules obtained from
+	 * both vmlinux + .ko files and from /proc/kallsyms split by modules.
+	 */
+	machine__init(&kallsyms, "", HOST_KERNEL_ID);
+	machine__init(&vmlinux, "", HOST_KERNEL_ID);
+
+	/*
+	 * Step 2:
+	 *
+	 * Create the kernel maps for kallsyms and the DSO where we will then
+	 * load /proc/kallsyms. Also create the modules maps from /proc/modules
+	 * and find the .ko files that match them in /lib/modules/`uname -r`/.
+	 */
+	if (machine__create_kernel_maps(&kallsyms) < 0) {
+		pr_debug("machine__create_kernel_maps ");
+		goto out;
+	}
+
+	/*
+	 * Step 3:
+	 *
+	 * Load and split /proc/kallsyms into multiple maps, one per module.
+	 * Do not use kcore, as this test was designed before kcore support
+	 * and has parts that only make sense if using the non-kcore code.
+	 * XXX: extend it to stress the kcore code as well, hint: the list
+	 * of modules extracted from /proc/kcore, in its current form, can't
+	 * be compacted against the list of modules found in the "vmlinux"
+	 * code and with the one got from /proc/modules from the "kallsyms" code.
+	 */
+	if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type) <= 0) {
+		pr_debug("dso__load_kallsyms ");
+		goto out;
+	}
+
+	/*
+	 * Step 4:
+	 *
+	 * kallsyms will be internally on demand sorted by name so that we can
+	 * find the reference relocation symbol, i.e. the symbol we will use
+	 * to see if the running kernel was relocated by checking if it has the
+	 * same value in the vmlinux file we load.
+	 */
+	kallsyms_map = machine__kernel_map(&kallsyms);
+
+	/*
+	 * Step 5:
+	 *
+	 * Now repeat step 2, this time for the vmlinux file we'll auto-locate.
+	 */
+	if (machine__create_kernel_maps(&vmlinux) < 0) {
+		pr_debug("machine__create_kernel_maps ");
+		goto out;
+	}
+
+	vmlinux_map = machine__kernel_map(&vmlinux);
+
+	/*
+	 * Step 6:
+	 *
+	 * Locate a vmlinux file in the vmlinux path that has a buildid that
+	 * matches the one of the running kernel.
+	 *
+	 * While doing that look if we find the ref reloc symbol, if we find it
+	 * we'll have its ref_reloc_symbol.unrelocated_addr and then
+	 * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
+	 * to fixup the symbols.
+	 */
+	if (machine__load_vmlinux_path(&vmlinux, type) <= 0) {
+		pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
+		err = TEST_SKIP;
+		goto out;
+	}
+
+	err = 0;
+	/*
+	 * Step 7:
+	 *
+	 * Now look at the symbols in the vmlinux DSO and check if we find all of them
+	 * in the kallsyms dso. For the ones that are in both, check its names and
+	 * end addresses too.
+	 */
+	for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
+		struct symbol *pair, *first_pair;
+
+		sym  = rb_entry(nd, struct symbol, rb_node);
+
+		if (sym->start == sym->end)
+			continue;
+
+		mem_start = vmlinux_map->unmap_ip(vmlinux_map, sym->start);
+		mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
+
+		first_pair = machine__find_kernel_symbol(&kallsyms, type,
+							 mem_start, NULL);
+		pair = first_pair;
+
+		if (pair && UM(pair->start) == mem_start) {
+next_pair:
+			if (arch__compare_symbol_names(sym->name, pair->name) == 0) {
+				/*
+				 * kallsyms don't have the symbol end, so we
+				 * set that by using the next symbol start - 1,
+				 * in some cases we get this up to a page
+				 * wrong, trace_kmalloc when I was developing
+				 * this code was one such example, 2106 bytes
+				 * off the real size. More than that and we
+				 * _really_ have a problem.
+				 */
+				s64 skew = mem_end - UM(pair->end);
+				if (llabs(skew) >= page_size)
+					pr_debug("WARN: %#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
+						 mem_start, sym->name, mem_end,
+						 UM(pair->end));
+
+				/*
+				 * Do not count this as a failure, because we
+				 * could really find a case where it's not
+				 * possible to get proper function end from
+				 * kallsyms.
+				 */
+				continue;
+			} else {
+				pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL);
+				if (pair) {
+					if (UM(pair->start) == mem_start)
+						goto next_pair;
+
+					pr_debug("WARN: %#" PRIx64 ": diff name v: %s k: %s\n",
+						 mem_start, sym->name, pair->name);
+				} else {
+					pr_debug("WARN: %#" PRIx64 ": diff name v: %s k: %s\n",
+						 mem_start, sym->name, first_pair->name);
+				}
+
+				continue;
+			}
+		} else
+			pr_debug("ERR : %#" PRIx64 ": %s not on kallsyms\n",
+				 mem_start, sym->name);
+
+		err = -1;
+	}
+	/* The map comparisons below only print warnings (err is not changed) and need verbose. */
+	if (verbose <= 0)
+		goto out;
+
+	header_printed = false;
+	/* Pass 1: flag kallsyms maps found by the vmlinux map's name; print vmlinux maps with no match. */
+	for (map = maps__first(maps); map; map = map__next(map)) {
+		struct map *
+		/*
+		 * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while
+		 * the kernel will have the path for the vmlinux file being used,
+		 * so use the short name, less descriptive but the same ("[kernel]" in
+		 * both cases.
+		 */
+		pair = map_groups__find_by_name(&kallsyms.kmaps, type,
+						(map->dso->kernel ?
+							map->dso->short_name :
+							map->dso->name));
+		if (pair) {
+			pair->priv = 1;
+		} else {
+			if (!header_printed) {
+				pr_info("WARN: Maps only in vmlinux:\n");
+				header_printed = true;
+			}
+			map__fprintf(map, stderr);
+		}
+	}
+
+	header_printed = false;
+	/* Pass 2: report vmlinux maps whose start matches a not-yet-flagged kallsyms map. */
+	for (map = maps__first(maps); map; map = map__next(map)) {
+		struct map *pair;
+
+		mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start);
+		mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end);
+
+		pair = map_groups__find(&kallsyms.kmaps, type, mem_start);
+		if (pair == NULL || pair->priv)
+			continue;
+
+		if (pair->start == mem_start) {
+			if (!header_printed) {
+				pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n");
+				header_printed = true;
+			}
+
+			pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
+				map->start, map->end, map->pgoff, map->dso->name);
+			if (mem_end != pair->end)
+				pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64,
+					pair->start, pair->end, pair->pgoff);
+			pr_info(" %s\n", pair->dso->name);
+			pair->priv = 1;
+		}
+	}
+
+	header_printed = false;
+	/* Pass 3: kallsyms maps never flagged by the two passes above are printed. */
+	maps = &kallsyms.kmaps.maps[type];
+
+	for (map = maps__first(maps); map; map = map__next(map)) {
+		if (!map->priv) {
+			if (!header_printed) {
+				pr_info("WARN: Maps only in kallsyms:\n");
+				header_printed = true;
+			}
+			map__fprintf(map, stderr);
+		}
+	}
+out:
+	machine__exit(&kallsyms);
+	machine__exit(&vmlinux);
+	return err;
+}
diff --git a/trace/beauty/Build b/trace/beauty/Build
new file mode 100644
index 0000000..66330d4
--- /dev/null
+++ b/trace/beauty/Build
@@ -0,0 +1,10 @@
+libperf-y += clone.o
+libperf-y += fcntl.o
+libperf-y += flock.o
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
+libperf-y += ioctl.o
+endif
+libperf-y += kcmp.o
+libperf-y += pkey_alloc.o
+libperf-y += prctl.o
+libperf-y += statx.o
diff --git a/trace/beauty/arch_errno_names.c b/trace/beauty/arch_errno_names.c
new file mode 100644
index 0000000..ede031c
--- /dev/null
+++ b/trace/beauty/arch_errno_names.c
@@ -0,0 +1 @@
+#include "trace/beauty/generated/arch_errno_name_array.c"
diff --git a/trace/beauty/arch_errno_names.sh b/trace/beauty/arch_errno_names.sh
new file mode 100755
index 0000000..22c9fc9
--- /dev/null
+++ b/trace/beauty/arch_errno_names.sh
@@ -0,0 +1,100 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate C file mapping errno codes to errno names.
+#
+# Copyright IBM Corp. 2018
+# Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+
+gcc="$1"
+toolsdir="$2"
+include_path="-I$toolsdir/include/uapi"
+
+arch_string()
+{ # Print $1 with '-' and ' ' turned into '_' (sed y///) and upper case folded to lower (tr).
+	echo "$1" |sed -e 'y/- /__/' |tr '[[:upper:]]' '[[:lower:]]'
+}
+
+asm_errno_file()
+{
+	local arch="$1"
+	local header
+	# Print the arch's own uapi errno.h if it is readable, else the asm-generic one.
+	header="$toolsdir/arch/$arch/include/uapi/asm/errno.h"
+	if test -r "$header"; then
+		echo "$header"
+	else
+		echo "$toolsdir/include/uapi/asm-generic/errno.h"
+	fi
+}
+
+create_errno_lookup_func()
+{
+	local arch=$(arch_string "$1")
+	local nr name
+	# Emit errno_to_name__<arch>(); $arch expands inside the unquoted here-document.
+	cat <<EoFuncBegin
+static const char *errno_to_name__$arch(int err)
+{
+	switch (err) {
+EoFuncBegin
+	# One "case" per "name nr" record read from stdin, split on the IFS set by the caller.
+	while read name nr; do
+		printf '\tcase %d: return "%s";\n' $nr $name
+	done
+
+	cat <<EoFuncEnd
+	default:
+		return "(unknown)";
+	}
+}
+
+EoFuncEnd
+}
+
+process_arch()
+{
+	local arch="$1"
+	local asm_errno=$(asm_errno_file "$arch")
+	# Dump macros (-E -dM), keep numeric E* defines, emit "name,nr" sorted numerically by nr (-u: unique).
+	$gcc $include_path -E -dM -x c $asm_errno \
+		|grep -hE '^#define[[:blank:]]+(E[^[:blank:]]+)[[:blank:]]+([[:digit:]]+).*' \
+		|awk '{ print $2","$3; }' \
+		|sort -t, -k2 -nu \
+		|IFS=, create_errno_lookup_func "$arch"
+}
+
+create_arch_errno_table_func()
+{
+	local archlist="$1"
+	local default="$2"
+	local arch
+	# Emit arch_syscalls__strerrno(): one strcmp() test per arch in $1, $2 as the final fallback.
+	printf 'const char *arch_syscalls__strerrno(const char *arch, int err)\n'
+	printf '{\n'
+	for arch in $archlist; do
+		printf '\tif (!strcmp(arch, "%s"))\n' $(arch_string "$arch")
+		printf '\t\treturn errno_to_name__%s(err);\n' $(arch_string "$arch")
+	done
+	printf '\treturn errno_to_name__%s(err);\n' $(arch_string "$default")
+	printf '}\n'
+}
+
+cat <<EoHEADER
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <string.h>
+
+EoHEADER
+
+# Create list of architectures found under $toolsdir/arch (x86 is filtered out here and
+# added explicitly below); keep only those with an ./arch/<name> directory in the cwd.
+archlist=""
+for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | grep -v x86 | sort); do
+	test -d arch/$arch && archlist="$archlist $arch"
+done
+# x86 comes first and "generic" last; "generic" is also the dispatcher's default.
+for arch in x86 $archlist generic; do
+	process_arch "$arch"
+done
+create_arch_errno_table_func "x86 $archlist" "generic"
diff --git a/trace/beauty/beauty.h b/trace/beauty/beauty.h
new file mode 100644
index 0000000..984a504
--- /dev/null
+++ b/trace/beauty/beauty.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_TRACE_BEAUTY_H
+#define _PERF_TRACE_BEAUTY_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <sys/types.h>
+
+struct strarray {
+	int	    offset; /* set only by DEFINE_STRARRAY_OFFSET; presumably the value of entries[0] - confirm in strarray__scnprintf() */
+	int	    nr_entries; /* number of slots in entries, ARRAY_SIZE() of the table */
+	const char **entries; /* name table; users in this directory treat NULL slots as "no name" */
+};
+
+#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
+	.nr_entries = ARRAY_SIZE(array), \
+	.entries = array, \
+}
+/* Both macros define strarray__<array> wrapping <array>; this variant also records 'off' in ->offset. */
+#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
+	.offset	    = off, \
+	.nr_entries = ARRAY_SIZE(array), \
+	.entries = array, \
+}
+
+size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val);
+
+struct trace;
+struct thread;
+
+size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size);
+
+/**
+ * @val: value of syscall argument being formatted
+ * @args: All the args, use syscall_args__val(arg, nth) to access one
+ * @thread: tid state (maps, pid, tid, etc)
+ * @trace: 'perf trace' internals: all threads, etc
+ * @parm: private area, may be an strarray, for instance
+ * @idx: syscall arg idx (is this the first?)
+ * @mask: a syscall arg may mask another arg, see syscall_arg__scnprintf_futex_op
+ */
+
+struct syscall_arg {
+	unsigned long val;
+	unsigned char *args;
+	struct thread *thread;
+	struct trace  *trace;
+	void	      *parm;
+	u8	      idx;
+	u8	      mask;
+};
+
+unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx);
+
+size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays
+
+size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FD syscall_arg__scnprintf_fd
+
+size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_HEX syscall_arg__scnprintf_hex
+
+size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_INT syscall_arg__scnprintf_int
+
+size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_LONG syscall_arg__scnprintf_long
+
+size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_PID syscall_arg__scnprintf_pid
+
+size_t syscall_arg__scnprintf_clone_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_CLONE_FLAGS syscall_arg__scnprintf_clone_flags
+
+size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FCNTL_CMD syscall_arg__scnprintf_fcntl_cmd
+
+size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FCNTL_ARG syscall_arg__scnprintf_fcntl_arg
+
+size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FLOCK syscall_arg__scnprintf_flock
+
+size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd
+
+size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_KCMP_TYPE syscall_arg__scnprintf_kcmp_type
+
+size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_KCMP_IDX syscall_arg__scnprintf_kcmp_idx
+
+size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights
+
+size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
+
+size_t syscall_arg__scnprintf_prctl_option(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_PRCTL_OPTION syscall_arg__scnprintf_prctl_option
+
+size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_PRCTL_ARG2 syscall_arg__scnprintf_prctl_arg2
+
+size_t syscall_arg__scnprintf_prctl_arg3(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_PRCTL_ARG3 syscall_arg__scnprintf_prctl_arg3
+
+size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags
+
+size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_STATX_MASK syscall_arg__scnprintf_statx_mask
+
+size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size);
+
+void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
+				    size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg));
+
+const char *arch_syscalls__strerrno(const char *arch, int err);
+
+#endif /* _PERF_TRACE_BEAUTY_H */
diff --git a/trace/beauty/clone.c b/trace/beauty/clone.c
new file mode 100644
index 0000000..d64d049
--- /dev/null
+++ b/trace/beauty/clone.c
@@ -0,0 +1,75 @@
+/*
+ * trace/beauty/clone.c
+ *
+ *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <sys/types.h>
+#include <uapi/linux/sched.h>
+
+static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+	int printed = 0;
+	/* Formats clone flags into bf as '|'-separated names; returns the summed scnprintf() counts. */
+#define	P_FLAG(n) \
+	if (flags & CLONE_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~CLONE_##n; \
+	}
+
+	P_FLAG(VM);
+	P_FLAG(FS);
+	P_FLAG(FILES);
+	P_FLAG(SIGHAND);
+	P_FLAG(PTRACE);
+	P_FLAG(VFORK);
+	P_FLAG(PARENT);
+	P_FLAG(THREAD);
+	P_FLAG(NEWNS);
+	P_FLAG(SYSVSEM);
+	P_FLAG(SETTLS);
+	P_FLAG(PARENT_SETTID);
+	P_FLAG(CHILD_CLEARTID);
+	P_FLAG(DETACHED);
+	P_FLAG(UNTRACED);
+	P_FLAG(CHILD_SETTID);
+	P_FLAG(NEWCGROUP);
+	P_FLAG(NEWUTS);
+	P_FLAG(NEWIPC);
+	P_FLAG(NEWUSER);
+	P_FLAG(NEWPID);
+	P_FLAG(NEWNET);
+	P_FLAG(IO);
+#undef P_FLAG
+	/* Bits not named above are printed as one hex literal; flags is unsigned long, hence %#lx. */
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#lx", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+size_t syscall_arg__scnprintf_clone_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+	unsigned long flags = arg->val;
+	enum syscall_clone_args { /* bit values OR-ed into arg->mask below */
+		SCC_FLAGS	  = (1 << 0),
+		SCC_CHILD_STACK	  = (1 << 1),
+		SCC_PARENT_TIDPTR = (1 << 2),
+		SCC_CHILD_TIDPTR  = (1 << 3),
+		SCC_TLS		  = (1 << 4),
+	};
+	if (!(flags & CLONE_PARENT_SETTID)) /* flag absent: mask the parent tid pointer argument */
+		arg->mask |= SCC_PARENT_TIDPTR;
+	/* Mask the child tid pointer unless CHILD_SETTID or CHILD_CLEARTID is set. */
+	if (!(flags & (CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)))
+		arg->mask |= SCC_CHILD_TIDPTR;
+	/* Mask the tls argument unless CLONE_SETTLS is set. */
+	if (!(flags & CLONE_SETTLS))
+		arg->mask |= SCC_TLS;
+
+	return clone__scnprintf_flags(flags, bf, size);
+}
diff --git a/trace/beauty/drm_ioctl.sh b/trace/beauty/drm_ioctl.sh
new file mode 100755
index 0000000..2149d3a
--- /dev/null
+++ b/trace/beauty/drm_ioctl.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# Print C source for the drm_ioctl_cmds[] name table, built from drm.h and i915_drm.h in directory $1.
+drm_header_dir=$1
+printf "#ifndef DRM_COMMAND_BASE\n"
+grep "#define DRM_COMMAND_BASE" $drm_header_dir/drm.h
+printf "#endif\n"
+# The table gets one "[nr] = name" initializer per DRM_IOCTL_* and per DRM_I915_* define matched below.
+printf "static const char *drm_ioctl_cmds[] = {\n"
+grep "^#define DRM_IOCTL.*DRM_IO" $drm_header_dir/drm.h | \
+	sed -r 's/^#define +DRM_IOCTL_([A-Z0-9_]+)[	 ]+DRM_IO[A-Z]* *\( *(0x[[:xdigit:]]+),*.*/	[\2] = "\1",/g'
+grep "^#define DRM_I915_[A-Z_0-9]\+[	 ]\+0x" $drm_header_dir/i915_drm.h | \
+	sed -r 's/^#define +DRM_I915_([A-Z0-9_]+)[	 ]+(0x[[:xdigit:]]+)/\t[DRM_COMMAND_BASE + \2] = "I915_\1",/g'
+printf "};\n"
diff --git a/trace/beauty/eventfd.c b/trace/beauty/eventfd.c
new file mode 100644
index 0000000..5d6a477
--- /dev/null
+++ b/trace/beauty/eventfd.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef EFD_SEMAPHORE
+#define EFD_SEMAPHORE		1
+#endif
+
+#ifndef EFD_NONBLOCK
+#define EFD_NONBLOCK		00004000
+#endif
+
+#ifndef EFD_CLOEXEC
+#define EFD_CLOEXEC		02000000
+#endif
+
+static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+	/* Formats arg->val as '|'-separated EFD_* names; a zero value is printed as "NONE". */
+	if (flags == 0)
+		return scnprintf(bf, size, "NONE");
+#define	P_FLAG(n) \
+	if (flags & EFD_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~EFD_##n; \
+	}
+	/* Each P_FLAG() prints the name if the bit is set and then clears that bit in flags. */
+	P_FLAG(SEMAPHORE);
+	P_FLAG(CLOEXEC);
+	P_FLAG(NONBLOCK);
+#undef P_FLAG
+	/* Whatever is still set has no name here and is printed as one hex value. */
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
diff --git a/trace/beauty/fcntl.c b/trace/beauty/fcntl.c
new file mode 100644
index 0000000..9e8900c
--- /dev/null
+++ b/trace/beauty/fcntl.c
@@ -0,0 +1,100 @@
+/*
+ * trace/beauty/fcntl.c
+ *
+ *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <uapi/linux/fcntl.h>
+
+static size_t fcntl__scnprintf_getfd(unsigned long val, char *bf, size_t size)
+{ /* Zero is printed as "0"; every non-zero value is printed as "CLOEXEC". */
+	return scnprintf(bf, size, "%s", val == 0 ? "0" : "CLOEXEC");
+}
+
+static size_t syscall_arg__scnprintf_fcntl_getfd(char *bf, size_t size, struct syscall_arg *arg)
+{ /* syscall_arg adapter around fcntl__scnprintf_getfd(), installed as the F_GETFD return formatter. */
+	return fcntl__scnprintf_getfd(arg->val, bf, size);
+}
+
+static size_t fcntl__scnprintf_getlease(unsigned long val, char *bf, size_t size)
+{
+	static const char *fcntl_setlease[] = { "RDLCK", "WRLCK", "UNLCK", };
+	static DEFINE_STRARRAY(fcntl_setlease);
+	/* One table serves both the F_GETLEASE return value and the F_SETLEASE argument. */
+	return strarray__scnprintf(&strarray__fcntl_setlease, bf, size, "%x", val);
+}
+
+static size_t syscall_arg__scnprintf_fcntl_getlease(char *bf, size_t size, struct syscall_arg *arg)
+{ /* syscall_arg adapter around fcntl__scnprintf_getlease(), installed as the F_GETLEASE return formatter. */
+	return fcntl__scnprintf_getlease(arg->val, bf, size);
+}
+
+size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg)
+{ /* Prints the cmd via the strarrays formatter; first picks a return formatter and masks the 3rd arg where unused. */
+	if (arg->val == F_GETFL) {
+		syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_open_flags);
+		goto mask_arg;
+	}
+	if (arg->val == F_GETFD) {
+		syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_fcntl_getfd);
+		goto mask_arg;
+	}
+	if (arg->val == F_DUPFD_CLOEXEC || arg->val == F_DUPFD) {
+		syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_fd);
+		goto out; /* unlike the other branches, the third argument is left unmasked */
+	}
+	if (arg->val == F_GETOWN) {
+		syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_pid);
+		goto mask_arg;
+	}
+	if (arg->val == F_GETLEASE) {
+		syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_fcntl_getlease);
+		goto mask_arg;
+	}
+	/*
+	 * Some commands ignore the third fcntl argument, "arg", so mask it.
+	 */
+	if (arg->val == F_GET_SEALS ||
+	    arg->val == F_GETSIG) {
+mask_arg: /* also the jump target of the F_GET* branches above */
+		arg->mask |= (1 << 2); /* bit 2 stands for the third syscall argument */
+	}
+out:
+	return syscall_arg__scnprintf_strarrays(bf, size, arg);
+}
+
+size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int cmd = syscall_arg__val(arg, 1);
+	/* The formatter for the third fcntl argument is chosen by the cmd found in argument 1. */
+	if (cmd == F_DUPFD)
+		return syscall_arg__scnprintf_fd(bf, size, arg);
+
+	if (cmd == F_SETFD)
+		return fcntl__scnprintf_getfd(arg->val, bf, size);
+
+	if (cmd == F_SETFL)
+		return open__scnprintf_flags(arg->val, bf, size);
+
+	if (cmd == F_SETOWN)
+		return syscall_arg__scnprintf_pid(bf, size, arg);
+
+	if (cmd == F_SETLEASE)
+		return fcntl__scnprintf_getlease(arg->val, bf, size);
+	/*
+	 * We still don't grab the contents of pointers on entry or exit,
+	 * so just print them as hex numbers
+	 */
+	if (cmd == F_SETLK || cmd == F_SETLKW || cmd == F_GETLK ||
+	    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW || cmd == F_OFD_GETLK ||
+	    cmd == F_GETOWN_EX || cmd == F_SETOWN_EX ||
+	    cmd == F_GET_RW_HINT || cmd == F_SET_RW_HINT ||
+	    cmd == F_GET_FILE_RW_HINT || cmd == F_SET_FILE_RW_HINT)
+		return syscall_arg__scnprintf_hex(bf, size, arg);
+	/* Every other cmd: print the argument with the generic long formatter. */
+	return syscall_arg__scnprintf_long(bf, size, arg);
+}
diff --git a/trace/beauty/flock.c b/trace/beauty/flock.c
new file mode 100644
index 0000000..c4ff6ad
--- /dev/null
+++ b/trace/beauty/flock.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <uapi/linux/fcntl.h>
+
+#ifndef LOCK_MAND
+#define LOCK_MAND	 32
+#endif
+
+#ifndef LOCK_READ
+#define LOCK_READ	 64
+#endif
+
+#ifndef LOCK_WRITE
+#define LOCK_WRITE	128
+#endif
+
+#ifndef LOCK_RW
+#define LOCK_RW		192
+#endif
+
+size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int printed = 0, op = arg->val;
+	/* Formats arg->val as '|'-separated LOCK_* names; a zero value is printed as "NONE". */
+	if (op == 0)
+		return scnprintf(bf, size, "NONE");
+#define	P_CMD(cmd) \
+	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
+		op &= ~LOCK_##cmd; \
+	}
+	/* P_CMD() requires all bits of the mask, so the two-bit RW is tested before READ and WRITE. */
+	P_CMD(SH);
+	P_CMD(EX);
+	P_CMD(NB);
+	P_CMD(UN);
+	P_CMD(MAND);
+	P_CMD(RW);
+	P_CMD(READ);
+	P_CMD(WRITE);
+#undef P_CMD
+	/* Whatever is still set has no name here and is printed as one hex value. */
+	if (op)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
+
+	return printed;
+}
diff --git a/trace/beauty/futex_op.c b/trace/beauty/futex_op.c
new file mode 100644
index 0000000..61850fb
--- /dev/null
+++ b/trace/beauty/futex_op.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/futex.h>
+
+#ifndef FUTEX_WAIT_BITSET
+#define FUTEX_WAIT_BITSET	  9
+#endif
+#ifndef FUTEX_WAKE_BITSET
+#define FUTEX_WAKE_BITSET	 10
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI
+#define FUTEX_WAIT_REQUEUE_PI	 11
+#endif
+#ifndef FUTEX_CMP_REQUEUE_PI
+#define FUTEX_CMP_REQUEUE_PI	 12
+#endif
+#ifndef FUTEX_CLOCK_REALTIME
+#define FUTEX_CLOCK_REALTIME	256
+#endif
+
+static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
+{
+	enum syscall_futex_args { /* bit values OR-ed into arg->mask, one per futex argument */
+		SCF_UADDR   = (1 << 0),
+		SCF_OP	    = (1 << 1),
+		SCF_VAL	    = (1 << 2),
+		SCF_TIMEOUT = (1 << 3),
+		SCF_UADDR2  = (1 << 4),
+		SCF_VAL3    = (1 << 5),
+	};
+	int op = arg->val;
+	int cmd = op & FUTEX_CMD_MASK;
+	size_t printed = 0;
+	/* Each case prints the command name, then masks the arguments listed for that command. */
+	switch (cmd) {
+#define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
+	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
+	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
+	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
+	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
+	P_FUTEX_OP(WAKE_OP);							  break;
+	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
+	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
+	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
+	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
+	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
+	}
+	/* The flag bits are tested on the full op and appended as suffixes after the command. */
+	if (op & FUTEX_PRIVATE_FLAG)
+		printed += scnprintf(bf + printed, size - printed, "|PRIV");
+
+	if (op & FUTEX_CLOCK_REALTIME)
+		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
+
+	return printed;
+}
+
+#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
diff --git a/trace/beauty/futex_val3.c b/trace/beauty/futex_val3.c
new file mode 100644
index 0000000..26f6b32
--- /dev/null
+++ b/trace/beauty/futex_val3.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/futex.h>
+
+#ifndef FUTEX_BITSET_MATCH_ANY
+#define FUTEX_BITSET_MATCH_ANY 0xffffffff
+#endif
+
+static size_t syscall_arg__scnprintf_futex_val3(char *bf, size_t size, struct syscall_arg *arg)
+{
+	unsigned int bitset = arg->val;
+	/* Prints val3 as a bitset: "MATCH_ANY" for FUTEX_BITSET_MATCH_ANY, otherwise plain hex. */
+	if (bitset == FUTEX_BITSET_MATCH_ANY)
+		return scnprintf(bf, size, "MATCH_ANY");
+	/* "%#x" only: a trailing 'd' in the format would be emitted literally after the number. */
+	return scnprintf(bf, size, "%#x", bitset);
+}
+
+#define SCA_FUTEX_VAL3  syscall_arg__scnprintf_futex_val3
diff --git a/trace/beauty/ioctl.c b/trace/beauty/ioctl.c
new file mode 100644
index 0000000..1be3b4c
--- /dev/null
+++ b/trace/beauty/ioctl.c
@@ -0,0 +1,162 @@
+/*
+ * trace/beauty/ioctl.c
+ *
+ *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+
+/*
+ * FIXME: to support all arches we have to improve this, for
+ * now, to build on older systems without things like TIOCGEXCL,
+ * get it directly from our copy.
+ *
+ * Right now only x86 is being supported for beautifying ioctl args
+ * in 'perf trace', see tools/perf/trace/beauty/Build and builtin-trace.c
+ */
+#include <uapi/asm-generic/ioctls.h>
+
+static size_t ioctl__scnprintf_tty_cmd(int nr, int dir, char *bf, size_t size)
+{
+	static const char *ioctl_tty_cmd[] = {
+	[_IOC_NR(TCGETS)] = "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
+	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", "TIOCSCTTY",
+	"TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", "TIOCGWINSZ", "TIOCSWINSZ",
+	"TIOCMGET", "TIOCMBIS", "TIOCMBIC", "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR",
+	"FIONREAD", "TIOCLINUX", "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT",
+	"FIONBIO", "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
+	[_IOC_NR(TIOCSBRK)] = "TIOCSBRK", "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2",
+	"TCSETSW2", "TCSETSF2", "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
+	"TIOCGDEV", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", "TIOCVHANGUP", "TIOCGPKT",
+	"TIOCGPTLCK", [_IOC_NR(TIOCGEXCL)] = "TIOCGEXCL", "TIOCGPTPEER",
+	[_IOC_NR(FIONCLEX)] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
+	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
+	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
+	"TIOCMIWAIT", "TIOCGICOUNT", };
+	static DEFINE_STRARRAY(ioctl_tty_cmd);
+	/* The table is indexed by _IOC_NR(); the TCGETS anchor keeps each run aligned with its number. */
+	if (nr < strarray__ioctl_tty_cmd.nr_entries && strarray__ioctl_tty_cmd.entries[nr] != NULL)
+		return scnprintf(bf, size, "%s", strarray__ioctl_tty_cmd.entries[nr]);
+	/* No name at this slot (NULL gap or past the end): print the raw (type, nr, dir) triplet. */
+	return scnprintf(bf, size, "(%#x, %#x, %#x)", 'T', nr, dir);
+}
+
+static size_t ioctl__scnprintf_drm_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/drm_ioctl_array.c"
+	static DEFINE_STRARRAY(drm_ioctl_cmds);
+	const char *cmd_name = nr < strarray__drm_ioctl_cmds.nr_entries ? strarray__drm_ioctl_cmds.entries[nr] : NULL;
+	if (cmd_name == NULL) /* no name generated for this nr: print the raw triplet */
+		return scnprintf(bf, size, "(%#x, %#x, %#x)", 'd', nr, dir);
+	/* Known command: the generated name with the DRM_ prefix. */
+	return scnprintf(bf, size, "DRM_%s", cmd_name);
+}
+
+static size_t ioctl__scnprintf_sndrv_pcm_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/sndrv_pcm_ioctl_array.c"
+	static DEFINE_STRARRAY(sndrv_pcm_ioctl_cmds);
+	const char *cmd_name = nr < strarray__sndrv_pcm_ioctl_cmds.nr_entries ? strarray__sndrv_pcm_ioctl_cmds.entries[nr] : NULL;
+	if (cmd_name == NULL) /* no name generated for this nr: print the raw triplet */
+		return scnprintf(bf, size, "(%#x, %#x, %#x)", 'A', nr, dir);
+	/* Known command: the generated name with the SNDRV_PCM_ prefix. */
+	return scnprintf(bf, size, "SNDRV_PCM_%s", cmd_name);
+}
+
+static size_t ioctl__scnprintf_sndrv_ctl_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/sndrv_ctl_ioctl_array.c"
+	static DEFINE_STRARRAY(sndrv_ctl_ioctl_cmds);
+	const char *cmd_name = nr < strarray__sndrv_ctl_ioctl_cmds.nr_entries ? strarray__sndrv_ctl_ioctl_cmds.entries[nr] : NULL;
+	if (cmd_name == NULL) /* no name generated for this nr: print the raw triplet */
+		return scnprintf(bf, size, "(%#x, %#x, %#x)", 'U', nr, dir);
+	/* Known command: the generated name with the SNDRV_CTL_ prefix. */
+	return scnprintf(bf, size, "SNDRV_CTL_%s", cmd_name);
+}
+
+static size_t ioctl__scnprintf_kvm_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/kvm_ioctl_array.c"
+	static DEFINE_STRARRAY(kvm_ioctl_cmds);
+	const char *cmd_name = nr < strarray__kvm_ioctl_cmds.nr_entries ? strarray__kvm_ioctl_cmds.entries[nr] : NULL;
+	if (cmd_name == NULL) /* no name generated for this nr: print the raw triplet */
+		return scnprintf(bf, size, "(%#x, %#x, %#x)", 0xAE, nr, dir);
+	/* Known command: the generated name with the KVM_ prefix. */
+	return scnprintf(bf, size, "KVM_%s", cmd_name);
+}
+
+static size_t ioctl__scnprintf_vhost_virtio_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c"
+	static DEFINE_STRARRAY(vhost_virtio_ioctl_cmds);
+	static DEFINE_STRARRAY(vhost_virtio_ioctl_read_cmds);
+	struct strarray *table = &strarray__vhost_virtio_ioctl_cmds;
+	if (dir & _IOC_READ) /* commands with the read direction bit are named by a separate table */
+		table = &strarray__vhost_virtio_ioctl_read_cmds;
+	if (nr >= table->nr_entries || table->entries[nr] == NULL)
+		return scnprintf(bf, size, "(%#x, %#x, %#x)", 0xAF, nr, dir);
+	return scnprintf(bf, size, "VHOST_%s", table->entries[nr]);
+}
+
+static size_t ioctl__scnprintf_perf_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/perf_ioctl_array.c"
+	static DEFINE_STRARRAY(perf_ioctl_cmds);
+	/* Known command: print the generated name with the PERF_ prefix. */
+	if (nr < strarray__perf_ioctl_cmds.nr_entries && strarray__perf_ioctl_cmds.entries[nr] != NULL)
+		return scnprintf(bf, size, "PERF_%s", strarray__perf_ioctl_cmds.entries[nr]);
+	/* Unknown nr: raw triplet carrying this formatter's own ioctl type, '$' (0xAE is KVM's). */
+	return scnprintf(bf, size, "(%#x, %#x, %#x)", '$', nr, dir);
+}
+
+static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size)
+{
+	int dir	 = _IOC_DIR(cmd),
+	    type = _IOC_TYPE(cmd),
+	    nr	 = _IOC_NR(cmd),
+	    sz	 = _IOC_SIZE(cmd);
+	int printed = 0;
+	static const struct ioctl_type {
+		int	type;
+		size_t	(*scnprintf)(int nr, int dir, char *bf, size_t size);
+	} ioctl_types[] = { /* Must be ordered by type */
+			      { .type	= '$', .scnprintf = ioctl__scnprintf_perf_cmd, },
+		['A' - '$'] = { .type	= 'A', .scnprintf = ioctl__scnprintf_sndrv_pcm_cmd, },
+		['T' - '$'] = { .type	= 'T', .scnprintf = ioctl__scnprintf_tty_cmd, },
+		['U' - '$'] = { .type	= 'U', .scnprintf = ioctl__scnprintf_sndrv_ctl_cmd, },
+		['d' - '$'] = { .type	= 'd', .scnprintf = ioctl__scnprintf_drm_cmd, },
+		[0xAE - '$'] = { .type	= 0xAE, .scnprintf = ioctl__scnprintf_kvm_cmd, },
+		[0xAF - '$'] = { .type	= 0xAF, .scnprintf = ioctl__scnprintf_vhost_virtio_cmd, },
+	};
+	const int nr_types = ARRAY_SIZE(ioctl_types);
+	/* Sparse table indexed by (type - first type); slots not initialized above have a NULL scnprintf. */
+	if (type >= ioctl_types[0].type && type <= ioctl_types[nr_types - 1].type) {
+		const int index = type - ioctl_types[0].type;
+
+		if (ioctl_types[index].scnprintf != NULL)
+			return ioctl_types[index].scnprintf(nr, dir, bf, size);
+	}
+	/* No per-type formatter: print "(<dir>, type, nr, size)" with the direction spelled out. */
+	printed += scnprintf(bf + printed, size - printed, "%c", '(');
+
+	if (dir == _IOC_NONE) {
+		printed += scnprintf(bf + printed, size - printed, "%s", "NONE");
+	} else {
+		if (dir & _IOC_READ)
+			printed += scnprintf(bf + printed, size - printed, "%s", "READ");
+		if (dir & _IOC_WRITE)
+			printed += scnprintf(bf + printed, size - printed, "%s%s", dir & _IOC_READ ? "|" : "", "WRITE");
+	}
+
+	return printed + scnprintf(bf + printed, size - printed, ", %#x, %#x, %#x)", type, nr, sz);
+}
+
+size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg)
+{
+	/* syscall_arg adapter: the raw argument value is the ioctl */
+	/* request, which is decoded and formatted by the helper. */
+	return ioctl__scnprintf_cmd(arg->val, bf, size);
+}
diff --git a/trace/beauty/kcmp.c b/trace/beauty/kcmp.c
new file mode 100644
index 0000000..f62040e
--- /dev/null
+++ b/trace/beauty/kcmp.c
@@ -0,0 +1,44 @@
+/*
+ * trace/beauty/kcmp.c
+ *
+ *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <sys/types.h>
+#include <machine.h>
+#include <uapi/linux/kcmp.h>
+
+#include "trace/beauty/generated/kcmp_type_array.c"
+
+size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg)
+{
+	unsigned long fd = arg->val;
+	int type = syscall_arg__val(arg, 2);
+	pid_t pid;
+	/* Only for KCMP_FILE is the index treated as an fd; otherwise print it as a plain long. */
+	if (type != KCMP_FILE)
+		return syscall_arg__scnprintf_long(bf, size, arg);
+	/* The fd is formatted relative to the pid passed in the matching pid argument. */
+	pid = syscall_arg__val(arg, arg->idx == 3 ? 0 : 1); /* idx1 -> pid1, idx2 -> pid2 */
+	return pid__scnprintf_fd(arg->trace, pid, fd, bf, size);
+}
+
+static size_t kcmp__scnprintf_type(int type, char *bf, size_t size)
+{ /* Formats a kcmp type through the generated kcmp_types[] table, with "%d" as the integer format. */
+	static DEFINE_STRARRAY(kcmp_types);
+	return strarray__scnprintf(&strarray__kcmp_types, bf, size, "%d", type);
+}
+
+size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_arg *arg)
+{
+	unsigned long type = arg->val;
+	/* Bits 3 and 4 of arg->mask stand for the idx1 and idx2 arguments. */
+	if (type != KCMP_FILE)
+		arg->mask |= (1 << 3) | (1 << 4); /* Ignore idx1 and idx2 */
+
+	return kcmp__scnprintf_type(type, bf, size);
+}
diff --git a/trace/beauty/kcmp_type.sh b/trace/beauty/kcmp_type.sh
new file mode 100755
index 0000000..40d063b
--- /dev/null
+++ b/trace/beauty/kcmp_type.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Print C source for the kcmp_types[] name table, built from the KCMP_* enumerators in $1/kcmp.h.
+header_dir=$1
+# grep -E replaces the obsolescent egrep; quoting keeps the regex and the path as single words.
+printf "static const char *kcmp_types[] = {\n"
+regex='^[[:space:]]+(KCMP_(\w+)),'
+grep -E "$regex" "${header_dir}/kcmp.h" | grep -v KCMP_TYPES, | \
+	sed -r "s/$regex/\1 \2/g" | \
+	xargs printf "\t[%s]\t= \"%s\",\n"
+printf "};\n"
diff --git a/trace/beauty/kvm_ioctl.sh b/trace/beauty/kvm_ioctl.sh
new file mode 100755
index 0000000..bd28817
--- /dev/null
+++ b/trace/beauty/kvm_ioctl.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Print C source for the kvm_ioctl_cmds[] name table, built from the KVM_* ioctl defines in $1/kvm.h.
+kvm_header_dir=$1
+# grep -E replaces the obsolescent egrep; quoting keeps the regex and the path as single words.
+printf "static const char *kvm_ioctl_cmds[] = {\n"
+regex='^#[[:space:]]*define[[:space:]]+KVM_(\w+)[[:space:]]+_IO[RW]*\([[:space:]]*KVMIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
+grep -E "$regex" "${kvm_header_dir}/kvm.h"	| \
+	sed -r "s/$regex/\2 \1/g"	| \
+	grep -E -v " ((ARM|PPC|S390)_|[GS]ET_(DEBUGREGS|PIT2|XSAVE|TSC_KHZ)|CREATE_SPAPR_TCE_64)" | \
+	sort | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/trace/beauty/madvise_behavior.sh b/trace/beauty/madvise_behavior.sh
new file mode 100755
index 0000000..60ef864
--- /dev/null
+++ b/trace/beauty/madvise_behavior.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+header_dir=$1
+
+printf "static const char *madvise_advices[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MADV_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
+egrep $regex ${header_dir}/mman-common.h | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	sort -n | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/trace/beauty/mmap.c b/trace/beauty/mmap.c
new file mode 100644
index 0000000..9f68077
--- /dev/null
+++ b/trace/beauty/mmap.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/mman.h>
+
+static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
+					       struct syscall_arg *arg)
+{
+	int printed = 0, prot = arg->val;
+	/* Formats arg->val as '|'-separated PROT_* names; PROT_NONE is printed as "NONE". */
+	if (prot == PROT_NONE)
+		return scnprintf(bf, size, "NONE");
+#define	P_MMAP_PROT(n) \
+	if (prot & PROT_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		prot &= ~PROT_##n; \
+	}
+	/* Each P_MMAP_PROT() prints the name if the bit is set and then clears that bit in prot. */
+	P_MMAP_PROT(EXEC);
+	P_MMAP_PROT(READ);
+	P_MMAP_PROT(WRITE);
+	P_MMAP_PROT(SEM);
+	P_MMAP_PROT(GROWSDOWN);
+	P_MMAP_PROT(GROWSUP);
+#undef P_MMAP_PROT
+	/* Whatever is still set has no name here and is printed as one hex value. */
+	if (prot)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
+
+	return printed;
+}
+
+#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
+
+static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
+						struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+	/* Formats arg->val as '|'-separated MAP_* names, leftovers as hex. */
+	if (flags & MAP_ANONYMOUS)
+		arg->mask |= (1 << 4) | (1 << 5); /* Mask the args at zero-based index 4 ('fd') and 5 ('offset'), ignored */
+
+#define	P_MMAP_FLAG(n) \
+	if (flags & MAP_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~MAP_##n; \
+	}
+	/* Flags guarded by #ifdef are only tested when the included headers define them. */
+	P_MMAP_FLAG(SHARED);
+	P_MMAP_FLAG(PRIVATE);
+#ifdef MAP_32BIT
+	P_MMAP_FLAG(32BIT);
+#endif
+	P_MMAP_FLAG(ANONYMOUS);
+	P_MMAP_FLAG(DENYWRITE);
+	P_MMAP_FLAG(EXECUTABLE);
+	P_MMAP_FLAG(FILE);
+	P_MMAP_FLAG(FIXED);
+#ifdef MAP_FIXED_NOREPLACE
+	P_MMAP_FLAG(FIXED_NOREPLACE);
+#endif
+	P_MMAP_FLAG(GROWSDOWN);
+	P_MMAP_FLAG(HUGETLB);
+	P_MMAP_FLAG(LOCKED);
+	P_MMAP_FLAG(NONBLOCK);
+	P_MMAP_FLAG(NORESERVE);
+	P_MMAP_FLAG(POPULATE);
+	P_MMAP_FLAG(STACK);
+	P_MMAP_FLAG(UNINITIALIZED);
+#ifdef MAP_SYNC
+	P_MMAP_FLAG(SYNC);
+#endif
+#undef P_MMAP_FLAG
+	/* Whatever is still set has no name here and is printed as one hex value. */
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
+
+static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
+						  struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+	/* Formats arg->val as '|'-separated MREMAP_* names; nothing is printed for a zero value. */
+#define P_MREMAP_FLAG(n) \
+	if (flags & MREMAP_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~MREMAP_##n; \
+	}
+
+	P_MREMAP_FLAG(MAYMOVE);
+	P_MREMAP_FLAG(FIXED);
+#undef P_MREMAP_FLAG
+	/* Whatever is still set has no name here and is printed as one hex value. */
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
+
+static size_t madvise__scnprintf_behavior(int behavior, char *bf, size_t size)
+{
+#include "trace/beauty/generated/madvise_behavior_array.c"
+	static DEFINE_STRARRAY(madvise_advices);
+	/* Only non-negative values index the table; a negative one would read before its start. */
+	if (behavior >= 0 && behavior < strarray__madvise_advices.nr_entries && strarray__madvise_advices.entries[behavior] != NULL)
+		return scnprintf(bf, size, "MADV_%s", strarray__madvise_advices.entries[behavior]);
+	/* Unnamed value: print it in hex ("%#" alone is not a complete conversion specification). */
+	return scnprintf(bf, size, "%#x", behavior);
+}
+
+static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
+						      struct syscall_arg *arg)
+{ /* syscall_arg adapter: formats arg->val through madvise__scnprintf_behavior(). */
+	return madvise__scnprintf_behavior(arg->val, bf, size);
+}
+
+#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
diff --git a/trace/beauty/mode_t.c b/trace/beauty/mode_t.c
new file mode 100644
index 0000000..d929ad7
--- /dev/null
+++ b/trace/beauty/mode_t.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/* From include/linux/stat.h */
+#ifndef S_IRWXUGO
+#define S_IRWXUGO	(S_IRWXU|S_IRWXG|S_IRWXO)
+#endif
+#ifndef S_IALLUGO
+#define S_IALLUGO	(S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
+#endif
+#ifndef S_IRUGO
+#define S_IRUGO         (S_IRUSR|S_IRGRP|S_IROTH)
+#endif
+#ifndef S_IWUGO
+#define S_IWUGO         (S_IWUSR|S_IWGRP|S_IWOTH)
+#endif
+#ifndef S_IXUGO
+#define S_IXUGO         (S_IXUSR|S_IXGRP|S_IXOTH)
+#endif
+
+static size_t syscall_arg__scnprintf_mode_t(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int printed = 0, mode = arg->val;
+
+#define	P_MODE(n) \
+	if ((mode & S_##n) == S_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		mode &= ~S_##n; \
+	}
+
+	P_MODE(IALLUGO);
+	P_MODE(IRWXUGO);
+	P_MODE(IRUGO);
+	P_MODE(IWUGO);
+	P_MODE(IXUGO);
+	P_MODE(IFMT);
+	P_MODE(IFSOCK);
+	P_MODE(IFLNK);
+	P_MODE(IFREG);
+	P_MODE(IFBLK);
+	P_MODE(IFDIR);
+	P_MODE(IFCHR);
+	P_MODE(IFIFO);
+	P_MODE(ISUID);
+	P_MODE(ISGID);
+	P_MODE(ISVTX);
+	P_MODE(IRWXU);
+	P_MODE(IRUSR);
+	P_MODE(IWUSR);
+	P_MODE(IXUSR);
+	P_MODE(IRWXG);
+	P_MODE(IRGRP);
+	P_MODE(IWGRP);
+	P_MODE(IXGRP);
+	P_MODE(IRWXO);
+	P_MODE(IROTH);
+	P_MODE(IWOTH);
+	P_MODE(IXOTH);
+#undef P_MODE
+
+	if (mode)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", mode);
+
+	return printed;
+}
+
+#define SCA_MODE_T syscall_arg__scnprintf_mode_t
diff --git a/trace/beauty/msg_flags.c b/trace/beauty/msg_flags.c
new file mode 100644
index 0000000..c064d6a
--- /dev/null
+++ b/trace/beauty/msg_flags.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef MSG_PROBE
+#define MSG_PROBE		     0x10
+#endif
+#ifndef MSG_WAITFORONE
+#define MSG_WAITFORONE		   0x10000
+#endif
+#ifndef MSG_SENDPAGE_NOTLAST
+#define MSG_SENDPAGE_NOTLAST	   0x20000
+#endif
+#ifndef MSG_FASTOPEN
+#define MSG_FASTOPEN		0x20000000
+#endif
+#ifndef MSG_CMSG_CLOEXEC
+# define MSG_CMSG_CLOEXEC	0x40000000
+#endif
+
+static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
+					       struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+	if (flags == 0)
+		return scnprintf(bf, size, "NONE");
+#define	P_MSG_FLAG(n) \
+	if (flags & MSG_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~MSG_##n; \
+	}
+
+	P_MSG_FLAG(OOB);
+	P_MSG_FLAG(PEEK);
+	P_MSG_FLAG(DONTROUTE);
+	P_MSG_FLAG(CTRUNC);
+	P_MSG_FLAG(PROBE);
+	P_MSG_FLAG(TRUNC);
+	P_MSG_FLAG(DONTWAIT);
+	P_MSG_FLAG(EOR);
+	P_MSG_FLAG(WAITALL);
+	P_MSG_FLAG(FIN);
+	P_MSG_FLAG(SYN);
+	P_MSG_FLAG(CONFIRM);
+	P_MSG_FLAG(RST);
+	P_MSG_FLAG(ERRQUEUE);
+	P_MSG_FLAG(NOSIGNAL);
+	P_MSG_FLAG(MORE);
+	P_MSG_FLAG(WAITFORONE);
+	P_MSG_FLAG(SENDPAGE_NOTLAST);
+	P_MSG_FLAG(FASTOPEN);
+	P_MSG_FLAG(CMSG_CLOEXEC);
+#undef P_MSG_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
diff --git a/trace/beauty/open_flags.c b/trace/beauty/open_flags.c
new file mode 100644
index 0000000..6aec617
--- /dev/null
+++ b/trace/beauty/open_flags.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#ifndef O_DIRECT
+#define O_DIRECT	00040000
+#endif
+
+#ifndef O_DIRECTORY
+#define O_DIRECTORY	00200000
+#endif
+
+#ifndef O_NOATIME
+#define O_NOATIME	01000000
+#endif
+
+#ifndef O_TMPFILE
+#define O_TMPFILE	020000000
+#endif
+
+#undef O_LARGEFILE
+#define O_LARGEFILE	00100000
+
+size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+	int printed = 0;
+	/* Format open(2) flags as "A|B|..." without the O_ prefix; O_RDONLY is 0, hence the special case. */
+	if (flags == 0)
+		return scnprintf(bf, size, "RDONLY");
+#define	P_FLAG(n) \
+	if (flags & O_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~O_##n; \
+	}
+
+	P_FLAG(RDWR);
+	P_FLAG(APPEND);
+	P_FLAG(ASYNC);
+	P_FLAG(CLOEXEC);
+	P_FLAG(CREAT);
+	P_FLAG(DIRECT);
+	P_FLAG(DIRECTORY);
+	P_FLAG(EXCL);
+	P_FLAG(LARGEFILE);
+	P_FLAG(NOFOLLOW);
+	P_FLAG(TMPFILE);
+	P_FLAG(NOATIME);
+	P_FLAG(NOCTTY);
+#ifdef O_NONBLOCK
+	P_FLAG(NONBLOCK);
+#elif defined(O_NDELAY)
+	P_FLAG(NDELAY);
+#endif
+#ifdef O_PATH
+	P_FLAG(PATH);
+#endif
+#ifdef O_DSYNC
+	if ((flags & O_SYNC) == O_SYNC) {
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
+		flags &= ~O_SYNC; /* consume the bits so they are not repeated in the hex residue */
+	} else {
+		P_FLAG(DSYNC);
+	}
+#else
+	P_FLAG(SYNC);
+#endif
+	P_FLAG(TRUNC);
+	P_FLAG(WRONLY);
+#undef P_FLAG
+	/* Whatever is left has no known name: show it as hex (flags is unsigned long, hence %lx). */
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#lx", printed ? "|" : "", flags);
+	return printed;
+}
+
+size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int flags = arg->val;
+	/* Beautifier for an open-style flags argument; without O_CREAT the bit for the next argument is set in arg->mask. */
+	if (!(flags & O_CREAT))
+		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
+	/* The textual rendering itself is delegated to open__scnprintf_flags(). */
+	return open__scnprintf_flags(flags, bf, size);
+}
diff --git a/trace/beauty/perf_event_open.c b/trace/beauty/perf_event_open.c
new file mode 100644
index 0000000..2bafd7c
--- /dev/null
+++ b/trace/beauty/perf_event_open.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef PERF_FLAG_FD_NO_GROUP
+# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
+#endif
+
+#ifndef PERF_FLAG_FD_OUTPUT
+# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
+#endif
+
+#ifndef PERF_FLAG_PID_CGROUP
+# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
+#endif
+
+#ifndef PERF_FLAG_FD_CLOEXEC
+# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
+#endif
+
+static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
+						struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+	if (flags == 0)
+		return 0;
+
+#define	P_FLAG(n) \
+	if (flags & PERF_FLAG_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~PERF_FLAG_##n; \
+	}
+
+	P_FLAG(FD_NO_GROUP);
+	P_FLAG(FD_OUTPUT);
+	P_FLAG(PID_CGROUP);
+	P_FLAG(FD_CLOEXEC);
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
diff --git a/trace/beauty/perf_ioctl.sh b/trace/beauty/perf_ioctl.sh
new file mode 100755
index 0000000..faea423
--- /dev/null
+++ b/trace/beauty/perf_ioctl.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Emit a C table "[number] = name" for the PERF_EVENT_IOC_* defines found in $1/perf_event.h.
+header_dir=$1
+
+printf "static const char *perf_ioctl_cmds[] = {\n"
+regex='^#[[:space:]]*define[[:space:]]+PERF_EVENT_IOC_(\w+)[[:space:]]+_IO[RW]*[[:space:]]*\([[:space:]]*.\$.[[:space:]]*,[[:space:]]*([[:digit:]]+).*'
+grep -E "$regex" "${header_dir}/perf_event.h"	| \
+	sed -r "s/$regex/\2 \1/g"	| \
+	sort | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/trace/beauty/pid.c b/trace/beauty/pid.c
new file mode 100644
index 0000000..0313df3
--- /dev/null
+++ b/trace/beauty/pid.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int pid = arg->val;
+	struct trace *trace = arg->trace;
+	size_t printed = scnprintf(bf, size, "%d", pid);
+	struct thread *thread = machine__findnew_thread(trace->host, pid, pid);
+	/* Prints the pid and, when a comm is set for the thread found above, appends " (comm)". */
+	if (thread != NULL) {
+		if (!thread->comm_set)
+			thread__set_comm_from_proc(thread);
+
+		if (thread->comm_set)
+			printed += scnprintf(bf + printed, size - printed,
+					     " (%s)", thread__comm_str(thread));
+		thread__put(thread);
+	}
+
+	return printed;
+}
diff --git a/trace/beauty/pkey_alloc.c b/trace/beauty/pkey_alloc.c
new file mode 100644
index 0000000..2ba784a
--- /dev/null
+++ b/trace/beauty/pkey_alloc.c
@@ -0,0 +1,50 @@
+/*
+ * trace/beauty/pkey_alloc.c
+ *
+ *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <linux/log2.h>
+
+static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size)
+{
+	int i, printed = 0;
+	/* Formats an access_rights bitmask as "NAME|NAME|..." using the generated name table. */
+#include "trace/beauty/generated/pkey_alloc_access_rights_array.c"
+	static DEFINE_STRARRAY(pkey_alloc_access_rights);
+	/* Slot 0 holds the name used when no bit is set; fall back to a plain 0 if it is missing. */
+	if (access_rights == 0) {
+		const char *s = strarray__pkey_alloc_access_rights.entries[0];
+		if (s)
+			return scnprintf(bf, size, "%s", s);
+		return scnprintf(bf, size, "%d", 0);
+	}
+	/* Slot i (i >= 1) describes bit i - 1. */
+	for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) {
+		int bit = 1 << (i - 1);
+
+		if (!(access_rights & bit))
+			continue;
+
+		if (printed != 0)
+			printed += scnprintf(bf + printed, size - printed, "|");
+		/* A set bit without a name is shown as 0x-prefixed hex; "%#x" supplies the prefix itself. */
+		if (strarray__pkey_alloc_access_rights.entries[i] != NULL)
+			printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]);
+		else
+			printed += scnprintf(bf + printed, size - printed, "%#x", bit);
+	}
+
+	return printed;
+}
+
+size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg)
+{
+	unsigned long cmd = arg->val;
+
+	return pkey_alloc__scnprintf_access_rights(cmd, bf, size);
+}
diff --git a/trace/beauty/pkey_alloc_access_rights.sh b/trace/beauty/pkey_alloc_access_rights.sh
new file mode 100755
index 0000000..62e51a0
--- /dev/null
+++ b/trace/beauty/pkey_alloc_access_rights.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Emit a C table of PKEY_* names from $1/mman-common.h, indexed by "value ? ilog2(value) + 1 : 0".
+header_dir=$1
+
+printf "static const char *pkey_alloc_access_rights[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+PKEY_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*'
+grep -E "$regex" "${header_dir}/mman-common.h"	| \
+	sed -r "s/$regex/\2 \2 \1/g"	| \
+	sort | xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n"
+printf "};\n"
diff --git a/trace/beauty/prctl.c b/trace/beauty/prctl.c
new file mode 100644
index 0000000..246130d
--- /dev/null
+++ b/trace/beauty/prctl.c
@@ -0,0 +1,82 @@
+/*
+ * trace/beauty/prctl.c
+ *
+ *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <uapi/linux/prctl.h>
+
+#include "trace/beauty/generated/prctl_option_array.c"
+
+static size_t prctl__scnprintf_option(int option, char *bf, size_t size)
+{
+	static DEFINE_STRARRAY(prctl_options);
+	return strarray__scnprintf(&strarray__prctl_options, bf, size, "%d", option);
+}
+
+static size_t prctl__scnprintf_set_mm(int option, char *bf, size_t size)
+{
+	static DEFINE_STRARRAY(prctl_set_mm_options);
+	return strarray__scnprintf(&strarray__prctl_set_mm_options, bf, size, "%d", option);
+}
+
+size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int option = syscall_arg__val(arg, 0);
+
+	if (option == PR_SET_MM)
+		return prctl__scnprintf_set_mm(arg->val, bf, size);
+	/*
+	 * We still don't grab the contents of pointers on entry or exit,
+	 * so just print them as hex numbers
+	 */
+	if (option == PR_SET_NAME)
+		return syscall_arg__scnprintf_hex(bf, size, arg);
+
+	return syscall_arg__scnprintf_long(bf, size, arg);
+}
+
+size_t syscall_arg__scnprintf_prctl_arg3(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int option = syscall_arg__val(arg, 0);
+
+	if (option == PR_SET_MM)
+		return syscall_arg__scnprintf_hex(bf, size, arg);
+
+	return syscall_arg__scnprintf_long(bf, size, arg);
+}
+
+size_t syscall_arg__scnprintf_prctl_option(char *bf, size_t size, struct syscall_arg *arg)
+{
+	unsigned long option = arg->val;
+	enum {
+                SPO_ARG2 = (1 << 1),
+                SPO_ARG3 = (1 << 2),
+                SPO_ARG4 = (1 << 3),
+                SPO_ARG5 = (1 << 4),
+                SPO_ARG6 = (1 << 5),
+        };
+	const u8 all_but2 = SPO_ARG3 | SPO_ARG4 | SPO_ARG5 | SPO_ARG6;
+	const u8 all = SPO_ARG2 | all_but2;
+	const u8 masks[] = {
+		[PR_GET_DUMPABLE]	 = all,
+		[PR_SET_DUMPABLE]	 = all_but2,
+		[PR_SET_NAME]		 = all_but2,
+		[PR_GET_CHILD_SUBREAPER] = all_but2,
+		[PR_SET_CHILD_SUBREAPER] = all_but2,
+		[PR_GET_SECUREBITS]	 = all,
+		[PR_SET_SECUREBITS]	 = all_but2,
+		[PR_SET_MM]		 = SPO_ARG4 | SPO_ARG5 | SPO_ARG6,
+		[PR_GET_PDEATHSIG]	 = all,
+		[PR_SET_PDEATHSIG]	 = all_but2,
+	};
+
+	if (option < ARRAY_SIZE(masks))
+		arg->mask |= masks[option];
+
+	return prctl__scnprintf_option(option, bf, size);
+}
diff --git a/trace/beauty/prctl_option.sh b/trace/beauty/prctl_option.sh
new file mode 100755
index 0000000..0be4138
--- /dev/null
+++ b/trace/beauty/prctl_option.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+# Emit C tables "[number] = name" for the PR_[GS]ET* and PR_SET_MM_* defines found in $1/prctl.h.
+header_dir=$1
+
+printf "static const char *prctl_options[] = {\n"
+regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*'
+grep -E "$regex" "${header_dir}/prctl.h" | grep -v PR_SET_PTRACER | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	sort -n | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
+
+printf "static const char *prctl_set_mm_options[] = {\n"
+regex='^#[[:space:]]+define[[:space:]]+PR_SET_MM_(\w+)[[:space:]]*([[:digit:]]+).*'
+grep -E "$regex" "${header_dir}/prctl.h" | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	sort -n | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/trace/beauty/sched_policy.c b/trace/beauty/sched_policy.c
new file mode 100644
index 0000000..ba5096a
--- /dev/null
+++ b/trace/beauty/sched_policy.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sched.h>
+
+/*
+ * Not defined anywhere else, probably, just to make sure we
+ * catch future flags
+ */
+#define SCHED_POLICY_MASK 0xff
+
+#ifndef SCHED_DEADLINE
+#define SCHED_DEADLINE 6
+#endif
+#ifndef SCHED_RESET_ON_FORK
+#define SCHED_RESET_ON_FORK 0x40000000
+#endif
+
+static size_t syscall_arg__scnprintf_sched_policy(char *bf, size_t size,
+						  struct syscall_arg *arg)
+{
+	const char *policies[] = {
+		"NORMAL", "FIFO", "RR", "BATCH", "ISO", "IDLE", "DEADLINE",
+	};
+	size_t printed;
+	int policy = arg->val,
+	    flags = policy & ~SCHED_POLICY_MASK;
+	/* Bits inside SCHED_POLICY_MASK select the policy name; everything outside it is treated as flags. */
+	policy &= SCHED_POLICY_MASK;
+	if (policy <= SCHED_DEADLINE)
+		printed = scnprintf(bf, size, "%s", policies[policy]);
+	else
+		printed = scnprintf(bf, size, "%#x", policy);
+	/* Each known flag is appended as "|NAME" and removed from flags. */
+#define	P_POLICY_FLAG(n) \
+	if (flags & SCHED_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
+		flags &= ~SCHED_##n; \
+	}
+
+	P_POLICY_FLAG(RESET_ON_FORK);
+#undef P_POLICY_FLAG
+	/* Flag bits that are still set have no name here and are appended in hex. */
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
+
+	return printed;
+}
+
+#define SCA_SCHED_POLICY syscall_arg__scnprintf_sched_policy
diff --git a/trace/beauty/seccomp.c b/trace/beauty/seccomp.c
new file mode 100644
index 0000000..b7097fd
--- /dev/null
+++ b/trace/beauty/seccomp.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int op = arg->val;
+	size_t printed = 0;
+
+	switch (op) {
+#define	P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
+	P_SECCOMP_SET_MODE_OP(STRICT);
+	P_SECCOMP_SET_MODE_OP(FILTER);
+#undef P_SECCOMP_SET_MODE_OP
+	default: printed = scnprintf(bf, size, "%#x", op);			  break;
+	}
+
+	return printed;
+}
+
+#define SCA_SECCOMP_OP  syscall_arg__scnprintf_seccomp_op
+
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
+#endif
+
+static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size,
+						   struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+#define	P_FLAG(n) \
+	if (flags & SECCOMP_FILTER_FLAG_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~SECCOMP_FILTER_FLAG_##n; \
+	}
+
+	P_FLAG(TSYNC);
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
diff --git a/trace/beauty/signum.c b/trace/beauty/signum.c
new file mode 100644
index 0000000..bde18a5
--- /dev/null
+++ b/trace/beauty/signum.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <signal.h>
+
+static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int sig = arg->val;
+
+	switch (sig) {
+#define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
+	P_SIGNUM(HUP);
+	P_SIGNUM(INT);
+	P_SIGNUM(QUIT);
+	P_SIGNUM(ILL);
+	P_SIGNUM(TRAP);
+	P_SIGNUM(ABRT);
+	P_SIGNUM(BUS);
+	P_SIGNUM(FPE);
+	P_SIGNUM(KILL);
+	P_SIGNUM(USR1);
+	P_SIGNUM(SEGV);
+	P_SIGNUM(USR2);
+	P_SIGNUM(PIPE);
+	P_SIGNUM(ALRM);
+	P_SIGNUM(TERM);
+	P_SIGNUM(CHLD);
+	P_SIGNUM(CONT);
+	P_SIGNUM(STOP);
+	P_SIGNUM(TSTP);
+	P_SIGNUM(TTIN);
+	P_SIGNUM(TTOU);
+	P_SIGNUM(URG);
+	P_SIGNUM(XCPU);
+	P_SIGNUM(XFSZ);
+	P_SIGNUM(VTALRM);
+	P_SIGNUM(PROF);
+	P_SIGNUM(WINCH);
+	P_SIGNUM(IO);
+	P_SIGNUM(PWR);
+	P_SIGNUM(SYS);
+#ifdef SIGEMT
+	P_SIGNUM(EMT);
+#endif
+#ifdef SIGSTKFLT
+	P_SIGNUM(STKFLT);
+#endif
+#ifdef SIGSWI
+	P_SIGNUM(SWI);
+#endif
+	default: break;
+	}
+
+	return scnprintf(bf, size, "%#x", sig);
+}
+
+#define SCA_SIGNUM syscall_arg__scnprintf_signum
diff --git a/trace/beauty/sndrv_ctl_ioctl.sh b/trace/beauty/sndrv_ctl_ioctl.sh
new file mode 100755
index 0000000..aad5ab1
--- /dev/null
+++ b/trace/beauty/sndrv_ctl_ioctl.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+sound_header_dir=$1
+
+printf "static const char *sndrv_ctl_ioctl_cmds[] = {\n"
+grep "^#define[\t ]\+SNDRV_CTL_IOCTL_" $sound_header_dir/asound.h | \
+	sed -r 's/^#define +SNDRV_CTL_IOCTL_([A-Z0-9_]+)[\t ]+_IO[RW]*\( *.U., *(0x[[:xdigit:]]+),?.*/\t[\2] = \"\1\",/g'
+printf "};\n"
diff --git a/trace/beauty/sndrv_pcm_ioctl.sh b/trace/beauty/sndrv_pcm_ioctl.sh
new file mode 100755
index 0000000..b7e9ef6
--- /dev/null
+++ b/trace/beauty/sndrv_pcm_ioctl.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+sound_header_dir=$1
+
+printf "static const char *sndrv_pcm_ioctl_cmds[] = {\n"
+grep "^#define[\t ]\+SNDRV_PCM_IOCTL_" $sound_header_dir/asound.h | \
+	sed -r 's/^#define +SNDRV_PCM_IOCTL_([A-Z0-9_]+)[\t ]+_IO[RW]*\( *.A., *(0x[[:xdigit:]]+),?.*/\t[\2] = \"\1\",/g'
+printf "};\n"
diff --git a/trace/beauty/socket_type.c b/trace/beauty/socket_type.c
new file mode 100644
index 0000000..bca26ae
--- /dev/null
+++ b/trace/beauty/socket_type.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef SOCK_DCCP
+# define SOCK_DCCP		6
+#endif
+
+#ifndef SOCK_CLOEXEC
+# define SOCK_CLOEXEC		02000000
+#endif
+
+#ifndef SOCK_NONBLOCK
+# define SOCK_NONBLOCK		00004000
+#endif
+
+#ifndef SOCK_TYPE_MASK
+#define SOCK_TYPE_MASK 0xf
+#endif
+
+static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, struct syscall_arg *arg)
+{
+	size_t printed;
+	int type = arg->val,
+	    flags = type & ~SOCK_TYPE_MASK;
+
+	type &= SOCK_TYPE_MASK;
+	/*
+	 * Can't use a strarray, MIPS may override for ABI reasons.
+	 */
+	switch (type) {
+#define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
+	P_SK_TYPE(STREAM);
+	P_SK_TYPE(DGRAM);
+	P_SK_TYPE(RAW);
+	P_SK_TYPE(RDM);
+	P_SK_TYPE(SEQPACKET);
+	P_SK_TYPE(DCCP);
+	P_SK_TYPE(PACKET);
+#undef P_SK_TYPE
+	default:
+		printed = scnprintf(bf, size, "%#x", type);
+	}
+
+#define	P_SK_FLAG(n) \
+	if (flags & SOCK_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
+		flags &= ~SOCK_##n; \
+	}
+
+	P_SK_FLAG(CLOEXEC);
+	P_SK_FLAG(NONBLOCK);
+#undef P_SK_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
+
+	return printed;
+}
+
+#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
diff --git a/trace/beauty/statx.c b/trace/beauty/statx.c
new file mode 100644
index 0000000..5643b69
--- /dev/null
+++ b/trace/beauty/statx.c
@@ -0,0 +1,72 @@
+/*
+ * trace/beauty/statx.c
+ *
+ *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <sys/types.h>
+#include <uapi/linux/fcntl.h>
+#include <uapi/linux/stat.h>
+
+size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+	if (flags == 0)
+		return scnprintf(bf, size, "SYNC_AS_STAT");
+#define	P_FLAG(n) \
+	if (flags & AT_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~AT_##n; \
+	}
+
+	P_FLAG(SYMLINK_NOFOLLOW);
+	P_FLAG(REMOVEDIR);
+	P_FLAG(SYMLINK_FOLLOW);
+	P_FLAG(NO_AUTOMOUNT);
+	P_FLAG(EMPTY_PATH);
+	P_FLAG(STATX_FORCE_SYNC);
+	P_FLAG(STATX_DONT_SYNC);
+
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+#define	P_FLAG(n) \
+	if (flags & STATX_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~STATX_##n; \
+	}
+
+	P_FLAG(TYPE);
+	P_FLAG(MODE);
+	P_FLAG(NLINK);
+	P_FLAG(UID);
+	P_FLAG(GID);
+	P_FLAG(ATIME);
+	P_FLAG(MTIME);
+	P_FLAG(CTIME);
+	P_FLAG(INO);
+	P_FLAG(SIZE);
+	P_FLAG(BLOCKS);
+	P_FLAG(BTIME);
+
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
diff --git a/trace/beauty/vhost_virtio_ioctl.sh b/trace/beauty/vhost_virtio_ioctl.sh
new file mode 100755
index 0000000..76f1de6
--- /dev/null
+++ b/trace/beauty/vhost_virtio_ioctl.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+# Emit C tables "[number] = name" for the VHOST_* ioctls in $1/vhost.h: _IO/_IOW first, then _IOR/_IOWR.
+vhost_virtio_header_dir=$1
+
+printf "static const char *vhost_virtio_ioctl_cmds[] = {\n"
+regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
+grep -E "$regex" "${vhost_virtio_header_dir}/vhost.h" | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	sort | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
+
+printf "static const char *vhost_virtio_ioctl_read_cmds[] = {\n"
+regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?R\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
+grep -E "$regex" "${vhost_virtio_header_dir}/vhost.h" | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	sort | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/trace/beauty/waitid_options.c b/trace/beauty/waitid_options.c
new file mode 100644
index 0000000..8465281
--- /dev/null
+++ b/trace/beauty/waitid_options.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <sys/wait.h>
+
+static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size,
+						    struct syscall_arg *arg)
+{
+	int printed = 0, options = arg->val;
+
+#define	P_OPTION(n) \
+	if (options & W##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		options &= ~W##n; \
+	}
+
+	P_OPTION(NOHANG);
+	P_OPTION(UNTRACED);
+	P_OPTION(CONTINUED);
+#undef P_OPTION
+
+	if (options)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", options);
+
+	return printed;
+}
+
+#define SCA_WAITID_OPTIONS syscall_arg__scnprintf_waitid_options
diff --git a/trace/strace/groups/file b/trace/strace/groups/file
new file mode 100644
index 0000000..722e25d
--- /dev/null
+++ b/trace/strace/groups/file
@@ -0,0 +1,20 @@
+access
+chmod
+creat
+execve
+faccessat
+getcwd
+lstat
+mkdir
+open
+openat
+quotactl
+read
+readlink
+rename
+rmdir
+stat
+statfs
+symlink
+unlink
+write
diff --git a/ui/Build b/ui/Build
new file mode 100644
index 0000000..0a73538
--- /dev/null
+++ b/ui/Build
@@ -0,0 +1,14 @@
+libperf-y += setup.o
+libperf-y += helpline.o
+libperf-y += progress.o
+libperf-y += util.o
+libperf-y += hist.o
+libperf-y += stdio/hist.o
+
+CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))"
+
+libperf-$(CONFIG_SLANG) += browser.o
+libperf-$(CONFIG_SLANG) += browsers/
+libperf-$(CONFIG_SLANG) += tui/
+
+CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST
diff --git a/ui/browser.c b/ui/browser.c
new file mode 100644
index 0000000..4f75561
--- /dev/null
+++ b/ui/browser.c
@@ -0,0 +1,793 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../util.h"
+#include "../string2.h"
+#include "../config.h"
+#include "../../perf.h"
+#include "libslang.h"
+#include "ui.h"
+#include "util.h"
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/string.h>
+#include <stdlib.h>
+#include <sys/ttydefaults.h>
+#include "browser.h"
+#include "helpline.h"
+#include "keysyms.h"
+#include "../color.h"
+#include "sane_ctype.h"
+
+static int ui_browser__percent_color(struct ui_browser *browser,
+				     double percent, bool current)
+{
+	if (current && (!browser->use_navkeypressed || browser->navkeypressed))
+		return HE_COLORSET_SELECTED;
+	if (percent >= MIN_RED)
+		return HE_COLORSET_TOP;
+	if (percent >= MIN_GREEN)
+		return HE_COLORSET_MEDIUM;
+	return HE_COLORSET_NORMAL;
+}
+
+int ui_browser__set_color(struct ui_browser *browser, int color)
+{
+	int ret = browser->current_color;
+	browser->current_color = color;
+	SLsmg_set_color(color);
+	return ret;
+}
+
+void ui_browser__set_percent_color(struct ui_browser *browser,
+				   double percent, bool current)
+{
+	 int color = ui_browser__percent_color(browser, percent, current);
+	 ui_browser__set_color(browser, color);
+}
+
+void ui_browser__gotorc_title(struct ui_browser *browser, int y, int x)
+{
+	SLsmg_gotorc(browser->y + y, browser->x + x);
+}
+
+void ui_browser__gotorc(struct ui_browser *browser, int y, int x)
+{
+	SLsmg_gotorc(browser->y + y + browser->extra_title_lines, browser->x + x);
+}
+
+void ui_browser__write_nstring(struct ui_browser *browser __maybe_unused, const char *msg,
+			       unsigned int width)
+{
+	slsmg_write_nstring(msg, width);
+}
+
+void ui_browser__vprintf(struct ui_browser *browser __maybe_unused, const char *fmt, va_list args)
+{
+	slsmg_vprintf(fmt, args);
+}
+
+void ui_browser__printf(struct ui_browser *browser __maybe_unused, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	ui_browser__vprintf(browser, fmt, args);
+	va_end(args);
+}
+
+static struct list_head *
+ui_browser__list_head_filter_entries(struct ui_browser *browser,
+				     struct list_head *pos)
+{
+	do {
+		if (!browser->filter || !browser->filter(browser, pos))
+			return pos;
+		pos = pos->next;
+	} while (pos != browser->entries);
+
+	return NULL;
+}
+
+static struct list_head *
+ui_browser__list_head_filter_prev_entries(struct ui_browser *browser,
+					  struct list_head *pos)
+{
+	do {
+		if (!browser->filter || !browser->filter(browser, pos))
+			return pos;
+		pos = pos->prev;
+	} while (pos != browser->entries);
+
+	return NULL;
+}
+
+void ui_browser__list_head_seek(struct ui_browser *browser, off_t offset, int whence)
+{
+	struct list_head *head = browser->entries;
+	struct list_head *pos;
+
+	if (browser->nr_entries == 0)
+		return;
+
+	switch (whence) {
+	case SEEK_SET:
+		pos = ui_browser__list_head_filter_entries(browser, head->next);
+		break;
+	case SEEK_CUR:
+		pos = browser->top;
+		break;
+	case SEEK_END:
+		pos = ui_browser__list_head_filter_prev_entries(browser, head->prev);
+		break;
+	default:
+		return;
+	}
+
+	assert(pos != NULL);
+
+	if (offset > 0) {
+		while (offset-- != 0)
+			pos = ui_browser__list_head_filter_entries(browser, pos->next);
+	} else {
+		while (offset++ != 0)
+			pos = ui_browser__list_head_filter_prev_entries(browser, pos->prev);
+	}
+
+	browser->top = pos;
+}
+
+void ui_browser__rb_tree_seek(struct ui_browser *browser, off_t offset, int whence)
+{
+	struct rb_root *root = browser->entries;
+	struct rb_node *nd;
+
+	switch (whence) {
+	case SEEK_SET:
+		nd = rb_first(root);
+		break;
+	case SEEK_CUR:
+		nd = browser->top;
+		break;
+	case SEEK_END:
+		nd = rb_last(root);
+		break;
+	default:
+		return;
+	}
+
+	if (offset > 0) {
+		while (offset-- != 0)
+			nd = rb_next(nd);
+	} else {
+		while (offset++ != 0)
+			nd = rb_prev(nd);
+	}
+
+	browser->top = nd;
+}
+
+unsigned int ui_browser__rb_tree_refresh(struct ui_browser *browser)
+{
+	struct rb_node *nd;
+	int row = 0;
+
+	if (browser->top == NULL)
+                browser->top = rb_first(browser->entries);
+
+	nd = browser->top;
+
+	while (nd != NULL) {
+		ui_browser__gotorc(browser, row, 0);
+		browser->write(browser, nd, row);
+		if (++row == browser->rows)
+			break;
+		nd = rb_next(nd);
+	}
+
+	return row;
+}
+
+bool ui_browser__is_current_entry(struct ui_browser *browser, unsigned row)
+{
+	return browser->top_idx + row == browser->index;
+}
+
+void ui_browser__refresh_dimensions(struct ui_browser *browser)
+{
+	browser->width = SLtt_Screen_Cols - 1;
+	browser->height = browser->rows = SLtt_Screen_Rows - 2;
+	browser->rows -= browser->extra_title_lines;
+	browser->y = 1;
+	browser->x = 0;
+}
+
+void ui_browser__handle_resize(struct ui_browser *browser)
+{
+	ui__refresh_dimensions(false);
+	ui_browser__show(browser, browser->title, "%s", ui_helpline__current); /* re-show title + current helpline; the saved text is data, never a format */
+	ui_browser__refresh(browser);
+}
+
+int ui_browser__warning(struct ui_browser *browser, int timeout,
+			const char *format, ...)
+{
+	va_list args;
+	char *text;
+	int key = 0, err;
+
+	va_start(args, format);
+	err = vasprintf(&text, format, args);
+	va_end(args);
+
+	if (err < 0) {
+		va_start(args, format);
+		ui_helpline__vpush(format, args);
+		va_end(args);
+	} else {
+		while ((key = ui__question_window("Warning!", text,
+						   "Press any key...",
+						   timeout)) == K_RESIZE)
+			ui_browser__handle_resize(browser);
+		free(text);
+	}
+
+	return key;
+}
+
+int ui_browser__help_window(struct ui_browser *browser, const char *text)
+{
+	int key;
+
+	while ((key = ui__help_window(text)) == K_RESIZE)
+		ui_browser__handle_resize(browser);
+
+	return key;
+}
+
+bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text)
+{
+	int key;
+
+	while ((key = ui__dialog_yesno(text)) == K_RESIZE)
+		ui_browser__handle_resize(browser);
+
+	return key == K_ENTER || toupper(key) == 'Y';
+}
+
+void ui_browser__reset_index(struct ui_browser *browser)
+{
+	browser->index = browser->top_idx = 0;
+	browser->seek(browser, 0, SEEK_SET);
+}
+
+void __ui_browser__show_title(struct ui_browser *browser, const char *title)
+{
+	SLsmg_gotorc(0, 0);
+	ui_browser__set_color(browser, HE_COLORSET_ROOT);
+	ui_browser__write_nstring(browser, title, browser->width + 1);
+}
+
+void ui_browser__show_title(struct ui_browser *browser, const char *title)
+{
+	pthread_mutex_lock(&ui__lock);
+	__ui_browser__show_title(browser, title);
+	pthread_mutex_unlock(&ui__lock);
+}
+
+int ui_browser__show(struct ui_browser *browser, const char *title,
+		     const char *helpline, ...)
+{
+	int err;
+	va_list ap;
+	/* Shows @title and pushes the printf-style @helpline; returns 0, or -1 if the helpline could not be formatted. */
+	if (browser->refresh_dimensions == NULL)
+		browser->refresh_dimensions = ui_browser__refresh_dimensions;
+	browser->refresh_dimensions(browser);
+	pthread_mutex_lock(&ui__lock);
+	__ui_browser__show_title(browser, title);
+
+	browser->title = title;
+	zfree(&browser->helpline);
+
+	va_start(ap, helpline);
+	err = vasprintf(&browser->helpline, helpline, ap);
+	va_end(ap);
+	if (err < 0)
+		browser->helpline = NULL; /* vasprintf() may leave the pointer undefined on failure */
+	else if (err > 0)
+		ui_helpline__push(browser->helpline);
+	pthread_mutex_unlock(&ui__lock);
+	return err < 0 ? -1 : 0;
+}
+
+void ui_browser__hide(struct ui_browser *browser)
+{
+	pthread_mutex_lock(&ui__lock);
+	ui_helpline__pop();
+	zfree(&browser->helpline);
+	pthread_mutex_unlock(&ui__lock);
+}
+
+static void ui_browser__scrollbar_set(struct ui_browser *browser)
+{
+	int height = browser->height, h = 0, pct = 0,
+	    col = browser->width,
+	    row = 0;
+
+	if (browser->nr_entries > 1) {
+		pct = ((browser->index * (browser->height - 1)) /
+		       (browser->nr_entries - 1));
+	}
+
+	SLsmg_set_char_set(1);
+
+	while (h < height) {
+	        ui_browser__gotorc(browser, row++, col);
+		SLsmg_write_char(h == pct ? SLSMG_DIAMOND_CHAR : SLSMG_CKBRD_CHAR);
+		++h;
+	}
+
+	SLsmg_set_char_set(0);
+}
+
+static int __ui_browser__refresh(struct ui_browser *browser)
+{
+	int row;
+	int width = browser->width;
+
+	row = browser->refresh(browser);
+	ui_browser__set_color(browser, HE_COLORSET_NORMAL);
+	/* Draw the scrollbar, or widen the area blanked below by one column. */
+	if (!browser->use_navkeypressed || browser->navkeypressed)
+		ui_browser__scrollbar_set(browser);
+	else
+		width += 1;
+	/* Fill with spaces the rows after the 'row' returned by ->refresh(). */
+	SLsmg_fill_region(browser->y + row + browser->extra_title_lines, browser->x,
+			  browser->rows - row, width, ' ');
+
+	return 0;
+}
+
+int ui_browser__refresh(struct ui_browser *browser)
+{
+	pthread_mutex_lock(&ui__lock);
+	__ui_browser__refresh(browser);
+	pthread_mutex_unlock(&ui__lock);
+
+	return 0;
+}
+
+/*
+ * Here we're updating nr_entries _after_ we started browsing, i.e.  we have to
+ * forget about any reference to any entry in the underlying data structure,
+ * that is why we do a SEEK_SET. Think about 'perf top' in the hists browser
+ * after an output_resort and hist decay.
+ */
+void ui_browser__update_nr_entries(struct ui_browser *browser, u32 nr_entries)
+{
+	off_t offset = (off_t)nr_entries - (off_t)browser->nr_entries;
+
+	browser->nr_entries = nr_entries;
+	/* Signed math: a u32 subtraction wraps, so a 64-bit off_t is never < 0. */
+	if (offset < 0) {
+		if (browser->top_idx < (u64)-offset)
+			offset = -browser->top_idx;
+
+		browser->index += offset;
+		browser->top_idx += offset;
+	}
+
+	browser->top = NULL;
+	browser->seek(browser, browser->top_idx, SEEK_SET);
+}
+
+int ui_browser__run(struct ui_browser *browser, int delay_secs)
+{
+	int err, key;
+
+	while (1) {
+		off_t offset;
+
+		pthread_mutex_lock(&ui__lock);
+		err = __ui_browser__refresh(browser);
+		SLsmg_refresh();
+		pthread_mutex_unlock(&ui__lock);
+		if (err < 0)
+			break;
+
+		key = ui__getch(delay_secs);
+		/* Resized: recompute geometry, then redraw title and helpline. */
+		if (key == K_RESIZE) {
+			ui__refresh_dimensions(false);
+			browser->refresh_dimensions(browser);
+			ui_browser__show_title(browser, browser->title);
+			ui_helpline__puts(browser->helpline);
+			continue;
+		}
+		/* Until armed, a navigation key only turns navigation on. */
+		if (browser->use_navkeypressed && !browser->navkeypressed) {
+			if (key == K_DOWN || key == K_UP ||
+			    (browser->columns && (key == K_LEFT || key == K_RIGHT)) ||
+			    key == K_PGDN || key == K_PGUP ||
+			    key == K_HOME || key == K_END ||
+			    key == ' ') {
+				browser->navkeypressed = true;
+				continue;
+			} else
+				return key;
+		}
+
+		switch (key) {
+		case K_DOWN:
+			if (browser->index + 1 >= browser->nr_entries)
+				break;	/* last entry, or nothing to browse */
+			++browser->index;
+			if (browser->index == browser->top_idx + browser->rows) {
+				++browser->top_idx;
+				browser->seek(browser, +1, SEEK_CUR);
+			}
+			break;
+		case K_UP:
+			if (browser->index == 0)
+				break;
+			--browser->index;
+			if (browser->index < browser->top_idx) {
+				--browser->top_idx;
+				browser->seek(browser, -1, SEEK_CUR);
+			}
+			break;
+		case K_RIGHT:
+			if (!browser->columns)
+				goto out;
+			if (browser->horiz_scroll < browser->columns - 1)
+				++browser->horiz_scroll;
+			break;
+		case K_LEFT:
+			if (!browser->columns)
+				goto out;
+			if (browser->horiz_scroll != 0)
+				--browser->horiz_scroll;
+			break;
+		case K_PGDN:
+		case ' ':
+			if (browser->top_idx + browser->rows >= browser->nr_entries)
+				break;
+			/* Never page the cursor past the last entry. */
+			offset = browser->rows;
+			if (browser->index + offset > browser->nr_entries - 1)
+				offset = browser->nr_entries - 1 - browser->index;
+			browser->index += offset;
+			browser->top_idx += offset;
+			browser->seek(browser, +offset, SEEK_CUR);
+			break;
+		case K_PGUP:
+			if (browser->top_idx == 0)
+				break;
+
+			if (browser->top_idx < browser->rows)
+				offset = browser->top_idx;
+			else
+				offset = browser->rows;
+			browser->index -= offset;
+			browser->top_idx -= offset;
+			browser->seek(browser, -offset, SEEK_CUR);
+			break;
+		case K_HOME:
+			ui_browser__reset_index(browser);
+			break;
+		case K_END:
+			if (browser->nr_entries == 0)
+				break;
+			offset = browser->rows - 1;
+			if (offset >= browser->nr_entries)
+				offset = browser->nr_entries - 1;
+			browser->index = browser->nr_entries - 1;
+			browser->top_idx = browser->index - offset;
+			browser->seek(browser, -offset, SEEK_END);
+			break;
+		default:
+		out:
+			return key;
+		}
+	}
+	return -1;
+}
+
+unsigned int ui_browser__list_head_refresh(struct ui_browser *browser)
+{
+	struct list_head *pos;
+	struct list_head *head = browser->entries;
+	int row = 0;
+
+	if (browser->top == NULL || browser->top == browser->entries)
+                browser->top = ui_browser__list_head_filter_entries(browser, head->next);
+
+	pos = browser->top;
+
+	list_for_each_from(pos, head) {
+		if (!browser->filter || !browser->filter(browser, pos)) {
+			ui_browser__gotorc(browser, row, 0);
+			browser->write(browser, pos, row);
+			if (++row == browser->rows)
+				break;
+		}
+	}
+
+	return row;
+}
+
+static struct ui_browser_colorset {
+	const char *name, *fg, *bg;
+	int colorset;
+} ui_browser__colorsets[] = {
+	{
+		.colorset = HE_COLORSET_TOP,
+		.name	  = "top",
+		.fg	  = "red",
+		.bg	  = "default",
+	},
+	{
+		.colorset = HE_COLORSET_MEDIUM,
+		.name	  = "medium",
+		.fg	  = "green",
+		.bg	  = "default",
+	},
+	{
+		.colorset = HE_COLORSET_NORMAL,
+		.name	  = "normal",
+		.fg	  = "default",
+		.bg	  = "default",
+	},
+	{
+		.colorset = HE_COLORSET_SELECTED,
+		.name	  = "selected",
+		.fg	  = "black",
+		.bg	  = "yellow",
+	},
+	{
+		.colorset = HE_COLORSET_JUMP_ARROWS,
+		.name	  = "jump_arrows",
+		.fg	  = "blue",
+		.bg	  = "default",
+	},
+	{
+		.colorset = HE_COLORSET_ADDR,
+		.name	  = "addr",
+		.fg	  = "magenta",
+		.bg	  = "default",
+	},
+	{
+		.colorset = HE_COLORSET_ROOT,
+		.name	  = "root",
+		.fg	  = "white",
+		.bg	  = "blue",
+	},
+	{
+		.name = NULL,
+	}
+};
+
+
+static int ui_browser__color_config(const char *var, const char *value,
+				    void *data __maybe_unused)
+{
+	char *fg = NULL, *bg;
+	int i;
+
+	/* Only "colors.<name>" variables are handled here */
+	if (!strstarts(var, "colors."))
+		return 0;
+
+	for (i = 0; ui_browser__colorsets[i].name != NULL; ++i) {
+		const char *name = var + 7;	/* skip "colors." */
+
+		if (strcmp(ui_browser__colorsets[i].name, name) != 0)
+			continue;
+
+		fg = strdup(value);
+		if (fg == NULL)
+			break;
+
+		bg = strchr(fg, ',');
+		if (bg == NULL)
+			break;
+		/* Cut "fg,bg" at the comma; fg is kept (not freed) on success. */
+		*bg = '\0';
+		bg = ltrim(++bg);
+		ui_browser__colorsets[i].bg = bg;
+		ui_browser__colorsets[i].fg = fg;
+		return 0;
+	}
+
+	free(fg);
+	return -1;
+}
+
+void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence)
+{
+	char **entries = browser->entries;
+
+	/*
+	 * ->top and ->entries address an array of strings, so step by elements
+	 * (not bytes) and don't add top_idx again: SEEK_CUR is relative to ->top.
+	 * SEEK_SET ignores offset and rewinds to the first entry, as before.
+	 */
+	if (whence == SEEK_SET)
+		browser->top = entries;
+	else if (whence == SEEK_CUR)
+		browser->top = (char **)browser->top + offset;
+	else if (whence == SEEK_END)
+		browser->top = entries + browser->nr_entries - 1 + offset;
+}
+
+unsigned int ui_browser__argv_refresh(struct ui_browser *browser)
+{
+	unsigned int row = 0, idx = browser->top_idx;
+	char **pos;
+
+	if (browser->top == NULL)
+		browser->top = browser->entries;
+
+	pos = (char **)browser->top;
+	while (idx < browser->nr_entries) {
+		if (!browser->filter || !browser->filter(browser, *pos)) {
+			ui_browser__gotorc(browser, row, 0);
+			browser->write(browser, pos, row);
+			if (++row == browser->rows)
+				break;
+		}
+
+		++idx;
+		++pos;
+	}
+
+	return row;
+}
+
+void __ui_browser__vline(struct ui_browser *browser, unsigned int column,
+			 u16 start, u16 end)
+{
+	SLsmg_set_char_set(1);
+	ui_browser__gotorc(browser, start, column);
+	SLsmg_draw_vline(end - start + 1);
+	SLsmg_set_char_set(0);
+}
+
+void ui_browser__write_graph(struct ui_browser *browser __maybe_unused,
+			     int graph)
+{
+	SLsmg_set_char_set(1);
+	SLsmg_write_char(graph);
+	SLsmg_set_char_set(0);
+}
+
+static void __ui_browser__line_arrow_up(struct ui_browser *browser,
+					unsigned int column,
+					u64 start, u64 end)
+{
+	unsigned int row, end_row;
+
+	SLsmg_set_char_set(1);
+
+	if (start < browser->top_idx + browser->rows) {
+		row = start - browser->top_idx;
+		ui_browser__gotorc(browser, row, column);
+		SLsmg_write_char(SLSMG_LLCORN_CHAR);
+		ui_browser__gotorc(browser, row, column + 1);
+		SLsmg_draw_hline(2);
+
+		if (row-- == 0)
+			goto out;
+	} else
+		row = browser->rows - 1;
+
+	if (end > browser->top_idx)
+		end_row = end - browser->top_idx;
+	else
+		end_row = 0;
+
+	ui_browser__gotorc(browser, end_row, column);
+	SLsmg_draw_vline(row - end_row + 1);
+
+	ui_browser__gotorc(browser, end_row, column);
+	if (end >= browser->top_idx) {
+		SLsmg_write_char(SLSMG_ULCORN_CHAR);
+		ui_browser__gotorc(browser, end_row, column + 1);
+		SLsmg_write_char(SLSMG_HLINE_CHAR);
+		ui_browser__gotorc(browser, end_row, column + 2);
+		SLsmg_write_char(SLSMG_RARROW_CHAR);
+	}
+out:
+	SLsmg_set_char_set(0);
+}
+
+static void __ui_browser__line_arrow_down(struct ui_browser *browser,
+					  unsigned int column,
+					  u64 start, u64 end)
+{
+	unsigned int row, end_row;
+
+	SLsmg_set_char_set(1);
+
+	if (start >= browser->top_idx) {
+		row = start - browser->top_idx;
+		ui_browser__gotorc(browser, row, column);
+		SLsmg_write_char(SLSMG_ULCORN_CHAR);
+		ui_browser__gotorc(browser, row, column + 1);
+		SLsmg_draw_hline(2);
+
+		if (++row == 0)
+			goto out;
+	} else
+		row = 0;
+
+	if (end >= browser->top_idx + browser->rows)
+		end_row = browser->rows - 1;
+	else
+		end_row = end - browser->top_idx;
+
+	ui_browser__gotorc(browser, row, column);
+	SLsmg_draw_vline(end_row - row + 1);
+
+	ui_browser__gotorc(browser, end_row, column);
+	if (end < browser->top_idx + browser->rows) {
+		SLsmg_write_char(SLSMG_LLCORN_CHAR);
+		ui_browser__gotorc(browser, end_row, column + 1);
+		SLsmg_write_char(SLSMG_HLINE_CHAR);
+		ui_browser__gotorc(browser, end_row, column + 2);
+		SLsmg_write_char(SLSMG_RARROW_CHAR);
+	}
+out:
+	SLsmg_set_char_set(0);
+}
+
+void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
+			      u64 start, u64 end)
+{
+	if (start > end)
+		__ui_browser__line_arrow_up(browser, column, start, end);
+	else
+		__ui_browser__line_arrow_down(browser, column, start, end);
+}
+
+void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
+			    unsigned int row, bool arrow_down)
+{
+	unsigned int end_row;
+
+	if (row >= browser->top_idx)
+		end_row = row - browser->top_idx;
+	else
+		return;
+
+	SLsmg_set_char_set(1);
+
+	if (arrow_down) {
+		ui_browser__gotorc(browser, end_row, column - 1);
+		SLsmg_write_char(SLSMG_ULCORN_CHAR);
+		ui_browser__gotorc(browser, end_row, column);
+		SLsmg_draw_hline(2);
+		ui_browser__gotorc(browser, end_row + 1, column - 1);
+		SLsmg_write_char(SLSMG_LTEE_CHAR);
+	} else {
+		ui_browser__gotorc(browser, end_row, column - 1);
+		SLsmg_write_char(SLSMG_LTEE_CHAR);
+		ui_browser__gotorc(browser, end_row, column);
+		SLsmg_draw_hline(2);
+	}
+
+	SLsmg_set_char_set(0);
+}
+
+void ui_browser__init(void)
+{
+	int i = 0;
+
+	perf_config(ui_browser__color_config, NULL);
+
+	while (ui_browser__colorsets[i].name) {
+		struct ui_browser_colorset *c = &ui_browser__colorsets[i++];
+		sltt_set_color(c->colorset, c->name, c->fg, c->bg);
+	}
+}
diff --git a/ui/browser.h b/ui/browser.h
new file mode 100644
index 0000000..aa5932e
--- /dev/null
+++ b/ui/browser.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_BROWSER_H_
+#define _PERF_UI_BROWSER_H_ 1
+
+#include <linux/types.h>
+#include <stdarg.h>
+
+#define HE_COLORSET_TOP		50
+#define HE_COLORSET_MEDIUM	51
+#define HE_COLORSET_NORMAL	52
+#define HE_COLORSET_SELECTED	53
+#define HE_COLORSET_JUMP_ARROWS	54
+#define HE_COLORSET_ADDR	55
+#define HE_COLORSET_ROOT	56
+
+struct ui_browser {
+	u64	      index, top_idx;
+	void	      *top, *entries;
+	u16	      y, x, width, height, rows, columns, horiz_scroll;
+	u8	      extra_title_lines;
+	int	      current_color;
+	void	      *priv;
+	const char    *title;
+	char	      *helpline;
+	void 	      (*refresh_dimensions)(struct ui_browser *browser);
+	unsigned int  (*refresh)(struct ui_browser *browser);
+	void	      (*write)(struct ui_browser *browser, void *entry, int row);
+	void	      (*seek)(struct ui_browser *browser, off_t offset, int whence);
+	bool	      (*filter)(struct ui_browser *browser, void *entry);
+	u32	      nr_entries;
+	bool	      navkeypressed;
+	bool	      use_navkeypressed;
+};
+
+int  ui_browser__set_color(struct ui_browser *browser, int color);
+void ui_browser__set_percent_color(struct ui_browser *browser,
+				   double percent, bool current);
+bool ui_browser__is_current_entry(struct ui_browser *browser, unsigned row);
+void ui_browser__refresh_dimensions(struct ui_browser *browser);
+void ui_browser__reset_index(struct ui_browser *browser);
+
+void ui_browser__gotorc_title(struct ui_browser *browser, int y, int x);
+void ui_browser__gotorc(struct ui_browser *browser, int y, int x);
+void ui_browser__write_nstring(struct ui_browser *browser, const char *msg,
+			       unsigned int width);
+void ui_browser__vprintf(struct ui_browser *browser, const char *fmt, va_list args);
+void ui_browser__printf(struct ui_browser *browser, const char *fmt, ...);
+void ui_browser__write_graph(struct ui_browser *browser, int graph);
+void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
+			      u64 start, u64 end);
+void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
+			    unsigned int row, bool arrow_down);
+void __ui_browser__show_title(struct ui_browser *browser, const char *title);
+void ui_browser__show_title(struct ui_browser *browser, const char *title);
+int ui_browser__show(struct ui_browser *browser, const char *title,
+		     const char *helpline, ...);
+void ui_browser__hide(struct ui_browser *browser);
+int ui_browser__refresh(struct ui_browser *browser);
+int ui_browser__run(struct ui_browser *browser, int delay_secs);
+void ui_browser__update_nr_entries(struct ui_browser *browser, u32 nr_entries);
+void ui_browser__handle_resize(struct ui_browser *browser);
+void __ui_browser__vline(struct ui_browser *browser, unsigned int column,
+			 u16 start, u16 end);
+
+int ui_browser__warning(struct ui_browser *browser, int timeout,
+			const char *format, ...);
+int ui_browser__help_window(struct ui_browser *browser, const char *text);
+bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text);
+int ui_browser__input_window(const char *title, const char *text, char *input,
+			     const char *exit_msg, int delay_sec);
+struct perf_env;
+int tui__header_window(struct perf_env *env);
+
+void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence);
+unsigned int ui_browser__argv_refresh(struct ui_browser *browser);
+
+void ui_browser__rb_tree_seek(struct ui_browser *browser, off_t offset, int whence);
+unsigned int ui_browser__rb_tree_refresh(struct ui_browser *browser);
+
+void ui_browser__list_head_seek(struct ui_browser *browser, off_t offset, int whence);
+unsigned int ui_browser__list_head_refresh(struct ui_browser *browser);
+
+void ui_browser__init(void);
+#endif /* _PERF_UI_BROWSER_H_ */
diff --git a/ui/browsers/Build b/ui/browsers/Build
new file mode 100644
index 0000000..de223f5
--- /dev/null
+++ b/ui/browsers/Build
@@ -0,0 +1,10 @@
+libperf-y += annotate.o
+libperf-y += hists.o
+libperf-y += map.o
+libperf-y += scripts.o
+libperf-y += header.o
+
+CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST
+CFLAGS_hists.o    += -DENABLE_SLFUTURE_CONST
+CFLAGS_map.o      += -DENABLE_SLFUTURE_CONST
+CFLAGS_scripts.o  += -DENABLE_SLFUTURE_CONST
diff --git a/ui/browsers/annotate.c b/ui/browsers/annotate.c
new file mode 100644
index 0000000..3781d74
--- /dev/null
+++ b/ui/browsers/annotate.c
@@ -0,0 +1,880 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/util.h"
+#include "../browser.h"
+#include "../helpline.h"
+#include "../ui.h"
+#include "../util.h"
+#include "../../util/annotate.h"
+#include "../../util/hist.h"
+#include "../../util/sort.h"
+#include "../../util/symbol.h"
+#include "../../util/evsel.h"
+#include "../../util/evlist.h"
+#include <inttypes.h>
+#include <pthread.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <sys/ttydefaults.h>
+
+struct disasm_line_samples {
+	double		      percent;
+	struct sym_hist_entry he;
+};
+
+struct arch;
+
+struct annotate_browser {
+	struct ui_browser	    b;
+	struct rb_root		    entries;
+	struct rb_node		   *curr_hot;
+	struct annotation_line	   *selection;
+	struct arch		   *arch;
+	bool			    searching_backwards;
+	char			    search_bf[128];
+};
+
+static inline struct annotation *browser__annotation(struct ui_browser *browser)
+{
+	struct map_symbol *ms = browser->priv;
+	return symbol__annotation(ms->sym);
+}
+
+static bool disasm_line__filter(struct ui_browser *browser, void *entry)
+{
+	struct annotation *notes = browser__annotation(browser);
+	struct annotation_line *al = list_entry(entry, struct annotation_line, node);
+	return annotation_line__filter(al, notes);
+}
+
+static int ui_browser__jumps_percent_color(struct ui_browser *browser, int nr, bool current)
+{
+	struct annotation *notes = browser__annotation(browser);
+
+	if (current && (!browser->use_navkeypressed || browser->navkeypressed))
+		return HE_COLORSET_SELECTED;
+	if (nr == notes->max_jump_sources)
+		return HE_COLORSET_TOP;
+	if (nr > 1)
+		return HE_COLORSET_MEDIUM;
+	return HE_COLORSET_NORMAL;
+}
+
+static int ui_browser__set_jumps_percent_color(void *browser, int nr, bool current)
+{
+	 int color = ui_browser__jumps_percent_color(browser, nr, current);
+	 return ui_browser__set_color(browser, color);
+}
+
+static int annotate_browser__set_color(void *browser, int color)
+{
+	return ui_browser__set_color(browser, color);
+}
+
+static void annotate_browser__write_graph(void *browser, int graph)
+{
+	ui_browser__write_graph(browser, graph);
+}
+
+static void annotate_browser__set_percent_color(void *browser, double percent, bool current)
+{
+	ui_browser__set_percent_color(browser, percent, current);
+}
+
+static void annotate_browser__printf(void *browser, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	ui_browser__vprintf(browser, fmt, args);
+	va_end(args);
+}
+
+static void annotate_browser__write(struct ui_browser *browser, void *entry, int row)
+{
+	struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
+	struct annotation *notes = browser__annotation(browser);
+	struct annotation_line *al = list_entry(entry, struct annotation_line, node);
+	/* Evaluated first: initializers must not read the struct being built. */
+	bool current_entry = ui_browser__is_current_entry(browser, row);
+	struct annotation_write_ops ops = {
+		.first_line		 = row == 0,
+		.current_entry		 = current_entry,
+		.change_color		 = (!notes->options->hide_src_code &&
+					    (!current_entry ||
+					     (browser->use_navkeypressed &&
+					      !browser->navkeypressed))),
+		.width			 = browser->width,
+		.obj			 = browser,
+		.set_color		 = annotate_browser__set_color,
+		.set_percent_color	 = annotate_browser__set_percent_color,
+		.set_jumps_percent_color = ui_browser__set_jumps_percent_color,
+		.printf			 = annotate_browser__printf,
+		.write_graph		 = annotate_browser__write_graph,
+	};
+
+	/* The scroll bar isn't being used */
+	if (!browser->navkeypressed)
+		ops.width += 1;
+	annotation_line__write(al, notes, &ops);
+	if (ops.current_entry)
+		ab->selection = al;
+}
+
+static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
+{
+	struct annotation *notes = browser__annotation(&ab->b);
+	struct disasm_line *pos;
+	const char *name;
+
+	/* The list is circular: the first line's prev is the head, never NULL. */
+	if (cursor->al.node.prev == &notes->src->source)
+		return false;
+
+	pos = list_prev_entry(cursor, al.node);
+	name = ins__is_lock(&pos->ins) ? pos->ops.locked.ins.name :
+					 pos->ins.name;
+	if (!name || !cursor->ins.name)
+		return false;
+
+	return ins__is_fused(ab->arch, name, cursor->ins.name);
+}
+
+static void annotate_browser__draw_current_jump(struct ui_browser *browser)
+{
+	struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
+	struct disasm_line *cursor = disasm_line(ab->selection);
+	struct annotation_line *target;
+	unsigned int from, to;
+	struct map_symbol *ms = ab->b.priv;
+	struct symbol *sym = ms->sym;
+	struct annotation *notes = symbol__annotation(sym);
+	u8 pcnt_width = annotation__pcnt_width(notes);
+	int width;
+
+	/* PLT symbols contain external offsets */
+	if (strstr(sym->name, "@plt"))
+		return;
+
+	if (!disasm_line__is_valid_local_jump(cursor, sym))
+		return;
+
+	/*
+	 * This first was seen with a gcc function, _cpp_lex_token, that
+	 * has the usual jumps:
+	 *
+	 *  │1159e6c: ↓ jne    115aa32 <_cpp_lex_token@@Base+0xf92>
+	 *
+	 * I.e. jumps to a label inside that function (_cpp_lex_token), and
+	 * those work, but also this kind:
+	 *
+	 *  │1159e8b: ↓ jne    c469be <cpp_named_operator2name@@Base+0xa72>
+	 *
+	 *  I.e. jumps to another function, outside _cpp_lex_token, which
+	 *  are not being correctly handled, generating as a side effect references
+	 *  to notes->offsets[] entries that are set to NULL, so to make this code
+	 *  more robust, check that here.
+	 *
+	 *  A proper fix will be put in place, looking at the function
+	 *  name right after the '<' token and probably treating this like a
+	 *  'call' instruction.
+	 */
+	target = notes->offsets[cursor->ops.target.offset];
+	if (target == NULL) {
+		ui_helpline__printf("WARN: jump target inconsistency, press 'o', notes->offsets[%#x] = NULL\n",
+				    cursor->ops.target.offset);
+		return;
+	}
+
+	if (notes->options->hide_src_code) {
+		from = cursor->al.idx_asm;
+		to = target->idx_asm;
+	} else {
+		from = (u64)cursor->al.idx;
+		to = (u64)target->idx;
+	}
+
+	width = annotation__cycles_width(notes);
+
+	ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
+	__ui_browser__line_arrow(browser,
+				 pcnt_width + 2 + notes->widths.addr + width,
+				 from, to);
+
+	if (is_fused(ab, cursor)) {
+		ui_browser__mark_fused(browser,
+				       pcnt_width + 3 + notes->widths.addr + width,
+				       from - 1,
+				       to > from ? true : false);
+	}
+}
+
+static unsigned int annotate_browser__refresh(struct ui_browser *browser)
+{
+	struct annotation *notes = browser__annotation(browser);
+	int ret = ui_browser__list_head_refresh(browser);
+	int pcnt_width = annotation__pcnt_width(notes);
+
+	if (notes->options->jump_arrows)
+		annotate_browser__draw_current_jump(browser);
+
+	ui_browser__set_color(browser, HE_COLORSET_NORMAL);
+	__ui_browser__vline(browser, pcnt_width, 0, browser->rows - 1);
+	return ret;
+}
+
+static int disasm__cmp(struct annotation_line *a, struct annotation_line *b)
+{
+	int i;
+
+	for (i = 0; i < a->samples_nr; i++) {
+		if (a->samples[i].percent == b->samples[i].percent)
+			continue;
+		return a->samples[i].percent < b->samples[i].percent;
+	}
+	return 0;
+}
+
+static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct annotation_line *l;
+
+	while (*p != NULL) {
+		parent = *p;
+		l = rb_entry(parent, struct annotation_line, rb_node);
+
+		if (disasm__cmp(al, l))
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&al->rb_node, parent, p);
+	rb_insert_color(&al->rb_node, root);
+}
+
+static void annotate_browser__set_top(struct annotate_browser *browser,
+				      struct annotation_line *pos, u32 idx)
+{
+	struct annotation *notes = browser__annotation(&browser->b);
+	unsigned back;
+
+	ui_browser__refresh_dimensions(&browser->b);
+	back = browser->b.height / 2;
+	browser->b.top_idx = browser->b.index = idx;
+
+	while (browser->b.top_idx != 0 && back != 0) {
+		pos = list_entry(pos->node.prev, struct annotation_line, node);
+
+		if (annotation_line__filter(pos, notes))
+			continue;
+
+		--browser->b.top_idx;
+		--back;
+	}
+
+	browser->b.top = pos;
+	browser->b.navkeypressed = true;
+}
+
+static void annotate_browser__set_rb_top(struct annotate_browser *browser,
+					 struct rb_node *nd)
+{
+	struct annotation *notes = browser__annotation(&browser->b);
+	struct annotation_line * pos = rb_entry(nd, struct annotation_line, rb_node);
+	u32 idx = pos->idx;
+
+	if (notes->options->hide_src_code)
+		idx = pos->idx_asm;
+	annotate_browser__set_top(browser, pos, idx);
+	browser->curr_hot = nd;
+}
+
+static void annotate_browser__calc_percent(struct annotate_browser *browser,
+					   struct perf_evsel *evsel)
+{
+	struct map_symbol *ms = browser->b.priv;
+	struct symbol *sym = ms->sym;
+	struct annotation *notes = symbol__annotation(sym);
+	struct disasm_line *pos;
+
+	browser->entries = RB_ROOT;
+
+	pthread_mutex_lock(&notes->lock);
+
+	symbol__calc_percent(sym, evsel);
+
+	list_for_each_entry(pos, &notes->src->source, al.node) {
+		double max_percent = 0.0;
+		int i;
+
+		if (pos->al.offset == -1) {
+			RB_CLEAR_NODE(&pos->al.rb_node);
+			continue;
+		}
+
+		for (i = 0; i < pos->al.samples_nr; i++) {
+			struct annotation_data *sample = &pos->al.samples[i];
+
+			if (max_percent < sample->percent)
+				max_percent = sample->percent;
+		}
+
+		if (max_percent < 0.01 && pos->al.ipc == 0) {
+			RB_CLEAR_NODE(&pos->al.rb_node);
+			continue;
+		}
+		disasm_rb_tree__insert(&browser->entries, &pos->al);
+	}
+	pthread_mutex_unlock(&notes->lock);
+
+	browser->curr_hot = rb_last(&browser->entries);
+}
+
+static bool annotate_browser__toggle_source(struct annotate_browser *browser)
+{
+	struct annotation *notes = browser__annotation(&browser->b);
+	struct annotation_line *al;
+	off_t offset = browser->b.index - browser->b.top_idx;
+
+	browser->b.seek(&browser->b, offset, SEEK_CUR);
+	al = list_entry(browser->b.top, struct annotation_line, node);
+
+	if (notes->options->hide_src_code) {
+		if (al->idx_asm < offset)
+			offset = al->idx;
+
+		browser->b.nr_entries = notes->nr_entries;
+		notes->options->hide_src_code = false;
+		browser->b.seek(&browser->b, -offset, SEEK_CUR);
+		browser->b.top_idx = al->idx - offset;
+		browser->b.index = al->idx;
+	} else {
+		if (al->idx_asm < 0) {
+			ui_helpline__puts("Only available for assembly lines.");
+			browser->b.seek(&browser->b, -offset, SEEK_CUR);
+			return false;
+		}
+
+		if (al->idx_asm < offset)
+			offset = al->idx_asm;
+
+		browser->b.nr_entries = notes->nr_asm_entries;
+		notes->options->hide_src_code = true;
+		browser->b.seek(&browser->b, -offset, SEEK_CUR);
+		browser->b.top_idx = al->idx_asm - offset;
+		browser->b.index = al->idx_asm;
+	}
+
+	return true;
+}
+
+static void ui_browser__init_asm_mode(struct ui_browser *browser)
+{
+	struct annotation *notes = browser__annotation(browser);
+	ui_browser__reset_index(browser);
+	browser->nr_entries = notes->nr_asm_entries;
+}
+
+#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64)
+
+static int sym_title(struct symbol *sym, struct map *map, char *title,
+		     size_t sz)
+{
+	return snprintf(title, sz, "%s  %s", sym->name, map->dso->long_name);
+}
+
+/*
+ * This can be called from external jumps, i.e. jumps from one function
+ * to another, like from the kernel's entry_SYSCALL_64 function to the
+ * swapgs_restore_regs_and_return_to_usermode() function.
+ *
+ * So all we check here is that dl->ops.target.sym is set, if it is, just
+ * go to that function and when exiting from its disassembly, come back
+ * to the calling function.
+ */
+static bool annotate_browser__callq(struct annotate_browser *browser,
+				    struct perf_evsel *evsel,
+				    struct hist_browser_timer *hbt)
+{
+	struct map_symbol *ms = browser->b.priv;
+	struct disasm_line *dl = disasm_line(browser->selection);
+	struct annotation *notes;
+	char title[SYM_TITLE_MAX_SIZE];
+
+	if (!dl->ops.target.sym) {
+		ui_helpline__puts("The called function was not found.");
+		return true;
+	}
+
+	notes = symbol__annotation(dl->ops.target.sym);
+	pthread_mutex_lock(&notes->lock);
+
+	if (notes->src == NULL && symbol__alloc_hist(dl->ops.target.sym) < 0) {
+		pthread_mutex_unlock(&notes->lock);
+		ui__warning("Not enough memory for annotating '%s' symbol!\n",
+			    dl->ops.target.sym->name);
+		return true;
+	}
+
+	pthread_mutex_unlock(&notes->lock);
+	symbol__tui_annotate(dl->ops.target.sym, ms->map, evsel, hbt);
+	sym_title(ms->sym, ms->map, title, sizeof(title));
+	ui_browser__show_title(&browser->b, title);
+	return true;
+}
+
+static
+struct disasm_line *annotate_browser__find_offset(struct annotate_browser *browser,
+					  s64 offset, s64 *idx)
+{
+	struct annotation *notes = browser__annotation(&browser->b);
+	struct disasm_line *pos;
+
+	*idx = 0;
+	list_for_each_entry(pos, &notes->src->source, al.node) {
+		if (pos->al.offset == offset)
+			return pos;
+		if (!annotation_line__filter(&pos->al, notes))
+			++*idx;
+	}
+
+	return NULL;
+}
+
+static bool annotate_browser__jump(struct annotate_browser *browser,
+				   struct perf_evsel *evsel,
+				   struct hist_browser_timer *hbt)
+{
+	struct disasm_line *dl = disasm_line(browser->selection);
+	u64 offset;
+	s64 idx;
+
+	if (!ins__is_jump(&dl->ins))
+		return false;
+
+	if (dl->ops.target.outside) {
+		annotate_browser__callq(browser, evsel, hbt);
+		return true;
+	}
+
+	offset = dl->ops.target.offset;
+	dl = annotate_browser__find_offset(browser, offset, &idx);
+	if (dl == NULL) {
+		ui_helpline__printf("Invalid jump offset: %" PRIx64, offset);
+		return true;
+	}
+
+	annotate_browser__set_top(browser, &dl->al, idx);
+
+	return true;
+}
+
+static
+struct annotation_line *annotate_browser__find_string(struct annotate_browser *browser,
+					  char *s, s64 *idx)
+{
+	struct annotation *notes = browser__annotation(&browser->b);
+	struct annotation_line *al = browser->selection;
+
+	*idx = browser->b.index;
+	list_for_each_entry_continue(al, &notes->src->source, node) {
+		if (annotation_line__filter(al, notes))
+			continue;
+
+		++*idx;
+
+		if (al->line && strstr(al->line, s) != NULL)
+			return al;
+	}
+
+	return NULL;
+}
+
+static bool __annotate_browser__search(struct annotate_browser *browser)
+{
+	struct annotation_line *al;
+	s64 idx;
+
+	al = annotate_browser__find_string(browser, browser->search_bf, &idx);
+	if (al == NULL) {
+		ui_helpline__puts("String not found!");
+		return false;
+	}
+
+	annotate_browser__set_top(browser, al, idx);
+	browser->searching_backwards = false;
+	return true;
+}
+
+static
+struct annotation_line *annotate_browser__find_string_reverse(struct annotate_browser *browser,
+						  char *s, s64 *idx)
+{
+	struct annotation *notes = browser__annotation(&browser->b);
+	struct annotation_line *al = browser->selection;
+
+	*idx = browser->b.index;
+	list_for_each_entry_continue_reverse(al, &notes->src->source, node) {
+		if (annotation_line__filter(al, notes))
+			continue;
+
+		--*idx;
+
+		if (al->line && strstr(al->line, s) != NULL)
+			return al;
+	}
+
+	return NULL;
+}
+
+static bool __annotate_browser__search_reverse(struct annotate_browser *browser)
+{
+	struct annotation_line *al;
+	s64 idx;
+
+	al = annotate_browser__find_string_reverse(browser, browser->search_bf, &idx);
+	if (al == NULL) {
+		ui_helpline__puts("String not found!");
+		return false;
+	}
+
+	annotate_browser__set_top(browser, al, idx);
+	browser->searching_backwards = true;
+	return true;
+}
+
+static bool annotate_browser__search_window(struct annotate_browser *browser,
+					    int delay_secs)
+{
+	if (ui_browser__input_window("Search", "String: ", browser->search_bf,
+				     "ENTER: OK, ESC: Cancel",
+				     delay_secs * 2) != K_ENTER ||
+	    !*browser->search_bf)
+		return false;
+
+	return true;
+}
+
+static bool annotate_browser__search(struct annotate_browser *browser, int delay_secs)
+{
+	if (annotate_browser__search_window(browser, delay_secs))
+		return __annotate_browser__search(browser);
+
+	return false;
+}
+
+static bool annotate_browser__continue_search(struct annotate_browser *browser,
+					      int delay_secs)
+{
+	if (!*browser->search_bf)
+		return annotate_browser__search(browser, delay_secs);
+
+	return __annotate_browser__search(browser);
+}
+
+static bool annotate_browser__search_reverse(struct annotate_browser *browser,
+					   int delay_secs)
+{
+	if (annotate_browser__search_window(browser, delay_secs))
+		return __annotate_browser__search_reverse(browser);
+
+	return false;
+}
+
+/*
+ * Repeat the backward search with the string already in browser->search_bf;
+ * when that buffer is empty, prompt for a string first.
+ */
+static
+bool annotate_browser__continue_search_reverse(struct annotate_browser *browser,
+					       int delay_secs)
+{
+	if (*browser->search_bf)
+		return __annotate_browser__search_reverse(browser);
+
+	return annotate_browser__search_reverse(browser, delay_secs);
+}
+
+/*
+ * Show the browser with the given title/help and then write the symbol
+ * title (as formatted by sym_title()) at row 0, column 0 of the title area
+ * using the HE_COLORSET_ROOT color.
+ *
+ * Returns -1 when ui_browser__show() fails, 0 otherwise.
+ */
+static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help)
+{
+	char sym_line[SYM_TITLE_MAX_SIZE];
+	struct map_symbol *msym = browser->priv;
+	struct symbol *symbol = msym->sym;
+	int err = ui_browser__show(browser, title, help);
+
+	if (err < 0)
+		return -1;
+
+	sym_title(symbol, msym->map, sym_line, sizeof(sym_line));
+
+	ui_browser__gotorc_title(browser, 0, 0);
+	ui_browser__set_color(browser, HE_COLORSET_ROOT);
+	ui_browser__write_nstring(browser, sym_line, browser->width + 1);
+
+	return 0;
+}
+
+/*
+ * Main key loop of the annotate browser.
+ *
+ * Shows the browser, then repeatedly reads a key with ui_browser__run() and
+ * dispatches on it until one of the exit keys is pressed, or ENTER/RIGHT is
+ * pressed on a return instruction.  Cases that 'break' out of the switch
+ * reach the bottom of the loop, where the view is moved to 'nd' if it is
+ * set; cases that 'continue' skip that step.
+ *
+ * Returns -1 if the browser could not be shown, otherwise the key that ended
+ * the loop.
+ */
+static int annotate_browser__run(struct annotate_browser *browser,
+				 struct perf_evsel *evsel,
+				 struct hist_browser_timer *hbt)
+{
+	struct rb_node *nd = NULL;
+	struct hists *hists = evsel__hists(evsel);
+	struct map_symbol *ms = browser->b.priv;
+	struct symbol *sym = ms->sym;
+	struct annotation *notes = symbol__annotation(ms->sym);
+	const char *help = "Press 'h' for help on key bindings";
+	/* A zero delay means no periodic refresh was requested. */
+	int delay_secs = hbt ? hbt->refresh : 0;
+	char title[256];
+	int key;
+
+	annotation__scnprintf_samples_period(notes, title, sizeof(title), evsel);
+
+	if (annotate_browser__show(&browser->b, title, help) < 0)
+		return -1;
+
+	annotate_browser__calc_percent(browser, evsel);
+
+	if (browser->curr_hot) {
+		annotate_browser__set_rb_top(browser, browser->curr_hot);
+		browser->b.navkeypressed = false;
+	}
+
+	/* 'nd' is the cursor used by TAB/UNTAB/H below. */
+	nd = browser->curr_hot;
+
+	while (1) {
+		key = ui_browser__run(&browser->b, delay_secs);
+
+		if (delay_secs != 0) {
+			annotate_browser__calc_percent(browser, evsel);
+			/*
+			 * Current line focus got out of the list of most active
+			 * lines, NULL it so that if TAB|UNTAB is pressed, we
+			 * move to curr_hot (current hottest line).
+			 */
+			if (nd != NULL && RB_EMPTY_NODE(nd))
+				nd = NULL;
+		}
+
+		switch (key) {
+		case K_TIMER:
+			if (hbt)
+				hbt->timer(hbt->arg);
+
+			if (delay_secs != 0) {
+				symbol__annotate_decay_histogram(sym, evsel->idx);
+				hists__scnprintf_title(hists, title, sizeof(title));
+				annotate_browser__show(&browser->b, title, help);
+			}
+			continue;
+		case K_TAB:
+			/* Step to the previous node, wrapping to the last one. */
+			if (nd != NULL) {
+				nd = rb_prev(nd);
+				if (nd == NULL)
+					nd = rb_last(&browser->entries);
+			} else
+				nd = browser->curr_hot;
+			break;
+		case K_UNTAB:
+			/* Step to the next node, wrapping to the first one. */
+			if (nd != NULL) {
+				nd = rb_next(nd);
+				if (nd == NULL)
+					nd = rb_first(&browser->entries);
+			} else
+				nd = browser->curr_hot;
+			break;
+		case K_F1:
+		case 'h':
+			ui_browser__help_window(&browser->b,
+		"UP/DOWN/PGUP\n"
+		"PGDN/SPACE    Navigate\n"
+		"q/ESC/CTRL+C  Exit\n\n"
+		"ENTER         Go to target\n"
+		"ESC           Exit\n"
+		"H             Go to hottest instruction\n"
+		"TAB/shift+TAB Cycle thru hottest instructions\n"
+		"j             Toggle showing jump to target arrows\n"
+		"J             Toggle showing number of jump sources on targets\n"
+		"n             Search next string\n"
+		"o             Toggle disassembler output/simplified view\n"
+		"O             Bump offset level (jump targets -> +call -> all -> cycle thru)\n"
+		"s             Toggle source code view\n"
+		"t             Circulate percent, total period, samples view\n"
+		"/             Search string\n"
+		"k             Toggle line numbers\n"
+		"P             Print to [symbol_name].annotation file.\n"
+		"r             Run available scripts\n"
+		"?             Search string backwards\n");
+			continue;
+		case 'r':
+			{
+				script_browse(NULL);
+				continue;
+			}
+		case 'k':
+			notes->options->show_linenr = !notes->options->show_linenr;
+			break;
+		case 'H':
+			nd = browser->curr_hot;
+			break;
+		case 's':
+			if (annotate_browser__toggle_source(browser))
+				ui_helpline__puts(help);
+			continue;
+		case 'o':
+			notes->options->use_offset = !notes->options->use_offset;
+			annotation__update_column_widths(notes);
+			continue;
+		case 'O':
+			/* Wrap back to the minimum level once the maximum is passed. */
+			if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL)
+				notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
+			continue;
+		case 'j':
+			notes->options->jump_arrows = !notes->options->jump_arrows;
+			continue;
+		case 'J':
+			notes->options->show_nr_jumps = !notes->options->show_nr_jumps;
+			annotation__update_column_widths(notes);
+			continue;
+		case '/':
+			if (annotate_browser__search(browser, delay_secs)) {
+/* Shared tail for the search keys: put the default help text on the helpline. */
+show_help:
+				ui_helpline__puts(help);
+			}
+			continue;
+		case 'n':
+			/* Continue in the direction recorded in searching_backwards. */
+			if (browser->searching_backwards ?
+			    annotate_browser__continue_search_reverse(browser, delay_secs) :
+			    annotate_browser__continue_search(browser, delay_secs))
+				goto show_help;
+			continue;
+		case '?':
+			if (annotate_browser__search_reverse(browser, delay_secs))
+				goto show_help;
+			continue;
+		case 'D': {
+			/* Debug: dump browser geometry on the helpline. */
+			static int seq;
+			ui_helpline__pop();
+			ui_helpline__fpush("%d: nr_ent=%d, height=%d, idx=%d, top_idx=%d, nr_asm_entries=%d",
+					   seq++, browser->b.nr_entries,
+					   browser->b.height,
+					   browser->b.index,
+					   browser->b.top_idx,
+					   notes->nr_asm_entries);
+		}
+			continue;
+		case K_ENTER:
+		case K_RIGHT:
+		{
+			/* 'dl' is only dereferenced after the selection checks below. */
+			struct disasm_line *dl = disasm_line(browser->selection);
+
+			if (browser->selection == NULL)
+				ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org");
+			else if (browser->selection->offset == -1)
+				ui_helpline__puts("Actions are only available for assembly lines.");
+			else if (!dl->ins.ops)
+				goto show_sup_ins;
+			else if (ins__is_ret(&dl->ins))
+				goto out;
+			else if (!(annotate_browser__jump(browser, evsel, hbt) ||
+				     annotate_browser__callq(browser, evsel, hbt))) {
+show_sup_ins:
+				ui_helpline__puts("Actions are only available for function call/return & jump/branch instructions.");
+			}
+			continue;
+		}
+		case 'P':
+			map_symbol__annotation_dump(ms, evsel);
+			continue;
+		case 't':
+			/* Cycle: neither flag -> total period -> nr samples -> neither. */
+			if (notes->options->show_total_period) {
+				notes->options->show_total_period = false;
+				notes->options->show_nr_samples = true;
+			} else if (notes->options->show_nr_samples)
+				notes->options->show_nr_samples = false;
+			else
+				notes->options->show_total_period = true;
+			annotation__update_column_widths(notes);
+			continue;
+		case K_LEFT:
+		case K_ESC:
+		case 'q':
+		case CTRL('c'):
+			goto out;
+		default:
+			continue;
+		}
+
+		/* Only reached by the cases above that 'break'. */
+		if (nd != NULL)
+			annotate_browser__set_rb_top(browser, nd);
+	}
+out:
+	ui_browser__hide(&browser->b);
+	return key;
+}
+
+/*
+ * Convenience wrapper: annotate the symbol/map pair stored in @ms by
+ * forwarding to symbol__tui_annotate() and returning its result.
+ */
+int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
+			     struct hist_browser_timer *hbt)
+{
+	struct symbol *sym = ms->sym;
+	struct map *map = ms->map;
+
+	return symbol__tui_annotate(sym, map, evsel, hbt);
+}
+
+/*
+ * Annotate the map/symbol of a hist entry in the TUI.  The tty is
+ * re-initialised first so that Ctrl-C is delivered as an ordinary key
+ * instead of acting as the abort key.
+ */
+int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
+			     struct hist_browser_timer *hbt)
+{
+	struct map_symbol *target = &he->ms;
+
+	/* reset abort key so that it can get Ctrl-C as a key */
+	SLang_reset_tty();
+	SLang_init_tty(0, 0, 0);
+
+	return map_symbol__tui_annotate(target, evsel, hbt);
+}
+
+/*
+ * Annotate @sym (living in @map) and browse the result in the TUI.
+ *
+ * Returns -1 when @sym is NULL, when the DSO was already flagged with
+ * annotate_warned, or when symbol__annotate2() fails (an error dialog is
+ * shown in the latter case); otherwise returns whatever
+ * annotate_browser__run() returned.
+ */
+int symbol__tui_annotate(struct symbol *sym, struct map *map,
+			 struct perf_evsel *evsel,
+			 struct hist_browser_timer *hbt)
+{
+	struct annotation *notes;
+	struct map_symbol ms = {
+		.map = map,
+		.sym = sym,
+	};
+	struct annotate_browser browser = {
+		.b = {
+			.refresh = annotate_browser__refresh,
+			.seek	 = ui_browser__list_head_seek,
+			.write	 = annotate_browser__write,
+			.filter  = disasm_line__filter,
+			.extra_title_lines = 1, /* for hists__scnprintf_title() */
+			.priv	 = &ms,
+			.use_navkeypressed = true,
+		},
+	};
+	int ret = -1, err;
+
+	if (sym == NULL)
+		return -1;
+
+	if (map->dso->annotate_warned)
+		return -1;
+
+	/* Only look up the annotation once @sym is known to be valid. */
+	notes = symbol__annotation(sym);
+
+	err = symbol__annotate2(sym, map, evsel, &annotation__default_options, &browser.arch);
+	if (err) {
+		char msg[BUFSIZ];
+		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
+		ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
+		goto out_free_offsets;
+	}
+
+	ui_helpline__push("Press ESC to exit");
+
+	browser.b.width = notes->max_line_len;
+	browser.b.nr_entries = notes->nr_entries;
+	browser.b.entries = &notes->src->source;
+	browser.b.width += 18; /* Percentage */
+
+	if (notes->options->hide_src_code)
+		ui_browser__init_asm_mode(&browser.b);
+
+	ret = annotate_browser__run(&browser, evsel, hbt);
+
+	annotated_source__purge(notes->src);
+
+out_free_offsets:
+	zfree(&notes->offsets);
+	return ret;
+}
diff --git a/ui/browsers/header.c b/ui/browsers/header.c
new file mode 100644
index 0000000..d754921
--- /dev/null
+++ b/ui/browsers/header.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/cache.h"
+#include "util/debug.h"
+#include "ui/browser.h"
+#include "ui/keysyms.h"
+#include "ui/ui.h"
+#include "ui/util.h"
+#include "ui/libslang.h"
+#include "util/header.h"
+#include "util/session.h"
+
+#include <sys/ttydefaults.h>
+
+/*
+ * Write callback for an argv-backed browser: @entry points at one string of
+ * the array.  browser->priv carries a horizontal offset (stored as an
+ * integer in the pointer); the string is printed starting at that offset,
+ * or as a single blank when the offset is at or past the end of the string.
+ */
+static void ui_browser__argv_write(struct ui_browser *browser,
+				   void *entry, int row)
+{
+	char blank[] = " ";
+	char *text = *(char **)entry;
+	bool selected = ui_browser__is_current_entry(browser, row);
+	unsigned long skip = (unsigned long)browser->priv;
+
+	text = skip < strlen(text) ? text + skip : blank;
+
+	if (selected)
+		ui_browser__set_color(browser, HE_COLORSET_SELECTED);
+	else
+		ui_browser__set_color(browser, HE_COLORSET_NORMAL);
+
+	ui_browser__write_nstring(browser, text, browser->width);
+}
+
+/*
+ * Key loop for the header information menu.  LEFT/RIGHT move the horizontal
+ * offset kept in menu->priv in steps of 10 (never below the current value
+ * when it is already under 10), h/?/F1 show the help window and
+ * q/ESC/CTRL+C leave.
+ *
+ * Returns -1 both when the browser cannot be shown and when the user exits.
+ */
+static int list_menu__run(struct ui_browser *menu)
+{
+	const char help[] =
+	"h/?/F1        Show this window\n"
+	"UP/DOWN/PGUP\n"
+	"PGDN/SPACE\n"
+	"LEFT/RIGHT    Navigate\n"
+	"q/ESC/CTRL+C  Exit browser";
+	unsigned long offset;
+	int key;
+
+	if (ui_browser__show(menu, "Header information", "Press 'q' to exit") < 0)
+		return -1;
+
+	for (;;) {
+		key = ui_browser__run(menu, 0);
+
+		if (key == K_ESC || key == 'q' || key == CTRL('c')) {
+			key = -1;
+			break;
+		}
+
+		if (key == K_F1 || key == 'h' || key == '?') {
+			ui_browser__help_window(menu, help);
+		} else if (key == K_RIGHT) {
+			offset = (unsigned long)menu->priv + 10;
+			menu->priv = (void *)offset;
+		} else if (key == K_LEFT) {
+			offset = (unsigned long)menu->priv;
+			if (offset >= 10)
+				offset -= 10;
+			menu->priv = (void *)offset;
+		}
+	}
+
+	ui_browser__hide(menu);
+	return key;
+}
+
+/*
+ * Wrap an argv-style string array in a ui_browser using the argv
+ * refresh/seek/write callbacks and run it through list_menu__run().
+ */
+static int ui__list_menu(int argc, char * const argv[])
+{
+	struct ui_browser argv_menu = {
+		.nr_entries = argc,
+		.entries    = (void *)argv,
+		.write	    = ui_browser__argv_write,
+		.seek	    = ui_browser__argv_seek,
+		.refresh    = ui_browser__argv_refresh,
+	};
+	int ret = list_menu__run(&argv_menu);
+
+	return ret;
+}
+
+/*
+ * Show the perf.data header information in a scrollable menu.
+ *
+ * The header is printed into an in-memory stream, the resulting buffer is
+ * split in place at each '\n' and the lines are handed to ui__list_menu().
+ *
+ * Returns 0 in all cases; when the memory stream or the line array cannot
+ * be allocated the window is simply not shown.
+ */
+int tui__header_window(struct perf_env *env)
+{
+	int i, argc = 0;
+	char **argv = NULL;
+	struct perf_session *session;
+	char *ptr = NULL, *pos;
+	size_t size;
+	FILE *fp = open_memstream(&ptr, &size);
+
+	/* open_memstream() can fail; don't hand a NULL stream to the printer. */
+	if (fp == NULL)
+		goto out;
+
+	session = container_of(env, struct perf_session, header.env);
+	perf_header__fprintf_info(session, fp, true);
+	fclose(fp);
+
+	if (ptr == NULL)
+		goto out;
+
+	/* One menu entry per newline-terminated line. */
+	for (pos = ptr, argc = 0; (pos = strchr(pos, '\n')) != NULL; pos++)
+		argc++;
+
+	/* +1: the slot for whatever follows the last newline. */
+	argv = calloc(argc + 1, sizeof(*argv));
+	if (argv == NULL)
+		goto out;
+
+	argv[0] = pos = ptr;
+	for (i = 1; (pos = strchr(pos, '\n')) != NULL; i++) {
+		*pos++ = '\0';
+		argv[i] = pos;
+	}
+
+	BUG_ON(i != argc + 1);
+
+	ui__list_menu(argc, argv);
+
+out:
+	free(argv);
+	free(ptr);
+	return 0;
+}
diff --git a/ui/browsers/hists.c b/ui/browsers/hists.c
new file mode 100644
index 0000000..e5f2472
--- /dev/null
+++ b/ui/browsers/hists.c
@@ -0,0 +1,3289 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/rbtree.h>
+#include <sys/ttydefaults.h>
+
+#include "../../util/evsel.h"
+#include "../../util/evlist.h"
+#include "../../util/hist.h"
+#include "../../util/pstack.h"
+#include "../../util/sort.h"
+#include "../../util/util.h"
+#include "../../util/top.h"
+#include "../../util/thread.h"
+#include "../../arch/common.h"
+
+#include "../browsers/hists.h"
+#include "../helpline.h"
+#include "../util.h"
+#include "../ui.h"
+#include "map.h"
+#include "annotate.h"
+#include "srcline.h"
+#include "string2.h"
+#include "units.h"
+
+#include "sane_ctype.h"
+
+extern void hist_browser__init_hpp(void);
+
+static int hists_browser__scnprintf_title(struct hist_browser *browser, char *bf, size_t size);
+static void hist_browser__update_nr_entries(struct hist_browser *hb);
+
+static struct rb_node *hists__filter_entries(struct rb_node *nd,
+					     float min_pcnt);
+
+/*
+ * True when any filtering is in effect for this browser: a hists filter, a
+ * non-zero minimum percentage, the global symbol_conf filter flag or a c2c
+ * filter callback.
+ */
+static bool hist_browser__has_filter(struct hist_browser *hb)
+{
+	if (hists__has_filter(hb->hists))
+		return true;
+
+	if (hb->min_pcnt)
+		return true;
+
+	if (symbol_conf.has_filter)
+		return true;
+
+	return hb->c2c_filter ? true : false;
+}
+
+/*
+ * Sum nr_rows over every leaf entry that is currently unfolded, visiting
+ * only the entries let through by hists__filter_entries() for the browser's
+ * minimum percentage.
+ */
+static int hist_browser__get_folding(struct hist_browser *browser)
+{
+	struct rb_node *nd = rb_first(&browser->hists->entries);
+	int rows = 0;
+
+	while ((nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+
+		if (he->leaf && he->unfolded)
+			rows += he->nr_rows;
+
+		nd = rb_hierarchy_next(nd);
+	}
+
+	return rows;
+}
+
+/*
+ * Reserve title lines for the column headers: the hpp list's
+ * nr_header_lines when headers are shown, none otherwise.
+ */
+static void hist_browser__set_title_space(struct hist_browser *hb)
+{
+	struct perf_hpp_list *hpp_list = hb->hists->hpp_list;
+
+	if (hb->show_headers)
+		hb->b.extra_title_lines = hpp_list->nr_header_lines;
+	else
+		hb->b.extra_title_lines = 0;
+}
+
+/*
+ * Number of rows the browser has to handle: the entry count that matches
+ * the current mode (hierarchy, filtered or plain) plus the rows of unfolded
+ * callchains.  As a side effect hb->nr_callchain_rows is recomputed.
+ */
+static u32 hist_browser__nr_entries(struct hist_browser *hb)
+{
+	u32 base = symbol_conf.report_hierarchy ? hb->nr_hierarchy_entries :
+		   hist_browser__has_filter(hb) ? hb->nr_non_filtered_entries :
+						  hb->hists->nr_entries;
+
+	hb->nr_callchain_rows = hist_browser__get_folding(hb);
+
+	return base + hb->nr_callchain_rows;
+}
+
+/*
+ * Re-balance browser->rows against the header lines after hb->show_headers
+ * changed: give the header lines back to the body when headers are hidden,
+ * or take them away (and pull the selected index up if it would no longer
+ * fit) when they are shown.
+ */
+static void hist_browser__update_rows(struct hist_browser *hb)
+{
+	struct ui_browser *browser = &hb->b;
+	struct hists *hists = hb->hists;
+	struct perf_hpp_list *hpp_list = hists->hpp_list;
+	u16 index_row;
+
+	if (!hb->show_headers) {
+		/* Headers hidden: their lines become usable rows again. */
+		browser->rows += browser->extra_title_lines;
+		browser->extra_title_lines = 0;
+		return;
+	}
+
+	browser->extra_title_lines = hpp_list->nr_header_lines;
+	browser->rows -= browser->extra_title_lines;
+	/*
+	 * Verify if we were at the last line and that line isn't
+	 * visible because we now show the header line(s).
+	 */
+	index_row = browser->index - browser->top_idx;
+	if (index_row >= browser->rows)
+		browser->index -= index_row - browser->rows + 1;
+}
+
+/*
+ * refresh_dimensions callback: set the browser width from the sort list
+ * width and then run the generic ui_browser__refresh_dimensions().
+ */
+static void hist_browser__refresh_dimensions(struct ui_browser *browser)
+{
+	struct hist_browser *hb = container_of(browser, struct hist_browser, b);
+
+	/* 3 == +/- toggle symbol before actual hist_entry rendering */
+	browser->width = 3 + (hists__sort_list_width(hb->hists) + sizeof("[k]"));
+	/*
+	 * FIXME: Just keeping existing behaviour, but this really should be
+	 *	  before updating browser->width, as it will invalidate the
+	 *	  calculation above. Fix this and the fallout in another
+	 *	  changeset.
+	 */
+	ui_browser__refresh_dimensions(browser);
+}
+
+/*
+ * Bring the browser back to a consistent state: clear the callchain row
+ * count, recompute the entry counts and dimensions, and reset the index.
+ */
+static void hist_browser__reset(struct hist_browser *browser)
+{
+	/*
+	 * The hists__remove_entry_filter() already folds non-filtered
+	 * entries so we can assume it has 0 callchain rows.
+	 */
+	browser->nr_callchain_rows = 0;
+
+	hist_browser__update_nr_entries(browser);
+	browser->b.nr_entries = hist_browser__nr_entries(browser);
+	hist_browser__refresh_dimensions(&browser->b);
+	ui_browser__reset_index(&browser->b);
+}
+
+/* Sign shown in front of a foldable row: '-' when unfolded, '+' when folded. */
+static char tree__folded_sign(bool unfolded)
+{
+	if (unfolded)
+		return '-';
+
+	return '+';
+}
+
+/* Fold sign for a hist entry, or a blank when it has no children. */
+static char hist_entry__folded(const struct hist_entry *he)
+{
+	if (!he->has_children)
+		return ' ';
+
+	return tree__folded_sign(he->unfolded);
+}
+
+/* Fold sign for a callchain list entry, or a blank when it has no children. */
+static char callchain_list__folded(const struct callchain_list *cl)
+{
+	if (!cl->has_children)
+		return ' ';
+
+	return tree__folded_sign(cl->unfolded);
+}
+
+/*
+ * Fold or unfold one callchain list entry.  An entry is only ever marked
+ * unfolded when it actually has children.
+ */
+static void callchain_list__set_folding(struct callchain_list *cl, bool unfold)
+{
+	if (unfold)
+		cl->unfolded = cl->has_children;
+	else
+		cl->unfolded = false;
+}
+
+/*
+ * Count the rows contributed by the children of @node.  For each child its
+ * list entries are counted up to and including the first folded ('+') one;
+ * when the last entry looked at is unfolded ('-') the child's own subtree
+ * is counted recursively.
+ */
+static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
+{
+	struct rb_node *rbn = rb_first(&node->rb_root);
+	int rows = 0;
+
+	while (rbn != NULL) {
+		struct callchain_node *child = rb_entry(rbn, struct callchain_node, rb_node);
+		struct callchain_list *cl;
+		char sign = ' '; /* stays blank when the child has no entries */
+
+		list_for_each_entry(cl, &child->val, list) {
+			rows++;
+
+			sign = callchain_list__folded(cl);
+			if (sign == '+')
+				break;
+		}
+
+		/* Children exist and are unfolded: descend. */
+		if (sign == '-')
+			rows += callchain_node__count_rows_rb_tree(child);
+
+		rbn = rb_next(rbn);
+	}
+
+	return rows;
+}
+
+/*
+ * Row count for a node in flat mode: 1 when the very first list entry
+ * (taken from parent_val, or from val if parent_val is empty) is folded,
+ * otherwise the number of entries in parent_val plus val.
+ */
+static int callchain_node__count_flat_rows(struct callchain_node *node)
+{
+	struct callchain_list *chain;
+	/* 0 means "first entry not inspected yet". */
+	char folded_sign = 0;
+	int n = 0;
+
+	list_for_each_entry(chain, &node->parent_val, list) {
+		if (!folded_sign) {
+			/* only check first chain list entry */
+			folded_sign = callchain_list__folded(chain);
+			if (folded_sign == '+')
+				return 1;
+		}
+		n++;
+	}
+
+	list_for_each_entry(chain, &node->val, list) {
+		if (!folded_sign) {
+			/* node->parent_val list might be empty */
+			folded_sign = callchain_list__folded(chain);
+			if (folded_sign == '+')
+				return 1;
+		}
+		n++;
+	}
+
+	return n;
+}
+
+/* Row count for a node in folded mode: always exactly one row. */
+static int callchain_node__count_folded_rows(struct callchain_node *node __maybe_unused)
+{
+	return 1;
+}
+
+/*
+ * Row count for one callchain node.  Flat and folded modes have their own
+ * counters; otherwise every list entry is one row and, when the last entry
+ * is unfolded, the rows of the node's children are added.
+ */
+static int callchain_node__count_rows(struct callchain_node *node)
+{
+	struct callchain_list *cl;
+	bool last_unfolded = false;
+	int rows = 0;
+
+	if (callchain_param.mode == CHAIN_FLAT)
+		return callchain_node__count_flat_rows(node);
+
+	if (callchain_param.mode == CHAIN_FOLDED)
+		return callchain_node__count_folded_rows(node);
+
+	list_for_each_entry(cl, &node->val, list) {
+		last_unfolded = cl->unfolded;
+		rows++;
+	}
+
+	if (!last_unfolded)
+		return rows;
+
+	return rows + callchain_node__count_rows_rb_tree(node);
+}
+
+/* Total row count over every callchain node stored in the @chain rb tree. */
+static int callchain__count_rows(struct rb_root *chain)
+{
+	struct rb_node *rbn = rb_first(chain);
+	int total = 0;
+
+	while (rbn != NULL) {
+		total += callchain_node__count_rows(rb_entry(rbn, struct callchain_node, rb_node));
+		rbn = rb_next(rbn);
+	}
+
+	return total;
+}
+
+/*
+ * Count the rows below @he in hierarchy mode.  Leaf entries count their
+ * callchain rows and entries flagged has_no_entry count as a single row.
+ * Otherwise each child that is not filtered and reaches hb->min_pcnt is one
+ * row, plus (when @include_children is set) the rows of unfolded children.
+ */
+static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he,
+				bool include_children)
+{
+	struct rb_node *nd;
+	int rows = 0;
+
+	if (he->leaf)
+		return callchain__count_rows(&he->sorted_chain);
+
+	if (he->has_no_entry)
+		return 1;
+
+	for (nd = rb_first(&he->hroot_out); nd != NULL; nd = rb_next(nd)) {
+		struct hist_entry *child = rb_entry(nd, struct hist_entry, rb_node);
+		float percent = hist_entry__get_percent_limit(child);
+
+		if (child->filtered || !(percent >= hb->min_pcnt))
+			continue;
+
+		rows++;
+
+		if (include_children && child->unfolded)
+			rows += hierarchy_count_rows(hb, child, true);
+	}
+
+	return rows;
+}
+
+/*
+ * Flip the unfolded state of @he.  Returns false, without changing
+ * anything, when @he is NULL or has no children.
+ */
+static bool hist_entry__toggle_fold(struct hist_entry *he)
+{
+	if (he == NULL || !he->has_children)
+		return false;
+
+	he->unfolded = !he->unfolded;
+	return true;
+}
+
+/*
+ * Flip the unfolded state of @cl.  Returns false, without changing
+ * anything, when @cl is NULL or has no children.
+ */
+static bool callchain_list__toggle_fold(struct callchain_list *cl)
+{
+	if (cl == NULL || !cl->has_children)
+		return false;
+
+	cl->unfolded = !cl->unfolded;
+	return true;
+}
+
+/*
+ * Initialise has_children for every list entry of every descendant of
+ * @node.  The first entry of a child has children when more entries follow
+ * it or the child has a non-empty subtree; any later entry has children
+ * only when it is the last one and the child has a non-empty subtree.
+ */
+static void callchain_node__init_have_children_rb_tree(struct callchain_node *node)
+{
+	struct rb_node *rbn;
+
+	for (rbn = rb_first(&node->rb_root); rbn != NULL; rbn = rb_next(rbn)) {
+		struct callchain_node *child = rb_entry(rbn, struct callchain_node, rb_node);
+		struct callchain_list *cl;
+		bool seen_first = false;
+
+		list_for_each_entry(cl, &child->val, list) {
+			bool is_last = cl->list.next == &child->val;
+			bool has_subtree = !RB_EMPTY_ROOT(&child->rb_root);
+
+			if (seen_first) {
+				cl->has_children = is_last && has_subtree;
+			} else {
+				seen_first = true;
+				cl->has_children = !is_last || has_subtree;
+			}
+		}
+
+		callchain_node__init_have_children_rb_tree(child);
+	}
+}
+
+/*
+ * Initialise has_children for a top-level callchain node: the first list
+ * entry gets @has_sibling, then the last entry is set according to whether
+ * the node has a non-empty subtree (for a single-entry list the latter
+ * wins).  Descendants are then initialised recursively.
+ */
+static void callchain_node__init_have_children(struct callchain_node *node,
+					       bool has_sibling)
+{
+	struct callchain_list *chain;
+
+	/*
+	 * With an empty list val.next points back at the list head, which
+	 * is not embedded in a callchain_list, so only touch the entries
+	 * when there is at least one.
+	 */
+	if (!list_empty(&node->val)) {
+		chain = list_entry(node->val.next, struct callchain_list, list);
+		chain->has_children = has_sibling;
+
+		chain = list_entry(node->val.prev, struct callchain_list, list);
+		chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
+	}
+
+	callchain_node__init_have_children_rb_tree(node);
+}
+
+/*
+ * Initialise has_children on every node of @root.  has_sibling is true when
+ * the tree holds more than one node.  In flat and folded modes the parent
+ * list of each node is built as well.
+ */
+static void callchain__init_have_children(struct rb_root *root)
+{
+	struct rb_node *first = rb_first(root);
+	bool has_sibling = first && rb_next(first);
+	struct rb_node *rbn;
+
+	for (rbn = rb_first(root); rbn != NULL; rbn = rb_next(rbn)) {
+		struct callchain_node *cnode = rb_entry(rbn, struct callchain_node, rb_node);
+
+		callchain_node__init_have_children(cnode, has_sibling);
+
+		switch (callchain_param.mode) {
+		case CHAIN_FLAT:
+		case CHAIN_FOLDED:
+			callchain_node__make_parent_list(cnode);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * One-time initialisation of he->has_children.  Leaf entries look at their
+ * sorted callchain (and initialise it too); non-leaf entries look at their
+ * hierarchy children.
+ */
+static void hist_entry__init_have_children(struct hist_entry *he)
+{
+	if (he->init_have_children)
+		return;
+
+	if (!he->leaf) {
+		he->has_children = !RB_EMPTY_ROOT(&he->hroot_out);
+	} else {
+		he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
+		callchain__init_have_children(&he->sorted_chain);
+	}
+
+	he->init_have_children = true;
+}
+
+/*
+ * Toggle the fold state of the current selection, which is either the
+ * selected hist entry itself or one of its callchain list entries, and
+ * update the browser's row counters to match.
+ *
+ * Returns true when something was toggled, false when there is no selection
+ * or the selected item has no children.
+ */
+static bool hist_browser__toggle_fold(struct hist_browser *browser)
+{
+	struct hist_entry *he = browser->he_selection;
+	struct map_symbol *ms = browser->selection;
+	/* Only meaningful (and only used) when ms is not the entry's own ms. */
+	struct callchain_list *cl = container_of(ms, struct callchain_list, ms);
+	bool has_children;
+
+	if (!he || !ms)
+		return false;
+
+	if (ms == &he->ms)
+		has_children = hist_entry__toggle_fold(he);
+	else
+		has_children = callchain_list__toggle_fold(cl);
+
+	if (has_children) {
+		int child_rows = 0;
+
+		hist_entry__init_have_children(he);
+		/* Take the entry's old rows out of the counters ... */
+		browser->b.nr_entries -= he->nr_rows;
+
+		if (he->leaf)
+			browser->nr_callchain_rows -= he->nr_rows;
+		else
+			browser->nr_hierarchy_entries -= he->nr_rows;
+
+		if (symbol_conf.report_hierarchy)
+			child_rows = hierarchy_count_rows(browser, he, true);
+
+		if (he->unfolded) {
+			if (he->leaf)
+				he->nr_rows = callchain__count_rows(
+						&he->sorted_chain);
+			else
+				he->nr_rows = hierarchy_count_rows(browser, he, false);
+
+			/* account grand children */
+			if (symbol_conf.report_hierarchy)
+				browser->b.nr_entries += child_rows - he->nr_rows;
+
+			/* Unfolded non-leaf with nothing to show: one placeholder row. */
+			if (!he->leaf && he->nr_rows == 0) {
+				he->has_no_entry = true;
+				he->nr_rows = 1;
+			}
+		} else {
+			if (symbol_conf.report_hierarchy)
+				browser->b.nr_entries -= child_rows - he->nr_rows;
+
+			if (he->has_no_entry)
+				he->has_no_entry = false;
+
+			he->nr_rows = 0;
+		}
+
+		/* ... and put the recomputed rows back in. */
+		browser->b.nr_entries += he->nr_rows;
+
+		if (he->leaf)
+			browser->nr_callchain_rows += he->nr_rows;
+		else
+			browser->nr_hierarchy_entries += he->nr_rows;
+
+		return true;
+	}
+
+	/* If it doesn't have children, no toggling performed */
+	return false;
+}
+
+/*
+ * Apply @unfold to every list entry of every descendant of @node.
+ * Returns the number of list entries visited; a child's subtree is only
+ * visited when the child's last list entry has children.
+ */
+static int callchain_node__set_folding_rb_tree(struct callchain_node *node, bool unfold)
+{
+	struct rb_node *rbn = rb_first(&node->rb_root);
+	int visited = 0;
+
+	while (rbn != NULL) {
+		struct callchain_node *child = rb_entry(rbn, struct callchain_node, rb_node);
+		struct callchain_list *cl;
+		bool last_has_children = false;
+
+		list_for_each_entry(cl, &child->val, list) {
+			visited++;
+			callchain_list__set_folding(cl, unfold);
+			last_has_children = cl->has_children;
+		}
+
+		if (last_has_children)
+			visited += callchain_node__set_folding_rb_tree(child, unfold);
+
+		rbn = rb_next(rbn);
+	}
+
+	return visited;
+}
+
+/*
+ * Apply @unfold to every list entry of @node and, when the last entry has
+ * children, to the node's descendants.  Returns the number of list entries
+ * visited.
+ */
+static int callchain_node__set_folding(struct callchain_node *node, bool unfold)
+{
+	struct callchain_list *cl;
+	bool last_has_children = false;
+	int visited = 0;
+
+	list_for_each_entry(cl, &node->val, list) {
+		visited++;
+		callchain_list__set_folding(cl, unfold);
+		last_has_children = cl->has_children;
+	}
+
+	if (!last_has_children)
+		return visited;
+
+	return visited + callchain_node__set_folding_rb_tree(node, unfold);
+}
+
+/*
+ * Apply @unfold to every callchain node in @chain.  Returns the summed
+ * result of callchain_node__set_folding() over all nodes.
+ */
+static int callchain__set_folding(struct rb_root *chain, bool unfold)
+{
+	struct rb_node *rbn = rb_first(chain);
+	int total = 0;
+
+	while (rbn != NULL) {
+		total += callchain_node__set_folding(rb_entry(rbn, struct callchain_node, rb_node),
+						     unfold);
+		rbn = rb_next(rbn);
+	}
+
+	return total;
+}
+
+/*
+ * Count the direct hierarchy children of @he that are not filtered and
+ * reach hb->min_pcnt.  @unfold is not used here.
+ */
+static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he,
+				 bool unfold __maybe_unused)
+{
+	struct rb_node *rbn = rb_first(&he->hroot_out);
+	int visible = 0;
+
+	while (rbn != NULL) {
+		struct hist_entry *child = rb_entry(rbn, struct hist_entry, rb_node);
+		float percent = hist_entry__get_percent_limit(child);
+
+		if (!child->filtered && percent >= hb->min_pcnt)
+			visible++;
+
+		rbn = rb_next(rbn);
+	}
+
+	return visible;
+}
+
+/*
+ * Set the fold state of @he and recompute he->nr_rows: the count returned
+ * by the callchain/hierarchy folding helper when unfolding an entry with
+ * children, 0 in every other case.
+ */
+static void __hist_entry__set_folding(struct hist_entry *he,
+				      struct hist_browser *hb, bool unfold)
+{
+	int rows = 0;
+
+	hist_entry__init_have_children(he);
+
+	if (unfold)
+		he->unfolded = he->has_children;
+	else
+		he->unfolded = false;
+
+	if (he->has_children) {
+		/* The helper runs for folding too; only its count is dropped. */
+		if (he->leaf)
+			rows = callchain__set_folding(&he->sorted_chain, unfold);
+		else
+			rows = hierarchy_set_folding(hb, he, unfold);
+
+		if (!unfold)
+			rows = 0;
+	}
+
+	he->nr_rows = rows;
+}
+
+/*
+ * Fold or unfold @he and add its contribution to the browser's
+ * nr_hierarchy_entries / nr_callchain_rows counters.  Entries that are
+ * filtered or below browser->min_pcnt are left untouched.
+ */
+static void hist_entry__set_folding(struct hist_entry *he,
+				    struct hist_browser *browser, bool unfold)
+{
+	double percent;
+
+	percent = hist_entry__get_percent_limit(he);
+	if (he->filtered || percent < browser->min_pcnt)
+		return;
+
+	__hist_entry__set_folding(he, browser, unfold);
+
+	/* Depth-0 entries always count; deeper ones only when unfolding. */
+	if (!he->depth || unfold)
+		browser->nr_hierarchy_entries++;
+	if (he->leaf)
+		browser->nr_callchain_rows += he->nr_rows;
+	else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
+		/* Unfolded non-leaf with no children to show: one placeholder row. */
+		browser->nr_hierarchy_entries++;
+		he->has_no_entry = true;
+		he->nr_rows = 1;
+	} else
+		he->has_no_entry = false;
+}
+
+/*
+ * Apply @unfold to every hist entry of the browser, walking the hierarchy
+ * with HMD_FORCE_CHILD.
+ */
+static void
+__hist_browser__set_folding(struct hist_browser *browser, bool unfold)
+{
+	struct rb_node *cur;
+
+	for (cur = rb_first(&browser->hists->entries); cur != NULL; ) {
+		struct hist_entry *entry = rb_entry(cur, struct hist_entry, rb_node);
+
+		/*
+		 * Advance before touching the entry: set folding state even
+		 * if it's currently folded.
+		 */
+		cur = __rb_hierarchy_next(cur, HMD_FORCE_CHILD);
+
+		hist_entry__set_folding(entry, browser, unfold);
+	}
+}
+
+/*
+ * Fold or unfold every entry: the hierarchy/callchain counters are zeroed,
+ * rebuilt by __hist_browser__set_folding(), and the browser's entry count
+ * and index are refreshed.
+ */
+static void hist_browser__set_folding(struct hist_browser *browser, bool unfold)
+{
+	browser->nr_hierarchy_entries = 0;
+	browser->nr_callchain_rows = 0;
+	__hist_browser__set_folding(browser, unfold);
+
+	browser->b.nr_entries = hist_browser__nr_entries(browser);
+	/* Go to the start, we may be way after valid entries after a collapse */
+	ui_browser__reset_index(&browser->b);
+}
+
+/*
+ * Fold or unfold only the currently selected hist entry (a no-op when
+ * nothing is selected) and refresh the browser's entry count.
+ */
+static void hist_browser__set_folding_selected(struct hist_browser *browser, bool unfold)
+{
+	if (!browser->he_selection)
+		return;
+
+	hist_entry__set_folding(browser->he_selection, browser, unfold);
+	browser->b.nr_entries = hist_browser__nr_entries(browser);
+}
+
+/*
+ * Pop up a warning telling the user that events are being lost, with
+ * suggestions on how to avoid it.  The 4 is passed through to
+ * ui_browser__warning() (NOTE(review): presumably a timeout in seconds —
+ * confirm against ui_browser__warning()).
+ */
+static void ui_browser__warn_lost_events(struct ui_browser *browser)
+{
+	ui_browser__warning(browser, 4,
+		"Events are being lost, check IO/CPU overload!\n\n"
+		"You may want to run 'perf' using a RT scheduler policy:\n\n"
+		" perf top -r 80\n\n"
+		"Or reduce the sampling frequency.");
+}
+
+/*
+ * Format the browser title into @bf through the optional title callback.
+ * Returns the callback's result, or 0 (leaving @bf untouched) when no
+ * callback is set.
+ */
+static int hist_browser__title(struct hist_browser *browser, char *bf, size_t size)
+{
+	if (!browser->title)
+		return 0;
+
+	return browser->title(browser, bf, size);
+}
+
+/*
+ * Main key loop of the hists browser.
+ *
+ * Shows the browser and then handles the keys it owns (timer refresh,
+ * debug dump, expand/collapse, header toggle, ENTER on a foldable row).
+ * Any other key, including ENTER when nothing could be toggled, ends the
+ * loop and is handed back to the caller.
+ *
+ * @help:            text shown on the helpline.
+ * @warn_lost_event: when true, warn (once per new count) about lost events
+ *                   on each timer tick.
+ *
+ * Returns -1 if the browser could not be shown, otherwise the key that
+ * ended the loop.
+ */
+int hist_browser__run(struct hist_browser *browser, const char *help,
+		      bool warn_lost_event)
+{
+	int key;
+	char title[160];
+	struct hist_browser_timer *hbt = browser->hbt;
+	int delay_secs = hbt ? hbt->refresh : 0;
+
+	browser->b.entries = &browser->hists->entries;
+	browser->b.nr_entries = hist_browser__nr_entries(browser);
+
+	hist_browser__title(browser, title, sizeof(title));
+
+	if (ui_browser__show(&browser->b, title, "%s", help) < 0)
+		return -1;
+
+	while (1) {
+		key = ui_browser__run(&browser->b, delay_secs);
+
+		switch (key) {
+		case K_TIMER: {
+			u64 nr_entries;
+
+			/* hbt is optional; never call through a NULL timer. */
+			if (hbt)
+				hbt->timer(hbt->arg);
+
+			if (hist_browser__has_filter(browser) ||
+			    symbol_conf.report_hierarchy)
+				hist_browser__update_nr_entries(browser);
+
+			nr_entries = hist_browser__nr_entries(browser);
+			ui_browser__update_nr_entries(&browser->b, nr_entries);
+
+			if (warn_lost_event &&
+			    (browser->hists->stats.nr_lost_warned !=
+			    browser->hists->stats.nr_events[PERF_RECORD_LOST])) {
+				browser->hists->stats.nr_lost_warned =
+					browser->hists->stats.nr_events[PERF_RECORD_LOST];
+				ui_browser__warn_lost_events(&browser->b);
+			}
+
+			hist_browser__title(browser, title, sizeof(title));
+			ui_browser__show_title(&browser->b, title);
+			continue;
+		}
+		case 'D': { /* Debug */
+			static int seq;
+			struct hist_entry *h;
+
+			/* No top node (e.g. empty browser): nothing to describe. */
+			if (browser->b.top == NULL)
+				break;
+
+			h = rb_entry(browser->b.top, struct hist_entry, rb_node);
+			ui_helpline__pop();
+			ui_helpline__fpush("%d: nr_ent=(%d,%d), etl: %d, rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d",
+					   seq++, browser->b.nr_entries,
+					   browser->hists->nr_entries,
+					   browser->b.extra_title_lines,
+					   browser->b.rows,
+					   browser->b.index,
+					   browser->b.top_idx,
+					   h->row_offset, h->nr_rows);
+		}
+			break;
+		case 'C':
+			/* Collapse the whole world. */
+			hist_browser__set_folding(browser, false);
+			break;
+		case 'c':
+			/* Collapse the selected entry. */
+			hist_browser__set_folding_selected(browser, false);
+			break;
+		case 'E':
+			/* Expand the whole world. */
+			hist_browser__set_folding(browser, true);
+			break;
+		case 'e':
+			/* Expand the selected entry. */
+			hist_browser__set_folding_selected(browser, true);
+			break;
+		case 'H':
+			browser->show_headers = !browser->show_headers;
+			hist_browser__update_rows(browser);
+			break;
+		case K_ENTER:
+			if (hist_browser__toggle_fold(browser))
+				break;
+			/* fall thru */
+		default:
+			goto out;
+		}
+	}
+out:
+	ui_browser__hide(&browser->b);
+	return key;
+}
+
+/*
+ * State shared between the callchain printing helpers; one half is used
+ * when drawing in the browser, the other when dumping to a file.
+ */
+struct callchain_print_arg {
+	/* for hists browser */
+	off_t	row_offset;		/* rows still to skip before printing */
+	bool	is_current_entry;	/* set when the selected row was drawn */
+
+	/* for file dump */
+	FILE	*fp;			/* output stream */
+	int	printed;		/* accumulated fprintf() return values */
+};
+
+/*
+ * Callback that emits one callchain row (@str, indented by @offset) either
+ * on screen at @row or into a file, depending on the implementation.
+ */
+typedef void (*print_callchain_entry_fn)(struct hist_browser *browser,
+					 struct callchain_list *chain,
+					 const char *str, int offset,
+					 unsigned short row,
+					 struct callchain_print_arg *arg);
+
+/*
+ * Draw one callchain row in the browser: @offset blanks, the fold sign, a
+ * right arrow when the DSO is shown and the symbol has annotation source,
+ * then @str.  When @row is the current row the entry becomes the browser's
+ * selection and arg->is_current_entry is set.
+ */
+static void hist_browser__show_callchain_entry(struct hist_browser *browser,
+					       struct callchain_list *chain,
+					       const char *str, int offset,
+					       unsigned short row,
+					       struct callchain_print_arg *arg)
+{
+	struct ui_browser *b = &browser->b;
+	char sign = callchain_list__folded(chain);
+	bool annotated = browser->show_dso && chain->ms.sym &&
+			 symbol__annotation(chain->ms.sym)->src;
+	int text_width = b->width - (offset + 2);
+	int color = HE_COLORSET_NORMAL;
+
+	if (ui_browser__is_current_entry(b, row)) {
+		browser->selection = &chain->ms;
+		arg->is_current_entry = true;
+		color = HE_COLORSET_SELECTED;
+	}
+
+	ui_browser__set_color(b, color);
+	ui_browser__gotorc(b, row, 0);
+	ui_browser__write_nstring(b, " ", offset);
+	ui_browser__printf(b, "%c", sign);
+	if (annotated)
+		ui_browser__write_graph(b, SLSMG_RARROW_CHAR);
+	else
+		ui_browser__write_graph(b, ' ');
+	ui_browser__write_nstring(b, str, text_width);
+}
+
+/*
+ * File-dump flavour of the callchain row printer: writes the indentation,
+ * fold sign and @str as one line to arg->fp and adds fprintf()'s return
+ * value to arg->printed.
+ */
+static void hist_browser__fprintf_callchain_entry(struct hist_browser *b __maybe_unused,
+						  struct callchain_list *chain,
+						  const char *str, int offset,
+						  unsigned short row __maybe_unused,
+						  struct callchain_print_arg *arg)
+{
+	char sign = callchain_list__folded(chain);
+	int written = fprintf(arg->fp, "%*s%c %s\n", offset, " ", sign, str);
+
+	arg->printed += written;
+}
+
+/* Callback that reports whether output should stop once @row is reached. */
+typedef bool (*check_output_full_fn)(struct hist_browser *browser,
+				     unsigned short row);
+
+/* Screen output is full once @row equals the browser's row count. */
+static bool hist_browser__check_output_full(struct hist_browser *browser,
+					    unsigned short row)
+{
+	return browser->b.rows == row;
+}
+
+/* File-dump variant: never reports the output as full. */
+static bool hist_browser__check_dump_full(struct hist_browser *browser __maybe_unused,
+					  unsigned short row __maybe_unused)
+{
+	return false;
+}
+
+#define LEVEL_OFFSET_STEP 3
+
+/*
+ * Build the text for one callchain entry and hand it to @print.
+ *
+ * While @arg->row_offset is non-zero the entry is not printed; the offset
+ * is decremented instead and 0 is returned.  Otherwise the symbol name is
+ * optionally suffixed with branch counts (symbol_conf.show_branchflag_count)
+ * and prefixed with the node's value (@need_percent) before printing.
+ *
+ * Returns the number of lines consumed: 0 when skipped, 1 when printed.
+ */
+static int hist_browser__show_callchain_list(struct hist_browser *browser,
+					     struct callchain_node *node,
+					     struct callchain_list *chain,
+					     unsigned short row, u64 total,
+					     bool need_percent, int offset,
+					     print_callchain_entry_fn print,
+					     struct callchain_print_arg *arg)
+{
+	char bf[1024], *alloc_str = NULL;
+	char buf[64], *alloc_str2 = NULL;
+	const char *str;
+
+	if (arg->row_offset != 0) {
+		arg->row_offset--;
+		return 0;
+	}
+
+	str = callchain_list__sym_name(chain, bf, sizeof(bf),
+				       browser->show_dso);
+
+	if (symbol_conf.show_branchflag_count) {
+		callchain_list_counts__printf_value(chain, NULL,
+						    buf, sizeof(buf));
+
+		if (asprintf(&alloc_str2, "%s%s", str, buf) < 0) {
+			/*
+			 * The pointer is undefined after a failed asprintf();
+			 * reset it so the free() below stays harmless.
+			 */
+			alloc_str2 = NULL;
+			str = "Not enough memory!";
+		} else {
+			str = alloc_str2;
+		}
+	}
+
+	if (need_percent) {
+		callchain_node__scnprintf_value(node, buf, sizeof(buf),
+						total);
+
+		if (asprintf(&alloc_str, "%s %s", buf, str) < 0) {
+			/* same as above: do not free() an undefined pointer */
+			alloc_str = NULL;
+			str = "Not enough memory!";
+		} else {
+			str = alloc_str;
+		}
+	}
+
+	print(browser, chain, str, offset, row, arg);
+	free(alloc_str);
+	free(alloc_str2);
+
+	return 1;
+}
+
+/*
+ * Decide whether percentages have to be shown for the level starting at
+ * @node: never for an empty level, always when there is more than one
+ * node, and for a single node only when its cumulated hits differ from
+ * @parent_total.
+ */
+static bool check_percent_display(struct rb_node *node, u64 parent_total)
+{
+	struct callchain_node *only;
+
+	if (!node)
+		return false;
+
+	if (rb_next(node) != NULL)
+		return true;
+
+	only = rb_entry(node, struct callchain_node, rb_node);
+	return parent_total != callchain_cumul_hits(only);
+}
+
+/*
+ * Flat mode: for every node under @root print the entries of its
+ * parent_val list followed by those of its val list, one per line.
+ * Printing of a node stops at the first entry whose folded sign is '+',
+ * and the whole walk stops as soon as @is_output_full reports true.
+ *
+ * Returns the number of rows advanced.
+ */
+static int hist_browser__show_callchain_flat(struct hist_browser *browser,
+					     struct rb_root *root,
+					     unsigned short row, u64 total,
+					     u64 parent_total,
+					     print_callchain_entry_fn print,
+					     struct callchain_print_arg *arg,
+					     check_output_full_fn is_output_full)
+{
+	const int start_row = row;
+	const int base_offset = LEVEL_OFFSET_STEP;
+	struct rb_node *nd = rb_first(root);
+	const bool need_percent = check_percent_display(nd, parent_total);
+
+	while (nd) {
+		struct callchain_node *cnode = rb_entry(nd, struct callchain_node, rb_node);
+		struct rb_node *following = rb_next(nd);
+		struct callchain_list *cl;
+		char sign = ' ';
+		bool at_head = true;
+		int indent_extra = 0;
+
+		list_for_each_entry(cl, &cnode->parent_val, list) {
+			/* only the very first line of a node carries the percentage */
+			const bool with_percent = at_head && need_percent;
+
+			if (!at_head && need_percent)
+				indent_extra = LEVEL_OFFSET_STEP;
+			at_head = false;
+
+			sign = callchain_list__folded(cl);
+
+			row += hist_browser__show_callchain_list(browser, cnode,
+							cl, row, total,
+							with_percent,
+							base_offset + indent_extra,
+							print, arg);
+
+			if (is_output_full(browser, row))
+				goto out;
+
+			if (sign == '+')
+				goto next;
+		}
+
+		list_for_each_entry(cl, &cnode->val, list) {
+			const bool with_percent = at_head && need_percent;
+
+			if (!at_head && need_percent)
+				indent_extra = LEVEL_OFFSET_STEP;
+			at_head = false;
+
+			sign = callchain_list__folded(cl);
+
+			row += hist_browser__show_callchain_list(browser, cnode,
+							cl, row, total,
+							with_percent,
+							base_offset + indent_extra,
+							print, arg);
+
+			if (is_output_full(browser, row))
+				goto out;
+
+			if (sign == '+')
+				break;
+		}
+
+next:
+		if (is_output_full(browser, row))
+			break;
+		nd = following;
+	}
+out:
+	return row - start_row;
+}
+
+/*
+ * Produce the next stage of a folded callchain line.
+ *
+ * With @old_str the symbol of @chain is appended to it, separated by
+ * symbol_conf.field_sep (";" when unset).  Without it the string starts
+ * with "@value_str " when a value string is given, or is just the symbol.
+ *
+ * Returns a newly allocated string, or NULL when asprintf() fails.
+ * @old_str is not freed here.
+ */
+static char *hist_browser__folded_callchain_str(struct hist_browser *browser,
+						struct callchain_list *chain,
+						char *value_str, char *old_str)
+{
+	char symbuf[1024];
+	char *result = NULL;
+	const char *name;
+	int len;
+
+	name = callchain_list__sym_name(chain, symbuf, sizeof(symbuf),
+					browser->show_dso);
+
+	if (old_str != NULL)
+		len = asprintf(&result, "%s%s%s", old_str,
+			       symbol_conf.field_sep ?: ";", name);
+	else if (value_str != NULL)
+		len = asprintf(&result, "%s %s", value_str, name);
+	else
+		len = asprintf(&result, "%s", name);
+
+	return len < 0 ? NULL : result;
+}
+
+/*
+ * Folded mode: every node under @root is printed as a single line made of
+ * the optional value string followed by all entries of its parent_val and
+ * val lists joined by hist_browser__folded_callchain_str().
+ *
+ * Nodes are skipped (and @arg->row_offset decremented) while the offset is
+ * non-zero; the walk stops once @is_output_full reports true.
+ *
+ * Returns the number of rows printed.
+ */
+static int hist_browser__show_callchain_folded(struct hist_browser *browser,
+					       struct rb_root *root,
+					       unsigned short row, u64 total,
+					       u64 parent_total,
+					       print_callchain_entry_fn print,
+					       struct callchain_print_arg *arg,
+					       check_output_full_fn is_output_full)
+{
+	struct rb_node *node;
+	int first_row = row, offset = LEVEL_OFFSET_STEP;
+	bool need_percent;
+
+	node = rb_first(root);
+	need_percent = check_percent_display(node, parent_total);
+
+	while (node) {
+		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
+		struct rb_node *next = rb_next(node);
+		struct callchain_list *chain, *first_chain = NULL;
+		int first = true;
+		char *value_str = NULL, *value_str_alloc = NULL;
+		char *chain_str = NULL, *chain_str_alloc = NULL;
+
+		if (arg->row_offset != 0) {
+			arg->row_offset--;
+			goto next;
+		}
+
+		if (need_percent) {
+			char buf[64];
+
+			callchain_node__scnprintf_value(child, buf, sizeof(buf), total);
+			if (asprintf(&value_str, "%s", buf) < 0) {
+				/*
+				 * Carry on with a placeholder instead of
+				 * jumping to do_print: at this point there is
+				 * neither a chain nor a string to print yet.
+				 */
+				value_str = (char *)"<...>";
+			} else {
+				value_str_alloc = value_str;
+			}
+		}
+
+		list_for_each_entry(chain, &child->parent_val, list) {
+			chain_str = hist_browser__folded_callchain_str(browser,
+						chain, value_str, chain_str);
+			if (first) {
+				first = false;
+				first_chain = chain;
+			}
+
+			if (chain_str == NULL) {
+				chain_str = (char *)"Not enough memory!";
+				goto do_print;
+			}
+
+			/*
+			 * The previous partial string has been copied into
+			 * the new one; release it so only the latest
+			 * allocation stays live.
+			 */
+			free(chain_str_alloc);
+			chain_str_alloc = chain_str;
+		}
+
+		list_for_each_entry(chain, &child->val, list) {
+			chain_str = hist_browser__folded_callchain_str(browser,
+						chain, value_str, chain_str);
+			if (first) {
+				first = false;
+				first_chain = chain;
+			}
+
+			if (chain_str == NULL) {
+				chain_str = (char *)"Not enough memory!";
+				goto do_print;
+			}
+
+			/* see above: drop the superseded partial string */
+			free(chain_str_alloc);
+			chain_str_alloc = chain_str;
+		}
+
+do_print:
+		print(browser, first_chain, chain_str, offset, row++, arg);
+		free(value_str_alloc);
+		free(chain_str_alloc);
+
+next:
+		if (is_output_full(browser, row))
+			break;
+		node = next;
+	}
+
+	return row - first_row;
+}
+
+/*
+ * Graph mode: print the val entries of every node under @root, indented
+ * by @level, and recurse into a node's children when its last printed
+ * entry carries the '-' folded sign.  Percentages are computed against
+ * @parent_total in CHAIN_GRAPH_REL mode and against @total otherwise.
+ *
+ * Returns the number of rows advanced, including those of the recursion.
+ */
+static int hist_browser__show_callchain_graph(struct hist_browser *browser,
+					struct rb_root *root, int level,
+					unsigned short row, u64 total,
+					u64 parent_total,
+					print_callchain_entry_fn print,
+					struct callchain_print_arg *arg,
+					check_output_full_fn is_output_full)
+{
+	const int start_row = row;
+	const int indent = level * LEVEL_OFFSET_STEP;
+	const u64 percent_total = callchain_param.mode == CHAIN_GRAPH_REL ?
+				  parent_total : total;
+	struct rb_node *nd = rb_first(root);
+	const bool need_percent = check_percent_display(nd, parent_total);
+
+	while (nd) {
+		struct callchain_node *cnode = rb_entry(nd, struct callchain_node, rb_node);
+		struct rb_node *following = rb_next(nd);
+		struct callchain_list *cl;
+		char sign = ' ';
+		bool at_head = true;
+		int indent_extra = 0;
+
+		list_for_each_entry(cl, &cnode->val, list) {
+			/* only the first line of a node carries the percentage */
+			const bool with_percent = at_head && need_percent;
+
+			if (!at_head && need_percent)
+				indent_extra = LEVEL_OFFSET_STEP;
+			at_head = false;
+
+			sign = callchain_list__folded(cl);
+
+			row += hist_browser__show_callchain_list(browser, cnode,
+							cl, row, percent_total,
+							with_percent,
+							indent + indent_extra,
+							print, arg);
+
+			if (is_output_full(browser, row))
+				goto out;
+
+			if (sign == '+')
+				break;
+		}
+
+		if (sign == '-') {
+			/* children go one level deeper, two if the extra indent was used */
+			const int child_level = level + (indent_extra ? 2 : 1);
+
+			row += hist_browser__show_callchain_graph(browser, &cnode->rb_root,
+							    child_level, row, total,
+							    cnode->children_hit,
+							    print, arg, is_output_full);
+		}
+		if (is_output_full(browser, row))
+			break;
+		nd = following;
+	}
+out:
+	return row - start_row;
+}
+
+/*
+ * Print the callchain of @entry using the walker that matches
+ * callchain_param.mode (flat, folded, or graph for everything else).
+ * The parent total is the accumulated period when
+ * symbol_conf.cumulate_callchain is set, the entry's own period otherwise.
+ *
+ * If a callback flagged the current row, @entry becomes
+ * browser->he_selection.  Returns what the walker returned.
+ */
+static int hist_browser__show_callchain(struct hist_browser *browser,
+					struct hist_entry *entry, int level,
+					unsigned short row,
+					print_callchain_entry_fn print,
+					struct callchain_print_arg *arg,
+					check_output_full_fn is_output_full)
+{
+	struct rb_root *chain_root = &entry->sorted_chain;
+	u64 total = hists__total_period(entry->hists);
+	u64 parent_total = symbol_conf.cumulate_callchain ?
+			   entry->stat_acc->period : entry->stat.period;
+	int nr_rows;
+
+	if (callchain_param.mode == CHAIN_FLAT)
+		nr_rows = hist_browser__show_callchain_flat(browser, chain_root,
+						row, total, parent_total,
+						print, arg, is_output_full);
+	else if (callchain_param.mode == CHAIN_FOLDED)
+		nr_rows = hist_browser__show_callchain_folded(browser, chain_root,
+						row, total, parent_total,
+						print, arg, is_output_full);
+	else
+		nr_rows = hist_browser__show_callchain_graph(browser, chain_root,
+						level, row, total, parent_total,
+						print, arg, is_output_full);
+
+	if (arg->is_current_entry)
+		browser->he_selection = entry;
+
+	return nr_rows;
+}
+
+/* Per-row context passed to the ->color() callbacks through perf_hpp::ptr. */
+struct hpp_arg {
+	struct ui_browser *b;	/* browser the callbacks print to */
+	char folded_sign;	/* folded sign of the entry being drawn */
+	bool current_entry;	/* true when the row is the browser's current entry */
+};
+
+/*
+ * printf-like helper used by the percentage columns.  It reads exactly two
+ * variadic arguments, an int field width followed by a double percentage,
+ * selects the colour for that percentage, formats into hpp->buf with @fmt
+ * and prints the buffer to the browser found in hpp->ptr.
+ *
+ * Returns the scnprintf() result.
+ */
+int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...)
+{
+	struct hpp_arg *harg = hpp->ptr;
+	double pcnt;
+	va_list ap;
+	int field_len, nr;
+
+	va_start(ap, fmt);
+	field_len = va_arg(ap, int);
+	pcnt = va_arg(ap, double);
+	va_end(ap);
+
+	ui_browser__set_percent_color(harg->b, pcnt, harg->current_entry);
+
+	nr = scnprintf(hpp->buf, hpp->size, fmt, field_len, pcnt);
+	ui_browser__printf(harg->b, "%s", hpp->buf);
+
+	return nr;
+}
+
+/*
+ * Generate, for member @_field of he->stat, a getter and a ->color()
+ * callback named hist_browser__hpp_color_<_type>.  The callback formats
+ * the field through hpp__fmt() with " %*.2f%%" and prints it with
+ * __hpp__slsmg_color_printf().
+ */
+#define __HPP_COLOR_PERCENT_FN(_type, _field)				\
+static u64 __hpp_get_##_field(struct hist_entry *he)			\
+{									\
+	return he->stat._field;						\
+}									\
+									\
+static int								\
+hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt,		\
+				struct perf_hpp *hpp,			\
+				struct hist_entry *he)			\
+{									\
+	return hpp__fmt(fmt, hpp, he, __hpp_get_##_field, " %*.2f%%",	\
+			__hpp__slsmg_color_printf, true);		\
+}
+
+/*
+ * Same as above for member @_field of he->stat_acc.  When
+ * symbol_conf.cumulate_callchain is off the callback does not read
+ * stat_acc; it prints "N/A" padded to the column length instead.
+ */
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field)			\
+static u64 __hpp_get_acc_##_field(struct hist_entry *he)		\
+{									\
+	return he->stat_acc->_field;					\
+}									\
+									\
+static int								\
+hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt,		\
+				struct perf_hpp *hpp,			\
+				struct hist_entry *he)			\
+{									\
+	if (!symbol_conf.cumulate_callchain) {				\
+		struct hpp_arg *arg = hpp->ptr;				\
+		int len = fmt->user_len ?: fmt->len;			\
+		int ret = scnprintf(hpp->buf, hpp->size,		\
+				    "%*s", len, "N/A");			\
+		ui_browser__printf(arg->b, "%s", hpp->buf);		\
+									\
+		return ret;						\
+	}								\
+	return hpp__fmt(fmt, hpp, he, __hpp_get_acc_##_field,		\
+			" %*.2f%%", __hpp__slsmg_color_printf, true);	\
+}
+
+/* Instantiate one colour callback per overhead column. */
+__HPP_COLOR_PERCENT_FN(overhead, period)
+__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
+__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
+__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
+__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
+__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
+
+#undef __HPP_COLOR_PERCENT_FN
+#undef __HPP_COLOR_ACC_PERCENT_FN
+
+/*
+ * Install the browser's colour callbacks into the overhead entries of
+ * perf_hpp__format[].
+ */
+void hist_browser__init_hpp(void)
+{
+	static const struct {
+		int	idx;
+		int	(*color)(struct perf_hpp_fmt *fmt,
+				 struct perf_hpp *hpp,
+				 struct hist_entry *he);
+	} colorizers[] = {
+		{ PERF_HPP__OVERHEAD,		hist_browser__hpp_color_overhead },
+		{ PERF_HPP__OVERHEAD_SYS,	hist_browser__hpp_color_overhead_sys },
+		{ PERF_HPP__OVERHEAD_US,	hist_browser__hpp_color_overhead_us },
+		{ PERF_HPP__OVERHEAD_GUEST_SYS,	hist_browser__hpp_color_overhead_guest_sys },
+		{ PERF_HPP__OVERHEAD_GUEST_US,	hist_browser__hpp_color_overhead_guest_us },
+		{ PERF_HPP__OVERHEAD_ACC,	hist_browser__hpp_color_overhead_acc },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(colorizers) / sizeof(colorizers[0]); i++)
+		perf_hpp__format[colorizers[i].idx].color = colorizers[i].color;
+}
+
+/*
+ * Draw one hist_entry (non-hierarchy mode) starting at @row: the entry
+ * line itself unless entry->row_offset says it is scrolled out, followed
+ * by its callchain when the entry's folded sign is '-' and rows remain.
+ *
+ * Returns the number of rows printed.
+ */
+static int hist_browser__show_entry(struct hist_browser *browser,
+				    struct hist_entry *entry,
+				    unsigned short row)
+{
+	int printed = 0;
+	int width = browser->b.width;
+	char folded_sign = ' ';
+	bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+	off_t row_offset = entry->row_offset;
+	bool first = true;
+	struct perf_hpp_fmt *fmt;
+
+	if (current_entry) {
+		browser->he_selection = entry;
+		browser->selection = &entry->ms;
+	}
+
+	if (symbol_conf.use_callchain) {
+		hist_entry__init_have_children(entry);
+		folded_sign = hist_entry__folded(entry);
+	}
+
+	if (row_offset == 0) {
+		struct hpp_arg arg = {
+			.b		= &browser->b,
+			.folded_sign	= folded_sign,
+			.current_entry	= current_entry,
+		};
+		int column = 0;
+
+		ui_browser__gotorc(&browser->b, row, 0);
+
+		hists__for_each_format(browser->hists, fmt) {
+			char s[2048];
+			struct perf_hpp hpp = {
+				.buf	= s,
+				.size	= sizeof(s),
+				.ptr	= &arg,
+			};
+
+			/* columns left of the horizontal scroll position are not drawn */
+			if (perf_hpp__should_skip(fmt, entry->hists) ||
+			    column++ < browser->b.horiz_scroll)
+				continue;
+
+			if (current_entry && browser->b.navkeypressed) {
+				ui_browser__set_color(&browser->b,
+						      HE_COLORSET_SELECTED);
+			} else {
+				ui_browser__set_color(&browser->b,
+						      HE_COLORSET_NORMAL);
+			}
+
+			/* the first column is preceded by the folded sign, the others by two spaces */
+			if (first) {
+				if (symbol_conf.use_callchain) {
+					ui_browser__printf(&browser->b, "%c ", folded_sign);
+					width -= 2;
+				}
+				first = false;
+			} else {
+				ui_browser__printf(&browser->b, "  ");
+				width -= 2;
+			}
+
+			if (fmt->color) {
+				int ret = fmt->color(fmt, &hpp, entry);
+				hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+				/*
+				 * fmt->color() already used ui_browser to
+				 * print the non alignment bits, skip it (+ret):
+				 */
+				ui_browser__printf(&browser->b, "%s", s + ret);
+			} else {
+				hist_entry__snprintf_alignment(entry, &hpp, fmt, fmt->entry(fmt, &hpp, entry));
+				ui_browser__printf(&browser->b, "%s", s);
+			}
+			/* NOTE(review): relies on hpp.buf having been advanced past the column text -- confirm in hist_entry__snprintf_alignment() */
+			width -= hpp.buf - s;
+		}
+
+		/* The scroll bar isn't being used */
+		if (!browser->b.navkeypressed)
+			width += 1;
+
+		/* pad the remainder of the line */
+		ui_browser__write_nstring(&browser->b, "", width);
+
+		++row;
+		++printed;
+	} else
+		--row_offset;
+
+	if (folded_sign == '-' && row != browser->b.rows) {
+		struct callchain_print_arg arg = {
+			.row_offset = row_offset,
+			.is_current_entry = current_entry,
+		};
+
+		printed += hist_browser__show_callchain(browser,
+				entry, 1, row,
+				hist_browser__show_callchain_entry,
+				&arg,
+				hist_browser__check_output_full);
+	}
+
+	return printed;
+}
+
+/*
+ * Draw one hist_entry in hierarchy mode at @row, indented by @level:
+ * first the overhead columns (first node of hists->hpp_formats), then the
+ * hierarchy indentation, then the entry's own hpp_list columns.  For a
+ * leaf entry whose folded sign is '-' the callchain follows; a leaf with
+ * a non-zero row_offset skips its own line and only shows the callchain.
+ *
+ * Returns the number of rows printed.
+ */
+static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
+					      struct hist_entry *entry,
+					      unsigned short row,
+					      int level)
+{
+	int printed = 0;
+	int width = browser->b.width;
+	char folded_sign = ' ';
+	bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+	off_t row_offset = entry->row_offset;
+	bool first = true;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	struct hpp_arg arg = {
+		.b		= &browser->b,
+		.current_entry	= current_entry,
+	};
+	int column = 0;
+	int hierarchy_indent = (entry->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
+
+	if (current_entry) {
+		browser->he_selection = entry;
+		browser->selection = &entry->ms;
+	}
+
+	hist_entry__init_have_children(entry);
+	folded_sign = hist_entry__folded(entry);
+	arg.folded_sign = folded_sign;
+
+	/* the entry line itself is scrolled out: go straight to the callchain */
+	if (entry->leaf && row_offset) {
+		row_offset--;
+		goto show_callchain;
+	}
+
+	ui_browser__gotorc(&browser->b, row, 0);
+
+	if (current_entry && browser->b.navkeypressed)
+		ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+	else
+		ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+	ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+	width -= level * HIERARCHY_INDENT;
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&entry->hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		char s[2048];
+		struct perf_hpp hpp = {
+			.buf		= s,
+			.size		= sizeof(s),
+			.ptr		= &arg,
+		};
+
+		if (perf_hpp__should_skip(fmt, entry->hists) ||
+		    column++ < browser->b.horiz_scroll)
+			continue;
+
+		if (current_entry && browser->b.navkeypressed) {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_SELECTED);
+		} else {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_NORMAL);
+		}
+
+		if (first) {
+			ui_browser__printf(&browser->b, "%c ", folded_sign);
+			width -= 2;
+			first = false;
+		} else {
+			ui_browser__printf(&browser->b, "  ");
+			width -= 2;
+		}
+
+		if (fmt->color) {
+			int ret = fmt->color(fmt, &hpp, entry);
+			hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+			/*
+			 * fmt->color() already used ui_browser to
+			 * print the non alignment bits, skip it (+ret):
+			 */
+			ui_browser__printf(&browser->b, "%s", s + ret);
+		} else {
+			int ret = fmt->entry(fmt, &hpp, entry);
+			hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+			ui_browser__printf(&browser->b, "%s", s);
+		}
+		width -= hpp.buf - s;
+	}
+
+	/* indent only when at least one overhead column was drawn */
+	if (!first) {
+		ui_browser__write_nstring(&browser->b, "", hierarchy_indent);
+		width -= hierarchy_indent;
+	}
+
+	if (column >= browser->b.horiz_scroll) {
+		char s[2048];
+		struct perf_hpp hpp = {
+			.buf		= s,
+			.size		= sizeof(s),
+			.ptr		= &arg,
+		};
+
+		if (current_entry && browser->b.navkeypressed) {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_SELECTED);
+		} else {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_NORMAL);
+		}
+
+		perf_hpp_list__for_each_format(entry->hpp_list, fmt) {
+			if (first) {
+				ui_browser__printf(&browser->b, "%c ", folded_sign);
+				first = false;
+			} else {
+				ui_browser__write_nstring(&browser->b, "", 2);
+			}
+
+			width -= 2;
+
+			/*
+			 * No need to call hist_entry__snprintf_alignment()
+			 * since this fmt is always the last column in the
+			 * hierarchy mode.
+			 */
+			if (fmt->color) {
+				width -= fmt->color(fmt, &hpp, entry);
+			} else {
+				int i = 0;
+
+				width -= fmt->entry(fmt, &hpp, entry);
+				ui_browser__printf(&browser->b, "%s", ltrim(s));
+
+				/* give back the leading blanks that ltrim() dropped from the output */
+				while (isspace(s[i++]))
+					width++;
+			}
+		}
+	}
+
+	/* The scroll bar isn't being used */
+	if (!browser->b.navkeypressed)
+		width += 1;
+
+	ui_browser__write_nstring(&browser->b, "", width);
+
+	++row;
+	++printed;
+
+show_callchain:
+	if (entry->leaf && folded_sign == '-' && row != browser->b.rows) {
+		struct callchain_print_arg carg = {
+			.row_offset = row_offset,
+		};
+
+		printed += hist_browser__show_callchain(browser, entry,
+					level + 1, row,
+					hist_browser__show_callchain_entry, &carg,
+					hist_browser__check_output_full);
+	}
+
+	return printed;
+}
+
+/*
+ * Draw the "no entry >= N%" placeholder line of hierarchy mode at @row,
+ * indented by @level.  The overhead columns are left blank so the message
+ * lines up with the regular entries.  If @row is the current row the
+ * browser selection is cleared.
+ *
+ * Always returns 1, the number of rows used.
+ */
+static int hist_browser__show_no_entry(struct hist_browser *browser,
+				       unsigned short row, int level)
+{
+	int width = browser->b.width;
+	bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+	bool first = true;
+	int column = 0;
+	int ret;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	int indent = browser->hists->nr_hpp_node - 2;
+
+	if (current_entry) {
+		browser->he_selection = NULL;
+		browser->selection = NULL;
+	}
+
+	ui_browser__gotorc(&browser->b, row, 0);
+
+	if (current_entry && browser->b.navkeypressed)
+		ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+	else
+		ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+	ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+	width -= level * HIERARCHY_INDENT;
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&browser->hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		if (perf_hpp__should_skip(fmt, browser->hists) ||
+		    column++ < browser->b.horiz_scroll)
+			continue;
+
+		ret = fmt->width(fmt, NULL, browser->hists);
+
+		if (first) {
+			/* for folded sign */
+			first = false;
+			ret++;
+		} else {
+			/* space between columns */
+			ret += 2;
+		}
+
+		ui_browser__write_nstring(&browser->b, "", ret);
+		width -= ret;
+	}
+
+	ui_browser__write_nstring(&browser->b, "", indent * HIERARCHY_INDENT);
+	width -= indent * HIERARCHY_INDENT;
+
+	if (column >= browser->b.horiz_scroll) {
+		char buf[32];
+
+		/*
+		 * scnprintf() returns what actually landed in buf, so the
+		 * width accounting matches the printed text even if the
+		 * message had to be truncated.
+		 */
+		ret = scnprintf(buf, sizeof(buf), "no entry >= %.2f%%", browser->min_pcnt);
+		ui_browser__printf(&browser->b, "  %s", buf);
+		width -= ret + 2;
+	}
+
+	/* The scroll bar isn't being used */
+	if (!browser->b.navkeypressed)
+		width += 1;
+
+	ui_browser__write_nstring(&browser->b, "", width);
+	return 1;
+}
+
+/*
+ * Advance @hpp by @inc and report whether the buffer is used up:
+ * returns 1 when hpp->size is no longer positive, 0 otherwise.
+ */
+static int advance_hpp_check(struct perf_hpp *hpp, int inc)
+{
+	advance_hpp(hpp, inc);
+
+	if (hpp->size <= 0)
+		return 1;
+	return 0;
+}
+
+/*
+ * Format header line @line of the regular (non-hierarchy) view into @buf.
+ * Two leading blanks stand in for the folded-sign column when callchains
+ * are in use; visible columns are separated by two blanks unless the
+ * header callback reports a span.  Formatting stops when @buf is full.
+ *
+ * Returns the length of the last piece written.
+ */
+static int
+hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf,
+				 size_t size, int line)
+{
+	struct hists *hists = browser->hists;
+	struct perf_hpp cursor = {
+		.buf    = buf,
+		.size   = size,
+	};
+	struct perf_hpp_fmt *fmt;
+	size_t len = 0;
+	int col = 0;
+	int span = 0;
+
+	if (symbol_conf.use_callchain) {
+		len = scnprintf(buf, size, "  ");
+		if (advance_hpp_check(&cursor, len))
+			return len;
+	}
+
+	hists__for_each_format(browser->hists, fmt) {
+		if (perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		/* columns left of the horizontal scroll position are dropped */
+		if (col++ < browser->b.horiz_scroll)
+			continue;
+
+		len = fmt->header(fmt, &cursor, hists, line, &span);
+		if (advance_hpp_check(&cursor, len))
+			break;
+
+		if (!span) {
+			len = scnprintf(cursor.buf, cursor.size, "  ");
+			if (advance_hpp_check(&cursor, len))
+				break;
+		}
+	}
+
+	return len;
+}
+
+/*
+ * Format the single header line of hierarchy mode into @buf: two leading
+ * blanks, the overhead column headers (each followed by two blanks), the
+ * hierarchy indentation, and then the headers of the remaining format
+ * nodes.  Nodes are separated by " / " and the columns within a node by
+ * "+".  Formatting stops when @buf is used up.
+ *
+ * Returns the value of the last length computed, not the total length.
+ */
+static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *browser, char *buf, size_t size)
+{
+	struct hists *hists = browser->hists;
+	struct perf_hpp dummy_hpp = {
+		.buf    = buf,
+		.size   = size,
+	};
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	size_t ret = 0;
+	int column = 0;
+	int indent = hists->nr_hpp_node - 2;
+	bool first_node, first_col;
+
+	ret = scnprintf(buf, size, "  ");
+	if (advance_hpp_check(&dummy_hpp, ret))
+		return ret;
+
+	first_node = true;
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		if (column++ < browser->b.horiz_scroll)
+			continue;
+
+		ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL);
+		if (advance_hpp_check(&dummy_hpp, ret))
+			break;
+
+		ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "  ");
+		if (advance_hpp_check(&dummy_hpp, ret))
+			break;
+
+		first_node = false;
+	}
+
+	/* indent only when at least one overhead header was written */
+	if (!first_node) {
+		ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s",
+				indent * HIERARCHY_INDENT, "");
+		if (advance_hpp_check(&dummy_hpp, ret))
+			return ret;
+	}
+
+	/* first_node is reused: it now tracks the " / " separator between nodes */
+	first_node = true;
+	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+		if (!first_node) {
+			ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " / ");
+			if (advance_hpp_check(&dummy_hpp, ret))
+				break;
+		}
+		first_node = false;
+
+		first_col = true;
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+			char *start;
+
+			if (perf_hpp__should_skip(fmt, hists))
+				continue;
+
+			if (!first_col) {
+				ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "+");
+				if (advance_hpp_check(&dummy_hpp, ret))
+					break;
+			}
+			first_col = false;
+
+			ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL);
+			/* NOTE(review): assumes ret is within the remaining buffer -- confirm ->header() never returns more than it wrote */
+			dummy_hpp.buf[ret] = '\0';
+
+			/* strip the padding and move the trimmed header to the front */
+			start = trim(dummy_hpp.buf);
+			ret = strlen(start);
+
+			if (start != dummy_hpp.buf)
+				memmove(dummy_hpp.buf, start, ret + 1);
+
+			if (advance_hpp_check(&dummy_hpp, ret))
+				break;
+		}
+	}
+
+	return ret;
+}
+
+/* Format the hierarchy-mode header and draw it on row 0 in the root colour. */
+static void hists_browser__hierarchy_headers(struct hist_browser *browser)
+{
+	struct ui_browser *ub = &browser->b;
+	char line[1024];
+
+	hists_browser__scnprintf_hierarchy_headers(browser, line, sizeof(line));
+
+	ui_browser__gotorc(ub, 0, 0);
+	ui_browser__set_color(ub, HE_COLORSET_ROOT);
+	ui_browser__write_nstring(ub, line, ub->width + 1);
+}
+
+/*
+ * Draw every header line of the regular view: one title row per line
+ * counted in the hists' hpp_list->nr_header_lines, in the root colour.
+ */
+static void hists_browser__headers(struct hist_browser *browser)
+{
+	struct perf_hpp_list *hpp_list = browser->hists->hpp_list;
+	struct ui_browser *ub = &browser->b;
+	int line = 0;
+
+	while (line < hpp_list->nr_header_lines) {
+		char text[1024];
+
+		hists_browser__scnprintf_headers(browser, text,
+						 sizeof(text), line);
+
+		ui_browser__gotorc_title(ub, line, 0);
+		ui_browser__set_color(ub, HE_COLORSET_ROOT);
+		ui_browser__write_nstring(ub, text, ub->width + 1);
+
+		line++;
+	}
+}
+
+/* Draw the column headers using the routine that matches the report mode. */
+static void hist_browser__show_headers(struct hist_browser *browser)
+{
+	if (!symbol_conf.report_hierarchy) {
+		hists_browser__headers(browser);
+		return;
+	}
+
+	hists_browser__hierarchy_headers(browser);
+}
+
+/*
+ * Make sure browser->top is set: when it is still NULL, point it at the
+ * first node of the owning hist_browser's hists->entries tree.
+ */
+static void ui_browser__hists_init_top(struct ui_browser *browser)
+{
+	struct hist_browser *hb;
+
+	if (browser->top != NULL)
+		return;
+
+	hb = container_of(browser, struct hist_browser, b);
+	browser->top = rb_first(&hb->hists->entries);
+}
+
+/*
+ * Redraw the browser contents starting at browser->top.  Filtered entries
+ * are marked as folded and skipped, as are entries below the minimum
+ * percentage.  Drawing stops once browser->rows rows have been produced.
+ *
+ * Returns the number of rows drawn.
+ */
+static unsigned int hist_browser__refresh(struct ui_browser *browser)
+{
+	struct hist_browser *hb = container_of(browser, struct hist_browser, b);
+	struct rb_node *nd;
+	unsigned row = 0;
+
+	if (hb->show_headers)
+		hist_browser__show_headers(hb);
+
+	ui_browser__hists_init_top(browser);
+
+	/* the selection is re-established while the rows are drawn */
+	hb->he_selection = NULL;
+	hb->selection = NULL;
+
+	for (nd = browser->top; nd; nd = rb_hierarchy_next(nd)) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+		float pcnt;
+
+		if (he->filtered) {
+			/* let it move to sibling */
+			he->unfolded = false;
+			continue;
+		}
+
+		pcnt = hist_entry__get_percent_limit(he);
+		if (pcnt < hb->min_pcnt)
+			continue;
+
+		if (!symbol_conf.report_hierarchy) {
+			row += hist_browser__show_entry(hb, he, row);
+		} else {
+			row += hist_browser__show_hierarchy_entry(hb, he, row,
+								  he->depth);
+			if (row == browser->rows)
+				break;
+
+			if (he->has_no_entry) {
+				hist_browser__show_no_entry(hb, row, he->depth + 1);
+				row++;
+			}
+		}
+
+		if (row == browser->rows)
+			break;
+	}
+
+	return row;
+}
+
+/*
+ * Starting at @nd and moving forward, return the first entry that is not
+ * filtered and whose percentage is at least @min_pcnt, or NULL when there
+ * is none.
+ */
+static struct rb_node *hists__filter_entries(struct rb_node *nd,
+					     float min_pcnt)
+{
+	for (;;) {
+		struct hist_entry *he;
+		struct rb_node *sibling;
+		float pcnt;
+
+		if (nd == NULL)
+			return NULL;
+
+		he = rb_entry(nd, struct hist_entry, rb_node);
+		pcnt = hist_entry__get_percent_limit(he);
+
+		if (!he->filtered && pcnt >= min_pcnt)
+			return nd;
+
+		/*
+		 * A filtered entry has all of its children filtered as
+		 * well, so prefer the sibling over descending.
+		 */
+		sibling = rb_next(nd);
+		nd = sibling ? sibling : rb_hierarchy_next(nd);
+	}
+}
+
+/*
+ * Backward counterpart of hists__filter_entries(): starting at @nd and
+ * moving towards the beginning, return the first entry that is not
+ * filtered and reaches @min_pcnt, or NULL.
+ */
+static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
+						  float min_pcnt)
+{
+	for (; nd != NULL; nd = rb_hierarchy_prev(nd)) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+		float pcnt = hist_entry__get_percent_limit(he);
+
+		if (!he->filtered && pcnt >= min_pcnt)
+			break;
+	}
+
+	return nd;
+}
+
+/*
+ * Move browser->top by @offset rows relative to @whence (SEEK_SET,
+ * SEEK_CUR or SEEK_END), skipping entries rejected by the filters.
+ * Unfolded leaf entries span h->nr_rows rows, so the top of the screen
+ * may land inside one of them; in that case h->row_offset records how
+ * many of its rows are scrolled out.
+ */
+static void ui_browser__hists_seek(struct ui_browser *browser,
+				   off_t offset, int whence)
+{
+	struct hist_entry *h;
+	struct rb_node *nd;
+	bool first = true;
+	struct hist_browser *hb;
+
+	hb = container_of(browser, struct hist_browser, b);
+
+	if (browser->nr_entries == 0)
+		return;
+
+	ui_browser__hists_init_top(browser);
+
+	switch (whence) {
+	case SEEK_SET:
+		nd = hists__filter_entries(rb_first(browser->entries),
+					   hb->min_pcnt);
+		break;
+	case SEEK_CUR:
+		/* relative move: keep the row_offset of the current top entry */
+		nd = browser->top;
+		goto do_offset;
+	case SEEK_END:
+		nd = rb_hierarchy_last(rb_last(browser->entries));
+		nd = hists__filter_prev_entries(nd, hb->min_pcnt);
+		first = false;
+		break;
+	default:
+		return;
+	}
+
+	/*
+	 * Moves not relative to the first visible entry invalidates its
+	 * row_offset:
+	 */
+	h = rb_entry(browser->top, struct hist_entry, rb_node);
+	h->row_offset = 0;
+
+	/*
+	 * Here we have to check if nd is unfolded (expanded), if it is we
+	 * can't go to the next top level hist_entry, instead we must compute
+	 * an offset of what _not_ to show and not change the first visible
+	 * entry.
+	 *
+	 * This offset increments when we are going from top to bottom and
+	 * decreases when we're going from bottom to top.
+	 *
+	 * As we don't have backpointers to the top level in the callchains
+	 * structure, we need to always print the whole hist_entry callchain,
+	 * skipping the first ones that are before the first visible entry
+	 * and stop when we printed enough lines to fill the screen.
+	 */
+do_offset:
+	if (!nd)
+		return;
+
+	if (offset > 0) {
+		/* forward: consume the rows of unfolded entries before stepping on */
+		do {
+			h = rb_entry(nd, struct hist_entry, rb_node);
+			if (h->unfolded && h->leaf) {
+				u16 remaining = h->nr_rows - h->row_offset;
+				if (offset > remaining) {
+					offset -= remaining;
+					h->row_offset = 0;
+				} else {
+					/* the target row lies inside this entry */
+					h->row_offset += offset;
+					offset = 0;
+					browser->top = nd;
+					break;
+				}
+			}
+			nd = hists__filter_entries(rb_hierarchy_next(nd),
+						   hb->min_pcnt);
+			if (nd == NULL)
+				break;
+			--offset;
+			browser->top = nd;
+		} while (offset != 0);
+	} else if (offset < 0) {
+		/*
+		 * backward: 'first' is only true for the entry a SEEK_CUR move
+		 * starts from, where just row_offset rows lie above the top of
+		 * the screen; entries reached later count with all nr_rows.
+		 */
+		while (1) {
+			h = rb_entry(nd, struct hist_entry, rb_node);
+			if (h->unfolded && h->leaf) {
+				if (first) {
+					if (-offset > h->row_offset) {
+						offset += h->row_offset;
+						h->row_offset = 0;
+					} else {
+						h->row_offset += offset;
+						offset = 0;
+						browser->top = nd;
+						break;
+					}
+				} else {
+					if (-offset > h->nr_rows) {
+						offset += h->nr_rows;
+						h->row_offset = 0;
+					} else {
+						h->row_offset = h->nr_rows + offset;
+						offset = 0;
+						browser->top = nd;
+						break;
+					}
+				}
+			}
+
+			nd = hists__filter_prev_entries(rb_hierarchy_prev(nd),
+							hb->min_pcnt);
+			if (nd == NULL)
+				break;
+			++offset;
+			browser->top = nd;
+			if (offset == 0) {
+				/*
+				 * Last unfiltered hist_entry, check if it is
+				 * unfolded, if it is then we should have
+				 * row_offset at its last entry.
+				 */
+				h = rb_entry(nd, struct hist_entry, rb_node);
+				if (h->unfolded && h->leaf)
+					h->row_offset = h->nr_rows;
+				break;
+			}
+			first = false;
+		}
+	} else {
+		/* no displacement: show nd from its first row */
+		browser->top = nd;
+		h = rb_entry(nd, struct hist_entry, rb_node);
+		h->row_offset = 0;
+	}
+}
+
+/*
+ * Dump the callchain of @he to @fp, starting at indentation @level.
+ * The dump is never cut short.  Returns the sum of the fprintf() return
+ * values collected by the print callback.
+ */
+static int hist_browser__fprintf_callchain(struct hist_browser *browser,
+					   struct hist_entry *he, FILE *fp,
+					   int level)
+{
+	struct callchain_print_arg dump_arg = { .fp = fp, };
+
+	hist_browser__show_callchain(browser, he, level, 0,
+				     hist_browser__fprintf_callchain_entry,
+				     &dump_arg,
+				     hist_browser__check_dump_full);
+
+	return dump_arg.printed;
+}
+
+/*
+ * Dump one hist_entry (non-hierarchy mode) to @fp: the folded sign when
+ * callchains are in use, every non-skipped column separated by two
+ * blanks, and the callchain when the entry's folded sign is '-'.
+ *
+ * Returns the sum of the fprintf() return values.
+ */
+static int hist_browser__fprintf_entry(struct hist_browser *browser,
+				       struct hist_entry *he, FILE *fp)
+{
+	char s[8192];
+	int printed = 0;
+	char folded_sign = ' ';
+	struct perf_hpp hpp = {
+		.buf = s,
+		.size = sizeof(s),
+	};
+	struct perf_hpp_fmt *fmt;
+	bool first = true;
+	int ret;
+
+	/*
+	 * Start from an empty string: if every format is skipped nothing
+	 * writes into s before it is printed with "%s" below.
+	 */
+	s[0] = '\0';
+
+	if (symbol_conf.use_callchain) {
+		folded_sign = hist_entry__folded(he);
+		printed += fprintf(fp, "%c ", folded_sign);
+	}
+
+	hists__for_each_format(browser->hists, fmt) {
+		if (perf_hpp__should_skip(fmt, he->hists))
+			continue;
+
+		if (!first) {
+			ret = scnprintf(hpp.buf, hpp.size, "  ");
+			advance_hpp(&hpp, ret);
+		} else
+			first = false;
+
+		ret = fmt->entry(fmt, &hpp, he);
+		ret = hist_entry__snprintf_alignment(he, &hpp, fmt, ret);
+		advance_hpp(&hpp, ret);
+	}
+	printed += fprintf(fp, "%s\n", s);
+
+	if (folded_sign == '-')
+		printed += hist_browser__fprintf_callchain(browser, he, fp, 1);
+
+	return printed;
+}
+
+
+/*
+ * Dump one hist_entry in hierarchy mode to @fp: @level worth of
+ * indentation, the folded sign, the overhead columns, the hierarchy
+ * indentation and the entry's own columns, with trailing blanks removed.
+ * The callchain of an unfolded ('-') leaf entry follows.
+ *
+ * Returns the sum of the fprintf() return values.
+ */
+static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser,
+						 struct hist_entry *he,
+						 FILE *fp, int level)
+{
+	char line[8192];
+	struct perf_hpp hpp = {
+		.buf = line,
+		.size = sizeof(line),
+	};
+	const int hierarchy_indent = (he->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
+	struct perf_hpp_list_node *overhead_node;
+	struct perf_hpp_fmt *fmt;
+	bool need_sep = false;
+	char folded_sign;
+	int printed;
+	int ret;
+
+	printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, "");
+
+	folded_sign = hist_entry__folded(he);
+	printed += fprintf(fp, "%c", folded_sign);
+
+	/* the first hpp_list_node is for overhead columns */
+	overhead_node = list_first_entry(&he->hists->hpp_formats,
+					 struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&overhead_node->hpp, fmt) {
+		if (need_sep) {
+			ret = scnprintf(hpp.buf, hpp.size, "  ");
+			advance_hpp(&hpp, ret);
+		}
+		need_sep = true;
+
+		ret = fmt->entry(fmt, &hpp, he);
+		advance_hpp(&hpp, ret);
+	}
+
+	ret = scnprintf(hpp.buf, hpp.size, "%*s", hierarchy_indent, "");
+	advance_hpp(&hpp, ret);
+
+	/* the entry's own columns are always preceded by two blanks */
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		ret = scnprintf(hpp.buf, hpp.size, "  ");
+		advance_hpp(&hpp, ret);
+
+		ret = fmt->entry(fmt, &hpp, he);
+		advance_hpp(&hpp, ret);
+	}
+
+	printed += fprintf(fp, "%s\n", rtrim(line));
+
+	if (he->leaf && folded_sign == '-')
+		printed += hist_browser__fprintf_callchain(browser, he, fp,
+							   he->depth + 1);
+
+	return printed;
+}
+
+/*
+ * Print every entry that passes the browser's percent filter to @fp, walking
+ * the tree with rb_hierarchy_next().  Hierarchy mode uses the hierarchy
+ * printer, everything else the flat one.
+ *
+ * Returns the sum of what the per-entry printers returned.
+ */
+static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
+{
+	struct rb_node *node;
+	int total = 0;
+
+	for (node = hists__filter_entries(rb_first(browser->b.entries),
+					  browser->min_pcnt);
+	     node != NULL;
+	     node = hists__filter_entries(rb_hierarchy_next(node),
+					  browser->min_pcnt)) {
+		struct hist_entry *entry = rb_entry(node, struct hist_entry,
+						    rb_node);
+
+		if (!symbol_conf.report_hierarchy)
+			total += hist_browser__fprintf_entry(browser, entry, fp);
+		else
+			total += hist_browser__fprintf_hierarchy_entry(browser,
+								       entry, fp,
+								       entry->depth);
+	}
+
+	return total;
+}
+
+/*
+ * Write the current histogram view to the first "perf.hist.N" file name that
+ * does not exist yet, starting the search at browser->print_seq.
+ *
+ * The outcome is reported on the help line.  Returns 0 on success and -1 when
+ * no free name was found, the file could not be opened, or writing it failed.
+ */
+static int hist_browser__dump(struct hist_browser *browser)
+{
+	char filename[64];
+	FILE *fp;
+	bool failed;
+
+	while (1) {
+		scnprintf(filename, sizeof(filename), "perf.hist.%d", browser->print_seq);
+		/* access() returns non-zero when the name is not taken yet */
+		if (access(filename, F_OK))
+			break;
+		/*
+		 * XXX: Just an arbitrary lazy upper limit
+		 */
+		if (++browser->print_seq == 8192) {
+			ui_helpline__fpush("Too many perf.hist.N files, nothing written!");
+			return -1;
+		}
+	}
+
+	fp = fopen(filename, "w");
+	if (fp == NULL) {
+		char bf[64];
+		const char *err = str_error_r(errno, bf, sizeof(bf));
+		ui_helpline__fpush("Couldn't write to %s: %s", filename, err);
+		return -1;
+	}
+
+	++browser->print_seq;
+	hist_browser__fprintf(browser, fp);
+
+	/*
+	 * stdio buffers the output, so a failed write may only show up in the
+	 * stream's error flag or when fclose() flushes the remaining data.
+	 */
+	failed = ferror(fp) != 0;
+	if (fclose(fp) != 0)
+		failed = true;
+
+	if (failed) {
+		ui_helpline__fpush("Error writing %s, file may be incomplete!", filename);
+		return -1;
+	}
+
+	ui_helpline__fpush("%s written!", filename);
+
+	return 0;
+}
+
+/*
+ * Bind @browser to @hists: install the refresh/seek callbacks, pick up the
+ * header setting from symbol_conf and add one to b.columns per displayed
+ * format.  In hierarchy mode only the overhead formats are counted, plus a
+ * single column for all sort keys.  Column widths of @hists are reset last.
+ */
+void hist_browser__init(struct hist_browser *browser,
+			struct hists *hists)
+{
+	struct perf_hpp_fmt *fmt;
+
+	browser->hists = hists;
+	browser->b.refresh = hist_browser__refresh;
+	browser->b.refresh_dimensions = hist_browser__refresh_dimensions;
+	browser->b.seek = ui_browser__hists_seek;
+	browser->b.use_navkeypressed = true;
+	browser->show_headers = symbol_conf.show_hist_headers;
+	hist_browser__set_title_space(browser);
+
+	if (!symbol_conf.report_hierarchy) {
+		hists__for_each_format(hists, fmt)
+			browser->b.columns++;
+	} else {
+		/* the overhead columns live in the first node */
+		struct perf_hpp_list_node *overhead_node =
+			list_first_entry(&hists->hpp_formats,
+					 struct perf_hpp_list_node, list);
+
+		perf_hpp_list__for_each_format(&overhead_node->hpp, fmt)
+			browser->b.columns++;
+
+		/* one more column shared by all the hierarchy sort keys */
+		browser->b.columns++;
+	}
+
+	hists__reset_column_width(hists);
+}
+
+/*
+ * Allocate a zeroed hist_browser and initialise it for @hists.
+ * Returns NULL when the allocation fails.
+ */
+struct hist_browser *hist_browser__new(struct hists *hists)
+{
+	struct hist_browser *hb = zalloc(sizeof(*hb));
+
+	if (hb == NULL)
+		return NULL;
+
+	hist_browser__init(hb, hists);
+	return hb;
+}
+
+/*
+ * Create a browser for the hists of @evsel and attach the timer, the
+ * environment and the title formatter to it.
+ * Returns NULL when the browser could not be allocated.
+ */
+static struct hist_browser *
+perf_evsel_browser__new(struct perf_evsel *evsel,
+			struct hist_browser_timer *hbt,
+			struct perf_env *env)
+{
+	struct hist_browser *hb = hist_browser__new(evsel__hists(evsel));
+
+	if (hb == NULL)
+		return NULL;
+
+	hb->hbt   = hbt;
+	hb->env   = env;
+	hb->title = hists_browser__scnprintf_title;
+
+	return hb;
+}
+
+/*
+ * Release @browser.  Only the struct itself is freed here; nothing it points
+ * to is released by this function.
+ */
+void hist_browser__delete(struct hist_browser *browser)
+{
+	free(browser);
+}
+
+/* Return the browser's he_selection field as is; it may be NULL. */
+static struct hist_entry *hist_browser__selected_entry(struct hist_browser *browser)
+{
+	return browser->he_selection;
+}
+
+/*
+ * Return the thread of the selected hist entry.  he_selection is dereferenced
+ * without a check, so the caller must make sure it is not NULL.
+ */
+static struct thread *hist_browser__selected_thread(struct hist_browser *browser)
+{
+	struct hist_entry *selected = browser->he_selection;
+
+	return selected->thread;
+}
+
+/*
+ * Tell 'report' from 'top': the browser counts as a report browser exactly
+ * when it was given no timer (@timer is NULL).
+ */
+static inline bool is_report_browser(void *timer)
+{
+	return !timer;
+}
+
+/*
+ * Format the browser title into @bf (at most @size bytes).  For a browser
+ * with a timer the title additionally gets " [z]" appended while the 'zero'
+ * flag of the perf_top behind the timer is set.
+ *
+ * Returns the number of characters written.
+ */
+static int hists_browser__scnprintf_title(struct hist_browser *browser, char *bf, size_t size)
+{
+	struct hist_browser_timer *hbt = browser->hbt;
+	bool has_timer = !is_report_browser(hbt);
+	int len = __hists__scnprintf_title(browser->hists, bf, size, has_timer);
+	struct perf_top *top;
+
+	if (!has_timer)
+		return len;
+
+	top = hbt->arg;
+	if (top->zero)
+		len += scnprintf(bf + len, size - len, " [z]");
+
+	return len;
+}
+
+/*
+ * zfree() the first @n entries of @options, which also resets each slot to
+ * NULL so the array can be reused or freed again safely.
+ */
+static inline void free_popup_options(char **options, int n)
+{
+	int idx = 0;
+
+	while (idx < n) {
+		zfree(&options[idx]);
+		idx++;
+	}
+}
+
+/*
+ * "input_name" points to a malloced buffer only after the perf data file has
+ * been switched at runtime.  The "is_input_name_malloced" flag records that,
+ * so the switch code knows whether the current "input_name" must be free()d
+ * before it is replaced.
+ */
+static bool is_input_name_malloced = false;
+
+/*
+ * Let the user pick another perf data file from the directory named by $PWD.
+ *
+ * Regular files there whose first eight bytes satisfy is_perf_magic() are
+ * offered in a popup menu; scanning stops once 32 of them were collected.
+ * On a valid choice the global "input_name" is replaced with a malloced copy
+ * of the chosen path.
+ *
+ * Returns 0 when "input_name" was switched, -1 otherwise.
+ */
+static int switch_data_file(void)
+{
+	char *options[32], *abs_path[32];
+	struct dirent *dent;
+	int nr_options = 0;
+	int ret = -1;
+	DIR *pwd_dir;
+	char *pwd;
+
+	pwd = getenv("PWD");
+	if (pwd == NULL)
+		return -1;
+
+	pwd_dir = opendir(pwd);
+	if (pwd_dir == NULL)
+		return -1;
+
+	memset(options, 0, sizeof(options));
+	memset(abs_path, 0, sizeof(abs_path));
+
+	while ((dent = readdir(pwd_dir)) != NULL) {
+		char path[PATH_MAX];
+		u64 magic;
+		char *name = dent->d_name;
+		bool out_of_memory = false;
+		FILE *file;
+
+		if (dent->d_type != DT_REG)
+			continue;
+
+		snprintf(path, sizeof(path), "%s/%s", pwd, name);
+
+		file = fopen(path, "r");
+		if (file == NULL)
+			continue;
+
+		/* only files that start with a perf magic number are listed */
+		if (fread(&magic, 1, 8, file) == 8 && is_perf_magic(magic)) {
+			options[nr_options] = strdup(name);
+			if (options[nr_options] != NULL) {
+				abs_path[nr_options] = strdup(path);
+				if (abs_path[nr_options] != NULL) {
+					nr_options++;
+				} else {
+					/* keep both arrays in step, then give up */
+					zfree(&options[nr_options]);
+					ui__warning("Can't search all data files due to memory shortage.\n");
+					out_of_memory = true;
+				}
+			}
+		}
+
+		fclose(file);
+		if (out_of_memory)
+			break;
+
+		if (nr_options >= 32) {
+			ui__warning("Too many perf data files in PWD!\n"
+				    "Only the first 32 files will be listed.\n");
+			break;
+		}
+	}
+	closedir(pwd_dir);
+
+	if (nr_options > 0) {
+		int choice = ui__popup_menu(nr_options, options);
+
+		if (choice >= 0 && choice < nr_options) {
+			char *new_name = strdup(abs_path[choice]);
+
+			if (new_name == NULL) {
+				ui__warning("Data switch failed due to memory shortage!\n");
+			} else {
+				if (is_input_name_malloced)
+					free((void *)input_name);
+				input_name = new_name;
+				is_input_name_malloced = true;
+				ret = 0;
+			}
+		}
+	}
+
+	free_popup_options(options, nr_options);
+	free_popup_options(abs_path, nr_options);
+	return ret;
+}
+
+/*
+ * One entry of the popup menu: the handler to run plus the arguments the
+ * add_*_opt() helpers captured for it.  Each handler reads only the fields
+ * relevant to it.
+ */
+struct popup_action {
+	struct thread 		*thread;	/* target of thread zoom / script actions */
+	struct map_symbol 	ms;		/* map and/or symbol the action works on */
+	int			socket;		/* target of the socket zoom action */
+
+	/* handler; the menu loop shows the menu again when it returns 1 */
+	int (*fn)(struct hist_browser *browser, struct popup_action *act);
+};
+
+/*
+ * Popup action: open the annotate browser for the symbol in @act->ms.
+ *
+ * Nothing happens (0 is returned) when objdump cannot be looked up or the
+ * symbol's annotation has no 'src'.  Returns 1 when the annotate browser was
+ * left with 'q' or Ctrl-C on an entry that has branch info, otherwise 0.
+ */
+static int
+do_annotate(struct hist_browser *browser, struct popup_action *act)
+{
+	struct perf_evsel *evsel;
+	struct hist_entry *selected;
+	int key;
+
+	if (!objdump_path && perf_env__lookup_objdump(browser->env))
+		return 0;
+
+	if (symbol__annotation(act->ms.sym)->src == NULL)
+		return 0;
+
+	evsel = hists_to_evsel(browser->hists);
+	key = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt);
+	selected = hist_browser__selected_entry(browser);
+
+	/*
+	 * Offer the option to annotate the other branch source or target
+	 * (if they exist) when returning from annotate.
+	 */
+	if ((key == 'q' || key == CTRL('c')) && selected->branch_info)
+		return 1;
+
+	ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
+	if (key != 0)
+		ui_browser__handle_resize(&browser->b);
+
+	return 0;
+}
+
+/*
+ * Add an "Annotate <symbol>" menu entry for @sym in @map.
+ *
+ * Returns 1 when *@optstr and @act were filled in, 0 when there is no symbol,
+ * the map's DSO already had an annotate warning, or the label allocation
+ * failed.
+ */
+static int
+add_annotate_opt(struct hist_browser *browser __maybe_unused,
+		 struct popup_action *act, char **optstr,
+		 struct map *map, struct symbol *sym)
+{
+	if (sym == NULL)
+		return 0;
+
+	if (map->dso->annotate_warned)
+		return 0;
+
+	if (asprintf(optstr, "Annotate %s", sym->name) < 0)
+		return 0;
+
+	act->fn = do_annotate;
+	act->ms.sym = sym;
+	act->ms.map = map;
+
+	return 1;
+}
+
+/*
+ * Popup action: toggle the thread filter of the browser's hists.
+ *
+ * With a filter already set it is removed (and dropped from the zoom stack);
+ * otherwise @act->thread becomes the filter and is pushed on the stack.  The
+ * hists are re-filtered and the browser reset afterwards.  Does nothing when
+ * there is no thread or neither the thread nor the comm sort key is in use.
+ *
+ * Always returns 0.
+ */
+static int
+do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
+{
+	struct hists *hists = browser->hists;
+	struct thread *thread = act->thread;
+	const char *comm_str;
+
+	if (thread == NULL)
+		return 0;
+
+	if (!hists__has(hists, thread) && !hists__has(hists, comm))
+		return 0;
+
+	if (hists->thread_filter) {
+		/* zoom out */
+		pstack__remove(browser->pstack, &hists->thread_filter);
+		perf_hpp__set_elide(HISTC_THREAD, false);
+		thread__zput(hists->thread_filter);
+		ui_helpline__pop();
+	} else {
+		/* zoom in */
+		comm_str = thread->comm_set ? thread__comm_str(thread) : "";
+
+		if (hists__has(hists, thread))
+			ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
+					   comm_str, thread->tid);
+		else
+			ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"",
+					   comm_str);
+
+		hists->thread_filter = thread__get(thread);
+		perf_hpp__set_elide(HISTC_THREAD, false);
+		pstack__push(browser->pstack, &hists->thread_filter);
+	}
+
+	hists__filter_by_thread(hists);
+	hist_browser__reset(browser);
+
+	return 0;
+}
+
+/*
+ * Add a "Zoom into/out of ... thread" menu entry for @thread; the tid is part
+ * of the label only when the thread sort key is in use.
+ *
+ * Returns 1 when *@optstr and @act were filled in, 0 when there is no thread,
+ * neither the thread nor the comm sort key is in use, or the label allocation
+ * failed.
+ */
+static int
+add_thread_opt(struct hist_browser *browser, struct popup_action *act,
+	       char **optstr, struct thread *thread)
+{
+	struct hists *hists = browser->hists;
+	const char *direction;
+	const char *comm_str;
+	int len;
+
+	if (thread == NULL)
+		return 0;
+
+	if (!hists__has(hists, thread) && !hists__has(hists, comm))
+		return 0;
+
+	direction = hists->thread_filter ? "out of" : "into";
+	comm_str = thread->comm_set ? thread__comm_str(thread) : "";
+
+	if (hists__has(hists, thread))
+		len = asprintf(optstr, "Zoom %s %s(%d) thread",
+			       direction, comm_str, thread->tid);
+	else
+		len = asprintf(optstr, "Zoom %s %s thread",
+			       direction, comm_str);
+
+	if (len < 0)
+		return 0;
+
+	act->fn = do_zoom_thread;
+	act->thread = thread;
+
+	return 1;
+}
+
+/*
+ * Popup action: toggle the DSO filter of the browser's hists.
+ *
+ * With a filter already set it is cleared, the DSO column is shown again and
+ * the filter is dropped from the zoom stack; otherwise the DSO of
+ * @act->ms.map becomes the filter, the DSO column is elided and the filter is
+ * pushed on the stack.  Does nothing without the dso sort key or a map.
+ *
+ * Always returns 0.
+ */
+static int
+do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
+{
+	struct hists *hists = browser->hists;
+	struct map *map = act->ms.map;
+
+	if (map == NULL || !hists__has(hists, dso))
+		return 0;
+
+	if (hists->dso_filter == NULL) {
+		/* zoom in */
+		ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s DSO\"",
+				   __map__is_kernel(map) ? "the Kernel" : map->dso->short_name);
+		hists->dso_filter = map->dso;
+		perf_hpp__set_elide(HISTC_DSO, true);
+		pstack__push(browser->pstack, &hists->dso_filter);
+	} else {
+		/* zoom out */
+		pstack__remove(browser->pstack, &hists->dso_filter);
+		perf_hpp__set_elide(HISTC_DSO, false);
+		hists->dso_filter = NULL;
+		ui_helpline__pop();
+	}
+
+	hists__filter_by_dso(hists);
+	hist_browser__reset(browser);
+
+	return 0;
+}
+
+/*
+ * Add a "Zoom into/out of <DSO> DSO" menu entry for @map; a kernel map is
+ * labelled "the Kernel".
+ *
+ * Returns 1 when *@optstr and @act were filled in, 0 without the dso sort
+ * key, without a map, or when the label allocation failed.
+ */
+static int
+add_dso_opt(struct hist_browser *browser, struct popup_action *act,
+	    char **optstr, struct map *map)
+{
+	const char *direction;
+	const char *dso_name;
+
+	if (map == NULL || !hists__has(browser->hists, dso))
+		return 0;
+
+	direction = browser->hists->dso_filter ? "out of" : "into";
+	dso_name = __map__is_kernel(map) ? "the Kernel" : map->dso->short_name;
+
+	if (asprintf(optstr, "Zoom %s %s DSO", direction, dso_name) < 0)
+		return 0;
+
+	act->fn = do_zoom_dso;
+	act->ms.map = map;
+
+	return 1;
+}
+
+/* Popup action: open the map browser on @act->ms.map.  Always returns 0. */
+static int
+do_browse_map(struct hist_browser *browser __maybe_unused,
+	      struct popup_action *act)
+{
+	struct map *map = act->ms.map;
+
+	map__browse(map);
+
+	return 0;
+}
+
+/*
+ * Add the "Browse map details" menu entry for @map.
+ *
+ * Returns 1 when *@optstr and @act were filled in, 0 without the dso sort
+ * key, without a map, or when the label allocation failed.
+ */
+static int
+add_map_opt(struct hist_browser *browser,
+	    struct popup_action *act, char **optstr, struct map *map)
+{
+	if (map == NULL || !hists__has(browser->hists, dso))
+		return 0;
+
+	if (asprintf(optstr, "Browse map details") < 0)
+		return 0;
+
+	act->fn = do_browse_map;
+	act->ms.map = map;
+
+	return 1;
+}
+
+/*
+ * Popup action: open the script browser, restricted to the comm of
+ * @act->thread (" -c <comm> ") or, failing that, to @act->ms.sym
+ * (" -S <symbol> "); with neither set the option string stays empty.
+ * The option string is cut off at 63 characters.  Always returns 0.
+ */
+static int
+do_run_script(struct hist_browser *browser __maybe_unused,
+	      struct popup_action *act)
+{
+	char script_opt[64] = "";
+
+	if (act->thread)
+		scnprintf(script_opt, sizeof(script_opt), " -c %s ",
+			  thread__comm_str(act->thread));
+	else if (act->ms.sym)
+		scnprintf(script_opt, sizeof(script_opt), " -S %s ",
+			  act->ms.sym->name);
+
+	script_browse(script_opt);
+
+	return 0;
+}
+
+/*
+ * Add a "Run scripts for ..." menu entry: for @thread when given, else for
+ * @sym when given, else for all samples.
+ *
+ * Returns 1 when *@optstr and @act were filled in, 0 when the label
+ * allocation failed.
+ */
+static int
+add_script_opt(struct hist_browser *browser __maybe_unused,
+	       struct popup_action *act, char **optstr,
+	       struct thread *thread, struct symbol *sym)
+{
+	int len;
+
+	if (thread)
+		len = asprintf(optstr, "Run scripts for samples of thread [%s]",
+			       thread__comm_str(thread));
+	else if (sym)
+		len = asprintf(optstr, "Run scripts for samples of symbol [%s]",
+			       sym->name);
+	else
+		len = asprintf(optstr, "Run scripts for all samples");
+
+	if (len < 0)
+		return 0;
+
+	act->fn = do_run_script;
+	act->thread = thread;
+	act->ms.sym = sym;
+
+	return 1;
+}
+
+/*
+ * Popup action: let the user choose another data file.
+ *
+ * Returns K_SWITCH_INPUT_DATA when a file was selected; otherwise a warning
+ * is shown and 0 is returned.
+ */
+static int
+do_switch_data(struct hist_browser *browser __maybe_unused,
+	       struct popup_action *act __maybe_unused)
+{
+	if (switch_data_file() == 0)
+		return K_SWITCH_INPUT_DATA;
+
+	ui__warning("Won't switch the data files due to\n"
+		    "no valid data file get selected!\n");
+
+	return 0;
+}
+
+/*
+ * Add the "Switch to another data file in PWD" menu entry; it is offered
+ * only when the browser has no timer (report mode).
+ *
+ * Returns 1 when *@optstr and @act were filled in, 0 otherwise.
+ */
+static int
+add_switch_opt(struct hist_browser *browser,
+	       struct popup_action *act, char **optstr)
+{
+	if (is_report_browser(browser->hbt) &&
+	    asprintf(optstr, "Switch to another data file in PWD") >= 0) {
+		act->fn = do_switch_data;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Popup action behind the "Exit" menu entry: touches nothing and returns 0.
+ */
+static int
+do_exit_browser(struct hist_browser *browser __maybe_unused,
+		struct popup_action *act __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Add the "Exit" menu entry.
+ * Returns 1 when *@optstr and @act were filled in, 0 when the label
+ * allocation failed.
+ */
+static int
+add_exit_opt(struct hist_browser *browser __maybe_unused,
+	     struct popup_action *act, char **optstr)
+{
+	if (asprintf(optstr, "Exit") >= 0) {
+		act->fn = do_exit_browser;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Popup action: toggle the processor socket filter of the browser's hists.
+ *
+ * A filter value above -1 means "set": it is reset to -1, the socket column
+ * is shown again and the filter is dropped from the zoom stack.  Otherwise
+ * @act->socket becomes the filter, the column is elided and the filter is
+ * pushed.  Does nothing without the socket sort key or with a negative
+ * socket id.
+ *
+ * Always returns 0.
+ */
+static int
+do_zoom_socket(struct hist_browser *browser, struct popup_action *act)
+{
+	struct hists *hists = browser->hists;
+
+	if (act->socket < 0 || !hists__has(hists, socket))
+		return 0;
+
+	if (hists->socket_filter <= -1) {
+		/* zoom in */
+		hists->socket_filter = act->socket;
+		perf_hpp__set_elide(HISTC_SOCKET, true);
+		pstack__push(browser->pstack, &hists->socket_filter);
+	} else {
+		/* zoom out */
+		pstack__remove(browser->pstack, &hists->socket_filter);
+		hists->socket_filter = -1;
+		perf_hpp__set_elide(HISTC_SOCKET, false);
+	}
+
+	hists__filter_by_socket(hists);
+	hist_browser__reset(browser);
+
+	return 0;
+}
+
+/*
+ * Add a "Zoom into/out of Processor Socket <id>" menu entry.
+ *
+ * Returns 1 when *@optstr and @act were filled in, 0 without the socket sort
+ * key, with a negative @socket_id, or when the label allocation failed.
+ */
+static int
+add_socket_opt(struct hist_browser *browser, struct popup_action *act,
+	       char **optstr, int socket_id)
+{
+	const char *direction;
+
+	if (socket_id < 0 || !hists__has(browser->hists, socket))
+		return 0;
+
+	direction = (browser->hists->socket_filter > -1) ? "out of" : "into";
+
+	if (asprintf(optstr, "Zoom %s Processor Socket %d",
+		     direction, socket_id) < 0)
+		return 0;
+
+	act->fn = do_zoom_socket;
+	act->socket = socket_id;
+
+	return 1;
+}
+
+/*
+ * Refresh the browser's entry counters.
+ *
+ * Without a percent limit and outside hierarchy mode the count kept by the
+ * hists is copied.  Otherwise the entries passing the percent filter are
+ * counted by walking the tree, and the result is stored in both
+ * nr_non_filtered_entries and nr_hierarchy_entries.
+ */
+static void hist_browser__update_nr_entries(struct hist_browser *hb)
+{
+	struct rb_node *node;
+	u64 count = 0;
+
+	if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) {
+		hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries;
+		return;
+	}
+
+	for (node = hists__filter_entries(rb_first(&hb->hists->entries),
+					  hb->min_pcnt);
+	     node != NULL;
+	     node = hists__filter_entries(rb_hierarchy_next(node),
+					  hb->min_pcnt))
+		count++;
+
+	hb->nr_non_filtered_entries = count;
+	hb->nr_hierarchy_entries = count;
+}
+
+/*
+ * Apply a new percent limit: store @percent as the browser's and the
+ * callchain's minimum, then visit every entry that passes the filter to
+ * re-sort the callchains of leaf entries against the new threshold and reset
+ * each entry's folding state.
+ */
+static void hist_browser__update_percent_limit(struct hist_browser *hb,
+					       double percent)
+{
+	struct rb_node *node = rb_first(&hb->hists->entries);
+	u64 period = hists__total_period(hb->hists);
+	u64 min_hits = period * (percent / 100);
+	struct hist_entry *entry;
+
+	callchain_param.min_percent = percent;
+	hb->min_pcnt = callchain_param.min_percent;
+
+	while ((node = hists__filter_entries(node, hb->min_pcnt)) != NULL) {
+		entry = rb_entry(node, struct hist_entry, rb_node);
+
+		if (entry->has_no_entry) {
+			entry->has_no_entry = false;
+			entry->nr_rows = 0;
+		}
+
+		if (entry->leaf && symbol_conf.use_callchain) {
+			/* relative graphs take the threshold from the entry itself */
+			if (callchain_param.mode == CHAIN_GRAPH_REL) {
+				period = symbol_conf.cumulate_callchain ?
+						entry->stat_acc->period :
+						entry->stat.period;
+				min_hits = period * (percent / 100);
+			}
+
+			callchain_param.sort(&entry->sorted_chain,
+					     entry->callchain,
+					     min_hits, &callchain_param);
+		}
+
+		node = __rb_hierarchy_next(node, HMD_FORCE_CHILD);
+
+		/* force to re-evaluate folding state of callchains */
+		entry->init_have_children = false;
+		hist_entry__set_folding(entry, hb, false);
+	}
+}
+
+/*
+ * Run the interactive histogram browser for @evsel until the user leaves it.
+ *
+ * Hotkeys are handled directly in the switch below; ENTER, RIGHT and 'm'
+ * fall out of the switch and build a popup menu whose entries are filled in
+ * by the add_*_opt() helpers, each pairing a label in options[] with an
+ * action in actions[].
+ *
+ * @nr_events:       TAB/UNTAB are ignored when this is 1
+ * @helpline:        help text handed to hist_browser__run()
+ * @left_exits:      when true, LEFT/ESC with no active zoom leaves the browser
+ * @hbt:             refresh timer; NULL for 'report', non-NULL for 'top'
+ * @min_pcnt:        when non-zero, stored as the browser's percent limit
+ * @env:             session environment, stored in the browser and used for
+ *                   the header window
+ * @warn_lost_event: forwarded to hist_browser__run()
+ *
+ * Returns -1 if the browser or its zoom stack could not be allocated,
+ * otherwise the key (or action result) that ended the loop, e.g. K_TAB or
+ * K_SWITCH_INPUT_DATA.
+ */
+static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
+				    const char *helpline,
+				    bool left_exits,
+				    struct hist_browser_timer *hbt,
+				    float min_pcnt,
+				    struct perf_env *env,
+				    bool warn_lost_event)
+{
+	struct hists *hists = evsel__hists(evsel);
+	struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env);
+	struct branch_info *bi;
+#define MAX_OPTIONS  16
+	char *options[MAX_OPTIONS];
+	struct popup_action actions[MAX_OPTIONS];
+	int nr_options = 0;
+	int key = -1;
+	char buf[64];
+	int delay_secs = hbt ? hbt->refresh : 0;
+
+#define HIST_BROWSER_HELP_COMMON					\
+	"h/?/F1        Show this window\n"				\
+	"UP/DOWN/PGUP\n"						\
+	"PGDN/SPACE    Navigate\n"					\
+	"q/ESC/CTRL+C  Exit browser or go back to previous screen\n\n"	\
+	"For multiple event sessions:\n\n"				\
+	"TAB/UNTAB     Switch events\n\n"				\
+	"For symbolic views (--sort has sym):\n\n"			\
+	"ENTER         Zoom into DSO/Threads & Annotate current symbol\n" \
+	"ESC           Zoom out\n"					\
+	"a             Annotate current symbol\n"			\
+	"C             Collapse all callchains\n"			\
+	"d             Zoom into current DSO\n"				\
+	"E             Expand all callchains\n"				\
+	"F             Toggle percentage of filtered entries\n"		\
+	"H             Display column headers\n"			\
+	"L             Change percent limit\n"				\
+	"m             Display context menu\n"				\
+	"S             Zoom into current Processor Socket\n"		\
+
+	/* help messages are sorted by lexical order of the hotkey */
+	const char report_help[] = HIST_BROWSER_HELP_COMMON
+	"i             Show header information\n"
+	"P             Print histograms to perf.hist.N\n"
+	"r             Run available scripts\n"
+	"s             Switch to another data file in PWD\n"
+	"t             Zoom into current Thread\n"
+	"V             Verbose (DSO names in callchains, etc)\n"
+	"/             Filter symbol by name";
+	const char top_help[] = HIST_BROWSER_HELP_COMMON
+	"P             Print histograms to perf.hist.N\n"
+	"t             Zoom into current Thread\n"
+	"V             Verbose (DSO names in callchains, etc)\n"
+	"z             Toggle zeroing of samples\n"
+	"f             Enable/Disable events\n"
+	"/             Filter symbol by name";
+
+	if (browser == NULL)
+		return -1;
+
+	/*
+	 * Clear both arrays before the first "goto out": the exit path frees
+	 * every options[] slot, so none of them may be left uninitialised.
+	 */
+	memset(options, 0, sizeof(options));
+	memset(actions, 0, sizeof(actions));
+
+	/* reset abort key so that it can get Ctrl-C as a key */
+	SLang_reset_tty();
+	SLang_init_tty(0, 0, 0);
+
+	if (min_pcnt)
+		browser->min_pcnt = min_pcnt;
+	hist_browser__update_nr_entries(browser);
+
+	browser->pstack = pstack__new(3);
+	if (browser->pstack == NULL)
+		goto out;
+
+	ui_helpline__push(helpline);
+
+	if (symbol_conf.col_width_list_str)
+		perf_hpp__set_user_width(symbol_conf.col_width_list_str);
+
+	while (1) {
+		struct thread *thread = NULL;
+		struct map *map = NULL;
+		int choice = 0;
+		int socked_id = -1;
+
+		nr_options = 0;
+
+		key = hist_browser__run(browser, helpline,
+					warn_lost_event);
+
+		if (browser->he_selection != NULL) {
+			thread = hist_browser__selected_thread(browser);
+			map = browser->selection->map;
+			socked_id = browser->he_selection->socket;
+		}
+		switch (key) {
+		case K_TAB:
+		case K_UNTAB:
+			if (nr_events == 1)
+				continue;
+			/*
+			 * Exit the browser, let hists__browser_tree
+			 * go to the next or previous
+			 */
+			goto out_free_stack;
+		case 'a':
+			if (!hists__has(hists, sym)) {
+				ui_browser__warning(&browser->b, delay_secs * 2,
+			"Annotation is only available for symbolic views, "
+			"include \"sym*\" in --sort to use it.");
+				continue;
+			}
+
+			if (browser->selection == NULL ||
+			    browser->selection->sym == NULL ||
+			    browser->selection->map->dso->annotate_warned)
+				continue;
+
+			/* hotkeys borrow actions[0] as their argument block */
+			actions->ms.map = browser->selection->map;
+			actions->ms.sym = browser->selection->sym;
+			do_annotate(browser, actions);
+			continue;
+		case 'P':
+			hist_browser__dump(browser);
+			continue;
+		case 'd':
+			actions->ms.map = map;
+			do_zoom_dso(browser, actions);
+			continue;
+		case 'V':
+			verbose = (verbose + 1) % 4;
+			browser->show_dso = verbose > 0;
+			ui_helpline__fpush("Verbosity level set to %d\n",
+					   verbose);
+			continue;
+		case 't':
+			actions->thread = thread;
+			do_zoom_thread(browser, actions);
+			continue;
+		case 'S':
+			actions->socket = socked_id;
+			do_zoom_socket(browser, actions);
+			continue;
+		case '/':
+			if (ui_browser__input_window("Symbol to show",
+					"Please enter the name of symbol you want to see.\n"
+					"To remove the filter later, press / + ENTER.",
+					buf, "ENTER: OK, ESC: Cancel",
+					delay_secs * 2) == K_ENTER) {
+				/*
+				 * NOTE(review): symbol_filter_str is left
+				 * pointing at this function's stack buffer;
+				 * confirm nothing reads it after we return.
+				 */
+				hists->symbol_filter_str = *buf ? buf : NULL;
+				hists__filter_by_symbol(hists);
+				hist_browser__reset(browser);
+			}
+			continue;
+		case 'r':
+			if (is_report_browser(hbt)) {
+				actions->thread = NULL;
+				actions->ms.sym = NULL;
+				do_run_script(browser, actions);
+			}
+			continue;
+		case 's':
+			if (is_report_browser(hbt)) {
+				key = do_switch_data(browser, actions);
+				if (key == K_SWITCH_INPUT_DATA)
+					goto out_free_stack;
+			}
+			continue;
+		case 'i':
+			/* env->arch is NULL for live-mode (i.e. perf top) */
+			if (env->arch)
+				tui__header_window(env);
+			continue;
+		case 'F':
+			symbol_conf.filter_relative ^= 1;
+			continue;
+		case 'z':
+			if (!is_report_browser(hbt)) {
+				struct perf_top *top = hbt->arg;
+
+				top->zero = !top->zero;
+			}
+			continue;
+		case 'L':
+			if (ui_browser__input_window("Percent Limit",
+					"Please enter the value you want to hide entries under that percent.",
+					buf, "ENTER: OK, ESC: Cancel",
+					delay_secs * 2) == K_ENTER) {
+				char *end;
+				double new_percent = strtod(buf, &end);
+
+				if (new_percent < 0 || new_percent > 100) {
+					ui_browser__warning(&browser->b, delay_secs * 2,
+						"Invalid percent: %.2f", new_percent);
+					continue;
+				}
+
+				hist_browser__update_percent_limit(browser, new_percent);
+				hist_browser__reset(browser);
+			}
+			continue;
+		case K_F1:
+		case 'h':
+		case '?':
+			ui_browser__help_window(&browser->b,
+				is_report_browser(hbt) ? report_help : top_help);
+			continue;
+		case K_ENTER:
+		case K_RIGHT:
+		case 'm':
+			/* menu */
+			break;
+		case K_ESC:
+		case K_LEFT: {
+			const void *top;
+
+			if (pstack__empty(browser->pstack)) {
+				/*
+				 * Go back to the perf_evsel_menu__run or other user
+				 */
+				if (left_exits)
+					goto out_free_stack;
+
+				if (key == K_ESC &&
+				    ui_browser__dialog_yesno(&browser->b,
+							     "Do you really want to exit?"))
+					goto out_free_stack;
+
+				continue;
+			}
+			/* undo the most recent zoom, whichever filter it was */
+			top = pstack__peek(browser->pstack);
+			if (top == &browser->hists->dso_filter) {
+				/*
+				 * No need to set actions->dso here since
+				 * it's just to remove the current filter.
+				 * Ditto for thread below.
+				 */
+				do_zoom_dso(browser, actions);
+			} else if (top == &browser->hists->thread_filter) {
+				do_zoom_thread(browser, actions);
+			} else if (top == &browser->hists->socket_filter) {
+				do_zoom_socket(browser, actions);
+			}
+			continue;
+		}
+		case 'q':
+		case CTRL('c'):
+			goto out_free_stack;
+		case 'f':
+			if (!is_report_browser(hbt)) {
+				struct perf_top *top = hbt->arg;
+
+				perf_evlist__toggle_enable(top->evlist);
+				/*
+				 * No need to refresh, resort/decay histogram
+				 * entries if we are not collecting samples:
+				 */
+				if (top->evlist->enabled) {
+					helpline = "Press 'f' to disable the events or 'h' to see other hotkeys";
+					hbt->refresh = delay_secs;
+				} else {
+					helpline = "Press 'f' again to re-enable the events";
+					hbt->refresh = 0;
+				}
+				continue;
+			}
+			/* Fall thru */
+		default:
+			helpline = "Press '?' for help on key bindings";
+			continue;
+		}
+
+		if (!hists__has(hists, sym) || browser->selection == NULL)
+			goto skip_annotation;
+
+		if (sort__mode == SORT_MODE__BRANCH) {
+			bi = browser->he_selection->branch_info;
+
+			if (bi == NULL)
+				goto skip_annotation;
+
+			nr_options += add_annotate_opt(browser,
+						       &actions[nr_options],
+						       &options[nr_options],
+						       bi->from.map,
+						       bi->from.sym);
+			if (bi->to.sym != bi->from.sym)
+				nr_options += add_annotate_opt(browser,
+							&actions[nr_options],
+							&options[nr_options],
+							bi->to.map,
+							bi->to.sym);
+		} else {
+			nr_options += add_annotate_opt(browser,
+						       &actions[nr_options],
+						       &options[nr_options],
+						       browser->selection->map,
+						       browser->selection->sym);
+		}
+skip_annotation:
+		nr_options += add_thread_opt(browser, &actions[nr_options],
+					     &options[nr_options], thread);
+		nr_options += add_dso_opt(browser, &actions[nr_options],
+					  &options[nr_options], map);
+		nr_options += add_map_opt(browser, &actions[nr_options],
+					  &options[nr_options],
+					  browser->selection ?
+						browser->selection->map : NULL);
+		nr_options += add_socket_opt(browser, &actions[nr_options],
+					     &options[nr_options],
+					     socked_id);
+		/* perf script support */
+		if (!is_report_browser(hbt))
+			goto skip_scripting;
+
+		if (browser->he_selection) {
+			if (hists__has(hists, thread) && thread) {
+				nr_options += add_script_opt(browser,
+							     &actions[nr_options],
+							     &options[nr_options],
+							     thread, NULL);
+			}
+			/*
+			 * Note that browser->selection != NULL
+			 * when browser->he_selection is not NULL,
+			 * so we don't need to check browser->selection
+			 * before fetching browser->selection->sym like what
+			 * we do before fetching browser->selection->map.
+			 *
+			 * See hist_browser__show_entry.
+			 */
+			if (hists__has(hists, sym) && browser->selection->sym) {
+				nr_options += add_script_opt(browser,
+							     &actions[nr_options],
+							     &options[nr_options],
+							     NULL, browser->selection->sym);
+			}
+		}
+		nr_options += add_script_opt(browser, &actions[nr_options],
+					     &options[nr_options], NULL, NULL);
+		nr_options += add_switch_opt(browser, &actions[nr_options],
+					     &options[nr_options]);
+skip_scripting:
+		nr_options += add_exit_opt(browser, &actions[nr_options],
+					   &options[nr_options]);
+
+		/* an action returning 1 asks for the same menu to be shown again */
+		do {
+			struct popup_action *act;
+
+			choice = ui__popup_menu(nr_options, options);
+			if (choice == -1 || choice >= nr_options)
+				break;
+
+			act = &actions[choice];
+			key = act->fn(browser, act);
+		} while (key == 1);
+
+		/*
+		 * The labels were asprintf()ed for this menu only; release them
+		 * so the next pass through the loop does not overwrite and
+		 * leak them.
+		 */
+		free_popup_options(options, nr_options);
+
+		if (key == K_SWITCH_INPUT_DATA)
+			break;
+	}
+out_free_stack:
+	pstack__delete(browser->pstack);
+out:
+	hist_browser__delete(browser);
+	free_popup_options(options, MAX_OPTIONS);
+	return key;
+}
+
+/* State of the event menu shown when a session has more than one event. */
+struct perf_evsel_menu {
+	struct ui_browser b;		/* embedded browser, see container_of() in the write callback */
+	struct perf_evsel *selection;	/* evsel on the current row, set while drawing */
+	/* lost events seen while drawing / warning about them already shown */
+	bool lost_events, lost_events_warned;
+	float min_pcnt;			/* passed on to the histogram browser */
+	struct perf_env *env;		/* passed on to the histogram browser */
+};
+
+/*
+ * Draw one row of the event menu: the sample count (scaled by
+ * convert_unit()) followed by the event name and, when the event has
+ * PERF_RECORD_LOST records, a "chunks LOST!" note.  For a group event the
+ * group name is shown and the samples of the group members are added in.
+ *
+ * Also records the evsel of the current row in menu->selection and sets
+ * menu->lost_events when lost records were seen.
+ */
+static void perf_evsel_menu__write(struct ui_browser *browser,
+				   void *entry, int row)
+{
+	struct perf_evsel_menu *menu = container_of(browser,
+						    struct perf_evsel_menu, b);
+	struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
+	struct hists *hists = evsel__hists(evsel);
+	bool current_entry = ui_browser__is_current_entry(browser, row);
+	unsigned long nr_events = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+	const char *ev_name = perf_evsel__name(evsel);
+	char bf[256], unit;
+	const char *warn = " ";
+	size_t printed;
+
+	ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
+						       HE_COLORSET_NORMAL);
+
+	if (perf_evsel__is_group_event(evsel)) {
+		struct perf_evsel *pos;
+
+		ev_name = perf_evsel__group_name(evsel);
+
+		for_each_group_member(pos, evsel) {
+			struct hists *pos_hists = evsel__hists(pos);
+			nr_events += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE];
+		}
+	}
+
+	nr_events = convert_unit(nr_events, &unit);
+	printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events,
+			   unit, unit == ' ' ? "" : " ", ev_name);
+	ui_browser__printf(browser, "%s", bf);
+
+	nr_events = hists->stats.nr_events[PERF_RECORD_LOST];
+	if (nr_events != 0) {
+		menu->lost_events = true;
+		if (!current_entry)
+			ui_browser__set_color(browser, HE_COLORSET_TOP);
+		nr_events = convert_unit(nr_events, &unit);
+		/* nr_events is unsigned long, so it is printed with %lu */
+		printed += scnprintf(bf, sizeof(bf), ": %lu%c%schunks LOST!",
+				     nr_events, unit, unit == ' ' ? "" : " ");
+		warn = bf;
+	}
+
+	/* pad (or complete) the row with what is left of the browser width */
+	ui_browser__write_nstring(browser, warn, browser->width - printed);
+
+	if (current_entry)
+		menu->selection = evsel;
+}
+
+/*
+ * Show the "Available samples" event menu and run it until the user exits.
+ * ENTER/RIGHT opens the histogram browser for the selected event; from there
+ * TAB/UNTAB move on to the next/previous event of the evlist, wrapping
+ * around at either end.
+ *
+ * Returns -1 if the menu could not be shown, otherwise the key that ended
+ * the loop.
+ */
+static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
+				int nr_events, const char *help,
+				struct hist_browser_timer *hbt,
+				bool warn_lost_event)
+{
+	struct perf_evlist *evlist = menu->b.priv;
+	struct perf_evsel *pos;
+	const char *title = "Available samples";
+	int delay_secs = hbt ? hbt->refresh : 0;
+	int key;
+
+	if (ui_browser__show(&menu->b, title,
+			     "ESC: exit, ENTER|->: Browse histograms") < 0)
+		return -1;
+
+	while (1) {
+		key = ui_browser__run(&menu->b, delay_secs);
+
+		switch (key) {
+		case K_TIMER:
+			/* hbt may be NULL, same guard as at browse_hists below */
+			if (hbt)
+				hbt->timer(hbt->arg);
+
+			/* warn about lost events only once */
+			if (!menu->lost_events_warned &&
+			    menu->lost_events &&
+			    warn_lost_event) {
+				ui_browser__warn_lost_events(&menu->b);
+				menu->lost_events_warned = true;
+			}
+			continue;
+		case K_RIGHT:
+		case K_ENTER:
+			if (!menu->selection)
+				continue;
+			pos = menu->selection;
+browse_hists:
+			perf_evlist__set_selected(evlist, pos);
+			/*
+			 * Give the calling tool a chance to populate the non
+			 * default evsel resorted hists tree.
+			 */
+			if (hbt)
+				hbt->timer(hbt->arg);
+			key = perf_evsel__hists_browse(pos, nr_events, help,
+						       true, hbt,
+						       menu->min_pcnt,
+						       menu->env,
+						       warn_lost_event);
+			ui_browser__show_title(&menu->b, title);
+			/* every branch below jumps away, nothing falls into K_LEFT */
+			switch (key) {
+			case K_TAB:
+				if (pos->node.next == &evlist->entries)
+					pos = perf_evlist__first(evlist);
+				else
+					pos = perf_evsel__next(pos);
+				goto browse_hists;
+			case K_UNTAB:
+				if (pos->node.prev == &evlist->entries)
+					pos = perf_evlist__last(evlist);
+				else
+					pos = perf_evsel__prev(pos);
+				goto browse_hists;
+			case K_SWITCH_INPUT_DATA:
+			case 'q':
+			case CTRL('c'):
+				goto out;
+			case K_ESC:
+			default:
+				continue;
+			}
+		case K_LEFT:
+			continue;
+		case K_ESC:
+			if (!ui_browser__dialog_yesno(&menu->b,
+					       "Do you really want to exit?"))
+				continue;
+			/* Fall thru */
+		case 'q':
+		case CTRL('c'):
+			goto out;
+		default:
+			continue;
+		}
+	}
+
+out:
+	ui_browser__hide(&menu->b);
+	return key;
+}
+
+/*
+ * Filter callback of the event menu: returns true for evsels that are not
+ * group leaders while symbol_conf.event_group is set, false for everything
+ * else.
+ */
+static bool filter_group_entries(struct ui_browser *browser __maybe_unused,
+				 void *entry)
+{
+	struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
+
+	return symbol_conf.event_group && !perf_evsel__is_group_leader(evsel);
+}
+
+/*
+ * Set up the event menu over the entries of @evlist, size its width to the
+ * longest event name plus 7 and run it.
+ *
+ * Returns whatever perf_evsel_menu__run() returns.
+ */
+static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
+					   int nr_entries, const char *help,
+					   struct hist_browser_timer *hbt,
+					   float min_pcnt,
+					   struct perf_env *env,
+					   bool warn_lost_event)
+{
+	struct perf_evsel_menu menu = {
+		.b = {
+			.entries    = &evlist->entries,
+			.refresh    = ui_browser__list_head_refresh,
+			.seek	    = ui_browser__list_head_seek,
+			.write	    = perf_evsel_menu__write,
+			.filter	    = filter_group_entries,
+			.nr_entries = nr_entries,
+			.priv	    = evlist,
+		},
+		.min_pcnt = min_pcnt,
+		.env = env,
+	};
+	struct perf_evsel *evsel;
+
+	ui_helpline__push("Press ESC to exit");
+
+	evlist__for_each_entry(evlist, evsel) {
+		size_t needed = strlen(perf_evsel__name(evsel)) + 7;
+
+		if (needed > menu.b.width)
+			menu.b.width = needed;
+	}
+
+	return perf_evsel_menu__run(&menu, nr_entries, help,
+				    hbt, warn_lost_event);
+}
+
+/*
+ * Entry point of the histogram TUI for @evlist.
+ *
+ * With a single entry the histogram browser is opened directly on the first
+ * evsel.  In event group mode only group leaders count as entries, so a
+ * single group is browsed directly as well.  Otherwise the event menu is
+ * shown first.
+ *
+ * Returns what the browser or the menu returned.
+ */
+int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
+				  struct hist_browser_timer *hbt,
+				  float min_pcnt,
+				  struct perf_env *env,
+				  bool warn_lost_event)
+{
+	int nr_entries = evlist->nr_entries;
+
+	if (nr_entries != 1 && symbol_conf.event_group) {
+		struct perf_evsel *pos;
+
+		/* recount, taking only the group leaders into account */
+		nr_entries = 0;
+		evlist__for_each_entry(evlist, pos) {
+			if (perf_evsel__is_group_leader(pos))
+				nr_entries++;
+		}
+	}
+
+	if (nr_entries == 1)
+		return perf_evsel__hists_browse(perf_evlist__first(evlist),
+						nr_entries, help,
+						false, hbt, min_pcnt,
+						env, warn_lost_event);
+
+	return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
+					       hbt, min_pcnt, env,
+					       warn_lost_event);
+}
diff --git a/ui/browsers/hists.h b/ui/browsers/hists.h
new file mode 100644
index 0000000..9428bee
--- /dev/null
+++ b/ui/browsers/hists.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_BROWSER_HISTS_H_
+#define _PERF_UI_BROWSER_HISTS_H_ 1
+
+#include "ui/browser.h"
+
+struct hist_browser {	/* state of one hists browser instance */
+	struct ui_browser   b;	/* embedded generic browser */
+	struct hists	    *hists;
+	struct hist_entry   *he_selection;
+	struct map_symbol   *selection;
+	struct hist_browser_timer *hbt;
+	struct pstack	    *pstack;
+	struct perf_env	    *env;
+	int		     print_seq;
+	bool		     show_dso;
+	bool		     show_headers;
+	float		     min_pcnt;	/* presumably a percentage threshold for shown entries -- confirm */
+	u64		     nr_non_filtered_entries;
+	u64		     nr_hierarchy_entries;
+	u64		     nr_callchain_rows;
+	bool		     c2c_filter;
+
+	/* Get title string; presumably written into bf, at most size bytes -- confirm. */
+	int                  (*title)(struct hist_browser *browser,
+			     char *bf, size_t size);
+};
+
+struct hist_browser *hist_browser__new(struct hists *hists);
+void hist_browser__delete(struct hist_browser *browser);
+int hist_browser__run(struct hist_browser *browser, const char *help,
+		      bool warn_lost_event);
+void hist_browser__init(struct hist_browser *browser,
+			struct hists *hists);
+#endif /* _PERF_UI_BROWSER_HISTS_H_ */
diff --git a/ui/browsers/map.c b/ui/browsers/map.c
new file mode 100644
index 0000000..e03fa75
--- /dev/null
+++ b/ui/browsers/map.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elf.h>
+#include <inttypes.h>
+#include <sys/ttydefaults.h>
+#include <string.h>
+#include <linux/bitops.h>
+#include "../../util/util.h"
+#include "../../util/debug.h"
+#include "../../util/symbol.h"
+#include "../browser.h"
+#include "../helpline.h"
+#include "../keysyms.h"
+#include "map.h"
+
+#include "sane_ctype.h"
+
+struct map_browser {	/* browser over the symbols of one map */
+	struct ui_browser b;	/* embedded generic browser */
+	struct map	  *map;	/* map whose symbols are listed */
+	u8		  addrlen;	/* printf width of the start/end address columns */
+};
+
+static void map_browser__write(struct ui_browser *browser, void *nd, int row)
+{
+	struct map_browser *mb = container_of(browser, struct map_browser, b);
+	struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+	char bind = sym->binding == STB_GLOBAL ? 'g' :
+		    sym->binding == STB_LOCAL ? 'l' : 'w';
+	int room;	/* columns left for the name after both addresses + 4 chars */
+
+	ui_browser__set_percent_color(browser, 0,
+				      ui_browser__is_current_entry(browser, row));
+	ui_browser__printf(browser, "%*" PRIx64 " %*" PRIx64 " %c ",
+			   mb->addrlen, sym->start, mb->addrlen, sym->end, bind);
+	room = browser->width - ((mb->addrlen * 2) + 4);
+	if (room > 0)
+		ui_browser__write_nstring(browser, sym->name, room);
+}
+
+/* FIXME uber-kludgy, see comment on cmd_report: u32 slot located ahead of the symbol. */
+static u32 *symbol__browser_index(struct symbol *browser)
+{
+	return (u32 *)((char *)browser - (sizeof(struct rb_node) + sizeof(u32)));
+}
+
+static int map_browser__search(struct map_browser *browser)
+{
+	struct symbol *found;
+	char target[512];
+	bool by_addr;
+
+	/* Anything but ENTER cancels the search. */
+	if (ui_browser__input_window("Search by name/addr",
+				     "Prefix with 0x to search by address",
+				     target, "ENTER: OK, ESC: Cancel",
+				     0) != K_ENTER)
+		return -1;
+
+	by_addr = target[0] == '0' && tolower(target[1]) == 'x';
+	found = by_addr ?
+		map__find_symbol(browser->map, strtoull(target, NULL, 16)) :
+		map__find_symbol_by_name(browser->map, target);
+	if (found == NULL) {
+		ui_helpline__fpush("%s not found!", target);
+		return 0;
+	}
+	/* Make the match both the top row and the current entry. */
+	browser->b.top = &found->rb_node;
+	browser->b.index = browser->b.top_idx = *symbol__browser_index(found);
+	return 0;
+}
+
+/*
+ * Show the symbol browser for browser->map and handle keys until an exit
+ * key is pressed.  Returns that key, or -1 if the browser cannot be shown.
+ */
+static int map_browser__run(struct map_browser *browser)
+{
+	int key;
+
+	if (ui_browser__show(&browser->b, browser->map->dso->long_name,
+			     "Press ESC to exit, %s / to search",
+			     verbose > 0 ? "" : "restart with -v to use") < 0)
+		return -1;
+
+	for (;;) {
+		key = ui_browser__run(&browser->b, 0);
+
+		/* Exit keys. */
+		if (key == K_LEFT || key == K_ESC ||
+		    key == 'q' || key == CTRL('c'))
+			break;
+
+		/* '/' searches, but only in verbose mode; other keys are ignored. */
+		if (key == '/' && verbose > 0)
+			map_browser__search(browser);
+	}
+
+	ui_browser__hide(&browser->b);
+	return key;
+}
+
+int map__browse(struct map *map)
+{
+	struct map_browser mb = {
+		.map = map,
+		.b = {
+			.write	 = map_browser__write,
+			.seek	 = ui_browser__rb_tree_seek,
+			.refresh = ui_browser__rb_tree_refresh,
+			.entries = &map->dso->symbols[map->type],
+		},
+	};
+	char hexbuf[BITS_PER_LONG / 4];
+	struct rb_node *node = rb_first(mb.b.entries);
+	u64 highest = 0;
+
+	while (node != NULL) {
+		struct symbol *sym = rb_entry(node, struct symbol, rb_node);
+
+		if (sym->end > highest)
+			highest = sym->end;
+		/* Remember each symbol's row so that a search can jump to it. */
+		if (verbose > 0)
+			*symbol__browser_index(sym) = mb.b.nr_entries;
+		mb.b.nr_entries++;
+		node = rb_next(node);
+	}
+	/* The address columns get the width of the largest end address in hex. */
+	mb.addrlen = snprintf(hexbuf, sizeof(hexbuf), "%" PRIx64, highest);
+	return map_browser__run(&mb);
+}
diff --git a/ui/browsers/map.h b/ui/browsers/map.h
new file mode 100644
index 0000000..0ed7dbb
--- /dev/null
+++ b/ui/browsers/map.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_MAP_BROWSER_H_
+#define _PERF_UI_MAP_BROWSER_H_ 1
+struct map;
+
+int map__browse(struct map *map);
+#endif /* _PERF_UI_MAP_BROWSER_H_ */
diff --git a/ui/browsers/scripts.c b/ui/browsers/scripts.c
new file mode 100644
index 0000000..90a32ac
--- /dev/null
+++ b/ui/browsers/scripts.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elf.h>
+#include <inttypes.h>
+#include <sys/ttydefaults.h>
+#include <string.h>
+#include "../../util/sort.h"
+#include "../../util/util.h"
+#include "../../util/hist.h"
+#include "../../util/debug.h"
+#include "../../util/symbol.h"
+#include "../browser.h"
+#include "../helpline.h"
+#include "../libslang.h"
+
+/* Upper bound on stored output lines; 2048 should be enough for a script output */
+#define MAX_LINES		2048
+
+/* Bytes kept per output line; longer lines are cut short by script_browse() */
+#define AVERAGE_LINE_LEN	160
+
+struct script_line {	/* one line of script output */
+	struct list_head node;	/* link in perf_script_browser.entries */
+	char line[AVERAGE_LINE_LEN];
+};
+
+struct perf_script_browser {	/* browser over the captured script output */
+	struct ui_browser b;
+	struct list_head entries;	/* list of struct script_line */
+	const char *script_name;	/* used as the browser title */
+	int nr_lines;
+};
+
+#define SCRIPT_NAMELEN	128
+#define SCRIPT_MAX_NO	64
+/*
+ * Usually the full path for a script is:
+ *	/home/username/libexec/perf-core/scripts/python/xxx.py
+ *	/home/username/libexec/perf-core/scripts/perl/xxx.pl
+ * So 256 should be long enough to contain the full path.
+ */
+#define SCRIPT_FULLPATH_LEN	256
+
+/*
+ * Offer the scripts reported by find_scripts() in a popup menu.
+ * On success the full path of the chosen script is copied into the
+ * buffer pointed to by script_name and 0 is returned, -1 otherwise.
+ */
+static int list_scripts(char *script_name)
+{
+	const size_t slot = SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN;
+	char *names[SCRIPT_MAX_NO], *paths[SCRIPT_MAX_NO];
+	int idx, nr_scripts, picked, err = -1;
+	char *pool = malloc(SCRIPT_MAX_NO * slot);
+
+	if (pool == NULL)
+		return err;
+
+	/* Carve a name slot followed by a path slot for every script. */
+	for (idx = 0; idx < SCRIPT_MAX_NO; idx++) {
+		names[idx] = pool + idx * slot;
+		paths[idx] = names[idx] + SCRIPT_NAMELEN;
+	}
+	nr_scripts = find_scripts(names, paths);
+	if (nr_scripts <= 0)
+		goto out;
+	picked = ui__popup_menu(nr_scripts, names);
+	if (picked >= 0 && picked < nr_scripts) {
+		strcpy(script_name, paths[picked]);
+		err = 0;
+	}
+out:
+	free(pool);
+	return err;
+}
+
+static void script_browser__write(struct ui_browser *browser,
+				   void *entry, int row)
+{
+	struct script_line *out = list_entry(entry, struct script_line, node);
+	int colorset = HE_COLORSET_NORMAL;
+
+	if (ui_browser__is_current_entry(browser, row))
+		colorset = HE_COLORSET_SELECTED;	/* highlight the current row */
+	ui_browser__set_color(browser, colorset);
+	ui_browser__write_nstring(browser, out->line, browser->width);
+}
+
+/*
+ * Show the script output; returns the key from ui_browser__run(), or -1.
+ */
+static int script_browser__run(struct perf_script_browser *browser)
+{
+	struct ui_browser *b = &browser->b;
+	int key;
+
+	if (ui_browser__show(b, browser->script_name,
+			     "Press ESC to exit") < 0)
+		return -1;
+
+	/* We can add some special key handling here if needed */
+	key = ui_browser__run(b, 0);
+
+	ui_browser__hide(b);
+	return key;
+}
+
+
+/*
+ * Pick a script, run it via "perf script -s" and browse its output.
+ * @script_opt may be NULL; returns the closing key or -1 on failure.
+ */
+int script_browse(const char *script_opt)
+{
+	char cmd[SCRIPT_FULLPATH_LEN*2], script_name[SCRIPT_FULLPATH_LEN];
+	char *line = NULL;
+	size_t len = 0;
+	ssize_t retlen;
+	int ret = -1, nr_entries = 0, cmdlen;
+	FILE *fp;
+	void *buf;
+	struct script_line *sline;
+	struct perf_script_browser script = {
+		.b = {
+			.refresh    = ui_browser__list_head_refresh,
+			.seek	    = ui_browser__list_head_seek,
+			.write	    = script_browser__write,
+		},
+		.script_name = script_name,
+	};
+
+	INIT_LIST_HEAD(&script.entries);
+
+	/* Save each line of the output in one struct script_line object. */
+	buf = zalloc((sizeof(*sline)) * MAX_LINES);
+	if (!buf)
+		return -1;
+	sline = buf;
+
+	memset(script_name, 0, SCRIPT_FULLPATH_LEN);
+	if (list_scripts(script_name))
+		goto exit;
+
+	/*
+	 * Build the command with a bounded format instead of sprintf()/strcat():
+	 * script_opt and input_name are caller supplied and of unknown length.
+	 * NOTE: they reach the shell unescaped via popen().
+	 */
+	cmdlen = snprintf(cmd, sizeof(cmd), "perf script -s %s %s%s%s 2>&1",
+			  script_name, script_opt ? script_opt : "",
+			  input_name ? " -i " : "",
+			  input_name ? input_name : "");
+	if (cmdlen < 0 || (size_t)cmdlen >= sizeof(cmd))
+		goto exit;	/* command would have been truncated */
+
+	fp = popen(cmd, "r");
+	if (!fp)
+		goto exit;
+
+	while ((retlen = getline(&line, &len, fp)) != -1) {
+		strncpy(sline->line, line, AVERAGE_LINE_LEN);
+		/* If one output line is very large, just cut it short */
+		if (retlen >= AVERAGE_LINE_LEN) {
+			sline->line[AVERAGE_LINE_LEN - 1] = '\0';
+			sline->line[AVERAGE_LINE_LEN - 2] = '\n';
+		}
+		list_add_tail(&sline->node, &script.entries);
+		if (script.b.width < retlen)
+			script.b.width = retlen;
+		if (nr_entries++ >= MAX_LINES - 1)
+			break;
+		sline++;
+	}
+
+	if (script.b.width > AVERAGE_LINE_LEN)
+		script.b.width = AVERAGE_LINE_LEN;
+
+	free(line);
+	pclose(fp);
+
+	script.nr_lines = nr_entries;
+	script.b.nr_entries = nr_entries;
+	script.b.entries = &script.entries;
+
+	ret = script_browser__run(&script);
+exit:
+	free(buf);
+	return ret;
+}
diff --git a/ui/gtk/Build b/ui/gtk/Build
new file mode 100644
index 0000000..ec22e89
--- /dev/null
+++ b/ui/gtk/Build
@@ -0,0 +1,9 @@
+CFLAGS_gtk += -fPIC $(GTK_CFLAGS)
+# Objects added to the gtk-y list (presumably linked into the GTK UI -- confirm)
+gtk-y += browser.o
+gtk-y += hists.o
+gtk-y += setup.o
+gtk-y += util.o
+gtk-y += helpline.o
+gtk-y += progress.o
+gtk-y += annotate.o
diff --git a/ui/gtk/annotate.c b/ui/gtk/annotate.c
new file mode 100644
index 0000000..aeeaf15
--- /dev/null
+++ b/ui/gtk/annotate.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "gtk.h"
+#include "util/debug.h"
+#include "util/annotate.h"
+#include "util/evsel.h"
+#include "ui/helpline.h"
+#include <inttypes.h>
+#include <signal.h>
+
+enum {	/* column indexes of the annotation list store */
+	ANN_COL__PERCENT,
+	ANN_COL__OFFSET,
+	ANN_COL__LINE,
+	/* number of columns above; used to size the store and the view */
+	MAX_ANN_COLS
+};
+
+static const char *const col_names[] = {	/* column titles, indexed by ANN_COL__* */
+	"Overhead",
+	"Offset",
+	"Line"
+};
+
+static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym,
+				 struct disasm_line *dl, int evidx)
+{
+	struct sym_hist *hist;
+	const char *color;
+	double pcnt;
+	int len;
+
+	buf[0] = '\0';
+
+	/* Nothing to show for lines whose offset is -1. */
+	if (dl->al.offset == (s64) -1)
+		return 0;
+
+	hist = annotation__histogram(symbol__annotation(sym), evidx);
+	if (!symbol_conf.event_group && !hist->addr[dl->al.offset].nr_samples)
+		return 0;
+
+	pcnt = 100.0 * hist->addr[dl->al.offset].nr_samples / hist->nr_samples;
+	color = perf_gtk__get_percent_color(pcnt);
+	if (!color)
+		return scnprintf(buf, size, "%6.2f%%", pcnt);
+	/* Wrap the value in the <span> markup chosen for this percentage. */
+	len = scnprintf(buf, size, "%s", color);
+	len += scnprintf(buf + len, size - len, "%6.2f%%", pcnt);
+	len += scnprintf(buf + len, size - len, "</span>");
+	return len;
+}
+
+static int perf_gtk__get_offset(char *buf, size_t size, struct symbol *sym,
+				struct map *map, struct disasm_line *dl)
+{
+	u64 base = map__rip_2objdump(map, sym->start);
+
+	buf[0] = '\0';
+	/* An offset of -1 leaves the column empty. */
+	if (dl->al.offset != (s64) -1)
+		return scnprintf(buf, size, "%"PRIx64, base + dl->al.offset);
+
+	return 0;
+}
+
+/*
+ * Put the markup-escaped dl->al.line into buf; returns the length written.
+ */
+static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl)
+{
+	/* Only lines without an offset (-1) are wrapped in the gray span. */
+	const bool gray = dl->al.offset == (s64) -1;
+	char *escaped = g_markup_escape_text(dl->al.line, -1);
+	int len;
+
+	buf[0] = '\0';
+
+	if (escaped == NULL)
+		return 0;
+
+	len = gray ? scnprintf(buf, size, "%s", "<span fgcolor='gray'>") : 0;
+	len += scnprintf(buf + len, size - len, "%s", escaped);
+	if (gray)
+		len += scnprintf(buf + len, size - len, "</span>");
+
+	g_free(escaped);
+	return len;
+}
+
+static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym,
+				struct map *map, struct perf_evsel *evsel,
+				struct hist_browser_timer *hbt __maybe_unused)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct disasm_line *dl, *tmp;
+	GType col_types[MAX_ANN_COLS];
+	GtkCellRenderer *renderer;
+	GtkListStore *store;
+	GtkWidget *view;
+	char cell[512];
+	int col;
+
+	/* Every column holds a (markup) string. */
+	for (col = 0; col < MAX_ANN_COLS; col++)
+		col_types[col] = G_TYPE_STRING;
+	store = gtk_list_store_newv(MAX_ANN_COLS, col_types);
+
+	view = gtk_tree_view_new();
+	renderer = gtk_cell_renderer_text_new();
+	for (col = 0; col < MAX_ANN_COLS; col++)
+		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+					-1, col_names[col], renderer, "markup",
+					col, NULL);
+
+	/* The view holds its own reference to the model; drop ours. */
+	gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+	g_object_unref(GTK_TREE_MODEL(store));
+
+	/* One row per disassembly line: percent, offset and text cells. */
+	list_for_each_entry(dl, &notes->src->source, al.node) {
+		GtkTreeIter iter;
+		int len = 0;
+
+		gtk_list_store_append(store, &iter);
+
+		if (!perf_evsel__is_group_event(evsel)) {
+			len = perf_gtk__get_percent(cell, sizeof(cell), sym, dl,
+						    evsel->idx);
+		} else {
+			/* Space-separated percentages, one per group member. */
+			for (col = 0; col < evsel->nr_members; col++) {
+				len += perf_gtk__get_percent(cell + len,
+							     sizeof(cell) - len,
+							     sym, dl,
+							     evsel->idx + col);
+				len += scnprintf(cell + len, sizeof(cell) - len, " ");
+			}
+		}
+
+		if (len)
+			gtk_list_store_set(store, &iter, ANN_COL__PERCENT, cell, -1);
+		if (perf_gtk__get_offset(cell, sizeof(cell), sym, map, dl))
+			gtk_list_store_set(store, &iter, ANN_COL__OFFSET, cell, -1);
+		if (perf_gtk__get_line(cell, sizeof(cell), dl))
+			gtk_list_store_set(store, &iter, ANN_COL__LINE, cell, -1);
+	}
+
+	gtk_container_add(GTK_CONTAINER(window), view);
+
+	/* Free the disassembly lines now that the store has been filled. */
+	list_for_each_entry_safe(dl, tmp, &notes->src->source, al.node) {
+		list_del(&dl->al.node);
+		disasm_line__free(dl);
+	}
+
+	return 0;
+}
+
+static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
+				struct perf_evsel *evsel,
+				struct hist_browser_timer *hbt)
+{
+	GtkWidget *window;
+	GtkWidget *notebook;
+	GtkWidget *scrolled_window;
+	GtkWidget *tab_label;
+	int err;
+
+	if (map->dso->annotate_warned)
+		return -1;
+	/* Annotate the symbol; on failure report the reason and give up. */
+	err = symbol__annotate(sym, map, evsel, 0, NULL);
+	if (err) {
+		char msg[BUFSIZ];
+		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
+		ui__error("Couldn't annotate %s: %s\n", sym->name, msg);
+		return -1;
+	}
+
+	symbol__calc_percent(sym, evsel);
+	/* Reuse the active context's window and notebook, else build them now. */
+	if (perf_gtk__is_active_context(pgctx)) {
+		window = pgctx->main_window;
+		notebook = pgctx->notebook;
+	} else {
+		GtkWidget *vbox;
+		GtkWidget *infobar;
+		GtkWidget *statbar;
+
+		signal(SIGSEGV, perf_gtk__signal);
+		signal(SIGFPE,  perf_gtk__signal);
+		signal(SIGINT,  perf_gtk__signal);
+		signal(SIGQUIT, perf_gtk__signal);
+		signal(SIGTERM, perf_gtk__signal);
+
+		window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
+		gtk_window_set_title(GTK_WINDOW(window), "perf annotate");
+
+		g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
+		/* NOTE(review): window is not destroyed if activation fails -- confirm this is acceptable. */
+		pgctx = perf_gtk__activate_context(window);
+		if (!pgctx)
+			return -1;
+
+		vbox = gtk_vbox_new(FALSE, 0);
+		notebook = gtk_notebook_new();
+		pgctx->notebook = notebook;
+
+		gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
+		/* perf_gtk__setup_info_bar() may return NULL (stub without info bar support). */
+		infobar = perf_gtk__setup_info_bar();
+		if (infobar) {
+			gtk_box_pack_start(GTK_BOX(vbox), infobar,
+					   FALSE, FALSE, 0);
+		}
+
+		statbar = perf_gtk__setup_statusbar();
+		gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
+
+		gtk_container_add(GTK_CONTAINER(window), vbox);
+	}
+	/* Each annotated symbol gets a notebook page labelled with its name. */
+	scrolled_window = gtk_scrolled_window_new(NULL, NULL);
+	tab_label = gtk_label_new(sym->name);
+
+	gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
+				       GTK_POLICY_AUTOMATIC,
+				       GTK_POLICY_AUTOMATIC);
+
+	gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window,
+				 tab_label);
+
+	perf_gtk__annotate_symbol(scrolled_window, sym, map, evsel, hbt);
+	return 0;
+}
+
+int hist_entry__gtk_annotate(struct hist_entry *he,	/* annotates he->ms.sym within he->ms.map */
+			     struct perf_evsel *evsel,
+			     struct hist_browser_timer *hbt)
+{
+	return symbol__gtk_annotate(he->ms.sym, he->ms.map, evsel, hbt);
+}
+
+void perf_gtk__show_annotations(void)
+{
+	GtkWidget *win;
+
+	/* Nothing to show without an active GTK context. */
+	if (!perf_gtk__is_active_context(pgctx))
+		return;
+
+	win = pgctx->main_window;
+	gtk_widget_show_all(win);
+	perf_gtk__resize_window(win);
+	gtk_window_set_position(GTK_WINDOW(win), GTK_WIN_POS_CENTER);
+
+	/* Run the GTK main loop, then deactivate the context once it returns. */
+	gtk_main();
+	perf_gtk__deactivate_context(&pgctx);
+}
diff --git a/ui/gtk/browser.c b/ui/gtk/browser.c
new file mode 100644
index 0000000..4820e25
--- /dev/null
+++ b/ui/gtk/browser.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../evlist.h"
+#include "../cache.h"
+#include "../evsel.h"
+#include "../sort.h"
+#include "../hist.h"
+#include "../helpline.h"
+#include "gtk.h"
+
+#include <signal.h>
+
+void perf_gtk__signal(int sig)	/* handler installed with signal() by the GTK annotate code */
+{
+	perf_gtk__exit(false);
+	psignal(sig, "perf");	/* print a description of the signal, prefixed with "perf" */
+}
+
+/*
+ * Resize @window to three quarters of the width and height of the
+ * monitor it is on.
+ */
+void perf_gtk__resize_window(GtkWidget *window)
+{
+	GdkRectangle geom;
+	GdkScreen *scr;
+	int mon;
+
+	scr = gtk_widget_get_screen(window);
+	mon = gdk_screen_get_monitor_at_window(scr, window->window);
+	gdk_screen_get_monitor_geometry(scr, mon, &geom);
+
+	/* Multiply before dividing so the integer math keeps precision. */
+	gtk_window_resize(GTK_WINDOW(window),
+			  geom.width * 3 / 4,
+			  geom.height * 3 / 4);
+}
+
+const char *perf_gtk__get_percent_color(double percent)
+{
+	/* Opening <span> tag for @percent, or NULL when below both thresholds. */
+	if (percent >= MIN_RED)
+		return "<span fgcolor='red'>";
+	return percent >= MIN_GREEN ? "<span fgcolor='dark green'>" :
+				      NULL;
+}
+
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
+/* Build the info bar (empty label plus OK button) and record it in pgctx. */
+GtkWidget *perf_gtk__setup_info_bar(void)
+{
+	GtkWidget *bar = gtk_info_bar_new();
+	GtkWidget *area;
+	GtkWidget *msg;
+
+	/* Keep the bar out of gtk_widget_show_all(). */
+	gtk_widget_set_no_show_all(bar, TRUE);
+
+	msg = gtk_label_new("");
+	gtk_widget_show(msg);
+	area = gtk_info_bar_get_content_area(GTK_INFO_BAR(bar));
+	gtk_container_add(GTK_CONTAINER(area), msg);
+
+	/* Any response, e.g. from the OK button, hides the bar again. */
+	gtk_info_bar_add_button(GTK_INFO_BAR(bar), GTK_STOCK_OK, GTK_RESPONSE_OK);
+	g_signal_connect(bar, "response", G_CALLBACK(gtk_widget_hide), NULL);
+
+	/* Keep both widgets reachable through the global context. */
+	pgctx->info_bar = bar;
+	pgctx->message_label = msg;
+
+	return bar;
+}
+#endif
+
+/*
+ * Create the status bar and store it, together with its "perf report"
+ * context id, in the global GTK context.
+ */
+GtkWidget *perf_gtk__setup_statusbar(void)
+{
+	GtkWidget *bar = gtk_statusbar_new();
+
+	pgctx->statbar = bar;
+	pgctx->statbar_ctx_id =
+		gtk_statusbar_get_context_id(GTK_STATUSBAR(bar), "perf report");
+
+	return bar;
+}
diff --git a/ui/gtk/gtk.h b/ui/gtk/gtk.h
new file mode 100644
index 0000000..9846ea5
--- /dev/null
+++ b/ui/gtk/gtk.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_GTK_H_
+#define _PERF_GTK_H_ 1
+
+#include <stdbool.h>
+
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+#include <gtk/gtk.h>
+#pragma GCC diagnostic error "-Wstrict-prototypes"
+
+
+struct perf_gtk_context {	/* widgets of the GTK UI; see the global pgctx */
+	GtkWidget *main_window;	/* non-NULL while the context counts as active */
+	GtkWidget *notebook;
+	/* The two info bar widgets exist only with HAVE_GTK_INFO_BAR_SUPPORT. */
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
+	GtkWidget *info_bar;
+	GtkWidget *message_label;
+#endif
+	GtkWidget *statbar;
+	guint statbar_ctx_id;	/* context id obtained from statbar */
+};
+
+int perf_gtk__init(void);
+void perf_gtk__exit(bool wait_for_ok);
+
+extern struct perf_gtk_context *pgctx;
+
+static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx)
+{
+	return ctx != NULL && ctx->main_window != NULL;	/* context exists and has a main window */
+}
+
+struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window);
+int perf_gtk__deactivate_context(struct perf_gtk_context **ctx);
+
+void perf_gtk__init_helpline(void);
+void gtk_ui_progress__init(void);
+void perf_gtk__init_hpp(void);
+
+void perf_gtk__signal(int sig);
+void perf_gtk__resize_window(GtkWidget *window);
+const char *perf_gtk__get_percent_color(double percent);
+GtkWidget *perf_gtk__setup_statusbar(void);
+
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
+GtkWidget *perf_gtk__setup_info_bar(void);
+#else
+static inline GtkWidget *perf_gtk__setup_info_bar(void)	/* stub: no info bar support */
+{
+	return NULL;	/* callers must cope with a missing info bar */
+}
+#endif
+
+struct perf_evsel;
+struct perf_evlist;
+struct hist_entry;
+struct hist_browser_timer;
+
+int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help,
+				  struct hist_browser_timer *hbt,
+				  float min_pcnt);
+int hist_entry__gtk_annotate(struct hist_entry *he,
+			     struct perf_evsel *evsel,
+			     struct hist_browser_timer *hbt);
+void perf_gtk__show_annotations(void);
+
+#endif /* _PERF_GTK_H_ */
diff --git a/ui/gtk/helpline.c b/ui/gtk/helpline.c
new file mode 100644
index 0000000..fbf1ea9
--- /dev/null
+++ b/ui/gtk/helpline.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include "gtk.h"
+#include "../ui.h"
+#include "../helpline.h"
+#include "../../util/debug.h"
+
+/* ui_helpline .pop hook: pop the current message off the GTK status bar. */
+static void gtk_helpline_pop(void)
+{
+	/* Without an active GTK context there is no status bar to update. */
+	if (perf_gtk__is_active_context(pgctx))
+		gtk_statusbar_pop(GTK_STATUSBAR(pgctx->statbar),
+				  pgctx->statbar_ctx_id);
+}
+
+/* ui_helpline .push hook: push @msg onto the GTK status bar. */
+static void gtk_helpline_push(const char *msg)
+{
+	/* Without an active GTK context there is no status bar to update. */
+	if (perf_gtk__is_active_context(pgctx))
+		gtk_statusbar_push(GTK_STATUSBAR(pgctx->statbar),
+				   pgctx->statbar_ctx_id, msg);
+}
+
+static int gtk_helpline_show(const char *fmt, va_list ap)
+{
+	static int backlog;	/* bytes accumulated since the last flush */
+	char *const cur = ui_helpline__current;
+	char *nl;
+	int written;
+
+	written = vscnprintf(cur + backlog,
+			     sizeof(ui_helpline__current) - backlog, fmt, ap);
+	backlog += written;
+
+	/* Only the first line can be displayed: flush at the first newline. */
+	nl = strchr(cur, '\n');
+	if (nl != NULL && nl - cur <= backlog) {
+		*nl = '\0';
+		ui_helpline__puts(cur);
+		backlog = 0;
+	}
+	return written;
+}
+
+static struct ui_helpline gtk_helpline_fns = {	/* GTK implementation of the helpline hooks */
+	.pop	= gtk_helpline_pop,
+	.push	= gtk_helpline_push,
+	.show	= gtk_helpline_show,
+};
+
+void perf_gtk__init_helpline(void)	/* route helpline calls to the GTK hooks */
+{
+	helpline_fns = &gtk_helpline_fns;
+}
diff --git a/ui/gtk/hists.c b/ui/gtk/hists.c
new file mode 100644
index 0000000..24e1ec2
--- /dev/null
+++ b/ui/gtk/hists.c
@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../evlist.h"
+#include "../cache.h"
+#include "../evsel.h"
+#include "../sort.h"
+#include "../hist.h"
+#include "../helpline.h"
+#include "../string2.h"
+#include "gtk.h"
+#include <signal.h>
+
+#define MAX_COLUMNS			32
+
+static int __percent_color_snprintf(struct perf_hpp *hpp, const char *fmt, ...)
+{
+	const char *markup;
+	char *buf = hpp->buf;
+	size_t size = hpp->size;
+	double percent;
+	int ret = 0, len;
+	va_list args;
+
+	/* The variadic part is expected to be (int width, double percent). */
+	va_start(args, fmt);
+	len = va_arg(args, int);
+	percent = va_arg(args, double);
+	va_end(args);
+
+	markup = perf_gtk__get_percent_color(percent);
+	/* Never pass markup as the format string itself. */
+	if (markup)
+		ret += scnprintf(buf, size, "%s", markup);
+
+	ret += scnprintf(buf + ret, size - ret, fmt, len, percent);
+	if (markup)
+		ret += scnprintf(buf + ret, size - ret, "</span>");
+
+	return ret;
+}
+
+#define __HPP_COLOR_PERCENT_FN(_type, _field)					\
+static u64 he_get_##_field(struct hist_entry *he)				\
+{										\
+	return he->stat._field;							\
+}										\
+										\
+static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt,		\
+				       struct perf_hpp *hpp,			\
+				       struct hist_entry *he)			\
+{										\
+	return hpp__fmt(fmt, hpp, he, he_get_##_field, " %*.2f%%",		\
+			__percent_color_snprintf, true);			\
+}
+/* Same shape as the macro above, but reads he->stat_acc and formats via hpp__fmt_acc(). */
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field)				\
+static u64 he_get_acc_##_field(struct hist_entry *he)				\
+{										\
+	return he->stat_acc->_field;						\
+}										\
+										\
+static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt,		\
+				       struct perf_hpp *hpp,			\
+				       struct hist_entry *he)			\
+{										\
+	return hpp__fmt_acc(fmt, hpp, he, he_get_acc_##_field, " %*.2f%%", 	\
+			    __percent_color_snprintf, true);			\
+}
+/* Each line below defines one getter plus one perf_gtk__hpp_color_<type>() callback. */
+__HPP_COLOR_PERCENT_FN(overhead, period)
+__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
+__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
+__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
+__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
+__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
+
+#undef __HPP_COLOR_PERCENT_FN
+
+
+void perf_gtk__init_hpp(void)	/* install the GTK color callbacks for the overhead columns */
+{
+	perf_hpp__format[PERF_HPP__OVERHEAD].color =
+				perf_gtk__hpp_color_overhead;
+	perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color =
+				perf_gtk__hpp_color_overhead_sys;
+	perf_hpp__format[PERF_HPP__OVERHEAD_US].color =
+				perf_gtk__hpp_color_overhead_us;
+	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color =
+				perf_gtk__hpp_color_overhead_guest_sys;
+	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
+				perf_gtk__hpp_color_overhead_guest_us;
+	perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
+				perf_gtk__hpp_color_overhead_acc;
+}
+
+/*
+ * Append one row for @chain below *@parent: column 0 gets the node's value,
+ * column @col the symbol name.  If *@reparent is set, the new row becomes
+ * the parent for the rows that follow and the flag is cleared.
+ */
+static void perf_gtk__add_flat_row(GtkTreeStore *store, GtkTreeIter *parent,
+				   bool *reparent,
+				   struct callchain_node *node,
+				   struct callchain_list *chain,
+				   int col, u64 total)
+{
+	GtkTreeIter iter;
+	char buf[128];
+
+	gtk_tree_store_append(store, &iter, parent);
+
+	callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
+	gtk_tree_store_set(store, &iter, 0, buf, -1);
+
+	callchain_list__sym_name(chain, buf, sizeof(buf), false);
+	gtk_tree_store_set(store, &iter, col, buf, -1);
+
+	if (*reparent) {
+		*parent = iter;
+		*reparent = false;
+	}
+}
+
+static void perf_gtk__add_callchain_flat(struct rb_root *root, GtkTreeStore *store,
+					 GtkTreeIter *parent, int col, u64 total)
+{
+	bool has_single_node = (rb_first(root) == rb_last(root));
+	struct rb_node *nd;
+
+	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+		struct callchain_node *node;
+		struct callchain_list *chain;
+		GtkTreeIter new_parent = *parent;
+		/*
+		 * Only show the top-most symbol in a callchain
+		 * if it's not the only callchain.
+		 */
+		bool need_new_parent = !has_single_node;
+
+		node = rb_entry(nd, struct callchain_node, rb_node);
+		callchain_node__make_parent_list(node);
+
+		/* Rows for the entries on node->parent_val come first ... */
+		list_for_each_entry(chain, &node->parent_val, list) {
+			perf_gtk__add_flat_row(store, &new_parent,
+					       &need_new_parent, node, chain,
+					       col, total);
+		}
+
+		/* ... followed by the node's own entries. */
+		list_for_each_entry(chain, &node->val, list) {
+			perf_gtk__add_flat_row(store, &new_parent,
+					       &need_new_parent, node, chain,
+					       col, total);
+		}
+	}
+}
+
+/* Folded mode: one row per callchain, symbol names joined by the field separator. */
+static void perf_gtk__add_callchain_folded(struct rb_root *root, GtkTreeStore *store,
+					   GtkTreeIter *parent, int col, u64 total)
+{
+	struct rb_node *nd;
+
+	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+		struct callchain_node *node;
+		struct callchain_list *chain;
+		GtkTreeIter iter;
+		char buf[64];
+		char *str, *str_alloc = NULL;
+		bool first = true;
+
+		node = rb_entry(nd, struct callchain_node, rb_node);
+		callchain_node__make_parent_list(node);
+
+		list_for_each_entry(chain, &node->parent_val, list) {
+			char name[1024];
+
+			callchain_list__sym_name(chain, name, sizeof(name), false);
+			if (asprintf(&str, "%s%s%s",
+				     first ? "" : str_alloc,
+				     first ? "" : symbol_conf.field_sep ?: "; ",
+				     name) < 0) {
+				free(str_alloc);	/* don't leak the partial string */
+				return;
+			}
+			first = false;
+			free(str_alloc);
+			str_alloc = str;
+		}
+
+		list_for_each_entry(chain, &node->val, list) {
+			char name[1024];
+
+			callchain_list__sym_name(chain, name, sizeof(name), false);
+			if (asprintf(&str, "%s%s%s",
+				     first ? "" : str_alloc,
+				     first ? "" : symbol_conf.field_sep ?: "; ",
+				     name) < 0) {
+				free(str_alloc);	/* don't leak the partial string */
+				return;
+			}
+			first = false;
+			free(str_alloc);
+			str_alloc = str;
+		}
+
+		gtk_tree_store_append(store, &iter, parent);
+
+		callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
+		gtk_tree_store_set(store, &iter, 0, buf, -1);
+		/* str_alloc rather than str: str is unset when both lists are empty */
+		gtk_tree_store_set(store, &iter, col, str_alloc, -1);
+
+		free(str_alloc);
+	}
+}
+
+/*
+ * Graph mode: add the callchains in @root below @parent and recurse into
+ * each node's children, which hang below the node's last row.
+ */
+static void perf_gtk__add_callchain_graph(struct rb_root *root, GtkTreeStore *store,
+					  GtkTreeIter *parent, int col, u64 total)
+{
+	const bool lone_chain = rb_first(root) == rb_last(root);
+	struct rb_node *rbn;
+
+	for (rbn = rb_first(root); rbn != NULL; rbn = rb_next(rbn)) {
+		struct callchain_node *node =
+			rb_entry(rbn, struct callchain_node, rb_node);
+		struct callchain_list *chain;
+		GtkTreeIter row, attach_to = *parent;
+		/*
+		 * Only show the top-most symbol in a callchain
+		 * if it's not the only callchain.
+		 */
+		bool nest = !lone_chain && (node->val_nr > 1);
+		u64 child_total = total;
+
+		list_for_each_entry(chain, &node->val, list) {
+			char text[128];
+
+			gtk_tree_store_append(store, &row, &attach_to);
+
+			callchain_node__scnprintf_value(node, text, sizeof(text), total);
+			gtk_tree_store_set(store, &row, 0, text, -1);
+
+			callchain_list__sym_name(chain, text, sizeof(text), false);
+			gtk_tree_store_set(store, &row, col, text, -1);
+
+			if (!nest)
+				continue;
+			attach_to = row;
+			nest = false;
+		}
+
+		/* In relative mode the total passed down is node->children_hit. */
+		if (callchain_param.mode == CHAIN_GRAPH_REL)
+			child_total = node->children_hit;
+
+		/* Now 'row' contains info of the last callchain_list */
+		perf_gtk__add_callchain_graph(&node->rb_root, store, &row, col,
+					      child_total);
+	}
+}
+
+static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
+				    GtkTreeIter *parent, int col, u64 total)
+{
+	/* Flat and folded modes have dedicated renderers; anything else is a graph. */
+	void (*add)(struct rb_root *, GtkTreeStore *, GtkTreeIter *, int, u64) =
+		callchain_param.mode == CHAIN_FLAT   ? perf_gtk__add_callchain_flat :
+		callchain_param.mode == CHAIN_FOLDED ? perf_gtk__add_callchain_folded :
+						       perf_gtk__add_callchain_graph;
+	add(root, store, parent, col, total);
+}
+
+static void on_row_activated(GtkTreeView *view, GtkTreePath *path,
+			     GtkTreeViewColumn *col __maybe_unused,
+			     gpointer user_data __maybe_unused)
+{
+	/* Activating a row toggles it between expanded and collapsed. */
+	if (!gtk_tree_view_row_expanded(view, path)) {
+		gtk_tree_view_expand_row(view, path, FALSE);
+		return;
+	}
+	gtk_tree_view_collapse_row(view, path);
+}
+
+static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
+				 float min_pcnt)
+{
+	struct perf_hpp_fmt *fmt;
+	GType col_types[MAX_COLUMNS];
+	GtkCellRenderer *renderer;
+	GtkTreeStore *store;
+	struct rb_node *nd;
+	GtkWidget *view;
+	int col_idx;
+	int sym_col = -1;
+	int nr_cols;
+	char s[512];
+
+	struct perf_hpp hpp = {
+		.buf		= s,
+		.size		= sizeof(s),
+	};
+
+	nr_cols = 0;
+	/* NOTE(review): counts every format (skipped ones too) and is not checked against MAX_COLUMNS -- confirm. */
+	hists__for_each_format(hists, fmt)
+		col_types[nr_cols++] = G_TYPE_STRING;
+
+	store = gtk_tree_store_newv(nr_cols, col_types);
+
+	view = gtk_tree_view_new();
+
+	renderer = gtk_cell_renderer_text_new();
+	/* Insert one markup text column per format that is not skipped. */
+	col_idx = 0;
+
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		/*
+		 * XXX no way to determine where symcol column is..
+		 *     Just use last column for now.
+		 */
+		if (perf_hpp__is_sort_entry(fmt))
+			sym_col = col_idx;
+
+		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+							    -1, fmt->name,
+							    renderer, "markup",
+							    col_idx++, NULL);
+	}
+	/* Make the columns resizable; the sym_col column carries the expander. */
+	for (col_idx = 0; col_idx < nr_cols; col_idx++) {
+		GtkTreeViewColumn *column;
+
+		column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx);
+		gtk_tree_view_column_set_resizable(column, TRUE);
+
+		if (col_idx == sym_col) {
+			gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view),
+							  column);
+		}
+	}
+
+	gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+
+	g_object_unref(GTK_TREE_MODEL(store));
+	/* One top-level row per entry that is neither filtered nor below min_pcnt. */
+	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+		GtkTreeIter iter;
+		u64 total = hists__total_period(h->hists);
+		float percent;
+
+		if (h->filtered)
+			continue;
+
+		percent = hist_entry__get_percent_limit(h);
+		if (percent < min_pcnt)
+			continue;
+
+		gtk_tree_store_append(store, &iter, NULL);
+
+		col_idx = 0;
+		/* Each callback formats into s (through hpp), which is then copied into the store. */
+		hists__for_each_format(hists, fmt) {
+			if (perf_hpp__should_skip(fmt, h->hists))
+				continue;
+
+			if (fmt->color)
+				fmt->color(fmt, &hpp, h);
+			else
+				fmt->entry(fmt, &hpp, h);
+
+			gtk_tree_store_set(store, &iter, col_idx++, s, -1);
+		}
+		/* Callchain rows are added as children of this entry's row. */
+		if (symbol_conf.use_callchain && hists__has(hists, sym)) {
+			if (callchain_param.mode == CHAIN_GRAPH_REL)
+				total = symbol_conf.cumulate_callchain ?
+					h->stat_acc->period : h->stat.period;
+
+			perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
+						sym_col, total);
+		}
+	}
+
+	gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE);
+
+	g_signal_connect(view, "row-activated",
+			 G_CALLBACK(on_row_activated), NULL);
+	gtk_container_add(GTK_CONTAINER(window), view);
+}
+
+/*
+ * Append the hist entries found under @root to @store as children of
+ * @parent, recursing into the children of every non-leaf entry.
+ *
+ * Each row holds one cell per format of the first hpp_list_node of @hists,
+ * followed by one cell that joins the entry's own formats (he->hpp_list)
+ * with two spaces.  Entries that are filtered or below @min_pcnt are
+ * skipped.  @hpp provides the scratch buffer used for formatting; it
+ * points at the same buffer start again once an entry has been handled.
+ */
+static void perf_gtk__add_hierarchy_entries(struct hists *hists,
+					    struct rb_root *root,
+					    GtkTreeStore *store,
+					    GtkTreeIter *parent,
+					    struct perf_hpp *hpp,
+					    float min_pcnt)
+{
+	int col_idx = 0;
+	struct rb_node *node;
+	struct hist_entry *he;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	u64 total = hists__total_period(hists);
+	int size;
+
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		GtkTreeIter iter;
+		float percent;
+		char *bf;
+
+		he = rb_entry(node, struct hist_entry, rb_node);
+		if (he->filtered)
+			continue;
+
+		percent = hist_entry__get_percent_limit(he);
+		if (percent < min_pcnt)
+			continue;
+
+		gtk_tree_store_append(store, &iter, parent);
+
+		col_idx = 0;
+
+		/* the first hpp_list_node is for overhead columns */
+		fmt_node = list_first_entry(&hists->hpp_formats,
+					    struct perf_hpp_list_node, list);
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+			if (fmt->color)
+				fmt->color(fmt, hpp, he);
+			else
+				fmt->entry(fmt, hpp, he);
+
+			gtk_tree_store_set(store, &iter, col_idx++, hpp->buf, -1);
+		}
+
+		/* remember the buffer start: the loop below advances hpp */
+		bf = hpp->buf;
+		size = hpp->size;
+		perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+			int ret;
+
+			if (fmt->color)
+				ret = fmt->color(fmt, hpp, he);
+			else
+				ret = fmt->entry(fmt, hpp, he);
+
+			/* separate consecutive sort keys with two spaces */
+			snprintf(hpp->buf + ret, hpp->size - ret, "  ");
+			advance_hpp(hpp, ret + 2);
+		}
+
+		gtk_tree_store_set(store, &iter, col_idx, ltrim(rtrim(bf)), -1);
+
+		/*
+		 * Rewind the scratch buffer for every entry, leaf or not.
+		 * Rewinding only before recursing would let each leaf entry
+		 * permanently consume part of the buffer, shrinking hpp->size
+		 * with every row until nothing is left.
+		 */
+		hpp->buf = bf;
+		hpp->size = size;
+
+		if (!he->leaf) {
+			perf_gtk__add_hierarchy_entries(hists, &he->hroot_out,
+							store, &iter, hpp,
+							min_pcnt);
+
+			/* leave a placeholder child when no child passes the limit */
+			if (!hist_entry__has_hierarchy_children(he, min_pcnt)) {
+				char buf[32];
+				GtkTreeIter child;
+
+				snprintf(buf, sizeof(buf), "no entry >= %.2f%%",
+					 min_pcnt);
+
+				gtk_tree_store_append(store, &child, &iter);
+				gtk_tree_store_set(store, &child, col_idx, buf, -1);
+			}
+		}
+
+		if (symbol_conf.use_callchain && he->leaf) {
+			/* relative graphs use the entry's own period as total */
+			if (callchain_param.mode == CHAIN_GRAPH_REL)
+				total = symbol_conf.cumulate_callchain ?
+					he->stat_acc->period : he->stat.period;
+
+			perf_gtk__add_callchain(&he->sorted_chain, store, &iter,
+						col_idx, total);
+		}
+	}
+
+}
+
+/*
+ * Append @str to the NUL-terminated string held in @buf, never writing
+ * beyond @size bytes; whatever does not fit is dropped.
+ */
+static void perf_gtk__header_append(char *buf, size_t size, const char *str)
+{
+	size_t used = strlen(buf);
+
+	if (used + 1 < size)
+		strncat(buf, str, size - used - 1);
+}
+
+/*
+ * Build the hierarchy tree view for @hists and add it to @window.
+ *
+ * The view has one column per format of the first hpp_list_node of @hists
+ * plus a single merged column for all sort keys, whose header joins the
+ * keys of one hierarchy level with "+" and the levels with " / ".  Rows
+ * are filled by perf_gtk__add_hierarchy_entries() using @min_pcnt.
+ */
+static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists,
+				     float min_pcnt)
+{
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	GType col_types[MAX_COLUMNS];
+	GtkCellRenderer *renderer;
+	GtkTreeStore *store;
+	GtkWidget *view;
+	int col_idx;
+	int nr_cols = 0;
+	char s[512];
+	char buf[512];
+	bool first_node, first_col;
+	struct perf_hpp hpp = {
+		.buf		= s,
+		.size		= sizeof(s),
+	};
+
+	/* count the leading formats that are neither sort nor dynamic entries */
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__is_sort_entry(fmt) ||
+		    perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		col_types[nr_cols++] = G_TYPE_STRING;
+	}
+	/* one more column shared by all sort keys */
+	col_types[nr_cols++] = G_TYPE_STRING;
+
+	store = gtk_tree_store_newv(nr_cols, col_types);
+	view = gtk_tree_view_new();
+	renderer = gtk_cell_renderer_text_new();
+
+	col_idx = 0;
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+							    -1, fmt->name,
+							    renderer, "markup",
+							    col_idx++, NULL);
+	}
+
+	/*
+	 * construct merged column header since sort keys share single column;
+	 * every append is bounded so a long key list cannot overrun buf
+	 */
+	buf[0] = '\0';
+	first_node = true;
+	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+		if (!first_node)
+			perf_gtk__header_append(buf, sizeof(buf), " / ");
+		first_node = false;
+
+		first_col = true;
+		perf_hpp_list__for_each_format(&fmt_node->hpp ,fmt) {
+			if (perf_hpp__should_skip(fmt, hists))
+				continue;
+
+			if (!first_col)
+				perf_gtk__header_append(buf, sizeof(buf), "+");
+			first_col = false;
+
+			fmt->header(fmt, &hpp, hists, 0, NULL);
+			perf_gtk__header_append(buf, sizeof(buf),
+						ltrim(rtrim(hpp.buf)));
+		}
+	}
+
+	gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+						    -1, buf,
+						    renderer, "markup",
+						    col_idx++, NULL);
+
+	for (col_idx = 0; col_idx < nr_cols; col_idx++) {
+		GtkTreeViewColumn *column;
+
+		column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx);
+		gtk_tree_view_column_set_resizable(column, TRUE);
+
+		if (col_idx == 0) {
+			gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view),
+							  column);
+		}
+	}
+
+	gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+	g_object_unref(GTK_TREE_MODEL(store));
+
+	perf_gtk__add_hierarchy_entries(hists, &hists->entries, store,
+					NULL, &hpp, min_pcnt);
+
+	gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE);
+
+	g_signal_connect(view, "row-activated",
+			 G_CALLBACK(on_row_activated), NULL);
+	gtk_container_add(GTK_CONTAINER(window), view);
+}
+
+/*
+ * Show the histograms of the events in @evlist in a GTK window, one
+ * notebook tab per event, and run the GTK main loop until it quits.
+ *
+ * @help is pushed onto the helpline once the window is shown; @min_pcnt is
+ * handed to the per-event view builders; @hbt is unused.  Returns 0 after
+ * the main loop ends, or -1 when the GTK context cannot be activated.
+ */
+int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
+				  const char *help,
+				  struct hist_browser_timer *hbt __maybe_unused,
+				  float min_pcnt)
+{
+	struct perf_evsel *pos;
+	GtkWidget *vbox;
+	GtkWidget *notebook;
+	GtkWidget *info_bar;
+	GtkWidget *statbar;
+	GtkWidget *window;
+
+	/* route these signals to perf_gtk__signal */
+	signal(SIGSEGV, perf_gtk__signal);
+	signal(SIGFPE,  perf_gtk__signal);
+	signal(SIGINT,  perf_gtk__signal);
+	signal(SIGQUIT, perf_gtk__signal);
+	signal(SIGTERM, perf_gtk__signal);
+
+	window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
+
+	gtk_window_set_title(GTK_WINDOW(window), "perf report");
+
+	g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
+
+	pgctx = perf_gtk__activate_context(window);
+	/* NOTE(review): window is not destroyed on this early return - confirm */
+	if (!pgctx)
+		return -1;
+
+	vbox = gtk_vbox_new(FALSE, 0);
+
+	notebook = gtk_notebook_new();
+
+	gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
+
+	/* the info bar is optional: it is only packed when one was created */
+	info_bar = perf_gtk__setup_info_bar();
+	if (info_bar)
+		gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0);
+
+	statbar = perf_gtk__setup_statusbar();
+	gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
+
+	gtk_container_add(GTK_CONTAINER(window), vbox);
+
+	evlist__for_each_entry(evlist, pos) {
+		struct hists *hists = evsel__hists(pos);
+		const char *evname = perf_evsel__name(pos);
+		GtkWidget *scrolled_window;
+		GtkWidget *tab_label;
+		char buf[512];
+		size_t size = sizeof(buf);
+
+		if (symbol_conf.event_group) {
+			/* with event groups only the group leader gets a tab */
+			if (!perf_evsel__is_group_leader(pos))
+				continue;
+
+			/* label the tab with the group description instead */
+			if (pos->nr_members > 1) {
+				perf_evsel__group_desc(pos, buf, size);
+				evname = buf;
+			}
+		}
+
+		scrolled_window = gtk_scrolled_window_new(NULL, NULL);
+
+		gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
+							GTK_POLICY_AUTOMATIC,
+							GTK_POLICY_AUTOMATIC);
+
+		if (symbol_conf.report_hierarchy)
+			perf_gtk__show_hierarchy(scrolled_window, hists, min_pcnt);
+		else
+			perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
+
+		tab_label = gtk_label_new(evname);
+
+		gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label);
+	}
+
+	gtk_widget_show_all(window);
+
+	perf_gtk__resize_window(window);
+
+	gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
+
+	ui_helpline__push(help);
+
+	gtk_main();
+
+	/* the main loop has ended: drop the context activated above */
+	perf_gtk__deactivate_context(&pgctx);
+
+	return 0;
+}
diff --git a/ui/gtk/progress.c b/ui/gtk/progress.c
new file mode 100644
index 0000000..b6ad885
--- /dev/null
+++ b/ui/gtk/progress.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+
+#include "gtk.h"
+#include "../progress.h"
+#include "util.h"
+
+static GtkWidget *dialog;
+static GtkWidget *progress;
+
+/*
+ * ui_progress "update" callback: create the progress window on first use,
+ * then show the completed fraction and the "curr / total" counters of @p.
+ */
+static void gtk_ui_progress__update(struct ui_progress *p)
+{
+	char text[1024];
+	double fraction = 0.0;
+
+	/* leave the bar empty when there is no total to divide by */
+	if (p->total)
+		fraction = 1.0 * p->curr / p->total;
+
+	if (!dialog) {
+		GtkWidget *box = gtk_vbox_new(TRUE, 5);
+		GtkWidget *title = gtk_label_new(p->title);
+
+		dialog = gtk_window_new(GTK_WINDOW_TOPLEVEL);
+		progress = gtk_progress_bar_new();
+
+		gtk_box_pack_start(GTK_BOX(box), title, TRUE, FALSE, 3);
+		gtk_box_pack_start(GTK_BOX(box), progress, TRUE, TRUE, 3);
+
+		gtk_container_add(GTK_CONTAINER(dialog), box);
+
+		gtk_window_set_title(GTK_WINDOW(dialog), "perf");
+		gtk_window_resize(GTK_WINDOW(dialog), 300, 80);
+		gtk_window_set_position(GTK_WINDOW(dialog), GTK_WIN_POS_CENTER);
+
+		gtk_widget_show_all(dialog);
+	}
+
+	gtk_progress_bar_set_fraction(GTK_PROGRESS_BAR(progress), fraction);
+	snprintf(text, sizeof(text), "%"PRIu64" / %"PRIu64, p->curr, p->total);
+	gtk_progress_bar_set_text(GTK_PROGRESS_BAR(progress), text);
+
+	/* gtk_main is not running yet, so drain pending events by hand */
+	while (gtk_events_pending())
+		gtk_main_iteration();
+}
+
+/*
+ * ui_progress "finish" callback: tear down the progress window created by
+ * gtk_ui_progress__update().  Does nothing when no window exists, i.e.
+ * when no update ran before or finish was already called.
+ */
+static void gtk_ui_progress__finish(void)
+{
+	if (dialog == NULL)
+		return;
+
+	/* this will also destroy all of its children */
+	gtk_widget_destroy(dialog);
+
+	dialog = NULL;
+	/* the bar was packed into the dialog, so it no longer exists either */
+	progress = NULL;
+}
+
+/* Progress callbacks of the GTK UI; installed by gtk_ui_progress__init(). */
+static struct ui_progress_ops gtk_ui_progress__ops = {
+	.update		= gtk_ui_progress__update,
+	.finish		= gtk_ui_progress__finish,
+};
+
+/* Make the GTK callbacks the active ui_progress operations. */
+void gtk_ui_progress__init(void)
+{
+	ui_progress__ops = &gtk_ui_progress__ops;
+}
diff --git a/ui/gtk/setup.c b/ui/gtk/setup.c
new file mode 100644
index 0000000..506e73b
--- /dev/null
+++ b/ui/gtk/setup.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "gtk.h"
+#include "../../util/cache.h"
+#include "../../util/debug.h"
+
+extern struct perf_error_ops perf_gtk_eops;
+
+/*
+ * Hook the GTK error, helpline, progress and hpp handlers into perf, then
+ * initialize GTK itself.  Returns 0 when gtk_init_check() succeeds and -1
+ * otherwise.
+ */
+int perf_gtk__init(void)
+{
+	perf_error__register(&perf_gtk_eops);
+	perf_gtk__init_helpline();
+	gtk_ui_progress__init();
+	perf_gtk__init_hpp();
+
+	if (!gtk_init_check(NULL, NULL))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Leave the GTK UI: unregister the GTK error handlers and ask the main
+ * loop to quit.  Nothing is done unless the global context is active.
+ */
+void perf_gtk__exit(bool wait_for_ok __maybe_unused)
+{
+	if (perf_gtk__is_active_context(pgctx)) {
+		perf_error__unregister(&perf_gtk_eops);
+		gtk_main_quit();
+	}
+}
diff --git a/ui/gtk/util.c b/ui/gtk/util.c
new file mode 100644
index 0000000..7250d81
--- /dev/null
+++ b/ui/gtk/util.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../util.h"
+#include "../../util/util.h"
+#include "../../util/debug.h"
+#include "gtk.h"
+
+#include <string.h>
+
+
+struct perf_gtk_context *pgctx;
+
+/*
+ * Allocate a GTK context whose main window is @window.
+ *
+ * The context is zero-initialized, so members other than main_window are
+ * NULL/0 until some later code fills them in.  Returns NULL when the
+ * allocation fails.
+ */
+struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window)
+{
+	struct perf_gtk_context *ctx;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (ctx)
+		ctx->main_window = window;
+
+	return ctx;
+}
+
+/*
+ * Release the context that @ctx points to via zfree().  Returns 0 on
+ * success, or -1 when *@ctx is not an active context.
+ */
+int perf_gtk__deactivate_context(struct perf_gtk_context **ctx)
+{
+	if (perf_gtk__is_active_context(*ctx)) {
+		zfree(ctx);
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Error callback of the GTK UI: show the formatted message in a modal
+ * error dialog attached to the main window.
+ *
+ * When no GTK context is active, or the message cannot be allocated, the
+ * message is written to stderr instead and -1 is returned.  Returns 0
+ * once the dialog has been shown and dismissed.
+ */
+static int perf_gtk__error(const char *format, va_list args)
+{
+	char *msg;
+	GtkWidget *dialog;
+	va_list fallback_args;
+
+	/*
+	 * vasprintf() consumes @args, so the stderr fallback must format
+	 * from a copy taken before the first use.
+	 */
+	va_copy(fallback_args, args);
+
+	if (!perf_gtk__is_active_context(pgctx) ||
+	    vasprintf(&msg, format, args) < 0) {
+		fprintf(stderr, "Error:\n");
+		vfprintf(stderr, format, fallback_args);
+		fprintf(stderr, "\n");
+		va_end(fallback_args);
+		return -1;
+	}
+	va_end(fallback_args);
+
+	dialog = gtk_message_dialog_new_with_markup(GTK_WINDOW(pgctx->main_window),
+					GTK_DIALOG_DESTROY_WITH_PARENT,
+					GTK_MESSAGE_ERROR,
+					GTK_BUTTONS_CLOSE,
+					"<b>Error</b>\n\n%s", msg);
+	gtk_dialog_run(GTK_DIALOG(dialog));
+
+	gtk_widget_destroy(dialog);
+	free(msg);
+	return 0;
+}
+
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
+/*
+ * Warning callback used when GTK info bars are available: put the
+ * formatted message into the context's message label and show the info
+ * bar as a warning.
+ *
+ * When no GTK context is active, or the message cannot be allocated, the
+ * message is written to stderr instead and -1 is returned.  Returns 0
+ * otherwise.
+ */
+static int perf_gtk__warning_info_bar(const char *format, va_list args)
+{
+	char *msg;
+	va_list fallback_args;
+
+	/*
+	 * vasprintf() consumes @args, so the stderr fallback must format
+	 * from a copy taken before the first use.
+	 */
+	va_copy(fallback_args, args);
+
+	if (!perf_gtk__is_active_context(pgctx) ||
+	    vasprintf(&msg, format, args) < 0) {
+		fprintf(stderr, "Warning:\n");
+		vfprintf(stderr, format, fallback_args);
+		fprintf(stderr, "\n");
+		va_end(fallback_args);
+		return -1;
+	}
+	va_end(fallback_args);
+
+	gtk_label_set_text(GTK_LABEL(pgctx->message_label), msg);
+	gtk_info_bar_set_message_type(GTK_INFO_BAR(pgctx->info_bar),
+				      GTK_MESSAGE_WARNING);
+	gtk_widget_show(pgctx->info_bar);
+
+	free(msg);
+	return 0;
+}
+#else
+/*
+ * Warning callback used when GTK info bars are not available: replace the
+ * current status bar message with the first line of the formatted
+ * message.
+ *
+ * When no GTK context is active, or the message cannot be allocated, the
+ * message is written to stderr instead and -1 is returned.  Returns 0
+ * otherwise.
+ */
+static int perf_gtk__warning_statusbar(const char *format, va_list args)
+{
+	char *msg, *p;
+	va_list fallback_args;
+
+	/*
+	 * vasprintf() consumes @args, so the stderr fallback must format
+	 * from a copy taken before the first use.
+	 */
+	va_copy(fallback_args, args);
+
+	if (!perf_gtk__is_active_context(pgctx) ||
+	    vasprintf(&msg, format, args) < 0) {
+		fprintf(stderr, "Warning:\n");
+		vfprintf(stderr, format, fallback_args);
+		fprintf(stderr, "\n");
+		va_end(fallback_args);
+		return -1;
+	}
+	va_end(fallback_args);
+
+	gtk_statusbar_pop(GTK_STATUSBAR(pgctx->statbar),
+			  pgctx->statbar_ctx_id);
+
+	/* Only first line can be displayed */
+	p = strchr(msg, '\n');
+	if (p)
+		*p = '\0';
+
+	gtk_statusbar_push(GTK_STATUSBAR(pgctx->statbar),
+			   pgctx->statbar_ctx_id, msg);
+
+	free(msg);
+	return 0;
+}
+#endif
+
+/*
+ * Error and warning callbacks of the GTK UI.  The warning callback is the
+ * info bar variant when HAVE_GTK_INFO_BAR_SUPPORT is defined and the
+ * status bar variant otherwise.
+ */
+struct perf_error_ops perf_gtk_eops = {
+	.error		= perf_gtk__error,
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
+	.warning	= perf_gtk__warning_info_bar,
+#else
+	.warning	= perf_gtk__warning_statusbar,
+#endif
+};
diff --git a/ui/helpline.c b/ui/helpline.c
new file mode 100644
index 0000000..b3c4214
--- /dev/null
+++ b/ui/helpline.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../debug.h"
+#include "helpline.h"
+#include "ui.h"
+#include "../util.h"
+
+char ui_helpline__current[512];
+
+/* Default "pop" callback: does nothing. */
+static void nop_helpline__pop(void)
+{
+}
+
+/* Default "push" callback: ignores @msg. */
+static void nop_helpline__push(const char *msg __maybe_unused)
+{
+}
+
+/* Default "show" callback: ignores its arguments and returns 0. */
+static int nop_helpline__show(const char *fmt __maybe_unused,
+			       va_list ap __maybe_unused)
+{
+	return 0;
+}
+
+/* No-op helpline callbacks, in effect until helpline_fns is repointed. */
+static struct ui_helpline default_helpline_fns = {
+	.pop	= nop_helpline__pop,
+	.push	= nop_helpline__push,
+	.show	= nop_helpline__show,
+};
+
+/* Callbacks that the ui_helpline__*() functions below dispatch to. */
+struct ui_helpline *helpline_fns = &default_helpline_fns;
+
+/* Invoke the current helpline "pop" callback. */
+void ui_helpline__pop(void)
+{
+	helpline_fns->pop();
+}
+
+/* Pass @msg to the current helpline "push" callback. */
+void ui_helpline__push(const char *msg)
+{
+	helpline_fns->push(msg);
+}
+
+/*
+ * Format a message from @fmt and @ap and push it onto the helpline.  When
+ * the message cannot be allocated it is written to stderr instead.
+ */
+void ui_helpline__vpush(const char *fmt, va_list ap)
+{
+	char *s;
+	va_list fallback_ap;
+
+	/*
+	 * vasprintf() consumes @ap, so the stderr fallback must format from
+	 * a copy taken before the first use.
+	 */
+	va_copy(fallback_ap, ap);
+
+	if (vasprintf(&s, fmt, ap) < 0)
+		vfprintf(stderr, fmt, fallback_ap);
+	else {
+		ui_helpline__push(s);
+		free(s);
+	}
+
+	va_end(fallback_ap);
+}
+
+/* printf-style variant of ui_helpline__vpush(). */
+void ui_helpline__fpush(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	ui_helpline__vpush(fmt, ap);
+	va_end(ap);
+}
+
+/* Replace the current helpline message: pop it, then push @msg. */
+void ui_helpline__puts(const char *msg)
+{
+	ui_helpline__pop();
+	ui_helpline__push(msg);
+}
+
+/* Pass @fmt and @ap to the current "show" callback and return its result. */
+int ui_helpline__vshow(const char *fmt, va_list ap)
+{
+	return helpline_fns->show(fmt, ap);
+}
+
+/* Pop the current helpline message, then push a printf-style formatted one. */
+void ui_helpline__printf(const char *fmt, ...)
+{
+	va_list ap;
+
+	ui_helpline__pop();
+	va_start(ap, fmt);
+	ui_helpline__vpush(fmt, ap);
+	va_end(ap);
+}
diff --git a/ui/helpline.h b/ui/helpline.h
new file mode 100644
index 0000000..8f775a0
--- /dev/null
+++ b/ui/helpline.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_HELPLINE_H_
+#define _PERF_UI_HELPLINE_H_ 1
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "../util/cache.h"
+
+/* Callback table through which a UI implements the helpline. */
+struct ui_helpline {
+	void (*pop)(void);			/* called by ui_helpline__pop() */
+	void (*push)(const char *msg);		/* called by ui_helpline__push() */
+	int  (*show)(const char *fmt, va_list ap); /* called by ui_helpline__vshow() */
+};
+
+extern struct ui_helpline *helpline_fns;
+
+void ui_helpline__init(void);
+
+void ui_helpline__pop(void);
+void ui_helpline__push(const char *msg);
+void ui_helpline__vpush(const char *fmt, va_list ap);
+void ui_helpline__fpush(const char *fmt, ...);
+void ui_helpline__puts(const char *msg);
+void ui_helpline__printf(const char *fmt, ...);
+int  ui_helpline__vshow(const char *fmt, va_list ap);
+
+extern char ui_helpline__current[512];
+extern char ui_helpline__last_msg[];
+
+#endif /* _PERF_UI_HELPLINE_H_ */
diff --git a/ui/hist.c b/ui/hist.c
new file mode 100644
index 0000000..706f6f1
--- /dev/null
+++ b/ui/hist.c
@@ -0,0 +1,822 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <math.h>
+#include <linux/compiler.h>
+
+#include "../util/hist.h"
+#include "../util/util.h"
+#include "../util/sort.h"
+#include "../util/evsel.h"
+#include "../util/evlist.h"
+
+/* hist period print (hpp) functions */
+
+/*
+ * Call @fn on @hpp with @fmt and the remaining arguments, advance @hpp by
+ * the value @fn returned, and evaluate to that value.
+ */
+#define hpp__call_print_fn(hpp, fn, fmt, ...)			\
+({								\
+	int __ret = fn(hpp, fmt, ##__VA_ARGS__);		\
+	advance_hpp(hpp, __ret);				\
+	__ret;							\
+})
+
+/*
+ * Format one field of @he into @hpp using @print_fn with format @fmt and
+ * width @len.  With @fmt_percent the field is printed as a percentage of
+ * the hists' total period, otherwise as the raw value from @get_field.
+ *
+ * For a group event the same is then printed for the other group members,
+ * taken from the pairs of @he; members without a printed pair are output
+ * as zero.
+ *
+ * Returns the sum of the @print_fn return values.  @hpp->buf and
+ * @hpp->size are restored to their entry values before returning.
+ */
+static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
+		      hpp_field_fn get_field, const char *fmt, int len,
+		      hpp_snprint_fn print_fn, bool fmt_percent)
+{
+	int ret;
+	struct hists *hists = he->hists;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
+	char *buf = hpp->buf;
+	size_t size = hpp->size;
+
+	if (fmt_percent) {
+		double percent = 0.0;
+		u64 total = hists__total_period(hists);
+
+		/* a zero total leaves the percentage at 0.0 */
+		if (total)
+			percent = 100.0 * get_field(he) / total;
+
+		ret = hpp__call_print_fn(hpp, print_fn, fmt, len, percent);
+	} else
+		ret = hpp__call_print_fn(hpp, print_fn, fmt, len, get_field(he));
+
+	if (perf_evsel__is_group_event(evsel)) {
+		int prev_idx, idx_delta;
+		struct hist_entry *pair;
+		int nr_members = evsel->nr_members;
+
+		prev_idx = perf_evsel__group_idx(evsel);
+
+		list_for_each_entry(pair, &he->pairs.head, pairs.node) {
+			u64 period = get_field(pair);
+			u64 total = hists__total_period(pair->hists);
+
+			/* pairs whose hists have no period are not printed here */
+			if (!total)
+				continue;
+
+			evsel = hists_to_evsel(pair->hists);
+			/* number of group members between the previous one and this */
+			idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
+
+			while (idx_delta--) {
+				/*
+				 * zero-fill group members in the middle which
+				 * have no sample
+				 */
+				if (fmt_percent) {
+					ret += hpp__call_print_fn(hpp, print_fn,
+								  fmt, len, 0.0);
+				} else {
+					ret += hpp__call_print_fn(hpp, print_fn,
+								  fmt, len, 0ULL);
+				}
+			}
+
+			if (fmt_percent) {
+				ret += hpp__call_print_fn(hpp, print_fn, fmt, len,
+							  100.0 * period / total);
+			} else {
+				ret += hpp__call_print_fn(hpp, print_fn, fmt,
+							  len, period);
+			}
+
+			prev_idx = perf_evsel__group_idx(evsel);
+		}
+
+		idx_delta = nr_members - prev_idx - 1;
+
+		while (idx_delta--) {
+			/*
+			 * zero-fill group members at last which have no sample
+			 */
+			if (fmt_percent) {
+				ret += hpp__call_print_fn(hpp, print_fn,
+							  fmt, len, 0.0);
+			} else {
+				ret += hpp__call_print_fn(hpp, print_fn,
+							  fmt, len, 0ULL);
+			}
+		}
+	}
+
+	/*
+	 * Restore original buf and size as it's where caller expects
+	 * the result will be saved.
+	 */
+	hpp->buf = buf;
+	hpp->size = size;
+
+	return ret;
+}
+
+/*
+ * Pick the field width for @fmt and format the field of @he through
+ * __hpp__fmt().
+ *
+ * With a field separator configured the width is 1.  Otherwise it is the
+ * user supplied width, or the format's default when none is set, reduced
+ * to leave room for the leading space (and the '%' sign for percentages).
+ */
+int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	     struct hist_entry *he, hpp_field_fn get_field,
+	     const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent)
+{
+	int width;
+
+	if (symbol_conf.field_sep) {
+		width = 1;
+	} else {
+		width = fmt->user_len ?: fmt->len;
+		/* 2 for a space and a % sign, 1 for the space alone */
+		width -= fmt_percent ? 2 : 1;
+	}
+
+	return __hpp__fmt(hpp, he, get_field, fmtstr, width,
+			  print_fn, fmt_percent);
+}
+
+/*
+ * Like hpp__fmt(), but for accumulated fields: when callchain
+ * accumulation is off, a right-aligned "N/A" is printed in place of a
+ * value.
+ */
+int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		 struct hist_entry *he, hpp_field_fn get_field,
+		 const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent)
+{
+	int width;
+
+	if (symbol_conf.cumulate_callchain)
+		return hpp__fmt(fmt, hpp, he, get_field, fmtstr, print_fn,
+				fmt_percent);
+
+	width = fmt->user_len ?: fmt->len;
+
+	return snprintf(hpp->buf, hpp->size, " %*s", width - 1, "N/A");
+}
+
+/* Three-way compare: 1 if @field_a is larger, -1 if smaller, 0 if equal. */
+static int field_cmp(u64 field_a, u64 field_b)
+{
+	return (field_a > field_b) - (field_a < field_b);
+}
+
+/*
+ * Compare @a and @b by the field @get_field returns.
+ *
+ * When the two are equal, event groups are enabled and @a belongs to a
+ * group event, the tie is broken by comparing the same field of the other
+ * group members (index 1 and up), taken from the pairs of @a and @b.  If
+ * the temporary arrays cannot be allocated the entries compare equal.
+ */
+static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
+		       hpp_field_fn get_field)
+{
+	struct perf_evsel *evsel;
+	struct hist_entry *member;
+	u64 *vals_a, *vals_b;
+	int nr, idx;
+	s64 cmp;
+
+	cmp = field_cmp(get_field(a), get_field(b));
+	if (cmp)
+		return cmp;
+	if (!symbol_conf.event_group)
+		return cmp;
+
+	evsel = hists_to_evsel(a->hists);
+	if (!perf_evsel__is_group_event(evsel))
+		return cmp;
+
+	nr = evsel->nr_members;
+	vals_a = calloc(nr, sizeof(*vals_a));
+	vals_b = calloc(nr, sizeof(*vals_b));
+
+	if (vals_a && vals_b) {
+		/* gather the per-member values, indexed by group position */
+		list_for_each_entry(member, &a->pairs.head, pairs.node) {
+			evsel = hists_to_evsel(member->hists);
+			vals_a[perf_evsel__group_idx(evsel)] = get_field(member);
+		}
+
+		list_for_each_entry(member, &b->pairs.head, pairs.node) {
+			evsel = hists_to_evsel(member->hists);
+			vals_b[perf_evsel__group_idx(evsel)] = get_field(member);
+		}
+
+		/* slot 0 is not compared here; start with the next member */
+		for (idx = 1; idx < nr; idx++) {
+			cmp = field_cmp(vals_a[idx], vals_b[idx]);
+			if (cmp)
+				break;
+		}
+	}
+
+	free(vals_a);
+	free(vals_b);
+
+	return cmp;
+}
+
+/*
+ * Compare @a and @b by an accumulated field.  Without callchain
+ * accumulation every pair compares equal.  Entries with equal values that
+ * share a thread are ordered by callchain depth when callchains are in
+ * use, so that a caller is put above its callee.
+ */
+static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
+			   hpp_field_fn get_field)
+{
+	s64 cmp;
+
+	if (!symbol_conf.cumulate_callchain)
+		return 0;
+
+	cmp = field_cmp(get_field(a), get_field(b));
+	if (cmp)
+		return cmp;
+
+	/* the depth tie-break needs the same thread and callchains enabled */
+	if (a->thread != b->thread || !symbol_conf.use_callchain)
+		return 0;
+
+	cmp = b->callchain->max_depth - a->callchain->max_depth;
+	if (callchain_param.order == ORDER_CALLER)
+		cmp = -cmp;
+
+	return cmp;
+}
+
+/*
+ * Width of the column for @fmt: the user supplied width or the format's
+ * default, widened for event groups to nr_members times the default, and
+ * never narrower than the column name.
+ */
+static int hpp__width_fn(struct perf_hpp_fmt *fmt,
+			 struct perf_hpp *hpp __maybe_unused,
+			 struct hists *hists)
+{
+	struct perf_evsel *evsel = hists_to_evsel(hists);
+	int width = fmt->user_len ?: fmt->len;
+	int name_width;
+
+	if (symbol_conf.event_group)
+		width = max(width, evsel->nr_members * fmt->len);
+
+	name_width = strlen(fmt->name);
+	if (width < name_width)
+		width = name_width;
+
+	return width;
+}
+
+/*
+ * Print the column name of @fmt into @hpp, right-aligned to the width
+ * computed by hpp__width_fn().  Returns the scnprintf() result.
+ */
+static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			  struct hists *hists, int line __maybe_unused,
+			  int *span __maybe_unused)
+{
+	int len = hpp__width_fn(fmt, hpp, hists);
+	return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
+}
+
+/*
+ * Print a percentage into @hpp through percent_color_len_snprintf().  The
+ * variable arguments are an int width followed by a double percentage.
+ * A result that does not fit is reported as hpp->size - 1.
+ */
+int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
+{
+	va_list ap;
+	ssize_t limit = hpp->size;
+	double pcnt;
+	int width, printed;
+
+	va_start(ap, fmt);
+	width = va_arg(ap, int);
+	pcnt = va_arg(ap, double);
+	va_end(ap);
+
+	printed = percent_color_len_snprintf(hpp->buf, hpp->size, fmt,
+					     width, pcnt);
+	if (printed >= limit)
+		return limit - 1;
+
+	return printed;
+}
+
+/*
+ * vsnprintf() into @hpp.  A result that does not fit is reported as
+ * hpp->size - 1.
+ */
+static int hpp_entry_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
+{
+	va_list ap;
+	ssize_t limit = hpp->size;
+	int printed;
+
+	va_start(ap, fmt);
+	printed = vsnprintf(hpp->buf, hpp->size, fmt, ap);
+	va_end(ap);
+
+	if (printed >= limit)
+		return limit - 1;
+
+	return printed;
+}
+
+/*
+ * Generators for percentage columns backed by he->stat._field:
+ * a field getter plus hpp__color_<type>(), hpp__entry_<type>() and
+ * hpp__sort_<type>().
+ */
+#define __HPP_COLOR_PERCENT_FN(_type, _field)					\
+static u64 he_get_##_field(struct hist_entry *he)				\
+{										\
+	return he->stat._field;							\
+}										\
+										\
+static int hpp__color_##_type(struct perf_hpp_fmt *fmt,				\
+			      struct perf_hpp *hpp, struct hist_entry *he) 	\
+{										\
+	return hpp__fmt(fmt, hpp, he, he_get_##_field, " %*.2f%%",		\
+			hpp_color_scnprintf, true);				\
+}
+
+#define __HPP_ENTRY_PERCENT_FN(_type, _field)					\
+static int hpp__entry_##_type(struct perf_hpp_fmt *fmt,				\
+			      struct perf_hpp *hpp, struct hist_entry *he) 	\
+{										\
+	return hpp__fmt(fmt, hpp, he, he_get_##_field, " %*.2f%%",		\
+			hpp_entry_scnprintf, true);				\
+}
+
+#define __HPP_SORT_FN(_type, _field)						\
+static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, 	\
+				 struct hist_entry *a, struct hist_entry *b) 	\
+{										\
+	return __hpp__sort(a, b, he_get_##_field);				\
+}
+
+/*
+ * Generators for accumulated percentage columns backed by
+ * he->stat_acc->_field; they go through hpp__fmt_acc() and
+ * __hpp__sort_acc().
+ */
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field)				\
+static u64 he_get_acc_##_field(struct hist_entry *he)				\
+{										\
+	return he->stat_acc->_field;						\
+}										\
+										\
+static int hpp__color_##_type(struct perf_hpp_fmt *fmt,				\
+			      struct perf_hpp *hpp, struct hist_entry *he) 	\
+{										\
+	return hpp__fmt_acc(fmt, hpp, he, he_get_acc_##_field, " %*.2f%%", 	\
+			    hpp_color_scnprintf, true);				\
+}
+
+#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field)				\
+static int hpp__entry_##_type(struct perf_hpp_fmt *fmt,				\
+			      struct perf_hpp *hpp, struct hist_entry *he) 	\
+{										\
+	return hpp__fmt_acc(fmt, hpp, he, he_get_acc_##_field, " %*.2f%%",	\
+			    hpp_entry_scnprintf, true);				\
+}
+
+#define __HPP_SORT_ACC_FN(_type, _field)					\
+static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, 	\
+				 struct hist_entry *a, struct hist_entry *b) 	\
+{										\
+	return __hpp__sort_acc(a, b, he_get_acc_##_field);			\
+}
+
+/*
+ * Generators for raw (non-percentage) columns backed by he->stat._field,
+ * printed as an unsigned 64-bit number.
+ */
+#define __HPP_ENTRY_RAW_FN(_type, _field)					\
+static u64 he_get_raw_##_field(struct hist_entry *he)				\
+{										\
+	return he->stat._field;							\
+}										\
+										\
+static int hpp__entry_##_type(struct perf_hpp_fmt *fmt,				\
+			      struct perf_hpp *hpp, struct hist_entry *he) 	\
+{										\
+	return hpp__fmt(fmt, hpp, he, he_get_raw_##_field, " %*"PRIu64, 	\
+			hpp_entry_scnprintf, false);				\
+}
+
+#define __HPP_SORT_RAW_FN(_type, _field)					\
+static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, 	\
+				 struct hist_entry *a, struct hist_entry *b) 	\
+{										\
+	return __hpp__sort(a, b, he_get_raw_##_field);				\
+}
+
+
+/*
+ * Bundle the generators above per column kind, then instantiate the
+ * color/entry/sort functions of every built-in column.
+ */
+#define HPP_PERCENT_FNS(_type, _field)					\
+__HPP_COLOR_PERCENT_FN(_type, _field)					\
+__HPP_ENTRY_PERCENT_FN(_type, _field)					\
+__HPP_SORT_FN(_type, _field)
+
+#define HPP_PERCENT_ACC_FNS(_type, _field)				\
+__HPP_COLOR_ACC_PERCENT_FN(_type, _field)				\
+__HPP_ENTRY_ACC_PERCENT_FN(_type, _field)				\
+__HPP_SORT_ACC_FN(_type, _field)
+
+#define HPP_RAW_FNS(_type, _field)					\
+__HPP_ENTRY_RAW_FN(_type, _field)					\
+__HPP_SORT_RAW_FN(_type, _field)
+
+HPP_PERCENT_FNS(overhead, period)
+HPP_PERCENT_FNS(overhead_sys, period_sys)
+HPP_PERCENT_FNS(overhead_us, period_us)
+HPP_PERCENT_FNS(overhead_guest_sys, period_guest_sys)
+HPP_PERCENT_FNS(overhead_guest_us, period_guest_us)
+HPP_PERCENT_ACC_FNS(overhead_acc, period)
+
+HPP_RAW_FNS(samples, nr_events)
+HPP_RAW_FNS(period, period)
+
+/* Comparison callback that treats every pair of entries as equal. */
+static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+			    struct hist_entry *a __maybe_unused,
+			    struct hist_entry *b __maybe_unused)
+{
+	return 0;
+}
+
+/* True when @a uses hpp__header_fn as its header callback. */
+static bool perf_hpp__is_hpp_entry(struct perf_hpp_fmt *a)
+{
+	return a->header == hpp__header_fn;
+}
+
+/*
+ * Two formats are equal when both use hpp__header_fn as header callback
+ * and carry the same idx.
+ */
+static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	return perf_hpp__is_hpp_entry(a) &&
+	       perf_hpp__is_hpp_entry(b) &&
+	       a->idx == b->idx;
+}
+
+/*
+ * Initializers for the entries of perf_hpp__format[].  The two COLOR
+ * variants also set a .color callback; HPP__PRINT_FNS leaves it unset.
+ */
+#define HPP__COLOR_PRINT_FNS(_name, _fn, _idx)		\
+	{						\
+		.name   = _name,			\
+		.header	= hpp__header_fn,		\
+		.width	= hpp__width_fn,		\
+		.color	= hpp__color_ ## _fn,		\
+		.entry	= hpp__entry_ ## _fn,		\
+		.cmp	= hpp__nop_cmp,			\
+		.collapse = hpp__nop_cmp,		\
+		.sort	= hpp__sort_ ## _fn,		\
+		.idx	= PERF_HPP__ ## _idx,		\
+		.equal	= hpp__equal,			\
+	}
+
+#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn, _idx)	\
+	{						\
+		.name   = _name,			\
+		.header	= hpp__header_fn,		\
+		.width	= hpp__width_fn,		\
+		.color	= hpp__color_ ## _fn,		\
+		.entry	= hpp__entry_ ## _fn,		\
+		.cmp	= hpp__nop_cmp,			\
+		.collapse = hpp__nop_cmp,		\
+		.sort	= hpp__sort_ ## _fn,		\
+		.idx	= PERF_HPP__ ## _idx,		\
+		.equal	= hpp__equal,			\
+	}
+
+#define HPP__PRINT_FNS(_name, _fn, _idx)		\
+	{						\
+		.name   = _name,			\
+		.header	= hpp__header_fn,		\
+		.width	= hpp__width_fn,		\
+		.entry	= hpp__entry_ ## _fn,		\
+		.cmp	= hpp__nop_cmp,			\
+		.collapse = hpp__nop_cmp,		\
+		.sort	= hpp__sort_ ## _fn,		\
+		.idx	= PERF_HPP__ ## _idx,		\
+		.equal	= hpp__equal,			\
+	}
+
+/*
+ * Built-in output columns.  Other code in this file indexes the array
+ * with PERF_HPP__* constants, so the order here presumably has to follow
+ * that enum - verify against its definition before reordering.
+ */
+struct perf_hpp_fmt perf_hpp__format[] = {
+	HPP__COLOR_PRINT_FNS("Overhead", overhead, OVERHEAD),
+	HPP__COLOR_PRINT_FNS("sys", overhead_sys, OVERHEAD_SYS),
+	HPP__COLOR_PRINT_FNS("usr", overhead_us, OVERHEAD_US),
+	HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys, OVERHEAD_GUEST_SYS),
+	HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us, OVERHEAD_GUEST_US),
+	HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc, OVERHEAD_ACC),
+	HPP__PRINT_FNS("Samples", samples, SAMPLES),
+	HPP__PRINT_FNS("Period", period, PERIOD)
+};
+
+/* Global format list: empty field and sort lists, one header line. */
+struct perf_hpp_list perf_hpp_list = {
+	.fields	= LIST_HEAD_INIT(perf_hpp_list.fields),
+	.sorts	= LIST_HEAD_INIT(perf_hpp_list.sorts),
+	.nr_header_lines = 1,
+};
+
+#undef HPP__COLOR_PRINT_FNS
+#undef HPP__COLOR_ACC_PRINT_FNS
+#undef HPP__PRINT_FNS
+
+#undef HPP_PERCENT_FNS
+#undef HPP_PERCENT_ACC_FNS
+#undef HPP_RAW_FNS
+
+#undef __HPP_HEADER_FN
+#undef __HPP_WIDTH_FN
+#undef __HPP_COLOR_PERCENT_FN
+#undef __HPP_ENTRY_PERCENT_FN
+#undef __HPP_COLOR_ACC_PERCENT_FN
+#undef __HPP_ENTRY_ACC_PERCENT_FN
+#undef __HPP_ENTRY_RAW_FN
+#undef __HPP_SORT_FN
+#undef __HPP_SORT_ACC_FN
+#undef __HPP_SORT_RAW_FN
+
+
+/*
+ * Initialize the list heads of the built-in formats and, unless the user
+ * gave a strict field order, add the default output columns selected by
+ * symbol_conf.
+ */
+void perf_hpp__init(void)
+{
+	int idx;
+
+	for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
+		struct perf_hpp_fmt *entry = perf_hpp__format + idx;
+
+		INIT_LIST_HEAD(&entry->list);
+
+		/* sort_list may be linked by setup_sorting() */
+		if (!entry->sort_list.next)
+			INIT_LIST_HEAD(&entry->sort_list);
+	}
+
+	/* a user specified field order replaces the default fields */
+	if (is_strict_order(field_order))
+		return;
+
+	/* with accumulation, "Children" comes first and Overhead reads "Self" */
+	if (symbol_conf.cumulate_callchain) {
+		hpp_dimension__add_output(PERF_HPP__OVERHEAD_ACC);
+		perf_hpp__format[PERF_HPP__OVERHEAD].name = "Self";
+	}
+
+	hpp_dimension__add_output(PERF_HPP__OVERHEAD);
+
+	if (symbol_conf.show_cpu_utilization) {
+		hpp_dimension__add_output(PERF_HPP__OVERHEAD_SYS);
+		hpp_dimension__add_output(PERF_HPP__OVERHEAD_US);
+
+		if (perf_guest) {
+			hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_SYS);
+			hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_US);
+		}
+	}
+
+	if (symbol_conf.show_nr_samples)
+		hpp_dimension__add_output(PERF_HPP__SAMPLES);
+
+	if (symbol_conf.show_total_period)
+		hpp_dimension__add_output(PERF_HPP__PERIOD);
+}
+
+/* Append @format to the tail of the output fields of @list. */
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+				    struct perf_hpp_fmt *format)
+{
+	list_add_tail(&format->list, &list->fields);
+}
+
+/* Append @format to the tail of the sort keys of @list. */
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+					struct perf_hpp_fmt *format)
+{
+	list_add_tail(&format->sort_list, &list->sorts);
+}
+
+/* Insert @format at the head of the sort keys of @list. */
+void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list,
+				       struct perf_hpp_fmt *format)
+{
+	list_add(&format->sort_list, &list->sorts);
+}
+
+/* Unlink @format from its output field list and reinitialize its node. */
+void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
+{
+	list_del_init(&format->list);
+}
+
+/*
+ * Undo the accumulation columns in the global format list: drop every
+ * column equal to the accumulated-overhead format and name the overhead
+ * column "Overhead" again.  Nothing is changed when the user gave a strict
+ * field order.
+ */
+void perf_hpp__cancel_cumulate(void)
+{
+	struct perf_hpp_fmt *acc = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC];
+	struct perf_hpp_fmt *ovh = &perf_hpp__format[PERF_HPP__OVERHEAD];
+	struct perf_hpp_fmt *fmt, *tmp;
+
+	if (is_strict_order(field_order))
+		return;
+
+	perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) {
+		if (acc->equal(acc, fmt))
+			perf_hpp__column_unregister(fmt);
+		else if (ovh->equal(ovh, fmt))
+			fmt->name = "Overhead";
+	}
+}
+
+/*
+ * Compare two formats through @a's equal callback; formats without that
+ * callback never compare equal.
+ */
+static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	if (!a->equal)
+		return false;
+
+	return a->equal(a, b);
+}
+
+/*
+ * Make every sort key of @list that can be printed also an output field
+ * of @list, unless an equal field is already present.
+ */
+void perf_hpp__setup_output_field(struct perf_hpp_list *list)
+{
+	struct perf_hpp_fmt *fmt;
+
+	/* append sort keys to output field */
+	perf_hpp_list__for_each_sort_list(list, fmt) {
+		struct perf_hpp_fmt *pos;
+
+		/* skip sort-only fields ("sort_compute" in perf diff) */
+		if (!fmt->entry && !fmt->color)
+			continue;
+
+		perf_hpp_list__for_each_format(list, pos) {
+			if (fmt_equal(fmt, pos))
+				goto next;
+		}
+
+		/*
+		 * Register on the list that was walked above rather than
+		 * through a helper that is not told which list to use.
+		 */
+		perf_hpp_list__column_register(list, fmt);
+next:
+		continue;
+	}
+}
+
+/*
+ * Make every output field of @list also a sort key of @list, unless an
+ * equal sort key is already present.
+ */
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list)
+{
+	struct perf_hpp_fmt *fmt;
+
+	/* append output fields to sort keys */
+	perf_hpp_list__for_each_format(list, fmt) {
+		struct perf_hpp_fmt *pos;
+
+		perf_hpp_list__for_each_sort_list(list, pos) {
+			if (fmt_equal(fmt, pos))
+				goto next;
+		}
+
+		/*
+		 * Register on the list that was walked above rather than
+		 * through a helper that is not told which list to use.
+		 */
+		perf_hpp_list__register_sort_field(list, fmt);
+next:
+		continue;
+	}
+}
+
+
+/*
+ * Release @fmt through its optional ->free callback.  The format must
+ * already be unlinked from both the field list and the sort list;
+ * anything else is a bug and trips BUG_ON().
+ */
+static void fmt_free(struct perf_hpp_fmt *fmt)
+{
+	BUG_ON(!list_empty(&fmt->list));
+	BUG_ON(!list_empty(&fmt->sort_list));
+
+	if (!fmt->free)
+		return;
+
+	fmt->free(fmt);
+}
+
+/* Unlink @fmt from both the field and the sort list, then release it. */
+static void fmt_unhook_and_free(struct perf_hpp_fmt *fmt)
+{
+	list_del_init(&fmt->list);
+	list_del_init(&fmt->sort_list);
+	fmt_free(fmt);
+}
+
+/*
+ * Empty @list: every format found on the field list and then every
+ * format still left on the sort list is unlinked from both lists and
+ * handed to fmt_free().
+ */
+void perf_hpp__reset_output_field(struct perf_hpp_list *list)
+{
+	struct perf_hpp_fmt *cur, *next;
+
+	/* reset output fields */
+	perf_hpp_list__for_each_format_safe(list, cur, next)
+		fmt_unhook_and_free(cur);
+
+	/* reset sort keys */
+	perf_hpp_list__for_each_sort_list_safe(list, cur, next)
+		fmt_unhook_and_free(cur);
+}
+
+/*
+ * Sum of the widths of all non-skipped output columns of @hists, plus
+ * two characters between neighbouring columns.  See hists__fprintf to
+ * match the column widths.
+ */
+unsigned int hists__sort_list_width(struct hists *hists)
+{
+	struct perf_hpp dummy_hpp;
+	struct perf_hpp_fmt *fmt;
+	bool need_gap = false;
+	int width = 0;
+
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		/* every column but the first is preceded by a 2-char gap */
+		if (need_gap)
+			width += 2;
+		need_gap = true;
+
+		width += fmt->width(fmt, &dummy_hpp, hists);
+	}
+
+	/* Addr + origin */
+	if (verbose > 0 && hists__has(hists, sym))
+		width += 3 + BITS_PER_LONG / 4;
+
+	return width;
+}
+
+/*
+ * Width of the leading run of output columns of @hists: columns are
+ * summed (with a two-character gap between them) until the first sort
+ * entry or dynamic entry is met, which ends the walk.
+ */
+unsigned int hists__overhead_width(struct hists *hists)
+{
+	struct perf_hpp dummy_hpp;
+	struct perf_hpp_fmt *fmt;
+	bool need_gap = false;
+	int width = 0;
+
+	hists__for_each_format(hists, fmt) {
+		bool is_key = perf_hpp__is_sort_entry(fmt) ||
+			      perf_hpp__is_dynamic_entry(fmt);
+
+		if (is_key)
+			break;
+
+		if (need_gap)
+			width += 2;
+		need_gap = true;
+
+		width += fmt->width(fmt, &dummy_hpp, hists);
+	}
+
+	return width;
+}
+
+/*
+ * Reset the column width of @fmt to its default.
+ *
+ * Sort entries are delegated to perf_hpp__reset_sort_width(), dynamic
+ * entries are left untouched.  For the remaining formats ->len is set
+ * from a fixed per-index table; indexes without a table entry keep
+ * their current ->len.  An index of PERF_HPP__MAX_INDEX or above is a
+ * bug.
+ */
+void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+	/*
+	 * ISO C does not allow "return expr;" in a void function, so
+	 * call the helper and return separately.
+	 */
+	if (perf_hpp__is_sort_entry(fmt)) {
+		perf_hpp__reset_sort_width(fmt, hists);
+		return;
+	}
+
+	if (perf_hpp__is_dynamic_entry(fmt))
+		return;
+
+	BUG_ON(fmt->idx >= PERF_HPP__MAX_INDEX);
+
+	switch (fmt->idx) {
+	case PERF_HPP__OVERHEAD:
+	case PERF_HPP__OVERHEAD_SYS:
+	case PERF_HPP__OVERHEAD_US:
+	case PERF_HPP__OVERHEAD_ACC:
+		fmt->len = 8;
+		break;
+
+	case PERF_HPP__OVERHEAD_GUEST_SYS:
+	case PERF_HPP__OVERHEAD_GUEST_US:
+		fmt->len = 9;
+		break;
+
+	case PERF_HPP__SAMPLES:
+	case PERF_HPP__PERIOD:
+		fmt->len = 12;
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * Reset the width of every format reachable from @hists: first the
+ * formats walked by hists__for_each_format(), then the formats of each
+ * node on hists->hpp_formats.
+ */
+void hists__reset_column_width(struct hists *hists)
+{
+	struct perf_hpp_list_node *level_node;
+	struct perf_hpp_fmt *fmt;
+
+	hists__for_each_format(hists, fmt) {
+		perf_hpp__reset_width(fmt, hists);
+	}
+
+	/* hierarchy entries have their own hpp list */
+	list_for_each_entry(level_node, &hists->hpp_formats, list) {
+		perf_hpp_list__for_each_format(&level_node->hpp, fmt) {
+			perf_hpp__reset_width(fmt, hists);
+		}
+	}
+}
+
+/*
+ * Parse @width_list_str, a comma separated list of decimal numbers,
+ * and store the numbers in order into ->user_len of the formats on the
+ * global output-field list.  Parsing stops at the first number that is
+ * not followed by a comma, or when the list of formats is exhausted.
+ */
+void perf_hpp__set_user_width(const char *width_list_str)
+{
+	const char *cursor = width_list_str;
+	struct perf_hpp_fmt *fmt;
+
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+		char *end;
+		int len = strtol(cursor, &end, 10);
+
+		fmt->user_len = len;
+
+		/* anything but a comma ends the list */
+		if (*end != ',')
+			break;
+
+		cursor = end + 1;
+	}
+}
+
+/*
+ * Add a copy of @fmt to the per-level format node of @hists whose
+ * level equals fmt->level, creating and appending that node first when
+ * it does not exist yet.
+ *
+ * A new node starts with ->skip taken from perf_hpp__should_skip();
+ * any non-skipped format added to a node clears the node's ->skip.
+ *
+ * Returns 0 on success, -1 when allocating the node or duplicating the
+ * format fails.
+ */
+static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt)
+{
+	bool skip = perf_hpp__should_skip(fmt, hists);
+	struct perf_hpp_list_node *node = NULL;
+	struct perf_hpp_list_node *pos;
+	struct perf_hpp_fmt *fmt_copy;
+
+	/* look for an existing node of the same level */
+	list_for_each_entry(pos, &hists->hpp_formats, list) {
+		if (pos->level == fmt->level) {
+			node = pos;
+			break;
+		}
+	}
+
+	if (node == NULL) {
+		node = malloc(sizeof(*node));
+		if (node == NULL)
+			return -1;
+
+		node->skip = skip;
+		node->level = fmt->level;
+		perf_hpp_list__init(&node->hpp);
+
+		hists->nr_hpp_node++;
+		list_add_tail(&node->list, &hists->hpp_formats);
+	}
+
+	fmt_copy = perf_hpp_fmt__dup(fmt);
+	if (fmt_copy == NULL)
+		return -1;
+
+	/* one visible format is enough to make the whole level visible */
+	if (!skip)
+		node->skip = false;
+
+	list_add_tail(&fmt_copy->list, &node->hpp.fields);
+	list_add_tail(&fmt_copy->sort_list, &node->hpp.sorts);
+
+	return 0;
+}
+
+/*
+ * In hierarchy report mode, build the per-level format lists of the
+ * hists of every evsel in @evlist from the sort keys of @list.
+ * Dynamic entries that perf_hpp__defined_dynamic_entry() does not
+ * report as defined for the hists at hand are left out.
+ *
+ * Returns 0 on success (and immediately when
+ * symbol_conf.report_hierarchy is off), or the negative value returned
+ * by add_hierarchy_fmt() on the first failure.
+ */
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+				  struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	if (!symbol_conf.report_hierarchy)
+		return 0;
+
+	evlist__for_each_entry(evlist, evsel) {
+		struct hists *hists = evsel__hists(evsel);
+		struct perf_hpp_fmt *fmt;
+
+		perf_hpp_list__for_each_sort_list(list, fmt) {
+			int err;
+
+			if (perf_hpp__is_dynamic_entry(fmt) &&
+			    !perf_hpp__defined_dynamic_entry(fmt, hists))
+				continue;
+
+			err = add_hierarchy_fmt(hists, fmt);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
diff --git a/ui/keysyms.h b/ui/keysyms.h
new file mode 100644
index 0000000..fbfac29
--- /dev/null
+++ b/ui/keysyms.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_KEYSYMS_H_
+#define _PERF_KEYSYMS_H_ 1
+
+#include "libslang.h"
+
+/*
+ * Key codes used by the perf UI code.  Most map straight onto the
+ * S-Lang SL_KEY_* symbols, the others are literal character values.
+ */
+#define K_DOWN	SL_KEY_DOWN
+#define K_END	SL_KEY_END
+#define K_ENTER	'\r'
+#define K_ESC	033	/* octal 033 == 27, the ASCII ESC character */
+#define K_F1	SL_KEY_F(1)
+#define K_HOME	SL_KEY_HOME
+#define K_LEFT	SL_KEY_LEFT
+#define K_PGDN	SL_KEY_NPAGE
+#define K_PGUP	SL_KEY_PPAGE
+#define K_RIGHT	SL_KEY_RIGHT
+#define K_TAB	'\t'
+#define K_UNTAB	SL_KEY_UNTAB	/* SL_KEY_UNTAB is defined in libslang.h */
+#define K_UP	SL_KEY_UP
+#define K_BKSPC 0x7f	/* 0x7f == the ASCII DEL character */
+#define K_DEL	SL_KEY_DELETE
+
+/*
+ * Not really keys: negative pseudo key codes.
+ * NOTE(review): presumably returned by the key-reading code to signal
+ * these events - confirm against the users of these macros.
+ */
+#define K_TIMER	 -1
+#define K_ERROR	 -2
+#define K_RESIZE -3
+#define K_SWITCH_INPUT_DATA -4
+
+#endif /* _PERF_KEYSYMS_H_ */
diff --git a/ui/libslang.h b/ui/libslang.h
new file mode 100644
index 0000000..c0686cd
--- /dev/null
+++ b/ui/libslang.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_SLANG_H_
+#define _PERF_UI_SLANG_H_ 1
+/*
+ * slang versions <= 2.0.6 have a "#if HAVE_LONG_LONG" that breaks
+ * the build if it isn't defined. Use the equivalent one that glibc
+ * has on features.h.
+ */
+#include <features.h>
+#ifndef HAVE_LONG_LONG
+#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG
+#endif
+#include <slang.h>
+
+/*
+ * Lower-case wrappers around the S-Lang calls that take strings.
+ * When SLANG_VERSION is below 20104 the string arguments are cast to
+ * (char *) - presumably because those releases declare the parameters
+ * without const (TODO confirm).  Otherwise the wrappers are plain
+ * aliases of the S-Lang functions.
+ */
+#if SLANG_VERSION < 20104
+#define slsmg_printf(msg, args...) \
+	SLsmg_printf((char *)(msg), ##args)
+#define slsmg_vprintf(msg, vargs) \
+	SLsmg_vprintf((char *)(msg), vargs)
+#define slsmg_write_nstring(msg, len) \
+	SLsmg_write_nstring((char *)(msg), len)
+#define sltt_set_color(obj, name, fg, bg) \
+	SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg))
+#else
+#define slsmg_printf SLsmg_printf
+#define slsmg_vprintf SLsmg_vprintf
+#define slsmg_write_nstring SLsmg_write_nstring
+#define sltt_set_color SLtt_set_color
+#endif
+
+/*
+ * Key code defined here rather than taken from <slang.h>.
+ * NOTE(review): presumably chosen so that it does not collide with
+ * S-Lang's own SL_KEY_* values - confirm.
+ */
+#define SL_KEY_UNTAB 0x1000
+
+#endif /* _PERF_UI_SLANG_H_ */
diff --git a/ui/progress.c b/ui/progress.c
new file mode 100644
index 0000000..bbfbc91
--- /dev/null
+++ b/ui/progress.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include "../cache.h"
+#include "progress.h"
+
+/* Update hook of the default backend: deliberately does nothing. */
+static void null_progress__update(struct ui_progress *p __maybe_unused)
+{
+}
+
+/*
+ * Default backend.  Only ->update is provided; ->init and ->finish
+ * stay NULL, which the callers in this file check for.
+ */
+static struct ui_progress_ops null_progress__ops =
+{
+	.update = null_progress__update,
+};
+
+/* Backend currently in use; starts out as the no-op backend above. */
+struct ui_progress_ops *ui_progress__ops = &null_progress__ops;
+
+/*
+ * Account @adv more units of work on @p.  The backend's ->update hook
+ * is only called once ->curr has reached ->next; ->next is then moved
+ * forward by a whole number of ->step sized increments that covers
+ * @adv.
+ */
+void ui_progress__update(struct ui_progress *p, u64 adv)
+{
+	u64 before = p->curr;
+	u64 steps;
+
+	p->curr += adv;
+
+	/* threshold not reached yet: nothing to redraw */
+	if (p->curr < p->next)
+		return;
+
+	steps = DIV_ROUND_UP(p->curr - before, p->step);
+	p->next += steps * p->step;
+
+	ui_progress__ops->update(p);
+}
+
+/*
+ * Initialize @p for @total units of work.  The refresh step is a
+ * sixteenth of @total, but never less than 1, and the first refresh is
+ * due after one step.  The backend's optional ->init hook is called
+ * last.
+ */
+void __ui_progress__init(struct ui_progress *p, u64 total,
+			 const char *title, bool size)
+{
+	u64 step = total / 16;
+
+	/* a zero step would never advance ->next */
+	if (!step)
+		step = 1;
+
+	p->curr  = 0;
+	p->step  = step;
+	p->next  = step;
+	p->total = total;
+	p->title = title;
+	p->size  = size;
+
+	if (ui_progress__ops->init)
+		ui_progress__ops->init(p);
+}
+
+/* Call the backend's ->finish hook, if the backend provides one. */
+void ui_progress__finish(void)
+{
+	if (!ui_progress__ops->finish)
+		return;
+
+	ui_progress__ops->finish();
+}
diff --git a/ui/progress.h b/ui/progress.h
new file mode 100644
index 0000000..4f52c37
--- /dev/null
+++ b/ui/progress.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_PROGRESS_H_
+#define _PERF_UI_PROGRESS_H_ 1
+
+#include <linux/types.h>
+
+void ui_progress__finish(void);
+
+/*
+ * State of one progress indicator, set up by __ui_progress__init()
+ * and advanced by ui_progress__update().
+ */
+struct ui_progress {
+	/* title passed to __ui_progress__init() */
+	const char *title;
+	/*
+	 * curr:  work accounted so far
+	 * next:  value of curr at which the backend is updated next
+	 * step:  increment of next (total / 16, at least 1)
+	 * total: amount of work passed to __ui_progress__init()
+	 */
+	u64 curr, next, step, total;
+	/* true when initialized through ui_progress__init_size() */
+	bool size;
+};
+
+void __ui_progress__init(struct ui_progress *p, u64 total,
+			 const char *title, bool size);
+
+#define ui_progress__init(p, total, title) \
+	__ui_progress__init(p, total, title, false)
+
+#define ui_progress__init_size(p, total, title) \
+	__ui_progress__init(p, total, title, true)
+
+void ui_progress__update(struct ui_progress *p, u64 adv);
+
+/*
+ * Hooks a progress backend provides.  ui_progress__update() calls
+ * ->update without a NULL check, so it must be set; ->init and
+ * ->finish are optional and only called when non-NULL.
+ */
+struct ui_progress_ops {
+	void (*init)(struct ui_progress *p);
+	void (*update)(struct ui_progress *p);
+	void (*finish)(void);
+};
+
+extern struct ui_progress_ops *ui_progress__ops;
+
+#endif
diff --git a/ui/setup.c b/ui/setup.c
new file mode 100644
index 0000000..44fe824
--- /dev/null
+++ b/ui/setup.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
+#include <dlfcn.h>
+
+#include "../util/cache.h"
+#include "../util/debug.h"
+#include "../util/hist.h"
+#include "../util/util.h"
+
+pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
+void *perf_gtk_handle;
+int use_browser = -1;
+
+#define PERF_GTK_DSO "libperf-gtk.so"
+
+#ifdef HAVE_GTK2_SUPPORT
+
+/*
+ * Load the GTK front end and run its "perf_gtk__init" entry point.
+ *
+ * The DSO is first looked up by bare name and, failing that, under
+ * LIBDIR.  An already loaded front end (perf_gtk_handle != NULL) is
+ * treated as success.
+ *
+ * Returns 0 on success, -1 when the DSO or its init symbol cannot be
+ * found or the init function fails.  On failure the DSO is closed and
+ * perf_gtk_handle is reset to NULL, so that a later call does not
+ * mistake the closed handle for a loaded front end.
+ */
+static int setup_gtk_browser(void)
+{
+	int (*perf_ui_init)(void);
+
+	if (perf_gtk_handle)
+		return 0;
+
+	perf_gtk_handle = dlopen(PERF_GTK_DSO, RTLD_LAZY);
+	if (perf_gtk_handle == NULL) {
+		char buf[PATH_MAX];
+		scnprintf(buf, sizeof(buf), "%s/%s", LIBDIR, PERF_GTK_DSO);
+		perf_gtk_handle = dlopen(buf, RTLD_LAZY);
+	}
+	if (perf_gtk_handle == NULL)
+		return -1;
+
+	perf_ui_init = dlsym(perf_gtk_handle, "perf_gtk__init");
+	if (perf_ui_init == NULL)
+		goto out_close;
+
+	if (perf_ui_init() == 0)
+		return 0;
+
+out_close:
+	dlclose(perf_gtk_handle);
+	/* never leave a dangling handle behind */
+	perf_gtk_handle = NULL;
+	return -1;
+}
+
+/*
+ * Shut the GTK front end down: run its "perf_gtk__exit" entry point
+ * when the symbol can be resolved, then close the DSO and forget the
+ * handle.  Does nothing when no front end is loaded.
+ */
+static void exit_gtk_browser(bool wait_for_ok)
+{
+	void (*perf_ui_exit)(bool);
+
+	if (perf_gtk_handle == NULL)
+		return;
+
+	perf_ui_exit = dlsym(perf_gtk_handle, "perf_gtk__exit");
+	if (perf_ui_exit != NULL)
+		perf_ui_exit(wait_for_ok);
+
+	/* the DSO is closed whether or not the exit hook was found */
+	dlclose(perf_gtk_handle);
+	perf_gtk_handle = NULL;
+}
+#else
+/* Without HAVE_GTK2_SUPPORT: setting GTK up always fails, exit is a no-op. */
+static inline int setup_gtk_browser(void) { return -1; }
+static inline void exit_gtk_browser(bool wait_for_ok __maybe_unused) {}
+#endif
+
+/*
+ * Option callback: set perf_use_color_default from @mode, as
+ * interpreted by perf_config_colorbool() for the "color.ui" key.
+ * @opt and @unset are ignored.  Always returns 0.
+ */
+int stdio__config_color(const struct option *opt __maybe_unused,
+			const char *mode, int unset __maybe_unused)
+{
+	perf_use_color_default =
+		perf_config_colorbool("color.ui", mode, -1);
+
+	return 0;
+}
+
+/*
+ * Pick and start the user interface according to use_browser:
+ * 2 selects GTK, 1 the TUI, anything else plain stdio.
+ *
+ * Unless GTK was requested, stdio is forced when stdout is not a
+ * terminal or dump_trace is set; a negative (unset) use_browser
+ * defaults to the TUI.  A front end that fails to start falls through
+ * to the next one (GTK -> TUI -> stdio), and use_browser is updated to
+ * the front end actually in use.
+ *
+ * @fallback_to_pager: when ending up on stdio, call setup_pager().
+ */
+void setup_browser(bool fallback_to_pager)
+{
+	if (use_browser < 2 && (!isatty(1) || dump_trace))
+		use_browser = 0;
+
+	/* default to TUI */
+	if (use_browser < 0)
+		use_browser = 1;
+
+	switch (use_browser) {
+	case 2:
+		if (setup_gtk_browser() == 0)
+			break;
+		printf("GTK browser requested but could not find %s\n",
+		       PERF_GTK_DSO);
+		/* pause before the next front end is started */
+		sleep(1);
+		/* fall through */
+	case 1:
+		use_browser = 1;
+		if (ui__init() == 0)
+			break;
+		/* fall through */
+	default:
+		use_browser = 0;
+		if (fallback_to_pager)
+			setup_pager();
+		break;
+	}
+}
+
+/*
+ * Shut down the front end selected by use_browser: 2 is GTK, 1 is the
+ * TUI.  Any other value needs no teardown.
+ */
+void exit_browser(bool wait_for_ok)
+{
+	if (use_browser == 2)
+		exit_gtk_browser(wait_for_ok);
+	else if (use_browser == 1)
+		ui__exit(wait_for_ok);
+}
diff --git a/ui/stdio/hist.c b/ui/stdio/hist.c
new file mode 100644
index 0000000..6832fcb
--- /dev/null
+++ b/ui/stdio/hist.c
@@ -0,0 +1,851 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <linux/string.h>
+
+#include "../../util/util.h"
+#include "../../util/hist.h"
+#include "../../util/sort.h"
+#include "../../util/evsel.h"
+#include "../../util/srcline.h"
+#include "../../util/string2.h"
+#include "../../util/thread.h"
+#include "../../util/sane_ctype.h"
+
+/*
+ * Print the fixed indentation that starts every callchain line,
+ * followed by @left_margin extra blanks.  Returns the sum of the
+ * fprintf() return values.
+ */
+static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin)
+{
+	int printed = fprintf(fp, "            ");
+	int todo;
+
+	for (todo = left_margin; todo > 0; todo--)
+		printed += fprintf(fp, " ");
+
+	return printed;
+}
+
+/*
+ * Print one separator line of the callchain graph: the left margin,
+ * then for each of the @depth columns either a '|' cell (bit set in
+ * @depth_mask) or a blank cell, then a newline.
+ */
+static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
+					  int left_margin)
+{
+	size_t printed = callchain__fprintf_left_margin(fp, left_margin);
+	int col;
+
+	for (col = 0; col < depth; col++) {
+		bool has_pipe = depth_mask & (1 << col);
+
+		if (has_pipe) {
+			printed += fprintf(fp, "|          ");
+		} else {
+			printed += fprintf(fp, "           ");
+		}
+	}
+
+	printed += fprintf(fp, "\n");
+
+	return printed;
+}
+
+/*
+ * Print one entry line of the callchain graph for @chain.
+ *
+ * After the left margin, each of the @depth columns gets a '|' or a
+ * blank depending on its bit in @depth_mask.  In the last column of
+ * the first entry of a node (@period == 0) the node's value is printed
+ * between "--" markers; all other columns are padded with blanks.
+ * The symbol name follows, with the branch counters appended when
+ * symbol_conf.show_branchflag_count is set.
+ *
+ * The returned count covers the margin and the graph columns only: the
+ * output of the final fputs()/fputc() is not added to it.
+ */
+static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
+				     struct callchain_list *chain,
+				     int depth, int depth_mask, int period,
+				     u64 total_samples, int left_margin)
+{
+	int i;
+	size_t ret = 0;
+	char bf[1024], *alloc_str = NULL;
+	char buf[64];
+	const char *str;
+
+	ret += callchain__fprintf_left_margin(fp, left_margin);
+	for (i = 0; i < depth; i++) {
+		if (depth_mask & (1 << i))
+			ret += fprintf(fp, "|");
+		else
+			ret += fprintf(fp, " ");
+		/* only the innermost column of a node's first line shows the value */
+		if (!period && i == depth - 1) {
+			ret += fprintf(fp, "--");
+			ret += callchain_node__fprintf_value(node, fp, total_samples);
+			ret += fprintf(fp, "--");
+		} else
+			ret += fprintf(fp, "%s", "          ");
+	}
+
+	str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
+
+	if (symbol_conf.show_branchflag_count) {
+		/* NULL fp: the counters are formatted into buf, not printed */
+		callchain_list_counts__printf_value(chain, NULL,
+						    buf, sizeof(buf));
+
+		if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
+			str = "Not enough memory!";
+		else
+			str = alloc_str;
+	}
+
+	fputs(str, fp);
+	fputc('\n', fp);
+	/* alloc_str is still NULL when asprintf() was not called; free(NULL) is fine */
+	free(alloc_str);
+
+	return ret;
+}
+
+/* Fake "[...]" symbol and the chain entry that points at it. */
+static struct symbol *rem_sq_bracket;
+static struct callchain_list rem_hits;
+
+/*
+ * Allocate the "[...]" placeholder symbol and hook it into rem_hits.
+ * On allocation failure a message is printed to stderr and
+ * rem_sq_bracket stays NULL.
+ */
+static void init_rem_hits(void)
+{
+	/* 6 extra bytes: room for "[...]" plus its terminating NUL */
+	rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6);
+	if (rem_sq_bracket != NULL) {
+		strcpy(rem_sq_bracket->name, "[...]");
+		rem_hits.ms.sym = rem_sq_bracket;
+		return;
+	}
+
+	fprintf(stderr, "Not enough memory to display remaining hits\n");
+}
+
+/*
+ * Print the callchain nodes found in @root as one level of the graph
+ * and recurse into the subtree of each of them.
+ *
+ * @total_samples: handed to the value printing of each entry; also the
+ *                 starting point for the "remaining hits" computation
+ * @depth:         number of graph columns drawn for this level
+ * @depth_mask:    bit i set means column i is drawn with a '|'
+ * @left_margin:   extra blanks in front of every line
+ *
+ * At most callchain_param.print_limit nodes of this level are printed.
+ * In CHAIN_GRAPH_REL mode a final "[...]" entry accounts for the hits
+ * not covered by the visited nodes.  Returns the accumulated return
+ * values of the print helpers.
+ */
+static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
+					 u64 total_samples, int depth,
+					 int depth_mask, int left_margin)
+{
+	struct rb_node *node, *next;
+	struct callchain_node *child = NULL;
+	struct callchain_list *chain;
+	int new_depth_mask = depth_mask;
+	u64 remaining;
+	size_t ret = 0;
+	int i;
+	uint entries_printed = 0;
+	int cumul_count = 0;
+
+	remaining = total_samples;
+
+	node = rb_first(root);
+	while (node) {
+		u64 new_total;
+		u64 cumul;
+
+		child = rb_entry(node, struct callchain_node, rb_node);
+		cumul = callchain_cumul_hits(child);
+		remaining -= cumul;
+		cumul_count += callchain_cumul_counts(child);
+
+		/*
+		 * The depth mask manages the output of pipes that show
+		 * the depth. We don't want to keep the pipes of the current
+		 * level for the last child of this depth.
+		 * Except if we have remaining filtered hits. They will
+		 * supersede the last child
+		 */
+		next = rb_next(node);
+		if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining))
+			new_depth_mask &= ~(1 << (depth - 1));
+
+		/*
+		 * But we keep the older depth mask for the line separator
+		 * to keep the level link until we reach the last child
+		 */
+		ret += ipchain__fprintf_graph_line(fp, depth, depth_mask,
+						   left_margin);
+		/* i is 0 for the node's first entry only, which makes it show the value */
+		i = 0;
+		list_for_each_entry(chain, &child->val, list) {
+			ret += ipchain__fprintf_graph(fp, child, chain, depth,
+						      new_depth_mask, i++,
+						      total_samples,
+						      left_margin);
+		}
+
+		/* relative mode: the subtree is measured against this child's children */
+		if (callchain_param.mode == CHAIN_GRAPH_REL)
+			new_total = child->children_hit;
+		else
+			new_total = total_samples;
+
+		ret += __callchain__fprintf_graph(fp, &child->rb_root, new_total,
+						  depth + 1,
+						  new_depth_mask | (1 << depth),
+						  left_margin);
+		node = next;
+		if (++entries_printed == callchain_param.print_limit)
+			break;
+	}
+
+	/* some, but not all, hits are unaccounted for: emit the "[...]" entry */
+	if (callchain_param.mode == CHAIN_GRAPH_REL &&
+		remaining && remaining != total_samples) {
+		struct callchain_node rem_node = {
+			.hit = remaining,
+		};
+
+		/* init_rem_hits() could not allocate the placeholder symbol */
+		if (!rem_sq_bracket)
+			return ret;
+
+		if (callchain_param.value == CCVAL_COUNT && child && child->parent) {
+			rem_node.count = child->parent->children_count - cumul_count;
+			if (rem_node.count <= 0)
+				return ret;
+		}
+
+		new_depth_mask &= ~(1 << (depth - 1));
+		ret += ipchain__fprintf_graph(fp, &rem_node, &rem_hits, depth,
+					      new_depth_mask, 0, total_samples,
+					      left_margin);
+	}
+
+	return ret;
+}
+
+/*
+ * Tell whether the percentage of the callchain root @node is worth
+ * printing.
+ *
+ * With a single root the percentage is normally redundant (100 % in
+ * fractal mode, the hist entry's own percentage in graph mode), and
+ * leaving it out also saves one level of columns.  It is still needed
+ * when other roots follow @node, or when - e.g. because a percent
+ * limit was applied - the single root does not account for all of
+ * @parent_samples.
+ */
+static bool need_percent_display(struct rb_node *node, u64 parent_samples)
+{
+	struct callchain_node *only;
+
+	if (rb_next(node) != NULL)
+		return true;
+
+	only = rb_entry(node, struct callchain_node, rb_node);
+	if (callchain_cumul_hits(only) == parent_samples)
+		return false;
+
+	return true;
+}
+
+/*
+ * Print the callchain tree @root in graph form.
+ *
+ * When the tree has a single root whose percentage need not be shown
+ * (see need_percent_display()), the entries of that root are printed
+ * as a plain list introduced by "---" and the graph proper starts at
+ * the root's children.  In CHAIN_GRAPH_REL mode the graph is measured
+ * against @parent_samples instead of @total_samples.
+ *
+ * Returns the accumulated return values of the print helpers, plus the
+ * trailing blank line that is added when anything was printed.
+ */
+static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
+				       u64 total_samples, u64 parent_samples,
+				       int left_margin)
+{
+	struct callchain_node *cnode;
+	struct callchain_list *chain;
+	u32 entries_printed = 0;
+	bool printed = false;
+	struct rb_node *node;
+	int i = 0;
+	int ret = 0;
+	char bf[1024];
+
+	node = rb_first(root);
+	if (node && !need_percent_display(node, parent_samples)) {
+		cnode = rb_entry(node, struct callchain_node, rb_node);
+		list_for_each_entry(chain, &cnode->val, list) {
+			/*
+			 * If we sort by symbol, the first entry is the same than
+			 * the symbol. No need to print it otherwise it appears as
+			 * displayed twice.
+			 */
+			if (!i++ && field_order == NULL &&
+			    sort_order && strstarts(sort_order, "sym"))
+				continue;
+
+			/* first printed entry: draw the "|" / "---" lead-in and indent the rest */
+			if (!printed) {
+				ret += callchain__fprintf_left_margin(fp, left_margin);
+				ret += fprintf(fp, "|\n");
+				ret += callchain__fprintf_left_margin(fp, left_margin);
+				ret += fprintf(fp, "---");
+				left_margin += 3;
+				printed = true;
+			} else
+				ret += callchain__fprintf_left_margin(fp, left_margin);
+
+			ret += fprintf(fp, "%s",
+				       callchain_list__sym_name(chain, bf,
+								sizeof(bf),
+								false));
+
+			if (symbol_conf.show_branchflag_count)
+				ret += callchain_list_counts__printf_value(
+						chain, fp, NULL, 0);
+			ret += fprintf(fp, "\n");
+
+			if (++entries_printed == callchain_param.print_limit)
+				break;
+		}
+		/* the graph below starts at the single root's children */
+		root = &cnode->rb_root;
+	}
+
+	if (callchain_param.mode == CHAIN_GRAPH_REL)
+		total_samples = parent_samples;
+
+	ret += __callchain__fprintf_graph(fp, root, total_samples,
+					  1, 1, left_margin);
+	if (ret) {
+		/* do not add a blank line if it printed nothing */
+		ret += fprintf(fp, "\n");
+	}
+
+	return ret;
+}
+
+/*
+ * Print the entries of @node and of all its ancestors, one symbol per
+ * line.  The ancestors are handled first (recursion on ->parent), so
+ * the outermost ancestor comes out on top.  Entries whose ip is
+ * PERF_CONTEXT_MAX or above are left out.
+ */
+static size_t __callchain__fprintf_flat(FILE *fp, struct callchain_node *node,
+					u64 total_samples)
+{
+	struct callchain_list *entry;
+	size_t printed;
+	char bf[1024];
+
+	if (node == NULL)
+		return 0;
+
+	printed = __callchain__fprintf_flat(fp, node->parent, total_samples);
+
+	list_for_each_entry(entry, &node->val, list) {
+		const char *name;
+
+		if (entry->ip >= PERF_CONTEXT_MAX)
+			continue;
+
+		name = callchain_list__sym_name(entry, bf, sizeof(bf), false);
+		printed += fprintf(fp, "                %s\n", name);
+	}
+
+	return printed;
+}
+
+/*
+ * Print every callchain of @tree in flat form: a line with the
+ * chain's value, the chain itself (one symbol per line) and a blank
+ * line.  Stops after callchain_param.print_limit chains.
+ */
+static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
+				      u64 total_samples)
+{
+	struct rb_node *pos;
+	u32 nr_printed = 0;
+	size_t printed = 0;
+
+	for (pos = rb_first(tree); pos; pos = rb_next(pos)) {
+		struct callchain_node *chain;
+
+		chain = rb_entry(pos, struct callchain_node, rb_node);
+
+		printed += fprintf(fp, "           ");
+		printed += callchain_node__fprintf_value(chain, fp, total_samples);
+		printed += fprintf(fp, "\n");
+		printed += __callchain__fprintf_flat(fp, chain, total_samples);
+		printed += fprintf(fp, "\n");
+
+		if (++nr_printed == callchain_param.print_limit)
+			break;
+	}
+
+	return printed;
+}
+
+/*
+ * Print the entries of @node and of all its ancestors on one line,
+ * ancestors first, joined by symbol_conf.field_sep (";" when no
+ * separator is configured).  Entries whose ip is PERF_CONTEXT_MAX or
+ * above are left out.
+ */
+static size_t __callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
+{
+	const char *sep = symbol_conf.field_sep ?: ";";
+	struct callchain_list *entry;
+	bool need_sep;
+	size_t printed;
+	char bf[1024];
+
+	if (node == NULL)
+		return 0;
+
+	printed = __callchain__fprintf_folded(fp, node->parent);
+
+	/* no separator in front of the very first symbol of the line */
+	need_sep = (printed != 0);
+
+	list_for_each_entry(entry, &node->val, list) {
+		if (entry->ip >= PERF_CONTEXT_MAX)
+			continue;
+
+		printed += fprintf(fp, "%s%s", need_sep ? sep : "",
+				   callchain_list__sym_name(entry, bf,
+							    sizeof(bf), false));
+		need_sep = true;
+	}
+
+	return printed;
+}
+
+/*
+ * Print every callchain of @tree in folded form, one chain per line:
+ * the chain's value, a blank, then the symbols joined by the
+ * separator.  Stops after callchain_param.print_limit chains.
+ */
+static size_t callchain__fprintf_folded(FILE *fp, struct rb_root *tree,
+					u64 total_samples)
+{
+	struct rb_node *pos;
+	u32 nr_printed = 0;
+	size_t printed = 0;
+
+	for (pos = rb_first(tree); pos; pos = rb_next(pos)) {
+		struct callchain_node *chain;
+
+		chain = rb_entry(pos, struct callchain_node, rb_node);
+
+		printed += callchain_node__fprintf_value(chain, fp, total_samples);
+		printed += fprintf(fp, " ");
+		printed += __callchain__fprintf_folded(fp, chain);
+		printed += fprintf(fp, "\n");
+
+		if (++nr_printed == callchain_param.print_limit)
+			break;
+	}
+
+	return printed;
+}
+
+/*
+ * Print the sorted callchain of @he in the format selected by
+ * callchain_param.mode.  The parent sample count is the entry's
+ * accumulated period when symbol_conf.cumulate_callchain is set, its
+ * own period otherwise.
+ *
+ * Returns what the selected printer returns; 0 for CHAIN_NONE and for
+ * an unknown mode (which is also reported through pr_err()).
+ */
+static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
+					    u64 total_samples, int left_margin,
+					    FILE *fp)
+{
+	u64 parent_samples;
+
+	if (symbol_conf.cumulate_callchain)
+		parent_samples = he->stat_acc->period;
+	else
+		parent_samples = he->stat.period;
+
+	switch (callchain_param.mode) {
+	case CHAIN_GRAPH_REL:
+	case CHAIN_GRAPH_ABS:
+		/* both graph modes share one printer */
+		return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples,
+						parent_samples, left_margin);
+	case CHAIN_FLAT:
+		return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples);
+	case CHAIN_FOLDED:
+		return callchain__fprintf_folded(fp, &he->sorted_chain, total_samples);
+	case CHAIN_NONE:
+		return 0;
+	default:
+		pr_err("Bad callchain mode\n");
+		return 0;
+	}
+}
+
+/*
+ * Format @he into hpp->buf, one column per non-skipped format of
+ * @hpp_list.
+ *
+ * hpp->buf and hpp->size are advanced as the columns are written.
+ * Returns the number of characters the buffer pointer moved, or 0
+ * without writing anything when symbol_conf.exclude_other is set and
+ * the entry has no parent.
+ */
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+			   struct perf_hpp_list *hpp_list)
+{
+	const char *sep = symbol_conf.field_sep;
+	struct perf_hpp_fmt *fmt;
+	char *start = hpp->buf;
+	int ret;
+	bool first = true;
+
+	if (symbol_conf.exclude_other && !he->parent)
+		return 0;
+
+	perf_hpp_list__for_each_format(hpp_list, fmt) {
+		if (perf_hpp__should_skip(fmt, he->hists))
+			continue;
+
+		/*
+		 * If there's no field_sep, we still need
+		 * to display initial '  '.
+		 */
+		if (!sep || !first) {
+			ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: "  ");
+			advance_hpp(hpp, ret);
+		} else
+			first = false;
+
+		/* prefer the colored printer when colors are in use and one exists */
+		if (perf_hpp__use_color() && fmt->color)
+			ret = fmt->color(fmt, hpp, he);
+		else
+			ret = fmt->entry(fmt, hpp, he);
+
+		ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
+		advance_hpp(hpp, ret);
+	}
+
+	return hpp->buf - start;
+}
+
+/* Format @he with the format list of the hists it belongs to. */
+static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+{
+	struct perf_hpp_list *formats = he->hists->hpp_list;
+
+	return __hist_entry__snprintf(he, hpp, formats);
+}
+
+/*
+ * Print one hist entry in hierarchy mode.
+ *
+ * The line consists of an indentation that depends on the entry's
+ * depth, the columns of the first format node of @hists (the overhead
+ * columns), padding up to the deepest level when no field separator is
+ * in use, and finally the entry's own column(s) from he->hpp_list.
+ * For leaf entries the callchain follows when
+ * symbol_conf.use_callchain is set.
+ *
+ * @hpp provides the scratch buffer; its buf/size are modified.
+ *
+ * Returns the number of characters printed, or 0 without printing when
+ * symbol_conf.exclude_other is set and the entry has no parent.
+ */
+static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
+					 struct perf_hpp *hpp,
+					 struct hists *hists,
+					 FILE *fp)
+{
+	const char *sep = symbol_conf.field_sep;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	char *buf = hpp->buf;
+	size_t size = hpp->size;
+	int ret, printed = 0;
+	bool first = true;
+
+	if (symbol_conf.exclude_other && !he->parent)
+		return 0;
+
+	ret = scnprintf(hpp->buf, hpp->size, "%*s", he->depth * HIERARCHY_INDENT, "");
+	advance_hpp(hpp, ret);
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		/*
+		 * If there's no field_sep, we still need
+		 * to display initial '  '.
+		 */
+		if (!sep || !first) {
+			ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: "  ");
+			advance_hpp(hpp, ret);
+		} else
+			first = false;
+
+		if (perf_hpp__use_color() && fmt->color)
+			ret = fmt->color(fmt, hpp, he);
+		else
+			ret = fmt->entry(fmt, hpp, he);
+
+		ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
+		advance_hpp(hpp, ret);
+	}
+
+	/*
+	 * Only advance when padding was actually written: with a field
+	 * separator set, 'ret' still holds the length of the previous
+	 * column and must not be applied a second time.
+	 */
+	if (!sep) {
+		ret = scnprintf(hpp->buf, hpp->size, "%*s",
+				(hists->nr_hpp_node - 2) * HIERARCHY_INDENT, "");
+		advance_hpp(hpp, ret);
+	}
+
+	printed += fprintf(fp, "%s", buf);
+
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		/* reuse the whole scratch buffer for each trailing column */
+		hpp->buf  = buf;
+		hpp->size = size;
+
+		/*
+		 * No need to call hist_entry__snprintf_alignment() since this
+		 * fmt is always the last column in the hierarchy mode.
+		 */
+		if (perf_hpp__use_color() && fmt->color)
+			fmt->color(fmt, hpp, he);
+		else
+			fmt->entry(fmt, hpp, he);
+
+		/*
+		 * dynamic entries are right-aligned but we want left-aligned
+		 * in the hierarchy mode
+		 */
+		printed += fprintf(fp, "%s%s", sep ?: "  ", ltrim(buf));
+	}
+
+	/* putc() returns the character written, not a count */
+	if (putc('\n', fp) != EOF)
+		printed++;
+
+	if (symbol_conf.use_callchain && he->leaf) {
+		u64 total = hists__total_period(hists);
+
+		printed += hist_entry_callchain__fprintf(he, total, 0, fp);
+	}
+
+	return printed;
+}
+
+/*
+ * Print one hist entry, followed by its callchain when @use_callchain
+ * is set.
+ *
+ * @size: number of bytes of @bf to use; 0 or anything above @bfsz
+ *        means the whole buffer
+ * @bf:   scratch buffer the line is formatted into
+ * @bfsz: real size of @bf
+ *
+ * In hierarchy mode the work is handed to
+ * hist_entry__hierarchy_fprintf().  Returns the number of characters
+ * printed.
+ */
+static int hist_entry__fprintf(struct hist_entry *he, size_t size,
+			       char *bf, size_t bfsz, FILE *fp,
+			       bool use_callchain)
+{
+	struct hists *hists = he->hists;
+	u64 total_period = hists->stats.total_period;
+	struct perf_hpp hpp = {
+		.buf		= bf,
+		.size		= size,
+	};
+	int printed;
+
+	/* never format past the end of the real buffer */
+	if (size == 0 || size > bfsz)
+		hpp.size = bfsz;
+
+	if (symbol_conf.report_hierarchy)
+		return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp);
+
+	hist_entry__snprintf(he, &hpp);
+
+	printed = fprintf(fp, "%s\n", bf);
+
+	if (use_callchain)
+		printed += (int)hist_entry_callchain__fprintf(he, total_period,
+							      0, fp);
+
+	return printed;
+}
+
+/*
+ * Print the first (@indent - 2) * HIERARCHY_INDENT characters of
+ * @line.  Nothing is printed, and 0 returned, when a field separator
+ * is in use or @indent is below 2.
+ */
+static int print_hierarchy_indent(const char *sep, int indent,
+				  const char *line, FILE *fp)
+{
+	if (sep == NULL && indent >= 2)
+		return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line);
+
+	return 0;
+}
+
+/*
+ * Print the two header lines used in hierarchy mode.
+ *
+ * The first line holds the headers of the overhead columns (first
+ * format node of @hists) followed by the sort key headers of the
+ * remaining nodes, where levels are joined by " / " and the keys of
+ * one level by "+".  The second line underlines both parts with dots.
+ *
+ * @hpp provides the scratch buffer the headers are formatted into.
+ * Always returns 2.
+ */
+static int hists__fprintf_hierarchy_headers(struct hists *hists,
+					    struct perf_hpp *hpp, FILE *fp)
+{
+	bool first_node, first_col;
+	int indent;
+	int depth;
+	unsigned width = 0;
+	unsigned header_width = 0;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	const char *sep = symbol_conf.field_sep;
+
+	indent = hists->nr_hpp_node;
+
+	/* preserve max indent depth for column headers */
+	print_hierarchy_indent(sep, indent, spaces, fp);
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		fmt->header(fmt, hpp, hists, 0, NULL);
+		fprintf(fp, "%s%s", hpp->buf, sep ?: "  ");
+	}
+
+	/* combine sort headers with ' / ' */
+	first_node = true;
+	/* _continue: start at the node after the overhead node */
+	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+		if (!first_node)
+			header_width += fprintf(fp, " / ");
+		first_node = false;
+
+		first_col = true;
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+			if (perf_hpp__should_skip(fmt, hists))
+				continue;
+
+			if (!first_col)
+				header_width += fprintf(fp, "+");
+			first_col = false;
+
+			fmt->header(fmt, hpp, hists, 0, NULL);
+
+			header_width += fprintf(fp, "%s", trim(hpp->buf));
+		}
+	}
+
+	fprintf(fp, "\n# ");
+
+	/* preserve max indent depth for initial dots */
+	print_hierarchy_indent(sep, indent, dots, fp);
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+
+	first_col = true;
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		if (!first_col)
+			fprintf(fp, "%s", sep ?: "..");
+		first_col = false;
+
+		width = fmt->width(fmt, hpp, hists);
+		fprintf(fp, "%.*s", width, dots);
+	}
+
+	/*
+	 * The dotted underline of the sort keys is as wide as the widest
+	 * of: the header text printed above, or any single level's
+	 * columns plus that level's indentation.
+	 */
+	depth = 0;
+	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+		first_col = true;
+		width = depth * HIERARCHY_INDENT;
+
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+			if (perf_hpp__should_skip(fmt, hists))
+				continue;
+
+			if (!first_col)
+				width++;  /* for '+' sign between column header */
+			first_col = false;
+
+			width += fmt->width(fmt, hpp, hists);
+		}
+
+		if (width > header_width)
+			header_width = width;
+
+		depth++;
+	}
+
+	fprintf(fp, "%s%-.*s", sep ?: "  ", header_width, dots);
+
+	fprintf(fp, "\n#\n");
+
+	return 2;
+}
+
+/*
+ * Print header line number @line of @hists: the header text of every
+ * non-skipped column, separated by symbol_conf.field_sep (two blanks
+ * when none is set).
+ *
+ * @span is handed to each ->header call and read back afterwards:
+ * while it is non-zero neither the column's text nor the separator
+ * before the next column is printed.
+ * NOTE(review): presumably this is how a header spanning several
+ * columns is handled - confirm against the ->header implementations.
+ */
+static void fprintf_line(struct hists *hists, struct perf_hpp *hpp,
+			 int line, FILE *fp)
+{
+	struct perf_hpp_fmt *fmt;
+	const char *sep = symbol_conf.field_sep;
+	bool first = true;
+	int span = 0;
+
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		/* note: 'first' is only cleared on iterations that print no separator */
+		if (!first && !span)
+			fprintf(fp, "%s", sep ?: "  ");
+		else
+			first = false;
+
+		fmt->header(fmt, hpp, hists, line, &span);
+
+		if (!span)
+			fprintf(fp, "%s", hpp->buf);
+	}
+}
+
+/*
+ * Print the column headers used outside hierarchy mode: the
+ * hpp_list->nr_header_lines header lines and, unless a field separator
+ * is configured, a line of dots underlining each column plus a closing
+ * "#" line.
+ *
+ * Returns the number of lines printed.
+ */
+static int
+hists__fprintf_standard_headers(struct hists *hists,
+				struct perf_hpp *hpp,
+				FILE *fp)
+{
+	struct perf_hpp_list *hpp_list = hists->hpp_list;
+	const char *sep = symbol_conf.field_sep;
+	struct perf_hpp_fmt *fmt;
+	bool need_gap = false;
+	int line;
+
+	for (line = 0; line < hpp_list->nr_header_lines; line++) {
+		/* first # is displayed one level up */
+		if (line != 0)
+			fprintf(fp, "# ");
+		fprintf_line(hists, hpp, line, fp);
+		fprintf(fp, "\n");
+	}
+
+	/* no dotted underline when a field separator is in use */
+	if (sep)
+		return hpp_list->nr_header_lines;
+
+	fprintf(fp, "# ");
+
+	hists__for_each_format(hists, fmt) {
+		unsigned int width, i;
+
+		if (perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		/* sep is NULL here, so columns are split by two blanks */
+		if (need_gap)
+			fprintf(fp, "%s", "  ");
+		need_gap = true;
+
+		width = fmt->width(fmt, hpp, hists);
+		for (i = 0; i < width; i++)
+			fprintf(fp, ".");
+	}
+
+	fprintf(fp, "\n");
+	fprintf(fp, "#\n");
+
+	return hpp_list->nr_header_lines + 2;
+}
+
+/*
+ * Print the column headers of @hists to @fp, starting with "# ", in
+ * the hierarchy or the standard layout depending on
+ * symbol_conf.report_hierarchy.  Returns what the selected header
+ * printer returns.
+ */
+int hists__fprintf_headers(struct hists *hists, FILE *fp)
+{
+	char scratch[1024];
+	struct perf_hpp dummy_hpp = {
+		.buf	= scratch,
+		.size	= sizeof(scratch),
+	};
+
+	fprintf(fp, "# ");
+
+	return symbol_conf.report_hierarchy ?
+		hists__fprintf_hierarchy_headers(hists, &dummy_hpp, fp) :
+		hists__fprintf_standard_headers(hists, &dummy_hpp, fp);
+}
+
+/*
+ * Print the entries of @hists to @fp.
+ *
+ * @show_header:   print the column headers first
+ * @max_rows:      stop after this many rows, header rows included
+ *                 (0 = no limit)
+ * @max_cols:      passed to hist_entry__fprintf() as the usable line
+ *                 size
+ * @min_pcnt:      entries whose percent limit value is below this are
+ *                 not printed
+ * @use_callchain: print the callchain of each entry as well
+ *
+ * Filtered entries are skipped.  Returns the accumulated return values
+ * of hist_entry__fprintf().
+ * NOTE(review): when the line buffer cannot be allocated, ret is set
+ * to -1, which the size_t return type turns into SIZE_MAX.
+ */
+size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
+		      int max_cols, float min_pcnt, FILE *fp,
+		      bool use_callchain)
+{
+	struct rb_node *nd;
+	size_t ret = 0;
+	const char *sep = symbol_conf.field_sep;
+	int nr_rows = 0;
+	size_t linesz;
+	char *line = NULL;
+	unsigned indent;
+
+	/* set up the "[...]" placeholder used by the callchain printer */
+	init_rem_hits();
+
+	hists__reset_column_width(hists);
+
+	if (symbol_conf.col_width_list_str)
+		perf_hpp__set_user_width(symbol_conf.col_width_list_str);
+
+	if (show_header)
+		nr_rows += hists__fprintf_headers(hists, fp);
+
+	if (max_rows && nr_rows >= max_rows)
+		goto out;
+
+	linesz = hists__sort_list_width(hists) + 3 + 1;
+	linesz += perf_hpp__color_overhead();
+	line = malloc(linesz);
+	if (line == NULL) {
+		ret = -1;
+		goto out;
+	}
+
+	indent = hists__overhead_width(hists) + 4;
+
+	for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+		float percent;
+
+		if (h->filtered)
+			continue;
+
+		percent = hist_entry__get_percent_limit(h);
+		if (percent < min_pcnt)
+			continue;
+
+		ret += hist_entry__fprintf(h, max_cols, line, linesz, fp, use_callchain);
+
+		if (max_rows && ++nr_rows >= max_rows)
+			break;
+
+		/*
+		 * If all children are filtered out or percent-limited,
+		 * display "no entry >= x.xx%" message.
+		 */
+		if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) {
+			int depth = hists->nr_hpp_node + h->depth + 1;
+
+			print_hierarchy_indent(sep, depth, spaces, fp);
+			fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt);
+
+			if (max_rows && ++nr_rows >= max_rows)
+				break;
+		}
+
+		/* very verbose: dump the thread's maps for entries without a map */
+		if (h->ms.map == NULL && verbose > 1) {
+			__map_groups__fprintf_maps(h->thread->mg,
+						   MAP__FUNCTION, fp);
+			fprintf(fp, "%.10s end\n", graph_dotted_line);
+		}
+	}
+
+	free(line);
+out:
+	/* frees the placeholder symbol and resets the pointer to NULL */
+	zfree(&rem_sq_bracket);
+
+	return ret;
+}
+
+/*
+ * Print one "<name> events: <count>" line for every record type below
+ * PERF_RECORD_HEADER_MAX whose name, as given by perf_event__name(),
+ * is not "UNKNOWN".  Returns the number of characters printed.
+ */
+size_t events_stats__fprintf(struct events_stats *stats, FILE *fp)
+{
+	size_t printed = 0;
+	int type;
+
+	for (type = 0; type < PERF_RECORD_HEADER_MAX; ++type) {
+		const char *name = perf_event__name(type);
+
+		if (strcmp(name, "UNKNOWN") != 0)
+			printed += fprintf(fp, "%16s events: %10d\n", name,
+					   stats->nr_events[type]);
+	}
+
+	return printed;
+}
diff --git a/ui/tui/Build b/ui/tui/Build
new file mode 100644
index 0000000..9e4c6ca
--- /dev/null
+++ b/ui/tui/Build
@@ -0,0 +1,4 @@
+libperf-y += setup.o
+libperf-y += util.o
+libperf-y += helpline.o
+libperf-y += progress.o
diff --git a/ui/tui/helpline.c b/ui/tui/helpline.c
new file mode 100644
index 0000000..4ca799a
--- /dev/null
+++ b/ui/tui/helpline.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "../../util/debug.h"
+#include "../helpline.h"
+#include "../ui.h"
+#include "../libslang.h"
+
+char ui_helpline__last_msg[1024];
+bool tui_helpline__set;
+
+static void tui_helpline__pop(void)
+{	/* intentionally empty: this backend takes no action on pop */
+}
+
+/* Draw msg on the last screen row and keep a bounded copy in ui_helpline__current. */
+static void tui_helpline__push(const char *msg)
+{
+	SLsmg_gotorc(SLtt_Screen_Rows - 1, 0);
+	SLsmg_set_color(0);
+	SLsmg_write_nstring((char *)msg, SLtt_Screen_Cols);
+	SLsmg_refresh();
+	strncpy(ui_helpline__current, msg, sizeof(ui_helpline__current));
+	ui_helpline__current[sizeof(ui_helpline__current) - 1] = '\0';
+}
+
+/* Append formatted text to ui_helpline__last_msg; display it once it ends in '\n'. */
+static int tui_helpline__show(const char *format, va_list ap)
+{
+	int ret;
+	static int backlog;	/* bytes buffered by earlier calls that had no newline yet */
+
+	pthread_mutex_lock(&ui__lock);
+	ret = vscnprintf(ui_helpline__last_msg + backlog,
+			sizeof(ui_helpline__last_msg) - backlog, format, ap);
+	backlog += ret;
+	tui_helpline__set = true;
+	/* Nothing may be buffered (empty output on an empty backlog): never read [-1]. */
+	if (backlog > 0 && ui_helpline__last_msg[backlog - 1] == '\n') {
+		ui_helpline__puts(ui_helpline__last_msg);
+		SLsmg_refresh();
+		backlog = 0;
+	}
+	pthread_mutex_unlock(&ui__lock);
+
+	return ret;
+}
+
+struct ui_helpline tui_helpline_fns = {
+	.pop	= tui_helpline__pop,
+	.push	= tui_helpline__push,
+	.show	= tui_helpline__show,
+};
+
+void ui_helpline__init(void)
+{
+	helpline_fns = &tui_helpline_fns;	/* install the TUI pop/push/show table */
+	ui_helpline__puts(" ");	/* start out with a single-space helpline */
+}
diff --git a/ui/tui/progress.c b/ui/tui/progress.c
new file mode 100644
index 0000000..bc134b8
--- /dev/null
+++ b/ui/tui/progress.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include "../cache.h"
+#include "../progress.h"
+#include "../libslang.h"
+#include "../ui.h"
+#include "tui.h"
+#include "units.h"
+#include "../browser.h"
+
+static void __tui_progress__init(struct ui_progress *p)
+{
+	p->next = p->step = p->total / (SLtt_Screen_Cols - 2) ?: 1;	/* a zero quotient becomes 1 */
+}
+
+/* Format "<title> [<curr>/<total>]" into buf; returns the three helper results summed. */
+static int get_title(struct ui_progress *p, char *buf, size_t size)
+{
+	char cur[20], tot[20];
+	int n_cur = unit_number__scnprintf(cur, sizeof(cur), p->curr);
+	int n_tot = unit_number__scnprintf(tot, sizeof(tot), p->total);
+	int n_buf;
+
+	n_buf = scnprintf(buf, size, "%s [%s/%s]", p->title, cur, tot);
+
+	return n_cur + n_tot + n_buf;
+}
+
+static void tui_progress__update(struct ui_progress *p)
+{
+	char buf[100], *title = (char *) p->title;
+	int bar, y;
+	/*
+	 * FIXME: We should have a per UI backend way of showing progress,
+	 * stdio will just show a percentage as NN%, etc.
+	 */
+	if (use_browser <= 0)
+		return;
+
+	if (p->total == 0)	/* also keeps the division below from dividing by zero */
+		return;
+
+	if (p->size) {	/* use the "<title> [<curr>/<total>]" form built by get_title() */
+		get_title(p, buf, sizeof(buf));
+		title = buf;
+	}
+
+	ui__refresh_dimensions(false);
+	pthread_mutex_lock(&ui__lock);
+	y = SLtt_Screen_Rows / 2 - 2;
+	SLsmg_set_color(0);
+	SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols);	/* 3-row box across the full width */
+	SLsmg_gotorc(y++, 1);	/* title goes on the box's top row; y then moves to the bar row */
+	SLsmg_write_string(title);
+	SLsmg_fill_region(y, 1, 1, SLtt_Screen_Cols - 2, ' ');	/* blank the whole bar row */
+	SLsmg_set_color(HE_COLORSET_SELECTED);
+	bar = ((SLtt_Screen_Cols - 2) * p->curr) / p->total;	/* columns proportional to curr/total */
+	SLsmg_fill_region(y, 1, 1, bar, ' ');	/* repaint that many columns in the selected colour */
+	SLsmg_refresh();
+	pthread_mutex_unlock(&ui__lock);
+}
+
+static void tui_progress__finish(void)
+{
+	int y;
+
+	if (use_browser <= 0)
+		return;
+
+	ui__refresh_dimensions(false);
+	pthread_mutex_lock(&ui__lock);
+	y = SLtt_Screen_Rows / 2 - 2;	/* same top row tui_progress__update() draws its box at */
+	SLsmg_set_color(0);
+	SLsmg_fill_region(y, 0, 3, SLtt_Screen_Cols, ' ');	/* blank the three rows of the box */
+	SLsmg_refresh();
+	pthread_mutex_unlock(&ui__lock);
+}
+
+static struct ui_progress_ops tui_progress__ops = {
+	.init   = __tui_progress__init,
+	.update = tui_progress__update,
+	.finish = tui_progress__finish,
+};
+
+void tui_progress__init(void)
+{
+	ui_progress__ops = &tui_progress__ops;
+}
diff --git a/ui/tui/setup.c b/ui/tui/setup.c
new file mode 100644
index 0000000..d4ac416
--- /dev/null
+++ b/ui/tui/setup.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <linux/kernel.h>
+#ifdef HAVE_BACKTRACE_SUPPORT
+#include <execinfo.h>
+#endif
+
+#include "../../util/cache.h"
+#include "../../util/debug.h"
+#include "../../util/util.h"
+#include "../browser.h"
+#include "../helpline.h"
+#include "../ui.h"
+#include "../util.h"
+#include "../libslang.h"
+#include "../keysyms.h"
+#include "tui.h"
+
+static volatile int ui__need_resize;
+
+extern struct perf_error_ops perf_tui_eops;
+extern bool tui_helpline__set;
+
+extern void hist_browser__init_hpp(void);
+
+void ui__refresh_dimensions(bool force)
+{
+	if (!force && !ui__need_resize)	/* not forced and no resize flagged */
+		return;
+	ui__need_resize = 0;
+	pthread_mutex_lock(&ui__lock);
+	SLtt_get_screen_size();
+	SLsmg_reinit_smg();
+	pthread_mutex_unlock(&ui__lock);
+}
+
+static void ui__sigwinch(int sig __maybe_unused)
+{
+	ui__need_resize = 1;
+}
+
+static void ui__setup_sigwinch(void)
+{
+	static bool done;	/* makes every call after the first a no-op */
+
+	if (done)
+		return;
+
+	done = true;
+	pthread__unblock_sigwinch();
+	signal(SIGWINCH, ui__sigwinch);	/* the handler only sets ui__need_resize */
+}
+
+int ui__getch(int delay_secs)
+{
+	struct timeval timeout, *ptimeout = delay_secs ? &timeout : NULL;	/* NULL: select() has no timeout */
+	fd_set read_set;
+	int err, key;
+
+	ui__setup_sigwinch();
+
+	FD_ZERO(&read_set);
+	FD_SET(0, &read_set);	/* fd 0: standard input */
+
+	if (delay_secs) {
+		timeout.tv_sec = delay_secs;
+		timeout.tv_usec = 0;
+	}
+
+        err = select(1, &read_set, NULL, NULL, ptimeout);
+
+	if (err == 0)	/* timeout expired with no input */
+		return K_TIMER;
+
+	if (err == -1) {
+		if (errno == EINTR)	/* any interrupting signal is reported as a resize */
+			return K_RESIZE;
+		return K_ERROR;
+	}
+
+	key = SLang_getkey();
+	if (key != K_ESC)
+		return key;
+
+	FD_ZERO(&read_set);	/* got ESC: poll briefly for bytes that follow it */
+	FD_SET(0, &read_set);
+	timeout.tv_sec = 0;
+	timeout.tv_usec = 20;
+        err = select(1, &read_set, NULL, NULL, &timeout);
+	if (err == 0)	/* nothing followed within 20 usec: a bare ESC */
+		return K_ESC;
+
+	SLang_ungetkey(key);	/* push ESC back and let SLkp_getkey() read the whole key */
+	return SLkp_getkey();
+}
+
+#ifdef HAVE_BACKTRACE_SUPPORT
+static void ui__signal_backtrace(int sig)
+{
+	void *stackdump[32];	/* at most 32 frames are captured */
+	size_t size;
+
+	ui__exit(false);	/* tear the TUI down first, without the "Fatal Error" prompt */
+	psignal(sig, "perf");
+
+	printf("-------- backtrace --------\n");
+	size = backtrace(stackdump, ARRAY_SIZE(stackdump));
+	backtrace_symbols_fd(stackdump, size, STDOUT_FILENO);
+
+	exit(0);	/* NOTE(review): exit status is 0 even though a signal ended the run */
+}
+#else
+# define ui__signal_backtrace  ui__signal	/* no backtrace support: fall back to ui__signal */
+#endif
+
+static void ui__signal(int sig)
+{
+	ui__exit(false);	/* tear the TUI down without the "Fatal Error" prompt */
+	psignal(sig, "perf");	/* report which signal arrived, prefixed with "perf" */
+	exit(0);
+}
+
+int ui__init(void)
+{
+	int err;	/* result of the last SLang init call; returned to the caller */
+
+	SLutf8_enable(-1);
+	SLtt_get_terminfo();
+	SLtt_get_screen_size();
+
+	err = SLsmg_init_smg();
+	if (err < 0)
+		goto out;
+	err = SLang_init_tty(-1, 0, 0);
+	if (err < 0)
+		goto out;
+
+	err = SLkp_init();
+	if (err < 0) {	/* the only failure path that prints a message */
+		pr_err("TUI initialization failed.\n");
+		goto out;
+	}
+
+	SLkp_define_keysym((char *)"^(kB)", SL_KEY_UNTAB);
+
+	signal(SIGSEGV, ui__signal_backtrace);	/* SIGSEGV/SIGFPE: backtrace handler */
+	signal(SIGFPE, ui__signal_backtrace);
+	signal(SIGINT, ui__signal);	/* SIGINT/SIGQUIT/SIGTERM: plain teardown handler */
+	signal(SIGQUIT, ui__signal);
+	signal(SIGTERM, ui__signal);
+
+	perf_error__register(&perf_tui_eops);	/* return value is not checked here */
+
+	ui_helpline__init();
+	ui_browser__init();
+	tui_progress__init();
+
+	hist_browser__init_hpp();
+out:
+	return err;
+}
+
+void ui__exit(bool wait_for_ok)
+{
+	if (wait_for_ok && tui_helpline__set)	/* only prompt if a helpline message was ever shown */
+		ui__question_window("Fatal Error",
+				    ui_helpline__last_msg,
+				    "Press any key...", 0);
+
+	SLtt_set_cursor_visibility(1);
+	SLsmg_refresh();
+	SLsmg_reset_smg();
+	SLang_reset_tty();
+
+	perf_error__unregister(&perf_tui_eops);	/* error ops go back to the defaults if ours were active */
+}
diff --git a/ui/tui/tui.h b/ui/tui/tui.h
new file mode 100644
index 0000000..8de06f6
--- /dev/null
+++ b/ui/tui/tui.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_TUI_H_
+#define _PERF_TUI_H_ 1
+
+void tui_progress__init(void);
+
+#endif /* _PERF_TUI_H_ */
diff --git a/ui/tui/util.c b/ui/tui/util.c
new file mode 100644
index 0000000..b9794d6
--- /dev/null
+++ b/ui/tui/util.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/util.h"
+#include <signal.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/ttydefaults.h>
+
+#include "../../util/cache.h"
+#include "../../util/debug.h"
+#include "../browser.h"
+#include "../keysyms.h"
+#include "../helpline.h"
+#include "../ui.h"
+#include "../util.h"
+#include "../libslang.h"
+
+static void ui_browser__argv_write(struct ui_browser *browser,
+				   void *entry, int row)
+{
+	/* entry points at one argv slot; the current row gets the selected colours */
+	bool selected = ui_browser__is_current_entry(browser, row);
+	int colorset = selected ? HE_COLORSET_SELECTED : HE_COLORSET_NORMAL;
+
+	ui_browser__set_color(browser, colorset);
+	ui_browser__write_nstring(browser, *(char **)entry, browser->width);
+}
+
+static int popup_menu__run(struct ui_browser *menu)
+{
+	int key;	/* doubles as the return value: selected index, or -1 */
+
+	if (ui_browser__show(menu, " ", "ESC: exit, ENTER|->: Select option") < 0)
+		return -1;
+
+	while (1) {
+		key = ui_browser__run(menu, 0);
+
+		switch (key) {
+		case K_RIGHT:
+		case K_ENTER:
+			key = menu->index;	/* accept: report the highlighted entry */
+			break;
+		case K_LEFT:
+		case K_ESC:
+		case 'q':
+		case CTRL('c'):
+			key = -1;	/* cancel */
+			break;
+		default:
+			continue;	/* any other key: keep the menu running */
+		}
+
+		break;
+	}
+
+	ui_browser__hide(menu);
+	return key;
+}
+
+int ui__popup_menu(int argc, char * const argv[])
+{
+	struct ui_browser menu = {
+		.entries    = (void *)argv,	/* the argv strings are the menu rows */
+		.refresh    = ui_browser__argv_refresh,
+		.seek	    = ui_browser__argv_seek,
+		.write	    = ui_browser__argv_write,
+		.nr_entries = argc,
+	};
+
+	return popup_menu__run(&menu);	/* selected index, or -1 on cancel or show failure */
+}
+
+int ui_browser__input_window(const char *title, const char *text, char *input,
+			     const char *exit_msg, int delay_secs)
+{
+	int x, y, len, key;
+	int max_len = 60, nr_lines = 0;	/* the window is at least 60 columns wide */
+	static char buf[50];	/* typed characters; caps the input at 49 bytes plus NUL */
+	const char *t;
+
+	t = text;
+	while (1) {	/* measure text: longest line and number of lines */
+		const char *sep = strchr(t, '\n');
+
+		if (sep == NULL)
+			sep = strchr(t, '\0');
+		len = sep - t;
+		if (max_len < len)
+			max_len = len;
+		++nr_lines;
+		if (*sep == '\0')
+			break;
+		t = sep + 1;
+	}
+
+	pthread_mutex_lock(&ui__lock);
+
+	max_len += 2;
+	nr_lines += 8;
+	y = SLtt_Screen_Rows / 2 - nr_lines / 2;	/* centre the box on the screen */
+	x = SLtt_Screen_Cols / 2 - max_len / 2;
+
+	SLsmg_set_color(0);
+	SLsmg_draw_box(y, x++, nr_lines, max_len);
+	if (title) {
+		SLsmg_gotorc(y, x + 1);
+		SLsmg_write_string((char *)title);
+	}
+	SLsmg_gotorc(++y, x);
+	nr_lines -= 7;
+	max_len -= 2;
+	SLsmg_write_wrapped_string((unsigned char *)text, y, x,
+				   nr_lines, max_len, 1);
+	y += nr_lines;
+	len = 5;
+	while (len--) {	/* blank five rows, starting one row above y */
+		SLsmg_gotorc(y + len - 1, x);
+		SLsmg_write_nstring((char *)" ", max_len);
+	}
+	SLsmg_draw_box(y++, x + 1, 3, max_len - 2);	/* inner 3-row box; y ends on its middle row */
+
+	SLsmg_gotorc(y + 3, x);
+	SLsmg_write_nstring((char *)exit_msg, max_len);
+	SLsmg_refresh();
+
+	pthread_mutex_unlock(&ui__lock);
+
+	x += 2;
+	len = 0;
+	key = ui__getch(delay_secs);
+	while (key != K_TIMER && key != K_ENTER && key != K_ESC) {
+		pthread_mutex_lock(&ui__lock);
+
+		if (key == K_BKSPC) {
+			if (len == 0) {	/* nothing to erase */
+				pthread_mutex_unlock(&ui__lock);
+				goto next_key;
+			}
+			SLsmg_gotorc(y, x + --len);
+			SLsmg_write_char(' ');
+		} else {	/* NOTE(review): every other key value, whatever it is, lands in buf */
+			buf[len] = key;
+			SLsmg_gotorc(y, x + len++);
+			SLsmg_write_char(key);
+		}
+		SLsmg_refresh();
+
+		pthread_mutex_unlock(&ui__lock);
+
+		/* XXX more graceful overflow handling needed */
+		if (len == sizeof(buf) - 1) {	/* buffer full: behave as if ENTER was pressed */
+			ui_helpline__push("maximum size of symbol name reached!");
+			key = K_ENTER;
+			break;
+		}
+next_key:
+		key = ui__getch(delay_secs);
+	}
+
+	buf[len] = '\0';
+	strncpy(input, buf, len+1);	/* copies len + 1 bytes, NUL included, into input */
+	return key;	/* the key that ended the loop */
+}
+
+/* Draw a centred box holding title, text and exit_msg; returns ui__getch(delay_secs). */
+int ui__question_window(const char *title, const char *text,
+			const char *exit_msg, int delay_secs)
+{
+	int x, y;
+	int max_len = 0, nr_lines = 0;
+	const char *t = text;
+
+	while (1) {	/* measure text: longest line and number of lines */
+		const char *sep = strchr(t, '\n');
+		int len;
+
+		if (sep == NULL)
+			sep = strchr(t, '\0');
+		len = sep - t;
+		if (max_len < len)
+			max_len = len;
+		++nr_lines;
+		if (*sep == '\0')
+			break;
+		t = sep + 1;
+	}
+
+	pthread_mutex_lock(&ui__lock);
+
+	max_len += 2;
+	nr_lines += 4;
+	y = SLtt_Screen_Rows / 2 - nr_lines / 2;	/* was a stray comma operator */
+	x = SLtt_Screen_Cols / 2 - max_len / 2;
+
+	SLsmg_set_color(0);
+	SLsmg_draw_box(y, x++, nr_lines, max_len);
+	if (title) {
+		SLsmg_gotorc(y, x + 1);
+		SLsmg_write_string((char *)title);
+	}
+	SLsmg_gotorc(++y, x);
+	nr_lines -= 2;
+	max_len -= 2;
+	SLsmg_write_wrapped_string((unsigned char *)text, y, x,
+				   nr_lines, max_len, 1);
+	SLsmg_gotorc(y + nr_lines - 2, x);	/* blank row between the text and exit_msg */
+	SLsmg_write_nstring((char *)" ", max_len);
+	SLsmg_gotorc(y + nr_lines - 1, x);
+	SLsmg_write_nstring((char *)exit_msg, max_len);
+	SLsmg_refresh();
+
+	pthread_mutex_unlock(&ui__lock);
+
+	return ui__getch(delay_secs);
+}
+
+int ui__help_window(const char *text)
+{
+	return ui__question_window("Help", text, "Press any key...", 0);
+}
+
+int ui__dialog_yesno(const char *msg)
+{
+	return ui__question_window(NULL, msg, "Enter: Yes, ESC: No", 0);	/* returns the raw key; the caller interprets it */
+}
+
+static int __ui__warning(const char *title, const char *format, va_list args)
+{
+	va_list fallback;	/* spare copy: vasprintf() leaves args indeterminate */
+	int key = K_ESC;	/* what the stderr fallback reports */
+	char *s;
+	va_copy(fallback, args);
+	if (vasprintf(&s, format, args) > 0) {
+		key = ui__question_window(title, s, "Press any key...", 0);
+		free(s);
+	} else {
+		/* No text to show in a window: print title and message on stderr. */
+		fprintf(stderr, "%s\n", title);
+		vfprintf(stderr, format, fallback);
+	}
+	va_end(fallback);
+	return key;
+}
+
+static int perf_tui__error(const char *format, va_list args)
+{
+	return __ui__warning("Error:", format, args);
+}
+
+static int perf_tui__warning(const char *format, va_list args)
+{
+	return __ui__warning("Warning:", format, args);
+}
+
+struct perf_error_ops perf_tui_eops = {
+	.error		= perf_tui__error,
+	.warning	= perf_tui__warning,
+};
diff --git a/ui/ui.h b/ui/ui.h
new file mode 100644
index 0000000..9b6fdf0
--- /dev/null
+++ b/ui/ui.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_H_
+#define _PERF_UI_H_ 1
+
+#include <pthread.h>
+#include <stdbool.h>
+#include <linux/compiler.h>
+
+extern pthread_mutex_t ui__lock;
+extern void *perf_gtk_handle;
+
+extern int use_browser;
+
+void setup_browser(bool fallback_to_pager);
+void exit_browser(bool wait_for_ok);
+
+#ifdef HAVE_SLANG_SUPPORT
+int ui__init(void);
+void ui__exit(bool wait_for_ok);
+#else
+static inline int ui__init(void)
+{
+	return -1;
+}
+static inline void ui__exit(bool wait_for_ok __maybe_unused) {}
+#endif
+
+void ui__refresh_dimensions(bool force);
+
+struct option;
+
+int stdio__config_color(const struct option *opt, const char *mode, int unset);
+
+#endif /* _PERF_UI_H_ */
diff --git a/ui/util.c b/ui/util.c
new file mode 100644
index 0000000..63bf06e
--- /dev/null
+++ b/ui/util.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include "../debug.h"
+
+
+/*
+ * Default error logging functions
+ */
+static int perf_stdio__error(const char *format, va_list args)
+{
+	fprintf(stderr, "Error:\n");
+	vfprintf(stderr, format, args);
+	return 0;
+}
+
+static int perf_stdio__warning(const char *format, va_list args)
+{
+	fprintf(stderr, "Warning:\n");
+	vfprintf(stderr, format, args);
+	return 0;
+}
+
+static struct perf_error_ops default_eops =
+{
+	.error		= perf_stdio__error,
+	.warning	= perf_stdio__warning,
+};
+
+static struct perf_error_ops *perf_eops = &default_eops;
+
+
+int ui__error(const char *format, ...)
+{
+	int ret;
+	va_list args;
+
+	va_start(args, format);
+	ret = perf_eops->error(format, args);
+	va_end(args);
+
+	return ret;
+}
+
+int ui__warning(const char *format, ...)
+{
+	int ret;
+	va_list args;
+
+	va_start(args, format);
+	ret = perf_eops->warning(format, args);
+	va_end(args);
+
+	return ret;
+}
+
+/**
+ * perf_error__register - Register error logging functions
+ * @eops: The pointer to error logging function struct
+ *
+ * Register UI-specific error logging functions. Before calling this,
+ * other logging functions should be unregistered, if any.
+ */
+int perf_error__register(struct perf_error_ops *eops)
+{
+	if (perf_eops == &default_eops) {	/* only the defaults may be replaced */
+		perf_eops = eops;
+		return 0;
+	}
+	return -1;
+}
+
+/**
+ * perf_error__unregister - Unregister error logging functions
+ * @eops: The pointer to error logging function struct
+ *
+ * Unregister already registered error logging functions.
+ */
+int perf_error__unregister(struct perf_error_ops *eops)
+{
+	if (perf_eops == eops) {	/* eops must be the table currently in use */
+		perf_eops = &default_eops;
+		return 0;
+	}
+	return -1;
+}
diff --git a/ui/util.h b/ui/util.h
new file mode 100644
index 0000000..5e44223
--- /dev/null
+++ b/ui/util.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_UTIL_H_
+#define _PERF_UI_UTIL_H_ 1
+
+#include <stdarg.h>
+
+int ui__getch(int delay_secs);
+int ui__popup_menu(int argc, char * const argv[]);
+int ui__help_window(const char *text);
+int ui__dialog_yesno(const char *msg);
+int ui__question_window(const char *title, const char *text,
+			const char *exit_msg, int delay_secs);
+
+struct perf_error_ops {
+	int (*error)(const char *format, va_list args);
+	int (*warning)(const char *format, va_list args);
+};
+
+int perf_error__register(struct perf_error_ops *eops);
+int perf_error__unregister(struct perf_error_ops *eops);
+
+#endif /* _PERF_UI_UTIL_H_ */
diff --git a/util/Build b/util/Build
new file mode 100644
index 0000000..8052373
--- /dev/null
+++ b/util/Build
@@ -0,0 +1,221 @@
+libperf-y += annotate.o
+libperf-y += block-range.o
+libperf-y += build-id.o
+libperf-y += config.o
+libperf-y += ctype.o
+libperf-y += db-export.o
+libperf-y += env.o
+libperf-y += event.o
+libperf-y += evlist.o
+libperf-y += evsel.o
+libperf-y += evsel_fprintf.o
+libperf-y += find_bit.o
+libperf-y += kallsyms.o
+libperf-y += levenshtein.o
+libperf-y += llvm-utils.o
+libperf-y += mmap.o
+libperf-y += memswap.o
+libperf-y += parse-events.o
+libperf-y += perf_regs.o
+libperf-y += path.o
+libperf-y += print_binary.o
+libperf-y += rbtree.o
+libperf-y += libstring.o
+libperf-y += bitmap.o
+libperf-y += hweight.o
+libperf-y += smt.o
+libperf-y += quote.o
+libperf-y += strbuf.o
+libperf-y += string.o
+libperf-y += strlist.o
+libperf-y += strfilter.o
+libperf-y += top.o
+libperf-y += usage.o
+libperf-y += dso.o
+libperf-y += symbol.o
+libperf-y += symbol_fprintf.o
+libperf-y += color.o
+libperf-y += metricgroup.o
+libperf-y += header.o
+libperf-y += callchain.o
+libperf-y += values.o
+libperf-y += debug.o
+libperf-y += machine.o
+libperf-y += map.o
+libperf-y += pstack.o
+libperf-y += session.o
+libperf-$(CONFIG_TRACE) += syscalltbl.o
+libperf-y += ordered-events.o
+libperf-y += namespaces.o
+libperf-y += comm.o
+libperf-y += thread.o
+libperf-y += thread_map.o
+libperf-y += trace-event-parse.o
+libperf-y += parse-events-flex.o
+libperf-y += parse-events-bison.o
+libperf-y += pmu.o
+libperf-y += pmu-flex.o
+libperf-y += pmu-bison.o
+libperf-y += trace-event-read.o
+libperf-y += trace-event-info.o
+libperf-y += trace-event-scripting.o
+libperf-y += trace-event.o
+libperf-y += svghelper.o
+libperf-y += sort.o
+libperf-y += hist.o
+libperf-y += util.o
+libperf-y += xyarray.o
+libperf-y += cpumap.o
+libperf-y += cgroup.o
+libperf-y += target.o
+libperf-y += rblist.o
+libperf-y += intlist.o
+libperf-y += vdso.o
+libperf-y += counts.o
+libperf-y += stat.o
+libperf-y += stat-shadow.o
+libperf-y += record.o
+libperf-y += srcline.o
+libperf-y += data.o
+libperf-y += tsc.o
+libperf-y += cloexec.o
+libperf-y += call-path.o
+libperf-y += rwsem.o
+libperf-y += thread-stack.o
+libperf-$(CONFIG_AUXTRACE) += auxtrace.o
+libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
+libperf-$(CONFIG_AUXTRACE) += intel-pt.o
+libperf-$(CONFIG_AUXTRACE) += intel-bts.o
+libperf-$(CONFIG_AUXTRACE) += arm-spe.o
+libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
+
+ifdef CONFIG_LIBOPENCSD
+libperf-$(CONFIG_AUXTRACE) += cs-etm.o
+libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
+endif
+
+libperf-y += parse-branch-options.o
+libperf-y += dump-insn.o
+libperf-y += parse-regs-options.o
+libperf-y += term.o
+libperf-y += help-unknown-cmd.o
+libperf-y += mem-events.o
+libperf-y += vsprintf.o
+libperf-y += drv_configs.o
+libperf-y += units.o
+libperf-y += time-utils.o
+libperf-y += expr-bison.o
+libperf-y += branch.o
+libperf-y += mem2node.o
+
+libperf-$(CONFIG_LIBBPF) += bpf-loader.o
+libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
+libperf-$(CONFIG_LIBELF) += symbol-elf.o
+libperf-$(CONFIG_LIBELF) += probe-file.o
+libperf-$(CONFIG_LIBELF) += probe-event.o
+
+ifndef CONFIG_LIBELF
+libperf-y += symbol-minimal.o
+endif
+
+ifndef CONFIG_SETNS
+libperf-y += setns.o
+endif
+
+libperf-$(CONFIG_DWARF) += probe-finder.o
+libperf-$(CONFIG_DWARF) += dwarf-aux.o
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND)    += unwind-libunwind-local.o
+libperf-$(CONFIG_LIBUNWIND)          += unwind-libunwind.o
+libperf-$(CONFIG_LIBUNWIND_X86)      += libunwind/x86_32.o
+libperf-$(CONFIG_LIBUNWIND_AARCH64)  += libunwind/arm64.o
+
+libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+
+libperf-y += scripting-engines/
+
+libperf-$(CONFIG_ZLIB) += zlib.o
+libperf-$(CONFIG_LZMA) += lzma.o
+libperf-y += demangle-java.o
+libperf-y += demangle-rust.o
+
+ifdef CONFIG_JITDUMP
+libperf-$(CONFIG_LIBELF) += jitdump.o
+libperf-$(CONFIG_LIBELF) += genelf.o
+libperf-$(CONFIG_DWARF) += genelf_debug.o
+endif
+
+libperf-y += perf-hooks.o
+
+libperf-$(CONFIG_CXX) += c++/
+
+CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+# avoid compiler warnings in 32-bit mode
+CFLAGS_genelf_debug.o  += -Wno-packed
+
+$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+
+$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+
+$(OUTPUT)util/expr-bison.c: util/expr.y
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__
+
+$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+
+$(OUTPUT)util/pmu-bison.c: util/pmu.y
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
+
+CFLAGS_parse-events-flex.o  += -w
+CFLAGS_pmu-flex.o           += -w
+CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
+CFLAGS_pmu-bison.o          += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+CFLAGS_expr-bison.o         += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+
+$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+
+CFLAGS_bitmap.o        += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_find_bit.o      += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_rbtree.o        += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_libstring.o     += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_hweight.o       += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_parse-events.o  += -Wno-redundant-decls
+CFLAGS_header.o        += -include $(OUTPUT)PERF-VERSION-FILE
+
+$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/bitmap.o: ../lib/bitmap.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/find_bit.o: ../lib/find_bit.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/libstring.o: ../lib/string.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
diff --git a/util/PERF-VERSION-GEN b/util/PERF-VERSION-GEN
new file mode 100755
index 0000000..3802cee
--- /dev/null
+++ b/util/PERF-VERSION-GEN
@@ -0,0 +1,51 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+if [ $# -eq 1 ]  ; then
+	OUTPUT=$1
+fi
+
+GVF=${OUTPUT}PERF-VERSION-FILE
+
+LF='
+'
+
+#
+# First check if there is a .git to get the version from git describe
+# otherwise try to get the version from the kernel Makefile
+#
+CID=
+TAG=
+if test -d ../../.git -o -f ../../.git
+then
+	TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null )
+	CID=$(git log -1 --abbrev=4 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
+elif test -f ../../PERF-VERSION-FILE
+then
+	TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
+fi
+if test -z "$TAG"
+then
+	TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
+fi
+VN="$TAG$CID"
+if test -n "$CID"
+then
+	# format version string, strip trailing zero of sublevel:
+	VN=$(echo "$VN" | sed -e 's/-/./g;s/\([0-9]*[.][0-9]*\)[.]0/\1/')
+fi
+
+# strip any leading "v"; the pattern is quoted so the shell never globs it
+VN=$(expr "$VN" : 'v*\(.*\)')
+# $GVF is quoted below so an output directory containing spaces still works
+if test -r "$GVF"
+then
+	VC=$(sed -e 's/^#define PERF_VERSION "\(.*\)"/\1/' <"$GVF")
+else
+	VC=unset
+fi
+test "$VN" = "$VC" || {
+	echo >&2 "  PERF_VERSION = $VN"
+	echo "#define PERF_VERSION \"$VN\"" >"$GVF"
+}
+
+
diff --git a/util/annotate.c b/util/annotate.c
new file mode 100644
index 0000000..536ee14
--- /dev/null
+++ b/util/annotate.c
@@ -0,0 +1,2707 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-annotate.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include "util.h"
+#include "ui/ui.h"
+#include "sort.h"
+#include "build-id.h"
+#include "color.h"
+#include "config.h"
+#include "cache.h"
+#include "symbol.h"
+#include "units.h"
+#include "debug.h"
+#include "annotate.h"
+#include "evsel.h"
+#include "block-range.h"
+#include "string2.h"
+#include "arch/common.h"
+#include <regex.h>
+#include <pthread.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+
+/* FIXME: For the HE_COLORSET */
+#include "ui/browser.h"
+
+/*
+ * FIXME: Using the same values as slang.h,
+ * but that header may not be available everywhere
+ */
+#define LARROW_CHAR	((unsigned char)',')
+#define RARROW_CHAR	((unsigned char)'+')
+#define DARROW_CHAR	((unsigned char)'.')
+#define UARROW_CHAR	((unsigned char)'-')
+
+#include "sane_ctype.h"
+
+struct annotation_options annotation__default_options = {
+	.use_offset     = true,
+	.jump_arrows    = true,
+	.offset_level	= ANNOTATION__OFFSET_JUMP_TARGETS,
+};
+
+const char 	*disassembler_style;
+const char	*objdump_path;
+static regex_t	 file_lineno;
+
+static struct ins_ops *ins__find(struct arch *arch, const char *name);
+static void ins__sort(struct arch *arch);
+static int disasm_line__parse(char *line, const char **namep, char **rawp);
+
+struct arch {
+	const char	*name;
+	struct ins	*instructions;	/* instruction table; entries are appended at [nr_instructions] */
+	size_t		nr_instructions;	/* entries currently in use */
+	size_t		nr_instructions_allocated;	/* 0 while instructions is a table the grow code did not allocate */
+	struct ins_ops  *(*associate_instruction_ops)(struct arch *arch, const char *name);
+	bool		sorted_instructions;	/* presumably set once the table has been sorted -- TODO confirm */
+	bool		initialized;	/* presumably set once ->init() has run -- TODO confirm */
+	void		*priv;
+	unsigned int	model;
+	unsigned int	family;
+	int		(*init)(struct arch *arch, char *cpuid);
+	bool		(*ins_is_fused)(struct arch *arch, const char *ins1,
+					const char *ins2);	/* optional: ins__is_fused() returns false when NULL */
+	struct		{
+		char comment_char;
+		char skip_functions_char;	/* call__parse() rejects target names containing this char */
+	} objdump;
+};
+
+static struct ins_ops call_ops;
+static struct ins_ops dec_ops;
+static struct ins_ops jump_ops;
+static struct ins_ops mov_ops;
+static struct ins_ops nop_ops;
+static struct ins_ops lock_ops;
+static struct ins_ops ret_ops;
+
+/* Make room for 128 more entries; returns 0, or -1 if the allocation fails. */
+static int arch__grow_instructions(struct arch *arch)
+{
+	struct ins *new_instructions;
+	size_t new_nr_allocated;
+
+	if (arch->nr_instructions_allocated == 0 && arch->instructions) {
+		/* Table was not allocated here: copy it out instead of realloc()ing it. */
+		new_nr_allocated = arch->nr_instructions + 128;
+		new_instructions = calloc(new_nr_allocated, sizeof(struct ins));
+		if (new_instructions == NULL)
+			return -1;
+		/* memcpy() takes a byte count, so scale the entry count. */
+		memcpy(new_instructions, arch->instructions,
+		       arch->nr_instructions * sizeof(struct ins));
+	} else {
+		new_nr_allocated = arch->nr_instructions_allocated + 128;
+		new_instructions = realloc(arch->instructions,
+					   new_nr_allocated * sizeof(struct ins));
+		if (new_instructions == NULL)
+			return -1;
+	}
+
+	arch->instructions = new_instructions;
+	arch->nr_instructions_allocated = new_nr_allocated;
+	return 0;
+}
+
+static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops)
+{
+	struct ins *ins;
+
+	if (arch->nr_instructions == arch->nr_instructions_allocated &&
+	    arch__grow_instructions(arch))	/* table full and it could not be grown */
+		return -1;
+
+	ins = &arch->instructions[arch->nr_instructions];	/* first unused slot */
+	ins->name = strdup(name);	/* the table keeps its own copy of the name */
+	if (!ins->name)
+		return -1;
+
+	ins->ops  = ops;
+	arch->nr_instructions++;	/* the slot only counts once the name copy succeeded */
+
+	ins__sort(arch);	/* runs after every single insertion */
+	return 0;
+}
+
+#include "arch/arm/annotate/instructions.c"
+#include "arch/arm64/annotate/instructions.c"
+#include "arch/x86/annotate/instructions.c"
+#include "arch/powerpc/annotate/instructions.c"
+#include "arch/s390/annotate/instructions.c"
+
+static struct arch architectures[] = {
+	{
+		.name = "arm",
+		.init = arm__annotate_init,
+	},
+	{
+		.name = "arm64",
+		.init = arm64__annotate_init,
+	},
+	{
+		.name = "x86",
+		.init = x86__annotate_init,
+		.instructions = x86__instructions,
+		.nr_instructions = ARRAY_SIZE(x86__instructions),
+		.ins_is_fused = x86__ins_is_fused,
+		.objdump =  {
+			.comment_char = '#',
+		},
+	},
+	{
+		.name = "powerpc",
+		.init = powerpc__annotate_init,
+	},
+	{
+		.name = "s390",
+		.init = s390__annotate_init,
+		.objdump =  {
+			.comment_char = '#',
+		},
+	},
+};
+
+static void ins__delete(struct ins_operands *ops)
+{
+	if (ops != NULL) {	/* a NULL ops is accepted and ignored */
+		zfree(&ops->source.raw);
+		zfree(&ops->source.name);
+		zfree(&ops->target.raw);
+		zfree(&ops->target.name);
+	}
+}
+
+static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
+			      struct ins_operands *ops)
+{
+	return scnprintf(bf, size, "%-6s %s", ins->name, ops->raw);
+}
+
+int ins__scnprintf(struct ins *ins, char *bf, size_t size,
+		  struct ins_operands *ops)
+{
+	/* Use the instruction's own formatter if it has one, else "name raw-operands". */
+	if (ins->ops->scnprintf == NULL)
+		return ins__raw_scnprintf(ins, bf, size, ops);
+	return ins->ops->scnprintf(ins, bf, size, ops);
+}
+
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
+{
+	/* Without an arch, or without its ins_is_fused callback, the answer is false. */
+	bool has_hook = arch != NULL && arch->ins_is_fused != NULL;
+
+	return has_hook ? arch->ins_is_fused(arch, ins1, ins2) : false;
+}
+
+static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+	char *endptr, *tok, *name;
+	struct map *map = ms->map;
+	struct addr_map_symbol target = {
+		.map = map,
+	};
+
+	ops->target.addr = strtoull(ops->raw, &endptr, 16);	/* leading hex address of the operand */
+
+	name = strchr(endptr, '<');	/* a symbol name, if present, sits between '<' and '>' */
+	if (name == NULL)
+		goto indirect_call;
+
+	name++;
+
+	if (arch->objdump.skip_functions_char &&
+	    strchr(name, arch->objdump.skip_functions_char))
+		return -1;
+
+	tok = strchr(name, '>');
+	if (tok == NULL)
+		return -1;
+
+	*tok = '\0';	/* terminate at '>' just long enough to strdup() the name */
+	ops->target.name = strdup(name);
+	*tok = '>';	/* restore the raw operand string */
+
+	if (ops->target.name == NULL)
+		return -1;
+find_target:
+	target.addr = map__objdump_2mem(map, ops->target.addr);
+
+	if (map_groups__find_ams(&target) == 0 &&
+	    map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr)
+		ops->target.sym = target.sym;	/* only kept when the address converts back to target.addr */
+
+	return 0;
+
+indirect_call:
+	tok = strchr(endptr, '*');	/* no "<name>": look for "*<hex address>" instead */
+	if (tok != NULL)
+		ops->target.addr = strtoull(tok + 1, NULL, 16);
+	goto find_target;
+}
+
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops)
+{
+	if (ops->target.sym)	/* first choice: the symbol found for the target */
+		return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name);
+
+	if (ops->target.addr == 0)	/* no address was parsed: print the operands as they came */
+		return ins__raw_scnprintf(ins, bf, size, ops);
+
+	if (ops->target.name)	/* the name taken from between '<' and '>' */
+		return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name);
+
+	return scnprintf(bf, size, "%-6s *%" PRIx64, ins->name, ops->target.addr);	/* last resort: "*<hex address>" */
+}
+
+/* Operations for call instructions: operand parsing and formatting. */
+static struct ins_ops call_ops = {
+	.parse	   = call__parse,
+	.scnprintf = call__scnprintf,
+};
+
+/* True when @ins is handled by the generic or the s390 call operations. */
+bool ins__is_call(const struct ins *ins)
+{
+	if (ins->ops == &call_ops)
+		return true;
+
+	return ins->ops == &s390_call_ops;
+}
+
+/*
+ * Parse the operands of a jump instruction.
+ *
+ * Extracts the target address from ops->raw (skipping up to two leading
+ * comma separated operands), records whether the target lies outside the
+ * current symbol in ops->target.outside, tries to resolve the target
+ * symbol, and, for targets inside the symbol, stores the offset from the
+ * symbol start in ops->target.offset.
+ *
+ * Always returns 0.
+ */
+static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms)
+{
+	struct map *map = ms->map;
+	struct symbol *sym = ms->sym;
+	struct addr_map_symbol target = {
+		.map = map,
+	};
+	const char *c = strchr(ops->raw, ',');
+	u64 start, end;
+	/*
+	 * Examples of lines to parse for the _cpp_lex_token@@Base
+	 * function:
+	 *
+	 * 1159e6c: jne    115aa32 <_cpp_lex_token@@Base+0xf92>
+	 * 1159e8b: jne    c469be <cpp_named_operator2name@@Base+0xa72>
+	 *
+	 * The first is a jump to an offset inside the same function,
+	 * the second is to another function, i.e. that 0xa72 is an
+	 * offset in the cpp_named_operator2name@@base function.
+	 */
+	/*
+	 * skip over possible up to 2 operands to get to address, e.g.:
+	 * tbnz	 w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
+	 */
+	if (c != NULL) {
+		/* step past the first comma; never increment a NULL pointer */
+		c++;
+		ops->target.addr = strtoull(c, NULL, 16);
+		if (!ops->target.addr) {
+			c = strchr(c, ',');
+			if (c != NULL)
+				ops->target.addr = strtoull(c + 1, NULL, 16);
+		}
+	} else {
+		ops->target.addr = strtoull(ops->raw, NULL, 16);
+	}
+
+	target.addr = map__objdump_2mem(map, ops->target.addr);
+	start = map->unmap_ip(map, sym->start);
+	end = map->unmap_ip(map, sym->end);
+
+	ops->target.outside = target.addr < start || target.addr > end;
+
+	/*
+	 * FIXME: things like this in _cpp_lex_token (gcc's cc1 program):
+
+		cpp_named_operator2name@@Base+0xa72
+
+	 * Point to a place that is after the cpp_named_operator2name
+	 * boundaries, i.e.  in the ELF symbol table for cc1
+	 * cpp_named_operator2name is marked as being 32-bytes long, but it in
+	 * fact is much larger than that, so we seem to need a symbols__find()
+	 * routine that looks for >= current->start and  < next_symbol->start,
+	 * possibly just for C++ objects?
+	 *
+	 * For now lets just make some progress by marking jumps to outside the
+	 * current function as call like.
+	 *
+	 * Actual navigation will come next, with further understanding of how
+	 * the symbol searching and disassembly should be done.
+	 */
+	if (map_groups__find_ams(&target) == 0 &&
+	    map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr)
+		ops->target.sym = target.sym;
+
+	if (!ops->target.outside) {
+		ops->target.offset = target.addr - start;
+		ops->target.offset_avail = true;
+	} else {
+		ops->target.offset_avail = false;
+	}
+
+	return 0;
+}
+
+/*
+ * Print a jump instruction.
+ *
+ * Falls back to the raw operands when there is no target address or the
+ * target offset is negative.  Jumps leaving the function that resolved to
+ * a symbol are printed with the symbol name.  Otherwise the operands that
+ * precede the address (if any) are copied verbatim from ops->raw and the
+ * target offset is appended in hex.
+ */
+static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops)
+{
+	const char *c;
+	int prefix_len = 0;
+
+	if (!ops->target.addr || ops->target.offset < 0)
+		return ins__raw_scnprintf(ins, bf, size, ops);
+
+	if (ops->target.outside && ops->target.sym != NULL)
+		return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name);
+
+	c = strchr(ops->raw, ',');
+	if (c != NULL) {
+		const char *c2 = strchr(c + 1, ',');
+
+		/* check for 3-op insn */
+		if (c2 != NULL)
+			c = c2;
+		c++;
+
+		/* mirror arch objdump's space-after-comma style */
+		if (*c == ' ')
+			c++;
+
+		/* "%.*s" takes an int precision, not a ptrdiff_t */
+		prefix_len = (int)(c - ops->raw);
+	}
+
+	return scnprintf(bf, size, "%-6s %.*s%" PRIx64,
+			 ins->name, prefix_len, ops->raw,
+			 ops->target.offset);
+}
+
+/* Operations for jump instructions: operand parsing and formatting. */
+static struct ins_ops jump_ops = {
+	.parse	   = jump__parse,
+	.scnprintf = jump__scnprintf,
+};
+
+/* True when @ins is handled by the jump operations. */
+bool ins__is_jump(const struct ins *ins)
+{
+	const struct ins_ops *ops = ins->ops;
+
+	return ops == &jump_ops;
+}
+
+/*
+ * Extract "<hex address> <symbol>" from an objdump comment.
+ *
+ * Nothing is done unless the operand text @raw contains "(%rip)".  The hex
+ * number at the start of @comment is stored in *@addrp and, when a
+ * "<...>" pair follows, a copy of the text between the brackets is stored
+ * in *@namep (which may end up NULL if the copy fails).
+ *
+ * Returns -1 when an address was parsed but no '<' follows it, 0 otherwise.
+ */
+static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
+{
+	char *after_addr, *open, *close;
+
+	if (!strstr(raw, "(%rip)"))
+		return 0;
+
+	*addrp = strtoull(comment, &after_addr, 16);
+	if (after_addr == comment)
+		return 0;
+
+	open = strchr(after_addr, '<');
+	if (!open)
+		return -1;
+
+	close = strchr(open + 1, '>');
+	if (close) {
+		/* temporarily cut the string at '>' to copy the name */
+		*close = '\0';
+		*namep = strdup(open + 1);
+		*close = '>';
+	}
+
+	return 0;
+}
+
+/*
+ * Parse the instruction that follows a "lock" prefix.
+ *
+ * ops->raw holds the prefixed instruction; it is split into a mnemonic
+ * (ops->locked.ins.name) and operands (ops->locked.ops->raw), the matching
+ * instruction operations are looked up and, if they have a parser, the
+ * operands are parsed with it.
+ *
+ * Always returns 0.  On any failure ops->locked.ops is released and
+ * ops->locked.ins.ops is left NULL so that lock__scnprintf() falls back to
+ * printing the raw operands instead of handing a NULL operand block to the
+ * inner formatter.
+ */
+static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+	ops->locked.ops = zalloc(sizeof(*ops->locked.ops));
+	if (ops->locked.ops == NULL)
+		return 0;
+
+	if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0)
+		goto out_free_ops;
+
+	ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name);
+
+	if (ops->locked.ins.ops == NULL)
+		goto out_free_ops;
+
+	if (ops->locked.ins.ops->parse &&
+	    ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0)
+		goto out_free_ops;
+
+	return 0;
+
+out_free_ops:
+	/* keep ins.ops and the operand block consistent: both gone */
+	ops->locked.ins.ops = NULL;
+	zfree(&ops->locked.ops);
+	return 0;
+}
+
+/*
+ * Print a "lock" prefixed instruction: the prefix followed by the formatted
+ * inner instruction, or the raw operands when the inner instruction has no
+ * operations attached.
+ */
+static int lock__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops)
+{
+	int printed;
+
+	if (ops->locked.ins.ops != NULL) {
+		printed = scnprintf(bf, size, "%-6s ", ins->name);
+		printed += ins__scnprintf(&ops->locked.ins, bf + printed,
+					  size - printed, ops->locked.ops);
+		return printed;
+	}
+
+	return ins__raw_scnprintf(ins, bf, size, ops);
+}
+
+/*
+ * Release everything lock__parse() attached to @ops: the inner
+ * instruction's operands (through its own ->free() when it has one), the
+ * inner operand block itself, the inner mnemonic and the target strings.
+ */
+static void lock__delete(struct ins_operands *ops)
+{
+	struct ins *ins = &ops->locked.ins;
+
+	if (ins->ops && ins->ops->free)
+		ins->ops->free(ops->locked.ops);
+	else
+		ins__delete(ops->locked.ops);
+
+	zfree(&ops->locked.ops);
+	/* the mnemonic was strdup()ed by disasm_line__parse() in lock__parse() */
+	free((void *)ins->name);
+	ins->name = NULL;
+	zfree(&ops->target.raw);
+	zfree(&ops->target.name);
+}
+
+/* Operations for the "lock" prefix: parse, format and free the inner instruction. */
+static struct ins_ops lock_ops = {
+	.free	   = lock__delete,
+	.parse	   = lock__parse,
+	.scnprintf = lock__scnprintf,
+};
+
+/*
+ * Parse "source,target [comment]" operands.
+ *
+ * The text before the first ',' is copied into ops->source.raw and the
+ * text after it, up to the arch comment character (or end of string) with
+ * trailing whitespace removed, into ops->target.raw.  If a comment is
+ * present, comment__symbol() is given a chance to fill in the address and
+ * name of both operands.
+ *
+ * Returns 0 on success, -1 when there is no ',' or a copy fails.
+ */
+static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+{
+	char *s = strchr(ops->raw, ','), *target, *comment, prev;
+
+	if (s == NULL)
+		return -1;
+
+	/* temporarily cut at the comma to copy the source operand */
+	*s = '\0';
+	ops->source.raw = strdup(ops->raw);
+	*s = ',';
+
+	if (ops->source.raw == NULL)
+		return -1;
+
+	target = ++s;
+	comment = strchr(s, arch->objdump.comment_char);
+
+	/* s = last character that can belong to the target operand */
+	if (comment != NULL)
+		s = comment - 1;
+	else
+		s = strchr(s, '\0') - 1;
+
+	/* strip trailing whitespace, then cut right after the operand */
+	while (s > target && isspace(s[0]))
+		--s;
+	s++;
+	prev = *s;
+	*s = '\0';
+
+	ops->target.raw = strdup(target);
+	*s = prev;
+
+	if (ops->target.raw == NULL)
+		goto out_free_source;
+
+	if (comment == NULL)
+		return 0;
+
+	/* comment + 1 skips the comment character itself */
+	comment = ltrim(comment);
+	comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name);
+	comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
+
+	return 0;
+
+out_free_source:
+	zfree(&ops->source.raw);
+	return -1;
+}
+
+/*
+ * Print a two operand instruction, preferring the symbolic name of each
+ * operand over its raw text when a name is available.
+ */
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops)
+{
+	const char *src = ops->source.name ?: ops->source.raw;
+	const char *dst = ops->target.name ?: ops->target.raw;
+
+	return scnprintf(bf, size, "%-6s %s,%s", ins->name, src, dst);
+}
+
+/* Operations for "source,target" style instructions. */
+static struct ins_ops mov_ops = {
+	.parse	   = mov__parse,
+	.scnprintf = mov__scnprintf,
+};
+
+/*
+ * Parse a single operand instruction.
+ *
+ * The first whitespace delimited word of ops->raw is copied into
+ * ops->target.raw.  If the arch comment character follows,
+ * comment__symbol() is given a chance to fill in the target address and
+ * name.
+ *
+ * Returns 0 on success, -1 when the operand cannot be copied.
+ *
+ * @arch is dereferenced below, so it must not be marked __maybe_unused.
+ */
+static int dec__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+{
+	char *target, *comment, *s, prev;
+
+	target = s = ops->raw;
+
+	while (s[0] != '\0' && !isspace(s[0]))
+		++s;
+	/* temporarily cut after the operand to copy it */
+	prev = *s;
+	*s = '\0';
+
+	ops->target.raw = strdup(target);
+	*s = prev;
+
+	if (ops->target.raw == NULL)
+		return -1;
+
+	comment = strchr(s, arch->objdump.comment_char);
+	if (comment == NULL)
+		return 0;
+
+	/* comment + 1 skips the comment character itself */
+	comment = ltrim(comment);
+	comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
+
+	return 0;
+}
+
+/*
+ * Print a single operand instruction, preferring the operand's symbolic
+ * name over its raw text when a name is available.
+ */
+static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
+			   struct ins_operands *ops)
+{
+	const char *operand = ops->target.name ?: ops->target.raw;
+
+	return scnprintf(bf, size, "%-6s %s", ins->name, operand);
+}
+
+/* Operations for single operand instructions. */
+static struct ins_ops dec_ops = {
+	.parse	   = dec__parse,
+	.scnprintf = dec__scnprintf,
+};
+
+/* Print every nop variant as a plain, left aligned "nop" without operands. */
+static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
+			  struct ins_operands *ops __maybe_unused)
+{
+	const char *mnemonic = "nop";
+
+	return scnprintf(bf, size, "%-6s", mnemonic);
+}
+
+/* Operations for nop instructions: formatting only, no operand parsing. */
+static struct ins_ops nop_ops = {
+	.scnprintf = nop__scnprintf,
+};
+
+/* Operations for return instructions: printed with their raw operands. */
+static struct ins_ops ret_ops = {
+	.scnprintf = ins__raw_scnprintf,
+};
+
+/* True when @ins is handled by the return operations. */
+bool ins__is_ret(const struct ins *ins)
+{
+	const struct ins_ops *ops = ins->ops;
+
+	return ops == &ret_ops;
+}
+
+/* True when @ins is handled by the "lock" prefix operations. */
+bool ins__is_lock(const struct ins *ins)
+{
+	const struct ins_ops *ops = ins->ops;
+
+	return ops == &lock_ops;
+}
+
+/* bsearch() comparator: @name is the mnemonic looked for, @insp a table entry. */
+static int ins__key_cmp(const void *name, const void *insp)
+{
+	const char *key = name;
+	const struct ins *entry = insp;
+
+	return strcmp(key, entry->name);
+}
+
+/* qsort() comparator ordering instruction table entries by mnemonic. */
+static int ins__cmp(const void *a, const void *b)
+{
+	const char *name_a = ((const struct ins *)a)->name;
+	const char *name_b = ((const struct ins *)b)->name;
+
+	return strcmp(name_a, name_b);
+}
+
+/* Sort the arch's instruction table by mnemonic so it can be bsearch()ed. */
+static void ins__sort(struct arch *arch)
+{
+	const int count = arch->nr_instructions;
+
+	qsort(arch->instructions, count, sizeof(struct ins), ins__cmp);
+}
+
+/*
+ * Look up @name in the arch's instruction table, sorting the table on
+ * first use.  Returns the entry's operations, or NULL when there is no
+ * entry with that name.
+ */
+static struct ins_ops *__ins__find(struct arch *arch, const char *name)
+{
+	const int count = arch->nr_instructions;
+	struct ins *found;
+
+	if (!arch->sorted_instructions) {
+		ins__sort(arch);
+		arch->sorted_instructions = true;
+	}
+
+	found = bsearch(name, arch->instructions, count, sizeof(struct ins), ins__key_cmp);
+	if (found == NULL)
+		return NULL;
+
+	return found->ops;
+}
+
+/*
+ * Find the operations for instruction @name: first in the arch's table,
+ * then, if that fails, through the arch's ->associate_instruction_ops()
+ * hook when it has one.  Returns NULL when neither yields anything.
+ */
+static struct ins_ops *ins__find(struct arch *arch, const char *name)
+{
+	struct ins_ops *ops = __ins__find(arch, name);
+
+	if (ops != NULL)
+		return ops;
+
+	if (arch->associate_instruction_ops == NULL)
+		return NULL;
+
+	return arch->associate_instruction_ops(arch, name);
+}
+
+/* bsearch() comparator: @name is the arch name looked for, @archp a table entry. */
+static int arch__key_cmp(const void *name, const void *archp)
+{
+	const char *key = name;
+	const struct arch *entry = archp;
+
+	return strcmp(key, entry->name);
+}
+
+/* qsort() comparator ordering architecture entries by name. */
+static int arch__cmp(const void *a, const void *b)
+{
+	const char *name_a = ((const struct arch *)a)->name;
+	const char *name_b = ((const struct arch *)b)->name;
+
+	return strcmp(name_a, name_b);
+}
+
+/* Sort the architectures table by name so it can be bsearch()ed. */
+static void arch__sort(void)
+{
+	const int count = ARRAY_SIZE(architectures);
+
+	qsort(architectures, count, sizeof(struct arch), arch__cmp);
+}
+
+/*
+ * Look up an architecture by name, sorting the table the first time this
+ * is called.  Returns NULL when no entry matches.
+ */
+static struct arch *arch__find(const char *name)
+{
+	static bool sorted;
+	const int count = ARRAY_SIZE(architectures);
+
+	if (!sorted) {
+		arch__sort();
+		sorted = true;
+	}
+
+	return bsearch(name, architectures, count, sizeof(struct arch), arch__key_cmp);
+}
+
+/*
+ * Allocate the annotation source of @sym together with one sample
+ * histogram per event (symbol_conf.nr_events), each histogram having one
+ * entry per byte of the symbol.
+ *
+ * Returns 0 on success, -1 when a size computation would overflow or the
+ * allocation fails.
+ */
+int symbol__alloc_hist(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	size_t nr_entries = symbol__size(sym);
+	size_t hist_bytes;
+
+	/*
+	 * A zero length symbol still gets one entry: a sample taken on
+	 * its first instruction is resolved to it, it shows up in perf
+	 * report and it can be annotated.
+	 */
+	if (nr_entries == 0)
+		nr_entries = 1;
+
+	/* The size of one histogram must not overflow ... */
+	if (nr_entries > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry))
+		return -1;
+
+	hist_bytes = sizeof(struct sym_hist) + nr_entries * sizeof(struct sym_hist_entry);
+
+	/* ... and neither must the size of the whole allocation. */
+	if (hist_bytes > (SIZE_MAX - sizeof(*notes->src)) / symbol_conf.nr_events)
+		return -1;
+
+	notes->src = zalloc(sizeof(*notes->src) + symbol_conf.nr_events * hist_bytes);
+	if (!notes->src)
+		return -1;
+
+	notes->src->sizeof_sym_hist = hist_bytes;
+	notes->src->nr_histograms   = symbol_conf.nr_events;
+	INIT_LIST_HEAD(&notes->src->source);
+	return 0;
+}
+
+/*
+ * Allocate the (lazily created) cycles histogram of @sym, one zeroed entry
+ * per byte of the symbol.  Returns 0 on success, -1 on allocation failure.
+ */
+static int symbol__alloc_hist_cycles(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct cyc_hist *hist = calloc(symbol__size(sym), sizeof(struct cyc_hist));
+
+	notes->src->cycles_hist = hist;
+
+	return hist ? 0 : -1;
+}
+
+/*
+ * Reset all sample histograms of @sym, and its cycles histogram when one
+ * has been allocated, to zero.  Done with the annotation lock held.
+ */
+void symbol__annotate_zero_histograms(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+
+	pthread_mutex_lock(&notes->lock);
+
+	if (notes->src == NULL)
+		goto out_unlock;
+
+	memset(notes->src->histograms, 0,
+	       notes->src->nr_histograms * notes->src->sizeof_sym_hist);
+
+	if (notes->src->cycles_hist)
+		memset(notes->src->cycles_hist, 0,
+		       symbol__size(sym) * sizeof(struct cyc_hist));
+
+out_unlock:
+	pthread_mutex_unlock(&notes->lock);
+}
+
+/*
+ * Account @cycles to the cycles histogram entry at @offset.
+ *
+ * @start is the offset of the basic block start and is only meaningful
+ * when @have_start is non-zero.  The aggregate counters are always
+ * updated; the per-block counters follow the "longest block wins" rule
+ * described below.  Always returns 0.
+ */
+static int __symbol__account_cycles(struct annotation *notes,
+				    u64 start,
+				    unsigned offset, unsigned cycles,
+				    unsigned have_start)
+{
+	struct cyc_hist *ch;
+
+	ch = notes->src->cycles_hist;
+	/*
+	 * For now we can only account one basic block per
+	 * final jump. But multiple could be overlapping.
+	 * Always account the longest one. So when
+	 * a shorter one has been already seen throw it away.
+	 *
+	 * We separately always account the full cycles.
+	 */
+	ch[offset].num_aggr++;
+	ch[offset].cycles_aggr += cycles;
+
+	/* a block without a known start never replaces one that has a start */
+	if (!have_start && ch[offset].have_start)
+		return 0;
+	if (ch[offset].num) {
+		if (have_start && (!ch[offset].have_start ||
+				   ch[offset].start > start)) {
+			/* the new block starts earlier: restart the accounting */
+			ch[offset].have_start = 0;
+			ch[offset].cycles = 0;
+			ch[offset].num = 0;
+			/* saturating count of how often this entry was restarted */
+			if (ch[offset].reset < 0xffff)
+				ch[offset].reset++;
+		} else if (have_start &&
+			   ch[offset].start < start)
+			/* the new block starts later than the recorded one: ignore it */
+			return 0;
+	}
+	ch[offset].have_start = have_start;
+	ch[offset].start = start;
+	ch[offset].cycles += cycles;
+	ch[offset].num++;
+	return 0;
+}
+
+/*
+ * Account one sample at @addr to the histogram of event @evidx of @sym.
+ *
+ * Both the per-histogram totals and the entry at offset
+ * (@addr - sym->start) are incremented by one sample and by
+ * sample->period.
+ *
+ * Returns 0 on success, -ERANGE when @addr is outside the symbol.
+ */
+static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
+				      struct annotation *notes, int evidx, u64 addr,
+				      struct perf_sample *sample)
+{
+	unsigned offset;
+	struct sym_hist *h;
+
+	pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr));
+
+	/*
+	 * Reject addresses outside [start, end), except for a zero length
+	 * symbol (start == end), where addr == end is accepted.
+	 */
+	if ((addr < sym->start || addr >= sym->end) &&
+	    (addr != sym->end || sym->start != sym->end)) {
+		pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n",
+		       __func__, __LINE__, sym->name, sym->start, addr, sym->end);
+		return -ERANGE;
+	}
+
+	offset = addr - sym->start;
+	h = annotation__histogram(notes, evidx);
+	h->nr_samples++;
+	h->addr[offset].nr_samples++;
+	h->period += sample->period;
+	h->addr[offset].period += sample->period;
+
+	pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
+		  ", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n",
+		  sym->start, sym->name, addr, addr - sym->start, evidx,
+		  h->addr[offset].nr_samples, h->addr[offset].period);
+	return 0;
+}
+
+/*
+ * Return the annotation of @sym, allocating its sample histograms on first
+ * use and, when @cycles is set, its cycles histogram as well.  Returns NULL
+ * if an allocation fails.
+ */
+static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles)
+{
+	struct annotation *notes = symbol__annotation(sym);
+
+	if (notes->src == NULL && symbol__alloc_hist(sym) < 0)
+		return NULL;
+
+	if (cycles && !notes->src->cycles_hist &&
+	    symbol__alloc_hist_cycles(sym) < 0)
+		return NULL;
+
+	return notes;
+}
+
+/*
+ * Account one sample at @addr to @sym for event @evidx.  A NULL @sym is
+ * silently ignored.  Returns 0 on success, -ENOMEM when the histograms
+ * cannot be allocated, or the error from __symbol__inc_addr_samples().
+ */
+static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
+				    int evidx, u64 addr,
+				    struct perf_sample *sample)
+{
+	struct annotation *notes;
+
+	if (!sym)
+		return 0;
+
+	notes = symbol__get_annotation(sym, false);
+	if (!notes)
+		return -ENOMEM;
+
+	return __symbol__inc_addr_samples(sym, map, notes, evidx, addr, sample);
+}
+
+/*
+ * Account @cycles for the basic block ending at @addr in @sym.
+ *
+ * @start, when non-zero, is the address where the block begins; it is
+ * dropped when it is not below @addr.  A NULL @sym is silently ignored.
+ *
+ * Returns 0 on success, -ENOMEM when the histograms cannot be allocated,
+ * -ERANGE when @addr or a non-zero @start lies outside the symbol.
+ */
+static int symbol__account_cycles(u64 addr, u64 start,
+				  struct symbol *sym, unsigned cycles)
+{
+	struct annotation *notes;
+	u64 start_offset = 0;
+	unsigned have_start = 0;
+
+	if (!sym)
+		return 0;
+
+	notes = symbol__get_annotation(sym, true);
+	if (!notes)
+		return -ENOMEM;
+
+	if (addr < sym->start || addr >= sym->end)
+		return -ERANGE;
+
+	if (start != 0) {
+		if (start < sym->start || start >= sym->end)
+			return -ERANGE;
+		/* a block cannot start at or after its end */
+		if (start >= addr)
+			start = 0;
+	}
+
+	if (start != 0) {
+		start_offset = start - sym->start;
+		have_start = 1;
+	}
+
+	return __symbol__account_cycles(notes, start_offset,
+					addr - sym->start, cycles,
+					have_start);
+}
+
+/*
+ * Account @cycles for the basic block that ends at @ams and, when known,
+ * begins at @start.
+ *
+ * Does nothing when @cycles is 0.  Returns the result of
+ * symbol__account_cycles().
+ */
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+				    struct addr_map_symbol *start,
+				    unsigned cycles)
+{
+	u64 saddr = 0;
+	int err;
+
+	if (!cycles)
+		return 0;
+
+	/*
+	 * Only set start when IPC can be computed. We can only
+	 * compute it when the basic block is completely in a single
+	 * function.
+	 * Special case the case when the jump is elsewhere, but
+	 * it starts on the function start.
+	 */
+	if (start &&
+		(start->sym == ams->sym ||
+		 (ams->sym &&
+		   start->addr == ams->sym->start + ams->map->start)))
+		saddr = start->al_addr;
+	/* saddr == 0 means the block start is unknown or unusable */
+	if (saddr == 0)
+		pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n",
+			ams->addr,
+			start ? start->addr : 0,
+			ams->sym ? ams->sym->start + ams->map->start : 0,
+			saddr);
+	err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
+	if (err)
+		pr_debug2("account_cycles failed %d\n", err);
+	return err;
+}
+
+/*
+ * Count the offsets in [@start, @end] (both inclusive) that have an
+ * annotation line attached in notes->offsets[].
+ */
+static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 end)
+{
+	unsigned count = 0;
+	u64 pos = start;
+
+	while (pos <= end) {
+		if (notes->offsets[pos] != NULL)
+			count++;
+		pos++;
+	}
+
+	return count;
+}
+
+/*
+ * Compute the IPC of the block [@start, @end] from @ch (instructions in
+ * the block divided by the average cycles per occurrence) and store it in
+ * every annotation line of the block.  Nothing is stored when the block
+ * has no instructions, no cycle data, or was reset too often.
+ */
+static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch)
+{
+	unsigned n_insn = annotation__count_insn(notes, start, end);
+	float ipc;
+	u64 pos;
+
+	if (!n_insn || !ch->num || !ch->cycles)
+		return;
+
+	/* Hide data when there are too many overlaps. */
+	if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2)
+		return;
+
+	ipc = n_insn / ((double)ch->cycles / (double)ch->num);
+
+	for (pos = start; pos <= end; pos++) {
+		struct annotation_line *al = notes->offsets[pos];
+
+		if (al != NULL)
+			al->ipc = ipc;
+	}
+}
+
+/*
+ * Walk the first @size entries of the cycles histogram and, for every
+ * entry that recorded cycles, fill in the IPC of its basic block (when the
+ * block start is known) and the average cycles of the line at that offset.
+ * Does nothing when no cycles histogram was allocated.  The walk is done
+ * with the annotation lock held.
+ */
+void annotation__compute_ipc(struct annotation *notes, size_t size)
+{
+	u64 offset;
+
+	if (!notes->src || !notes->src->cycles_hist)
+		return;
+
+	pthread_mutex_lock(&notes->lock);
+
+	for (offset = 0; offset < size; ++offset) {
+		struct cyc_hist *ch = &notes->src->cycles_hist[offset];
+		struct annotation_line *al;
+
+		if (!ch->cycles)
+			continue;
+
+		if (ch->have_start)
+			annotation__count_and_fill(notes, ch->start, offset, ch);
+
+		al = notes->offsets[offset];
+		if (al && ch->num_aggr)
+			al->cycles = ch->cycles_aggr / ch->num_aggr;
+
+		notes->have_cycles = true;
+	}
+
+	pthread_mutex_unlock(&notes->lock);
+}
+
+/* Account @sample at ams->al_addr to the symbol of @ams for event @evidx. */
+int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
+				 int evidx)
+{
+	struct symbol *sym = ams->sym;
+	struct map *map = ams->map;
+
+	return symbol__inc_addr_samples(sym, map, evidx, ams->al_addr, sample);
+}
+
+/* Account @sample at address @ip to the symbol of @he for event @evidx. */
+int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
+				 int evidx, u64 ip)
+{
+	struct symbol *sym = he->ms.sym;
+	struct map *map = he->ms.map;
+
+	return symbol__inc_addr_samples(sym, map, evidx, ip, sample);
+}
+
+/*
+ * Attach instruction operations to @dl based on its mnemonic and let them
+ * parse the operands.  If parsing fails the operations are detached again,
+ * so the line is later printed raw.
+ */
+static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
+{
+	struct ins_ops *ops = ins__find(arch, dl->ins.name);
+
+	dl->ins.ops = ops;
+
+	if (ops == NULL || ops->parse == NULL)
+		return;
+
+	if (ops->parse(arch, &dl->ops, ms) < 0)
+		dl->ins.ops = NULL;
+}
+
+/*
+ * Split a disassembly line into mnemonic and operands.
+ *
+ * Leading whitespace of @line is skipped; the first whitespace delimited
+ * word is duplicated into *@namep (the caller owns and frees it) and
+ * *@rawp is pointed at the first non-blank character after it, inside
+ * @line.  @line is modified only temporarily and is restored before
+ * returning, on success and on failure alike.
+ *
+ * Returns 0 on success, -1 when the line is blank or the mnemonic cannot
+ * be duplicated; in the latter case *@namep is NULL.
+ */
+static int disasm_line__parse(char *line, const char **namep, char **rawp)
+{
+	char tmp, *name = ltrim(line);
+
+	if (name[0] == '\0')
+		return -1;
+
+	*rawp = name + 1;
+
+	while ((*rawp)[0] != '\0' && !isspace((*rawp)[0]))
+		++*rawp;
+
+	/* temporarily terminate the mnemonic to copy it */
+	tmp = (*rawp)[0];
+	(*rawp)[0] = '\0';
+	*namep = strdup(name);
+	(*rawp)[0] = tmp;
+
+	/*
+	 * Nothing to free on failure: *namep is NULL, and namep itself is
+	 * the caller's storage, which must never be passed to free().
+	 */
+	if (*namep == NULL)
+		return -1;
+
+	*rawp = ltrim(*rawp);
+
+	return 0;
+}
+
+/* Arguments passed down while building the annotation lines of one symbol. */
+struct annotate_args {
+	size_t			 privsize;	/* caller private bytes placed in front of each line */
+	struct arch		*arch;		/* used to look up instruction operations */
+	struct map_symbol	 ms;		/* map and symbol being annotated */
+	struct perf_evsel	*evsel;		/* event; a group event gets one sample slot per member */
+	s64			 offset;	/* offset of the line from the symbol start, -1 if not disassembly */
+	char			*line;		/* text of the line; duplicated by annotation_line__new() */
+	int			 line_nr;	/* source line number recorded for the line */
+};
+
+/*
+ * Free an annotation line: its source path, its text and the allocation
+ * that contains it.
+ */
+static void annotation_line__delete(struct annotation_line *al)
+{
+	free_srcline(al->path);
+	zfree(&al->line);
+
+	/* the allocation begins al->privsize bytes before the line itself */
+	free((void *) al - al->privsize);
+}
+
+/*
+ * Allocate an annotation line laid out as:
+ *
+ *    --------------------------------------
+ *    private space | struct annotation_line
+ *    --------------------------------------
+ *
+ * followed by one sample slot per event (one per group member for a group
+ * event, a single one otherwise).  The size of the private space is
+ * remembered in the line so that it can be freed later.
+ *
+ * Returns a pointer to the annotation_line part, or NULL when the
+ * allocation fails.  The line text is duplicated from args->line; the
+ * caller has to check ->line for NULL.
+ */
+static struct annotation_line *
+annotation_line__new(struct annotate_args *args, size_t privsize)
+{
+	struct perf_evsel *evsel = args->evsel;
+	struct annotation_line *al;
+	int nr = perf_evsel__is_group_event(evsel) ? evsel->nr_members : 1;
+	void *base;
+
+	base = zalloc(privsize + sizeof(*al) + sizeof(al->samples[0]) * nr);
+	if (base == NULL)
+		return NULL;
+
+	al = base + privsize;
+	al->privsize   = privsize;
+	al->offset     = args->offset;
+	al->line       = strdup(args->line);
+	al->line_nr    = args->line_nr;
+	al->samples_nr = nr;
+
+	return al;
+}
+
+/*
+ * Allocating the disasm annotation line data with
+ * following structure:
+ *
+ *    ------------------------------------------------------------
+ *    privsize space | struct disasm_line | struct annotation_line
+ *    ------------------------------------------------------------
+ *
+ * We have 'struct annotation_line' member as last member
+ * of 'struct disasm_line' to have an easy access.
+ *
+ */
+/*
+ * Allocate a disasm_line for args->line and, for real disassembly lines
+ * (args->offset != -1), split it into mnemonic and operands and attach the
+ * matching instruction operations.
+ *
+ * Returns the new line, or NULL when the allocation, the copy of the line
+ * text or the parsing of the line fails.
+ */
+static struct disasm_line *disasm_line__new(struct annotate_args *args)
+{
+	struct disasm_line *dl;
+	struct annotation_line *al;
+	size_t privsize = args->privsize + offsetof(struct disasm_line, al);
+
+	al = annotation_line__new(args, privsize);
+	if (al == NULL)
+		return NULL;
+
+	dl = disasm_line(al);
+
+	if (dl->al.line == NULL)
+		goto out_delete;
+
+	if (args->offset != -1) {
+		if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+			goto out_free_line;
+
+		disasm_line__init_ins(dl, args->arch, &args->ms);
+	}
+
+	return dl;
+
+out_free_line:
+	zfree(&dl->al.line);
+out_delete:
+	/*
+	 * The allocation starts at the caller's private space, which sits
+	 * args->privsize bytes in front of the disasm_line.
+	 */
+	free((void *)dl - args->privsize);
+	return NULL;
+}
+
+/*
+ * Free a disasm_line: its operands (through the instruction's ->free()
+ * when it has one), its mnemonic and the underlying annotation line.
+ */
+void disasm_line__free(struct disasm_line *dl)
+{
+	struct ins_ops *ins_ops = dl->ins.ops;
+
+	if (ins_ops == NULL || ins_ops->free == NULL)
+		ins__delete(&dl->ops);
+	else
+		ins_ops->free(&dl->ops);
+
+	free((void *)dl->ins.name);
+	dl->ins.name = NULL;
+
+	annotation_line__delete(&dl->al);
+}
+
+/*
+ * Format @dl into @bf.  The instruction specific formatter is used unless
+ * @raw is set or the line has no instruction operations, in which case the
+ * mnemonic and the raw operand string are printed.
+ */
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw)
+{
+	if (!raw && dl->ins.ops)
+		return ins__scnprintf(&dl->ins, bf, size, &dl->ops);
+
+	return scnprintf(bf, size, "%-6s %s", dl->ins.name, dl->ops.raw);
+}
+
+/* Append @al to the tail of the list @head. */
+static void annotation_line__add(struct annotation_line *al, struct list_head *head)
+{
+	list_add_tail(&al->node, head);
+}
+
+/*
+ * Return the first line after @pos in @head whose offset is >= 0, or NULL
+ * when there is none before the end of the list.
+ */
+struct annotation_line *
+annotation_line__next(struct annotation_line *pos, struct list_head *head)
+{
+	list_for_each_entry_continue(pos, head, node)
+		if (pos->offset >= 0)
+			return pos;
+
+	return NULL;
+}
+
+/*
+ * Pick the color of the address column from the coverage of @br: red
+ * above 75%, normal below 1%, magenta otherwise (including when the
+ * coverage is negative).
+ */
+static const char *annotate__address_color(struct block_range *br)
+{
+	const double cov = block_range__coverage(br);
+
+	if (cov < 0)
+		return PERF_COLOR_MAGENTA;
+
+	/* mark red for >75% coverage */
+	if (cov > 0.75)
+		return PERF_COLOR_RED;
+
+	/* mark dull for <1% coverage */
+	if (cov < 0.01)
+		return PERF_COLOR_NORMAL;
+
+	return PERF_COLOR_MAGENTA;
+}
+
+/*
+ * Pick the color of the disassembly text from the coverage of @br: normal
+ * for a non-negative coverage below 1%, blue otherwise.
+ */
+static const char *annotate__asm_color(struct block_range *br)
+{
+	const double cov = block_range__coverage(br);
+
+	/* mark dull for <1% coverage */
+	if (cov >= 0 && cov < 0.01)
+		return PERF_COLOR_NORMAL;
+
+	return PERF_COLOR_BLUE;
+}
+
+/*
+ * Print branch statistics for the line at @addr as a trailing "\t#"
+ * comment: " +x%" when @addr is the start of a branch target range, and
+ * " -x% (p:y%)" when @addr is the end of a range that ends in a branch.
+ * Values of 0.1% or less are not printed.  Does nothing without a block
+ * range.
+ */
+static void annotate__branch_printf(struct block_range *br, u64 addr)
+{
+	/* true until the "\t#" comment introducer has been printed */
+	bool emit_comment = true;
+
+	if (!br)
+		return;
+
+#if 1
+	if (br->is_target && br->start == addr) {
+		struct block_range *branch = br;
+		double p;
+
+		/*
+		 * Find matching branch to our target.
+		 */
+		while (!branch->is_branch)
+			branch = block_range__next(branch);
+
+		p = 100 *(double)br->entry / branch->coverage;
+
+		if (p > 0.1) {
+			if (emit_comment) {
+				emit_comment = false;
+				printf("\t#");
+			}
+
+			/*
+			 * The percentage of coverage joined at this target in relation
+			 * to the next branch.
+			 */
+			printf(" +%.2f%%", p);
+		}
+	}
+#endif
+	if (br->is_branch && br->end == addr) {
+		double p = 100*(double)br->taken / br->coverage;
+
+		if (p > 0.1) {
+			if (emit_comment) {
+				emit_comment = false;
+				printf("\t#");
+			}
+
+			/*
+			 * The percentage of coverage leaving at this branch, and
+			 * its prediction ratio.
+			 */
+			printf(" -%.2f%% (p:%.2f%%)", p, 100*(double)br->taken ? 100*(double)br->pred  / br->taken : 100*(double)br->pred  / br->taken);
+		}
+	}
+}
+
+/*
+ * Print the address (@start plus the line offset), the line text and the
+ * branch statistics of one disassembly line to stdout, colored by the
+ * coverage of the block range the address falls in.  Always returns 0.
+ */
+static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_width)
+{
+	const u64 addr = start + dl->al.offset;
+	struct block_range *br = block_range__find(addr);
+
+	color_fprintf(stdout, annotate__address_color(br), "  %*" PRIx64 ":", addr_fmt_width, addr);
+	color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line);
+	annotate__branch_printf(br, addr);
+
+	return 0;
+}
+
+/*
+ * Print one annotation line to stdout.
+ *
+ * Disassembly lines (offset != -1) are printed with one percent / period /
+ * sample-count column per sample slot of the line, so that group events
+ * get one column per member, matching the width reserved for the other
+ * lines.  Lines whose highest percentage is below @min_pcnt are skipped.
+ * When @queue is given, the lines from @queue up to (not including) @al
+ * are printed first as context.
+ *
+ * Returns 0 when the line was printed, -1 when it was filtered out (or is
+ * a non-disassembly line while a queue is active), 1 when @max_lines
+ * lines have already been printed.
+ */
+static int
+annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start,
+		       struct perf_evsel *evsel, u64 len, int min_pcnt, int printed,
+		       int max_lines, struct annotation_line *queue, int addr_fmt_width)
+{
+	struct disasm_line *dl = container_of(al, struct disasm_line, al);
+	static const char *prev_line;
+	static const char *prev_color;
+
+	if (al->offset != -1) {
+		double max_percent = 0.0;
+		int i;
+		const char *color;
+		struct annotation *notes = symbol__annotation(sym);
+
+		for (i = 0; i < al->samples_nr; i++) {
+			struct annotation_data *sample = &al->samples[i];
+
+			if (sample->percent > max_percent)
+				max_percent = sample->percent;
+		}
+
+		if (max_percent < min_pcnt)
+			return -1;
+
+		if (max_lines && printed >= max_lines)
+			return 1;
+
+		if (queue != NULL) {
+			list_for_each_entry_from(queue, &notes->src->source, node) {
+				if (queue == al)
+					break;
+				annotation_line__print(queue, sym, start, evsel, len,
+						       0, 0, 1, NULL, addr_fmt_width);
+			}
+		}
+
+		color = get_percent_color(max_percent);
+
+		/*
+		 * Also color the filename and line if needed, with
+		 * the same color than the percentage. Don't print it
+		 * twice for close colored addr with the same filename:line
+		 */
+		if (al->path) {
+			if (!prev_line || strcmp(prev_line, al->path)
+				       || color != prev_color) {
+				color_fprintf(stdout, color, " %s", al->path);
+				prev_line = al->path;
+				prev_color = color;
+			}
+		}
+
+		/* one column per sample slot, not just the first one */
+		for (i = 0; i < al->samples_nr; i++) {
+			struct annotation_data *sample = &al->samples[i];
+
+			color = get_percent_color(sample->percent);
+
+			if (symbol_conf.show_total_period)
+				color_fprintf(stdout, color, " %11" PRIu64,
+					      sample->he.period);
+			else if (symbol_conf.show_nr_samples)
+				color_fprintf(stdout, color, " %7" PRIu64,
+					      sample->he.nr_samples);
+			else
+				color_fprintf(stdout, color, " %7.2f", sample->percent);
+		}
+
+		printf(" : ");
+
+		disasm_line__print(dl, start, addr_fmt_width);
+		printf("\n");
+	} else if (max_lines && printed >= max_lines)
+		return 1;
+	else {
+		int width = symbol_conf.show_total_period ? 12 : 8;
+
+		if (queue)
+			return -1;
+
+		if (perf_evsel__is_group_event(evsel))
+			width *= evsel->nr_members;
+
+		if (!*al->line)
+			printf(" %*s:\n", width, " ");
+		else
+			printf(" %*s:     %*s %s\n", width, " ", addr_fmt_width, " ", al->line);
+	}
+
+	return 0;
+}
+
+/*
+ * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw)
+ * which looks like following
+ *
+ *  0000000000415500 <_init>:
+ *    415500:       sub    $0x8,%rsp
+ *    415504:       mov    0x2f5ad5(%rip),%rax        # 70afe0 <_DYNAMIC+0x2f8>
+ *    41550b:       test   %rax,%rax
+ *    41550e:       je     415515 <_init+0x15>
+ *    415510:       callq  416e70 <__gmon_start__@plt>
+ *    415515:       add    $0x8,%rsp
+ *    415519:       retq
+ *
+ * it will be parsed and saved into struct disasm_line as
+ *  <offset>       <name>  <ops.raw>
+ *
+ * The offset will be a relative offset from the start of the symbol and -1
+ * means that it's not a disassembly line so should be treated differently.
+ * The ops.raw part will be parsed further according to type of the instruction.
+ */
+/*
+ * Read one line of objdump output from @file and turn it into a
+ * disasm_line appended to the annotation source of @sym.
+ *
+ * "/path:NN" lines only update *@line_nr.  For every other line *@line_nr
+ * is incremented after the line has been recorded.  The buffer returned by
+ * getline() is released on every path out of this function.
+ *
+ * Returns 0 on success, -1 on end of file, read error or allocation
+ * failure.
+ */
+static int symbol__parse_objdump_line(struct symbol *sym, FILE *file,
+				      struct annotate_args *args,
+				      int *line_nr)
+{
+	struct map *map = args->ms.map;
+	struct annotation *notes = symbol__annotation(sym);
+	struct disasm_line *dl;
+	char *line = NULL, *parsed_line, *tmp, *tmp2;
+	size_t line_len;
+	s64 line_ip, offset = -1;
+	regmatch_t match[2];
+
+	if (getline(&line, &line_len, file) < 0) {
+		/* getline() may have allocated a buffer even though it failed */
+		free(line);
+		return -1;
+	}
+
+	if (!line)
+		return -1;
+
+	line_ip = -1;
+	parsed_line = rtrim(line);
+
+	/* /filename:linenr ? Save line number and ignore. */
+	if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
+		*line_nr = atoi(parsed_line + match[1].rm_so);
+		free(line);
+		return 0;
+	}
+
+	tmp = ltrim(parsed_line);
+	if (*tmp) {
+		/*
+		 * Parse hexadecimal addresses followed by ':'
+		 */
+		line_ip = strtoull(tmp, &tmp2, 16);
+		if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0')
+			line_ip = -1;
+	}
+
+	if (line_ip != -1) {
+		u64 start = map__rip_2objdump(map, sym->start),
+		    end = map__rip_2objdump(map, sym->end);
+
+		offset = line_ip - start;
+		if ((u64)line_ip < start || (u64)line_ip >= end)
+			offset = -1;
+		else
+			parsed_line = tmp2 + 1;
+	}
+
+	args->offset  = offset;
+	args->line    = parsed_line;
+	args->line_nr = *line_nr;
+	args->ms.sym  = sym;
+
+	/* the line text is duplicated, so the getline() buffer can go */
+	dl = disasm_line__new(args);
+	free(line);
+	(*line_nr)++;
+
+	if (dl == NULL)
+		return -1;
+
+	if (!disasm_line__has_local_offset(dl)) {
+		dl->ops.target.offset = dl->ops.target.addr -
+					map__rip_2objdump(map, sym->start);
+		dl->ops.target.offset_avail = true;
+	}
+
+	/* kcore has no symbols, so add the call target symbol */
+	if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) {
+		struct addr_map_symbol target = {
+			.map = map,
+			.addr = dl->ops.target.addr,
+		};
+
+		if (!map_groups__find_ams(&target) &&
+		    target.sym->start == target.al_addr)
+			dl->ops.target.sym = target.sym;
+	}
+
+	annotation_line__add(&dl->al, &notes->src->source);
+
+	return 0;
+}
+
+/*
+ * Constructor: compile the regular expression matching "/path:NN" lines,
+ * with the line number as the first capture group.  The result of
+ * regcomp() is not checked.
+ */
+static __attribute__((constructor)) void symbol__init_regexpr(void)
+{
+	regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED);
+}
+
+/*
+ * Drop trailing nop lines from the annotation source of @sym.
+ *
+ * Lines are removed from the end of the list for as long as the last one
+ * is a nop: either its operations are nop_ops or, when it has no
+ * operations, its text contains " nop ", " nopl " or " nopw ".
+ */
+static void delete_last_nop(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct list_head *list = &notes->src->source;
+
+	while (!list_empty(list)) {
+		struct disasm_line *dl = list_entry(list->prev, struct disasm_line, al.node);
+		bool is_nop;
+
+		if (dl->ins.ops)
+			is_nop = dl->ins.ops == &nop_ops;
+		else
+			is_nop = strstr(dl->al.line, " nop ") ||
+				 strstr(dl->al.line, " nopl ") ||
+				 strstr(dl->al.line, " nopw ");
+
+		if (!is_nop)
+			return;
+
+		list_del(&dl->al.node);
+		disasm_line__free(dl);
+	}
+}
+
+/*
+ * Turn an error from the disassembly code into a message in @buf.
+ *
+ * Non-negative values of @errnum are formatted with str_error_r();
+ * negative values are the SYMBOL_ANNOTATE_ERRNO__* codes handled below.
+ * @buflen must not be 0.  Always returns 0.
+ */
+int symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map *map,
+			      int errnum, char *buf, size_t buflen)
+{
+	struct dso *dso = map->dso;
+
+	BUG_ON(buflen == 0);
+
+	if (errnum >= 0) {
+		str_error_r(errnum, buf, buflen);
+		return 0;
+	}
+
+	switch (errnum) {
+	case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: {
+		/* the build id is written right after the 15 character prefix */
+		char bf[SBUILD_ID_SIZE + 15] = " with build id ";
+		char *build_id_msg = NULL;
+
+		if (dso->has_build_id) {
+			build_id__sprintf(dso->build_id,
+					  sizeof(dso->build_id), bf + 15);
+			build_id_msg = bf;
+		}
+		scnprintf(buf, buflen,
+			  "No vmlinux file%s\nwas found in the path.\n\n"
+			  "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
+			  "Please use:\n\n"
+			  "  perf buildid-cache -vu vmlinux\n\n"
+			  "or:\n\n"
+			  "  --vmlinux vmlinux\n", build_id_msg ?: "");
+	}
+		break;
+	default:
+		scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Work out which file to hand to objdump for @dso and store its path in
+ * @filename (at most @filename_size bytes).
+ *
+ * The build-id cache entry is used when it exists, is readable and is not
+ * a kallsyms file; otherwise the DSO's long name is used.
+ *
+ * Returns 0 on success, SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX for a kallsyms
+ * based DSO that is not kcore, or ENOMEM when memory runs out.  Positive
+ * errno values are used for allocation failures so that
+ * symbol__strerror_disassemble() can describe them.
+ */
+static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size)
+{
+	char linkname[PATH_MAX];
+	char *build_id_filename;
+	char *build_id_path = NULL;
+	char *pos;
+	ssize_t len;
+
+	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+	    !dso__is_kcore(dso))
+		return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;
+
+	build_id_filename = dso__build_id_filename(dso, NULL, 0, false);
+	if (build_id_filename) {
+		__symbol__join_symfs(filename, filename_size, build_id_filename);
+		free(build_id_filename);
+	} else {
+		if (dso->has_build_id)
+			return ENOMEM;
+		goto fallback;
+	}
+
+	build_id_path = strdup(filename);
+	if (!build_id_path)
+		return ENOMEM;
+
+	/*
+	 * old style build-id cache has name of XX/XXXXXXX.. while
+	 * new style has XX/XXXXXXX../{elf,kallsyms,vdso}.
+	 * extract the build-id part of dirname in the new style only.
+	 */
+	pos = strrchr(build_id_path, '/');
+	if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
+		dirname(build_id_path);
+
+	if (dso__is_kcore(dso))
+		goto fallback;
+
+	/* readlink() does not NUL terminate: leave room and do it here */
+	len = readlink(build_id_path, linkname, sizeof(linkname) - 1);
+	if (len < 0)
+		goto fallback;
+
+	linkname[len] = '\0';
+	if (strstr(linkname, DSO__NAME_KALLSYMS) ||
+	    access(filename, R_OK)) {
+fallback:
+		/*
+		 * If we don't have build-ids or the build-id file isn't in the
+		 * cache, or is just a kallsyms file, well, lets hope that this
+		 * DSO is the same as when 'perf record' ran.
+		 */
+		__symbol__join_symfs(filename, filename_size, dso->long_name);
+	}
+
+	free(build_id_path);
+	return 0;
+}
+
+/*
+ * Disassemble @sym: run objdump through "sh -c" and hand each output line to
+ * symbol__parse_objdump_line().  Returns 0, or non-zero on failure.
+ * NOTE(review): the forked shell is never waited for here; confirm it is reaped.
+ */
+static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
+{
+	struct map *map = args->ms.map;
+	struct dso *dso = map->dso;
+	char *command;
+	FILE *file;
+	char symfs_filename[PATH_MAX];
+	struct kcore_extract kce;
+	bool delete_extract = false;
+	int stdout_fd[2] = { -1, -1 };	/* -1 == not open, so the cleanup path can tell */
+	int lineno = 0;
+	int nline;
+	pid_t pid;
+	int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
+
+	if (err)
+		return err;
+
+	pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
+		 symfs_filename, sym->name, map->unmap_ip(map, sym->start),
+		 map->unmap_ip(map, sym->end));
+
+	pr_debug("annotating [%p] %30s : [%p] %30s\n",
+		 dso, dso->long_name, sym, sym->name);
+
+	if (dso__is_kcore(dso)) {
+		kce.kcore_filename = symfs_filename;
+		kce.addr = map__rip_2objdump(map, sym->start);
+		kce.offs = sym->start;
+		kce.len = sym->end - sym->start;
+		if (!kcore_extract__create(&kce)) {
+			delete_extract = true;
+			strlcpy(symfs_filename, kce.extract_filename,
+				sizeof(symfs_filename));
+		}
+	} else if (dso__needs_decompress(dso)) {
+		char tmp[KMOD_DECOMP_LEN];
+
+		err = dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp));
+		if (err < 0)	/* report the failure instead of returning the stale 0 */
+			goto out;
+
+		strcpy(symfs_filename, tmp);
+	}
+
+	err = asprintf(&command,
+		 "%s %s%s --start-address=0x%016" PRIx64
+		 " --stop-address=0x%016" PRIx64
+		 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
+		 objdump_path ? objdump_path : "objdump",
+		 disassembler_style ? "-M " : "",
+		 disassembler_style ? disassembler_style : "",
+		 map__rip_2objdump(map, sym->start),
+		 map__rip_2objdump(map, sym->end),
+		 symbol_conf.annotate_asm_raw ? "" : "--no-show-raw",
+		 symbol_conf.annotate_src ? "-S" : "",
+		 symfs_filename, symfs_filename);
+
+	if (err < 0) {
+		pr_err("Failure allocating memory for the command to run\n");
+		goto out_remove_tmp;
+	}
+
+	pr_debug("Executing: %s\n", command);
+
+	err = -1;
+	if (pipe(stdout_fd) < 0) {
+		pr_err("Failure creating the pipe to run %s\n", command);
+		goto out_free_command;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		pr_err("Failure forking to run %s\n", command);
+		goto out_close_stdout;
+	}
+
+	if (pid == 0) {
+		close(stdout_fd[0]);
+		dup2(stdout_fd[1], 1);
+		close(stdout_fd[1]);
+		execl("/bin/sh", "sh", "-c", command, NULL);
+		perror(command);
+		exit(-1);
+	}
+
+	close(stdout_fd[1]);
+
+	file = fdopen(stdout_fd[0], "r");
+	if (!file) {
+		pr_err("Failure creating FILE stream for %s\n", command);
+		/* If we were using debug info should retry with original binary. */
+		goto out_free_command;
+	}
+
+	nline = 0;
+	while (!feof(file)) {
+		/*
+		 * lineno is kept across calls to symbol__parse_objdump_line() so that
+		 * it can be associated with the instructions till the next one.
+		 * See disasm_line__new() and struct disasm_line::line_nr.
+		 */
+		if (symbol__parse_objdump_line(sym, file, args, &lineno) < 0)
+			break;
+		nline++;
+	}
+
+	if (nline == 0)
+		pr_err("No output from %s\n", command);
+
+	/* kallsyms does not have symbol sizes so there may be a nop at the end: remove it. */
+	if (dso__is_kcore(dso))
+		delete_last_nop(sym);
+
+	fclose(file);
+	stdout_fd[0] = -1;	/* fclose() already closed the underlying descriptor */
+	err = 0;
+out_free_command:
+	free(command);
+out_remove_tmp:
+	if (stdout_fd[0] >= 0)
+		close(stdout_fd[0]);
+
+	if (dso__needs_decompress(dso))
+		unlink(symfs_filename);
+
+	if (delete_extract)
+		kcore_extract__delete(&kce);
+out:
+	return err;
+
+out_close_stdout:
+	close(stdout_fd[1]);
+	goto out_free_command;
+}
+
+/* Sum hits and period over hist->addr[offset..end) and, if hist has samples, store them in @sample. */
+static void calc_percent(struct sym_hist *hist, struct annotation_data *sample,
+			 s64 offset, s64 end)
+{
+	u64 hits = 0;	/* same width as the per-address nr_samples counters being summed */
+	u64 period = 0;
+
+	while (offset < end) {
+		hits   += hist->addr[offset].nr_samples;
+		period += hist->addr[offset].period;
+		++offset;
+	}
+
+	if (hist->nr_samples) {
+		sample->he.period     = period;
+		sample->he.nr_samples = hits;
+		sample->percent = 100.0 * hits / hist->nr_samples;
+	}
+}
+
+static void annotation__calc_percent(struct annotation *notes,
+				     struct perf_evsel *evsel, s64 len)
+{
+	struct annotation_line *al, *next;
+
+	list_for_each_entry(al, &notes->src->source, node) {
+		s64 end;
+		int i;
+
+		if (al->offset == -1)
+			continue;
+
+		next = annotation_line__next(al, &notes->src->source);
+		end  = next ? next->offset : len;
+
+		for (i = 0; i < al->samples_nr; i++) {
+			struct annotation_data *sample;
+			struct sym_hist *hist;
+
+			hist   = annotation__histogram(notes, evsel->idx + i);
+			sample = &al->samples[i];
+
+			calc_percent(hist, sample, al->offset, end);
+		}
+	}
+}
+
+void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel)
+{
+	struct annotation *notes = symbol__annotation(sym);
+
+	annotation__calc_percent(notes, evsel, symbol__size(sym));
+}
+
+int symbol__annotate(struct symbol *sym, struct map *map,
+		     struct perf_evsel *evsel, size_t privsize,
+		     struct arch **parch)
+{
+	struct annotate_args args = {
+		.privsize	= privsize,
+		.evsel		= evsel,
+	};
+	struct perf_env *env = perf_evsel__env(evsel);
+	const char *arch_name = perf_env__arch(env);
+	struct arch *arch;
+	int err;
+
+	if (!arch_name)
+		return -1;
+
+	args.arch = arch = arch__find(arch_name);
+	if (arch == NULL)
+		return -ENOTSUP;
+
+	if (parch)
+		*parch = arch;
+
+	if (arch->init) {
+		err = arch->init(arch, env ? env->cpuid : NULL);
+		if (err) {
+			pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name);
+			return err;
+		}
+	}
+
+	args.ms.map = map;
+	args.ms.sym = sym;
+
+	return symbol__disassemble(sym, &args);
+}
+
+static void insert_source_line(struct rb_root *root, struct annotation_line *al)
+{
+	struct annotation_line *iter;
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	int i, ret;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct annotation_line, rb_node);
+
+		ret = strcmp(iter->path, al->path);
+		if (ret == 0) {
+			for (i = 0; i < al->samples_nr; i++)
+				iter->samples[i].percent_sum += al->samples[i].percent;
+			return;
+		}
+
+		if (ret < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	for (i = 0; i < al->samples_nr; i++)
+		al->samples[i].percent_sum = al->samples[i].percent;
+
+	rb_link_node(&al->rb_node, parent, p);
+	rb_insert_color(&al->rb_node, root);
+}
+
+static int cmp_source_line(struct annotation_line *a, struct annotation_line *b)
+{
+	int i;
+
+	for (i = 0; i < a->samples_nr; i++) {
+		if (a->samples[i].percent_sum == b->samples[i].percent_sum)
+			continue;
+		return a->samples[i].percent_sum > b->samples[i].percent_sum;
+	}
+
+	return 0;
+}
+
+static void __resort_source_line(struct rb_root *root, struct annotation_line *al)
+{
+	struct annotation_line *iter;
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct annotation_line, rb_node);
+
+		if (cmp_source_line(al, iter))
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&al->rb_node, parent, p);
+	rb_insert_color(&al->rb_node, root);
+}
+
+static void resort_source_line(struct rb_root *dest_root, struct rb_root *src_root)
+{
+	struct annotation_line *al;
+	struct rb_node *node;
+
+	node = rb_first(src_root);
+	while (node) {
+		struct rb_node *next;
+
+		al = rb_entry(node, struct annotation_line, rb_node);
+		next = rb_next(node);
+		rb_erase(node, src_root);
+
+		__resort_source_line(dest_root, al);
+		node = next;
+	}
+}
+
+static void print_summary(struct rb_root *root, const char *filename)
+{
+	struct annotation_line *al;
+	struct rb_node *node;
+
+	printf("\nSorted summary for file %s\n", filename);
+	printf("----------------------------------------------\n\n");
+
+	if (RB_EMPTY_ROOT(root)) {
+		printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
+		return;
+	}
+
+	node = rb_first(root);
+	while (node) {
+		double percent, percent_max = 0.0;
+		const char *color;
+		char *path;
+		int i;
+
+		al = rb_entry(node, struct annotation_line, rb_node);
+		for (i = 0; i < al->samples_nr; i++) {
+			percent = al->samples[i].percent_sum;
+			color = get_percent_color(percent);
+			color_fprintf(stdout, color, " %7.2f", percent);
+
+			if (percent > percent_max)
+				percent_max = percent;
+		}
+
+		path = al->path;
+		color = get_percent_color(percent_max);
+		color_fprintf(stdout, color, " %s\n", path);
+
+		node = rb_next(node);
+	}
+}
+
+static void symbol__annotate_hits(struct symbol *sym, struct perf_evsel *evsel)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct sym_hist *h = annotation__histogram(notes, evsel->idx);
+	u64 len = symbol__size(sym), offset;
+
+	for (offset = 0; offset < len; ++offset)
+		if (h->addr[offset].nr_samples != 0)
+			printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
+			       sym->start + offset, h->addr[offset].nr_samples);
+	printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples);
+}
+
+static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start)
+{
+	char bf[32];
+	struct annotation_line *line;
+
+	list_for_each_entry_reverse(line, lines, node) {
+		if (line->offset != -1)
+			return scnprintf(bf, sizeof(bf), "%" PRIx64, start + line->offset);
+	}
+
+	return 0;
+}
+
+int symbol__annotate_printf(struct symbol *sym, struct map *map,
+			    struct perf_evsel *evsel, bool full_paths,
+			    int min_pcnt, int max_lines, int context)
+{
+	struct dso *dso = map->dso;
+	char *filename;
+	const char *d_filename;
+	const char *evsel_name = perf_evsel__name(evsel);
+	struct annotation *notes = symbol__annotation(sym);
+	struct sym_hist *h = annotation__histogram(notes, evsel->idx);
+	struct annotation_line *pos, *queue = NULL;
+	u64 start = map__rip_2objdump(map, sym->start);
+	int printed = 2, queue_len = 0, addr_fmt_width;
+	int more = 0;
+	u64 len;
+	int width = symbol_conf.show_total_period ? 12 : 8;
+	int graph_dotted_len;
+
+	filename = strdup(dso->long_name);
+	if (!filename)
+		return -ENOMEM;
+
+	if (full_paths)
+		d_filename = filename;
+	else
+		d_filename = basename(filename);
+
+	len = symbol__size(sym);
+
+	if (perf_evsel__is_group_event(evsel))
+		width *= evsel->nr_members;
+
+	graph_dotted_len = printf(" %-*.*s|	Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n",
+				  width, width, symbol_conf.show_total_period ? "Period" :
+				  symbol_conf.show_nr_samples ? "Samples" : "Percent",
+				  d_filename, evsel_name, h->nr_samples);
+
+	printf("%-*.*s----\n",
+	       graph_dotted_len, graph_dotted_len, graph_dotted_line);
+
+	if (verbose > 0)
+		symbol__annotate_hits(sym, evsel);
+
+	addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source, start);
+
+	list_for_each_entry(pos, &notes->src->source, node) {
+		int err;
+
+		if (context && queue == NULL) {
+			queue = pos;
+			queue_len = 0;
+		}
+
+		err = annotation_line__print(pos, sym, start, evsel, len,
+					     min_pcnt, printed, max_lines,
+					     queue, addr_fmt_width);
+
+		switch (err) {
+		case 0:
+			++printed;
+			if (context) {
+				printed += queue_len;
+				queue = NULL;
+				queue_len = 0;
+			}
+			break;
+		case 1:
+			/* filtered by max_lines */
+			++more;
+			break;
+		case -1:
+		default:
+			/*
+			 * Filtered by min_pcnt or non IP lines when
+			 * context != 0
+			 */
+			if (!context)
+				break;
+			if (queue_len == context)
+				queue = list_entry(queue->node.next, typeof(*queue), node);
+			else
+				++queue_len;
+			break;
+		}
+	}
+
+	free(filename);
+
+	return more;
+}
+
+static void FILE__set_percent_color(void *fp __maybe_unused,
+				    double percent __maybe_unused,
+				    bool current __maybe_unused)
+{
+}
+
+static int FILE__set_jumps_percent_color(void *fp __maybe_unused,
+					 int nr __maybe_unused, bool current __maybe_unused)
+{
+	return 0;
+}
+
+static int FILE__set_color(void *fp __maybe_unused, int color __maybe_unused)
+{
+	return 0;
+}
+
+static void FILE__printf(void *fp, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vfprintf(fp, fmt, args);
+	va_end(args);
+}
+
+static void FILE__write_graph(void *fp, int graph)
+{
+	const char *s;
+	switch (graph) {
+
+	case DARROW_CHAR: s = "↓"; break;
+	case UARROW_CHAR: s = "↑"; break;
+	case LARROW_CHAR: s = "←"; break;
+	case RARROW_CHAR: s = "→"; break;
+	default:		s = "?"; break;
+	}
+
+	fputs(s, fp);
+}
+
+int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct annotation_write_ops ops = {
+		.first_line		 = true,
+		.obj			 = fp,
+		.set_color		 = FILE__set_color,
+		.set_percent_color	 = FILE__set_percent_color,
+		.set_jumps_percent_color = FILE__set_jumps_percent_color,
+		.printf			 = FILE__printf,
+		.write_graph		 = FILE__write_graph,
+	};
+	struct annotation_line *al;
+
+	list_for_each_entry(al, &notes->src->source, node) {
+		if (annotation_line__filter(al, notes))
+			continue;
+		annotation_line__write(al, notes, &ops);
+		fputc('\n', fp);
+		ops.first_line = false;
+	}
+
+	return 0;
+}
+
+int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel)
+{
+	const char *ev_name = perf_evsel__name(evsel);
+	char buf[1024];
+	char *filename;
+	int err = -1;
+	FILE *fp;
+
+	if (asprintf(&filename, "%s.annotation", ms->sym->name) < 0)
+		return -1;
+
+	fp = fopen(filename, "w");
+	if (fp == NULL)
+		goto out_free_filename;
+
+	if (perf_evsel__is_group_event(evsel)) {
+		perf_evsel__group_desc(evsel, buf, sizeof(buf));
+		ev_name = buf;
+	}
+
+	fprintf(fp, "%s() %s\nEvent: %s\n\n",
+		ms->sym->name, ms->map->dso->long_name, ev_name);
+	symbol__annotate_fprintf2(ms->sym, fp);
+
+	fclose(fp);
+	err = 0;
+out_free_filename:
+	free(filename);
+	return err;
+}
+
+void symbol__annotate_zero_histogram(struct symbol *sym, int evidx)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct sym_hist *h = annotation__histogram(notes, evidx);
+
+	memset(h, 0, notes->src->sizeof_sym_hist);
+}
+
+void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct sym_hist *h = annotation__histogram(notes, evidx);
+	int len = symbol__size(sym), offset;
+
+	h->nr_samples = 0;
+	for (offset = 0; offset < len; ++offset) {
+		h->addr[offset].nr_samples = h->addr[offset].nr_samples * 7 / 8;
+		h->nr_samples += h->addr[offset].nr_samples;
+	}
+}
+
+void annotated_source__purge(struct annotated_source *as)
+{
+	struct annotation_line *al, *n;
+
+	list_for_each_entry_safe(al, n, &as->source, node) {
+		list_del(&al->node);
+		disasm_line__free(disasm_line(al));
+	}
+}
+
+static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp)
+{
+	size_t printed;
+
+	if (dl->al.offset == -1)	/* line without an offset: print its text verbatim */
+		return fprintf(fp, "%s\n", dl->al.line);
+
+	printed = fprintf(fp, "%#" PRIx64 " %s", dl->al.offset, dl->ins.name);
+	/* NOTE(review): "%.*s" below caps the length rather than padding - "%*s" may be intended */
+	if (dl->ops.raw[0] != '\0') {
+		printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ",
+				   dl->ops.raw);
+	}
+	/* NOTE(review): a second "\n" follows when the operands branch above already wrote one */
+	return printed + fprintf(fp, "\n");
+}
+
+size_t disasm__fprintf(struct list_head *head, FILE *fp)
+{
+	struct disasm_line *pos;
+	size_t printed = 0;
+
+	list_for_each_entry(pos, head, al.node)
+		printed += disasm_line__fprintf(pos, fp);
+
+	return printed;
+}
+
+bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym)
+{
+	if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins) ||
+	    !disasm_line__has_local_offset(dl) || dl->ops.target.offset < 0 ||
+	    dl->ops.target.offset >= (s64)symbol__size(sym))
+		return false;
+
+	return true;
+}
+
+void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
+{
+	u64 offset, size = symbol__size(sym);
+
+	/* PLT symbols contain external offsets */
+	if (strstr(sym->name, "@plt"))
+		return;
+
+	for (offset = 0; offset < size; ++offset) {
+		struct annotation_line *al = notes->offsets[offset];
+		struct disasm_line *dl;
+
+		dl = disasm_line(al);
+
+		if (!disasm_line__is_valid_local_jump(dl, sym))
+			continue;
+
+		al = notes->offsets[dl->ops.target.offset];
+
+		/*
+		 * FIXME: Oops, no jump target? Buggy disassembler? Or do we
+		 * have to adjust to the previous offset?
+		 */
+		if (al == NULL)
+			continue;
+
+		if (++al->jump_sources > notes->max_jump_sources)
+			notes->max_jump_sources = al->jump_sources;
+
+		++notes->nr_jumps;
+	}
+}
+
+void annotation__set_offsets(struct annotation *notes, s64 size)
+{
+	struct annotation_line *al;
+
+	notes->max_line_len = 0;
+
+	list_for_each_entry(al, &notes->src->source, node) {
+		size_t line_len = strlen(al->line);
+
+		if (notes->max_line_len < line_len)
+			notes->max_line_len = line_len;
+		al->idx = notes->nr_entries++;	/* position among all lines */
+		if (al->offset != -1) {
+			al->idx_asm = notes->nr_asm_entries++;	/* position among lines that have an offset */
+			/*
+			 * FIXME: short term bandaid to cope with assembly
+			 * routines that come with labels in the same column
+			 * as the address in objdump, sigh.
+			 *
+			 * E.g. copy_user_generic_unrolled
+			 */
+			if (al->offset < size)
+				notes->offsets[al->offset] = al;
+		} else
+			al->idx_asm = -1;
+	}
+}
+
+static inline int width_jumps(int n)	/* width stored in widths.jumps for a jump count of @n */
+{
+	if (n >= 100)
+		return 5;	/* NOTE(review): 5 rather than 3 for counts of 100 and up - confirm intent */
+	if (n / 10)	/* non-zero once |n| >= 10 */
+		return 2;
+	return 1;
+}
+
+void annotation__init_column_widths(struct annotation *notes, struct symbol *sym)
+{
+	notes->widths.addr = notes->widths.target =
+		notes->widths.min_addr = hex_width(symbol__size(sym));
+	notes->widths.max_addr = hex_width(sym->end);
+	notes->widths.jumps = width_jumps(notes->max_jump_sources);
+}
+
+void annotation__update_column_widths(struct annotation *notes)
+{
+	if (notes->options->use_offset)
+		notes->widths.target = notes->widths.min_addr;
+	else
+		notes->widths.target = notes->widths.max_addr;
+
+	notes->widths.addr = notes->widths.target;
+
+	if (notes->options->show_nr_jumps)
+		notes->widths.addr += notes->widths.jumps + 1;
+}
+
+static void annotation__calc_lines(struct annotation *notes, struct map *map,
+				  struct rb_root *root)
+{
+	struct annotation_line *al;
+	struct rb_root tmp_root = RB_ROOT;
+
+	list_for_each_entry(al, &notes->src->source, node) {
+		double percent_max = 0.0;
+		int i;
+
+		for (i = 0; i < al->samples_nr; i++) {
+			struct annotation_data *sample;
+
+			sample = &al->samples[i];
+
+			if (sample->percent > percent_max)
+				percent_max = sample->percent;
+		}
+
+		if (percent_max <= 0.5)
+			continue;
+
+		al->path = get_srcline(map->dso, notes->start + al->offset, NULL,
+				       false, true, notes->start + al->offset);
+		insert_source_line(&tmp_root, al);
+	}
+
+	resort_source_line(root, &tmp_root);
+}
+
+static void symbol__calc_lines(struct symbol *sym, struct map *map,
+			      struct rb_root *root)
+{
+	struct annotation *notes = symbol__annotation(sym);
+
+	annotation__calc_lines(notes, map, root);
+}
+
+int symbol__tty_annotate2(struct symbol *sym, struct map *map,
+			  struct perf_evsel *evsel, bool print_lines,
+			  bool full_paths)
+{
+	struct dso *dso = map->dso;
+	struct rb_root source_line = RB_ROOT;
+	struct annotation_options opts = annotation__default_options;
+	struct annotation *notes = symbol__annotation(sym);
+	char buf[1024];
+
+	if (symbol__annotate2(sym, map, evsel, &opts, NULL) < 0)
+		return -1;
+
+	if (print_lines) {
+		srcline_full_filename = full_paths;
+		symbol__calc_lines(sym, map, &source_line);
+		print_summary(&source_line, dso->long_name);
+	}
+
+	annotation__scnprintf_samples_period(notes, buf, sizeof(buf), evsel);
+	fprintf(stdout, "%s\n%s() %s\n", buf, sym->name, dso->long_name);
+	symbol__annotate_fprintf2(sym, stdout);
+
+	annotated_source__purge(symbol__annotation(sym)->src);
+
+	return 0;
+}
+
+/* Annotate @sym and print the result to stdout; returns 0 on success, -1 if annotation failed. */
+int symbol__tty_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel,
+			 bool print_lines, bool full_paths, int min_pcnt, int max_lines)
+{
+	struct dso *dso = map->dso;
+	struct rb_root source_line = RB_ROOT;
+
+	/* symbol__annotate() may also fail with a positive errno, so test for non-zero */
+	if (symbol__annotate(sym, map, evsel, 0, NULL))
+		return -1;
+
+	symbol__calc_percent(sym, evsel);
+
+	if (print_lines) {
+		srcline_full_filename = full_paths;
+		symbol__calc_lines(sym, map, &source_line);
+		print_summary(&source_line, dso->long_name);
+	}
+
+	symbol__annotate_printf(sym, map, evsel, full_paths, min_pcnt, max_lines, 0);
+
+	annotated_source__purge(symbol__annotation(sym)->src);
+
+	return 0;
+}
+
+bool ui__has_annotation(void)
+{
+	return use_browser == 1 && perf_hpp_list.sym;
+}
+
+
+double annotation_line__max_percent(struct annotation_line *al, struct annotation *notes)
+{
+	double percent_max = 0.0;
+	int i;
+
+	for (i = 0; i < notes->nr_events; i++) {
+		if (al->samples[i].percent > percent_max)
+			percent_max = al->samples[i].percent;
+	}
+
+	return percent_max;
+}
+
+static void disasm_line__write(struct disasm_line *dl, struct annotation *notes,
+			       void *obj, char *bf, size_t size,
+			       void (*obj__printf)(void *obj, const char *fmt, ...),
+			       void (*obj__write_graph)(void *obj, int graph))
+{
+	if (dl->ins.ops && dl->ins.ops->scnprintf) {
+		if (ins__is_jump(&dl->ins)) {
+			bool fwd;
+
+			if (dl->ops.target.outside)
+				goto call_like;
+			fwd = dl->ops.target.offset > dl->al.offset;
+			obj__write_graph(obj, fwd ? DARROW_CHAR : UARROW_CHAR);
+			obj__printf(obj, " ");
+		} else if (ins__is_call(&dl->ins)) {
+call_like:
+			obj__write_graph(obj, RARROW_CHAR);
+			obj__printf(obj, " ");
+		} else if (ins__is_ret(&dl->ins)) {
+			obj__write_graph(obj, LARROW_CHAR);
+			obj__printf(obj, " ");
+		} else {
+			obj__printf(obj, "  ");
+		}
+	} else {
+		obj__printf(obj, "  ");
+	}
+
+	disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset);
+}
+
+/*
+ * Emit one annotation line through the obj__* callbacks: the percent (or period/
+ * samples) columns, the optional IPC/cycles columns, then the address and text.
+ */
+static void __annotation_line__write(struct annotation_line *al, struct annotation *notes,
+				     bool first_line, bool current_entry, bool change_color, int width,
+				     void *obj,
+				     int  (*obj__set_color)(void *obj, int color),
+				     void (*obj__set_percent_color)(void *obj, double percent, bool current),
+				     int  (*obj__set_jumps_percent_color)(void *obj, int nr, bool current),
+				     void (*obj__printf)(void *obj, const char *fmt, ...),
+				     void (*obj__write_graph)(void *obj, int graph))
+{
+	double percent_max = annotation_line__max_percent(al, notes);
+	int pcnt_width = annotation__pcnt_width(notes), cycles_width = annotation__cycles_width(notes);
+	bool show_title = false;
+	char bf[256];
+	int printed;
+
+	if (first_line && (al->offset == -1 || percent_max == 0.0)) {
+		if (notes->have_cycles) {
+			if (al->ipc == 0.0 && al->cycles == 0)
+				show_title = true;
+		} else
+			show_title = true;
+	}
+
+	if (al->offset != -1 && percent_max != 0.0) {
+		int i;
+
+		for (i = 0; i < notes->nr_events; i++) {
+			obj__set_percent_color(obj, al->samples[i].percent, current_entry);
+			if (notes->options->show_total_period) {
+				obj__printf(obj, "%11" PRIu64 " ", al->samples[i].he.period);
+			} else if (notes->options->show_nr_samples) {
+				obj__printf(obj, "%6" PRIu64 " ", al->samples[i].he.nr_samples);
+			} else {
+				obj__printf(obj, "%6.2f ",
+						   al->samples[i].percent);
+			}
+		}
+	} else {
+		obj__set_percent_color(obj, 0, current_entry);
+
+		if (!show_title)
+			obj__printf(obj, "%-*s", pcnt_width, " ");
+		else {
+			obj__printf(obj, "%-*s", pcnt_width,
+					   notes->options->show_total_period ? "Period" :
+					   notes->options->show_nr_samples ? "Samples" : "Percent");
+		}
+	}
+
+	if (notes->have_cycles) {
+		if (al->ipc)
+			obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->ipc);
+		else if (!show_title)
+			obj__printf(obj, "%*s", ANNOTATION__IPC_WIDTH, " ");
+		else
+			obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC");
+
+		if (al->cycles)
+			obj__printf(obj, "%*" PRIu64 " ",
+					   ANNOTATION__CYCLES_WIDTH - 1, al->cycles);
+		else if (!show_title)
+			obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " ");
+		else
+			obj__printf(obj, "%*s ", ANNOTATION__CYCLES_WIDTH - 1, "Cycle");
+	}
+
+	obj__printf(obj, " ");
+
+	if (!*al->line)
+		obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " ");
+	else if (al->offset == -1) {
+		if (al->line_nr && notes->options->show_linenr)
+			printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr);
+		else
+			printed = scnprintf(bf, sizeof(bf), "%-*s  ", notes->widths.addr, " ");
+		obj__printf(obj, "%s", bf);	/* bf is data, never a format string */
+		obj__printf(obj, "%-*s", width - printed - pcnt_width - cycles_width + 1, al->line);
+	} else {
+		u64 addr = al->offset;
+		int color = -1;
+
+		if (!notes->options->use_offset)
+			addr += notes->start;
+
+		if (!notes->options->use_offset) {
+			printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr);
+		} else {
+			if (al->jump_sources &&
+			    notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) {
+				if (notes->options->show_nr_jumps) {
+					int prev;
+					printed = scnprintf(bf, sizeof(bf), "%*d ",
+							    notes->widths.jumps,
+							    al->jump_sources);
+					prev = obj__set_jumps_percent_color(obj, al->jump_sources,
+									    current_entry);
+					obj__printf(obj, "%s", bf);	/* bf is data, never a format string */
+					obj__set_color(obj, prev);
+				}
+print_addr:
+				printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ",
+						    notes->widths.target, addr);
+			} else if (ins__is_call(&disasm_line(al)->ins) &&
+				   notes->options->offset_level >= ANNOTATION__OFFSET_CALL) {
+				goto print_addr;
+			} else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) {
+				goto print_addr;
+			} else {
+				printed = scnprintf(bf, sizeof(bf), "%-*s  ",
+						    notes->widths.addr, " ");
+			}
+		}
+
+		if (change_color)
+			color = obj__set_color(obj, HE_COLORSET_ADDR);
+		obj__printf(obj, "%s", bf);	/* bf is data, never a format string */
+		if (change_color)
+			obj__set_color(obj, color);
+
+		disasm_line__write(disasm_line(al), notes, obj, bf, sizeof(bf), obj__printf, obj__write_graph);
+
+		obj__printf(obj, "%-*s", width - pcnt_width - cycles_width - 3 - printed, bf);
+	}
+}
+
+void annotation_line__write(struct annotation_line *al, struct annotation *notes,
+			    struct annotation_write_ops *ops)
+{
+	__annotation_line__write(al, notes, ops->first_line, ops->current_entry,
+				 ops->change_color, ops->width, ops->obj,
+				 ops->set_color, ops->set_percent_color,
+				 ops->set_jumps_percent_color, ops->printf,
+				 ops->write_graph);
+}
+
+int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *evsel,
+		      struct annotation_options *options, struct arch **parch)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	size_t size = symbol__size(sym);
+	int nr_pcnt = 1, err;
+
+	notes->offsets = zalloc(size * sizeof(struct annotation_line *));	/* one slot per offset in the symbol */
+	if (notes->offsets == NULL)
+		return -1;
+
+	if (perf_evsel__is_group_event(evsel))
+		nr_pcnt = evsel->nr_members;
+
+	err = symbol__annotate(sym, map, evsel, 0, parch);
+	if (err)	/* any non-zero value, positive or negative, is a failure */
+		goto out_free_offsets;
+
+	notes->options = options;	/* pointer is stored, not copied */
+
+	symbol__calc_percent(sym, evsel);
+
+	notes->start = map__rip_2objdump(map, sym->start);
+
+	annotation__set_offsets(notes, size);
+	annotation__mark_jump_targets(notes, sym);
+	annotation__compute_ipc(notes, size);
+	annotation__init_column_widths(notes, sym);
+	notes->nr_events = nr_pcnt;
+
+	annotation__update_column_widths(notes);
+
+	return 0;	/* notes->offsets stays allocated on success */
+
+out_free_offsets:
+	zfree(&notes->offsets);
+	return -1;	/* NOTE(review): the specific err from symbol__annotate() is dropped here */
+}
+
+/* Format the "Samples: ... of event ..." title for @evsel (and its group members) into @bf. */
+int __annotation__scnprintf_samples_period(struct annotation *notes, char *bf, size_t size,
+					   struct perf_evsel *evsel, bool show_freq)
+{
+	const char *ev_name = perf_evsel__name(evsel);
+	char buf[1024], ref[30] = " show reference callgraph, ";
+	char sample_freq_str[64] = "";
+	unsigned long nr_samples = 0;
+	int nr_members = 1;
+	bool enable_ref = false;
+	u64 nr_events = 0;
+	char unit;
+	int i;
+
+	if (perf_evsel__is_group_event(evsel)) {
+		perf_evsel__group_desc(evsel, buf, sizeof(buf));
+		ev_name = buf;
+		nr_members = evsel->nr_members;
+	}
+
+	for (i = 0; i < nr_members; i++) {
+		struct sym_hist *ah = annotation__histogram(notes, evsel->idx + i);
+
+		nr_samples += ah->nr_samples;
+		nr_events  += ah->period;
+	}
+
+	if (symbol_conf.show_ref_callgraph && strstr(ev_name, "call-graph=no"))
+		enable_ref = true;
+
+	if (show_freq)	/* sample_freq is a 64-bit field: "%d" would mismatch the argument */
+		scnprintf(sample_freq_str, sizeof(sample_freq_str), " %" PRIu64 " Hz,",
+			  (u64)evsel->attr.sample_freq);
+
+	nr_samples = convert_unit(nr_samples, &unit);
+	return scnprintf(bf, size, "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
+			 nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
+			 ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+}
+
+#define ANNOTATION__CFG(n) \
+	{ .name = #n, .value = &annotation__default_options.n, }
+
+/*
+ * Keep the entries sorted, they are bsearch'ed
+ */
+static struct annotation_config {
+	const char *name;
+	void *value;
+} annotation__configs[] = {
+	ANNOTATION__CFG(hide_src_code),
+	ANNOTATION__CFG(jump_arrows),
+	ANNOTATION__CFG(offset_level),
+	ANNOTATION__CFG(show_linenr),
+	ANNOTATION__CFG(show_nr_jumps),
+	ANNOTATION__CFG(show_nr_samples),
+	ANNOTATION__CFG(show_total_period),
+	ANNOTATION__CFG(use_offset),
+};
+
+#undef ANNOTATION__CFG
+
+static int annotation_config__cmp(const void *name, const void *cfgp)
+{
+	const struct annotation_config *cfg = cfgp;
+
+	return strcmp(name, cfg->name);
+}
+
+/* perf_config() callback: apply one "annotate.<name>" setting to annotation__default_options. */
+static int annotation__config(const char *var, const char *value, void *data __maybe_unused)
+{
+	struct annotation_config *cfg;
+	const char *name;
+
+	if (!strstarts(var, "annotate."))
+		return 0;
+	name = var + 9;
+	cfg = bsearch(name, annotation__configs, ARRAY_SIZE(annotation__configs),
+		      sizeof(struct annotation_config), annotation_config__cmp);
+	if (cfg == NULL)
+		pr_debug("%s variable unknown, ignoring...", var);
+	else if (strcmp(var, "annotate.offset_level") == 0) {
+		int level = *(u8 *)cfg->value;	/* offset_level is a u8: parse into an int, store one byte */
+
+		perf_config_int(&level, name, value);
+		if (level > ANNOTATION__MAX_OFFSET_LEVEL)
+			level = ANNOTATION__MAX_OFFSET_LEVEL;
+		else if (level < ANNOTATION__MIN_OFFSET_LEVEL)
+			level = ANNOTATION__MIN_OFFSET_LEVEL;
+		*(u8 *)cfg->value = level;
+	} else {
+		*(bool *)cfg->value = perf_config_bool(name, value);
+	}
+	return 0;
+}
+
+void annotation_config__init(void)
+{
+	perf_config(annotation__config, NULL);
+
+	annotation__default_options.show_total_period = symbol_conf.show_total_period;
+	annotation__default_options.show_nr_samples   = symbol_conf.show_nr_samples;
+}
diff --git a/util/annotate.h b/util/annotate.h
new file mode 100644
index 0000000..f28a9e4
--- /dev/null
+++ b/util/annotate.h
@@ -0,0 +1,352 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ANNOTATE_H
+#define __PERF_ANNOTATE_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <linux/types.h>
+#include "symbol.h"
+#include "hist.h"
+#include "sort.h"
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <pthread.h>
+
+struct ins_ops;
+
+struct ins {
+	const char     *name;
+	struct ins_ops *ops;
+};
+
+struct ins_operands {
+	char	*raw;
+	struct {
+		char	*raw;
+		char	*name;
+		struct symbol *sym;
+		u64	addr;
+		s64	offset;
+		bool	offset_avail;
+		bool	outside;
+	} target;
+	union {
+		struct {
+			char	*raw;
+			char	*name;
+			u64	addr;
+		} source;
+		struct {
+			struct ins	    ins;
+			struct ins_operands *ops;
+		} locked;
+	};
+};
+
+struct arch;
+
+struct ins_ops {
+	void (*free)(struct ins_operands *ops);
+	int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms);
+	int (*scnprintf)(struct ins *ins, char *bf, size_t size,
+			 struct ins_operands *ops);
+};
+
+bool ins__is_jump(const struct ins *ins);
+bool ins__is_call(const struct ins *ins);
+bool ins__is_ret(const struct ins *ins);
+bool ins__is_lock(const struct ins *ins);
+int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops);
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
+
+#define ANNOTATION__IPC_WIDTH 6
+#define ANNOTATION__CYCLES_WIDTH 6
+
+struct annotation_options {
+	bool hide_src_code,
+	     use_offset,
+	     jump_arrows,
+	     show_linenr,
+	     show_nr_jumps,
+	     show_nr_samples,
+	     show_total_period;
+	u8   offset_level;
+};
+
+enum {
+	ANNOTATION__OFFSET_JUMP_TARGETS = 1,
+	ANNOTATION__OFFSET_CALL,
+	ANNOTATION__MAX_OFFSET_LEVEL,
+};
+
+#define ANNOTATION__MIN_OFFSET_LEVEL ANNOTATION__OFFSET_JUMP_TARGETS
+
+extern struct annotation_options annotation__default_options;
+
+struct annotation;
+
+struct sym_hist_entry {
+	u64		nr_samples;
+	u64		period;
+};
+
+struct annotation_data {
+	double			 percent;
+	double			 percent_sum;
+	struct sym_hist_entry	 he;
+};
+
+struct annotation_line {
+	struct list_head	 node;
+	struct rb_node		 rb_node;
+	s64			 offset;
+	char			*line;
+	int			 line_nr;
+	int			 jump_sources;
+	float			 ipc;
+	u64			 cycles;
+	size_t			 privsize;
+	char			*path;
+	u32			 idx;
+	int			 idx_asm;
+	int			 samples_nr;
+	struct annotation_data	 samples[0];
+};
+
+struct disasm_line {
+	struct ins		 ins;
+	struct ins_operands	 ops;
+
+	/* This needs to be at the end. */
+	struct annotation_line	 al;
+};
+
+static inline struct disasm_line *disasm_line(struct annotation_line *al)
+{
+	return al ? container_of(al, struct disasm_line, al) : NULL;
+}
+
+/*
+ * Is this offset in the same function as the line it is used?
+ * asm functions jump to other functions, for instance.
+ */
+static inline bool disasm_line__has_local_offset(const struct disasm_line *dl)
+{
+	return dl->ops.target.offset_avail && !dl->ops.target.outside;
+}
+
+/*
+ * Can we draw an arrow from the jump to its target, for instance? I.e.
+ * is the jump and its target in the same function?
+ */
+bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym);
+
+void disasm_line__free(struct disasm_line *dl);
+struct annotation_line *
+annotation_line__next(struct annotation_line *pos, struct list_head *head);
+
+struct annotation_write_ops {
+	bool first_line, current_entry, change_color;
+	int  width;
+	void *obj;
+	int  (*set_color)(void *obj, int color);
+	void (*set_percent_color)(void *obj, double percent, bool current);
+	int  (*set_jumps_percent_color)(void *obj, int nr, bool current);
+	void (*printf)(void *obj, const char *fmt, ...);
+	void (*write_graph)(void *obj, int graph);
+};
+
+double annotation_line__max_percent(struct annotation_line *al, struct annotation *notes);
+void annotation_line__write(struct annotation_line *al, struct annotation *notes,
+			    struct annotation_write_ops *ops);
+
+int __annotation__scnprintf_samples_period(struct annotation *notes,
+					   char *bf, size_t size,
+					   struct perf_evsel *evsel,
+					   bool show_freq);
+
+static inline int annotation__scnprintf_samples_period(struct annotation *notes,
+						       char *bf, size_t size,
+						       struct perf_evsel *evsel)
+{
+	return __annotation__scnprintf_samples_period(notes, bf, size, evsel, true);
+}
+
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
+size_t disasm__fprintf(struct list_head *head, FILE *fp);
+void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel);
+
+struct sym_hist {
+	u64		      nr_samples;
+	u64		      period;
+	struct sym_hist_entry addr[0];
+};
+
+struct cyc_hist {
+	u64	start;
+	u64	cycles;
+	u64	cycles_aggr;
+	u32	num;
+	u32	num_aggr;
+	u8	have_start;
+	/* 1 byte padding */
+	u16	reset;
+};
+
+/** struct annotated_source - symbols with hits have this attached (see struct annotation)
+ * @histograms: Array of addr hit histograms per event being monitored
+ * @lines: If 'print_lines' is specified, per source code line percentages
+ *         (no member with this name appears in the struct declared below)
+ * @source: source parsed from a disassembler like objdump -dS
+ * @cycles_hist: Average cycles per basic block
+ *
+ * lines is allocated, percentages calculated and all sorted by percentage
+ * when the annotation is about to be presented, so the percentages are for
+ * one of the entries in the histograms array, i.e. for the event/counter being
+ * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate
+ * returns.
+ */
+struct annotated_source {
+	struct list_head   source;
+	int    		   nr_histograms;
+	size_t		   sizeof_sym_hist;
+	struct cyc_hist	   *cycles_hist;
+	struct sym_hist	   histograms[0];
+};
+
+struct annotation {
+	pthread_mutex_t		lock;
+	u64			max_coverage;
+	u64			start;
+	struct annotation_options *options;
+	struct annotation_line	**offsets;
+	int			nr_events;
+	int			nr_jumps;
+	int			max_jump_sources;
+	int			nr_entries;
+	int			nr_asm_entries;
+	u16			max_line_len;
+	struct {
+		u8		addr;
+		u8		jumps;
+		u8		target;
+		u8		min_addr;
+		u8		max_addr;
+	} widths;
+	bool			have_cycles;
+	struct annotated_source *src;
+};
+
+static inline int annotation__cycles_width(struct annotation *notes)
+{
+	return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0;
+}
+
+static inline int annotation__pcnt_width(struct annotation *notes)
+{
+	return (notes->options->show_total_period ? 12 : 7) * notes->nr_events;
+}
+
+static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes)
+{
+	return notes->options->hide_src_code && al->offset == -1;
+}
+
+void annotation__set_offsets(struct annotation *notes, s64 size);
+void annotation__compute_ipc(struct annotation *notes, size_t size);
+void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym);
+void annotation__update_column_widths(struct annotation *notes);
+void annotation__init_column_widths(struct annotation *notes, struct symbol *sym);
+
+static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
+{
+	return (((void *)&notes->src->histograms) +
+	 	(notes->src->sizeof_sym_hist * idx));
+}
+
+static inline struct annotation *symbol__annotation(struct symbol *sym)
+{
+	return (void *)sym - symbol_conf.priv_size;
+}
+
+int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
+				 int evidx);
+
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+				    struct addr_map_symbol *start,
+				    unsigned cycles);
+
+int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
+				 int evidx, u64 addr);
+
+int symbol__alloc_hist(struct symbol *sym);
+void symbol__annotate_zero_histograms(struct symbol *sym);
+
+int symbol__annotate(struct symbol *sym, struct map *map,
+		     struct perf_evsel *evsel, size_t privsize,
+		     struct arch **parch);
+int symbol__annotate2(struct symbol *sym, struct map *map,
+		      struct perf_evsel *evsel,
+		      struct annotation_options *options,
+		      struct arch **parch);
+
+enum symbol_disassemble_errno {
+	SYMBOL_ANNOTATE_ERRNO__SUCCESS		= 0,
+
+	/*
+	 * Choose an arbitrary negative big number not to clash with standard
+	 * errno since SUS requires the errno has distinct positive values.
+	 * See 'Issue 6' in the link below.
+	 *
+	 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+	 */
+	__SYMBOL_ANNOTATE_ERRNO__START		= -10000,
+
+	SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX	= __SYMBOL_ANNOTATE_ERRNO__START,
+
+	__SYMBOL_ANNOTATE_ERRNO__END,
+};
+
+int symbol__strerror_disassemble(struct symbol *sym, struct map *map,
+				 int errnum, char *buf, size_t buflen);
+
+int symbol__annotate_printf(struct symbol *sym, struct map *map,
+			    struct perf_evsel *evsel, bool full_paths,
+			    int min_pcnt, int max_lines, int context);
+int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp);
+void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
+void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
+void annotated_source__purge(struct annotated_source *as);
+
+int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel);
+
+bool ui__has_annotation(void);
+
+int symbol__tty_annotate(struct symbol *sym, struct map *map,
+			 struct perf_evsel *evsel, bool print_lines,
+			 bool full_paths, int min_pcnt, int max_lines);
+
+int symbol__tty_annotate2(struct symbol *sym, struct map *map,
+			  struct perf_evsel *evsel, bool print_lines,
+			  bool full_paths);
+
+#ifdef HAVE_SLANG_SUPPORT
+int symbol__tui_annotate(struct symbol *sym, struct map *map,
+			 struct perf_evsel *evsel,
+			 struct hist_browser_timer *hbt);
+#else
+static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused,
+				struct map *map __maybe_unused,
+				struct perf_evsel *evsel  __maybe_unused,
+				struct hist_browser_timer *hbt
+				__maybe_unused)
+{
+	return 0;
+}
+#endif
+
+extern const char	*disassembler_style;
+
+void annotation_config__init(void);
+
+#endif	/* __PERF_ANNOTATE_H */
diff --git a/util/arm-spe-pkt-decoder.c b/util/arm-spe-pkt-decoder.c
new file mode 100644
index 0000000..b94001b
--- /dev/null
+++ b/util/arm-spe-pkt-decoder.c
@@ -0,0 +1,462 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+
+#include "arm-spe-pkt-decoder.h"
+
+#define BIT(n)		(1ULL << (n))
+
+#define NS_FLAG		BIT(63)
+#define EL_FLAG		(BIT(62) | BIT(61))
+
+#define SPE_HEADER0_PAD			0x0
+#define SPE_HEADER0_END			0x1
+#define SPE_HEADER0_ADDRESS		0x30 /* address packet (short) */
+#define SPE_HEADER0_ADDRESS_MASK	0x38
+#define SPE_HEADER0_COUNTER		0x18 /* counter packet (short) */
+#define SPE_HEADER0_COUNTER_MASK	0x38
+#define SPE_HEADER0_TIMESTAMP		0x71
+#define SPE_HEADER0_TIMESTAMP		0x71
+#define SPE_HEADER0_EVENTS		0x2
+#define SPE_HEADER0_EVENTS_MASK		0xf
+#define SPE_HEADER0_SOURCE		0x3
+#define SPE_HEADER0_SOURCE_MASK		0xf
+#define SPE_HEADER0_CONTEXT		0x24
+#define SPE_HEADER0_CONTEXT_MASK	0x3c
+#define SPE_HEADER0_OP_TYPE		0x8
+#define SPE_HEADER0_OP_TYPE_MASK	0x3c
+#define SPE_HEADER1_ALIGNMENT		0x0
+#define SPE_HEADER1_ADDRESS		0xb0 /* address packet (extended) */
+#define SPE_HEADER1_ADDRESS_MASK	0xf8
+#define SPE_HEADER1_COUNTER		0x98 /* counter packet (extended) */
+#define SPE_HEADER1_COUNTER_MASK	0xf8
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define memcpy_le64(d, s, n) do { \
+	memcpy((d), (s), (n));    \
+	*(d) = le64_to_cpu(*(d)); \
+} while (0)
+#else
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define memcpy_le64 memcpy
+#endif
+
+static const char * const arm_spe_packet_name[] = {
+	[ARM_SPE_PAD]		= "PAD",
+	[ARM_SPE_END]		= "END",
+	[ARM_SPE_TIMESTAMP]	= "TS",
+	[ARM_SPE_ADDRESS]	= "ADDR",
+	[ARM_SPE_COUNTER]	= "LAT",
+	[ARM_SPE_CONTEXT]	= "CONTEXT",
+	[ARM_SPE_OP_TYPE]	= "OP-TYPE",
+	[ARM_SPE_EVENTS]	= "EVENTS",
+	[ARM_SPE_DATA_SOURCE]	= "DATA-SOURCE",
+};
+
+const char *arm_spe_pkt_name(enum arm_spe_pkt_type type)
+{
+	return arm_spe_packet_name[type];
+}
+
+/* Decode the payload size field held in bits 5:4 of a header byte
+ * and return the payload length in bytes:
+ * 00 : 1 (byte)
+ * 01 : 2 (halfword)
+ * 10 : 4 (word)
+ * 11 : 8 (doubleword)
+ */
+static int payloadlen(unsigned char byte)
+{
+	return 1 << ((byte >> 4) & 0x3);
+}
+
+static int arm_spe_get_payload(const unsigned char *buf, size_t len,
+			       struct arm_spe_pkt *packet)
+{
+	size_t payload_len = payloadlen(buf[0]);	/* 1, 2, 4 or 8 bytes */
+	uint64_t raw = 0;
+
+	if (len < 1 + payload_len)	/* header byte plus payload must be present */
+		return ARM_SPE_NEED_MORE_BYTES;
+
+	/*
+	 * The payload is little-endian and may sit at any alignment, so copy
+	 * its bytes into the start of a zeroed 64-bit word instead of reading
+	 * through a cast pointer; le64_to_cpu() then yields the host-order
+	 * value, zero-extended.
+	 */
+	memcpy(&raw, buf + 1, payload_len);
+	packet->payload = le64_to_cpu(raw);
+
+	return 1 + payload_len;
+}
+
+static int arm_spe_get_pad(struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_PAD;
+	return 1;
+}
+
+static int arm_spe_get_alignment(const unsigned char *buf, size_t len,
+				 struct arm_spe_pkt *packet)
+{
+	unsigned int alignment = 1 << ((buf[0] & 0xf) + 1);	/* 2^(low nibble of buf[0] + 1) */
+
+	if (len < alignment)
+		return ARM_SPE_NEED_MORE_BYTES;
+
+	packet->type = ARM_SPE_PAD;	/* reported to callers as padding */
+	return alignment - (((uintptr_t)buf) & (alignment - 1));	/* bytes from buf's address up to the next multiple of alignment */
+}
+
+static int arm_spe_get_end(struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_END;
+	return 1;
+}
+
+static int arm_spe_get_timestamp(const unsigned char *buf, size_t len,
+				 struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_TIMESTAMP;
+	return arm_spe_get_payload(buf, len, packet);
+}
+
+static int arm_spe_get_events(const unsigned char *buf, size_t len,
+			      struct arm_spe_pkt *packet)
+{
+	int ret = arm_spe_get_payload(buf, len, packet);
+
+	packet->type = ARM_SPE_EVENTS;
+
+	/* The index records the payload size in bytes so that
+	 * arm_spe_pkt_desc() needs fewer comparisons: e.g. the LLC-ACCESS,
+	 * LLC-REFILL, and REMOTE-ACCESS events are only decoded when
+	 * index > 1.
+	 */
+	packet->index = ret - 1;	/* only meaningful when ret > 0 */
+
+	return ret;
+}
+
+static int arm_spe_get_data_source(const unsigned char *buf, size_t len,
+				   struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_DATA_SOURCE;
+	return arm_spe_get_payload(buf, len, packet);
+}
+
+static int arm_spe_get_context(const unsigned char *buf, size_t len,
+			       struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_CONTEXT;
+	packet->index = buf[0] & 0x3;
+
+	return arm_spe_get_payload(buf, len, packet);
+}
+
+static int arm_spe_get_op_type(const unsigned char *buf, size_t len,
+			       struct arm_spe_pkt *packet)
+{
+	packet->type = ARM_SPE_OP_TYPE;
+	packet->index = buf[0] & 0x3;
+	return arm_spe_get_payload(buf, len, packet);
+}
+
+static int arm_spe_get_counter(const unsigned char *buf, size_t len,
+			       const unsigned char ext_hdr, struct arm_spe_pkt *packet)
+{
+	const size_t hdr_len = 1 + ext_hdr;	/* 2 header bytes when extended */
+	uint16_t raw;
+
+	/* The whole packet (header + 16-bit payload) must be in the buffer. */
+	if (len < hdr_len + 2)
+		return ARM_SPE_NEED_MORE_BYTES;
+	packet->type = ARM_SPE_COUNTER;
+	packet->index = ext_hdr ? (((buf[0] & 0x3) << 3) | (buf[1] & 0x7)) :
+				  (buf[0] & 0x7);
+	memcpy(&raw, buf + hdr_len, sizeof(raw));	/* payload follows the header */
+	packet->payload = le16_to_cpu(raw);
+	return (int)(hdr_len + 2);
+}
+
+static int arm_spe_get_addr(const unsigned char *buf, size_t len,
+			    const unsigned char ext_hdr, struct arm_spe_pkt *packet)
+{
+	const size_t hdr_len = 1 + ext_hdr;	/* 2 header bytes when extended */
+
+	/* The whole packet (header + 64-bit payload) must be in the buffer. */
+	if (len < hdr_len + 8)
+		return ARM_SPE_NEED_MORE_BYTES;
+
+	packet->type = ARM_SPE_ADDRESS;
+	packet->index = ext_hdr ? (((buf[0] & 0x3) << 3) | (buf[1] & 0x7)) :
+				  (buf[0] & 0x7);
+	memcpy_le64(&packet->payload, buf + hdr_len, 8);	/* payload follows the header */
+
+	return (int)(hdr_len + 8);
+}
+
+static int arm_spe_do_get_packet(const unsigned char *buf, size_t len,
+				 struct arm_spe_pkt *packet)
+{
+	unsigned int byte;
+
+	memset(packet, 0, sizeof(struct arm_spe_pkt));	/* type starts as ARM_SPE_BAD (0) */
+	if (!len)
+		return ARM_SPE_NEED_MORE_BYTES;
+
+	byte = buf[0];	/* the first header byte selects the decoder */
+	if (byte == SPE_HEADER0_PAD)
+		return arm_spe_get_pad(packet);
+	else if (byte == SPE_HEADER0_END) /* no timestamp at end of record */
+		return arm_spe_get_end(packet);
+	else if (byte & 0xc0 /* bit 7 or bit 6 set */) {
+		if (byte & 0x80) {
+			if ((byte & SPE_HEADER0_ADDRESS_MASK) == SPE_HEADER0_ADDRESS)
+				return arm_spe_get_addr(buf, len, 0, packet);
+			if ((byte & SPE_HEADER0_COUNTER_MASK) == SPE_HEADER0_COUNTER)
+				return arm_spe_get_counter(buf, len, 0, packet);
+		} else
+			if (byte == SPE_HEADER0_TIMESTAMP)
+				return arm_spe_get_timestamp(buf, len, packet);
+			else if ((byte & SPE_HEADER0_EVENTS_MASK) == SPE_HEADER0_EVENTS)
+				return arm_spe_get_events(buf, len, packet);
+			else if ((byte & SPE_HEADER0_SOURCE_MASK) == SPE_HEADER0_SOURCE)
+				return arm_spe_get_data_source(buf, len, packet);
+			else if ((byte & SPE_HEADER0_CONTEXT_MASK) == SPE_HEADER0_CONTEXT)
+				return arm_spe_get_context(buf, len, packet);
+			else if ((byte & SPE_HEADER0_OP_TYPE_MASK) == SPE_HEADER0_OP_TYPE)
+				return arm_spe_get_op_type(buf, len, packet);
+	} else if ((byte & 0xe0) == 0x20 /* 0y001xxxxx */) {
+		if (len < 2)	/* 16-bit header: its second byte must be present */
+			return ARM_SPE_NEED_MORE_BYTES;
+		byte = buf[1];
+		if (byte == SPE_HEADER1_ALIGNMENT)
+			return arm_spe_get_alignment(buf, len, packet);
+		else if ((byte & SPE_HEADER1_ADDRESS_MASK) == SPE_HEADER1_ADDRESS)
+			return arm_spe_get_addr(buf, len, 1, packet);
+		else if ((byte & SPE_HEADER1_COUNTER_MASK) == SPE_HEADER1_COUNTER)
+			return arm_spe_get_counter(buf, len, 1, packet);
+	}
+
+	return ARM_SPE_BAD_PACKET;	/* no known header pattern matched */
+}
+
+int arm_spe_get_packet(const unsigned char *buf, size_t len,
+		       struct arm_spe_pkt *packet)
+{
+	int pkt_len = arm_spe_do_get_packet(buf, len, packet);
+
+	if (pkt_len <= 0 || packet->type != ARM_SPE_PAD)
+		return pkt_len;
+
+	/* Fold a run of zero (PAD) bytes into this one packet, stopping at
+	 * 16 bytes, the width of one line of the fixed-width dump output.
+	 */
+	for (; pkt_len < 16 && (size_t)pkt_len < len && buf[pkt_len] == 0; pkt_len++)
+		;
+	return pkt_len;
+}
+
+/* Render @packet as text in @buf (snprintf()-style return; 0 when the index is unknown). */
+int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
+		     size_t buf_len)
+{
+	int ret, ns, el, idx = packet->index;
+	unsigned long long payload = packet->payload;
+	const char *name = arm_spe_pkt_name(packet->type);
+
+	switch (packet->type) {
+	case ARM_SPE_BAD:
+	case ARM_SPE_PAD:
+	case ARM_SPE_END:
+		return snprintf(buf, buf_len, "%s", name);
+	case ARM_SPE_EVENTS: {
+		size_t blen = buf_len;
+
+		/* blen is the space left at buf as fragments are appended */
+		ret = snprintf(buf, blen, "EV");
+		buf += ret;
+		blen -= ret;
+		if (payload & 0x1) {
+			ret = snprintf(buf, blen, " EXCEPTION-GEN");
+			buf += ret;
+			blen -= ret;
+		}
+		if (payload & 0x2) {
+			ret = snprintf(buf, blen, " RETIRED");
+			buf += ret;
+			blen -= ret;
+		}
+		if (payload & 0x4) {
+			ret = snprintf(buf, blen, " L1D-ACCESS");
+			buf += ret;
+			blen -= ret;
+		}
+		if (payload & 0x8) {
+			ret = snprintf(buf, blen, " L1D-REFILL");
+			buf += ret;
+			blen -= ret;
+		}
+		if (payload & 0x10) {
+			ret = snprintf(buf, blen, " TLB-ACCESS");
+			buf += ret;
+			blen -= ret;
+		}
+		if (payload & 0x20) {
+			ret = snprintf(buf, blen, " TLB-REFILL");
+			buf += ret;
+			blen -= ret;
+		}
+		if (payload & 0x40) {
+			ret = snprintf(buf, blen, " NOT-TAKEN");
+			buf += ret;
+			blen -= ret;
+		}
+		if (payload & 0x80) {
+			ret = snprintf(buf, blen, " MISPRED");
+			buf += ret;
+			blen -= ret;
+		}
+		if (idx > 1) {
+			if (payload & 0x100) {
+				ret = snprintf(buf, blen, " LLC-ACCESS");
+				buf += ret;
+				blen -= ret;
+			}
+			if (payload & 0x200) {
+				ret = snprintf(buf, blen, " LLC-REFILL");
+				buf += ret;
+				blen -= ret;
+			}
+			if (payload & 0x400) {
+				ret = snprintf(buf, blen, " REMOTE-ACCESS");
+				buf += ret;
+				blen -= ret;
+			}
+		}
+		if (ret < 0)
+			return ret;
+		return buf_len - blen;
+	}
+	case ARM_SPE_OP_TYPE:
+		switch (idx) {
+		case 0:	return snprintf(buf, buf_len, "%s", payload & 0x1 ?
+					"COND-SELECT" : "INSN-OTHER");
+		case 1:	{
+			size_t blen = buf_len;
+
+			if (payload & 0x1)
+				ret = snprintf(buf, blen, "ST");
+			else
+				ret = snprintf(buf, blen, "LD");
+			buf += ret;
+			blen -= ret;
+			if (payload & 0x2) {
+				if (payload & 0x4) {
+					ret = snprintf(buf, blen, " AT");
+					buf += ret;
+					blen -= ret;
+				}
+				if (payload & 0x8) {
+					ret = snprintf(buf, blen, " EXCL");
+					buf += ret;
+					blen -= ret;
+				}
+				if (payload & 0x10) {
+					ret = snprintf(buf, blen, " AR");
+					buf += ret;
+					blen -= ret;
+				}
+			} else if (payload & 0x4) {
+				ret = snprintf(buf, blen, " SIMD-FP");
+				buf += ret;
+				blen -= ret;
+			}
+			if (ret < 0)
+				return ret;
+			/* every fragment is already deducted from blen */
+			return buf_len - blen;
+		}
+		case 2:	{
+			size_t blen = buf_len;
+
+			ret = snprintf(buf, blen, "B");
+			buf += ret;
+			blen -= ret;
+			if (payload & 0x1) {
+				ret = snprintf(buf, blen, " COND");
+				buf += ret;
+				blen -= ret;
+			}
+			if (payload & 0x2) {
+				ret = snprintf(buf, blen, " IND");
+				buf += ret;
+				blen -= ret;
+			}
+			if (ret < 0)
+				return ret;
+			/* every fragment is already deducted from blen */
+			return buf_len - blen;
+			}
+		default: return 0;
+		}
+	case ARM_SPE_DATA_SOURCE:
+	case ARM_SPE_TIMESTAMP:
+		return snprintf(buf, buf_len, "%s %lld", name, payload);
+	case ARM_SPE_ADDRESS:
+		switch (idx) {
+		case 0:
+		case 1: ns = !!(packet->payload & NS_FLAG);
+			el = (packet->payload & EL_FLAG) >> 61;
+			payload &= ~(0xffULL << 56);	/* drop the top byte holding the flags */
+			return snprintf(buf, buf_len, "%s 0x%llx el%d ns=%d",
+					(idx == 1) ? "TGT" : "PC", payload, el, ns);
+		case 2:	return snprintf(buf, buf_len, "VA 0x%llx", payload);
+		case 3:	ns = !!(packet->payload & NS_FLAG);
+			payload &= ~(0xffULL << 56);
+			return snprintf(buf, buf_len, "PA 0x%llx ns=%d",
+					payload, ns);
+		default: return 0;
+		}
+	case ARM_SPE_CONTEXT:
+		return snprintf(buf, buf_len, "%s 0x%lx el%d", name,
+				(unsigned long)payload, idx + 1);
+	case ARM_SPE_COUNTER: {
+		size_t blen = buf_len;
+
+		ret = snprintf(buf, blen, "%s %d ", name,
+			       (unsigned short)payload);
+		buf += ret;
+		blen -= ret;
+		switch (idx) {
+		case 0:	ret = snprintf(buf, blen, "TOT"); break;
+		case 1:	ret = snprintf(buf, blen, "ISSUE"); break;
+		case 2:	ret = snprintf(buf, blen, "XLAT"); break;
+		default: ret = 0;
+		}
+		if (ret < 0)
+			return ret;
+		blen -= ret;	/* the suffix above has not been deducted yet */
+		return buf_len - blen;
+	}
+	default:
+		break;
+	}
+
+	return snprintf(buf, buf_len, "%s 0x%llx (%d)",
+			name, payload, packet->index);
+}
diff --git a/util/arm-spe-pkt-decoder.h b/util/arm-spe-pkt-decoder.h
new file mode 100644
index 0000000..d786ef6
--- /dev/null
+++ b/util/arm-spe-pkt-decoder.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__
+#define INCLUDE__ARM_SPE_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define ARM_SPE_PKT_DESC_MAX		256
+
+#define ARM_SPE_NEED_MORE_BYTES		-1
+#define ARM_SPE_BAD_PACKET		-2
+
+enum arm_spe_pkt_type {
+	ARM_SPE_BAD,
+	ARM_SPE_PAD,
+	ARM_SPE_END,
+	ARM_SPE_TIMESTAMP,
+	ARM_SPE_ADDRESS,
+	ARM_SPE_COUNTER,
+	ARM_SPE_CONTEXT,
+	ARM_SPE_OP_TYPE,
+	ARM_SPE_EVENTS,
+	ARM_SPE_DATA_SOURCE,
+};
+
+struct arm_spe_pkt {
+	enum arm_spe_pkt_type	type;
+	unsigned char		index;
+	uint64_t		payload;
+};
+
+const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
+
+int arm_spe_get_packet(const unsigned char *buf, size_t len,
+		       struct arm_spe_pkt *packet);
+
+int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len);
+#endif
diff --git a/util/arm-spe.c b/util/arm-spe.c
new file mode 100644
index 0000000..6067267
--- /dev/null
+++ b/util/arm-spe.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#include <endian.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "cpumap.h"
+#include "color.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "session.h"
+#include "util.h"
+#include "thread.h"
+#include "debug.h"
+#include "auxtrace.h"
+#include "arm-spe.h"
+#include "arm-spe-pkt-decoder.h"
+
+struct arm_spe {
+	struct auxtrace			auxtrace;
+	struct auxtrace_queues		queues;
+	struct auxtrace_heap		heap;
+	u32				auxtrace_type;
+	struct perf_session		*session;
+	struct machine			*machine;
+	u32				pmu_type;
+};
+
+struct arm_spe_queue {
+	struct arm_spe		*spe;
+	unsigned int		queue_nr;
+	struct auxtrace_buffer	*buffer;
+	bool			on_heap;
+	bool			done;
+	pid_t			pid;
+	pid_t			tid;
+	int			cpu;
+};
+
+static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
+			 unsigned char *buf, size_t len)
+{
+	struct arm_spe_pkt packet;
+	size_t pos = 0;
+	int ret, pkt_len, i;
+	char desc[ARM_SPE_PKT_DESC_MAX];
+	const char *color = PERF_COLOR_BLUE;
+
+	color_fprintf(stdout, color,
+		      ". ... ARM SPE data: size %zu bytes\n",
+		      len);
+
+	while (len) {
+		ret = arm_spe_get_packet(buf, len, &packet);
+		pkt_len = ret > 0 ? ret : 1;	/* undecodable input: step over one byte */
+		/* Never trust a decoded length beyond what is left in the buffer. */
+		if ((size_t)pkt_len > len)
+			pkt_len = (int)len;
+		printf(".");
+		color_fprintf(stdout, color, "  %08zx: ", pos);	/* pos is a size_t */
+		for (i = 0; i < pkt_len; i++)
+			color_fprintf(stdout, color, " %02x", buf[i]);
+		for (; i < 16; i++)	/* pad the hex column to 16 bytes */
+			color_fprintf(stdout, color, "   ");
+		if (ret > 0) {
+			ret = arm_spe_pkt_desc(&packet, desc,
+					       ARM_SPE_PKT_DESC_MAX);
+			if (ret > 0)
+				color_fprintf(stdout, color, " %s\n", desc);
+		} else {
+			color_fprintf(stdout, color, " Bad packet!\n");
+		}
+		pos += pkt_len;
+		buf += pkt_len;
+		len -= pkt_len;	/* cannot wrap: pkt_len <= len after the clamp */
+	}
+}
+
+static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
+			       size_t len)
+{
+	printf(".\n");
+	arm_spe_dump(spe, buf, len);
+}
+
+static int arm_spe_process_event(struct perf_session *session __maybe_unused,
+				 union perf_event *event __maybe_unused,
+				 struct perf_sample *sample __maybe_unused,
+				 struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+static int arm_spe_process_auxtrace_event(struct perf_session *session,
+					  union perf_event *event,
+					  struct perf_tool *tool __maybe_unused)
+{
+	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+					     auxtrace);
+	struct auxtrace_buffer *buffer;
+	off_t data_offset;
+	int fd = perf_data__fd(session->data);
+	int err;
+
+	if (perf_data__is_pipe(session->data)) {
+		data_offset = 0;
+	} else {
+		data_offset = lseek(fd, 0, SEEK_CUR);
+		if (data_offset == -1)
+			return -errno;
+	}
+
+	err = auxtrace_queues__add_event(&spe->queues, session, event,
+					 data_offset, &buffer);
+	if (err)
+		return err;
+
+	/* Dump here now we have copied a piped trace out of the pipe */
+	if (dump_trace) {
+		if (auxtrace_buffer__get_data(buffer, fd)) {
+			arm_spe_dump_event(spe, buffer->data,
+					     buffer->size);
+			auxtrace_buffer__put_data(buffer);
+		}
+	}
+
+	return 0;
+}
+
+static int arm_spe_flush(struct perf_session *session __maybe_unused,
+			 struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+static void arm_spe_free_queue(void *priv)
+{
+	struct arm_spe_queue *queue = priv;
+
+	/* A queue that never got private data has nothing to release. */
+	if (queue != NULL)
+		free(queue);
+}
+
+static void arm_spe_free_events(struct perf_session *session)
+{
+	struct auxtrace_queues *qs = &container_of(session->auxtrace,
+						   struct arm_spe, auxtrace)->queues;
+	unsigned int idx = 0;
+
+	/* Drop each queue's private data, then hand the set to auxtrace_queues__free(). */
+	while (idx < qs->nr_queues) {
+		arm_spe_free_queue(qs->queue_array[idx].priv);
+		qs->queue_array[idx++].priv = NULL;
+	}
+	auxtrace_queues__free(qs);
+}
+
+static void arm_spe_free(struct perf_session *session)
+{
+	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+					     auxtrace);
+
+	auxtrace_heap__free(&spe->heap);
+	arm_spe_free_events(session);
+	session->auxtrace = NULL;
+	free(spe);
+}
+
+static const char * const arm_spe_info_fmts[] = {
+	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
+};
+
+static void arm_spe_print_info(u64 *arr)
+{
+	if (!dump_trace)
+		return;
+
+	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
+}
+
+int arm_spe_process_auxtrace_info(union perf_event *event,
+				  struct perf_session *session)
+{
+	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+	size_t min_sz = sizeof(u64) * (ARM_SPE_PMU_TYPE + 1);	/* priv[] must reach the PMU type slot read below */
+	struct arm_spe *spe;
+	int err;
+
+	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
+					min_sz)
+		return -EINVAL;	/* event too short to carry the fields used here */
+
+	spe = zalloc(sizeof(struct arm_spe));
+	if (!spe)
+		return -ENOMEM;
+
+	err = auxtrace_queues__init(&spe->queues);
+	if (err)
+		goto err_free;
+
+	spe->session = session;
+	spe->machine = &session->machines.host; /* No kvm support */
+	spe->auxtrace_type = auxtrace_info->type;
+	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
+
+	spe->auxtrace.process_event = arm_spe_process_event;
+	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
+	spe->auxtrace.flush_events = arm_spe_flush;
+	spe->auxtrace.free_events = arm_spe_free_events;
+	spe->auxtrace.free = arm_spe_free;
+	session->auxtrace = &spe->auxtrace;	/* from here arm_spe_free() owns spe */
+
+	arm_spe_print_info(&auxtrace_info->priv[0]);
+
+	return 0;
+
+err_free:
+	free(spe);
+	return err;
+}
diff --git a/util/arm-spe.h b/util/arm-spe.h
new file mode 100644
index 0000000..98d3235
--- /dev/null
+++ b/util/arm-spe.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#ifndef INCLUDE__PERF_ARM_SPE_H__
+#define INCLUDE__PERF_ARM_SPE_H__
+
+#define ARM_SPE_PMU_NAME "arm_spe_"
+
+enum {
+	ARM_SPE_PMU_TYPE,
+	ARM_SPE_PER_CPU_MMAPS,
+	ARM_SPE_AUXTRACE_PRIV_MAX,
+};
+
+#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64))
+
+union perf_event;
+struct perf_session;
+struct perf_pmu;
+
+struct auxtrace_record *arm_spe_recording_init(int *err,
+					       struct perf_pmu *arm_spe_pmu);
+
+int arm_spe_process_auxtrace_info(union perf_event *event,
+				  struct perf_session *session);
+
+struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);
+#endif
diff --git a/util/auxtrace.c b/util/auxtrace.c
new file mode 100644
index 0000000..857de69
--- /dev/null
+++ b/util/auxtrace.c
@@ -0,0 +1,2164 @@
+/*
+ * auxtrace.c: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+
+#include <sys/param.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <linux/list.h>
+
+#include "../perf.h"
+#include "util.h"
+#include "evlist.h"
+#include "dso.h"
+#include "map.h"
+#include "pmu.h"
+#include "evsel.h"
+#include "cpumap.h"
+#include "thread_map.h"
+#include "asm/bug.h"
+#include "auxtrace.h"
+
+#include <linux/hash.h>
+
+#include "event.h"
+#include "session.h"
+#include "debug.h"
+#include <subcmd/parse-options.h>
+
+#include "cs-etm.h"
+#include "intel-pt.h"
+#include "intel-bts.h"
+#include "arm-spe.h"
+
+#include "sane_ctype.h"
+#include "symbol/kallsyms.h"
+
+/* Report whether AUX area decoding is switched off for @session. */
+static bool auxtrace__dont_decode(struct perf_session *session)
+{
+	struct itrace_synth_opts *opts = session->itrace_synth_opts;
+
+	/* Having no synthesis options at all also means "do not decode" */
+	if (!opts)
+		return true;
+
+	return opts->dont_decode;
+}
+
+/*
+ * Initialise @mm from the parameters in @mp and, if a non-zero length was
+ * requested, map the AUX area of the event open on @fd.  @userpg is used as
+ * the event's perf_event_mmap_page: the AUX offset and size are written into
+ * it before the mapping is attempted.
+ *
+ * Returns 0 on success (including the "no AUX area" case), -1 on failure.
+ */
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+			struct auxtrace_mmap_params *mp,
+			void *userpg, int fd)
+{
+	struct perf_event_mmap_page *pc = userpg;
+	void *base;
+
+	WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n");
+
+	mm->userpg = userpg;
+	mm->idx = mp->idx;
+	mm->tid = mp->tid;
+	mm->cpu = mp->cpu;
+	mm->mask = mp->mask;
+	mm->len = mp->len;
+	mm->prev = 0;
+
+	/* Nothing to map */
+	if (mp->len == 0) {
+		mm->base = NULL;
+		return 0;
+	}
+
+#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+	pr_err("Cannot use AUX area tracing mmaps\n");
+	return -1;
+#endif
+
+	pc->aux_offset = mp->offset;
+	pc->aux_size = mp->len;
+
+	base = mmap(NULL, mp->len, mp->prot, MAP_SHARED, fd, mp->offset);
+	if (base == MAP_FAILED) {
+		pr_debug2("failed to mmap AUX area\n");
+		mm->base = NULL;
+		return -1;
+	}
+
+	mm->base = base;
+	return 0;
+}
+
+/* Unmap the AUX area of @mm, if one is mapped, and forget its address. */
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm)
+{
+	if (!mm->base)
+		return;
+
+	munmap(mm->base, mm->len);
+	mm->base = NULL;
+}
+
+/*
+ * Fill in the offset, length, mask and protection of @mp for an AUX area of
+ * @auxtrace_pages pages located at @auxtrace_offset.  With zero pages only
+ * the length is set (to 0).  The mask is only non-zero when the length is a
+ * power of 2.
+ */
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+				off_t auxtrace_offset,
+				unsigned int auxtrace_pages,
+				bool auxtrace_overwrite)
+{
+	if (!auxtrace_pages) {
+		mp->len = 0;
+		return;
+	}
+
+	mp->offset = auxtrace_offset;
+	mp->len = auxtrace_pages * (size_t)page_size;
+
+	if (is_power_of_2(mp->len))
+		mp->mask = mp->len - 1;
+	else
+		mp->mask = 0;
+
+	/* Overwrite mode maps the area read-only */
+	mp->prot = PROT_READ;
+	if (!auxtrace_overwrite)
+		mp->prot |= PROT_WRITE;
+
+	pr_debug2("AUX area mmap length %zu\n", mp->len);
+}
+
+/*
+ * Record mmap index @idx in @mp together with the cpu and tid it stands for.
+ * For per-cpu mmaps @idx selects the cpu and the tid is that of the first
+ * thread (or -1 without a thread map); otherwise the cpu is -1 and @idx
+ * selects the thread.
+ */
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+				   struct perf_evlist *evlist, int idx,
+				   bool per_cpu)
+{
+	mp->idx = idx;
+
+	if (!per_cpu) {
+		mp->cpu = -1;
+		mp->tid = thread_map__pid(evlist->threads, idx);
+		return;
+	}
+
+	mp->cpu = evlist->cpus->map[idx];
+	mp->tid = evlist->threads ? thread_map__pid(evlist->threads, 0) : -1;
+}
+
+#define AUXTRACE_INIT_NR_QUEUES	32
+
+/*
+ * Allocate a zeroed array of @nr_queues queues, each with an empty buffer
+ * list and no private data.  Returns NULL when the request is too large or
+ * the allocation fails.
+ */
+static struct auxtrace_queue *auxtrace_alloc_queue_array(unsigned int nr_queues)
+{
+	unsigned int limit = UINT_MAX / sizeof(struct auxtrace_queue);
+	struct auxtrace_queue *array;
+	unsigned int n;
+
+	if (nr_queues > limit)
+		return NULL;
+
+	array = calloc(nr_queues, sizeof(*array));
+	if (!array)
+		return NULL;
+
+	for (n = 0; n < nr_queues; n++) {
+		struct auxtrace_queue *queue = &array[n];
+
+		INIT_LIST_HEAD(&queue->head);
+		queue->priv = NULL;
+	}
+
+	return array;
+}
+
+/*
+ * Give @queues its initial array of AUXTRACE_INIT_NR_QUEUES queues.
+ * Returns 0 on success or -ENOMEM.
+ */
+int auxtrace_queues__init(struct auxtrace_queues *queues)
+{
+	struct auxtrace_queue *array;
+
+	queues->nr_queues = AUXTRACE_INIT_NR_QUEUES;
+	array = auxtrace_alloc_queue_array(queues->nr_queues);
+	queues->queue_array = array;
+
+	return array ? 0 : -ENOMEM;
+}
+
+/*
+ * Replace the queue array of @queues with a larger one that holds at least
+ * @new_nr_queues queues.  The size is doubled, starting from the current
+ * size (or AUXTRACE_INIT_NR_QUEUES if there are no queues yet), until it is
+ * big enough.  Each existing queue keeps its buffers, its private data and
+ * its tid/cpu/set identity.
+ *
+ * Returns 0 on success, -EINVAL if the size computation overflowed, or
+ * -ENOMEM if the new array could not be allocated.
+ */
+static int auxtrace_queues__grow(struct auxtrace_queues *queues,
+				 unsigned int new_nr_queues)
+{
+	unsigned int nr_queues = queues->nr_queues;
+	struct auxtrace_queue *queue_array;
+	unsigned int i;
+
+	if (!nr_queues)
+		nr_queues = AUXTRACE_INIT_NR_QUEUES;
+
+	/* A shift that wraps to 0 ends the loop and is rejected just below */
+	while (nr_queues && nr_queues < new_nr_queues)
+		nr_queues <<= 1;
+
+	if (nr_queues < queues->nr_queues || nr_queues < new_nr_queues)
+		return -EINVAL;
+
+	queue_array = auxtrace_alloc_queue_array(nr_queues);
+	if (!queue_array)
+		return -ENOMEM;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		struct auxtrace_queue *old = &queues->queue_array[i];
+
+		list_splice_tail(&old->head, &queue_array[i].head);
+		/*
+		 * Carry the queue identity over too.  The new array starts
+		 * zeroed, so without this auxtrace_queues__queue_buffer()
+		 * would see set == false after a resize and re-bind the queue
+		 * to the next buffer's cpu/tid instead of checking for a
+		 * conflict.
+		 */
+		queue_array[i].tid = old->tid;
+		queue_array[i].cpu = old->cpu;
+		queue_array[i].set = old->set;
+		queue_array[i].priv = old->priv;
+	}
+
+	/*
+	 * NOTE(review): the old array is not freed here.  Confirm that nothing
+	 * keeps pointers into it before adding a free().
+	 */
+	queues->nr_queues = nr_queues;
+	queues->queue_array = queue_array;
+
+	return 0;
+}
+
+/*
+ * Read @size bytes from the session's data file descriptor into a newly
+ * allocated buffer.  Returns the buffer, or NULL if @size is too large, the
+ * allocation fails or fewer than @size bytes could be read.
+ */
+static void *auxtrace_copy_data(u64 size, struct perf_session *session)
+{
+	int fd = perf_data__fd(session->data);
+	void *data;
+
+	if (size > SSIZE_MAX)
+		return NULL;
+
+	data = malloc(size);
+	if (!data)
+		return NULL;
+
+	if (readn(fd, data, size) == (ssize_t)size)
+		return data;
+
+	free(data);
+	return NULL;
+}
+
+/*
+ * Append @buffer to queue number @idx of @queues, growing the queue array
+ * first if @idx is beyond it.  The first buffer queued fixes the queue's cpu
+ * and tid; later buffers must match them.
+ *
+ * Returns 0 on success, -EINVAL for an unusable @idx or a cpu/tid conflict,
+ * or the error from growing the array.
+ */
+static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
+					 unsigned int idx,
+					 struct auxtrace_buffer *buffer)
+{
+	struct auxtrace_queue *queue;
+	int err;
+
+	/*
+	 * idx + 1 below would wrap to 0 for UINT_MAX.  The grow request would
+	 * then succeed without growing anything and the array access further
+	 * down would be out of bounds.
+	 */
+	if (idx == UINT_MAX)
+		return -EINVAL;
+
+	if (idx >= queues->nr_queues) {
+		err = auxtrace_queues__grow(queues, idx + 1);
+		if (err)
+			return err;
+	}
+
+	queue = &queues->queue_array[idx];
+
+	if (!queue->set) {
+		queue->set = true;
+		queue->tid = buffer->tid;
+		queue->cpu = buffer->cpu;
+	} else if (buffer->cpu != queue->cpu || buffer->tid != queue->tid) {
+		pr_err("auxtrace queue conflict: cpu %d, tid %d vs cpu %d, tid %d\n",
+		       queue->cpu, queue->tid, buffer->cpu, buffer->tid);
+		return -EINVAL;
+	}
+
+	/* Buffers are numbered in the order they are queued */
+	buffer->buffer_nr = queues->next_buffer_nr++;
+
+	list_add_tail(&buffer->list, &queue->head);
+
+	queues->new_data = true;
+	queues->populated = true;
+
+	return 0;
+}
+
+/* Limit buffers to 32MiB on 32-bit */
+#define BUFFER_LIMIT_FOR_32_BIT (32 * 1024 * 1024)
+
+/*
+ * Queue copies of @buffer, each BUFFER_LIMIT_FOR_32_BIT bytes long, for as
+ * long as more than that limit remains.  @buffer itself is left describing
+ * the final remainder and is not queued here.  Every piece after the first
+ * is marked consecutive.
+ *
+ * Returns 0 on success, -ENOMEM or the error from queueing a piece.
+ */
+static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
+					 unsigned int idx,
+					 struct auxtrace_buffer *buffer)
+{
+	u64 remaining = buffer->size;
+	bool consecutive = false;
+
+	for (; remaining > BUFFER_LIMIT_FOR_32_BIT;
+	     remaining -= BUFFER_LIMIT_FOR_32_BIT) {
+		struct auxtrace_buffer *piece;
+		int err;
+
+		piece = memdup(buffer, sizeof(struct auxtrace_buffer));
+		if (!piece)
+			return -ENOMEM;
+
+		piece->size = BUFFER_LIMIT_FOR_32_BIT;
+		piece->consecutive = consecutive;
+
+		err = auxtrace_queues__queue_buffer(queues, idx, piece);
+		if (err) {
+			auxtrace_buffer__free(piece);
+			return err;
+		}
+
+		buffer->data_offset += BUFFER_LIMIT_FOR_32_BIT;
+		consecutive = true;
+	}
+
+	buffer->size = remaining;
+	buffer->consecutive = consecutive;
+
+	return 0;
+}
+
+/*
+ * Return true if data for @cpu is to be filtered out: a cpu bitmap is set in
+ * the session's itrace options, @cpu is a real cpu (not -1) and its bit is
+ * clear.
+ */
+static bool filter_cpu(struct perf_session *session, int cpu)
+{
+	unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
+
+	if (!cpu_bitmap || cpu == -1)
+		return false;
+
+	return !test_bit(cpu, cpu_bitmap);
+}
+
+/*
+ * Queue a heap copy of @buffer on queue @idx unless its cpu is filtered out.
+ * How the data is reached depends on the session: directly in the single
+ * mmap, copied out of a pipe, or left for later (split into smaller pieces on
+ * 32-bit when it is too big).
+ *
+ * On success the queued copy is stored in *@buffer_ptr if @buffer_ptr is not
+ * NULL.  Nothing is stored there when the buffer is filtered out.
+ * Returns 0 on success or a negative error code.
+ */
+static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
+				       struct perf_session *session,
+				       unsigned int idx,
+				       struct auxtrace_buffer *buffer,
+				       struct auxtrace_buffer **buffer_ptr)
+{
+	struct auxtrace_buffer *copy;
+	int err;
+
+	if (filter_cpu(session, buffer->cpu))
+		return 0;
+
+	copy = memdup(buffer, sizeof(*buffer));
+	if (!copy)
+		return -ENOMEM;
+
+	if (session->one_mmap) {
+		copy->data = copy->data_offset - session->one_mmap_offset +
+			     session->one_mmap_addr;
+	} else if (perf_data__is_pipe(session->data)) {
+		copy->data = auxtrace_copy_data(copy->size, session);
+		if (!copy->data) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+		copy->data_needs_freeing = true;
+	} else if (BITS_PER_LONG == 32 &&
+		   copy->size > BUFFER_LIMIT_FOR_32_BIT) {
+		err = auxtrace_queues__split_buffer(queues, idx, copy);
+		if (err)
+			goto out_free;
+	}
+
+	err = auxtrace_queues__queue_buffer(queues, idx, copy);
+	if (err)
+		goto out_free;
+
+	/* FIXME: Doesn't work for split buffer */
+	if (buffer_ptr)
+		*buffer_ptr = copy;
+
+	return 0;
+
+out_free:
+	auxtrace_buffer__free(copy);
+	return err;
+}
+
+/*
+ * Describe the data of the auxtrace @event, located at @data_offset, as a
+ * buffer and add it to the queue selected by the event's idx.  See
+ * auxtrace_queues__add_buffer() for @buffer_ptr and the return value.
+ */
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+			       struct perf_session *session,
+			       union perf_event *event, off_t data_offset,
+			       struct auxtrace_buffer **buffer_ptr)
+{
+	struct auxtrace_buffer buffer;
+
+	memset(&buffer, 0, sizeof(buffer));
+	buffer.pid = -1;
+	buffer.tid = event->auxtrace.tid;
+	buffer.cpu = event->auxtrace.cpu;
+	buffer.data_offset = data_offset;
+	buffer.offset = event->auxtrace.offset;
+	buffer.reference = event->auxtrace.reference;
+	buffer.size = event->auxtrace.size;
+
+	return auxtrace_queues__add_buffer(queues, session,
+					   event->auxtrace.idx, &buffer,
+					   buffer_ptr);
+}
+
+/*
+ * Peek at the event at @file_offset and, if it is a PERF_RECORD_AUXTRACE
+ * event, queue the data that follows it.  Events of any other type are
+ * ignored.  Returns -EINVAL if the event is smaller than an auxtrace event
+ * or its size differs from the indexed size @sz.
+ */
+static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues,
+					      struct perf_session *session,
+					      off_t file_offset, size_t sz)
+{
+	char buf[PERF_SAMPLE_MAX_SIZE];
+	union perf_event *event;
+	int err;
+
+	err = perf_session__peek_event(session, file_offset, buf,
+				       PERF_SAMPLE_MAX_SIZE, &event, NULL);
+	if (err)
+		return err;
+
+	if (event->header.type != PERF_RECORD_AUXTRACE)
+		return 0;
+
+	if (event->header.size < sizeof(struct auxtrace_event) ||
+	    event->header.size != sz)
+		return -EINVAL;
+
+	/* The AUX data starts right after the event itself */
+	return auxtrace_queues__add_event(queues, session, event,
+					  file_offset + event->header.size,
+					  NULL);
+}
+
+/* Free every buffer on every queue of @queues, then the queue array itself. */
+void auxtrace_queues__free(struct auxtrace_queues *queues)
+{
+	unsigned int n;
+
+	for (n = 0; n < queues->nr_queues; n++) {
+		struct list_head *head = &queues->queue_array[n].head;
+
+		while (!list_empty(head)) {
+			struct auxtrace_buffer *buffer;
+
+			buffer = list_entry(head->next, struct auxtrace_buffer,
+					    list);
+			list_del(&buffer->list);
+			auxtrace_buffer__free(buffer);
+		}
+	}
+
+	zfree(&queues->queue_array);
+	queues->nr_queues = 0;
+}
+
+/*
+ * Insert (@queue_nr, @ordinal) into the min-heap @heap_array, starting from
+ * the free slot @pos: parents with a larger ordinal are moved down until the
+ * new item can be stored.
+ */
+static void auxtrace_heapify(struct auxtrace_heap_item *heap_array,
+			     unsigned int pos, unsigned int queue_nr,
+			     u64 ordinal)
+{
+	struct auxtrace_heap_item *slot;
+
+	while (pos > 0) {
+		unsigned int parent = (pos - 1) >> 1;
+
+		if (ordinal >= heap_array[parent].ordinal)
+			break;
+
+		heap_array[pos] = heap_array[parent];
+		pos = parent;
+	}
+
+	slot = &heap_array[pos];
+	slot->queue_nr = queue_nr;
+	slot->ordinal = ordinal;
+}
+
+/*
+ * Add (@queue_nr, @ordinal) to @heap.  The array is enlarged to a power-of-2
+ * multiple of AUXTRACE_INIT_NR_QUEUES greater than @queue_nr when @queue_nr
+ * does not fit the current size.  Returns 0 on success or -ENOMEM.
+ */
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+		       u64 ordinal)
+{
+	if (queue_nr >= heap->heap_sz) {
+		struct auxtrace_heap_item *grown;
+		unsigned int new_sz = AUXTRACE_INIT_NR_QUEUES;
+
+		while (new_sz <= queue_nr)
+			new_sz <<= 1;
+
+		grown = realloc(heap->heap_array,
+				new_sz * sizeof(struct auxtrace_heap_item));
+		if (!grown)
+			return -ENOMEM;
+
+		heap->heap_array = grown;
+		heap->heap_sz = new_sz;
+	}
+
+	/* The new item enters at the first unused slot */
+	auxtrace_heapify(heap->heap_array, heap->heap_cnt, queue_nr, ordinal);
+	heap->heap_cnt++;
+
+	return 0;
+}
+
+/* Release the array of @heap and reset it to the empty state. */
+void auxtrace_heap__free(struct auxtrace_heap *heap)
+{
+	heap->heap_sz = 0;
+	heap->heap_cnt = 0;
+	zfree(&heap->heap_array);
+}
+
+/*
+ * Remove the root (smallest ordinal) item from @heap.  Does nothing if the
+ * heap is empty.
+ */
+void auxtrace_heap__pop(struct auxtrace_heap *heap)
+{
+	unsigned int pos, last, heap_cnt = heap->heap_cnt;
+	struct auxtrace_heap_item *heap_array;
+
+	if (!heap_cnt)
+		return;
+
+	/* heap_cnt (the local copy) keeps the count from before the pop */
+	heap->heap_cnt -= 1;
+
+	heap_array = heap->heap_array;
+
+	/* Move the hole left by the root down, pulling up the smaller child */
+	pos = 0;
+	while (1) {
+		unsigned int left, right;
+
+		left = (pos << 1) + 1;
+		if (left >= heap_cnt)
+			break;
+		right = left + 1;
+		if (right >= heap_cnt) {
+			/* Only a left child, which is also the last item */
+			heap_array[pos] = heap_array[left];
+			return;
+		}
+		if (heap_array[left].ordinal < heap_array[right].ordinal) {
+			heap_array[pos] = heap_array[left];
+			pos = left;
+		} else {
+			heap_array[pos] = heap_array[right];
+			pos = right;
+		}
+	}
+
+	/* Re-insert the former last item at the hole, sifting it up as needed */
+	last = heap_cnt - 1;
+	auxtrace_heapify(heap_array, pos, heap_array[last].queue_nr,
+			 heap_array[last].ordinal);
+}
+
+/* Ask @itr for the size of its private info data; 0 when @itr is NULL. */
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+				       struct perf_evlist *evlist)
+{
+	if (!itr)
+		return 0;
+
+	return itr->info_priv_size(itr, evlist);
+}
+
+/* Print the "not supported" error message and return -EINVAL. */
+static int auxtrace_not_supported(void)
+{
+	int err = -EINVAL;
+
+	pr_err("AUX area tracing is not supported on this architecture\n");
+
+	return err;
+}
+
+/*
+ * Let @itr fill in @auxtrace_info.  Without an @itr this reports that AUX
+ * area tracing is not supported and returns -EINVAL.
+ */
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+			       struct perf_session *session,
+			       struct auxtrace_info_event *auxtrace_info,
+			       size_t priv_size)
+{
+	if (!itr)
+		return auxtrace_not_supported();
+
+	return itr->info_fill(itr, session, auxtrace_info, priv_size);
+}
+
+/* Free @itr through its own free callback; a NULL @itr is ignored. */
+void auxtrace_record__free(struct auxtrace_record *itr)
+{
+	if (!itr)
+		return;
+
+	itr->free(itr);
+}
+
+/* Call the optional snapshot_start callback of @itr; 0 if there is none. */
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr)
+{
+	if (!itr || !itr->snapshot_start)
+		return 0;
+
+	return itr->snapshot_start(itr);
+}
+
+/* Call the optional snapshot_finish callback of @itr; 0 if there is none. */
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr)
+{
+	if (!itr || !itr->snapshot_finish)
+		return 0;
+
+	return itr->snapshot_finish(itr);
+}
+
+/*
+ * Call the optional find_snapshot callback of @itr, which may update *@head
+ * and *@old.  Returns 0 if there is no such callback.
+ */
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+				   struct auxtrace_mmap *mm,
+				   unsigned char *data, u64 *head, u64 *old)
+{
+	if (!itr || !itr->find_snapshot)
+		return 0;
+
+	return itr->find_snapshot(itr, idx, mm, data, head, old);
+}
+
+/* Let @itr set up its recording options; nothing to do (0) without @itr. */
+int auxtrace_record__options(struct auxtrace_record *itr,
+			     struct perf_evlist *evlist,
+			     struct record_opts *opts)
+{
+	if (!itr)
+		return 0;
+
+	return itr->recording_options(itr, evlist, opts);
+}
+
+/* Return the reference value supplied by @itr, or 0 when @itr is NULL. */
+u64 auxtrace_record__reference(struct auxtrace_record *itr)
+{
+	if (!itr)
+		return 0;
+
+	return itr->reference(itr);
+}
+
+/*
+ * Hand the snapshot option string @str to @itr for parsing.  A NULL @str is
+ * accepted as "no option given"; a string without an @itr is an error.
+ */
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+				    struct record_opts *opts, const char *str)
+{
+	if (!str)
+		return 0;
+
+	if (!itr) {
+		pr_err("No AUX area tracing to snapshot\n");
+		return -EINVAL;
+	}
+
+	return itr->parse_snapshot_options(itr, opts, str);
+}
+
+/*
+ * Weak default: no AUX area recording support is provided here, which is
+ * reported as "no record, no error".
+ */
+struct auxtrace_record *__weak
+auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, int *err)
+{
+	struct auxtrace_record *itr = NULL;
+
+	*err = 0;
+
+	return itr;
+}
+
+/*
+ * Allocate an empty index block and append it to the list @head.
+ * Returns 0 on success or -ENOMEM.
+ */
+static int auxtrace_index__alloc(struct list_head *head)
+{
+	struct auxtrace_index *block = malloc(sizeof(struct auxtrace_index));
+
+	if (!block)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&block->list);
+	block->nr = 0;
+
+	list_add_tail(&block->list, head);
+
+	return 0;
+}
+
+/* Unlink and free every index block on the list @head. */
+void auxtrace_index__free(struct list_head *head)
+{
+	while (!list_empty(head)) {
+		struct auxtrace_index *block;
+
+		block = list_entry(head->next, struct auxtrace_index, list);
+		list_del(&block->list);
+		free(block);
+	}
+}
+
+/*
+ * Return the last index block on @head that still has room for an entry,
+ * allocating a new block if the list is empty or the last block is full.
+ * Returns NULL if a needed allocation fails.
+ */
+static struct auxtrace_index *auxtrace_index__last(struct list_head *head)
+{
+	struct auxtrace_index *last;
+
+	if (list_empty(head) && auxtrace_index__alloc(head))
+		return NULL;
+
+	last = list_entry(head->prev, struct auxtrace_index, list);
+	if (last->nr < PERF_AUXTRACE_INDEX_ENTRY_COUNT)
+		return last;
+
+	/* The last block is full: start a new one */
+	if (auxtrace_index__alloc(head))
+		return NULL;
+
+	return list_entry(head->prev, struct auxtrace_index, list);
+}
+
+/*
+ * Append an index entry recording that @event, of size header.size, is
+ * located at @file_offset.  Returns 0 on success or -ENOMEM.
+ */
+int auxtrace_index__auxtrace_event(struct list_head *head,
+				   union perf_event *event, off_t file_offset)
+{
+	struct auxtrace_index *block = auxtrace_index__last(head);
+	struct auxtrace_index_entry *ent;
+
+	if (!block)
+		return -ENOMEM;
+
+	ent = &block->entries[block->nr];
+	ent->file_offset = file_offset;
+	ent->sz = event->header.size;
+	block->nr += 1;
+
+	return 0;
+}
+
+/*
+ * Write the entries of one index block to @fd, one entry at a time.
+ * Returns 0 on success or -errno if an entry is not written in full.
+ */
+static int auxtrace_index__do_write(int fd,
+				    struct auxtrace_index *auxtrace_index)
+{
+	size_t n;
+
+	for (n = 0; n < auxtrace_index->nr; n++) {
+		const struct auxtrace_index_entry *src;
+		struct auxtrace_index_entry ent;
+
+		src = &auxtrace_index->entries[n];
+		ent.file_offset = src->file_offset;
+		ent.sz = src->sz;
+
+		if (writen(fd, &ent, sizeof(ent)) != sizeof(ent))
+			return -errno;
+	}
+
+	return 0;
+}
+
+/*
+ * Write the whole index to @fd: first the total number of entries as a u64,
+ * then the entries of every block in list order.
+ * Returns 0 on success or a negative error code.
+ */
+int auxtrace_index__write(int fd, struct list_head *head)
+{
+	struct auxtrace_index *block;
+	u64 total = 0;
+
+	list_for_each_entry(block, head, list)
+		total += block->nr;
+
+	if (writen(fd, &total, sizeof(total)) != sizeof(total))
+		return -errno;
+
+	list_for_each_entry(block, head, list) {
+		int err = auxtrace_index__do_write(fd, block);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Read one index entry from @fd, byte-swapping it if @needs_swap, and append
+ * it to the index list @head.  Returns 0 on success, -1 on a short read or
+ * allocation failure.
+ */
+static int auxtrace_index__process_entry(int fd, struct list_head *head,
+					 bool needs_swap)
+{
+	struct auxtrace_index_entry raw, *dst;
+	struct auxtrace_index *block;
+
+	if (readn(fd, &raw, sizeof(raw)) != sizeof(raw))
+		return -1;
+
+	block = auxtrace_index__last(head);
+	if (!block)
+		return -1;
+
+	if (needs_swap) {
+		raw.file_offset = bswap_64(raw.file_offset);
+		raw.sz = bswap_64(raw.sz);
+	}
+
+	dst = &block->entries[block->nr];
+	dst->file_offset = raw.file_offset;
+	dst->sz = raw.sz;
+
+	block->nr += 1;
+
+	return 0;
+}
+
+/*
+ * Read the auxtrace index from @fd into the session's index list.  The data
+ * is a u64 entry count followed by that many index entries, and must fit in
+ * @size bytes.  Values are byte-swapped if @needs_swap.
+ *
+ * Returns 0 on success, -1 on a short read, an entry count that does not fit
+ * in @size, or a failure to store an entry.
+ */
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+			    bool needs_swap)
+{
+	struct list_head *head = &session->auxtrace_index;
+	u64 nr;
+
+	if (readn(fd, &nr, sizeof(u64)) != sizeof(u64))
+		return -1;
+
+	if (needs_swap)
+		nr = bswap_64(nr);
+
+	/*
+	 * nr comes straight from the file, so bound it by division: multiplying
+	 * it by the entry size could wrap around and slip past the check.
+	 */
+	if (size < sizeof(u64) ||
+	    nr > (size - sizeof(u64)) / sizeof(struct auxtrace_index_entry))
+		return -1;
+
+	while (nr--) {
+		int err;
+
+		err = auxtrace_index__process_entry(fd, head, needs_swap);
+		if (err)
+			return -1;
+	}
+
+	return 0;
+}
+
+/* Queue the auxtrace event that the index entry @ent points at. */
+static int auxtrace_queues__process_index_entry(struct auxtrace_queues *queues,
+						struct perf_session *session,
+						struct auxtrace_index_entry *ent)
+{
+	off_t file_offset = ent->file_offset;
+	size_t sz = ent->sz;
+
+	return auxtrace_queues__add_indexed_event(queues, session,
+						  file_offset, sz);
+}
+
+/*
+ * Queue the data of every event listed in the session's auxtrace index.
+ * Does nothing when decoding is disabled.  Stops at, and returns, the first
+ * error; returns 0 otherwise.
+ */
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+				   struct perf_session *session)
+{
+	struct auxtrace_index *block;
+
+	if (auxtrace__dont_decode(session))
+		return 0;
+
+	list_for_each_entry(block, &session->auxtrace_index, list) {
+		size_t n;
+
+		for (n = 0; n < block->nr; n++) {
+			int err;
+
+			err = auxtrace_queues__process_index_entry(queues,
+							session,
+							&block->entries[n]);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Return the buffer following @buffer on @queue, or the first buffer of the
+ * queue when @buffer is NULL.  Returns NULL when there is no such buffer.
+ */
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+					      struct auxtrace_buffer *buffer)
+{
+	struct list_head *head = &queue->head;
+	struct list_head *pos = buffer ? &buffer->list : head;
+
+	/* Coming back round to the list head means there is nothing after pos */
+	if (pos->next == head)
+		return NULL;
+
+	return list_entry(pos->next, struct auxtrace_buffer, list);
+}
+
+/*
+ * Return a pointer to the data of @buffer, mapping it read-only from @fd if
+ * it is not available yet.  The mapping starts at the page boundary below
+ * data_offset.  Returns NULL if mmap fails.
+ */
+void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd)
+{
+	size_t adj, map_size;
+	off_t map_offset;
+	void *addr;
+
+	if (buffer->data)
+		return buffer->data;
+
+	/* Distance of data_offset from the start of its page */
+	adj = buffer->data_offset & (page_size - 1);
+	map_size = buffer->size + adj;
+	map_offset = buffer->data_offset - adj;
+
+	addr = mmap(NULL, map_size, PROT_READ, MAP_SHARED, fd, map_offset);
+	if (addr == MAP_FAILED)
+		return NULL;
+
+	buffer->mmap_addr = addr;
+	buffer->mmap_size = map_size;
+	buffer->data = addr + adj;
+
+	return buffer->data;
+}
+
+/*
+ * Undo a mapping of the data of @buffer and clear the data pointers.
+ * Nothing happens unless the buffer has both data and a mapping.
+ */
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer)
+{
+	if (buffer->data && buffer->mmap_addr) {
+		munmap(buffer->mmap_addr, buffer->mmap_size);
+		buffer->mmap_addr = NULL;
+		buffer->mmap_size = 0;
+		buffer->data = NULL;
+		buffer->use_data = NULL;
+	}
+}
+
+/*
+ * Give up the data of @buffer: unmap it if it is mapped, and if it was
+ * marked as needing to be freed, free it and set the buffer size to 0.
+ */
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer)
+{
+	auxtrace_buffer__put_data(buffer);
+
+	if (!buffer->data_needs_freeing)
+		return;
+
+	buffer->data_needs_freeing = false;
+	zfree(&buffer->data);
+	buffer->use_data = NULL;
+	buffer->size = 0;
+}
+
+/* Drop the data held by @buffer, then free the buffer itself. */
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer)
+{
+	auxtrace_buffer__drop_data(buffer);
+
+	free(buffer);
+}
+
+/*
+ * Fill in @auxtrace_error as a PERF_RECORD_AUXTRACE_ERROR event from the
+ * given values.  @msg is copied with truncation to MAX_AUXTRACE_ERROR_MSG,
+ * and the event size covers everything up to and including the message's
+ * terminating NUL, rounded up to a multiple of sizeof(u64).
+ */
+void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
+			  int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+			  const char *msg)
+{
+	size_t msg_offset, size;
+
+	memset(auxtrace_error, 0, sizeof(struct auxtrace_error_event));
+
+	auxtrace_error->header.type = PERF_RECORD_AUXTRACE_ERROR;
+	auxtrace_error->type = type;
+	auxtrace_error->code = code;
+	auxtrace_error->cpu = cpu;
+	auxtrace_error->pid = pid;
+	auxtrace_error->tid = tid;
+	auxtrace_error->ip = ip;
+	strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG);
+
+	/* Offset of the message from the start of the event */
+	msg_offset = (char *)auxtrace_error->msg - (char *)auxtrace_error;
+	size = msg_offset + strlen(auxtrace_error->msg) + 1;
+
+	auxtrace_error->header.size = PERF_ALIGN(size, sizeof(u64));
+}
+
+/*
+ * Build a PERF_RECORD_AUXTRACE_INFO event, with private data sized and
+ * filled in by @itr, and pass it to @process.  The event is freed before
+ * returning.  Returns 0 on success or a negative error code.
+ */
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
+					 struct perf_tool *tool,
+					 struct perf_session *session,
+					 perf_event__handler_t process)
+{
+	size_t priv_size, ev_size;
+	union perf_event *ev;
+	int err;
+
+	pr_debug2("Synthesizing auxtrace information\n");
+
+	priv_size = auxtrace_record__info_priv_size(itr, session->evlist);
+	ev_size = sizeof(struct auxtrace_info_event) + priv_size;
+
+	ev = zalloc(ev_size);
+	if (!ev)
+		return -ENOMEM;
+
+	ev->auxtrace_info.header.type = PERF_RECORD_AUXTRACE_INFO;
+	ev->auxtrace_info.header.size = ev_size;
+
+	err = auxtrace_record__info_fill(itr, session, &ev->auxtrace_info,
+					 priv_size);
+	if (!err)
+		err = process(tool, ev, NULL, NULL);
+
+	free(ev);
+	return err;
+}
+
+/*
+ * Dispatch an auxtrace info @event to the decoder matching its type.
+ * Returns the decoder's result, or -EINVAL for an unknown type.
+ */
+int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
+				      union perf_event *event,
+				      struct perf_session *session)
+{
+	enum auxtrace_type type = event->auxtrace_info.type;
+
+	if (dump_trace)
+		fprintf(stdout, " type: %u\n", type);
+
+	if (type == PERF_AUXTRACE_INTEL_PT)
+		return intel_pt_process_auxtrace_info(event, session);
+
+	if (type == PERF_AUXTRACE_INTEL_BTS)
+		return intel_bts_process_auxtrace_info(event, session);
+
+	if (type == PERF_AUXTRACE_ARM_SPE)
+		return arm_spe_process_auxtrace_info(event, session);
+
+	if (type == PERF_AUXTRACE_CS_ETM)
+		return cs_etm__process_auxtrace_info(event, session);
+
+	/* PERF_AUXTRACE_UNKNOWN and any unrecognised value */
+	return -EINVAL;
+}
+
+/*
+ * Process an auxtrace @event through the session's decoder.  On success, or
+ * when decoding is disabled, the size of the AUX data that follows the event
+ * is returned.  Returns -EINVAL without a decoder or for another event type,
+ * or the decoder's negative error.
+ */
+s64 perf_event__process_auxtrace(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_session *session)
+{
+	s64 err;
+
+	if (dump_trace)
+		fprintf(stdout, " size: %#"PRIx64"  offset: %#"PRIx64"  ref: %#"PRIx64"  idx: %u  tid: %d  cpu: %d\n",
+			event->auxtrace.size, event->auxtrace.offset,
+			event->auxtrace.reference, event->auxtrace.idx,
+			event->auxtrace.tid, event->auxtrace.cpu);
+
+	if (auxtrace__dont_decode(session))
+		return event->auxtrace.size;
+
+	if (!session->auxtrace)
+		return -EINVAL;
+
+	if (event->header.type != PERF_RECORD_AUXTRACE)
+		return -EINVAL;
+
+	err = session->auxtrace->process_auxtrace_event(session, event, tool);
+
+	return err < 0 ? err : (s64)event->auxtrace.size;
+}
+
+#define PERF_ITRACE_DEFAULT_PERIOD_TYPE		PERF_ITRACE_PERIOD_NANOSECS
+#define PERF_ITRACE_DEFAULT_PERIOD		100000
+#define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ	16
+#define PERF_ITRACE_MAX_CALLCHAIN_SZ		1024
+#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ	64
+#define PERF_ITRACE_MAX_LAST_BRANCH_SZ		1024
+
+/*
+ * Set @synth_opts to the defaults: synthesize instructions, branches,
+ * transactions, ptwrites, power events and errors, using the default period,
+ * callchain size and last branch size, and skip nothing initially.
+ */
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
+{
+	/* Which events to synthesize */
+	synth_opts->instructions = true;
+	synth_opts->branches = true;
+	synth_opts->transactions = true;
+	synth_opts->ptwrites = true;
+	synth_opts->pwr_events = true;
+	synth_opts->errors = true;
+
+	/* Instruction sampling period */
+	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+
+	/* Sizes and skip count */
+	synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+	synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+	synth_opts->initial_skip = 0;
+}
+
+/*
+ * Please check tools/perf/Documentation/perf-script.txt for information
+ * about the options parsed here, which is introduced after this cset,
+ * when support in 'perf script' for these options is introduced.
+ */
+/*
+ * Option callback: parse the itrace option string @str into the
+ * struct itrace_synth_opts at @opt->value.
+ *
+ * With @unset, decoding is disabled.  With no string, the defaults are used.
+ * Otherwise @str is a sequence of option letters, optionally separated by
+ * spaces or commas; 'i', 'g', 'l' and 's' may be followed by a number.
+ *
+ * Returns 0 on success or -EINVAL (after printing an error) for a bad string.
+ */
+int itrace_parse_synth_opts(const struct option *opt, const char *str,
+			    int unset)
+{
+	struct itrace_synth_opts *synth_opts = opt->value;
+	const char *p;
+	char *endptr;
+	bool period_type_set = false;
+	bool period_set = false;
+
+	synth_opts->set = true;
+
+	if (unset) {
+		synth_opts->dont_decode = true;
+		return 0;
+	}
+
+	if (!str) {
+		itrace_synth_opts__set_default(synth_opts);
+		return 0;
+	}
+
+	for (p = str; *p;) {
+		switch (*p++) {
+		case 'i':
+			/* Instructions, optionally followed by <period>[<unit>] */
+			synth_opts->instructions = true;
+			while (*p == ' ' || *p == ',')
+				p += 1;
+			if (isdigit(*p)) {
+				synth_opts->period = strtoull(p, &endptr, 10);
+				period_set = true;
+				p = endptr;
+				while (*p == ' ' || *p == ',')
+					p += 1;
+				/* Unit: i, t, ms, us or ns; none if the string ends */
+				switch (*p++) {
+				case 'i':
+					synth_opts->period_type =
+						PERF_ITRACE_PERIOD_INSTRUCTIONS;
+					period_type_set = true;
+					break;
+				case 't':
+					synth_opts->period_type =
+						PERF_ITRACE_PERIOD_TICKS;
+					period_type_set = true;
+					break;
+				case 'm':
+					synth_opts->period *= 1000;
+					/* Fall through */
+				case 'u':
+					synth_opts->period *= 1000;
+					/* Fall through */
+				case 'n':
+					/* The period is now in nanoseconds */
+					if (*p++ != 's')
+						goto out_err;
+					synth_opts->period_type =
+						PERF_ITRACE_PERIOD_NANOSECS;
+					period_type_set = true;
+					break;
+				case '\0':
+					goto out;
+				default:
+					goto out_err;
+				}
+			}
+			break;
+		case 'b':
+			synth_opts->branches = true;
+			break;
+		case 'x':
+			synth_opts->transactions = true;
+			break;
+		case 'w':
+			synth_opts->ptwrites = true;
+			break;
+		case 'p':
+			synth_opts->pwr_events = true;
+			break;
+		case 'e':
+			synth_opts->errors = true;
+			break;
+		case 'd':
+			synth_opts->log = true;
+			break;
+		case 'c':
+			/* Calls also turn on branches */
+			synth_opts->branches = true;
+			synth_opts->calls = true;
+			break;
+		case 'r':
+			/* Returns also turn on branches */
+			synth_opts->branches = true;
+			synth_opts->returns = true;
+			break;
+		case 'g':
+			/* Callchains, optionally followed by a size (1 to max) */
+			synth_opts->callchain = true;
+			synth_opts->callchain_sz =
+					PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+			while (*p == ' ' || *p == ',')
+				p += 1;
+			if (isdigit(*p)) {
+				unsigned int val;
+
+				val = strtoul(p, &endptr, 10);
+				p = endptr;
+				if (!val || val > PERF_ITRACE_MAX_CALLCHAIN_SZ)
+					goto out_err;
+				synth_opts->callchain_sz = val;
+			}
+			break;
+		case 'l':
+			/* Last branch, optionally followed by a size (1 to max) */
+			synth_opts->last_branch = true;
+			synth_opts->last_branch_sz =
+					PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+			while (*p == ' ' || *p == ',')
+				p += 1;
+			if (isdigit(*p)) {
+				unsigned int val;
+
+				val = strtoul(p, &endptr, 10);
+				p = endptr;
+				if (!val ||
+				    val > PERF_ITRACE_MAX_LAST_BRANCH_SZ)
+					goto out_err;
+				synth_opts->last_branch_sz = val;
+			}
+			break;
+		case 's':
+			/* Initial skip: a number is mandatory */
+			synth_opts->initial_skip = strtoul(p, &endptr, 10);
+			if (p == endptr)
+				goto out_err;
+			p = endptr;
+			break;
+		case ' ':
+		case ',':
+			break;
+		default:
+			goto out_err;
+		}
+	}
+out:
+	/* Fill in whichever part of the period the string did not specify */
+	if (synth_opts->instructions) {
+		if (!period_type_set)
+			synth_opts->period_type =
+					PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+		if (!period_set)
+			synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	}
+
+	return 0;
+
+out_err:
+	pr_err("Bad Instruction Tracing options '%s'\n", str);
+	return -EINVAL;
+}
+
+/* Names of the known error types, indexed by error type */
+static const char * const auxtrace_error_type_name[] = {
+	[PERF_AUXTRACE_ERROR_ITRACE] = "instruction trace",
+};
+
+/*
+ * Return the name of error type @type, or "unknown AUX" for a type that is
+ * out of range or has no name.
+ */
+static const char *auxtrace_error_name(int type)
+{
+	const char *error_type_name = NULL;
+
+	/*
+	 * @type is signed and callers pass an unsigned event field, so a large
+	 * value arrives negative: reject it rather than index before the table.
+	 */
+	if (type >= 0 && type < PERF_AUXTRACE_ERROR_MAX)
+		error_type_name = auxtrace_error_type_name[type];
+	if (!error_type_name)
+		error_type_name = "unknown AUX";
+	return error_type_name;
+}
+
+/*
+ * Print the auxtrace error @event to @fp as one line.  Returns the sum of
+ * the two fprintf() results.
+ */
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp)
+{
+	struct auxtrace_error_event *e = &event->auxtrace_error;
+	const char *name = auxtrace_error_name(e->type);
+	int printed;
+
+	printed = fprintf(fp, " %s error type %u", name, e->type);
+	printed += fprintf(fp,
+			   " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n",
+			   e->cpu, e->pid, e->tid, e->ip, e->code, e->msg);
+
+	return printed;
+}
+
+/*
+ * Count the auxtrace error @event in the session's per-type statistics.
+ * Errors with an out-of-range type are not counted.
+ */
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+				      union perf_event *event)
+{
+	struct auxtrace_error_event *e = &event->auxtrace_error;
+
+	if (e->type >= PERF_AUXTRACE_ERROR_MAX)
+		return;
+
+	session->evlist->stats.nr_auxtrace_errors[e->type] += 1;
+}
+
+/* Issue one warning per error type that has a non-zero count in @stats. */
+void events_stats__auxtrace_error_warn(const struct events_stats *stats)
+{
+	int type;
+
+	for (type = 0; type < PERF_AUXTRACE_ERROR_MAX; type++) {
+		if (stats->nr_auxtrace_errors[type])
+			ui__warning("%u %s errors\n",
+				    stats->nr_auxtrace_errors[type],
+				    auxtrace_error_name(type));
+	}
+}
+
+/*
+ * Print the auxtrace error @event to stdout unless decoding is disabled.
+ * Always returns 0.
+ */
+int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused,
+				       union perf_event *event,
+				       struct perf_session *session)
+{
+	if (!auxtrace__dont_decode(session))
+		perf_event__fprintf_auxtrace_error(event, stdout);
+
+	return 0;
+}
+
+/*
+ * Pass the new data in the AUX area mmap @mm to @fn, as a
+ * PERF_RECORD_AUXTRACE event plus up to two data fragments (the data may
+ * wrap around the end of the buffer).
+ *
+ * In snapshot mode the head comes from auxtrace_mmap__read_snapshot_head(),
+ * head and old may be adjusted by auxtrace_record__find_snapshot(), and at
+ * most @snapshot_size bytes are passed on.  Otherwise the head comes from
+ * auxtrace_mmap__read_head() and, once @fn has succeeded, the tail is written
+ * and @itr->read_finish is called if set.
+ *
+ * Returns 0 if there is no new data, 1 if data was passed to @fn, -1 if
+ * finding the snapshot or @fn failed, or the negative result of read_finish.
+ */
+static int __auxtrace_mmap__read(struct auxtrace_mmap *mm,
+				 struct auxtrace_record *itr,
+				 struct perf_tool *tool, process_auxtrace_t fn,
+				 bool snapshot, size_t snapshot_size)
+{
+	u64 head, old = mm->prev, offset, ref;
+	unsigned char *data = mm->base;
+	size_t size, head_off, old_off, len1, len2, padding;
+	union perf_event ev;
+	void *data1, *data2;
+
+	if (snapshot) {
+		head = auxtrace_mmap__read_snapshot_head(mm);
+		if (auxtrace_record__find_snapshot(itr, mm->idx, mm, data,
+						   &head, &old))
+			return -1;
+	} else {
+		head = auxtrace_mmap__read_head(mm);
+	}
+
+	/* Nothing new since the previous read */
+	if (old == head)
+		return 0;
+
+	pr_debug3("auxtrace idx %d old %#"PRIx64" head %#"PRIx64" diff %#"PRIx64"\n",
+		  mm->idx, old, head, head - old);
+
+	/* Reduce head and old to offsets within the buffer */
+	if (mm->mask) {
+		head_off = head & mm->mask;
+		old_off = old & mm->mask;
+	} else {
+		head_off = head % mm->len;
+		old_off = old % mm->len;
+	}
+
+	/* Equal offsets (with old != head) count as a completely full buffer */
+	if (head_off > old_off)
+		size = head_off - old_off;
+	else
+		size = mm->len - (old_off - head_off);
+
+	if (snapshot && size > snapshot_size)
+		size = snapshot_size;
+
+	ref = auxtrace_record__reference(itr);
+
+	if (head > old || size <= head || mm->mask) {
+		offset = head - size;
+	} else {
+		/*
+		 * When the buffer size is not a power of 2, 'head' wraps at the
+		 * highest multiple of the buffer size, so we have to subtract
+		 * the remainder here.
+		 */
+		u64 rem = (0ULL - mm->len) % mm->len;
+
+		offset = head - size - rem;
+	}
+
+	/* Split into the part up to the end of the buffer and the wrapped part */
+	if (size > head_off) {
+		len1 = size - head_off;
+		data1 = &data[mm->len - len1];
+		len2 = head_off;
+		data2 = &data[0];
+	} else {
+		len1 = size;
+		data1 = &data[head_off - len1];
+		len2 = 0;
+		data2 = NULL;
+	}
+
+	/* Trim the first fragment down to a multiple of the alignment */
+	if (itr->alignment) {
+		unsigned int unwanted = len1 % itr->alignment;
+
+		len1 -= unwanted;
+		size -= unwanted;
+	}
+
+	/* padding must be written by fn() e.g. record__process_auxtrace() */
+	padding = size & 7;
+	if (padding)
+		padding = 8 - padding;
+
+	memset(&ev, 0, sizeof(ev));
+	ev.auxtrace.header.type = PERF_RECORD_AUXTRACE;
+	ev.auxtrace.header.size = sizeof(ev.auxtrace);
+	ev.auxtrace.size = size + padding;
+	ev.auxtrace.offset = offset;
+	ev.auxtrace.reference = ref;
+	ev.auxtrace.idx = mm->idx;
+	ev.auxtrace.tid = mm->tid;
+	ev.auxtrace.cpu = mm->cpu;
+
+	if (fn(tool, &ev, data1, len1, data2, len2))
+		return -1;
+
+	/* Remember how far we got, for the next read */
+	mm->prev = head;
+
+	if (!snapshot) {
+		auxtrace_mmap__write_tail(mm, head);
+		if (itr->read_finish) {
+			int err;
+
+			err = itr->read_finish(itr, mm->idx);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 1;
+}
+
+/* Read new AUX area data from @mm in normal (non-snapshot) mode. */
+int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+			struct perf_tool *tool, process_auxtrace_t fn)
+{
+	const bool snapshot = false;
+
+	return __auxtrace_mmap__read(mm, itr, tool, fn, snapshot, 0);
+}
+
+/*
+ * Snapshot entry point: forwards to __auxtrace_mmap__read() with the snapshot
+ * flag set and @snapshot_size as the snapshot size.
+ */
+int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+				 struct auxtrace_record *itr,
+				 struct perf_tool *tool, process_auxtrace_t fn,
+				 size_t snapshot_size)
+{
+	return __auxtrace_mmap__read(mm, itr, tool, fn, true, snapshot_size);
+}
+
+/**
+ * struct auxtrace_cache - hash table to implement a cache
+ * @hashtable: the hashtable
+ * @sz: hashtable size (number of hlists)
+ * @entry_size: size of an entry
+ * @limit: limit the number of entries to this maximum, when reached the cache
+ *         is dropped and caching begins again with an empty cache
+ * @cnt: current number of entries
+ * @bits: hashtable size (@sz = 2^@bits)
+ */
+struct auxtrace_cache {
+	struct hlist_head *hashtable;
+	size_t sz;
+	size_t entry_size;
+	size_t limit;
+	size_t cnt;
+	unsigned int bits;
+};
+
+/*
+ * Allocate a cache whose hash table has 2^@bits buckets.
+ * @entry_size is the allocation size used by auxtrace_cache__alloc_entry().
+ * @limit_percent sets the entry limit as a percentage of the bucket count.
+ * Returns the new cache, or NULL if either allocation fails.
+ */
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+					   unsigned int limit_percent)
+{
+	struct auxtrace_cache *cache = zalloc(sizeof(*cache));
+	size_t nr_buckets = 1UL << bits;
+	size_t n;
+
+	if (!cache)
+		return NULL;
+
+	cache->hashtable = calloc(nr_buckets, sizeof(*cache->hashtable));
+	if (!cache->hashtable) {
+		free(cache);
+		return NULL;
+	}
+
+	for (n = 0; n < nr_buckets; n++)
+		INIT_HLIST_HEAD(&cache->hashtable[n]);
+
+	cache->sz = nr_buckets;
+	cache->entry_size = entry_size;
+	cache->limit = (nr_buckets * limit_percent) / 100;
+	cache->bits = bits;
+
+	return cache;
+}
+
+/*
+ * Empty the cache: unlink and free every entry in every bucket, then reset the
+ * entry count.  The hash table itself is kept.  A NULL cache is ignored.
+ */
+static void auxtrace_cache__drop(struct auxtrace_cache *c)
+{
+	struct auxtrace_cache_entry *entry;
+	struct hlist_node *tmp;
+	size_t i;
+
+	if (!c)
+		return;
+
+	for (i = 0; i < c->sz; i++) {
+		/* _safe variant because entries are removed while iterating */
+		hlist_for_each_entry_safe(entry, tmp, &c->hashtable[i], hash) {
+			hlist_del(&entry->hash);
+			auxtrace_cache__free_entry(c, entry);
+		}
+	}
+
+	c->cnt = 0;
+}
+
+/* Release all entries, the hash table and the cache itself; NULL is a no-op. */
+void auxtrace_cache__free(struct auxtrace_cache *c)
+{
+	if (c) {
+		auxtrace_cache__drop(c);
+		free(c->hashtable);
+		free(c);
+	}
+}
+
+/*
+ * Allocate uninitialized memory for one entry of the size given to
+ * auxtrace_cache__new().  Returns NULL if malloc() fails.
+ */
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c)
+{
+	return malloc(c->entry_size);
+}
+
+/* Free an entry obtained from auxtrace_cache__alloc_entry(); @c is unused. */
+void auxtrace_cache__free_entry(struct auxtrace_cache *c __maybe_unused,
+				void *entry)
+{
+	free(entry);
+}
+
+/*
+ * Insert @entry into the cache under @key.  If a non-zero limit is set and
+ * this addition takes the entry count past it, the whole cache is emptied
+ * before the entry is added.  Always returns 0.
+ */
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+			struct auxtrace_cache_entry *entry)
+{
+	/*
+	 * auxtrace_cache__drop() resets cnt to 0, so the entry added below is
+	 * not counted after a drop.
+	 */
+	if (c->limit && ++c->cnt > c->limit)
+		auxtrace_cache__drop(c);
+
+	entry->key = key;
+	hlist_add_head(&entry->hash, &c->hashtable[hash_32(key, c->bits)]);
+
+	return 0;
+}
+
+/*
+ * Find the entry stored under @key.  Returns the entry, or NULL if there is no
+ * such entry or @c is NULL.
+ */
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key)
+{
+	struct auxtrace_cache_entry *e;
+
+	if (!c)
+		return NULL;
+
+	/* Only the bucket that @key hashes to needs to be searched */
+	hlist_for_each_entry(e, &c->hashtable[hash_32(key, c->bits)], hash) {
+		if (e->key == key)
+			return e;
+	}
+
+	return NULL;
+}
+
+/*
+ * Free the filter's backing string and clear the string members (action,
+ * symbol names, file name) so that none is left dangling.
+ */
+static void addr_filter__free_str(struct addr_filter *filt)
+{
+	free(filt->str);
+	filt->action   = NULL;
+	filt->sym_from = NULL;
+	filt->sym_to   = NULL;
+	filt->filename = NULL;
+	filt->str      = NULL;
+}
+
+/* Allocate a zeroed filter with an initialized list node; NULL on failure. */
+static struct addr_filter *addr_filter__new(void)
+{
+	struct addr_filter *filt = zalloc(sizeof(*filt));
+
+	if (!filt)
+		return NULL;
+
+	INIT_LIST_HEAD(&filt->list);
+	return filt;
+}
+
+/* Free a filter together with its backing string; NULL is a no-op. */
+static void addr_filter__free(struct addr_filter *filt)
+{
+	if (!filt)
+		return;
+
+	addr_filter__free_str(filt);
+	free(filt);
+}
+
+/* Append @filt to the end of the list and bump the filter count. */
+static void addr_filters__add(struct addr_filters *filts,
+			      struct addr_filter *filt)
+{
+	list_add_tail(&filt->list, &filts->head);
+	filts->cnt += 1;
+}
+
+/* Unlink @filt from the list and decrement the count; @filt is not freed. */
+static void addr_filters__del(struct addr_filters *filts,
+			      struct addr_filter *filt)
+{
+	list_del_init(&filt->list);
+	filts->cnt -= 1;
+}
+
+/* Initialize @filts as an empty list of address filters. */
+void addr_filters__init(struct addr_filters *filts)
+{
+	INIT_LIST_HEAD(&filts->head);
+	filts->cnt = 0;
+}
+
+/* Remove and free every filter on the list; @filts itself is not freed. */
+void addr_filters__exit(struct addr_filters *filts)
+{
+	struct addr_filter *filt, *n;
+
+	/* _safe variant because each filter is freed while iterating */
+	list_for_each_entry_safe(filt, n, &filts->head, list) {
+		addr_filters__del(filts, filt);
+		addr_filter__free(filt);
+	}
+}
+
+/*
+ * Parse one token at *@inp, after skipping leading spaces.
+ *
+ * If the token starts with a digit it is converted with strtoull() (base
+ * auto-detected) and stored in *@num.  Otherwise *@str is pointed at the token
+ * and the first character from @str_delim that follows it is overwritten with
+ * '\0', i.e. the input buffer is modified in place.
+ *
+ * *@inp is advanced past what was consumed.  Passing NULL for @num or @str
+ * makes that kind of token an error.
+ *
+ * Returns 0 on success, -EINVAL for an unexpected or empty token, or the
+ * negated errno from strtoull().
+ */
+static int parse_num_or_str(char **inp, u64 *num, const char **str,
+			    const char *str_delim)
+{
+	*inp += strspn(*inp, " ");
+
+	if (isdigit(**inp)) {
+		char *endptr;
+
+		if (!num)
+			return -EINVAL;
+		errno = 0;
+		*num = strtoull(*inp, &endptr, 0);
+		if (errno)
+			return -errno;
+		if (endptr == *inp)
+			return -EINVAL;
+		*inp = endptr;
+	} else {
+		size_t n;
+
+		if (!str)
+			return -EINVAL;
+		/* Spaces were already skipped above, so this is a no-op */
+		*inp += strspn(*inp, " ");
+		*str = *inp;
+		n = strcspn(*inp, str_delim);
+		if (!n)
+			return -EINVAL;
+		*inp += n;
+		/* Terminate the token unless it ran to the end of the input */
+		if (**inp) {
+			**inp = '\0';
+			*inp += 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Translate the action keyword in @filt->action into the ->start and ->range
+ * flags.  'tracestop' is rewritten to 'stop' by advancing the action pointer.
+ * Returns 0, or -EINVAL if the keyword is not recognized.
+ */
+static int parse_action(struct addr_filter *filt)
+{
+	if (!strcmp(filt->action, "filter")) {
+		filt->start = true;
+		filt->range = true;
+	} else if (!strcmp(filt->action, "start")) {
+		filt->start = true;
+	} else if (!strcmp(filt->action, "stop")) {
+		filt->start = false;
+	} else if (!strcmp(filt->action, "tracestop")) {
+		filt->start = false;
+		filt->range = true;
+		filt->action += 5; /* Change 'tracestop' to 'stop' */
+	} else {
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Parse an optional symbol index of the form '#<n>', '#g' or '#G' at *@inp,
+ * after skipping leading spaces.
+ *
+ * *@idx is set to -1 when there is no '#', to 0 for 'g' / 'G', and otherwise
+ * to the number that follows.  *@inp is advanced past what was consumed.
+ *
+ * Returns 0 on success, -EINVAL if no number follows '#' or it exceeds
+ * INT_MAX, or the negated errno from strtoul().
+ */
+static int parse_sym_idx(char **inp, int *idx)
+{
+	unsigned long val;
+	char *cur, *end;
+
+	*idx = -1;
+
+	cur = *inp + strspn(*inp, " ");
+	*inp = cur;
+
+	if (*cur != '#')
+		return 0;
+
+	/* Step over the '#', even if what follows turns out to be invalid */
+	*inp = ++cur;
+
+	if (*cur == 'g' || *cur == 'G') {
+		*inp = cur + 1;
+		*idx = 0;
+		return 0;
+	}
+
+	errno = 0;
+	val = strtoul(cur, &end, 0);
+	if (errno)
+		return -errno;
+	if (end == cur || val > INT_MAX)
+		return -EINVAL;
+
+	*inp = end;
+	*idx = val;
+
+	return 0;
+}
+
+/*
+ * Parse an address or size that is given either as a number (stored in *@num)
+ * or as a symbol name (stored in *@str).  The optional '#' symbol index is
+ * parsed into *@idx only when *@str is non-NULL afterwards.
+ * Returns 0 on success or a negative error code.
+ */
+static int parse_addr_size(char **inp, u64 *num, const char **str, int *idx)
+{
+	int ret = parse_num_or_str(inp, num, str, " ");
+
+	if (ret)
+		return ret;
+
+	if (!*str)
+		return 0;
+
+	return parse_sym_idx(inp, idx);
+}
+
+/*
+ * Parse a single filter from *@filter_inp into @filt.
+ *
+ * The input is duplicated into @filt->str and tokenized in place, so the
+ * action, symbol and file name members of @filt point into that copy.
+ * On success *@filter_inp is advanced past the filter and any trailing spaces
+ * and commas.  On failure the copy is freed and a negative error code is
+ * returned.
+ */
+static int parse_one_filter(struct addr_filter *filt, const char **filter_inp)
+{
+	char *fstr;
+	int err;
+
+	filt->str = fstr = strdup(*filter_inp);
+	if (!fstr)
+		return -ENOMEM;
+
+	/* The action must be a word: a NULL @num makes a number an error */
+	err = parse_num_or_str(&fstr, NULL, &filt->action, " ");
+	if (err)
+		goto out_err;
+
+	err = parse_action(filt);
+	if (err)
+		goto out_err;
+
+	err = parse_addr_size(&fstr, &filt->addr, &filt->sym_from,
+			      &filt->sym_from_idx);
+	if (err)
+		goto out_err;
+
+	fstr += strspn(fstr, " ");
+
+	/* Optional '/' followed by an end symbol or a size */
+	if (*fstr == '/') {
+		fstr += 1;
+		err = parse_addr_size(&fstr, &filt->size, &filt->sym_to,
+				      &filt->sym_to_idx);
+		if (err)
+			goto out_err;
+		filt->range = true;
+	}
+
+	fstr += strspn(fstr, " ");
+
+	/* Optional '@' followed by a file name */
+	if (*fstr == '@') {
+		fstr += 1;
+		err = parse_num_or_str(&fstr, NULL, &filt->filename, " ,");
+		if (err)
+			goto out_err;
+	}
+
+	fstr += strspn(fstr, " ,");
+
+	/* Same offset in the caller's string as was consumed in the copy */
+	*filter_inp += fstr - filt->str;
+
+	return 0;
+
+out_err:
+	addr_filter__free_str(filt);
+
+	return err;
+}
+
+/*
+ * Parse @filter, which may hold several filters, and append each one to
+ * @filts.
+ *
+ * Returns 0 on success.  On failure every filter already on @filts is freed
+ * and a negative error code is returned: -ENOMEM if a filter cannot be
+ * allocated, otherwise the error from parse_one_filter().
+ */
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+				    const char *filter)
+{
+	struct addr_filter *filt;
+	const char *fstr = filter;
+	int err;
+
+	while (*fstr) {
+		filt = addr_filter__new();
+		/* parse_one_filter() dereferences filt, so check it first */
+		if (!filt) {
+			err = -ENOMEM;
+			goto out_err;
+		}
+
+		err = parse_one_filter(filt, &fstr);
+		if (err) {
+			addr_filter__free(filt);
+			goto out_err;
+		}
+		addr_filters__add(filts, filt);
+	}
+
+	return 0;
+
+out_err:
+	addr_filters__exit(filts);
+	return err;
+}
+
+/**
+ * struct sym_args - state passed to the kallsyms__parse() callbacks.
+ * @name: symbol name to look for
+ * @start: address recorded for the symbol (or for the first function symbol
+ *         when finding the extent of the entire kernel)
+ * @size: size worked out from the address of a later symbol
+ * @idx: which occurrence of @name is wanted when @selected is set
+ * @cnt: running count of symbols matching @name
+ * @started: @start has been recorded
+ * @global: only accept symbols whose kallsyms type letter is upper case
+ * @selected: accept only the @idx'th symbol named @name
+ * @duplicate: a second symbol matched, so @name is ambiguous
+ * @near: the symbol most recently seen by print_kern_sym_cb() was a match
+ */
+struct sym_args {
+	const char	*name;
+	u64		start;
+	u64		size;
+	int		idx;
+	int		cnt;
+	bool		started;
+	bool		global;
+	bool		selected;
+	bool		duplicate;
+	bool		near;
+};
+
+/*
+ * Decide whether the kallsyms symbol @name of type @type is the one described
+ * by @args.  Note the side effect: when @args->selected is set, @args->cnt is
+ * incremented for each function symbol with the right name (unless the
+ * 'global' alternative already matched).
+ */
+static bool kern_sym_match(struct sym_args *args, const char *name, char type)
+{
+	/* A function with the same name, and global or the n'th found or any */
+	return symbol_type__is_a(type, MAP__FUNCTION) &&
+	       !strcmp(name, args->name) &&
+	       ((args->global && isupper(type)) ||
+		(args->selected && ++(args->cnt) == args->idx) ||
+		(!args->global && !args->selected));
+}
+
+/*
+ * kallsyms__parse() callback used by find_kern_sym().
+ *
+ * Records the address of the first matching symbol.  For later symbols it
+ * sets the size from the distance to the recorded address (while the size is
+ * still zero) and then returns 1 either when an @idx-selected symbol has a
+ * size, or when another symbol matches, in which case ->duplicate is set.
+ * Returns 0 in all other cases.
+ */
+static int find_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+	struct sym_args *args = arg;
+
+	if (args->started) {
+		/* Stays zero while later symbols share the same address */
+		if (!args->size)
+			args->size = start - args->start;
+		if (args->selected) {
+			if (args->size)
+				return 1;
+		} else if (kern_sym_match(args, name, type)) {
+			args->duplicate = true;
+			return 1;
+		}
+	} else if (kern_sym_match(args, name, type)) {
+		args->started = true;
+		args->start = start;
+	}
+
+	return 0;
+}
+
+/*
+ * kallsyms__parse() callback that prints every symbol matching @args, numbered
+ * in the order found, each followed by the name of the next non-matching
+ * symbol as a hint to its location.  Always returns 0.
+ */
+static int print_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+	struct sym_args *args = arg;
+
+	if (kern_sym_match(args, name, type)) {
+		pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+		       ++args->cnt, start, type, name);
+		args->near = true;
+	} else if (args->near) {
+		args->near = false;
+		pr_err("\t\twhich is near\t\t%s\n", name);
+	}
+
+	return 0;
+}
+
+/*
+ * Report that @sym_name could not be found, worded according to @idx:
+ * negative means any symbol, 0 means a global symbol and a positive value
+ * means the N'th occurrence.  Always returns -EINVAL.
+ */
+static int sym_not_found_error(const char *sym_name, int idx)
+{
+	if (idx < 0)
+		pr_err("Symbol '%s' not found.\n", sym_name);
+	else if (idx == 0)
+		pr_err("Global symbol '%s' not found.\n", sym_name);
+	else
+		pr_err("N'th occurrence (N=%d) of symbol '%s' not found.\n",
+		       idx, sym_name);
+
+	pr_err("Note that symbols must be functions.\n");
+
+	return -EINVAL;
+}
+
+/*
+ * Look up kernel function @sym_name in /proc/kallsyms and return its address
+ * and size in *@start and *@size (both are zeroed first).
+ *
+ * @idx selects among symbols with the same name: 0 means the global one, a
+ * positive value means the @idx'th occurrence, and a negative value means the
+ * name must be unique.
+ *
+ * Returns 0 on success, the error from kallsyms__parse(), or -EINVAL if the
+ * symbol is ambiguous (the candidates are printed) or not found.
+ */
+static int find_kern_sym(const char *sym_name, u64 *start, u64 *size, int idx)
+{
+	struct sym_args args = {
+		.name = sym_name,
+		.idx = idx,
+		.global = !idx,
+		.selected = idx > 0,
+	};
+	int err;
+
+	*start = 0;
+	*size = 0;
+
+	err = kallsyms__parse("/proc/kallsyms", &args, find_kern_sym_cb);
+	if (err < 0) {
+		pr_err("Failed to parse /proc/kallsyms\n");
+		return err;
+	}
+
+	if (args.duplicate) {
+		pr_err("Multiple kernel symbols with name '%s'\n", sym_name);
+		/* Restart numbering for the second pass that lists candidates */
+		args.cnt = 0;
+		kallsyms__parse("/proc/kallsyms", &args, print_kern_sym_cb);
+		pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+		       sym_name);
+		pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+		return -EINVAL;
+	}
+
+	if (!args.started) {
+		pr_err("Kernel symbol lookup: ");
+		return sym_not_found_error(sym_name, idx);
+	}
+
+	*start = args.start;
+	*size = args.size;
+
+	return 0;
+}
+
+/*
+ * kallsyms__parse() callback that tracks the extent of all function symbols:
+ * ->start is the address of the first one seen and ->size is updated on every
+ * function symbol so that it reaches one page beyond the page-aligned address
+ * of the latest one.  Non-function symbols are ignored.  Always returns 0.
+ */
+static int find_entire_kern_cb(void *arg, const char *name __maybe_unused,
+			       char type, u64 start)
+{
+	struct sym_args *args = arg;
+
+	if (!symbol_type__is_a(type, MAP__FUNCTION))
+		return 0;
+
+	if (!args->started) {
+		args->started = true;
+		args->start = start;
+	}
+	/* Don't know exactly where the kernel ends, so we add a page */
+	args->size = round_up(start, page_size) + page_size - args->start;
+
+	return 0;
+}
+
+/*
+ * Set @filt->addr and @filt->size to cover every function symbol listed in
+ * /proc/kallsyms.
+ *
+ * Returns 0 on success, the error from kallsyms__parse(), or -EINVAL if
+ * parsing succeeded but no function symbol was found.
+ */
+static int addr_filter__entire_kernel(struct addr_filter *filt)
+{
+	struct sym_args args = { .started = false };
+	int err;
+
+	err = kallsyms__parse("/proc/kallsyms", &args, find_entire_kern_cb);
+	if (err < 0 || !args.started) {
+		pr_err("Failed to parse /proc/kallsyms\n");
+		/*
+		 * Finding no function symbols is a failure even if parsing
+		 * succeeded: returning a non-negative err here would report
+		 * success with filt->addr and filt->size left unset.
+		 */
+		return err < 0 ? err : -EINVAL;
+	}
+
+	filt->addr = args.start;
+	filt->size = args.size;
+
+	return 0;
+}
+
+/*
+ * Check that the end symbol, which spans @start to @start + @size, does not
+ * end before the filter start address @filt->addr.
+ *
+ * Returns 0 if the order is valid, otherwise prints an error naming the
+ * symbols or address involved and returns -EINVAL.
+ */
+static int check_end_after_start(struct addr_filter *filt, u64 start, u64 size)
+{
+	if (start + size >= filt->addr)
+		return 0;
+
+	if (filt->sym_from) {
+		pr_err("Symbol '%s' (0x%"PRIx64") comes before '%s' (0x%"PRIx64")\n",
+		       filt->sym_to, start, filt->sym_from, filt->addr);
+	} else {
+		/* The filter start was given as a plain address */
+		pr_err("Symbol '%s' (0x%"PRIx64") comes before address 0x%"PRIx64"\n",
+		       filt->sym_to, start, filt->addr);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Resolve the kernel symbols named in @filt into @filt->addr and @filt->size
+ * using /proc/kallsyms.
+ *
+ * A start symbol of "*" selects the entire kernel.  For a range filter with
+ * neither a size nor an end symbol, the size of the start symbol is used.
+ * With an end symbol, the size runs from the start address to the end of that
+ * symbol.
+ *
+ * Returns 0 on success or a negative error code, including -EINVAL when
+ * kernel addresses are restricted or a needed symbol size cannot be
+ * determined.
+ */
+static int addr_filter__resolve_kernel_syms(struct addr_filter *filt)
+{
+	bool no_size = false;
+	u64 start, size;
+	int err;
+
+	if (symbol_conf.kptr_restrict) {
+		pr_err("Kernel addresses are restricted. Unable to resolve kernel symbols.\n");
+		return -EINVAL;
+	}
+
+	if (filt->sym_from && !strcmp(filt->sym_from, "*"))
+		return addr_filter__entire_kernel(filt);
+
+	if (filt->sym_from) {
+		err = find_kern_sym(filt->sym_from, &start, &size,
+				    filt->sym_from_idx);
+		if (err)
+			return err;
+		filt->addr = start;
+		/* Only take the symbol's own size if nothing else gives one */
+		if (filt->range && !filt->size && !filt->sym_to) {
+			filt->size = size;
+			no_size = !size;
+		}
+	}
+
+	if (filt->sym_to) {
+		err = find_kern_sym(filt->sym_to, &start, &size,
+				    filt->sym_to_idx);
+		if (err)
+			return err;
+
+		err = check_end_after_start(filt, start, size);
+		if (err)
+			return err;
+		filt->size = start + size - filt->addr;
+		no_size = !size;
+	}
+
+	/* The very last symbol in kallsyms does not imply a particular size */
+	if (no_size) {
+		pr_err("Cannot determine size of symbol '%s'\n",
+		       filt->sym_to ? filt->sym_to : filt->sym_from);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a map for file @name, load it, and return a new reference to the
+ * map's dso.  The map reference is dropped again before returning, so the
+ * caller owns only the dso reference and releases it with dso__put().
+ * Returns NULL if the map cannot be created.
+ */
+static struct dso *load_dso(const char *name)
+{
+	struct map *map;
+	struct dso *dso;
+
+	map = dso__new_map(name);
+	if (!map)
+		return NULL;
+
+	/* The result of loading is not checked here */
+	map__load(map);
+
+	dso = dso__get(map->dso);
+
+	map__put(map);
+
+	return dso;
+}
+
+/*
+ * Decide whether @sym is the symbol described by @name and @idx: @idx == 0
+ * requires a global symbol, @idx > 0 requires the @idx'th occurrence of the
+ * name, and @idx < 0 accepts any symbol with that name.  Note the side effect:
+ * *@cnt is incremented for each name match tested against a positive @idx.
+ */
+static bool dso_sym_match(struct symbol *sym, const char *name, int *cnt,
+			  int idx)
+{
+	/* Same name, and global or the n'th found or any */
+	return !arch__compare_symbol_names(name, sym->name) &&
+	       ((!idx && sym->binding == STB_GLOBAL) ||
+		(idx > 0 && ++*cnt == idx) ||
+		idx < 0);
+}
+
+/*
+ * List every function symbol in @dso named @sym_name, numbered in the order
+ * found and tagged 'g', 'l' or 'w' according to its binding, each followed by
+ * the name of the next non-matching symbol, then explain how to disambiguate.
+ */
+static void print_duplicate_syms(struct dso *dso, const char *sym_name)
+{
+	struct symbol *sym;
+	bool near = false;
+	int cnt = 0;
+
+	pr_err("Multiple symbols with name '%s'\n", sym_name);
+
+	sym = dso__first_symbol(dso, MAP__FUNCTION);
+	while (sym) {
+		/* idx of -1 matches on name alone and leaves cnt untouched */
+		if (dso_sym_match(sym, sym_name, &cnt, -1)) {
+			pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+			       ++cnt, sym->start,
+			       sym->binding == STB_GLOBAL ? 'g' :
+			       sym->binding == STB_LOCAL  ? 'l' : 'w',
+			       sym->name);
+			near = true;
+		} else if (near) {
+			near = false;
+			pr_err("\t\twhich is near\t\t%s\n", sym->name);
+		}
+		sym = dso__next_symbol(sym);
+	}
+
+	pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+	       sym_name);
+	pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+}
+
+/*
+ * Look up function @sym_name in @dso and return its address and size in
+ * *@start and *@size (both are zeroed first).
+ *
+ * @idx selects among symbols with the same name: 0 means the global one, a
+ * positive value means the @idx'th occurrence, and a negative value means the
+ * name must be unique.
+ *
+ * A zero *@start is used to mean "not found yet", so a symbol at address 0
+ * cannot be matched.
+ *
+ * Returns 0 on success or -EINVAL if the symbol is ambiguous (the candidates
+ * are printed) or not found.
+ */
+static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
+			u64 *size, int idx)
+{
+	struct symbol *sym;
+	int cnt = 0;
+
+	*start = 0;
+	*size = 0;
+
+	sym = dso__first_symbol(dso, MAP__FUNCTION);
+	while (sym) {
+		if (*start) {
+			/* Fall back to the gap to the next symbol for a size */
+			if (!*size)
+				*size = sym->start - *start;
+			if (idx > 0) {
+				/*
+				 * The selected occurrence is fully resolved.
+				 * This is success: callers treat any non-zero
+				 * return as an error.
+				 */
+				if (*size)
+					return 0;
+			} else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+				print_duplicate_syms(dso, sym_name);
+				return -EINVAL;
+			}
+		} else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+			*start = sym->start;
+			*size = sym->end - sym->start;
+		}
+		sym = dso__next_symbol(sym);
+	}
+
+	if (!*start)
+		return sym_not_found_error(sym_name, idx);
+
+	return 0;
+}
+
+/*
+ * Set @filt->addr and @filt->size to span from the start of the first function
+ * symbol in @dso to the end of the last one.  Returns 0 on success or -EINVAL
+ * if the dso has no function symbols.
+ */
+static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso)
+{
+	struct symbol *first = dso__first_symbol(dso, MAP__FUNCTION);
+	struct symbol *last = dso__last_symbol(dso, MAP__FUNCTION);
+
+	if (first && last) {
+		filt->addr = first->start;
+		filt->size = last->end - first->start;
+		return 0;
+	}
+
+	pr_err("Failed to determine filter for %s\nNo symbols found.\n",
+	       filt->filename);
+	return -EINVAL;
+}
+
+/*
+ * Resolve any symbol names in @filt into @filt->addr and @filt->size.
+ *
+ * Filters without symbols need no work.  Filters without a file name are
+ * resolved against the kernel; otherwise the symbols are looked up in the
+ * file's dso.  A start symbol of "*" selects the entire dso.
+ *
+ * Returns 0 on success or a non-zero error code.  The dso reference taken
+ * here is dropped on every path.
+ */
+static int addr_filter__resolve_syms(struct addr_filter *filt)
+{
+	u64 start, size;
+	struct dso *dso;
+	int err = 0;
+
+	if (!filt->sym_from && !filt->sym_to)
+		return 0;
+
+	if (!filt->filename)
+		return addr_filter__resolve_kernel_syms(filt);
+
+	dso = load_dso(filt->filename);
+	if (!dso) {
+		pr_err("Failed to load symbols from: %s\n", filt->filename);
+		return -EINVAL;
+	}
+
+	if (filt->sym_from && !strcmp(filt->sym_from, "*")) {
+		err = addr_filter__entire_dso(filt, dso);
+		goto put_dso;
+	}
+
+	if (filt->sym_from) {
+		err = find_dso_sym(dso, filt->sym_from, &start, &size,
+				   filt->sym_from_idx);
+		if (err)
+			goto put_dso;
+		filt->addr = start;
+		/* Only take the symbol's own size if nothing else gives one */
+		if (filt->range && !filt->size && !filt->sym_to)
+			filt->size = size;
+	}
+
+	if (filt->sym_to) {
+		err = find_dso_sym(dso, filt->sym_to, &start, &size,
+				   filt->sym_to_idx);
+		if (err)
+			goto put_dso;
+
+		err = check_end_after_start(filt, start, size);
+		/* Go through put_dso so the dso reference is not leaked */
+		if (err)
+			goto put_dso;
+
+		filt->size = start + size - filt->addr;
+	}
+
+put_dso:
+	dso__put(dso);
+
+	return err;
+}
+
+/*
+ * Format @filt as "<action> 0x<addr>[/0x<size>][@<path>]", where the path is
+ * the file name resolved by realpath().  Returns a newly allocated string that
+ * the caller must free, or NULL if realpath() or asprintf() fails.
+ */
+static char *addr_filter__to_str(struct addr_filter *filt)
+{
+	char resolved[PATH_MAX];
+	const char *sep = "";
+	const char *path = "";
+	char *out;
+	int n;
+
+	if (filt->filename) {
+		path = realpath(filt->filename, resolved);
+		if (!path)
+			return NULL;
+		sep = "@";
+	}
+
+	if (filt->range)
+		n = asprintf(&out, "%s 0x%"PRIx64"/0x%"PRIx64"%s%s",
+			     filt->action, filt->addr, filt->size, sep, path);
+	else
+		n = asprintf(&out, "%s 0x%"PRIx64"%s%s",
+			     filt->action, filt->addr, sep, path);
+
+	if (n < 0)
+		return NULL;
+
+	return out;
+}
+
+/*
+ * Parse the address filter string @filter, resolve any symbols in it, and
+ * append each resulting filter to @evsel in numeric form.
+ *
+ * @max_nr is the largest number of filters allowed.  Returns 0 on success or
+ * a non-zero error code, in which case a description of the expected filter
+ * format is printed.
+ */
+static int parse_addr_filter(struct perf_evsel *evsel, const char *filter,
+			     int max_nr)
+{
+	struct addr_filters filts;
+	struct addr_filter *filt;
+	int err;
+
+	addr_filters__init(&filts);
+
+	err = addr_filters__parse_bare_filter(&filts, filter);
+	if (err)
+		goto out_exit;
+
+	if (filts.cnt > max_nr) {
+		pr_err("Error: number of address filters (%d) exceeds maximum (%d)\n",
+		       filts.cnt, max_nr);
+		err = -EINVAL;
+		goto out_exit;
+	}
+
+	list_for_each_entry(filt, &filts.head, list) {
+		char *new_filter;
+
+		err = addr_filter__resolve_syms(filt);
+		if (err)
+			goto out_exit;
+
+		new_filter = addr_filter__to_str(filt);
+		if (!new_filter) {
+			err = -ENOMEM;
+			goto out_exit;
+		}
+
+		if (perf_evsel__append_addr_filter(evsel, new_filter))
+			err = -ENOMEM;
+
+		/*
+		 * new_filter was allocated by addr_filter__to_str() and is
+		 * owned here, so release it whether or not appending worked.
+		 * NOTE(review): relies on perf_evsel__append_addr_filter()
+		 * copying the string rather than keeping the pointer - confirm.
+		 */
+		free(new_filter);
+
+		if (err)
+			goto out_exit;
+	}
+
+out_exit:
+	addr_filters__exit(&filts);
+
+	if (err) {
+		pr_err("Failed to parse address filter: '%s'\n", filter);
+		pr_err("Filter format is: filter|start|stop|tracestop <start symbol or address> [/ <end symbol or size>] [@<file name>]\n");
+		pr_err("Where multiple filters are separated by space or comma.\n");
+	}
+
+	return err;
+}
+
+/*
+ * Return the PMU whose type equals @evsel's event attribute type, or NULL if
+ * none of the scanned PMUs matches.
+ */
+static struct perf_pmu *perf_evsel__find_pmu(struct perf_evsel *evsel)
+{
+	struct perf_pmu *pmu;
+
+	for (pmu = perf_pmu__scan(NULL); pmu; pmu = perf_pmu__scan(pmu)) {
+		if (pmu->type == evsel->attr.type)
+			return pmu;
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the value scanned from the "nr_addr_filters" file of @evsel's PMU.
+ * The result stays 0 if there is no matching PMU or nothing is scanned.
+ */
+static int perf_evsel__nr_addr_filter(struct perf_evsel *evsel)
+{
+	int nr = 0;
+	struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
+
+	if (pmu)
+		perf_pmu__scan_file(pmu, "nr_addr_filters", "%d", &nr);
+
+	return nr;
+}
+
+/*
+ * For every event in @evlist that has a filter string and whose PMU reports a
+ * non-zero number of address filters, replace the filter with its parsed and
+ * symbol-resolved form.  Other events are left untouched.
+ * Returns 0 on success or the first error from parse_addr_filter().
+ */
+int auxtrace_parse_filters(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+	char *filter;
+	int err, max_nr;
+
+	evlist__for_each_entry(evlist, evsel) {
+		filter = evsel->filter;
+		max_nr = perf_evsel__nr_addr_filter(evsel);
+		if (!filter || !max_nr)
+			continue;
+		/* Detach the old string: the new filter is built by appending */
+		evsel->filter = NULL;
+		err = parse_addr_filter(evsel, filter, max_nr);
+		free(filter);
+		if (err)
+			return err;
+		pr_debug("Address filter: %s\n", evsel->filter);
+	}
+
+	return 0;
+}
diff --git a/util/auxtrace.h b/util/auxtrace.h
new file mode 100644
index 0000000..e731f55
--- /dev/null
+++ b/util/auxtrace.h
@@ -0,0 +1,721 @@
+/*
+ * auxtrace.h: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_AUXTRACE_H
+#define __PERF_AUXTRACE_H
+
+#include <sys/types.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "../perf.h"
+#include "event.h"
+#include "session.h"
+#include "debug.h"
+
+union perf_event;
+struct perf_session;
+struct perf_evlist;
+struct perf_tool;
+struct option;
+struct record_opts;
+struct auxtrace_info_event;
+struct events_stats;
+
+enum auxtrace_type {
+	PERF_AUXTRACE_UNKNOWN,
+	PERF_AUXTRACE_INTEL_PT,
+	PERF_AUXTRACE_INTEL_BTS,
+	PERF_AUXTRACE_CS_ETM,
+	PERF_AUXTRACE_ARM_SPE,
+};
+
+enum itrace_period_type {
+	PERF_ITRACE_PERIOD_INSTRUCTIONS,
+	PERF_ITRACE_PERIOD_TICKS,
+	PERF_ITRACE_PERIOD_NANOSECS,
+};
+
+/**
+ * struct itrace_synth_opts - AUX area tracing synthesis options.
+ * @set: indicates whether or not options have been set
+ * @inject: indicates the event (not just the sample) must be fully synthesized
+ *          because 'perf inject' will write it out
+ * @instructions: whether to synthesize 'instructions' events
+ * @branches: whether to synthesize 'branches' events
+ * @transactions: whether to synthesize events for transactions
+ * @ptwrites: whether to synthesize events for ptwrites
+ * @pwr_events: whether to synthesize power events
+ * @errors: whether to synthesize decoder error events
+ * @dont_decode: whether to skip decoding entirely
+ * @log: write a decoding log
+ * @calls: limit branch samples to calls (can be combined with @returns)
+ * @returns: limit branch samples to returns (can be combined with @calls)
+ * @callchain: add callchain to 'instructions' events
+ * @thread_stack: feed branches to the thread_stack
+ * @last_branch: add branch context to 'instruction' events
+ * @callchain_sz: maximum callchain size
+ * @last_branch_sz: branch context size
+ * @period: 'instructions' events period
+ * @period_type: 'instructions' events period type
+ * @initial_skip: skip N events at the beginning.
+ * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
+ */
+struct itrace_synth_opts {
+	bool			set;
+	bool			inject;
+	bool			instructions;
+	bool			branches;
+	bool			transactions;
+	bool			ptwrites;
+	bool			pwr_events;
+	bool			errors;
+	bool			dont_decode;
+	bool			log;
+	bool			calls;
+	bool			returns;
+	bool			callchain;
+	bool			thread_stack;
+	bool			last_branch;
+	unsigned int		callchain_sz;
+	unsigned int		last_branch_sz;
+	unsigned long long	period;
+	enum itrace_period_type	period_type;
+	unsigned long		initial_skip;
+	unsigned long		*cpu_bitmap;
+};
+
+/**
+ * struct auxtrace_index_entry - indexes a AUX area tracing event within a
+ *                               perf.data file.
+ * @file_offset: offset within the perf.data file
+ * @sz: size of the event
+ */
+struct auxtrace_index_entry {
+	u64			file_offset;
+	u64			sz;
+};
+
+#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
+
+/**
+ * struct auxtrace_index - index of AUX area tracing events within a perf.data
+ *                         file.
+ * @list: linking a number of arrays of entries
+ * @nr: number of entries
+ * @entries: array of entries
+ */
+struct auxtrace_index {
+	struct list_head	list;
+	size_t			nr;
+	struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
+};
+
+/**
+ * struct auxtrace - session callbacks to allow AUX area data decoding.
+ * @process_event: lets the decoder see all session events
+ * @process_auxtrace_event: process a PERF_RECORD_AUXTRACE event
+ * @flush_events: process any remaining data
+ * @free_events: free resources associated with event processing
+ * @free: free resources associated with the session
+ */
+struct auxtrace {
+	int (*process_event)(struct perf_session *session,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct perf_tool *tool);
+	int (*process_auxtrace_event)(struct perf_session *session,
+				      union perf_event *event,
+				      struct perf_tool *tool);
+	int (*flush_events)(struct perf_session *session,
+			    struct perf_tool *tool);
+	void (*free_events)(struct perf_session *session);
+	void (*free)(struct perf_session *session);
+};
+
+/**
+ * struct auxtrace_buffer - a buffer containing AUX area tracing data.
+ * @list: buffers are queued in a list held by struct auxtrace_queue
+ * @size: size of the buffer in bytes
+ * @pid: in per-thread mode, the pid this buffer is associated with
+ * @tid: in per-thread mode, the tid this buffer is associated with
+ * @cpu: in per-cpu mode, the cpu this buffer is associated with
+ * @data: actual buffer data (can be null if the data has not been loaded)
+ * @data_offset: file offset at which the buffer can be read
+ * @mmap_addr: mmap address at which the buffer can be read
+ * @mmap_size: size of the mmap at @mmap_addr
+ * @data_needs_freeing: @data was malloc'd so free it when it is no longer
+ *                      needed
+ * @consecutive: the original data was split up and this buffer is consecutive
+ *               to the previous buffer
+ * @offset: offset as determined by aux_head / aux_tail members of struct
+ *          perf_event_mmap_page
+ * @reference: an implementation-specific reference determined when the data is
+ *             recorded
+ * @buffer_nr: used to number each buffer
+ * @use_size: implementation actually only uses this number of bytes
+ * @use_data: implementation actually only uses data starting at this address
+ */
+struct auxtrace_buffer {
+	struct list_head	list;
+	size_t			size;
+	pid_t			pid;
+	pid_t			tid;
+	int			cpu;
+	void			*data;
+	off_t			data_offset;
+	void			*mmap_addr;
+	size_t			mmap_size;
+	bool			data_needs_freeing;
+	bool			consecutive;
+	u64			offset;
+	u64			reference;
+	u64			buffer_nr;
+	size_t			use_size;
+	void			*use_data;
+};
+
+/**
+ * struct auxtrace_queue - a queue of AUX area tracing data buffers.
+ * @head: head of buffer list
+ * @tid: in per-thread mode, the tid this queue is associated with
+ * @cpu: in per-cpu mode, the cpu this queue is associated with
+ * @set: %true once this queue has been dedicated to a specific thread or cpu
+ * @priv: implementation-specific data
+ */
+struct auxtrace_queue {
+	struct list_head	head;
+	pid_t			tid;
+	int			cpu;
+	bool			set;
+	void			*priv;
+};
+
+/**
+ * struct auxtrace_queues - an array of AUX area tracing queues.
+ * @queue_array: array of queues
+ * @nr_queues: number of queues
+ * @new_data: set whenever new data is queued
+ * @populated: queues have been fully populated using the auxtrace_index
+ * @next_buffer_nr: used to number each buffer
+ */
+struct auxtrace_queues {
+	struct auxtrace_queue	*queue_array;
+	unsigned int		nr_queues;
+	bool			new_data;
+	bool			populated;
+	u64			next_buffer_nr;
+};
+
+/**
+ * struct auxtrace_heap_item - element of struct auxtrace_heap.
+ * @queue_nr: queue number
+ * @ordinal: value used for sorting (lowest ordinal is top of the heap) expected
+ *           to be a timestamp
+ */
+struct auxtrace_heap_item {
+	unsigned int		queue_nr;
+	u64			ordinal;
+};
+
+/**
+ * struct auxtrace_heap - a heap suitable for sorting AUX area tracing queues.
+ * @heap_array: the heap
+ * @heap_cnt: the number of elements in the heap
+ * @heap_sz: maximum number of elements (grows as needed)
+ */
+struct auxtrace_heap {
+	struct auxtrace_heap_item	*heap_array;
+	unsigned int		heap_cnt;
+	unsigned int		heap_sz;
+};
+
+/**
+ * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
+ * @base: address of mapped area
+ * @userpg: pointer to buffer's perf_event_mmap_page
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @len: size of mapped area
+ * @prev: previous aux_head
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ *       mmap) otherwise %0
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap {
+	void		*base;
+	void		*userpg;
+	size_t		mask;
+	size_t		len;
+	u64		prev;
+	int		idx;
+	pid_t		tid;
+	int		cpu;
+};
+
+/**
+ * struct auxtrace_mmap_params - parameters to set up struct auxtrace_mmap.
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @offset: file offset of mapped area
+ * @len: size of mapped area
+ * @prot: mmap memory protection
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ *       mmap) otherwise %0
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap_params {
+	size_t		mask;
+	off_t		offset;
+	size_t		len;
+	int		prot;
+	int		idx;
+	pid_t		tid;
+	int		cpu;
+};
+
+/**
+ * struct auxtrace_record - callbacks for recording AUX area data.
+ * @recording_options: validate and process recording options
+ * @info_priv_size: return the size of the private data in auxtrace_info_event
+ * @info_fill: fill-in the private data in auxtrace_info_event
+ * @free: free this auxtrace record structure
+ * @snapshot_start: starting a snapshot
+ * @snapshot_finish: finishing a snapshot
+ * @find_snapshot: find data to snapshot within auxtrace mmap
+ * @parse_snapshot_options: parse snapshot options
+ * @reference: provide a 64-bit reference number for auxtrace_event
+ * @read_finish: called after reading from an auxtrace mmap
+ * @alignment: alignment (if any) for AUX area data
+ */
+struct auxtrace_record {
+	int (*recording_options)(struct auxtrace_record *itr,
+				 struct perf_evlist *evlist,
+				 struct record_opts *opts);
+	size_t (*info_priv_size)(struct auxtrace_record *itr,
+				 struct perf_evlist *evlist);
+	int (*info_fill)(struct auxtrace_record *itr,
+			 struct perf_session *session,
+			 struct auxtrace_info_event *auxtrace_info,
+			 size_t priv_size);
+	void (*free)(struct auxtrace_record *itr);
+	int (*snapshot_start)(struct auxtrace_record *itr);
+	int (*snapshot_finish)(struct auxtrace_record *itr);
+	int (*find_snapshot)(struct auxtrace_record *itr, int idx,
+			     struct auxtrace_mmap *mm, unsigned char *data,
+			     u64 *head, u64 *old);
+	int (*parse_snapshot_options)(struct auxtrace_record *itr,
+				      struct record_opts *opts,
+				      const char *str);
+	u64 (*reference)(struct auxtrace_record *itr);
+	int (*read_finish)(struct auxtrace_record *itr, int idx);
+	unsigned int alignment;
+};
+
+/**
+ * struct addr_filter - address filter.
+ * @list: list node
+ * @range: true if it is a range filter
+ * @start: true if action is 'filter' or 'start'
+ * @action: 'filter', 'start' or 'stop' ('tracestop' is accepted but converted
+ *          to 'stop')
+ * @sym_from: symbol name for the filter address
+ * @sym_to: symbol name that determines the filter size
+ * @sym_from_idx: selects n'th from symbols with the same name (0 means global
+ *                and less than 0 means symbol must be unique)
+ * @sym_to_idx: same as @sym_from_idx but for @sym_to
+ * @addr: filter address
+ * @size: filter region size (for range filters)
+ * @filename: DSO file name or NULL for the kernel
+ * @str: allocated string that contains the other string members
+ */
+struct addr_filter {
+	struct list_head	list;
+	bool			range;
+	bool			start;
+	const char		*action;
+	const char		*sym_from;
+	const char		*sym_to;
+	int			sym_from_idx;
+	int			sym_to_idx;
+	u64			addr;
+	u64			size;
+	const char		*filename;
+	char			*str;
+};
+
+/**
+ * struct addr_filters - list of address filters.
+ * @head: list of address filters
+ * @cnt: number of address filters
+ */
+struct addr_filters {
+	struct list_head	head;
+	int			cnt;
+};
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+/*
+ * In snapshot mode the mmapped page is read-only which makes using
+ * __sync_val_compare_and_swap() problematic.  However, snapshot mode expects
+ * the buffer is not updated while the snapshot is made (e.g. Intel PT disables
+ * the event) so there is not a race anyway.
+ */
+static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+	u64 head = READ_ONCE(pc->aux_head);
+
+	/* Ensure all reads are done after we read the head */
+	rmb();
+	return head;
+}
+
+static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+	u64 head = READ_ONCE(pc->aux_head);
+#else
+	u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0);
+#endif
+
+	/* Ensure all reads are done after we read the head */
+	rmb();
+	return head;
+}
+
+static inline void auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+#if BITS_PER_LONG != 64 && defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+	u64 old_tail;	/* only used by the compare-and-swap store below */
+#endif
+
+	/* Ensure all reads are done before we write the tail out */
+	mb();
+#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+	pc->aux_tail = tail;	/* plain store */
+#else	/* BITS_PER_LONG != 64 and the __sync builtins are available */
+	do {	/* re-read the current tail and retry until the swap to @tail succeeds */
+		old_tail = __sync_val_compare_and_swap(&pc->aux_tail, 0, 0);
+	} while (!__sync_bool_compare_and_swap(&pc->aux_tail, old_tail, tail));
+#endif
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+			struct auxtrace_mmap_params *mp,
+			void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+				off_t auxtrace_offset,
+				unsigned int auxtrace_pages,
+				bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+				   struct perf_evlist *evlist, int idx,
+				   bool per_cpu);
+
+typedef int (*process_auxtrace_t)(struct perf_tool *tool,
+				  union perf_event *event, void *data1,
+				  size_t len1, void *data2, size_t len2);
+
+int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+			struct perf_tool *tool, process_auxtrace_t fn);
+
+int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+				 struct auxtrace_record *itr,
+				 struct perf_tool *tool, process_auxtrace_t fn,
+				 size_t snapshot_size);
+
+int auxtrace_queues__init(struct auxtrace_queues *queues);
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+			       struct perf_session *session,
+			       union perf_event *event, off_t data_offset,
+			       struct auxtrace_buffer **buffer_ptr);
+void auxtrace_queues__free(struct auxtrace_queues *queues);
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+				   struct perf_session *session);
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+					      struct auxtrace_buffer *buffer);
+void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd);
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer);
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+		       u64 ordinal);
+void auxtrace_heap__pop(struct auxtrace_heap *heap);
+void auxtrace_heap__free(struct auxtrace_heap *heap);
+
+struct auxtrace_cache_entry {
+	struct hlist_node hash;
+	u32 key;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+					   unsigned int limit_percent);
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache);
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c);
+void auxtrace_cache__free_entry(struct auxtrace_cache *c, void *entry);
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+			struct auxtrace_cache_entry *entry);
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key);
+
+struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
+					      int *err);
+
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+				    struct record_opts *opts,
+				    const char *str);
+int auxtrace_record__options(struct auxtrace_record *itr,
+			     struct perf_evlist *evlist,
+			     struct record_opts *opts);
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+				       struct perf_evlist *evlist);
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+			       struct perf_session *session,
+			       struct auxtrace_info_event *auxtrace_info,
+			       size_t priv_size);
+void auxtrace_record__free(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr);
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+				   struct auxtrace_mmap *mm,
+				   unsigned char *data, u64 *head, u64 *old);
+u64 auxtrace_record__reference(struct auxtrace_record *itr);
+
+int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,
+				   off_t file_offset);
+int auxtrace_index__write(int fd, struct list_head *head);
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+			    bool needs_swap);
+void auxtrace_index__free(struct list_head *head);
+
+void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
+			  int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+			  const char *msg);
+
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
+					 struct perf_tool *tool,
+					 struct perf_session *session,
+					 perf_event__handler_t process);
+int perf_event__process_auxtrace_info(struct perf_tool *tool,
+				      union perf_event *event,
+				      struct perf_session *session);
+s64 perf_event__process_auxtrace(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_session *session);
+int perf_event__process_auxtrace_error(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct perf_session *session);
+int itrace_parse_synth_opts(const struct option *opt, const char *str,
+			    int unset);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
+
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+				      union perf_event *event);
+void events_stats__auxtrace_error_warn(const struct events_stats *stats);
+
+void addr_filters__init(struct addr_filters *filts);
+void addr_filters__exit(struct addr_filters *filts);
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+				    const char *filter);
+int auxtrace_parse_filters(struct perf_evlist *evlist);
+
+static inline int auxtrace__process_event(struct perf_session *session,
+					  union perf_event *event,
+					  struct perf_sample *sample,
+					  struct perf_tool *tool)
+{
+	/* A session without an auxtrace handler reports success and does nothing. */
+	if (session->auxtrace)
+		return session->auxtrace->process_event(session, event, sample, tool);
+	return 0;
+}
+
+static inline int auxtrace__flush_events(struct perf_session *session,
+					 struct perf_tool *tool)
+{
+	/* A session without an auxtrace handler has nothing to flush. */
+	if (session->auxtrace)
+		return session->auxtrace->flush_events(session, tool);
+	return 0;
+}
+
+static inline void auxtrace__free_events(struct perf_session *session)
+{
+	/* A session without an auxtrace handler has no events to free. */
+	if (!session->auxtrace)
+		return;
+	session->auxtrace->free_events(session);	/* plain call: a void function must not return an expression */
+}
+
+static inline void auxtrace__free(struct perf_session *session)
+{
+	/* A session without an auxtrace handler has nothing to release. */
+	if (!session->auxtrace)
+		return;
+	session->auxtrace->free(session);	/* plain call: a void function must not return an expression */
+}
+
+#else
+
+static inline struct auxtrace_record *
+auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
+		      int *err)
+{
+	*err = 0;
+	return NULL;
+}
+
+static inline
+void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused)
+{
+}
+
+static inline int
+perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
+				     struct perf_tool *tool __maybe_unused,
+				     struct perf_session *session __maybe_unused,
+				     perf_event__handler_t process __maybe_unused)
+{
+	return -EINVAL;
+}
+
+static inline
+int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused,
+			     struct perf_evlist *evlist __maybe_unused,
+			     struct record_opts *opts __maybe_unused)
+{
+	return 0;
+}
+
+#define perf_event__process_auxtrace_info		0
+#define perf_event__process_auxtrace			0
+#define perf_event__process_auxtrace_error		0
+
+static inline
+void perf_session__auxtrace_error_inc(struct perf_session *session
+				      __maybe_unused,
+				      union perf_event *event
+				      __maybe_unused)
+{
+}
+
+static inline
+void events_stats__auxtrace_error_warn(const struct events_stats *stats
+				       __maybe_unused)
+{
+}
+
+static inline
+int itrace_parse_synth_opts(const struct option *opt __maybe_unused,
+			    const char *str __maybe_unused,
+			    int unset __maybe_unused)
+{
+	pr_err("AUX area tracing not supported\n");
+	return -EINVAL;
+}
+
+static inline
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+				    struct record_opts *opts __maybe_unused,
+				    const char *str)
+{
+	if (!str)
+		return 0;
+	pr_err("AUX area tracing not supported\n");
+	return -EINVAL;
+}
+
+static inline
+int auxtrace__process_event(struct perf_session *session __maybe_unused,
+			    union perf_event *event __maybe_unused,
+			    struct perf_sample *sample __maybe_unused,
+			    struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+int auxtrace__flush_events(struct perf_session *session __maybe_unused,
+			   struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+void auxtrace__free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused)
+{
+}
+
+static inline
+void auxtrace__free(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_index__write(int fd __maybe_unused,
+			  struct list_head *head __maybe_unused)
+{
+	return -EINVAL;
+}
+
+static inline
+int auxtrace_index__process(int fd __maybe_unused,
+			    u64 size __maybe_unused,
+			    struct perf_session *session __maybe_unused,
+			    bool needs_swap __maybe_unused)
+{
+	return -EINVAL;
+}
+
+static inline
+void auxtrace_index__free(struct list_head *head __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_parse_filters(struct perf_evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+			struct auxtrace_mmap_params *mp,
+			void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+				off_t auxtrace_offset,
+				unsigned int auxtrace_pages,
+				bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+				   struct perf_evlist *evlist, int idx,
+				   bool per_cpu);
+
+#endif
+
+#endif
diff --git a/util/block-range.c b/util/block-range.c
new file mode 100644
index 0000000..f1451c9
--- /dev/null
+++ b/util/block-range.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "block-range.h"
+#include "annotate.h"
+
+struct {
+	struct rb_root root;	/* tree of all block ranges, searched by address */
+	u64 blocks;		/* bumped once per completed block_range__create() call */
+} block_ranges;
+
+static void block_range__debug(void)
+{
+	/*
+	 * Walk the whole tree in order and assert it is sorted and disjoint.
+	 * XXX still paranoid for now; see if this can depend on DEBUG=1 builds.
+	 */
+#if 1
+	struct rb_node *pos = rb_first(&block_ranges.root);
+	u64 prev_end = 0; /* NULL isn't executable */
+
+	for (; pos; pos = rb_next(pos)) {
+		struct block_range *br = rb_entry(pos, struct block_range, node);
+
+		assert(prev_end < br->start);
+		assert(br->start <= br->end); /* single instruction block; jump to a jump */
+
+		prev_end = br->end;
+	}
+#endif
+}
+
+struct block_range *block_range__find(u64 addr)
+{
+	struct rb_node *cur = block_ranges.root.rb_node;
+
+	/*
+	 * Plain binary search over the tree.  Both @start and @end of a range
+	 * are inclusive, so a hit needs start <= addr <= end.
+	 */
+	while (cur) {
+		struct block_range *br = rb_entry(cur, struct block_range, node);
+
+		if (addr >= br->start && addr <= br->end)
+			return br;
+
+		/* Miss: descend towards the side that could hold @addr. */
+		cur = addr < br->start ? cur->rb_left : cur->rb_right;
+	}
+
+	return NULL;
+}
+
+static inline void rb_link_left_of_node(struct rb_node *left, struct rb_node *node)
+{
+	struct rb_node **slot;
+
+	/* Rightmost free slot of @node's left subtree: the place just before @node. */
+	for (slot = &node->rb_left; *slot; slot = &node->rb_right)
+		node = *slot;
+	rb_link_node(left, node, slot);
+}
+
+static inline void rb_link_right_of_node(struct rb_node *right, struct rb_node *node)
+{
+	struct rb_node **slot;
+
+	/* Leftmost free slot of @node's right subtree: the place just after @node. */
+	for (slot = &node->rb_right; *slot; slot = &node->rb_left)
+		node = *slot;
+	rb_link_node(right, node, slot);
+}
+
+/**
+ * block_range__create
+ * @start: branch target starting this basic block
+ * @end:   branch ending this basic block
+ *
+ * Create all the required block ranges to precisely span the given range.
+ */
+struct block_range_iter block_range__create(u64 start, u64 end)
+{
+	struct rb_node **p = &block_ranges.root.rb_node;
+	struct rb_node *n, *parent = NULL;
+	struct block_range *next, *entry = NULL;
+	struct block_range_iter iter = { NULL, NULL };
+
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct block_range, node);
+
+		if (start < entry->start)
+			p = &parent->rb_left;
+		else if (start > entry->end)
+			p = &parent->rb_right;
+		else
+			break;
+	}
+
+	/*
+	 * Didn't find anything.. there's a hole at @start, however @end might
+	 * be inside/behind the next range.
+	 */
+	if (!*p) {
+		if (!entry) /* tree empty */
+			goto do_whole;
+
+		/*
+		 * If the last node is before, advance one to find the next.
+		 */
+		n = parent;
+		if (entry->end < start) {
+			n = rb_next(n);
+			if (!n)
+				goto do_whole;
+		}
+		next = rb_entry(n, struct block_range, node);
+
+		if (next->start <= end) { /* add head: [start...][n->start...] */
+			struct block_range *head = malloc(sizeof(struct block_range));
+			if (!head)
+				return iter;	/* OOM: iter is still { NULL, NULL } */
+
+			*head = (struct block_range){
+				.start		= start,
+				.end		= next->start - 1,
+				.is_target	= 1,
+				.is_branch	= 0,
+			};
+
+			rb_link_left_of_node(&head->node, &next->node);
+			rb_insert_color(&head->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.start = head;
+			goto do_tail;
+		}
+
+do_whole:
+		/*
+		 * The whole [start..end] range is non-overlapping.
+		 */
+		entry = malloc(sizeof(struct block_range));
+		if (!entry)
+			return iter;
+
+		*entry = (struct block_range){
+			.start		= start,
+			.end		= end,
+			.is_target	= 1,
+			.is_branch	= 1,
+		};
+
+		rb_link_node(&entry->node, parent, p);
+		rb_insert_color(&entry->node, &block_ranges.root);
+		block_range__debug();
+
+		iter.start = entry;
+		iter.end   = entry;
+		goto done;
+	}
+
+	/*
+	 * We found a range that overlapped with ours, split if needed.
+	 */
+	if (entry->start < start) { /* split: [e->start...][start...] */
+		struct block_range *head = malloc(sizeof(struct block_range));
+		if (!head)
+			return iter;
+
+		*head = (struct block_range){
+			.start		= entry->start,
+			.end		= start - 1,
+			.is_target	= entry->is_target,
+			.is_branch	= 0,
+
+			.coverage	= entry->coverage,
+			.entry		= entry->entry,
+		};
+
+		entry->start		= start;
+		entry->is_target	= 1;
+		entry->entry		= 0;
+
+		rb_link_left_of_node(&head->node, &entry->node);
+		rb_insert_color(&head->node, &block_ranges.root);
+		block_range__debug();
+
+	} else if (entry->start == start)
+		entry->is_target = 1;
+
+	iter.start = entry;
+
+do_tail:
+	/*
+	 * At this point we've got: @iter.start = [@start...] but @end can still be
+	 * inside or beyond it.
+	 */
+	entry = iter.start;
+	for (;;) {
+		/*
+		 * If @end is inside @entry, split.
+		 */
+		if (end < entry->end) { /* split: [...end][...e->end] */
+			struct block_range *tail = malloc(sizeof(struct block_range));
+			if (!tail)
+				return iter;	/* OOM: iter.end stays NULL, so the iterator is not valid */
+
+			*tail = (struct block_range){
+				.start		= end + 1,
+				.end		= entry->end,
+				.is_target	= 0,
+				.is_branch	= entry->is_branch,
+
+				.coverage	= entry->coverage,
+				.taken		= entry->taken,
+				.pred		= entry->pred,
+			};
+
+			entry->end		= end;
+			entry->is_branch	= 1;
+			entry->taken		= 0;
+			entry->pred		= 0;
+
+			rb_link_right_of_node(&tail->node, &entry->node);
+			rb_insert_color(&tail->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.end = entry;
+			goto done;
+		}
+
+		/*
+		 * If @end matches @entry, done
+		 */
+		if (end == entry->end) {
+			entry->is_branch = 1;
+			iter.end = entry;
+			goto done;
+		}
+
+		next = block_range__next(entry);
+		if (!next)
+			goto add_tail;
+
+		/*
+		 * If @end is beyond @entry but before @next starts, add tail.
+		 */
+		if (end < next->start) { /* add tail: [...e->end][...end] */
+			struct block_range *tail;
+add_tail:
+			tail = malloc(sizeof(struct block_range));
+			if (!tail)
+				return iter;
+
+			*tail = (struct block_range){
+				.start		= entry->end + 1,
+				.end		= end,
+				.is_target	= 0,
+				.is_branch	= 1,
+			};
+
+			rb_link_right_of_node(&tail->node, &entry->node);
+			rb_insert_color(&tail->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.end = tail;
+			goto done;
+		}
+
+		/*
+		 * If there is a hole between @entry and @next, fill it.
+		 */
+		if (entry->end + 1 != next->start) {
+			struct block_range *hole = malloc(sizeof(struct block_range));
+			if (!hole)
+				return iter;
+
+			*hole = (struct block_range){
+				.start		= entry->end + 1,
+				.end		= next->start - 1,
+				.is_target	= 0,
+				.is_branch	= 0,
+			};
+
+			rb_link_left_of_node(&hole->node, &next->node);
+			rb_insert_color(&hole->node, &block_ranges.root);
+			block_range__debug();
+		}
+
+		entry = next;
+	}
+
+done:
+	assert(iter.start->start == start && iter.start->is_target);
+	assert(iter.end->end == end && iter.end->is_branch);
+
+	block_ranges.blocks++;	/* counts completed calls, not tree nodes */
+
+	return iter;
+}
+
+
+/*
+ * Compute coverage as:
+ *
+ *    br->coverage / br->sym->max_coverage
+ *
+ * This ensures each symbol has a 100% spot, to reflect that each symbol has a
+ * most covered section.
+ *
+ * Returns a coverage ratio in [0, 1], or -1 if we have no data whatsoever or
+ * the symbol does not exist.
+ */
+double block_range__coverage(struct block_range *br)
+{
+	/*
+	 * No range given: report 0 when at least one block has been
+	 * recorded, otherwise signal "no data" with -1.
+	 */
+	if (!br)
+		return block_ranges.blocks ? 0 : -1;
+
+	/* Without a symbol there is no max_coverage to scale against. */
+	if (!br->sym)
+		return -1;
+
+	/* Scale this range's coverage by the symbol's maximum. */
+	return (double)br->coverage /
+	       symbol__annotation(br->sym)->max_coverage;
+}
diff --git a/util/block-range.h b/util/block-range.h
new file mode 100644
index 0000000..a5ba719
--- /dev/null
+++ b/util/block-range.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BLOCK_RANGE_H
+#define __PERF_BLOCK_RANGE_H
+
+#include "symbol.h"
+
+/*
+ * struct block_range - non-overlapping parts of basic blocks
+ * @node:	treenode
+ * @start:	inclusive start of range
+ * @end:	inclusive end of range
+ * @is_target:	@start is a jump target
+ * @is_branch:	@end is a branch instruction
+ * @coverage:	number of blocks that cover this range
+ * @taken:	number of times the branch is taken (requires @is_branch)
+ * @pred:	number of times the taken branch was predicted
+ */
+struct block_range {
+	struct rb_node node;
+
+	struct symbol *sym;	/* left NULL by block_range__create(); block_range__coverage() returns -1 without it */
+
+	u64 start;
+	u64 end;
+
+	int is_target, is_branch;
+
+	u64 coverage;
+	u64 entry;	/* kept by the head part and zeroed on the rest when a range is split at a new @start */
+	u64 taken;
+	u64 pred;
+};
+
+static inline struct block_range *block_range__next(struct block_range *br)
+{
+	struct rb_node *succ = rb_next(&br->node);
+
+	/* The range that follows @br in the tree, or NULL if there is none. */
+	return succ ? rb_entry(succ, struct block_range, node) : NULL;
+}
+
+struct block_range_iter {
+	struct block_range *start;	/* current range; advanced by block_range_iter__next() */
+	struct block_range *end;	/* last range of the iteration (inclusive) */
+};
+
+static inline struct block_range *block_range_iter(struct block_range_iter *iter)
+{
+	return iter->start;	/* the range the iterator currently points at */
+}
+
+static inline bool block_range_iter__next(struct block_range_iter *iter)
+{
+	bool more = iter->start != iter->end;
+
+	/* @end is inclusive: only advance while the current range is not the last. */
+	if (more)
+		iter->start = block_range__next(iter->start);
+	return more;
+}
+
+static inline bool block_range_iter__valid(struct block_range_iter *iter)
+{
+	/* An iterator is usable only when both of its ends are set. */
+	bool usable = iter->start && iter->end;
+	return usable;
+}
+
+extern struct block_range *block_range__find(u64 addr);
+extern struct block_range_iter block_range__create(u64 start, u64 end);
+extern double block_range__coverage(struct block_range *br);
+
+#endif /* __PERF_BLOCK_RANGE_H */
diff --git a/util/bpf-loader.c b/util/bpf-loader.c
new file mode 100644
index 0000000..af7ad81
--- /dev/null
+++ b/util/bpf-loader.c
@@ -0,0 +1,1787 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bpf-loader.c
+ *
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <errno.h>
+#include "perf.h"
+#include "debug.h"
+#include "bpf-loader.h"
+#include "bpf-prologue.h"
+#include "probe-event.h"
+#include "probe-finder.h" // for MAX_PROBES
+#include "parse-events.h"
+#include "strfilter.h"
+#include "llvm-utils.h"
+#include "c++/clang-c.h"
+
+#define DEFINE_PRINT_FN(name, level) \
+static int libbpf_##name(const char *fmt, ...)	\
+{						\
+	va_list args;				\
+	int ret;				\
+						\
+	va_start(args, fmt);			\
+	ret = veprintf(level, verbose, pr_fmt(fmt), args);\
+	va_end(args);				\
+	return ret;				\
+}
+
+DEFINE_PRINT_FN(warning, 1)
+DEFINE_PRINT_FN(info, 1)
+DEFINE_PRINT_FN(debug, 1)
+
+struct bpf_prog_priv {
+	bool is_tp;	/* section names a tracepoint ("sys:event") rather than a probe */
+	char *sys_name;	/* tracepoint only: heap copy of the text before the ':' */
+	char *evt_name;	/* tracepoint only: heap copy of the text after the ':' */
+	struct perf_probe_event pev;	/* probe definition parsed from the section name */
+	bool need_prologue;	/* set by hook_load_preprocessor() when some tev has arguments */
+	struct bpf_insn *insns_buf;	/* BPF_MAXINSNS entries: prologue followed by the original insns */
+	int nr_types;	/* number of distinct prologue types, filled in by map_prologue() */
+	int *type_mapping;	/* one slot per tev: that tev's prologue type number */
+};
+
+static bool libbpf_initialized;
+
+struct bpf_object *
+bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
+{
+	struct bpf_object *obj;
+
+	if (!libbpf_initialized) {	/* install perf's print callbacks only once */
+		libbpf_set_print(libbpf_warning,
+				 libbpf_info,
+				 libbpf_debug);
+		libbpf_initialized = true;
+	}
+
+	obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name);
+	if (IS_ERR(obj)) {
+		pr_debug("bpf: failed to load buffer\n");
+		return ERR_PTR(-EINVAL);	/* the error code inside @obj is not propagated */
+	}
+
+	return obj;
+}
+
+struct bpf_object *bpf__prepare_load(const char *filename, bool source)
+{
+	struct bpf_object *obj;	/* object, or ERR_PTR(), handed back to the caller */
+
+	if (!libbpf_initialized) {	/* install perf's print callbacks only once */
+		libbpf_set_print(libbpf_warning,
+				 libbpf_info,
+				 libbpf_debug);
+		libbpf_initialized = true;
+	}
+
+	if (source) {
+		int err;
+		void *obj_buf;
+		size_t obj_buf_sz;
+
+		/* Try the builtin clang first, then fall back to the external compiler. */
+		perf_clang__init();
+		err = perf_clang__compile_bpf(filename, &obj_buf, &obj_buf_sz);
+		perf_clang__cleanup();
+		if (err) {
+			pr_debug("bpf: builtin compilation failed: %d, try external compiler\n", err);
+			err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz);
+			if (err)
+				return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
+		} else
+			pr_debug("bpf: successful builtin compilation\n");
+		obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
+
+		if (!IS_ERR(obj) && llvm_param.dump_obj)
+			llvm__dump_obj(filename, obj_buf, obj_buf_sz);
+
+		free(obj_buf);	/* released whether or not the open succeeded */
+	} else
+		obj = bpf_object__open(filename);
+
+	/* An ERR_PTR is handed back unchanged; the failure is only logged here. */
+	if (IS_ERR(obj))
+		pr_debug("bpf: failed to load %s\n", filename);
+
+	return obj;
+}
+
+void bpf__clear(void)
+{
+	struct bpf_object *obj, *tmp;
+
+	bpf_object__for_each_safe(obj, tmp) {	/* _safe variant: @obj is closed inside the loop */
+		bpf__unprobe(obj);
+		bpf_object__close(obj);
+	}
+}
+
+static void
+clear_prog_priv(struct bpf_program *prog __maybe_unused,
+		void *_priv)
+{
+	struct bpf_prog_priv *priv = _priv;	/* registered through bpf_program__set_priv() */
+
+	cleanup_perf_probe_events(&priv->pev, 1);
+	zfree(&priv->insns_buf);
+	zfree(&priv->type_mapping);
+	zfree(&priv->sys_name);
+	zfree(&priv->evt_name);
+	free(priv);	/* @priv itself was calloc()ed in config_bpf_program() */
+}
+
+static int
+prog_config__exec(const char *value, struct perf_probe_event *pev)
+{
+	pev->uprobes = true;
+
+	/* A repeated exec=/module= key must not leak the earlier target. */
+	free(pev->target);
+	pev->target = strdup(value);
+	return pev->target ? 0 : -ENOMEM;
+}
+
+static int
+prog_config__module(const char *value, struct perf_probe_event *pev)
+{
+	pev->uprobes = false;
+
+	/* A repeated exec=/module= key must not leak the earlier target. */
+	free(pev->target);
+	pev->target = strdup(value);
+	return pev->target ? 0 : -ENOMEM;
+}
+
+static int
+prog_config__bool(const char *value, bool *pbool, bool invert)
+{
+	bool parsed;
+	int ret;
+
+	if (!pbool)
+		return -EINVAL;
+
+	/* *pbool is left untouched unless @value parses as a boolean. */
+	ret = strtobool(value, &parsed);
+	if (ret)
+		return ret;
+	*pbool = (parsed != invert);
+	return 0;
+}
+
+static int
+prog_config__inlines(const char *value,
+		     struct perf_probe_event *pev __maybe_unused)
+{
+	return prog_config__bool(value, &probe_conf.no_inlines, true);	/* inverted: a true value clears no_inlines */
+}
+
+static int
+prog_config__force(const char *value,
+		   struct perf_probe_event *pev __maybe_unused)
+{
+	return prog_config__bool(value, &probe_conf.force_add, false);	/* stored as parsed, not inverted */
+}
+
+static struct {
+	const char *key;
+	const char *usage;
+	const char *desc;
+	int (*func)(const char *, struct perf_probe_event *);
+} bpf_prog_config_terms[] = {
+	{
+		.key	= "exec",
+		.usage	= "exec=<full path of file>",
+		.desc	= "Set uprobe target",
+		.func	= prog_config__exec,
+	},
+	{
+		.key	= "module",
+		.usage	= "module=<module name>    ",
+		.desc	= "Set kprobe module",
+		.func	= prog_config__module,
+	},
+	{
+		.key	= "inlines",
+		.usage	= "inlines=[yes|no]        ",
+		.desc	= "Probe at inline symbol",
+		.func	= prog_config__inlines,
+	},
+	{
+		.key	= "force",
+		.usage	= "force=[yes|no]          ",
+		.desc	= "Forcibly add events with existing name",
+		.func	= prog_config__force,
+	},
+};
+
+static int
+do_prog_config(const char *key, const char *value,
+	       struct perf_probe_event *pev)
+{
+	unsigned int i;
+
+	pr_debug("config bpf program: %s=%s\n", key, value);
+	for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)	/* exact key match, first hit wins */
+		if (strcmp(key, bpf_prog_config_terms[i].key) == 0)
+			return bpf_prog_config_terms[i].func(value, pev);
+
+	pr_debug("BPF: ERROR: invalid program config option: %s=%s\n",
+		 key, value);
+
+	pr_debug("\nHint: Valid options are:\n");
+	for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)	/* list every known term as a hint */
+		pr_debug("\t%s:\t%s\n", bpf_prog_config_terms[i].usage,
+			 bpf_prog_config_terms[i].desc);
+	pr_debug("\n");
+
+	return -BPF_LOADER_ERRNO__PROGCONF_TERM;	/* unknown key */
+}
+
+static const char *
+parse_prog_config_kvpair(const char *config_str, struct perf_probe_event *pev)
+{
+	char *text = strdup(config_str);	/* scratch copy: separators are overwritten with NULs */
+	char *sep, *line;
+	const char *main_str = NULL;
+	int err = 0;
+
+	if (!text) {
+		pr_debug("Not enough memory: dup config_str failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	line = text;
+	while ((sep = strchr(line, ';'))) {	/* one ';'-terminated "key=value" term per pass */
+		char *equ;
+
+		*sep = '\0';
+		equ = strchr(line, '=');
+		if (!equ) {
+			pr_warning("WARNING: invalid config in BPF object: %s\n",
+				   line);
+			pr_warning("\tShould be 'key=value'.\n");
+			goto nextline;	/* a malformed term is skipped, not fatal */
+		}
+		*equ = '\0';
+
+		err = do_prog_config(line, equ + 1, pev);
+		if (err)
+			break;
+nextline:
+		line = sep + 1;
+	}
+
+	if (!err)
+		main_str = config_str + (line - text);	/* same offset, but inside the caller's string */
+	free(text);
+
+	return err ? ERR_PTR(err) : main_str;	/* text after the last ';', or an ERR_PTR */
+}
+
+static int
+parse_prog_config(const char *config_str, const char **p_main_str,
+		  bool *is_tp, struct perf_probe_event *pev)
+{
+	int err;
+	const char *main_str = parse_prog_config_kvpair(config_str, pev);
+
+	if (IS_ERR(main_str))
+		return PTR_ERR(main_str);
+
+	*p_main_str = main_str;
+	if (!strchr(main_str, '=')) {
+		/* No '=': only a "sys:event" tracepoint name is acceptable */
+		const char *s = strchr(main_str, ':');
+
+		if (!s) {
+			pr_debug("bpf: '%s' is not a valid tracepoint\n",
+				 config_str);
+			return -BPF_LOADER_ERRNO__CONFIG;
+		}
+
+		*is_tp = true;
+		return 0;
+	}
+
+	*is_tp = false;	/* contains '=': treat it as a perf probe definition */
+	err = parse_perf_probe_command(main_str, pev);
+	if (err < 0) {
+		pr_debug("bpf: '%s' is not a valid config string\n",
+			 config_str);
+		/* parsing failed, so there is no need to clear pev here */
+		return -BPF_LOADER_ERRNO__CONFIG;
+	}
+	return 0;
+}
+
+static int
+config_bpf_program(struct bpf_program *prog)
+{
+	struct perf_probe_event *pev = NULL;
+	struct bpf_prog_priv *priv = NULL;
+	const char *config_str, *main_str;
+	bool is_tp = false;
+	int err;
+
+	/* Initialize per-program probing setting */
+	probe_conf.no_inlines = false;
+	probe_conf.force_add = false;
+
+	config_str = bpf_program__title(prog, false);
+	if (IS_ERR(config_str)) {
+		pr_debug("bpf: unable to get title for program\n");
+		return PTR_ERR(config_str);
+	}
+
+	priv = calloc(sizeof(*priv), 1);
+	if (!priv) {
+		pr_debug("bpf: failed to alloc priv\n");
+		return -ENOMEM;
+	}
+	pev = &priv->pev;	/* valid at every "goto errout" / "goto enomem" below */
+
+	pr_debug("bpf: config program '%s'\n", config_str);
+	err = parse_prog_config(config_str, &main_str, &is_tp, pev);
+	if (err)
+		goto errout;
+
+	if (is_tp) {
+		char *s = strchr(main_str, ':');	/* parse_prog_config() only sets is_tp when ':' exists */
+
+		priv->is_tp = true;
+		priv->sys_name = strndup(main_str, s - main_str);
+		priv->evt_name = strdup(s + 1);
+		if (!priv->sys_name || !priv->evt_name)	/* either copy may have failed */
+			goto enomem;
+		goto set_priv;
+	}
+
+	if (!pev->group) {	/* no group given: use the default BPF probe group */
+		pev->group = strdup(PERF_BPF_PROBE_GROUP);
+		if (!pev->group)
+			goto enomem;
+	} else if (strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
+		pr_debug("bpf: '%s': group for event is set and not '%s'.\n",
+			 config_str, PERF_BPF_PROBE_GROUP);
+		err = -BPF_LOADER_ERRNO__GROUP;
+		goto errout;
+	}
+
+	if (!pev->event) {
+		pr_debug("bpf: '%s': event name is missing. Section name should be 'key=value'\n",
+			 config_str);
+		err = -BPF_LOADER_ERRNO__EVENTNAME;
+		goto errout;
+	}
+	pr_debug("bpf: config '%s' is ok\n", config_str);
+
+set_priv:
+	err = bpf_program__set_priv(prog, priv, clear_prog_priv);
+	if (!err)
+		return 0;	/* priv is now released through clear_prog_priv() */
+	pr_debug("Failed to set priv for program '%s'\n", config_str);
+	goto errout;
+enomem:
+	pr_debug("bpf: strdup failed\n");
+	err = -ENOMEM;
+errout:
+	clear_perf_probe_event(pev);
+	free(priv->sys_name);	/* NULL (calloc) unless the tracepoint path set it */
+	free(priv->evt_name);
+	free(priv);
+	return err;
+}
+
+static int bpf__prepare_probe(void)
+{
+	static int err = 0;
+	static bool initialized = false;
+
+	/*
+	 * err is static so that, if the first init failed, every later call
+	 * to bpf__prepare_probe() returns the same error without calling
+	 * init_probe_symbol_maps() again.
+	 */
+	if (initialized)
+		return err;
+
+	initialized = true;
+	err = init_probe_symbol_maps(false);
+	if (err < 0)
+		pr_debug("Failed to init_probe_symbol_maps\n");
+	probe_conf.max_probes = MAX_PROBES;	/* set on the first call whether or not init succeeded */
+	return err;
+}
+
+static int
+preproc_gen_prologue(struct bpf_program *prog, int n,
+		     struct bpf_insn *orig_insns, int orig_insns_cnt,
+		     struct bpf_prog_prep_result *res)
+{
+	struct bpf_prog_priv *priv = bpf_program__priv(prog);
+	struct probe_trace_event *tev;
+	struct perf_probe_event *pev;
+	struct bpf_insn *buf;
+	size_t prologue_cnt = 0;
+	int i, err;
+
+	if (IS_ERR(priv) || !priv || priv->is_tp)
+		goto errout;
+
+	pev = &priv->pev;
+	if (n < 0 || n >= priv->nr_types)
+		goto errout;
+
+	/* insns_buf holds BPF_MAXINSNS entries: the original program must fit. */
+	if (orig_insns_cnt < 0 || orig_insns_cnt > BPF_MAXINSNS)
+		goto errout;
+
+	/* Find a tev that belongs to type @n */
+	for (i = 0; i < pev->ntevs; i++) {
+		if (priv->type_mapping[i] == n)
+			break;
+	}
+
+	if (i >= pev->ntevs) {
+		pr_debug("Internal error: prologue type %d not found\n", n);
+		return -BPF_LOADER_ERRNO__PROLOGUE;
+	}
+
+	tev = &pev->tevs[i];
+	buf = priv->insns_buf;	/* prologue first, original instructions after it */
+	err = bpf__gen_prologue(tev->args, tev->nargs,
+				buf, &prologue_cnt,
+				BPF_MAXINSNS - orig_insns_cnt);
+	if (err) {
+		const char *title;
+
+		title = bpf_program__title(prog, false);
+		if (IS_ERR(title) || !title)	/* same ERR_PTR convention as config_bpf_program() */
+			title = "[unknown]";
+		pr_debug("Failed to generate prologue for program %s\n", title);
+		return err;
+	}
+
+	memcpy(&buf[prologue_cnt], orig_insns,
+	       sizeof(struct bpf_insn) * orig_insns_cnt);
+
+	res->new_insn_ptr = buf;
+	res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
+	res->pfd = NULL;
+	return 0;
+
+errout:
+	pr_debug("Internal error in preproc_gen_prologue\n");
+	return -BPF_LOADER_ERRNO__PROLOGUE;
+}
+
+/*
+ * qsort() comparator for 'struct probe_trace_event *' slots; returns 0 only
+ * for identical argument lists (count, value strings and ref offset chains).
+ * The resulting order is reflexive, transitive and antisymmetric.
+ */
+static int compare_tev_args(const void *ptev1, const void *ptev2)
+{
+	int i, ret;
+	const struct probe_trace_event *tev1 =
+		*(const struct probe_trace_event **)ptev1;
+	const struct probe_trace_event *tev2 =
+		*(const struct probe_trace_event **)ptev2;
+
+	ret = tev2->nargs - tev1->nargs;
+	if (ret)
+		return ret;
+
+	for (i = 0; i < tev1->nargs; i++) {
+		struct probe_trace_arg *arg1, *arg2;
+		struct probe_trace_arg_ref *ref1, *ref2;
+
+		arg1 = &tev1->args[i];
+		arg2 = &tev2->args[i];
+
+		ret = strcmp(arg1->value, arg2->value);
+		if (ret)
+			return ret;
+
+		ref1 = arg1->ref;
+		ref2 = arg2->ref;
+
+		while (ref1 && ref2) {
+			/* Compare instead of subtracting: the difference may not fit in an int. */
+			if (ref2->offset != ref1->offset)
+				return ref2->offset > ref1->offset ? 1 : -1;
+
+			ref1 = ref1->next;
+			ref2 = ref2->next;
+		}
+
+		if (ref1 || ref2)	/* one chain is longer: the shorter one sorts first */
+			return ref2 ? 1 : -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Assign a type number to each tev in @pev; tevs whose argument lists compare
+ * equal share a type.  @mapping has one slot per tev, indexed like pev->tevs.
+ * *@nr_types is set to the number of distinct types.  Returns 0 or -ENOMEM.
+ */
+static int map_prologue(struct perf_probe_event *pev, int *mapping,
+			int *nr_types)
+{
+	int i, type = 0;
+	struct probe_trace_event **ptevs;
+
+	size_t array_sz = sizeof(*ptevs) * pev->ntevs;
+
+	ptevs = malloc(array_sz);	/* temporary array of pointers into pev->tevs, sorted below */
+	if (!ptevs) {
+		pr_debug("Not enough memory: alloc ptevs failed\n");
+		return -ENOMEM;
+	}
+
+	pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs);
+	for (i = 0; i < pev->ntevs; i++)
+		ptevs[i] = &pev->tevs[i];
+
+	qsort(ptevs, pev->ntevs, sizeof(*ptevs),
+	      compare_tev_args);	/* equal argument lists become neighbours */
+
+	for (i = 0; i < pev->ntevs; i++) {
+		int n;
+
+		n = ptevs[i] - pev->tevs;	/* index of this tev in pev->tevs */
+		if (i == 0) {
+			mapping[n] = type;
+			pr_debug("mapping[%d]=%d\n", n, type);
+			continue;
+		}
+
+		if (compare_tev_args(ptevs + i, ptevs + i - 1) == 0)
+			mapping[n] = type;	/* same arguments as the previous tev: reuse its type */
+		else
+			mapping[n] = ++type;
+
+		pr_debug("mapping[%d]=%d\n", n, mapping[n]);
+	}
+	free(ptevs);
+	*nr_types = type + 1;
+
+	return 0;
+}
+
+/*
+ * Decide whether prog needs a generated prologue and, if it does, allocate
+ * the instruction buffer and type mapping in its private data and register
+ * preproc_gen_prologue() as the program's pre-processor.
+ *
+ * Tracepoint programs, and programs whose tevs all have no arguments, get
+ * need_prologue = false and nothing else is done.
+ *
+ * Returns 0 on success, -BPF_LOADER_ERRNO__INTERNAL when the private data
+ * is missing, -ENOMEM on allocation failure, or the error returned by
+ * map_prologue() / bpf_program__set_prep().
+ */
+static int hook_load_preprocessor(struct bpf_program *prog)
+{
+	struct bpf_prog_priv *priv = bpf_program__priv(prog);
+	struct perf_probe_event *pev;
+	int idx, err;
+
+	if (!priv || IS_ERR(priv)) {
+		pr_debug("Internal error when hook preprocessor\n");
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	/* Assume no prologue until a tev with arguments is found. */
+	priv->need_prologue = false;
+	if (priv->is_tp)
+		return 0;
+
+	pev = &priv->pev;
+	for (idx = 0; idx < pev->ntevs; idx++) {
+		if (pev->tevs[idx].nargs > 0)
+			break;
+	}
+
+	/*
+	 * The scan ran off the end: no tev has an argument, so there is
+	 * no prologue to generate.
+	 */
+	if (idx >= pev->ntevs)
+		return 0;
+
+	priv->need_prologue = true;
+
+	priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS);
+	if (!priv->insns_buf) {
+		pr_debug("Not enough memory: alloc insns_buf failed\n");
+		return -ENOMEM;
+	}
+
+	priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
+	if (!priv->type_mapping) {
+		pr_debug("Not enough memory: alloc type_mapping failed\n");
+		return -ENOMEM;
+	}
+	/* Every slot starts as -1 (all bits set) before map_prologue() fills it. */
+	memset(priv->type_mapping, -1,
+	       sizeof(int) * pev->ntevs);
+
+	err = map_prologue(pev, priv->type_mapping, &priv->nr_types);
+	if (err)
+		return err;
+
+	return bpf_program__set_prep(prog, priv->nr_types,
+				     preproc_gen_prologue);
+}
+
+/*
+ * Configure every program in obj and create the probes it asks for.
+ *
+ * For each program: parse its config, mark it as a tracepoint or kprobe
+ * program and, for kprobe programs, convert and apply the perf probe
+ * events and hook the prologue pre-processor.
+ *
+ * Returns 0 on success or the first negative error encountered; the loop
+ * stops at the first failure.
+ */
+int bpf__probe(struct bpf_object *obj)
+{
+	int err = 0;
+	struct bpf_program *prog;
+	struct bpf_prog_priv *priv;
+	struct perf_probe_event *pev;
+
+	err = bpf__prepare_probe();
+	if (err) {
+		pr_debug("bpf__prepare_probe failed\n");
+		return err;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		err = config_bpf_program(prog);
+		if (err)
+			goto out;
+
+		priv = bpf_program__priv(prog);
+		if (IS_ERR(priv)) {
+			err = PTR_ERR(priv);
+			goto out;
+		}
+		if (!priv) {
+			/*
+			 * PTR_ERR(NULL) is 0, which would make this abort
+			 * look like success; report an internal error as
+			 * the other users of bpf_program__priv() here do.
+			 */
+			err = -BPF_LOADER_ERRNO__INTERNAL;
+			goto out;
+		}
+
+		if (priv->is_tp) {
+			bpf_program__set_tracepoint(prog);
+			continue;
+		}
+
+		bpf_program__set_kprobe(prog);
+		pev = &priv->pev;
+
+		err = convert_perf_probe_events(pev, 1);
+		if (err < 0) {
+			pr_debug("bpf_probe: failed to convert perf probe events\n");
+			goto out;
+		}
+
+		err = apply_perf_probe_events(pev, 1);
+		if (err < 0) {
+			pr_debug("bpf_probe: failed to apply perf probe events\n");
+			goto out;
+		}
+
+		/*
+		 * After probing, let's consider prologue, which
+		 * adds program fetcher to BPF programs.
+		 *
+		 * hook_load_preprocessor() hooks pre-processor
+		 * to bpf_program, let it generate prologue
+		 * dynamically during loading.
+		 */
+		err = hook_load_preprocessor(prog);
+		if (err)
+			goto out;
+	}
+out:
+	/* Positive values from the probe helpers are not errors. */
+	return err < 0 ? err : 0;
+}
+
+#define EVENTS_WRITE_BUFSIZE  4096
+/*
+ * Delete the probe events created for every non-tracepoint program in obj.
+ *
+ * This is best effort: a failure on one event is recorded and the
+ * remaining events are still processed. Returns 0 if everything was
+ * deleted, otherwise the error of the last failure.
+ */
+int bpf__unprobe(struct bpf_object *obj)
+{
+	struct bpf_program *prog;
+	int err, ret = 0;
+
+	bpf_object__for_each_program(prog, obj) {
+		struct bpf_prog_priv *priv = bpf_program__priv(prog);
+		struct perf_probe_event *pev;
+		int idx;
+
+		/* Tracepoint programs never created a probe. */
+		if (!priv || IS_ERR(priv) || priv->is_tp)
+			continue;
+
+		pev = &priv->pev;
+		for (idx = 0; idx < pev->ntevs; idx++) {
+			struct probe_trace_event *tev = &pev->tevs[idx];
+			char name_buf[EVENTS_WRITE_BUFSIZE];
+			struct strfilter *delfilter;
+
+			/* The filter pattern is "<group>:<event>". */
+			snprintf(name_buf, sizeof(name_buf),
+				 "%s:%s", tev->group, tev->event);
+			name_buf[sizeof(name_buf) - 1] = '\0';
+
+			delfilter = strfilter__new(name_buf, NULL);
+			if (delfilter) {
+				err = del_perf_probe_events(delfilter);
+				strfilter__delete(delfilter);
+				if (err) {
+					pr_debug("Failed to delete %s\n", name_buf);
+					ret = err;
+				}
+			} else {
+				pr_debug("Failed to create filter for unprobing\n");
+				ret = -ENOMEM;
+			}
+		}
+	}
+	return ret;
+}
+
+/*
+ * Load obj through bpf_object__load().
+ * Returns 0 on success, otherwise the error from bpf_object__load().
+ */
+int bpf__load(struct bpf_object *obj)
+{
+	int err = bpf_object__load(obj);
+
+	if (!err)
+		return 0;
+
+	pr_debug("bpf: load objects failed\n");
+	return err;
+}
+
+/*
+ * Invoke func once per event of every program in obj.
+ *
+ * Tracepoint programs produce a single call with their sys/event names
+ * and the program fd. Other programs produce one call per tev; when a
+ * prologue is in use the fd is that of the program instance selected by
+ * the tev's type mapping.
+ *
+ * Iteration stops at the first failure. Returns 0 on success,
+ * -BPF_LOADER_ERRNO__INTERNAL if a program has no private data, a
+ * negative fd lookup result, or the non-zero value returned by func.
+ */
+int bpf__foreach_event(struct bpf_object *obj,
+		       bpf_prog_iter_callback_t func,
+		       void *arg)
+{
+	struct bpf_program *prog;
+
+	bpf_object__for_each_program(prog, obj) {
+		struct bpf_prog_priv *priv = bpf_program__priv(prog);
+		struct perf_probe_event *pev;
+		int idx, fd, err;
+
+		if (!priv || IS_ERR(priv)) {
+			pr_debug("bpf: failed to get private field\n");
+			return -BPF_LOADER_ERRNO__INTERNAL;
+		}
+
+		if (priv->is_tp) {
+			fd = bpf_program__fd(prog);
+			err = func(priv->sys_name, priv->evt_name, fd, arg);
+			if (err) {
+				pr_debug("bpf: tracepoint call back failed, stop iterate\n");
+				return err;
+			}
+			continue;
+		}
+
+		pev = &priv->pev;
+		for (idx = 0; idx < pev->ntevs; idx++) {
+			struct probe_trace_event *tev = &pev->tevs[idx];
+
+			fd = priv->need_prologue ?
+				bpf_program__nth_fd(prog, priv->type_mapping[idx]) :
+				bpf_program__fd(prog);
+			if (fd < 0) {
+				pr_debug("bpf: failed to get file descriptor\n");
+				return fd;
+			}
+
+			err = func(tev->group, tev->event, fd, arg);
+			if (err) {
+				pr_debug("bpf: call back failed, stop iterate\n");
+				return err;
+			}
+		}
+	}
+	return 0;
+}
+
+/* What a queued map operation writes into the keys it selects. */
+enum bpf_map_op_type {
+	/* Store the number kept in bpf_map_op.v.value. */
+	BPF_MAP_OP_SET_VALUE,
+	/* Store an fd of the evsel kept in bpf_map_op.v.evsel. */
+	BPF_MAP_OP_SET_EVSEL,
+};
+
+/* Which keys of the map a queued operation applies to. */
+enum bpf_map_key_type {
+	/* Every key from 0 up to the map's max_entries. */
+	BPF_MAP_KEY_ALL,
+	/* Only the index ranges listed in bpf_map_op.k.array. */
+	BPF_MAP_KEY_RANGES,
+};
+
+/*
+ * One pending configuration operation on a BPF map, queued on
+ * bpf_map_priv.ops_list and applied key by key later on.
+ */
+struct bpf_map_op {
+	/* Link in bpf_map_priv.ops_list. */
+	struct list_head list;
+	/* Selects the valid member of 'v'. */
+	enum bpf_map_op_type op_type;
+	/* Selects how keys are chosen; 'k.array' is used for KEY_RANGES. */
+	enum bpf_map_key_type key_type;
+	union {
+		/* Private copy of the index ranges, owned by this op. */
+		struct parse_events_array array;
+	} k;
+	union {
+		/* Used with BPF_MAP_OP_SET_VALUE. */
+		u64 value;
+		/* Used with BPF_MAP_OP_SET_EVSEL. */
+		struct perf_evsel *evsel;
+	} v;
+};
+
+/* Private data attached to a bpf_map: its queue of pending operations. */
+struct bpf_map_priv {
+	/* List of struct bpf_map_op, linked through bpf_map_op.list. */
+	struct list_head ops_list;
+};
+
+/*
+ * Free op: unlink it if it is still on a list, release its range array
+ * when it has one, then free the op itself.
+ */
+static void
+bpf_map_op__delete(struct bpf_map_op *op)
+{
+	const bool still_linked = !list_empty(&op->list);
+
+	if (still_linked)
+		list_del(&op->list);
+
+	/* Only range-keyed ops own a range array. */
+	if (op->key_type == BPF_MAP_KEY_RANGES)
+		parse_events__clear_array(&op->k.array);
+
+	free(op);
+}
+
+/* Remove and free every operation queued on priv; priv itself is kept. */
+static void
+bpf_map_priv__purge(struct bpf_map_priv *priv)
+{
+	/* Keep popping the head until the list is drained. */
+	while (!list_empty(&priv->ops_list)) {
+		struct bpf_map_op *head;
+
+		head = list_first_entry(&priv->ops_list,
+					struct bpf_map_op, list);
+		list_del_init(&head->list);
+		bpf_map_op__delete(head);
+	}
+}
+
+/*
+ * Destructor registered through bpf_map__set_priv(): frees all queued
+ * operations and then the private structure itself.
+ */
+static void
+bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
+		    void *_priv)
+{
+	bpf_map_priv__purge(_priv);
+	free(_priv);
+}
+
+/*
+ * Initialise the key selection of op from term.
+ *
+ * Without a term, or when the term carries no index ranges, the op
+ * applies to all keys. Otherwise the term's ranges are duplicated into
+ * the op. Returns 0 on success or -ENOMEM if the copy fails.
+ */
+static int
+bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term)
+{
+	size_t nbytes;
+
+	op->key_type = BPF_MAP_KEY_ALL;
+	if (!term || !term->array.nr_ranges)
+		return 0;
+
+	nbytes = term->array.nr_ranges * sizeof(op->k.array.ranges[0]);
+	op->k.array.ranges = memdup(term->array.ranges, nbytes);
+	if (!op->k.array.ranges) {
+		pr_debug("Not enough memory to alloc indices for map\n");
+		return -ENOMEM;
+	}
+
+	op->key_type = BPF_MAP_KEY_RANGES;
+	op->k.array.nr_ranges = term->array.nr_ranges;
+	return 0;
+}
+
+/*
+ * Allocate a zeroed, unlinked operation whose key selection is taken from
+ * term (which may be NULL). Returns the new op or an ERR_PTR() value.
+ */
+static struct bpf_map_op *
+bpf_map_op__new(struct parse_events_term *term)
+{
+	struct bpf_map_op *op = zalloc(sizeof(*op));
+	int err;
+
+	if (!op) {
+		pr_debug("Failed to alloc bpf_map_op\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	INIT_LIST_HEAD(&op->list);
+
+	err = bpf_map_op_setkey(op, term);
+	if (!err)
+		return op;
+
+	free(op);
+	return ERR_PTR(err);
+}
+
+/*
+ * Duplicate op, including its own copy of the range array when the op is
+ * range-keyed. The copy is not linked into any list.
+ * Returns the copy, or NULL when memory runs out.
+ */
+static struct bpf_map_op *
+bpf_map_op__clone(struct bpf_map_op *op)
+{
+	struct bpf_map_op *copy = memdup(op, sizeof(*op));
+	size_t nbytes;
+
+	if (!copy) {
+		pr_debug("Failed to alloc bpf_map_op\n");
+		return NULL;
+	}
+	/* The byte copy duplicated the list pointers; detach the clone. */
+	INIT_LIST_HEAD(&copy->list);
+
+	if (op->key_type != BPF_MAP_KEY_RANGES)
+		return copy;
+
+	nbytes = op->k.array.nr_ranges * sizeof(op->k.array.ranges[0]);
+	copy->k.array.ranges = memdup(op->k.array.ranges, nbytes);
+	if (copy->k.array.ranges)
+		return copy;
+
+	pr_debug("Failed to alloc indices for map\n");
+	free(copy);
+	return NULL;
+}
+
+/*
+ * Create a new private structure holding clones of every operation queued
+ * on priv, in the same order.
+ * Returns the new structure, or NULL when memory runs out; nothing is
+ * left allocated on failure.
+ */
+static struct bpf_map_priv *
+bpf_map_priv__clone(struct bpf_map_priv *priv)
+{
+	struct bpf_map_priv *newpriv;
+	struct bpf_map_op *pos, *newop;
+
+	newpriv = zalloc(sizeof(*newpriv));
+	if (!newpriv) {
+		pr_debug("Not enough memory to alloc map private\n");
+		return NULL;
+	}
+	INIT_LIST_HEAD(&newpriv->ops_list);
+
+	list_for_each_entry(pos, &priv->ops_list, list) {
+		newop = bpf_map_op__clone(pos);
+		if (!newop) {
+			/*
+			 * Purging only releases the ops cloned so far; the
+			 * container must be freed too or it is leaked.
+			 */
+			bpf_map_priv__purge(newpriv);
+			free(newpriv);
+			return NULL;
+		}
+		list_add_tail(&newop->list, &newpriv->ops_list);
+	}
+
+	return newpriv;
+}
+
+/*
+ * Append op to the operation queue of map, creating and attaching the
+ * map's private structure on first use.
+ * Returns 0 on success, the PTR_ERR() of a bad private pointer, -ENOMEM,
+ * or -BPF_LOADER_ERRNO__INTERNAL if the private data can't be attached.
+ */
+static int
+bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
+{
+	const char *map_name = bpf_map__name(map);
+	struct bpf_map_priv *priv = bpf_map__priv(map);
+
+	if (IS_ERR(priv)) {
+		pr_debug("Failed to get private from map %s\n", map_name);
+		return PTR_ERR(priv);
+	}
+
+	if (!priv) {
+		struct bpf_map_priv *fresh = zalloc(sizeof(*fresh));
+
+		if (!fresh) {
+			pr_debug("Not enough memory to alloc map private\n");
+			return -ENOMEM;
+		}
+		INIT_LIST_HEAD(&fresh->ops_list);
+
+		if (bpf_map__set_priv(map, fresh, bpf_map_priv__clear)) {
+			free(fresh);
+			return -BPF_LOADER_ERRNO__INTERNAL;
+		}
+		priv = fresh;
+	}
+
+	list_add_tail(&op->list, &priv->ops_list);
+	return 0;
+}
+
+/*
+ * Create an operation from term and queue it on map.
+ * Returns the queued op, or an ERR_PTR() value; on failure the op is
+ * released before returning.
+ */
+static struct bpf_map_op *
+bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term)
+{
+	struct bpf_map_op *op = bpf_map_op__new(term);
+	int err;
+
+	if (IS_ERR(op))
+		return op;
+
+	err = bpf_map__add_op(map, op);
+	if (!err)
+		return op;
+
+	bpf_map_op__delete(op);
+	return ERR_PTR(err);
+}
+
+/*
+ * Queue a "set value" operation on map taking the number from term.
+ *
+ * The map must be a BPF_MAP_TYPE_ARRAY whose key is at least as large as
+ * an unsigned int and whose value size is 1, 2, 4 or 8 bytes.
+ * Returns 0 on success or a negative error code.
+ */
+static int
+__bpf_map__config_value(struct bpf_map *map,
+			struct parse_events_term *term)
+{
+	const char *map_name = bpf_map__name(map);
+	const struct bpf_map_def *def = bpf_map__def(map);
+	struct bpf_map_op *op;
+
+	if (IS_ERR(def)) {
+		pr_debug("Unable to get map definition from '%s'\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	if (def->type != BPF_MAP_TYPE_ARRAY) {
+		pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+	}
+
+	if (def->key_size < sizeof(unsigned int)) {
+		pr_debug("Map %s has incorrect key size\n", map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
+	}
+
+	/* Only the plain integer widths can be stored. */
+	if (def->value_size != 1 && def->value_size != 2 &&
+	    def->value_size != 4 && def->value_size != 8) {
+		pr_debug("Map %s has incorrect value size\n", map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+	}
+
+	op = bpf_map__add_newop(map, term);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	op->v.value = term->val.num;
+	op->op_type = BPF_MAP_OP_SET_VALUE;
+	return 0;
+}
+
+/*
+ * Handler for the "value" map option: checks that term carries a numeric
+ * value and hands it to __bpf_map__config_value().
+ */
+static int
+bpf_map__config_value(struct bpf_map *map,
+		      struct parse_events_term *term,
+		      struct perf_evlist *evlist __maybe_unused)
+{
+	if (!term->err_val) {
+		pr_debug("Config value not set\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+	}
+
+	if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+		return __bpf_map__config_value(map, term);
+
+	pr_debug("ERROR: wrong value type for 'value'\n");
+	return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+}
+
+/*
+ * Queue a "set evsel" operation on map for the event named by term.
+ *
+ * The event is looked up in evlist by name and the map must be a
+ * BPF_MAP_TYPE_PERF_EVENT_ARRAY. Returns 0 on success or a negative
+ * error code.
+ */
+static int
+__bpf_map__config_event(struct bpf_map *map,
+			struct parse_events_term *term,
+			struct perf_evlist *evlist)
+{
+	const char *map_name = bpf_map__name(map);
+	const struct bpf_map_def *def;
+	struct perf_evsel *target;
+	struct bpf_map_op *op;
+
+	target = perf_evlist__find_evsel_by_str(evlist, term->val.str);
+	if (!target) {
+		pr_debug("Event (for '%s') '%s' doesn't exist\n",
+			 map_name, term->val.str);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
+	}
+
+	def = bpf_map__def(map);
+	if (IS_ERR(def)) {
+		pr_debug("Unable to get map definition from '%s'\n",
+			 map_name);
+		return PTR_ERR(def);
+	}
+
+	/*
+	 * key_size and value_size are deliberately not checked here:
+	 * the kernel has already validated them.
+	 */
+	if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+		pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+	}
+
+	op = bpf_map__add_newop(map, term);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+
+	op->v.evsel = target;
+	op->op_type = BPF_MAP_OP_SET_EVSEL;
+	return 0;
+}
+
+/*
+ * Handler for the "event" map option: checks that term carries a string
+ * value and hands it to __bpf_map__config_event().
+ */
+static int
+bpf_map__config_event(struct bpf_map *map,
+		      struct parse_events_term *term,
+		      struct perf_evlist *evlist)
+{
+	if (!term->err_val) {
+		pr_debug("Config value not set\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+	}
+
+	if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+		return __bpf_map__config_event(map, term, evlist);
+
+	pr_debug("ERROR: wrong value type for 'event'\n");
+	return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+}
+
+/* Binds a map option name to the function that handles it. */
+struct bpf_obj_config__map_func {
+	/* Option name, i.e. the text after the '.' in "map:<name>.<opt>". */
+	const char *config_opt;
+	/* Handler; returns 0 on success or a negative error code. */
+	int (*config_func)(struct bpf_map *, struct parse_events_term *,
+			   struct perf_evlist *);
+};
+
+/* Options understood by bpf__obj_config_map(), matched with strcmp(). */
+struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = {
+	{"value", bpf_map__config_value},
+	{"event", bpf_map__config_event},
+};
+
+/*
+ * Verify that every index range given in term fits inside map.
+ *
+ * Returns 0 when there are no ranges or all of them fit,
+ * -BPF_LOADER_ERRNO__INTERNAL when the range array or the map definition
+ * is unavailable, and -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG when the
+ * last index of a range is not below the map's max_entries.
+ */
+static int
+config_map_indices_range_check(struct parse_events_term *term,
+			       struct bpf_map *map,
+			       const char *map_name)
+{
+	struct parse_events_array *array = &term->array;
+	const struct bpf_map_def *def;
+	unsigned int i;
+
+	if (!array->nr_ranges)
+		return 0;
+	if (!array->ranges) {
+		pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n",
+			 map_name, (int)array->nr_ranges);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	def = bpf_map__def(map);
+	if (IS_ERR(def)) {
+		pr_debug("ERROR: Unable to get map definition from '%s'\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	for (i = 0; i < array->nr_ranges; i++) {
+		unsigned int start = array->ranges[i].start;
+		size_t length = array->ranges[i].length;
+		/*
+		 * Compute the last index in a wide type: narrowing
+		 * start + length - 1 to unsigned int lets an oversized
+		 * range wrap around to a small index and pass the check.
+		 */
+		unsigned long long last =
+			(unsigned long long)start + length - 1;
+
+		/*
+		 * A length above max_entries can never fit, and rejecting
+		 * it first keeps 'last' meaningful for the second test.
+		 */
+		if (length > def->max_entries || last >= def->max_entries) {
+			pr_debug("ERROR: index %llu too large\n", last);
+			return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Apply one "map:<mapname>.<config opt>" term to the named map of obj.
+ *
+ * *key_scan_pos is the offset into term->config used for error
+ * reporting: it is advanced past the option name while the index ranges
+ * are checked (and left there if that check fails), and advanced past
+ * the option name again when the whole term was handled successfully.
+ *
+ * Returns 0 on success, -ENOMEM, or a negative BPF_LOADER_ERRNO__ code.
+ */
+static int
+bpf__obj_config_map(struct bpf_object *obj,
+		    struct parse_events_term *term,
+		    struct perf_evlist *evlist,
+		    int *key_scan_pos)
+{
+	/* key is "map:<mapname>.<config opt>" */
+	char *map_name = strdup(term->config + sizeof("map:") - 1);
+	struct bpf_map *map;
+	int err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+	char *map_opt;
+	size_t i;
+
+	if (!map_name)
+		return -ENOMEM;
+
+	map_opt = strchr(map_name, '.');
+	if (!map_opt) {
+		pr_debug("ERROR: Invalid map config: %s\n", map_name);
+		goto out;
+	}
+
+	/* Split the copy in place: map_name = "<mapname>", map_opt = "<opt>". */
+	*map_opt++ = '\0';
+	if (*map_opt == '\0') {
+		pr_debug("ERROR: Invalid map option: %s\n", term->config);
+		goto out;
+	}
+
+	map = bpf_object__find_map_by_name(obj, map_name);
+	if (!map) {
+		pr_debug("ERROR: Map %s doesn't exist\n", map_name);
+		err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
+		goto out;
+	}
+
+	/* Point past the option name while the indices are validated. */
+	*key_scan_pos += strlen(map_opt);
+	err = config_map_indices_range_check(term, map, map_name);
+	if (err)
+		goto out;
+	*key_scan_pos -= strlen(map_opt);
+
+	for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) {
+		struct bpf_obj_config__map_func *func =
+				&bpf_obj_config__map_funcs[i];
+
+		if (strcmp(map_opt, func->config_opt) == 0) {
+			err = func->config_func(map, term, evlist);
+			goto out;
+		}
+	}
+
+	pr_debug("ERROR: Invalid map config option '%s'\n", map_opt);
+	err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
+out:
+	/*
+	 * map_opt points into map_name, so it must be read before the
+	 * buffer is freed, and the position is updated through the
+	 * pointer rather than by moving the pointer itself. err is
+	 * non-zero on every path where map_opt is NULL.
+	 */
+	if (!err)
+		*key_scan_pos += strlen(map_opt);
+	free(map_name);
+	return err;
+}
+
+/*
+ * Apply one object config term to obj.
+ *
+ * Only terms whose config string starts with "map:" are understood; they
+ * are forwarded to bpf__obj_config_map(). When error_pos is not NULL it
+ * receives the scan position within term->config.
+ *
+ * Returns 0 on success, -EINVAL on missing arguments,
+ * -BPF_LOADER_ERRNO__OBJCONF_OPT for an unknown option, or the error
+ * from bpf__obj_config_map().
+ */
+int bpf__config_obj(struct bpf_object *obj,
+		    struct parse_events_term *term,
+		    struct perf_evlist *evlist,
+		    int *error_pos)
+{
+	int key_scan_pos = 0;
+	int err;
+
+	if (!obj || !term || !term->config)
+		return -EINVAL;
+
+	if (strstarts(term->config, "map:")) {
+		key_scan_pos = sizeof("map:") - 1;
+		err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos);
+	} else {
+		err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+	}
+
+	if (error_pos)
+		*error_pos = key_scan_pos;
+	return err;
+}
+
+/*
+ * Per-key callback used by bpf_map_config_foreach_key().
+ * Receives the map name, fd and definition, the operation being applied,
+ * a pointer to the current key (an unsigned int index) and the caller's
+ * opaque argument. A non-zero return stops the iteration.
+ */
+typedef int (*map_config_func_t)(const char *name, int map_fd,
+				 const struct bpf_map_def *pdef,
+				 struct bpf_map_op *op,
+				 void *pkey, void *arg);
+
+/*
+ * Call func for every index from 0 up to pdef->max_entries.
+ * Stops at, and returns, the first non-zero result; 0 otherwise.
+ */
+static int
+foreach_key_array_all(map_config_func_t func,
+		      void *arg, const char *name,
+		      int map_fd, const struct bpf_map_def *pdef,
+		      struct bpf_map_op *op)
+{
+	unsigned int key = 0;
+
+	while (key < pdef->max_entries) {
+		int err = func(name, map_fd, pdef, op, &key, arg);
+
+		if (err) {
+			pr_debug("ERROR: failed to insert value to %s[%u]\n",
+				 name, key);
+			return err;
+		}
+		key++;
+	}
+	return 0;
+}
+
+/*
+ * Call func for every index covered by the ranges stored in op.
+ * Stops at, and returns, the first non-zero result; 0 otherwise.
+ */
+static int
+foreach_key_array_ranges(map_config_func_t func, void *arg,
+			 const char *name, int map_fd,
+			 const struct bpf_map_def *pdef,
+			 struct bpf_map_op *op)
+{
+	unsigned int r;
+
+	for (r = 0; r < op->k.array.nr_ranges; r++) {
+		unsigned int first = op->k.array.ranges[r].start;
+		size_t count = op->k.array.ranges[r].length;
+		unsigned int off;
+
+		for (off = 0; off < count; off++) {
+			unsigned int key = first + off;
+			int err = func(name, map_fd, pdef, op, &key, arg);
+
+			if (err) {
+				pr_debug("ERROR: failed to insert value to %s[%u]\n",
+					 name, key);
+				return err;
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Run func over the keys selected by every operation queued on map.
+ *
+ * A map without private data or without queued operations is not an
+ * error. Only BPF_MAP_TYPE_ARRAY and BPF_MAP_TYPE_PERF_EVENT_ARRAY maps
+ * are supported. Returns 0 on success, a negative BPF_LOADER_ERRNO__
+ * code, a negative map fd lookup result, or the first non-zero value
+ * returned by func.
+ */
+static int
+bpf_map_config_foreach_key(struct bpf_map *map,
+			   map_config_func_t func,
+			   void *arg)
+{
+	const char *name = bpf_map__name(map);
+	struct bpf_map_priv *priv = bpf_map__priv(map);
+	const struct bpf_map_def *def;
+	struct bpf_map_op *op;
+	int map_fd, err;
+
+	if (IS_ERR(priv)) {
+		pr_debug("ERROR: failed to get private from map %s\n", name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+	if (!priv || list_empty(&priv->ops_list)) {
+		pr_debug("INFO: nothing to config for map %s\n", name);
+		return 0;
+	}
+
+	def = bpf_map__def(map);
+	if (IS_ERR(def)) {
+		pr_debug("ERROR: failed to get definition from map %s\n", name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	map_fd = bpf_map__fd(map);
+	if (map_fd < 0) {
+		pr_debug("ERROR: failed to get fd from map %s\n", name);
+		return map_fd;
+	}
+
+	list_for_each_entry(op, &priv->ops_list, list) {
+		/* Only array-like maps have keys that can be enumerated here. */
+		if (def->type != BPF_MAP_TYPE_ARRAY &&
+		    def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+			pr_debug("ERROR: type of '%s' incorrect\n", name);
+			return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+		}
+
+		if (op->key_type == BPF_MAP_KEY_ALL) {
+			err = foreach_key_array_all(func, arg, name,
+						    map_fd, def, op);
+		} else if (op->key_type == BPF_MAP_KEY_RANGES) {
+			err = foreach_key_array_ranges(func, arg, name,
+						       map_fd, def, op);
+		} else {
+			pr_debug("ERROR: keytype for map '%s' invalid\n",
+				 name);
+			return -BPF_LOADER_ERRNO__INTERNAL;
+		}
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Store val, narrowed to val_size bytes, at *pkey of the map behind
+ * map_fd.
+ *
+ * Returns 0 on success, -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE for a
+ * size other than 1, 2, 4 or 8, -errno when the update fails with errno
+ * set, or the raw update result otherwise.
+ */
+static int
+apply_config_value_for_key(int map_fd, void *pkey,
+			   size_t val_size, u64 val)
+{
+	u8 val8;
+	u16 val16;
+	u32 val32;
+	void *pval;
+	int err;
+
+	/* Pick a correctly sized copy of the value to hand to the kernel. */
+	switch (val_size) {
+	case 1:
+		val8 = (u8)(val);
+		pval = &val8;
+		break;
+	case 2:
+		val16 = (u16)(val);
+		pval = &val16;
+		break;
+	case 4:
+		val32 = (u32)(val);
+		pval = &val32;
+		break;
+	case 8:
+		pval = &val;
+		break;
+	default:
+		pr_debug("ERROR: invalid value size\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+	}
+
+	err = bpf_map_update_elem(map_fd, pkey, pval, BPF_ANY);
+	if (err && errno)
+		err = -errno;
+	return err;
+}
+
+/*
+ * Store an fd of evsel at *pkey of the map behind map_fd.
+ *
+ * The evsel must have its fd array set up with exactly one column, must
+ * not be an inherit event, and must be a BPF output, raw or hardware
+ * event. The key selects the row of the fd array and must be below the
+ * number of rows.
+ *
+ * Returns 0 on success, a negative BPF_LOADER_ERRNO__ code when a check
+ * fails, -errno when the update fails with errno set, or the raw update
+ * result otherwise.
+ */
+static int
+apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
+			   struct perf_evsel *evsel)
+{
+	struct xyarray *xy = evsel->fd;
+	struct perf_event_attr *attr = &evsel->attr;
+	unsigned int key, events;
+	bool type_ok;
+	int err;
+
+	if (!xy) {
+		pr_debug("ERROR: evsel not ready for map %s\n", name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	if (xy->row_size / xy->entry_size != 1) {
+		pr_debug("ERROR: Dimension of target event is incorrect for map %s\n",
+			 name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM;
+	}
+
+	if (attr->inherit) {
+		pr_debug("ERROR: Can't put inherit event into map %s\n", name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
+	}
+
+	type_ok = perf_evsel__is_bpf_output(evsel);
+	if (attr->type == PERF_TYPE_RAW || attr->type == PERF_TYPE_HARDWARE)
+		type_ok = true;
+	if (!type_ok) {
+		pr_debug("ERROR: Event type is wrong for map %s\n", name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE;
+	}
+
+	/* Number of rows in the fd array. */
+	events = xy->entries / (xy->row_size / xy->entry_size);
+	key = *((unsigned int *)pkey);
+	if (key >= events) {
+		pr_debug("ERROR: there is no event %d for map %s\n",
+			 key, name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE;
+	}
+
+	err = bpf_map_update_elem(map_fd, pkey,
+				  xyarray__entry(xy, key, 0), BPF_ANY);
+	if (err && errno)
+		err = -errno;
+	return err;
+}
+
+/*
+ * map_config_func_t callback that applies op to a single key, choosing
+ * the value or evsel helper according to op->op_type.
+ * Returns the helper's result, or -BPF_LOADER_ERRNO__INTERNAL for an
+ * unknown operation type.
+ */
+static int
+apply_obj_config_map_for_key(const char *name, int map_fd,
+			     const struct bpf_map_def *pdef,
+			     struct bpf_map_op *op,
+			     void *pkey, void *arg __maybe_unused)
+{
+	if (op->op_type == BPF_MAP_OP_SET_VALUE)
+		return apply_config_value_for_key(map_fd, pkey,
+						  pdef->value_size,
+						  op->v.value);
+
+	if (op->op_type == BPF_MAP_OP_SET_EVSEL)
+		return apply_config_evsel_for_key(name, map_fd, pkey,
+						  op->v.evsel);
+
+	pr_debug("ERROR: unknown value type for '%s'\n", name);
+	return -BPF_LOADER_ERRNO__INTERNAL;
+}
+
+/* Apply every queued operation of map, one key at a time. */
+static int
+apply_obj_config_map(struct bpf_map *map)
+{
+	map_config_func_t per_key = apply_obj_config_map_for_key;
+
+	return bpf_map_config_foreach_key(map, per_key, NULL);
+}
+
+/*
+ * Apply the queued configuration of every map in obj.
+ * Stops at, and returns, the first error; 0 otherwise.
+ */
+static int
+apply_obj_config_object(struct bpf_object *obj)
+{
+	struct bpf_map *map;
+
+	bpf_map__for_each(map, obj) {
+		int err = apply_obj_config_map(map);
+
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * Apply the queued map configuration of every BPF object visited by
+ * bpf_object__for_each_safe().
+ * Stops at, and returns, the first error; 0 otherwise.
+ */
+int bpf__apply_obj_config(void)
+{
+	struct bpf_object *obj, *tmp;
+
+	bpf_object__for_each_safe(obj, tmp) {
+		int err = apply_obj_config_object(obj);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Visit every map (pos) of every BPF object (obj); objtmp is scratch. */
+#define bpf__for_each_map(pos, obj, objtmp)	\
+	bpf_object__for_each_safe(obj, objtmp)	\
+		bpf_map__for_each(pos, obj)
+
+/*
+ * Like bpf__for_each_map() but only runs the body for maps named
+ * "__bpf_stdout__". The expansion ends in an 'if', so give it a braced
+ * body and do not follow it directly with 'else'.
+ */
+#define bpf__for_each_stdout_map(pos, obj, objtmp)	\
+	bpf__for_each_map(pos, obj, objtmp) 		\
+		if (bpf_map__name(pos) && 		\
+			(strcmp("__bpf_stdout__", 	\
+				bpf_map__name(pos)) == 0))
+
+/*
+ * Give every "__bpf_stdout__" map that has no configuration a default one.
+ *
+ * If some stdout map is already configured, its operations are cloned
+ * into the unconfigured ones. Otherwise a bpf-output event named
+ * __bpf_stdout__ is added to evlist and each unconfigured map gets an
+ * operation pointing all its keys at that event.
+ *
+ * Returns 0 when nothing needs doing or on success, otherwise an error
+ * code from the failing step.
+ */
+int bpf__setup_stdout(struct perf_evlist *evlist)
+{
+	struct bpf_map_priv *tmpl_priv = NULL;
+	struct perf_evsel *evsel = NULL;
+	struct bpf_object *obj, *tmp;
+	struct bpf_map *map;
+	bool need_init = false;
+	int err;
+
+	/* Pass 1: find out whether work is needed and look for a template. */
+	bpf__for_each_stdout_map(map, obj, tmp) {
+		struct bpf_map_priv *priv = bpf_map__priv(map);
+
+		if (IS_ERR(priv))
+			return -BPF_LOADER_ERRNO__INTERNAL;
+
+		/*
+		 * The map type is not checked here: it should have been
+		 * verified by the kernel.
+		 */
+		if (!priv)
+			need_init = true;
+		else if (!tmpl_priv)
+			tmpl_priv = priv;
+	}
+
+	if (!need_init)
+		return 0;
+
+	/* No template available: create the default output event. */
+	if (!tmpl_priv) {
+		err = parse_events(evlist, "bpf-output/no-inherit=1,name=__bpf_stdout__/",
+				   NULL);
+		if (err) {
+			pr_debug("ERROR: failed to create bpf-output event\n");
+			return -err;
+		}
+
+		evsel = perf_evlist__last(evlist);
+	}
+
+	/* Pass 2: configure every stdout map that has no private data yet. */
+	bpf__for_each_stdout_map(map, obj, tmp) {
+		struct bpf_map_priv *priv = bpf_map__priv(map);
+
+		if (IS_ERR(priv))
+			return -BPF_LOADER_ERRNO__INTERNAL;
+		if (priv)
+			continue;
+
+		if (tmpl_priv) {
+			struct bpf_map_priv *copy;
+
+			copy = bpf_map_priv__clone(tmpl_priv);
+			if (!copy)
+				return -ENOMEM;
+
+			err = bpf_map__set_priv(map, copy, bpf_map_priv__clear);
+			if (err) {
+				bpf_map_priv__clear(map, copy);
+				return err;
+			}
+		} else if (evsel) {
+			struct bpf_map_op *op = bpf_map__add_newop(map, NULL);
+
+			if (IS_ERR(op))
+				return PTR_ERR(op);
+			op->v.evsel = evsel;
+			op->op_type = BPF_MAP_OP_SET_EVSEL;
+		}
+	}
+
+	return 0;
+}
+
+/* Index of loader error number e within bpf_loader_strerror_table. */
+#define ERRNO_OFFSET(e)		((e) - __BPF_LOADER_ERRNO__START)
+/* Same, taking the error name without its BPF_LOADER_ERRNO__ prefix. */
+#define ERRCODE_OFFSET(c)	ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
+/* Number of slots in the loader error range. */
+#define NR_ERRNO	(__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
+
+/*
+ * Message for each loader error, indexed by ERRNO_OFFSET(). Slots without
+ * an initializer are NULL.
+ */
+static const char *bpf_loader_strerror_table[NR_ERRNO] = {
+	[ERRCODE_OFFSET(CONFIG)]	= "Invalid config string",
+	[ERRCODE_OFFSET(GROUP)]		= "Invalid group name",
+	[ERRCODE_OFFSET(EVENTNAME)]	= "No event name found in config string",
+	[ERRCODE_OFFSET(INTERNAL)]	= "BPF loader internal error",
+	[ERRCODE_OFFSET(COMPILE)]	= "Error when compiling BPF scriptlet",
+	[ERRCODE_OFFSET(PROGCONF_TERM)]	= "Invalid program config term in config string",
+	[ERRCODE_OFFSET(PROLOGUE)]	= "Failed to generate prologue",
+	[ERRCODE_OFFSET(PROLOGUE2BIG)]	= "Prologue too big for program",
+	[ERRCODE_OFFSET(PROLOGUEOOB)]	= "Offset out of bound for prologue",
+	[ERRCODE_OFFSET(OBJCONF_OPT)]	= "Invalid object config option",
+	[ERRCODE_OFFSET(OBJCONF_CONF)]	= "Config value not set (missing '=')",
+	[ERRCODE_OFFSET(OBJCONF_MAP_OPT)]	= "Invalid object map config option",
+	[ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)]	= "Target map doesn't exist",
+	[ERRCODE_OFFSET(OBJCONF_MAP_VALUE)]	= "Incorrect value type for map",
+	[ERRCODE_OFFSET(OBJCONF_MAP_TYPE)]	= "Incorrect map type",
+	[ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)]	= "Incorrect map key size",
+	[ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)]	= "Incorrect map value size",
+	[ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)]	= "Event not found for map setting",
+	[ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)]	= "Invalid map size for event setting",
+	[ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)]	= "Event dimension too large",
+	[ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)]	= "Doesn't support inherit event",
+	[ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)]	= "Wrong event type for map",
+	[ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)]	= "Index too large",
+};
+
+/*
+ * Write a message for err (either sign accepted) into buf.
+ *
+ * libbpf error numbers are delegated to libbpf_strerror() and its result
+ * is returned. Loader error numbers are looked up in
+ * bpf_loader_strerror_table and yield 0. Anything else is described
+ * either as an unknown loader error or through str_error_r(), and
+ * yields -1, as does a NULL buf or a zero size.
+ */
+static int
+bpf_loader_strerror(int err, char *buf, size_t size)
+{
+	char sbuf[STRERR_BUFSIZE];
+
+	if (!buf || !size)
+		return -1;
+
+	/* Work with the positive form of the error number. */
+	if (err <= 0)
+		err = -err;
+
+	if (err >= __LIBBPF_ERRNO__START)
+		return libbpf_strerror(err, buf, size);
+
+	if (err >= __BPF_LOADER_ERRNO__END) {
+		snprintf(buf, size, "Unknown bpf loader error %d", err);
+	} else if (err >= __BPF_LOADER_ERRNO__START) {
+		snprintf(buf, size, "%s",
+			 bpf_loader_strerror_table[ERRNO_OFFSET(err)]);
+		buf[size - 1] = '\0';
+		return 0;
+	} else {
+		snprintf(buf, size, "%s",
+			 str_error_r(err, sbuf, sizeof(sbuf)));
+	}
+
+	buf[size - 1] = '\0';
+	return -1;
+}
+
+/*
+ * Building blocks for the bpf__strerror_*() functions. Used in order
+ * (head, optional entries or hand-written cases, end) they expand to one
+ * switch statement over the positive error number.
+ *
+ * bpf__strerror_head declares 'sbuf' and 'emsg' in the enclosing
+ * function, returns 0 from it when size is 0, makes err positive, stores
+ * the generic message in emsg and opens the switch with a default case
+ * that copies emsg into buf.
+ */
+#define bpf__strerror_head(err, buf, size) \
+	char sbuf[STRERR_BUFSIZE], *emsg;\
+	if (!size)\
+		return 0;\
+	if (err < 0)\
+		err = -err;\
+	bpf_loader_strerror(err, sbuf, sizeof(sbuf));\
+	emsg = sbuf;\
+	switch (err) {\
+	default:\
+		scnprintf(buf, size, "%s", emsg);\
+		break;
+
+/* One case: for error 'val', format 'fmt...' into buf instead of emsg. */
+#define bpf__strerror_entry(val, fmt...)\
+	case val: {\
+		scnprintf(buf, size, fmt);\
+		break;\
+	}
+
+/* Closes the switch and forces NUL termination of buf. */
+#define bpf__strerror_end(buf, size)\
+	}\
+	buf[size - 1] = '\0';
+
+/*
+ * Format "Failed to load <filename>[ from source]: <reason>" into buf.
+ *
+ * Returns 0 when size is 0 or the prefix alone fills the buffer,
+ * otherwise the result of bpf_loader_strerror() for err.
+ */
+int bpf__strerror_prepare_load(const char *filename, bool source,
+			       int err, char *buf, size_t size)
+{
+	size_t n;
+	int ret;
+
+	/*
+	 * Nothing can be written into an empty buffer; without this guard
+	 * the truncation path below would store to buf[size - 1] with
+	 * size == 0.
+	 */
+	if (!size)
+		return 0;
+
+	/* A negative snprintf() result becomes a huge n and is caught below. */
+	n = snprintf(buf, size, "Failed to load %s%s: ",
+			 filename, source ? " from source" : "");
+	if (n >= size) {
+		buf[size - 1] = '\0';
+		return 0;
+	}
+	/* Append the reason after the prefix. */
+	buf += n;
+	size -= n;
+
+	ret = bpf_loader_strerror(err, buf, size);
+	buf[size - 1] = '\0';
+	return ret;
+}
+
+/*
+ * Describe an error returned by bpf__probe() in buf.
+ * Errors without a dedicated entry get the generic loader message.
+ * Always returns 0.
+ */
+int bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
+			int err, char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__PROGCONF_TERM,
+			    "%s (add -v to see detail)", emsg);
+	bpf__strerror_entry(EEXIST, "Probe point exist. Try 'perf probe -d \"*\"' and set 'force=yes'");
+	bpf__strerror_entry(EACCES, "You need to be root");
+	bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0");
+	bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file");
+	bpf__strerror_end(buf, size);
+	return 0;
+}
+
+/*
+ * Describe an error returned by bpf__load() in buf.
+ * For LIBBPF_ERRNO__KVER the object's kernel version is compared with the
+ * running kernel's to explain the mismatch; other errors get the generic
+ * loader message. Always returns 0.
+ */
+int bpf__strerror_load(struct bpf_object *obj,
+		       int err, char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	case LIBBPF_ERRNO__KVER: {
+		unsigned int obj_kver = bpf_object__kversion(obj);
+		unsigned int real_kver;
+
+		if (fetch_kernel_version(&real_kver, NULL, 0))
+			scnprintf(buf, size, "Unable to fetch kernel version");
+		else if (obj_kver != real_kver)
+			scnprintf(buf, size,
+				  "'version' ("KVER_FMT") doesn't match running kernel ("KVER_FMT")",
+				  KVER_PARAM(obj_kver),
+				  KVER_PARAM(real_kver));
+		else
+			scnprintf(buf, size, "Failed to load program for unknown reason");
+		break;
+	}
+	bpf__strerror_end(buf, size);
+	return 0;
+}
+
+/*
+ * Describe an error returned by bpf__config_obj() in buf.
+ * Only the map type error has a dedicated message; everything else gets
+ * the generic loader message. Always returns 0.
+ */
+int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+			     struct parse_events_term *term __maybe_unused,
+			     struct perf_evlist *evlist __maybe_unused,
+			     int *error_pos __maybe_unused, int err,
+			     char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,
+			    "Can't use this config term with this map type");
+	bpf__strerror_end(buf, size);
+	return 0;
+}
+
+/*
+ * Describe an error returned by bpf__apply_obj_config() in buf.
+ * Event dimension, inherit and event type errors get dedicated hints;
+ * everything else gets the generic loader message. Always returns 0.
+ */
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,
+			    "Cannot set event to BPF map in multi-thread tracing");
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,
+			    "%s (Hint: use -i to turn off inherit)", emsg);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,
+			    "Can only put raw, hardware and BPF output event into a BPF map");
+	bpf__strerror_end(buf, size);
+	return 0;
+}
+
+/*
+ * Describe an error returned by bpf__setup_stdout() in buf.
+ * There are no dedicated entries, so every error gets the generic loader
+ * message. Always returns 0.
+ */
+int bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused,
+			       int err, char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	bpf__strerror_end(buf, size);
+	return 0;
+}
diff --git a/util/bpf-loader.h b/util/bpf-loader.h
new file mode 100644
index 0000000..5d3aefd
--- /dev/null
+++ b/util/bpf-loader.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __BPF_LOADER_H
+#define __BPF_LOADER_H
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <string.h>
+#include <bpf/libbpf.h>
+#include "probe-event.h"
+#include "evlist.h"
+#include "debug.h"
+
+/*
+ * Error codes specific to perf's BPF loader. The range starts 100 below
+ * __LIBBPF_ERRNO__START; __BPF_LOADER_ERRNO__END is one past the last code.
+ * In this file's visible users the codes are returned negated.
+ */
+enum bpf_loader_errno {
+	__BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100,
+	/* Invalid config string */
+	BPF_LOADER_ERRNO__CONFIG = __BPF_LOADER_ERRNO__START,
+	BPF_LOADER_ERRNO__GROUP,	/* Invalid group name */
+	BPF_LOADER_ERRNO__EVENTNAME,	/* Event name is missing */
+	BPF_LOADER_ERRNO__INTERNAL,	/* BPF loader internal error */
+	BPF_LOADER_ERRNO__COMPILE,	/* Error when compiling BPF scriptlet */
+	BPF_LOADER_ERRNO__PROGCONF_TERM,/* Invalid program config term in config string */
+	BPF_LOADER_ERRNO__PROLOGUE,	/* Failed to generate prologue */
+	BPF_LOADER_ERRNO__PROLOGUE2BIG,	/* Prologue too big for program */
+	BPF_LOADER_ERRNO__PROLOGUEOOB,	/* Offset out of bound for prologue */
+	BPF_LOADER_ERRNO__OBJCONF_OPT,	/* Invalid object config option */
+	BPF_LOADER_ERRNO__OBJCONF_CONF,	/* Config value not set (lost '=') */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_OPT,	/* Invalid object map config option */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST,	/* Target map not exist */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE,	/* Incorrect value type for map */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,	/* Incorrect map type */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE,	/* Incorrect map key size */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT,	/* Event not found for map setting */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE,	/* Invalid map size for event setting */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,	/* Event dimension too large */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,	/* Doesn't support inherit event */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,	/* Wrong event type for map */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG,	/* Index too large */
+	__BPF_LOADER_ERRNO__END,
+};
+
+struct bpf_object;
+struct parse_events_term;
+#define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
+
+/*
+ * Callback type accepted by bpf__foreach_event(). It receives a group name,
+ * an event name, a file descriptor and the opaque 'arg' pointer.
+ * NOTE(review): how the int return value is interpreted is decided by the
+ * caller, which is outside this view.
+ */
+typedef int (*bpf_prog_iter_callback_t)(const char *group, const char *event,
+					int fd, void *arg);
+
+#ifdef HAVE_LIBBPF_SUPPORT
+struct bpf_object *bpf__prepare_load(const char *filename, bool source);
+int bpf__strerror_prepare_load(const char *filename, bool source,
+			       int err, char *buf, size_t size);
+
+struct bpf_object *bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz,
+					    const char *name);
+
+void bpf__clear(void);
+
+int bpf__probe(struct bpf_object *obj);
+int bpf__unprobe(struct bpf_object *obj);
+int bpf__strerror_probe(struct bpf_object *obj, int err,
+			char *buf, size_t size);
+
+int bpf__load(struct bpf_object *obj);
+int bpf__strerror_load(struct bpf_object *obj, int err,
+		       char *buf, size_t size);
+int bpf__foreach_event(struct bpf_object *obj,
+		       bpf_prog_iter_callback_t func, void *arg);
+
+int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term,
+		    struct perf_evlist *evlist, int *error_pos);
+int bpf__strerror_config_obj(struct bpf_object *obj,
+			     struct parse_events_term *term,
+			     struct perf_evlist *evlist,
+			     int *error_pos, int err, char *buf,
+			     size_t size);
+int bpf__apply_obj_config(void);
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
+
+int bpf__setup_stdout(struct perf_evlist *evlist);
+int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err,
+			       char *buf, size_t size);
+
+#else
+#include <errno.h>
+
+/*
+ * Stub for builds without HAVE_LIBBPF_SUPPORT: logs a debug message and
+ * fails with an ERR_PTR encoded -ENOTSUP.
+ */
+static inline struct bpf_object *bpf__prepare_load(const char *filename __maybe_unused,
+						   bool source __maybe_unused)
+{
+	struct bpf_object *unsupported = ERR_PTR(-ENOTSUP);
+
+	pr_debug("ERROR: eBPF object loading is disabled during compiling.\n");
+	return unsupported;
+}
+
+/*
+ * Stub for builds without HAVE_LIBBPF_SUPPORT: always fails with an ERR_PTR
+ * encoded -ENOTSUP.
+ *
+ * The parameter list mirrors the real prototype (obj_buf, obj_buf_sz, name)
+ * so callers compile the same way with and without libbpf.
+ */
+static inline struct bpf_object *
+bpf__prepare_load_buffer(void *obj_buf __maybe_unused,
+			 size_t obj_buf_sz __maybe_unused,
+			 const char *name __maybe_unused)
+{
+	return ERR_PTR(-ENOTSUP);
+}
+
+/*
+ * No-op replacements used when perf is built without HAVE_LIBBPF_SUPPORT.
+ * Each of them ignores its arguments; the int returning ones report 0.
+ */
+static inline void bpf__clear(void)
+{
+}
+
+static inline int bpf__probe(struct bpf_object *obj __maybe_unused)
+{
+	return 0;
+}
+
+static inline int bpf__unprobe(struct bpf_object *obj __maybe_unused)
+{
+	return 0;
+}
+
+static inline int bpf__load(struct bpf_object *obj __maybe_unused)
+{
+	return 0;
+}
+
+static inline int bpf__foreach_event(struct bpf_object *obj __maybe_unused,
+				     bpf_prog_iter_callback_t func __maybe_unused,
+				     void *arg __maybe_unused)
+{
+	return 0;
+}
+
+static inline int bpf__config_obj(struct bpf_object *obj __maybe_unused,
+				  struct parse_events_term *term __maybe_unused,
+				  struct perf_evlist *evlist __maybe_unused,
+				  int *error_pos __maybe_unused)
+{
+	return 0;
+}
+
+static inline int bpf__apply_obj_config(void)
+{
+	return 0;
+}
+
+static inline int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Copy the fixed "loading is disabled" message into 'buf', limited to 'size'
+ * bytes and always NUL terminated when 'size' is non-zero. A zero 'size'
+ * leaves 'buf' untouched. Always returns 0.
+ */
+static inline int
+__bpf_strerror(char *buf, size_t size)
+{
+	const char *msg = "ERROR: eBPF object loading is disabled during compiling.\n";
+
+	if (size != 0) {
+		strncpy(buf, msg, size);
+		/* strncpy() does not terminate on truncation */
+		buf[size - 1] = '\0';
+	}
+	return 0;
+}
+
+/*
+ * Without HAVE_LIBBPF_SUPPORT every bpf__strerror_*() variant ignores its
+ * context arguments and reports the same fixed message via __bpf_strerror().
+ */
+static inline int
+bpf__strerror_prepare_load(const char *filename __maybe_unused,
+			   bool source __maybe_unused, int err __maybe_unused,
+			   char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
+		    int err __maybe_unused, char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_load(struct bpf_object *obj __maybe_unused,
+		   int err __maybe_unused, char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+			 struct parse_events_term *term __maybe_unused,
+			 struct perf_evlist *evlist __maybe_unused,
+			 int *error_pos __maybe_unused, int err __maybe_unused,
+			 char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_apply_obj_config(int err __maybe_unused, char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused,
+			   int err __maybe_unused, char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+#endif
+#endif
diff --git a/util/bpf-prologue.c b/util/bpf-prologue.c
new file mode 100644
index 0000000..2934775
--- /dev/null
+++ b/util/bpf-prologue.c
@@ -0,0 +1,502 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bpf-prologue.c
+ *
+ * Copyright (C) 2015 He Kuang <hekuang@huawei.com>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+
+#include <bpf/libbpf.h>
+#include "perf.h"
+#include "debug.h"
+#include "bpf-loader.h"
+#include "bpf-prologue.h"
+#include "probe-finder.h"
+#include <errno.h>
+#include <dwarf-regs.h>
+#include <linux/filter.h>
+
+#define BPF_REG_SIZE		8
+
+#define JMP_TO_ERROR_CODE	-1
+#define JMP_TO_SUCCESS_CODE	-2
+#define JMP_TO_USER_CODE	-3
+
+/* Write cursor over the instruction buffer the prologue is generated into. */
+struct bpf_insn_pos {
+	struct bpf_insn *begin;	/* first slot of the buffer */
+	struct bpf_insn *end;	/* one past the last slot (begin + capacity) */
+	struct bpf_insn *pos;	/* next slot to write; NULL once an append overflowed */
+};
+
+/* Number of instructions emitted so far: cursor minus start of buffer. */
+static inline int
+pos_get_cnt(struct bpf_insn_pos *pos)
+{
+	struct bpf_insn *first = pos->begin;
+	struct bpf_insn *cursor = pos->pos;
+
+	return cursor - first;
+}
+
+/*
+ * Store 'new_insn' at the cursor and advance it.
+ *
+ * Fails with -BPF_LOADER_ERRNO__PROLOGUE2BIG when the cursor is already
+ * invalid or when writing would leave no free slot before 'end'; in the
+ * latter case the cursor is set to NULL so later appends fail as well.
+ */
+static int
+append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos)
+{
+	struct bpf_insn *slot = pos->pos;
+
+	if (slot == NULL)
+		return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+
+	if (slot + 1 < pos->end) {
+		*slot = new_insn;
+		pos->pos = slot + 1;
+		return 0;
+	}
+
+	pr_err("bpf prologue: prologue too long\n");
+	pos->pos = NULL;
+	return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+}
+
+/*
+ * Return 0 while the cursor is valid and inside the buffer, otherwise
+ * -BPF_LOADER_ERRNO__PROLOGUE2BIG.
+ */
+static int
+check_pos(struct bpf_insn_pos *pos)
+{
+	if (pos->pos && pos->pos < pos->end)
+		return 0;
+
+	return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+}
+
+/*
+ * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see
+ * Documentation/trace/kprobetrace.txt) to size field of BPF_LDX_MEM
+ * instruction (BPF_{B,H,W,DW}).
+ */
+static int
+argtype_to_ldx_size(const char *type)
+{
+	/*
+	 * The width follows the one-letter prefix ("u32" -> 32). A NULL or
+	 * empty type means "no explicit type" and is treated as 64 bit; the
+	 * empty check also avoids reading past the string terminator.
+	 */
+	int arg_size = (type && type[0]) ? atoi(&type[1]) : 64;
+
+	switch (arg_size) {
+	case 8:
+		return BPF_B;
+	case 16:
+		return BPF_H;
+	case 32:
+		return BPF_W;
+	case 64:
+	default:
+		/* Unparsable or unknown widths fall back to a full 64-bit load */
+		return BPF_DW;
+	}
+}
+
+/* Printable name of a BPF load/store size field, "UNKNOWN" for anything else. */
+static const char *
+insn_sz_to_str(int insn_sz)
+{
+	if (insn_sz == BPF_B)
+		return "BPF_B";
+	if (insn_sz == BPF_H)
+		return "BPF_H";
+	if (insn_sz == BPF_W)
+		return "BPF_W";
+	if (insn_sz == BPF_DW)
+		return "BPF_DW";
+
+	return "UNKNOWN";
+}
+
+/* Give it a shorter name */
+#define ins(i, p) append_insn((i), (p))
+
+/*
+ * Give a register name (in 'reg'), generate instruction to
+ * load register into an eBPF register rd:
+ *   'ldd target_reg, offset(ctx_reg)', where:
+ * ctx_reg is pre initialized to pointer of 'struct pt_regs'.
+ */
+static int
+gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg,
+		     const char *reg, int target_reg)
+{
+	int reg_off = regs_query_register_offset(reg);
+
+	if (reg_off >= 0) {
+		/* target_reg = *(u64 *)(ctx_reg + reg_off) */
+		ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, reg_off), pos);
+		return check_pos(pos);
+	}
+
+	/* Negative lookup result is handed back to the caller unchanged */
+	pr_err("bpf: prologue: failed to get register %s\n",
+	       reg);
+	return reg_off;
+}
+
+/*
+ * Generate a BPF_FUNC_probe_read function call.
+ *
+ * src_base_addr_reg is a register holding base address,
+ * dst_addr_reg is a register holding dest address (on stack),
+ * result is:
+ *
+ *  *[dst_addr_reg] = *([src_base_addr_reg] + offset)
+ *
+ * Arguments of BPF_FUNC_probe_read:
+ *     ARG1: ptr to stack (dest)
+ *     ARG2: size (8)
+ *     ARG3: unsafe ptr (src)
+ *
+ * The return values of the individual ins() calls are not checked here:
+ * a failed append sets pos->pos to NULL, which the final check_pos()
+ * reports.
+ *
+ * Returns 0 on success or -BPF_LOADER_ERRNO__PROLOGUE2BIG when the
+ * instruction buffer is exhausted.
+ */
+static int
+gen_read_mem(struct bpf_insn_pos *pos,
+	     int src_base_addr_reg,
+	     int dst_addr_reg,
+	     long offset)
+{
+	/* mov arg3, src_base_addr_reg */
+	if (src_base_addr_reg != BPF_REG_ARG3)
+		ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos);
+	/* add arg3, #offset */
+	if (offset)
+		ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos);
+
+	/* mov arg2, #reg_size */
+	ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos);
+
+	/* mov arg1, dst_addr_reg */
+	if (dst_addr_reg != BPF_REG_ARG1)
+		ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);
+
+	/* Call probe_read  */
+	ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos);
+	/*
+	 * Error processing: if read fail, goto error code,
+	 * will be relocated. Target should be the start of
+	 * error processing code.
+	 */
+	ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE),
+	    pos);
+
+	return check_pos(pos);
+}
+
+/*
+ * Each arg should be bare register. Fetch and save them into argument
+ * registers (r3 - r5).
+ *
+ * BPF_REG_1 should have been initialized with pointer to
+ * 'struct pt_regs'.
+ */
+static int
+gen_prologue_fastpath(struct bpf_insn_pos *pos,
+		      struct probe_trace_arg *args, int nargs)
+{
+	int idx;
+
+	/* One direct load from the pt_regs pointer in r1 per argument */
+	for (idx = 0; idx < nargs; idx++) {
+		int ret = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[idx].value,
+					       BPF_PROLOGUE_START_ARG_REG + idx);
+
+		if (ret)
+			return ret;
+	}
+
+	return check_pos(pos);
+}
+
+/*
+ * Slow path:
+ *   At least one argument has the form of 'offset($rx)'.
+ *
+ * The following code first stores them onto the stack, then loads all of
+ * them into r3 - r5 (r2 holds the fetch result).
+ * Before final loading, the final result should be:
+ *
+ * low address
+ * BPF_REG_FP - 24  ARG3
+ * BPF_REG_FP - 16  ARG2
+ * BPF_REG_FP - 8   ARG1
+ * BPF_REG_FP
+ * high address
+ *
+ * For each argument (described as: offn(...off2(off1(reg)))),
+ * generates following code:
+ *
+ *  r7 <- fp
+ *  r7 <- r7 - stack_offset  // Ideal code should initialize r7 using
+ *                           // fp before generating args. However,
+ *                           // eBPF won't regard r7 as stack pointer
+ *                           // if it is generated by minus 8 from
+ *                           // another stack pointer except fp.
+ *                           // This is why we have to set r7
+ *                           // to fp for each variable.
+ *  r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx()
+ *  (r7) <- r3       // skip following instructions for bare reg
+ *  r3 <- r3 + off1  . // skip if off1 == 0
+ *  r2 <- 8           \
+ *  r1 <- r7           |-> generated by gen_read_mem()
+ *  call probe_read    /
+ *  jnei r0, 0, err  ./
+ *  r3 <- (r7)
+ *  r3 <- r3 + off2  . // skip if off2 == 0
+ *  r2 <- 8           \  // r2 may be broken by probe_read, so set again
+ *  r1 <- r7           |-> generated by gen_read_mem()
+ *  call probe_read    /
+ *  jnei r0, 0, err  ./
+ *  ...
+ */
+static int
+gen_prologue_slowpath(struct bpf_insn_pos *pos,
+		      struct probe_trace_arg *args, int nargs)
+{
+	int err, i;
+
+	/* First pass: compute every argument into its own 8-byte stack slot */
+	for (i = 0; i < nargs; i++) {
+		struct probe_trace_arg *arg = &args[i];
+		const char *reg = arg->value;
+		struct probe_trace_arg_ref *ref = NULL;
+		/* Slot of argument i lives at fp - 8 * (i + 1) */
+		int stack_offset = (i + 1) * -8;
+
+		pr_debug("prologue: fetch arg %d, base reg is %s\n",
+			 i, reg);
+
+		/* value of base register is stored into ARG3 */
+		err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg,
+					   BPF_REG_ARG3);
+		if (err) {
+			pr_err("prologue: failed to get offset of register %s\n",
+			       reg);
+			goto errout;
+		}
+
+		/* Make r7 the stack pointer. */
+		ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos);
+		/* r7 += stack_offset, i.e. -8 * (i + 1) */
+		ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos);
+		/*
+		 * Store r3 (base register) onto stack
+		 * Ensure fp[offset] is set.
+		 * fp is the only valid base register when storing
+		 * into stack. We are not allowed to use r7 as base
+		 * register here.
+		 */
+		ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3,
+				stack_offset), pos);
+
+		/* Follow the chain of dereferences, one probe_read per level */
+		ref = arg->ref;
+		while (ref) {
+			pr_debug("prologue: arg %d: offset %ld\n",
+				 i, ref->offset);
+			err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
+					   ref->offset);
+			if (err) {
+				pr_err("prologue: failed to generate probe_read function call\n");
+				goto errout;
+			}
+
+			ref = ref->next;
+			/*
+			 * Load previous result into ARG3. Use
+			 * BPF_REG_FP instead of r7 because verifier
+			 * allows FP based addressing only.
+			 */
+			if (ref)
+				ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3,
+						BPF_REG_FP, stack_offset), pos);
+		}
+	}
+
+	/* Final pass: read to registers */
+	for (i = 0; i < nargs; i++) {
+		/* Bare registers are always loaded as 64 bit; typed width applies to dereferenced args only */
+		int insn_sz = (args[i].ref) ? argtype_to_ldx_size(args[i].type) : BPF_DW;
+
+		pr_debug("prologue: load arg %d, insn_sz is %s\n",
+			 i, insn_sz_to_str(insn_sz));
+		ins(BPF_LDX_MEM(insn_sz, BPF_PROLOGUE_START_ARG_REG + i,
+				BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos);
+	}
+
+	/* Placeholder jump; its offset is fixed up later by prologue_relocate() */
+	ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos);
+
+	return check_pos(pos);
+errout:
+	return err;
+}
+
+/*
+ * Replace the JMP_TO_* placeholder offsets of all emitted jump instructions
+ * with real relative offsets to the error, success or user code.
+ *
+ * Returns 0 on success, -BPF_LOADER_ERRNO__PROLOGUE2BIG when the cursor is
+ * invalid and -BPF_LOADER_ERRNO__PROLOGUE when a jump carries an unknown
+ * placeholder.
+ */
+static int
+prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code,
+		  struct bpf_insn *success_code, struct bpf_insn *user_code)
+{
+	struct bpf_insn *cur;
+
+	if (check_pos(pos))
+		return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+
+	for (cur = pos->begin; cur < pos->pos; cur++) {
+		struct bpf_insn *dest;
+
+		/* Only jumps need fixing; calls are in the JMP class too */
+		if (BPF_CLASS(cur->code) != BPF_JMP ||
+		    BPF_OP(cur->code) == BPF_CALL)
+			continue;
+
+		if (cur->off == JMP_TO_ERROR_CODE) {
+			dest = error_code;
+		} else if (cur->off == JMP_TO_SUCCESS_CODE) {
+			dest = success_code;
+		} else if (cur->off == JMP_TO_USER_CODE) {
+			dest = user_code;
+		} else {
+			pr_err("bpf prologue: internal error: relocation failed\n");
+			return -BPF_LOADER_ERRNO__PROLOGUE;
+		}
+
+		/* Offsets are relative to the instruction after the jump */
+		cur->off = dest - (cur + 1);
+	}
+
+	return 0;
+}
+
+/*
+ * Generate the prologue that fetches probe arguments for a BPF program.
+ *
+ * @args, @nargs: probe arguments; at most BPF_PROLOGUE_MAX_ARGS are used,
+ *                the remaining ones are dropped with a warning.
+ * @new_prog:     output instruction buffer.
+ * @new_cnt:      receives the number of emitted instructions on success.
+ * @cnt_space:    capacity of @new_prog in instructions, capped to
+ *                BPF_MAXINSNS.
+ *
+ * Returns 0 on success, -EINVAL for NULL output pointers, -ENOTSUP for
+ * '@' (global) arguments, -BPF_LOADER_ERRNO__PROLOGUEOOB when an offset
+ * does not fit the s32 immediate, -BPF_LOADER_ERRNO__PROLOGUE2BIG when the
+ * buffer is too small, or another negative error from the generators.
+ */
+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
+		      struct bpf_insn *new_prog, size_t *new_cnt,
+		      size_t cnt_space)
+{
+	struct bpf_insn *success_code = NULL;
+	struct bpf_insn *error_code = NULL;
+	struct bpf_insn *user_code = NULL;
+	struct bpf_insn_pos pos;
+	bool fastpath = true;
+	int err = 0, i;
+
+	if (!new_prog || !new_cnt)
+		return -EINVAL;
+
+	if (cnt_space > BPF_MAXINSNS)
+		cnt_space = BPF_MAXINSNS;
+
+	pos.begin = new_prog;
+	pos.end = new_prog + cnt_space;
+	pos.pos = new_prog;
+
+	if (!nargs) {
+		ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0),
+		    &pos);
+
+		/*
+		 * Record the failure in 'err': jumping to errout with err
+		 * still 0 would report success with *new_cnt left unset.
+		 */
+		err = check_pos(&pos);
+		if (err)
+			goto errout;
+
+		*new_cnt = pos_get_cnt(&pos);
+		return 0;
+	}
+
+	if (nargs > BPF_PROLOGUE_MAX_ARGS) {
+		pr_warning("bpf: prologue: %d arguments are dropped\n",
+			   nargs - BPF_PROLOGUE_MAX_ARGS);
+		nargs = BPF_PROLOGUE_MAX_ARGS;
+	}
+
+	/* First pass: validation */
+	for (i = 0; i < nargs; i++) {
+		struct probe_trace_arg_ref *ref = args[i].ref;
+
+		if (args[i].value[0] == '@') {
+			/* TODO: fetch global variable */
+			pr_err("bpf: prologue: global %s%+ld not support\n",
+				args[i].value, ref ? ref->offset : 0);
+			return -ENOTSUP;
+		}
+
+		while (ref) {
+			/* fastpath is true if all args has ref == NULL */
+			fastpath = false;
+
+			/*
+			 * Instruction encodes immediate value using
+			 * s32, ref->offset is long. On systems which
+			 * can't fill long in s32, refuse to process if
+			 * ref->offset too large (or small).
+			 */
+#ifdef __LP64__
+#define OFFSET_MAX	((1LL << 31) - 1)
+#define OFFSET_MIN	((1LL << 31) * -1)
+			if (ref->offset > OFFSET_MAX ||
+					ref->offset < OFFSET_MIN) {
+				pr_err("bpf: prologue: offset out of bound: %ld\n",
+				       ref->offset);
+				return -BPF_LOADER_ERRNO__PROLOGUEOOB;
+			}
+#endif
+			ref = ref->next;
+		}
+	}
+	pr_debug("prologue: pass validation\n");
+
+	if (fastpath) {
+		/* If all variables are registers... */
+		pr_debug("prologue: fast path\n");
+		err = gen_prologue_fastpath(&pos, args, nargs);
+		if (err)
+			goto errout;
+	} else {
+		pr_debug("prologue: slow path\n");
+
+		/* Initialization: move ctx to a callee saved register. */
+		ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos);
+
+		err = gen_prologue_slowpath(&pos, args, nargs);
+		if (err)
+			goto errout;
+		/*
+		 * start of ERROR_CODE (only slow pass needs error code)
+		 *   mov r2 <- 1  // r2 is error number
+		 *   mov r3 <- 0  // r3, r4... should be touched or
+		 *                // verifier would complain
+		 *   mov r4 <- 0
+		 *   ...
+		 *   goto usercode
+		 */
+		error_code = pos.pos;
+		ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1),
+		    &pos);
+
+		for (i = 0; i < nargs; i++)
+			ins(BPF_ALU64_IMM(BPF_MOV,
+					  BPF_PROLOGUE_START_ARG_REG + i,
+					  0),
+			    &pos);
+		ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE),
+				&pos);
+	}
+
+	/*
+	 * start of SUCCESS_CODE:
+	 *   mov r2 <- 0
+	 *   goto usercode  // skip
+	 */
+	success_code = pos.pos;
+	ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos);
+
+	/*
+	 * start of USER_CODE:
+	 *   Restore ctx to r1
+	 */
+	user_code = pos.pos;
+	if (!fastpath) {
+		/*
+		 * Only slow path needs restoring of ctx. In fast path,
+		 * register are loaded directly from r1.
+		 */
+		ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos);
+		err = prologue_relocate(&pos, error_code, success_code,
+					user_code);
+		if (err)
+			goto errout;
+	}
+
+	err = check_pos(&pos);
+	if (err)
+		goto errout;
+
+	*new_cnt = pos_get_cnt(&pos);
+	return 0;
+errout:
+	return err;
+}
diff --git a/util/bpf-prologue.h b/util/bpf-prologue.h
new file mode 100644
index 0000000..c50c735
--- /dev/null
+++ b/util/bpf-prologue.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015, He Kuang <hekuang@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __BPF_PROLOGUE_H
+#define __BPF_PROLOGUE_H
+
+#include <linux/compiler.h>
+#include <linux/filter.h>
+#include "probe-event.h"
+
+#define BPF_PROLOGUE_MAX_ARGS 3
+#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
+#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2
+
+#ifdef HAVE_BPF_PROLOGUE
+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
+		      struct bpf_insn *new_prog, size_t *new_cnt,
+		      size_t cnt_space);
+#else
+#include <errno.h>
+
+/*
+ * Stub used when HAVE_BPF_PROLOGUE is not defined: reports an empty
+ * prologue and fails with -ENOTSUP, or -EINVAL when 'new_cnt' is NULL.
+ */
+static inline int
+bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused,
+		  int nargs __maybe_unused,
+		  struct bpf_insn *new_prog __maybe_unused,
+		  size_t *new_cnt,
+		  size_t cnt_space __maybe_unused)
+{
+	if (new_cnt) {
+		*new_cnt = 0;
+		return -ENOTSUP;
+	}
+
+	return -EINVAL;
+}
+#endif
+#endif /* __BPF_PROLOGUE_H */
diff --git a/util/branch.c b/util/branch.c
new file mode 100644
index 0000000..a4fce27
--- /dev/null
+++ b/util/branch.c
@@ -0,0 +1,147 @@
+#include "perf.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/branch.h"
+
+/*
+ * True when addr1 and addr2 fall into different 'size' aligned areas.
+ * The mask arithmetic assumes 'size' is a power of two.
+ */
+static bool cross_area(u64 addr1, u64 addr2, int size)
+{
+	u64 mask = ~(size - 1);
+
+	return (addr1 & mask) != (addr2 & mask);
+}
+
+#define AREA_4K		4096
+#define AREA_2M		(2 * 1024 * 1024)
+
+/*
+ * Account one branch from 'from' to 'to' in 'st'. Branches of unknown type
+ * or with a zero source address are ignored.
+ */
+void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
+		       u64 from, u64 to)
+{
+	if (from == 0 || flags->type == PERF_BR_UNKNOWN)
+		return;
+
+	st->counts[flags->type]++;
+
+	if (flags->type == PERF_BR_COND) {
+		if (to <= from)
+			st->cond_bwd++;
+		else
+			st->cond_fwd++;
+	}
+
+	/* A 2M crossing is not additionally counted as a 4K crossing */
+	if (cross_area(from, to, AREA_2M)) {
+		st->cross_2m++;
+		return;
+	}
+
+	if (cross_area(from, to, AREA_4K))
+		st->cross_4k++;
+}
+
+/* Printable name of a branch type, or NULL when 'type' is out of range. */
+const char *branch_type_name(int type)
+{
+	static const char * const names[PERF_BR_MAX] = {
+		"N/A",
+		"COND",
+		"UNCOND",
+		"IND",
+		"CALL",
+		"IND_CALL",
+		"RET",
+		"SYSCALL",
+		"SYSRET",
+		"COND_CALL",
+		"COND_RET"
+	};
+
+	if (type < 0 || type >= PERF_BR_MAX)
+		return NULL;
+
+	return names[type];
+}
+
+/*
+ * Print the branch statistics of 'st' to 'fp' as percentages of the total
+ * number of counted branches. Prints nothing when no branch was counted;
+ * statistics with a zero count are skipped.
+ */
+void branch_type_stat_display(FILE *fp, struct branch_type_stat *st)
+{
+	const struct {
+		const char *label;
+		u64 value;
+	} extra[] = {
+		{ "COND_FWD", st->cond_fwd },
+		{ "COND_BWD", st->cond_bwd },
+		{ "CROSS_4K", st->cross_4k },
+		{ "CROSS_2M", st->cross_2m },
+	};
+	u64 sum = 0;
+	size_t k;
+	int type;
+
+	for (type = 0; type < PERF_BR_MAX; type++)
+		sum += st->counts[type];
+
+	if (!sum)
+		return;
+
+	fprintf(fp, "\n#\n# Branch Statistics:\n#");
+
+	for (k = 0; k < sizeof(extra) / sizeof(extra[0]); k++) {
+		if (extra[k].value > 0)
+			fprintf(fp, "\n%8s: %5.1f%%",
+				extra[k].label,
+				100.0 * (double)extra[k].value / (double)sum);
+	}
+
+	for (type = 0; type < PERF_BR_MAX; type++) {
+		if (st->counts[type] > 0)
+			fprintf(fp, "\n%8s: %5.1f%%",
+				branch_type_name(type),
+				100.0 *
+				(double)st->counts[type] / (double)sum);
+	}
+}
+
+/*
+ * Append 'str' to 'bf': the first item (idx == 0) is prefixed with " (",
+ * every following one with a single space.
+ */
+static int count_str_scnprintf(int idx, const char *str, char *bf, int size)
+{
+	const char *prefix = idx ? " " : " (";
+
+	return scnprintf(bf, size, "%s%s", prefix, str);
+}
+
+/*
+ * Format the names of all non-zero statistics of 'st' into 'bf' as a space
+ * separated list opened by " (". Returns the accumulated scnprintf() result,
+ * or 0 when no branch was counted at all.
+ */
+int branch_type_str(struct branch_type_stat *st, char *bf, int size)
+{
+	int type, nr = 0, len = 0;
+	u64 sum = 0;
+
+	for (type = 0; type < PERF_BR_MAX; type++)
+		sum += st->counts[type];
+
+	if (!sum)
+		return 0;
+
+	if (st->cond_fwd > 0) {
+		len += count_str_scnprintf(nr, "COND_FWD", bf + len, size - len);
+		nr++;
+	}
+
+	if (st->cond_bwd > 0) {
+		len += count_str_scnprintf(nr, "COND_BWD", bf + len, size - len);
+		nr++;
+	}
+
+	for (type = 0; type < PERF_BR_MAX; type++) {
+		/* COND is already covered by COND_FWD/COND_BWD above */
+		if (type == PERF_BR_COND || st->counts[type] == 0)
+			continue;
+
+		len += count_str_scnprintf(nr, branch_type_name(type), bf + len, size - len);
+		nr++;
+	}
+
+	if (st->cross_4k > 0) {
+		len += count_str_scnprintf(nr, "CROSS_4K", bf + len, size - len);
+		nr++;
+	}
+
+	if (st->cross_2m > 0) {
+		len += count_str_scnprintf(nr, "CROSS_2M", bf + len, size - len);
+		nr++;
+	}
+
+	return len;
+}
diff --git a/util/branch.h b/util/branch.h
new file mode 100644
index 0000000..1e3c7c5
--- /dev/null
+++ b/util/branch.h
@@ -0,0 +1,25 @@
+#ifndef _PERF_BRANCH_H
+#define _PERF_BRANCH_H 1
+
+#include <stdint.h>
+#include "../perf.h"
+
+/* Accumulated branch statistics, updated by branch_type_count(). */
+struct branch_type_stat {
+	bool	branch_to;		/* NOTE(review): not used by branch.c; confirm meaning at its users */
+	u64	counts[PERF_BR_MAX];	/* per-type counters, indexed by branch_flags type */
+	u64	cond_fwd;		/* conditional branches with to > from */
+	u64	cond_bwd;		/* conditional branches with to <= from */
+	u64	cross_4k;		/* branches crossing a 4K area but not a 2M area */
+	u64	cross_2m;		/* branches crossing a 2M area */
+};
+
+struct branch_flags;
+
+void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
+		       u64 from, u64 to);
+
+const char *branch_type_name(int type);
+void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);
+int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);
+
+#endif /* _PERF_BRANCH_H */
diff --git a/util/build-id.c b/util/build-id.c
new file mode 100644
index 0000000..537eadd
--- /dev/null
+++ b/util/build-id.c
@@ -0,0 +1,877 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * build-id.c
+ *
+ * build-id support
+ *
+ * Copyright (C) 2009, 2010 Red Hat Inc.
+ * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "util.h"
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include "build-id.h"
+#include "event.h"
+#include "symbol.h"
+#include "thread.h"
+#include <linux/kernel.h>
+#include "debug.h"
+#include "session.h"
+#include "tool.h"
+#include "header.h"
+#include "vdso.h"
+#include "path.h"
+#include "probe-file.h"
+#include "strlist.h"
+
+#include "sane_ctype.h"
+
+static bool no_buildid_cache;
+
+/*
+ * Sample callback: look up the map containing the sample's ip and flag the
+ * DSO behind it as hit. Returns -1 when no thread can be found or created
+ * for the sample, 0 otherwise.
+ */
+int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
+			   union perf_event *event,
+			   struct perf_sample *sample,
+			   struct perf_evsel *evsel __maybe_unused,
+			   struct machine *machine)
+{
+	struct addr_location al;
+	struct thread *thread;
+
+	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+	if (!thread) {
+		pr_err("problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
+	if (al.map)
+		al.map->dso->hit = 1;
+
+	/* Drop the reference taken by machine__findnew_thread() */
+	thread__put(thread);
+	return 0;
+}
+
+/*
+ * Exit callback: dump the pid/tid pairs of the event and remove the exiting
+ * thread from 'machine'. Always returns 0.
+ */
+static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
+				       union perf_event *event,
+				       struct perf_sample *sample
+				       __maybe_unused,
+				       struct machine *machine)
+{
+	struct thread *exiting;
+
+	exiting = machine__findnew_thread(machine, event->fork.pid,
+					  event->fork.tid);
+
+	dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
+		    event->fork.ppid, event->fork.ptid);
+
+	if (exiting != NULL) {
+		machine__remove_thread(machine, exiting);
+		thread__put(exiting);
+	}
+
+	return 0;
+}
+
+/*
+ * Tool callbacks for a pass that marks DSOs hit by samples: samples go to
+ * build_id__mark_dso_hit(), exit events remove the thread, and the mmap,
+ * mmap2, fork, attr and build_id events use the generic perf_event__process_*
+ * handlers. Ordered event processing is requested.
+ */
+struct perf_tool build_id__mark_dso_hit_ops = {
+	.sample	= build_id__mark_dso_hit,
+	.mmap	= perf_event__process_mmap,
+	.mmap2	= perf_event__process_mmap2,
+	.fork	= perf_event__process_fork,
+	.exit	= perf_event__exit_del_thread,
+	.attr		 = perf_event__process_attr,
+	.build_id	 = perf_event__process_build_id,
+	.ordered_events	 = true,
+};
+
+/*
+ * Format 'len' raw build-id bytes as lowercase hex into 'bf'. The caller
+ * must provide room for 2 * len + 1 bytes. Returns the number of hex digits
+ * plus one (1 when len <= 0, in which case 'bf' is not written).
+ */
+int build_id__sprintf(const u8 *build_id, int len, char *bf)
+{
+	int idx;
+
+	for (idx = 0; idx < len; idx++)
+		sprintf(bf + 2 * idx, "%02x", build_id[idx]);
+
+	return 2 * idx + 1;
+}
+
+/*
+ * Read the build-id from <root_dir>/sys/kernel/notes and format it as hex
+ * into 'sbuild_id'. A NULL 'root_dir' is treated as an empty prefix.
+ * Returns a negative value from the read on failure, otherwise the result
+ * of build_id__sprintf().
+ */
+int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
+{
+	u8 build_id[BUILD_ID_SIZE];
+	char notes[PATH_MAX];
+	int err;
+
+	scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes",
+		  root_dir ? root_dir : "");
+
+	err = sysfs__read_build_id(notes, build_id, sizeof(build_id));
+	if (err < 0)
+		return err;
+
+	return build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+}
+
+/*
+ * Read the build-id of the file 'pathname' and format it as hex into
+ * 'sbuild_id'. Returns the negative read error, -EINVAL when the build-id
+ * does not have the expected size, otherwise the build_id__sprintf() result.
+ */
+int filename__sprintf_build_id(const char *pathname, char *sbuild_id)
+{
+	u8 build_id[BUILD_ID_SIZE];
+	int ret = filename__read_build_id(pathname, build_id, sizeof(build_id));
+
+	if (ret < 0)
+		return ret;
+
+	if (ret != sizeof(build_id))
+		return -EINVAL;
+
+	return build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+}
+
+/*
+ * asnprintf() combines asprintf() and snprintf(): when *strp is non-NULL it
+ * formats into that buffer, limited to 'size' bytes; when *strp is NULL it
+ * allocates the result and stores it in *strp. Returns the underlying
+ * vsnprintf()/vasprintf() result, or -EINVAL when 'strp' itself is NULL.
+ */
+static int asnprintf(char **strp, size_t size, const char *fmt, ...)
+{
+	va_list args;
+	int n;
+
+	if (strp == NULL)
+		return -EINVAL;
+
+	va_start(args, fmt);
+	n = *strp ? vsnprintf(*strp, size, fmt, args)
+		  : vasprintf(strp, fmt, args);
+	va_end(args);
+
+	return n;
+}
+
+/*
+ * Build the path of the cached kallsyms for 'sbuild_id' in 'bf'. The
+ * current layout (".../<sbuild_id>/kallsyms") is tried first, then the old
+ * one (".../<sbuild_id>"). Returns 'bf' for the first path that exists,
+ * NULL when neither does.
+ */
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+				    size_t size)
+{
+	snprintf(bf, size, "%s/%s/%s/kallsyms",
+		 buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+	if (access(bf, F_OK) == 0)
+		return bf;
+
+	/* Try old style kallsyms cache */
+	snprintf(bf, size, "%s/%s/%s",
+		 buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+	if (access(bf, F_OK) == 0)
+		return bf;
+
+	return NULL;
+}
+
+/*
+ * Build "<buildid_dir>/.build-id/<xx>/<rest of sbuild_id>".
+ *
+ * With a non-NULL 'bf' the path is written into that buffer of 'size'
+ * bytes; with a NULL 'bf' the string is allocated and must be freed by the
+ * caller. Returns the resulting string, or NULL on a formatting error or
+ * when the caller's buffer is too small.
+ */
+char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size)
+{
+	char *tmp = bf;
+	int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
+			    sbuild_id, sbuild_id + 2);
+
+	/*
+	 * vsnprintf() returns the length without the terminating NUL, so a
+	 * result equal to 'size' already means the output was truncated.
+	 */
+	if (ret < 0 || (tmp && (size_t)ret >= size))
+		return NULL;
+	return bf;
+}
+
+/*
+ * Resolve the original path a build-id cache link was created for. The
+ * link target is expected to look like "../..<origpath>/<sbuild_id>".
+ * Returns a newly allocated string the caller must free, or NULL when the
+ * link cannot be read or does not have that shape.
+ */
+char *build_id_cache__origname(const char *sbuild_id)
+{
+	char target[PATH_MAX];
+	char *linkname, *slash, *orig = NULL;
+	size_t skip = 5;	/* == strlen("../..") */
+	ssize_t n;
+
+	linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
+	if (!linkname)
+		return NULL;
+
+	n = readlink(linkname, target, sizeof(target) - 1);
+	if (n > 0) {
+		target[n] = '\0';
+
+		/* Cut off the trailing "/<sbuild_id>" */
+		slash = strrchr(target, '/');
+		if (slash && slash > target + skip) {
+			*slash = '\0';
+			/*
+			 * A DSO name such as [kernel.kallsyms] is not the
+			 * cache of a regular file: skip its leading '/' too.
+			 */
+			if (target[skip + 1] == '[')
+				skip++;
+			orig = strdup(target + skip);	/* Skip "../..[/]" */
+		}
+	}
+
+	free(linkname);
+	return orig;
+}
+
+/*
+ * Check whether the cached build-id 'sbuild_id' still matches the build-id
+ * of the object it was cached for on the running system.
+ */
+static bool build_id_cache__valid_id(char *sbuild_id)
+{
+	char real_sbuild_id[SBUILD_ID_SIZE] = "";
+	char *pathname = build_id_cache__origname(sbuild_id);
+	bool match = false;
+	int err;
+
+	if (!pathname)
+		return false;
+
+	if (strcmp(pathname, DSO__NAME_KALLSYMS) == 0)
+		err = sysfs__sprintf_build_id("/", real_sbuild_id);
+	else if (pathname[0] == '/')
+		err = filename__sprintf_build_id(pathname, real_sbuild_id);
+	else
+		err = -EINVAL;	/* Should we support other special DSO cache? */
+
+	if (err >= 0)
+		match = !strcmp(sbuild_id, real_sbuild_id);
+
+	free(pathname);
+	return match;
+}
+
+/*
+ * File name used inside a build-id cache directory. The flags are checked
+ * in priority order: kallsyms, vdso, debug; "elf" is the default.
+ */
+static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso,
+					    bool is_debug)
+{
+	if (is_kallsyms)
+		return "kallsyms";
+	if (is_vdso)
+		return "vdso";
+	if (is_debug)
+		return "debug";
+
+	return "elf";
+}
+
+/*
+ * Build the build-id cache file name for 'dso'.
+ *
+ * With a non-NULL 'bf' the name is written into that buffer of 'size'
+ * bytes; with a NULL 'bf' the string is allocated and must be freed by the
+ * caller. 'is_debug' selects the "debug" file for DSOs that are neither
+ * kallsyms nor vdso.
+ *
+ * Returns the file name, or NULL when the dso has no build-id, the link
+ * name cannot be built, formatting fails or the caller's buffer is too
+ * small.
+ */
+char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+			     bool is_debug)
+{
+	bool is_kallsyms = dso__is_kallsyms((struct dso *)dso);
+	bool is_vdso = dso__is_vdso((struct dso *)dso);
+	char sbuild_id[SBUILD_ID_SIZE];
+	char *linkname;
+	bool alloc = (bf == NULL);
+	int ret;
+
+	if (!dso->has_build_id)
+		return NULL;
+
+	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+	linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
+	if (!linkname)
+		return NULL;
+
+	/* Check if old style build_id cache */
+	if (is_regular_file(linkname))
+		ret = asnprintf(&bf, size, "%s", linkname);
+	else
+		ret = asnprintf(&bf, size, "%s/%s", linkname,
+			 build_id_cache__basename(is_kallsyms, is_vdso,
+						  is_debug));
+	/*
+	 * vsnprintf() returns the length without the terminating NUL, so a
+	 * result equal to 'size' already means the output was truncated.
+	 */
+	if (ret < 0 || (!alloc && (size_t)ret >= size))
+		bf = NULL;
+	free(linkname);
+
+	return bf;
+}
+
+/*
+ * Iterate over the DSOs on list 'head', skipping those without a build-id.
+ * The trailing 'else' makes the statement that follows the macro the loop
+ * body, executed only for entries that have a build-id.
+ */
+#define dsos__for_each_with_build_id(pos, head)	\
+	list_for_each_entry(pos, head, node)	\
+		if (!pos->has_build_id)		\
+			continue;		\
+		else
+
+/*
+ * Write one build-id table entry to 'fd': a build_id_event header followed
+ * by the NUL terminated 'name', padded to a multiple of NAME_ALIGN.
+ * Returns a negative value when writing the header fails, otherwise the
+ * result of write_padded().
+ */
+static int write_buildid(const char *name, size_t name_len, u8 *build_id,
+			 pid_t pid, u16 misc, struct feat_fd *fd)
+{
+	struct build_id_event ev;
+	size_t padded = name_len + 1;
+	int ret;
+
+	padded = PERF_ALIGN(padded, NAME_ALIGN);
+
+	memset(&ev, 0, sizeof(ev));
+	memcpy(&ev.build_id, build_id, BUILD_ID_SIZE);
+	ev.pid = pid;
+	ev.header.misc = misc;
+	ev.header.size = sizeof(ev) + padded;
+
+	ret = do_write(fd, &ev, sizeof(ev));
+	if (ret < 0)
+		return ret;
+
+	return write_padded(fd, name, name_len + 1, padded);
+}
+
+/*
+ * Write a build-id record to @fd for every dso of @machine that has a
+ * build-id and was either hit or is a vdso.  Guest machines use the GUEST
+ * variants of the kernel/user misc flags.  Stops at, and returns, the first
+ * non-zero result of write_buildid(); returns 0 otherwise.
+ */
+static int machine__write_buildid_table(struct machine *machine,
+					struct feat_fd *fd)
+{
+	struct dso *dso;
+	u16 kmisc, umisc;
+	int ret = 0;
+
+	if (machine__is_host(machine)) {
+		kmisc = PERF_RECORD_MISC_KERNEL;
+		umisc = PERF_RECORD_MISC_USER;
+	} else {
+		kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
+		umisc = PERF_RECORD_MISC_GUEST_USER;
+	}
+
+	dsos__for_each_with_build_id(dso, &machine->dsos.head) {
+		const char *name;
+		size_t name_len;
+		u16 misc;
+
+		if (!dso->hit && !dso__is_vdso(dso))
+			continue;
+
+		/* vdso uses its short name, kcore the machine's mmap name. */
+		if (dso__is_vdso(dso)) {
+			name = dso->short_name;
+			name_len = dso->short_name_len;
+		} else if (dso__is_kcore(dso)) {
+			name = machine->mmap_name;
+			name_len = strlen(name);
+		} else {
+			name = dso->long_name;
+			name_len = dso->long_name_len;
+		}
+
+		if (dso->kernel ||
+		    is_kernel_module(name, PERF_RECORD_MISC_CPUMODE_UNKNOWN))
+			misc = kmisc;
+		else
+			misc = umisc;
+
+		ret = write_buildid(name, name_len, dso->build_id,
+				    machine->pid, misc, fd);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Write the build-id table of the host machine and then of every guest
+ * machine of @session to @fd.  Returns the first non-zero result, or 0.
+ */
+int perf_session__write_buildid_table(struct perf_session *session,
+				      struct feat_fd *fd)
+{
+	struct rb_node *node;
+	int ret;
+
+	ret = machine__write_buildid_table(&session->machines.host, fd);
+	if (ret)
+		return ret;
+
+	for (node = rb_first(&session->machines.guests); node;
+	     node = rb_next(node)) {
+		struct machine *guest = rb_entry(node, struct machine, rb_node);
+
+		ret = machine__write_buildid_table(guest, fd);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Set the hit flag of every dso on the list at @head.  Always returns 0. */
+static int __dsos__hit_all(struct list_head *head)
+{
+	struct dso *dso;
+
+	list_for_each_entry(dso, head, node) {
+		dso->hit = true;
+	}
+
+	return 0;
+}
+
+/* Mark every dso of @machine as hit; forwards the result of __dsos__hit_all(). */
+static int machine__hit_all_dsos(struct machine *machine)
+{
+	return __dsos__hit_all(&machine->dsos.head);
+}
+
+/*
+ * Mark every dso of the host machine and of all guest machines of @session
+ * as hit.  Returns the first non-zero result of machine__hit_all_dsos(),
+ * or 0.
+ */
+int dsos__hit_all(struct perf_session *session)
+{
+	struct rb_node *node;
+	int ret = machine__hit_all_dsos(&session->machines.host);
+
+	if (ret)
+		return ret;
+
+	for (node = rb_first(&session->machines.guests); node;
+	     node = rb_next(node)) {
+		struct machine *guest = rb_entry(node, struct machine, rb_node);
+
+		ret = machine__hit_all_dsos(guest);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/* Set the flag that makes perf_session__cache_build_ids() return without caching. */
+void disable_buildid_cache(void)
+{
+	no_buildid_cache = true;
+}
+
+/* lsdir() filter: accept entries whose name is exactly two hex digits. */
+static bool lsdir_bid_head_filter(const char *name __maybe_unused,
+				  struct dirent *d)
+{
+	const char *s = d->d_name;
+
+	if (strlen(s) != 2)
+		return false;
+
+	return isxdigit(s[0]) && isxdigit(s[1]);
+}
+
+/*
+ * lsdir() filter: accept entries whose name is exactly SBUILD_ID_SIZE - 3
+ * hex digits, i.e. a build-id string without its first two characters.
+ */
+static bool lsdir_bid_tail_filter(const char *name __maybe_unused,
+				  struct dirent *d)
+{
+	const char *s = d->d_name;
+	int i;
+
+	for (i = 0; i < SBUILD_ID_SIZE - 3; i++) {
+		/* Also rejects names that end too early (NUL is not hex). */
+		if (!isxdigit(s[i]))
+			return false;
+	}
+
+	return s[i] == '\0';
+}
+
+/*
+ * Collect every build-id found under "<buildid_dir>/.build-id/" into a new
+ * strlist of hex strings.
+ *
+ * The cache is laid out as two-character directories holding entries named
+ * after the remaining characters; both parts are joined to rebuild the id.
+ * With @validonly, entries rejected by build_id_cache__valid_id() are left
+ * out.
+ *
+ * Returns the list (empty when the cache directory does not exist), or NULL
+ * on error.  The caller owns the list.
+ */
+struct strlist *build_id_cache__list_all(bool validonly)
+{
+	struct strlist *toplist, *linklist = NULL, *bidlist;
+	struct str_node *nd, *nd2;
+	char *topdir, *linkdir = NULL;
+	char sbuild_id[SBUILD_ID_SIZE];
+	int saved_errno;
+
+	/* for filename__ functions */
+	if (validonly)
+		symbol__init(NULL);
+
+	/* Open the top-level directory */
+	if (asprintf(&topdir, "%s/.build-id/", buildid_dir) < 0)
+		return NULL;
+
+	bidlist = strlist__new(NULL, NULL);
+	if (!bidlist)
+		goto out;
+
+	toplist = lsdir(topdir, lsdir_bid_head_filter);
+	if (!toplist) {
+		/* Sample errno first: the debug print may overwrite it. */
+		saved_errno = errno;
+		pr_debug("Error in lsdir(%s): %d\n", topdir, saved_errno);
+		/* If there is no buildid cache, return an empty list */
+		if (saved_errno == ENOENT)
+			goto out;
+		goto err_out;
+	}
+
+	strlist__for_each_entry(nd, toplist) {
+		if (asprintf(&linkdir, "%s/%s", topdir, nd->s) < 0) {
+			/*
+			 * The pointer is undefined after a failed asprintf();
+			 * reset it so err_out does not free garbage.
+			 */
+			linkdir = NULL;
+			goto err_out;
+		}
+		/* Open the lower-level directory */
+		linklist = lsdir(linkdir, lsdir_bid_tail_filter);
+		if (!linklist) {
+			pr_debug("Error in lsdir(%s): %d\n", linkdir, errno);
+			goto err_out;
+		}
+		strlist__for_each_entry(nd2, linklist) {
+			if (snprintf(sbuild_id, SBUILD_ID_SIZE, "%s%s",
+				     nd->s, nd2->s) != SBUILD_ID_SIZE - 1)
+				goto err_out;
+			if (validonly && !build_id_cache__valid_id(sbuild_id))
+				continue;
+			if (strlist__add(bidlist, sbuild_id) < 0)
+				goto err_out;
+		}
+		strlist__delete(linklist);
+		/*
+		 * err_out deletes linklist too; clear it so that a failure in
+		 * a later iteration cannot delete this list a second time.
+		 */
+		linklist = NULL;
+		zfree(&linkdir);
+	}
+
+out_free:
+	strlist__delete(toplist);
+out:
+	free(topdir);
+
+	return bidlist;
+
+err_out:
+	strlist__delete(linklist);
+	zfree(&linkdir);
+	strlist__delete(bidlist);
+	bidlist = NULL;
+	goto out_free;
+}
+
+/*
+ * Return true when the first @len characters of @maybe_sbuild_id are all
+ * hex digits (trivially true for @len == 0).
+ */
+static bool str_is_build_id(const char *maybe_sbuild_id, size_t len)
+{
+	const char *end = maybe_sbuild_id + len;
+	const char *p;
+
+	for (p = maybe_sbuild_id; p != end; p++)
+		if (!isxdigit(*p))
+			return false;
+
+	return true;
+}
+
+/*
+ * Expand a build-id prefix to the full build-id of the single valid cache
+ * entry that starts with it.
+ *
+ * Returns a strdup()ed string the caller must free, or NULL when the prefix
+ * is too long, is not all hex digits, matches no entry, or matches more
+ * than one entry.
+ */
+char *build_id_cache__complement(const char *incomplete_sbuild_id)
+{
+	size_t len = strlen(incomplete_sbuild_id);
+	struct str_node *nd, *match = NULL;
+	struct strlist *bidlist;
+	char *result = NULL;
+
+	if (len >= SBUILD_ID_SIZE)
+		return NULL;
+	if (!str_is_build_id(incomplete_sbuild_id, len))
+		return NULL;
+
+	bidlist = build_id_cache__list_all(true);
+	if (!bidlist)
+		return NULL;
+
+	strlist__for_each_entry(nd, bidlist) {
+		if (strncmp(nd->s, incomplete_sbuild_id, len))
+			continue;
+		if (match) {
+			/* Ambiguous prefix: a second entry matches as well. */
+			match = NULL;
+			break;
+		}
+		match = nd;
+	}
+
+	if (match)
+		result = strdup(match->s);
+	strlist__delete(bidlist);
+
+	return result;
+}
+
+/*
+ * Build the cache directory path for @name, optionally followed by
+ * "/<sbuild_id>" when @sbuild_id is not NULL.
+ *
+ * kallsyms and vdso entries are placed directly under buildid_dir using
+ * @name (or DSO__NAME_VDSO for a vdso); every other name is first resolved
+ * with nsinfo__realpath() and appended as is.
+ *
+ * Returns an allocated string the caller must free, or NULL on failure.
+ */
+char *build_id_cache__cachedir(const char *sbuild_id, const char *name,
+			       struct nsinfo *nsi, bool is_kallsyms,
+			       bool is_vdso)
+{
+	const bool special = is_kallsyms || is_vdso;
+	char *resolved = NULL, *path;
+	const char *base;
+
+	if (special) {
+		base = is_vdso ? DSO__NAME_VDSO : name;
+	} else {
+		resolved = nsinfo__realpath(name, nsi);
+		if (!resolved)
+			return NULL;
+		base = resolved;
+	}
+
+	if (asprintf(&path, "%s%s%s%s%s", buildid_dir, special ? "/" : "",
+		     base, sbuild_id ? "/" : "", sbuild_id ?: "") < 0)
+		path = NULL;
+
+	/* Only set when the name was resolved above. */
+	free(resolved);
+
+	return path;
+}
+
+/*
+ * List the entries of the cache directory that belongs to @pathname and
+ * store the list in *@result.
+ *
+ * Returns 0 on success, -ENOMEM when the directory name cannot be built,
+ * or -errno when lsdir() fails (*@result is NULL in that case).
+ */
+int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi,
+				   struct strlist **result)
+{
+	char *dir_name = build_id_cache__cachedir(NULL, pathname, nsi,
+						  false, false);
+	int ret;
+
+	if (!dir_name)
+		return -ENOMEM;
+
+	*result = lsdir(dir_name, lsdir_no_dot_filter);
+	/* Read errno before free() gets a chance to change it. */
+	ret = *result ? 0 : -errno;
+	free(dir_name);
+
+	return ret;
+}
+
+#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_GELF_GETNOTE_SUPPORT)
+/*
+ * Scan @realname for SDT markers inside the mount namespace of @nsi and
+ * commit them to the probe cache of @sbuild_id.
+ *
+ * Returns the result of probe_cache__scan_sdt() (logged as the number of
+ * SDTs found), or -1 when the cache cannot be created or committed.
+ */
+static int build_id_cache__add_sdt_cache(const char *sbuild_id,
+					  const char *realname,
+					  struct nsinfo *nsi)
+{
+	struct probe_cache *cache = probe_cache__new(sbuild_id, nsi);
+	struct nscookie nsc;
+	int nr_sdt;
+
+	if (!cache)
+		return -1;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	nr_sdt = probe_cache__scan_sdt(cache, realname);
+	nsinfo__mountns_exit(&nsc);
+
+	if (nr_sdt >= 0) {
+		pr_debug4("Found %d SDTs in %s\n", nr_sdt, realname);
+		if (probe_cache__commit(cache) < 0)
+			nr_sdt = -1;
+	}
+
+	probe_cache__delete(cache);
+	return nr_sdt;
+}
+#else
+#define build_id_cache__add_sdt_cache(sbuild_id, realname, nsi) (0)
+#endif
+
+/*
+ * Look for "/usr/lib/debug/.build-id/<xx>/<rest>.debug" (prefixed through
+ * __symbol__join_symfs()) for @sbuild_id, resolving and checking it inside
+ * the mount namespace of @nsi.
+ *
+ * Returns the realpath() of the file when it is readable (the caller must
+ * free it), NULL otherwise.
+ */
+static char *build_id_cache__find_debug(const char *sbuild_id,
+					struct nsinfo *nsi)
+{
+	char *debugfile = calloc(1, PATH_MAX);
+	struct nscookie nsc;
+	char *realname;
+	size_t len;
+
+	if (!debugfile)
+		return NULL;
+
+	len = __symbol__join_symfs(debugfile, PATH_MAX,
+				   "/usr/lib/debug/.build-id/");
+	snprintf(debugfile + len, PATH_MAX - len, "%.2s/%s.debug", sbuild_id,
+		 sbuild_id + 2);
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	realname = realpath(debugfile, NULL);
+	if (realname && access(realname, R_OK) != 0)
+		zfree(&realname);
+	nsinfo__mountns_exit(&nsc);
+
+	free(debugfile);
+	return realname;
+}
+
+/*
+ * Add the file @name to the build-id cache under the build-id @sbuild_id.
+ *
+ * Steps, in order:
+ *  - create the cache directory (replacing an old-style regular file),
+ *  - place the file there as "kallsyms", "vdso" or "elf" by copying
+ *    /proc/kallsyms, copying across the mount namespace, or hard-linking
+ *    with a copy as fallback,
+ *  - for anything that is not kallsyms, vdso or a ".ko" module, also store
+ *    a matching .debug file as "debug" when one is found,
+ *  - create the ".build-id/xx/..." symlink pointing at the directory,
+ *  - refresh the SDT cache (a failure there is only logged).
+ *
+ * Returns 0 when the symlink was created, -1 otherwise.
+ */
+int build_id_cache__add_s(const char *sbuild_id, const char *name,
+			  struct nsinfo *nsi, bool is_kallsyms, bool is_vdso)
+{
+	const size_t size = PATH_MAX;
+	char *realname = NULL, *filename = NULL, *dir_name = NULL,
+	     *linkname = zalloc(size), *tmp;
+	char *debugfile = NULL;
+	int err = -1;
+
+	/* linkname is written to and searched with strrchr() below. */
+	if (!linkname)
+		goto out_free;
+
+	if (!is_kallsyms) {
+		if (!is_vdso)
+			realname = nsinfo__realpath(name, nsi);
+		else
+			realname = realpath(name, NULL);
+		if (!realname)
+			goto out_free;
+	}
+
+	dir_name = build_id_cache__cachedir(sbuild_id, name, nsi, is_kallsyms,
+					    is_vdso);
+	if (!dir_name)
+		goto out_free;
+
+	/* Remove old style build-id cache */
+	if (is_regular_file(dir_name))
+		if (unlink(dir_name))
+			goto out_free;
+
+	if (mkdir_p(dir_name, 0755))
+		goto out_free;
+
+	/* Name of the cached file inside the build-id directory */
+	if (asprintf(&filename, "%s/%s", dir_name,
+		     build_id_cache__basename(is_kallsyms, is_vdso,
+		     false)) < 0) {
+		filename = NULL;
+		goto out_free;
+	}
+
+	if (access(filename, F_OK)) {
+		if (is_kallsyms) {
+			if (copyfile("/proc/kallsyms", filename))
+				goto out_free;
+		} else if (nsi && nsi->need_setns) {
+			if (copyfile_ns(name, filename, nsi))
+				goto out_free;
+		} else if (link(realname, filename) && errno != EEXIST &&
+				copyfile(name, filename))
+			goto out_free;
+	}
+
+	/* Some binaries are stripped, but have .debug files with their symbol
+	 * table.  Check to see if we can locate one of those, since the elf
+	 * file itself may not be very useful to users of our tools without a
+	 * symtab.
+	 */
+	if (!is_kallsyms && !is_vdso) {
+		size_t name_len = strlen(name);
+
+		/*
+		 * Kernel modules (".ko" suffix) are skipped.  A name shorter
+		 * than the suffix cannot be a module, and comparing it would
+		 * read before the start of the string.
+		 */
+		if (name_len < 3 || strncmp(".ko", name + name_len - 3, 3))
+			debugfile = build_id_cache__find_debug(sbuild_id, nsi);
+	}
+	if (debugfile) {
+		zfree(&filename);
+		if (asprintf(&filename, "%s/%s", dir_name,
+		    build_id_cache__basename(false, false, true)) < 0) {
+			filename = NULL;
+			goto out_free;
+		}
+		if (access(filename, F_OK)) {
+			if (nsi && nsi->need_setns) {
+				if (copyfile_ns(debugfile, filename,
+						nsi))
+					goto out_free;
+			} else if (link(debugfile, filename) &&
+					errno != EEXIST &&
+					copyfile(debugfile, filename))
+				goto out_free;
+		}
+	}
+
+	if (!build_id_cache__linkname(sbuild_id, linkname, size))
+		goto out_free;
+	tmp = strrchr(linkname, '/');
+	if (!tmp)
+		goto out_free;
+	/* Temporarily cut the last component to create the parent directory. */
+	*tmp = '\0';
+
+	if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
+		goto out_free;
+
+	*tmp = '/';
+	/*
+	 * Overwrite the five characters in front of the part of dir_name that
+	 * follows buildid_dir with "../..", giving the relative link target.
+	 */
+	tmp = dir_name + strlen(buildid_dir) - 5;
+	memcpy(tmp, "../..", 5);
+
+	if (symlink(tmp, linkname) == 0)
+		err = 0;
+
+	/* Update SDT cache : error is just warned */
+	if (realname &&
+	    build_id_cache__add_sdt_cache(sbuild_id, realname, nsi) < 0)
+		pr_debug4("Failed to update/scan SDT cache for %s\n", realname);
+
+out_free:
+	if (!is_kallsyms)
+		free(realname);
+	free(filename);
+	free(debugfile);
+	free(dir_name);
+	free(linkname);
+	return err;
+}
+
+/*
+ * Binary build-id front end of build_id_cache__add_s(): formats @build_id
+ * (@build_id_size bytes) with build_id__sprintf() and forwards the remaining
+ * arguments unchanged.  Returns what build_id_cache__add_s() returns.
+ */
+static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
+				 const char *name, struct nsinfo *nsi,
+				 bool is_kallsyms, bool is_vdso)
+{
+	char sbuild_id[SBUILD_ID_SIZE];
+
+	build_id__sprintf(build_id, build_id_size, sbuild_id);
+
+	return build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms,
+				     is_vdso);
+}
+
+/*
+ * Return true when the cache link for @sbuild_id exists, false when it does
+ * not or when its name cannot be built.
+ */
+bool build_id_cache__cached(const char *sbuild_id)
+{
+	char *linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
+	bool cached = linkname && access(linkname, F_OK) == 0;
+
+	free(linkname);
+	return cached;
+}
+
+/*
+ * Remove the cache entry of @sbuild_id: read where the build-id link points
+ * to, unlink the link, then rm_rf() the target it referred to.
+ *
+ * Returns 0 on success, -1 on any failure (including a missing link).
+ */
+int build_id_cache__remove_s(const char *sbuild_id)
+{
+	const size_t size = PATH_MAX;
+	char *target = zalloc(size);
+	char *linkname = zalloc(size);
+	char *base;
+	int err = -1;
+
+	if (!target || !linkname)
+		goto out_free;
+
+	if (build_id_cache__linkname(sbuild_id, linkname, size) == NULL)
+		goto out_free;
+
+	if (access(linkname, F_OK) != 0)
+		goto out_free;
+
+	/* At most size - 1 bytes are read, leaving the last byte untouched. */
+	if (readlink(linkname, target, size - 1) < 0)
+		goto out_free;
+
+	if (unlink(linkname) != 0)
+		goto out_free;
+
+	/*
+	 * The link target is relative to the directory holding the link, so
+	 * splice it in after the last '/' to get a usable path.
+	 */
+	base = strrchr(linkname, '/') + 1;
+	snprintf(base, size - (base - linkname), "%s", target);
+
+	if (rm_rf(linkname) == 0)
+		err = 0;
+
+out_free:
+	free(target);
+	free(linkname);
+	return err;
+}
+
+/*
+ * Add @dso to the build-id cache.  A kcore dso is stored like kallsyms,
+ * under the machine's mmap name instead of the dso's long name.
+ */
+static int dso__cache_build_id(struct dso *dso, struct machine *machine)
+{
+	bool is_kallsyms = dso__is_kallsyms(dso);
+	bool is_vdso = dso__is_vdso(dso);
+	bool is_kcore = dso__is_kcore(dso);
+	const char *name = is_kcore ? machine->mmap_name : dso->long_name;
+
+	return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name,
+				     dso->nsinfo, is_kallsyms || is_kcore,
+				     is_vdso);
+}
+
+/*
+ * Cache every dso on the list at @head that has a build-id.  All entries
+ * are attempted even after a failure; returns -1 if any of them failed,
+ * 0 otherwise.
+ */
+static int __dsos__cache_build_ids(struct list_head *head,
+				   struct machine *machine)
+{
+	struct dso *dso;
+	int ret = 0;
+
+	dsos__for_each_with_build_id(dso, head) {
+		if (dso__cache_build_id(dso, machine) != 0)
+			ret = -1;
+	}
+
+	return ret;
+}
+
+/* Cache the build-ids of all dsos of @machine; forwards to __dsos__cache_build_ids(). */
+static int machine__cache_build_ids(struct machine *machine)
+{
+	return __dsos__cache_build_ids(&machine->dsos.head, machine);
+}
+
+/*
+ * Populate the build-id cache from the host and all guest machines of
+ * @session.
+ *
+ * Does nothing and returns 0 when caching has been disabled.  Returns -1
+ * when buildid_dir cannot be created or when any machine reported a
+ * failure (every machine is still processed), 0 otherwise.
+ */
+int perf_session__cache_build_ids(struct perf_session *session)
+{
+	struct rb_node *node;
+	int failed;
+
+	if (no_buildid_cache)
+		return 0;
+
+	if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST)
+		return -1;
+
+	failed = machine__cache_build_ids(&session->machines.host);
+
+	for (node = rb_first(&session->machines.guests); node;
+	     node = rb_next(node)) {
+		struct machine *guest = rb_entry(node, struct machine, rb_node);
+
+		failed |= machine__cache_build_ids(guest);
+	}
+
+	return failed ? -1 : 0;
+}
+
+/* Forward to __dsos__read_build_ids() for the dso list of @machine. */
+static bool machine__read_build_ids(struct machine *machine, bool with_hits)
+{
+	return __dsos__read_build_ids(&machine->dsos.head, with_hits);
+}
+
+/*
+ * Run machine__read_build_ids() on the host and on every guest machine of
+ * @session.  Returns true when it returned true for at least one of them.
+ */
+bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
+{
+	struct rb_node *node;
+	bool any;
+
+	any = machine__read_build_ids(&session->machines.host, with_hits);
+
+	for (node = rb_first(&session->machines.guests); node;
+	     node = rb_next(node)) {
+		struct machine *guest = rb_entry(node, struct machine, rb_node);
+
+		any |= machine__read_build_ids(guest, with_hits);
+	}
+
+	return any;
+}
diff --git a/util/build-id.h b/util/build-id.h
new file mode 100644
index 0000000..f0c5651
--- /dev/null
+++ b/util/build-id.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_BUILD_ID_H_
+#define PERF_BUILD_ID_H_ 1
+
+#define BUILD_ID_SIZE	20
+#define SBUILD_ID_SIZE	(BUILD_ID_SIZE * 2 + 1)
+
+#include "tool.h"
+#include "namespaces.h"
+#include <linux/types.h>
+
+extern struct perf_tool build_id__mark_dso_hit_ops;
+struct dso;
+struct feat_fd;
+
+int build_id__sprintf(const u8 *build_id, int len, char *bf);
+int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
+int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+				    size_t size);
+
+char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+			     bool is_debug);
+
+int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
+			   struct perf_sample *sample, struct perf_evsel *evsel,
+			   struct machine *machine);
+
+int dsos__hit_all(struct perf_session *session);
+
+bool perf_session__read_build_ids(struct perf_session *session, bool with_hits);
+int perf_session__write_buildid_table(struct perf_session *session,
+				      struct feat_fd *fd);
+int perf_session__cache_build_ids(struct perf_session *session);
+
+char *build_id_cache__origname(const char *sbuild_id);
+char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size);
+char *build_id_cache__cachedir(const char *sbuild_id, const char *name,
+			       struct nsinfo *nsi, bool is_kallsyms,
+			       bool is_vdso);
+
+struct strlist;
+
+struct strlist *build_id_cache__list_all(bool validonly);
+char *build_id_cache__complement(const char *incomplete_sbuild_id);
+int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi,
+				   struct strlist **result);
+bool build_id_cache__cached(const char *sbuild_id);
+int build_id_cache__add_s(const char *sbuild_id,
+			  const char *name, struct nsinfo *nsi,
+			  bool is_kallsyms, bool is_vdso);
+int build_id_cache__remove_s(const char *sbuild_id);
+
+extern char buildid_dir[];
+
+void set_buildid_dir(const char *dir);
+void disable_buildid_cache(void);
+
+#endif
diff --git a/util/c++/Build b/util/c++/Build
new file mode 100644
index 0000000..988fef1
--- /dev/null
+++ b/util/c++/Build
@@ -0,0 +1,2 @@
+libperf-$(CONFIG_CLANGLLVM) += clang.o
+libperf-$(CONFIG_CLANGLLVM) += clang-test.o
diff --git a/util/c++/clang-c.h b/util/c++/clang-c.h
new file mode 100644
index 0000000..e513366
--- /dev/null
+++ b/util/c++/clang-c.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UTIL_CLANG_C_H
+#define PERF_UTIL_CLANG_C_H
+
+#include <stddef.h>	/* for size_t */
+#include <util-cxx.h>	/* for __maybe_unused */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_LIBCLANGLLVM_SUPPORT
+extern void perf_clang__init(void);
+extern void perf_clang__cleanup(void);
+
+extern int test__clang_to_IR(void);
+extern int test__clang_to_obj(void);
+
+extern int perf_clang__compile_bpf(const char *filename,
+				   void **p_obj_buf,
+				   size_t *p_obj_buf_sz);
+#else
+
+#include <errno.h>
+
+/*
+ * Fallbacks used when HAVE_LIBCLANGLLVM_SUPPORT is not defined: init and
+ * cleanup do nothing, the tests fail with -1 and compilation reports
+ * -ENOTSUP.
+ */
+static inline void perf_clang__init(void) { }
+static inline void perf_clang__cleanup(void) { }
+
+static inline int test__clang_to_IR(void) { return -1; }
+static inline int test__clang_to_obj(void) { return -1;}
+
+static inline int
+perf_clang__compile_bpf(const char *filename __maybe_unused,
+			void **p_obj_buf __maybe_unused,
+			size_t *p_obj_buf_sz __maybe_unused)
+{
+	return -ENOTSUP;
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/c++/clang-test.cpp b/util/c++/clang-test.cpp
new file mode 100644
index 0000000..7b042a5
--- /dev/null
+++ b/util/c++/clang-test.cpp
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "clang.h"
+#include "clang-c.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+
+#include <util-cxx.h>
+#include <tests/llvm.h>
+#include <string>
+
+/*
+ * Scope guard: calls perf_clang__init() on construction and
+ * perf_clang__cleanup() on destruction.
+ */
+class perf_clang_scope {
+public:
+	explicit perf_clang_scope() {perf_clang__init();}
+	~perf_clang_scope() {perf_clang__cleanup();}
+};
+
+/*
+ * Compile the built-in test program test_llvm__bpf_base_prog as
+ * "perf-test.c", with LINUX_VERSION_CODE defined to the fetched kernel
+ * version.  Returns a null pointer when the kernel version cannot be
+ * fetched or the compilation yields no module.
+ */
+static std::unique_ptr<llvm::Module>
+__test__clang_to_IR(void)
+{
+	unsigned int kernel_version;
+
+	if (fetch_kernel_version(&kernel_version, NULL, 0))
+		return nullptr;
+
+	const std::string cflag_kver =
+		"-DLINUX_VERSION_CODE=" + std::to_string(kernel_version);
+
+	return perf::getModuleFromSource({cflag_kver.c_str()},
+					 "perf-test.c",
+					 test_llvm__bpf_base_prog);
+}
+
+extern "C" {
+/*
+ * Test: the compiled test program must contain a function named
+ * "bpf_func__SyS_epoll_pwait".  Returns 0 when it does, -1 otherwise.
+ */
+int test__clang_to_IR(void)
+{
+	perf_clang_scope _scope;
+	auto M = __test__clang_to_IR();
+	int ret = -1;
+
+	if (M) {
+		for (llvm::Function& F : *M) {
+			if (F.getName() == "bpf_func__SyS_epoll_pwait") {
+				ret = 0;
+				break;
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Test: the compiled test program must be convertible to a BPF object.
+ * Returns 0 on success, -1 when either step yields nothing.
+ */
+int test__clang_to_obj(void)
+{
+	perf_clang_scope _scope;
+	auto M = __test__clang_to_IR();
+
+	if (!M)
+		return -1;
+
+	return perf::getBPFObjectFromModule(M.get()) ? 0 : -1;
+}
+
+}
diff --git a/util/c++/clang.cpp b/util/c++/clang.cpp
new file mode 100644
index 0000000..bf31cea
--- /dev/null
+++ b/util/c++/clang.cpp
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * llvm C frontend for perf. Support dynamically compile C file
+ *
+ * Inspired by clang example code:
+ * http://llvm.org/svn/llvm-project/cfe/trunk/examples/clang-interpreter/main.cpp
+ *
+ * Copyright (C) 2016 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2016 Huawei Inc.
+ */
+
+#include "clang/Basic/Version.h"
+#include "clang/CodeGen/CodeGenAction.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <memory>
+
+#include "clang.h"
+#include "clang-c.h"
+
+namespace perf {
+
+static std::unique_ptr<llvm::LLVMContext> LLVMCtx;
+
+using namespace clang;
+
+/*
+ * Create a compiler invocation for one C source file.
+ *
+ * A fixed cc1 argument list (bpf-pc-linux triple, -O2, no standard or
+ * builtin include directories, ...) is extended with the caller's CFlags
+ * and handed to tooling::newInvocation().  The frontend inputs are then
+ * replaced by Path, typed by the input kind of the "c" extension.
+ */
+static CompilerInvocation *
+createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path,
+			 DiagnosticsEngine& Diags)
+{
+	llvm::opt::ArgStringList CCArgs {
+		"-cc1",
+		"-triple", "bpf-pc-linux",
+		"-fsyntax-only",
+		"-ferror-limit", "19",
+		"-fmessage-length", "127",
+		"-O2",
+		"-nostdsysteminc",
+		"-nobuiltininc",
+		"-vectorize-loops",
+		"-vectorize-slp",
+		"-Wno-unused-value",
+		"-Wno-pointer-sign",
+		"-x", "c"};
+
+	CCArgs.append(CFlags.begin(), CFlags.end());
+	CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs);
+
+	/* Discard whatever inputs the argument list produced; compile Path only. */
+	FrontendOptions& Opts = CI->getFrontendOpts();
+	Opts.Inputs.clear();
+	Opts.Inputs.emplace_back(Path,
+			FrontendOptions::getInputKindForExtension("c"));
+	return CI;
+}
+
+/*
+ * Compile Path, looked up through VFS, into an LLVM module.
+ *
+ * The module is created in the file-level LLVMCtx, which is dereferenced
+ * here and is set by perf_clang__init().  Returns a null pointer when
+ * executing the EmitLLVMOnlyAction fails.
+ */
+static std::unique_ptr<llvm::Module>
+getModuleFromSource(llvm::opt::ArgStringList CFlags,
+		    StringRef Path, IntrusiveRefCntPtr<vfs::FileSystem> VFS)
+{
+	CompilerInstance Clang;
+	Clang.createDiagnostics();
+
+	Clang.setVirtualFileSystem(&*VFS);
+
+	/* The invocation is held through a different pointer type before clang 4. */
+#if CLANG_VERSION_MAJOR < 4
+	IntrusiveRefCntPtr<CompilerInvocation> CI =
+		createCompilerInvocation(std::move(CFlags), Path,
+					 Clang.getDiagnostics());
+	Clang.setInvocation(&*CI);
+#else
+	std::shared_ptr<CompilerInvocation> CI(
+		createCompilerInvocation(std::move(CFlags), Path,
+					 Clang.getDiagnostics()));
+	Clang.setInvocation(CI);
+#endif
+
+	std::unique_ptr<CodeGenAction> Act(new EmitLLVMOnlyAction(&*LLVMCtx));
+	if (!Clang.ExecuteAction(*Act))
+		return std::unique_ptr<llvm::Module>(nullptr);
+
+	return Act->takeModule();
+}
+
+/*
+ * Compile the source text Content as if it were a file called Name.
+ *
+ * The text is placed in an in-memory file system that is overlaid on top of
+ * the real one, and the result is compiled through that overlay.
+ */
+std::unique_ptr<llvm::Module>
+getModuleFromSource(llvm::opt::ArgStringList CFlags,
+		    StringRef Name, StringRef Content)
+{
+	using namespace vfs;
+
+	llvm::IntrusiveRefCntPtr<OverlayFileSystem> OverlayFS(
+			new OverlayFileSystem(getRealFileSystem()));
+	llvm::IntrusiveRefCntPtr<InMemoryFileSystem> MemFS(
+			new InMemoryFileSystem(true));
+
+	/*
+	 * pushOverlay helps setting working dir for MemFS. Must call
+	 * before addFile.
+	 */
+	OverlayFS->pushOverlay(MemFS);
+	MemFS->addFile(Twine(Name), 0, llvm::MemoryBuffer::getMemBuffer(Content));
+
+	return getModuleFromSource(std::move(CFlags), Name, OverlayFS);
+}
+
+/* Compile the file at Path, read through the real file system. */
+std::unique_ptr<llvm::Module>
+getModuleFromSource(llvm::opt::ArgStringList CFlags, StringRef Path)
+{
+	IntrusiveRefCntPtr<vfs::FileSystem> VFS(vfs::getRealFileSystem());
+	return getModuleFromSource(std::move(CFlags), Path, VFS);
+}
+
+/*
+ * Generate a BPF object file for Module and return it as an in-memory
+ * buffer.
+ *
+ * Module is modified: its data layout and target triple are set to those of
+ * the "bpf-pc-linux" target.  Returns a null pointer, after printing a
+ * message to llvm::errs(), when the target cannot be looked up, no target
+ * machine can be created, or the target cannot emit object files.
+ */
+std::unique_ptr<llvm::SmallVectorImpl<char>>
+getBPFObjectFromModule(llvm::Module *Module)
+{
+	using namespace llvm;
+
+	std::string TargetTriple("bpf-pc-linux");
+	std::string Error;
+	const Target* Target = TargetRegistry::lookupTarget(TargetTriple, Error);
+	if (!Target) {
+		llvm::errs() << Error;
+		return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);
+	}
+
+	llvm::TargetOptions Opt;
+	/*
+	 * createTargetMachine() returns a raw pointer owned by the caller;
+	 * hold it in a unique_ptr so it is released on every return path.
+	 */
+	std::unique_ptr<llvm::TargetMachine> TM(
+		Target->createTargetMachine(TargetTriple,
+					    "generic", "",
+					    Opt, Reloc::Static));
+	if (!TM) {
+		llvm::errs() << "Failed to create target machine for "
+			     << TargetTriple << "\n";
+		return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);
+	}
+
+	Module->setDataLayout(TM->createDataLayout());
+	Module->setTargetTriple(TargetTriple);
+
+	std::unique_ptr<SmallVectorImpl<char>> Buffer(new SmallVector<char, 0>());
+	raw_svector_ostream ostream(*Buffer);
+
+	legacy::PassManager PM;
+	if (TM->addPassesToEmitFile(PM, ostream,
+				    llvm::TargetMachine::CGFT_ObjectFile)) {
+		llvm::errs() << "TargetMachine can't emit a file of this type\n";
+		return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);
+	}
+	PM.run(*Module);
+
+	return Buffer;
+}
+
+}
+
+extern "C" {
+/*
+ * Create the LLVM context used by the perf:: helpers and run the LLVM
+ * initializers for the BPF target info, target, MC layer and asm printer.
+ */
+void perf_clang__init(void)
+{
+	perf::LLVMCtx.reset(new llvm::LLVMContext());
+	LLVMInitializeBPFTargetInfo();
+	LLVMInitializeBPFTarget();
+	LLVMInitializeBPFTargetMC();
+	LLVMInitializeBPFAsmPrinter();
+}
+
+/* Destroy the LLVM context created by perf_clang__init() and call llvm_shutdown(). */
+void perf_clang__cleanup(void)
+{
+	perf::LLVMCtx.reset(nullptr);
+	llvm::llvm_shutdown();
+}
+
+/*
+ * Compile the C file @filename to a BPF object.
+ *
+ * On success 0 is returned, *@p_obj_buf points to a malloc()ed copy of the
+ * object (owned by the caller) and *@p_obj_buf_sz holds its size in bytes.
+ * Returns -EINVAL when an output pointer is NULL or either compilation step
+ * fails, and -ENOMEM when the copy cannot be allocated.
+ */
+int perf_clang__compile_bpf(const char *filename,
+			    void **p_obj_buf,
+			    size_t *p_obj_buf_sz)
+{
+	using namespace perf;
+
+	if (p_obj_buf == nullptr || p_obj_buf_sz == nullptr)
+		return -EINVAL;
+
+	llvm::opt::ArgStringList CFlags;
+	auto Mod = getModuleFromSource(std::move(CFlags), filename);
+	if (!Mod)
+		return -EINVAL;
+
+	auto Obj = getBPFObjectFromModule(Mod.get());
+	if (!Obj)
+		return -EINVAL;
+
+	/* Hand the caller a plain C buffer that outlives the LLVM objects. */
+	const size_t size = Obj->size_in_bytes();
+	void *copy = malloc(size);
+
+	if (!copy)
+		return -ENOMEM;
+	memcpy(copy, Obj->data(), size);
+
+	*p_obj_buf = copy;
+	*p_obj_buf_sz = size;
+	return 0;
+}
+}
diff --git a/util/c++/clang.h b/util/c++/clang.h
new file mode 100644
index 0000000..6ce33e2
--- /dev/null
+++ b/util/c++/clang.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UTIL_CLANG_H
+#define PERF_UTIL_CLANG_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Option/Option.h"
+#include <memory>
+
+namespace perf {
+
+using namespace llvm;
+
+std::unique_ptr<Module>
+getModuleFromSource(opt::ArgStringList CFlags,
+		    StringRef Name, StringRef Content);
+
+std::unique_ptr<Module>
+getModuleFromSource(opt::ArgStringList CFlags,
+		    StringRef Path);
+
+std::unique_ptr<llvm::SmallVectorImpl<char>>
+getBPFObjectFromModule(llvm::Module *Module);
+
+}
+#endif
diff --git a/util/cache.h b/util/cache.h
new file mode 100644
index 0000000..9f2e36e
--- /dev/null
+++ b/util/cache.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CACHE_H
+#define __PERF_CACHE_H
+
+#include "strbuf.h"
+#include <subcmd/pager.h>
+#include "../ui/ui.h"
+
+#include <linux/compiler.h>
+#include <linux/string.h>
+
+#define CMD_EXEC_PATH "--exec-path"
+#define CMD_DEBUGFS_DIR "--debugfs-dir="
+
+#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
+#define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR"
+#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
+#define PERF_PAGER_ENVIRONMENT "PERF_PAGER"
+
+int split_cmdline(char *cmdline, const char ***argv);
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+/* Return 1 when @path starts with '/', 0 otherwise. */
+static inline int is_absolute_path(const char *path)
+{
+	return *path == '/';
+}
+
+char *mkpath(const char *fmt, ...) __printf(1, 2);
+
+#endif /* __PERF_CACHE_H */
diff --git a/util/call-path.c b/util/call-path.c
new file mode 100644
index 0000000..904a170
--- /dev/null
+++ b/util/call-path.c
@@ -0,0 +1,122 @@
+/*
+ * call-path.c: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+
+#include "util.h"
+#include "call-path.h"
+
+/*
+ * Initialise @cp as a childless call path below @parent that is not yet
+ * linked into any tree.  @ip is stored only when there is no symbol.
+ */
+static void call_path__init(struct call_path *cp, struct call_path *parent,
+			    struct symbol *sym, u64 ip, bool in_kernel)
+{
+	cp->parent = parent;
+	cp->sym = sym;
+	if (sym)
+		cp->ip = 0;
+	else
+		cp->ip = ip;
+	cp->in_kernel = in_kernel;
+	cp->db_id = 0;
+	cp->children = RB_ROOT;
+	RB_CLEAR_NODE(&cp->rb_node);
+}
+
+/*
+ * Allocate a call path root with an initialised root call path and an empty
+ * block list.  Returns NULL when the allocation fails.
+ */
+struct call_path_root *call_path_root__new(void)
+{
+	struct call_path_root *cpr = zalloc(sizeof(*cpr));
+
+	if (cpr) {
+		call_path__init(&cpr->call_path, NULL, NULL, 0, false);
+		INIT_LIST_HEAD(&cpr->blocks);
+	}
+
+	return cpr;
+}
+
+/*
+ * Free every block on @cpr's block list and then @cpr itself.  @cpr is
+ * dereferenced unconditionally, so it must not be NULL.
+ */
+void call_path_root__free(struct call_path_root *cpr)
+{
+	struct call_path_block *pos, *n;
+
+	/* The _safe variant is needed because entries are freed while walking. */
+	list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
+		list_del(&pos->node);
+		free(pos);
+	}
+	free(cpr);
+}
+
+/*
+ * Take the next free call path slot of @cpr and initialise it.
+ *
+ * Slots are handed out sequentially from blocks of CALL_PATH_BLOCK_SIZE
+ * entries; a new block is appended once all existing slots are used.
+ * Returns NULL when a new block cannot be allocated.
+ */
+static struct call_path *call_path__new(struct call_path_root *cpr,
+					struct call_path *parent,
+					struct symbol *sym, u64 ip,
+					bool in_kernel)
+{
+	struct call_path_block *cpb;
+	struct call_path *cp;
+
+	if (cpr->next >= cpr->sz) {
+		cpb = zalloc(sizeof(*cpb));
+		if (!cpb)
+			return NULL;
+		list_add_tail(&cpb->node, &cpr->blocks);
+		cpr->sz += CALL_PATH_BLOCK_SIZE;
+	} else {
+		/* Free slots can only be in the most recent block. */
+		cpb = list_last_entry(&cpr->blocks, struct call_path_block,
+				      node);
+	}
+
+	cp = &cpb->cp[cpr->next & CALL_PATH_BLOCK_MASK];
+	cpr->next++;
+
+	call_path__init(cp, parent, sym, ip, in_kernel);
+
+	return cp;
+}
+
+/*
+ * Find the child of @parent identified by (@sym, @ip), creating and
+ * inserting it when it does not exist yet.
+ *
+ * Children are kept in an rbtree ordered by symbol pointer and then ip.
+ * When @sym is set the ip is ignored for the lookup (treated as 0).  @ks is
+ * compared against the ip that was passed in: an ip at or above it marks
+ * the path as in-kernel.  Without a @parent a new, unlinked call path is
+ * always created.
+ *
+ * Returns the call path, or NULL when a new one cannot be allocated.
+ */
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+				     struct call_path *parent,
+				     struct symbol *sym, u64 ip, u64 ks)
+{
+	struct rb_node **p;
+	struct rb_node *node_parent = NULL;
+	struct call_path *cp;
+	/* Must be computed before ip is cleared below. */
+	bool in_kernel = ip >= ks;
+
+	if (sym)
+		ip = 0;
+
+	if (!parent)
+		return call_path__new(cpr, parent, sym, ip, in_kernel);
+
+	/* Descend to the matching node or to the link where it belongs. */
+	p = &parent->children.rb_node;
+	while (*p != NULL) {
+		node_parent = *p;
+		cp = rb_entry(node_parent, struct call_path, rb_node);
+
+		if (cp->sym == sym && cp->ip == ip)
+			return cp;
+
+		if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	cp = call_path__new(cpr, parent, sym, ip, in_kernel);
+	if (!cp)
+		return NULL;
+
+	rb_link_node(&cp->rb_node, node_parent, p);
+	rb_insert_color(&cp->rb_node, &parent->children);
+
+	return cp;
+}
diff --git a/util/call-path.h b/util/call-path.h
new file mode 100644
index 0000000..477f6d0
--- /dev/null
+++ b/util/call-path.h
@@ -0,0 +1,77 @@
+/*
+ * call-path.h: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_CALL_PATH_H
+#define __PERF_CALL_PATH_H
+
+#include <sys/types.h>
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+
+/**
+ * struct call_path - node in list of calls leading to a function call.
+ * @parent: call path to the parent function call
+ * @sym: symbol of function called
+ * @ip: only if sym is null, the ip of the function
+ * @db_id: id used for db-export
+ * @in_kernel: whether function is in the kernel
+ * @rb_node: node in parent's tree of called functions
+ * @children: tree of call paths of functions called
+ *
+ * In combination with the call_return structure, the call_path structure
+ * defines a context-sensitive call-graph.
+ */
+struct call_path {
+	struct call_path *parent;
+	struct symbol *sym;
+	u64 ip;
+	u64 db_id;
+	bool in_kernel;
+	struct rb_node rb_node;
+	struct rb_root children;
+};
+
+#define CALL_PATH_BLOCK_SHIFT 8
+#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
+#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)
+
+struct call_path_block {
+	struct call_path cp[CALL_PATH_BLOCK_SIZE];
+	struct list_head node;
+};
+
+/**
+ * struct call_path_root - root of all call paths.
+ * @call_path: root call path
+ * @blocks: list of blocks to store call paths
+ * @next: next free space
+ * @sz: number of spaces
+ */
+struct call_path_root {
+	struct call_path call_path;
+	struct list_head blocks;
+	size_t next;
+	size_t sz;
+};
+
+struct call_path_root *call_path_root__new(void);
+void call_path_root__free(struct call_path_root *cpr);
+
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+				     struct call_path *parent,
+				     struct symbol *sym, u64 ip, u64 ks);
+
+#endif
diff --git a/util/callchain.c b/util/callchain.c
new file mode 100644
index 0000000..32ef7bd
--- /dev/null
+++ b/util/callchain.c
@@ -0,0 +1,1571 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Handle the callchains from the stream in an ad-hoc radix tree and then
+ * sort them in an rbtree.
+ *
+ * Using a radix tree for code paths provides fast retrieval and factorizes
+ * memory use. It also lets us use the paths in a hierarchical graph view.
+ *
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <math.h>
+
+#include "asm/bug.h"
+
+#include "hist.h"
+#include "util.h"
+#include "sort.h"
+#include "machine.h"
+#include "callchain.h"
+#include "branch.h"
+
+#define CALLCHAIN_PARAM_DEFAULT			\
+	.mode		= CHAIN_GRAPH_ABS,	\
+	.min_percent	= 0.5,			\
+	.order		= ORDER_CALLEE,		\
+	.key		= CCKEY_FUNCTION,	\
+	.value		= CCVAL_PERCENT,	\
+
+struct callchain_param callchain_param = { /* live settings, updated by the parsers in this file */
+	CALLCHAIN_PARAM_DEFAULT
+};
+
+/*
+ * Are there any events using DWARF callchains?
+ *
+ * I.e.
+ *
+ * -e cycles/call-graph=dwarf/
+ */
+bool dwarf_callchain_users;
+
+struct callchain_param callchain_param_default = { /* starts with the same values as callchain_param */
+	CALLCHAIN_PARAM_DEFAULT
+};
+
+__thread struct callchain_cursor callchain_cursor; /* __thread: every thread has its own cursor */
+
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
+{
+	return parse_callchain_record(arg, param); /* plain forwarder: result passed through unchanged */
+}
+
+/* Map a print-type name, or any prefix of one, onto callchain_param.mode. */
+static int parse_callchain_mode(const char *value)
+{
+	/* comparing strlen(value) bytes is what makes abbreviations match */
+	const size_t len = strlen(value);
+
+	if (strncmp(value, "graph", len) == 0)
+		callchain_param.mode = CHAIN_GRAPH_ABS;
+	else if (strncmp(value, "flat", len) == 0)
+		callchain_param.mode = CHAIN_FLAT;
+	else if (strncmp(value, "fractal", len) == 0)
+		callchain_param.mode = CHAIN_GRAPH_REL;
+	else if (strncmp(value, "folded", len) == 0)
+		callchain_param.mode = CHAIN_FOLDED;
+	else
+		return -1;	/* unknown name: mode left untouched */
+
+	/* names are tried in order, so e.g. "f" selects "flat" */
+	return 0;
+}
+
+/* Select caller- or callee-first ordering from a name or a prefix of one. */
+static int parse_callchain_order(const char *value)
+{
+	const size_t len = strlen(value);
+
+	if (strncmp(value, "caller", len) == 0)
+		callchain_param.order = ORDER_CALLER;
+	else if (strncmp(value, "callee", len) == 0)
+		callchain_param.order = ORDER_CALLEE;
+	else
+		return -1;
+	callchain_param.order_set = true;	/* the order was chosen explicitly */
+	return 0;
+}
+
+/* Pick the callchain comparison key; "branch" sets branch_callstack instead. */
+static int parse_callchain_sort_key(const char *value)
+{
+	/* only strlen(value) bytes are compared, so prefixes match too */
+	const size_t len = strlen(value);
+
+	/* names are tried in this order; the first match wins */
+	if (strncmp(value, "function", len) == 0)
+		callchain_param.key = CCKEY_FUNCTION;
+	else if (strncmp(value, "address", len) == 0)
+		callchain_param.key = CCKEY_ADDRESS;
+	else if (strncmp(value, "srcline", len) == 0)
+		callchain_param.key = CCKEY_SRCLINE;
+	else if (strncmp(value, "branch", len) == 0)
+		callchain_param.branch_callstack = 1;	/* key itself is unchanged */
+	else
+		return -1;
+
+	return 0;
+}
+
+/* Choose the printed value (percent, period or count); prefixes are accepted. */
+static int parse_callchain_value(const char *value)
+{
+	const size_t len = strlen(value);
+
+	if (strncmp(value, "percent", len) == 0)
+		callchain_param.value = CCVAL_PERCENT;
+	else if (strncmp(value, "period", len) == 0)
+		callchain_param.value = CCVAL_PERIOD;
+	else if (strncmp(value, "count", len) == 0)
+		callchain_param.value = CCVAL_COUNT;
+	else
+		return -1;
+
+	return 0;
+}
+
+static int get_stack_size(const char *str, unsigned long *_size)
+{
+	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
+	unsigned long size;
+	char *endptr;
+
+	/*
+	 * Base 0 accepts decimal, octal and hex; trailing characters are an
+	 * error.  *_size is only written on success, rounded up to u64 size.
+	 */
+	size = strtoul(str, &endptr, 0);
+	if (*endptr)
+		goto bad_size;
+
+	size = round_up(size, sizeof(u64));
+	if (!size || size > max_size)
+		goto bad_size;
+
+	*_size = size;
+	return 0;
+bad_size:
+	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
+	       max_size, str);
+	return -1;
+}
+
+static int
+__parse_callchain_report_opt(const char *arg, bool allow_record_opt)
+{
+	char *tok;
+	char *endptr, *saveptr = NULL;
+	bool minpcnt_set = false; /* set once the min percent has been parsed */
+	bool record_opt_set = false; /* at most one record option is parsed */
+	bool try_stack_size = false; /* the next number is a DWARF stack dump size */
+
+	callchain_param.enabled = true;
+	symbol_conf.use_callchain = true;
+
+	if (!arg)
+		return 0; /* no option string: callchains are enabled, nothing else changes */
+
+	while ((tok = strtok_r((char *)arg, ",", &saveptr)) != NULL) { /* NOTE: strtok_r() writes NULs into arg */
+		if (!strncmp(tok, "none", strlen(tok))) {
+			callchain_param.mode = CHAIN_NONE;
+			callchain_param.enabled = false;
+			symbol_conf.use_callchain = false;
+			return 0;
+		}
+
+		if (!parse_callchain_mode(tok) ||
+		    !parse_callchain_order(tok) ||
+		    !parse_callchain_sort_key(tok) ||
+		    !parse_callchain_value(tok)) {
+			/* one of the keyword parsers accepted the token - move on to the next */
+			try_stack_size = false;
+			goto next;
+		} else if (allow_record_opt && !record_opt_set) {
+			if (parse_callchain_record(tok, &callchain_param))
+				goto try_numbers;
+
+			/* a number directly following 'dwarf' is taken as the stack dump size */
+			if (callchain_param.record_mode == CALLCHAIN_DWARF)
+				try_stack_size = true;
+
+			record_opt_set = true;
+			goto next;
+		}
+
+try_numbers:
+		if (try_stack_size) {
+			unsigned long size = 0;
+
+			if (get_stack_size(tok, &size) < 0)
+				return -1;
+			callchain_param.dump_size = size;
+			try_stack_size = false;
+		} else if (!minpcnt_set) {
+			/* first plain number: the minimum percent threshold */
+			callchain_param.min_percent = strtod(tok, &endptr);
+			if (tok == endptr)
+				return -1;
+			minpcnt_set = true;
+		} else {
+			/* any later number: the print limit */
+			callchain_param.print_limit = strtoul(tok, &endptr, 0);
+			if (tok == endptr)
+				return -1;
+		}
+next:
+		arg = NULL; /* later strtok_r() calls resume from saveptr */
+	}
+
+	if (callchain_register_param(&callchain_param) < 0) {
+		pr_err("Can't register callchain params\n");
+		return -1;
+	}
+	return 0;
+}
+
+int parse_callchain_report_opt(const char *arg)
+{
+	return __parse_callchain_report_opt(arg, false); /* false: record options are not accepted */
+}
+
+int parse_callchain_top_opt(const char *arg)
+{
+	return __parse_callchain_report_opt(arg, true); /* true: record options are accepted as well */
+}
+
+int parse_callchain_record(const char *arg, struct callchain_param *param)
+{
+	char *tok, *name, *saveptr = NULL;
+	char *buf;
+	int ret = -1; /* stays -1 unless a record mode is accepted */
+
+	/* strtok_r() modifies its input, so tokenize a private copy of arg. */
+	buf = malloc(strlen(arg) + 1);
+	if (!buf)
+		return -ENOMEM;
+
+	strcpy(buf, arg);
+
+	tok = strtok_r((char *)buf, ",", &saveptr);
+	name = tok ? : (char *)buf; /* no token found: compare the raw buffer instead */
+
+	do {
+		/* Frame pointer style: takes no further arguments */
+		if (!strncmp(name, "fp", sizeof("fp"))) { /* sizeof() includes the NUL: exact match only */
+			if (!strtok_r(NULL, ",", &saveptr)) {
+				param->record_mode = CALLCHAIN_FP;
+				ret = 0;
+			} else
+				pr_err("callchain: No more arguments "
+				       "needed for --call-graph fp\n");
+			break;
+
+		/* Dwarf style: optionally followed by a stack dump size */
+		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
+			const unsigned long default_stack_dump_size = 8192;
+
+			ret = 0;
+			param->record_mode = CALLCHAIN_DWARF;
+			param->dump_size = default_stack_dump_size;
+			dwarf_callchain_users = true;
+
+			tok = strtok_r(NULL, ",", &saveptr);
+			if (tok) {
+				unsigned long size = 0;
+
+				ret = get_stack_size(tok, &size);
+				param->dump_size = size; /* 0 when the size was rejected (ret is -1 then) */
+			}
+		} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
+			if (!strtok_r(NULL, ",", &saveptr)) {
+				param->record_mode = CALLCHAIN_LBR;
+				ret = 0;
+			} else
+				pr_err("callchain: No more arguments "
+					"needed for --call-graph lbr\n");
+			break;
+		} else {
+			pr_err("callchain: Unknown --call-graph option "
+			       "value: %s\n", arg);
+			break;
+		}
+
+	} while (0);
+
+	free(buf);
+	return ret;
+}
+
+int perf_callchain_config(const char *var, const char *value)
+{
+	char *endptr;
+
+	if (!strstarts(var, "call-graph."))
+		return 0; /* not a call-graph variable: ignored, not an error */
+	var += sizeof("call-graph.") - 1; /* skip the prefix */
+
+	if (!strcmp(var, "record-mode"))
+		return parse_callchain_record_opt(value, &callchain_param);
+	if (!strcmp(var, "dump-size")) {
+		unsigned long size = 0;
+		int ret;
+
+		ret = get_stack_size(value, &size);
+		callchain_param.dump_size = size; /* 0 when the size was rejected */
+
+		return ret;
+	}
+	if (!strcmp(var, "print-type")){
+		int ret;
+		ret = parse_callchain_mode(value);
+		if (ret == -1)
+			pr_err("Invalid callchain mode: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "order")){
+		int ret;
+		ret = parse_callchain_order(value);
+		if (ret == -1)
+			pr_err("Invalid callchain order: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "sort-key")){
+		int ret;
+		ret = parse_callchain_sort_key(value);
+		if (ret == -1)
+			pr_err("Invalid callchain sort key: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "threshold")) { /* success falls through to the final return 0 */
+		callchain_param.min_percent = strtod(value, &endptr);
+		if (value == endptr) {
+			pr_err("Invalid callchain threshold: %s\n", value);
+			return -1;
+		}
+	}
+	if (!strcmp(var, "print-limit")) {
+		callchain_param.print_limit = strtod(value, &endptr); /* NOTE(review): parsed as a double here; the option parser uses strtoul() */
+		if (value == endptr) {
+			pr_err("Invalid callchain print limit: %s\n", value);
+			return -1;
+		}
+	}
+
+	return 0; /* unknown call-graph.* variables are accepted silently */
+}
+
+static void
+rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
+		    enum chain_mode mode)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct callchain_node *rnode;
+	u64 chain_cumul = callchain_cumul_hits(chain); /* loop-invariant key of the node being inserted */
+
+	while (*p) {
+		u64 rnode_cumul;
+
+		parent = *p;
+		rnode = rb_entry(parent, struct callchain_node, rb_node);
+		rnode_cumul = callchain_cumul_hits(rnode);
+
+		switch (mode) {
+		case CHAIN_FLAT:
+		case CHAIN_FOLDED:
+			if (rnode->hit < chain->hit) /* flat/folded: ordered by own hits, larger to the left */
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+			break;
+		case CHAIN_GRAPH_ABS: /* Fall through */
+		case CHAIN_GRAPH_REL:
+			if (rnode_cumul < chain_cumul) /* graph modes: ordered by cumulative hits, larger to the left */
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+			break;
+		case CHAIN_NONE:
+		default:
+			break; /* NOTE(review): p is not advanced here, so the loop would not end for such a mode */
+		}
+	}
+
+	rb_link_node(&chain->rb_node, parent, p);
+	rb_insert_color(&chain->rb_node, root);
+}
+
+/* Post-order walk: collect every node with own hits >= @min_hit in @rb_root. */
+static void
+__sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
+		  u64 min_hit)
+{
+	struct rb_node *it;
+
+	for (it = rb_first(&node->rb_root_in); it; it = rb_next(it)) {
+		struct callchain_node *kid;
+
+		kid = rb_entry(it, struct callchain_node, rb_node_in);
+		__sort_chain_flat(rb_root, kid, min_hit);
+	}
+
+	/* nodes without hits of their own never enter the flat tree */
+	if (node->hit && node->hit >= min_hit)
+		rb_insert_callchain(rb_root, node, CHAIN_FLAT);
+}
+
+/*
+ * Once we have every callchain from the stream, we can
+ * sort them by hit
+ */
+static void
+sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
+		u64 min_hit, struct callchain_param *param __maybe_unused)
+{
+	*rb_root = RB_ROOT; /* start from an empty output tree */
+	__sort_chain_flat(rb_root, &root->node, min_hit);
+}
+
+/* Sort children by cumulative hits, dropping those below @min_hit. */
+static void __sort_chain_graph_abs(struct callchain_node *node,
+				   u64 min_hit)
+{
+	struct rb_node *it;
+
+	node->rb_root = RB_ROOT;
+
+	for (it = rb_first(&node->rb_root_in); it; it = rb_next(it)) {
+		struct callchain_node *kid;
+
+		kid = rb_entry(it, struct callchain_node, rb_node_in);
+		/* sort the subtree first, then decide whether to show it */
+		__sort_chain_graph_abs(kid, min_hit);
+		if (callchain_cumul_hits(kid) < min_hit)
+			continue;
+		rb_insert_callchain(&node->rb_root, kid, CHAIN_GRAPH_ABS);
+	}
+}
+
+static void
+sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
+		     u64 min_hit, struct callchain_param *param __maybe_unused)
+{
+	__sort_chain_graph_abs(&chain_root->node, min_hit);
+	rb_root->rb_node = chain_root->node.rb_root.rb_node; /* hand out the root node's sorted child tree */
+}
+
+/* Sort children by cumulative hits with a cut-off relative to each parent. */
+static void __sort_chain_graph_rel(struct callchain_node *node,
+				   double min_percent)
+{
+	/* @min_percent is a fraction here; the threshold is per node */
+	u64 min_hit = ceil(node->children_hit * min_percent);
+	struct rb_node *it;
+
+	node->rb_root = RB_ROOT;
+
+	for (it = rb_first(&node->rb_root_in); it; it = rb_next(it)) {
+		struct callchain_node *kid;
+
+		kid = rb_entry(it, struct callchain_node, rb_node_in);
+		/* sort the subtree first, then decide whether to show it */
+		__sort_chain_graph_rel(kid, min_percent);
+		if (callchain_cumul_hits(kid) < min_hit)
+			continue;
+		rb_insert_callchain(&node->rb_root, kid, CHAIN_GRAPH_REL);
+	}
+}
+
+static void
+sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
+		     u64 min_hit __maybe_unused, struct callchain_param *param)
+{
+	__sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0); /* percent -> fraction */
+	rb_root->rb_node = chain_root->node.rb_root.rb_node; /* hand out the root node's sorted child tree */
+}
+
+/*
+ * Install the sort callback that matches param->mode.
+ * Returns 0 on success, -1 for CHAIN_NONE or an unknown mode;
+ * param->sort is left untouched in that case.
+ */
+int callchain_register_param(struct callchain_param *param)
+{
+	if (param->mode == CHAIN_GRAPH_ABS)
+		param->sort = sort_chain_graph_abs;
+	else if (param->mode == CHAIN_GRAPH_REL)
+		param->sort = sort_chain_graph_rel;
+	else if (param->mode == CHAIN_FLAT ||
+		 param->mode == CHAIN_FOLDED)
+		param->sort = sort_chain_flat;	/* folded reuses the flat sort */
+	else
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Create a child for a parent. If inherit_children, then the new child
+ * will become the new parent of its parent's children
+ */
+static struct callchain_node *
+create_child(struct callchain_node *parent, bool inherit_children)
+{
+	struct callchain_node *new;
+
+	new = zalloc(sizeof(*new));
+	if (!new) {
+		perror("not enough memory to create child for code path tree");
+		return NULL;
+	}
+	new->parent = parent;
+	INIT_LIST_HEAD(&new->val);
+	INIT_LIST_HEAD(&new->parent_val);
+
+	if (inherit_children) {
+		struct rb_node *n;
+		struct callchain_node *child;
+
+		new->rb_root_in = parent->rb_root_in; /* take over the parent's whole child tree */
+		parent->rb_root_in = RB_ROOT;
+
+		n = rb_first(&new->rb_root_in);
+		while (n) {
+			child = rb_entry(n, struct callchain_node, rb_node_in);
+			child->parent = new; /* re-parent every inherited child */
+			n = rb_next(n);
+		}
+
+		/* the parent's tree is empty now, so the new node becomes its only child */
+		rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
+	}
+
+	return new; /* without inherit_children the node is not linked into any tree here */
+}
+
+
+/*
+ * Copy the cursor entries from the current position onwards into node->val
+ */
+static int
+fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
+{
+	struct callchain_cursor_node *cursor_node;
+
+	node->val_nr = cursor->nr - cursor->pos; /* number of entries left in the cursor */
+	if (!node->val_nr)
+		pr_warning("Warning: empty node in callchain tree\n");
+
+	cursor_node = callchain_cursor_current(cursor);
+
+	while (cursor_node) {
+		struct callchain_list *call;
+
+		call = zalloc(sizeof(*call));
+		if (!call) {
+			perror("not enough memory for the code path tree");
+			return -1; /* entries already queued stay on node->val; add_child() frees them */
+		}
+		call->ip = cursor_node->ip;
+		call->ms.sym = cursor_node->sym;
+		call->ms.map = map__get(cursor_node->map); /* released with map__zput() when the entry is freed */
+		call->srcline = cursor_node->srcline;
+
+		if (cursor_node->branch) {
+			call->branch_count = 1; /* first sample seen for this entry */
+
+			if (cursor_node->branch_from) {
+				/*
+				 * branch_from is set with value somewhere else
+				 * to imply it's "to" of a branch.
+				 */
+				call->brtype_stat.branch_to = true;
+
+				if (cursor_node->branch_flags.predicted)
+					call->predicted_count = 1;
+
+				if (cursor_node->branch_flags.abort)
+					call->abort_count = 1;
+
+				branch_type_count(&call->brtype_stat,
+						  &cursor_node->branch_flags,
+						  cursor_node->branch_from,
+						  cursor_node->ip);
+			} else {
+				/*
+				 * It's "from" of a branch
+				 */
+				call->brtype_stat.branch_to = false;
+				call->cycles_count =
+					cursor_node->branch_flags.cycles;
+				call->iter_count = cursor_node->nr_loop_iter;
+				call->iter_cycles = cursor_node->iter_cycles;
+			}
+		}
+
+		list_add_tail(&call->list, &node->val); /* keep the cursor order */
+
+		callchain_cursor_advance(cursor);
+		cursor_node = callchain_cursor_current(cursor);
+	}
+	return 0;
+}
+
+static struct callchain_node *
+add_child(struct callchain_node *parent,
+	  struct callchain_cursor *cursor,
+	  u64 period)
+{
+	struct callchain_node *child = create_child(parent, false);
+	struct callchain_list *entry, *next;
+
+	if (!child)
+		return NULL;
+
+	if (fill_node(child, cursor) >= 0) {
+		/* a fresh leaf: one sample worth @period, nothing below it yet */
+		child->hit = period;
+		child->count = 1;
+		child->children_hit = 0;
+		child->children_count = 0;
+		return child;
+	}
+
+	/* fill_node() failed part-way: release the entries it already queued */
+	list_for_each_entry_safe(entry, next, &child->val, list) {
+		list_del(&entry->list);
+		map__zput(entry->ms.map);
+		free(entry);
+	}
+	free(child);
+	return NULL;
+}
+
+enum match_result {
+	MATCH_ERROR  = -1, /* no comparison was possible, or appending the chain failed */
+	MATCH_EQ,
+	MATCH_LT, /* append_chain_children() descends to rb_left on this result */
+	MATCH_GT, /* ... and to rb_right on this one */
+};
+
+/*
+ * Order two optional strings.  A missing @left yields MATCH_GT, a missing
+ * @right MATCH_LT; with both missing nothing can be compared: MATCH_ERROR.
+ */
+static enum match_result match_chain_strings(const char *left,
+					     const char *right)
+{
+	int order;
+
+	if (!left && !right)
+		return MATCH_ERROR;
+	if (!left)
+		return MATCH_GT;
+	if (!right)
+		return MATCH_LT;
+	order = strcmp(left, right);
+	if (order == 0)
+		return MATCH_EQ;
+	return order < 0 ? MATCH_LT : MATCH_GT;
+}
+
+/*
+ * Callchains from many threads, i.e. many address spaces, are aggregated
+ * here.  The same DSO symbol can sit at a different absolute address in
+ * another binary, or in the same binary under address randomization, so
+ * only DSO-relative addresses are comparable. -acme
+ */
+static enum match_result match_chain_dso_addresses(struct map *left_map, u64 left_ip,
+						   struct map *right_map, u64 right_ip)
+{
+	struct dso *ldso = NULL, *rdso = NULL;
+
+	if (left_map)
+		ldso = left_map->dso;
+	if (right_map)
+		rdso = right_map->dso;
+
+	if (ldso != rdso)
+		return ldso < rdso ? MATCH_LT : MATCH_GT;
+	if (left_ip != right_ip)
+		return left_ip < right_ip ? MATCH_LT : MATCH_GT;
+	return MATCH_EQ;
+}
+
+static enum match_result match_chain(struct callchain_cursor_node *node,
+				     struct callchain_list *cnode)
+{
+	enum match_result match = MATCH_ERROR;
+
+	switch (callchain_param.key) { /* the stored entry (cnode) is always the left-hand operand */
+	case CCKEY_SRCLINE:
+		match = match_chain_strings(cnode->srcline, node->srcline);
+		if (match != MATCH_ERROR)
+			break;
+		/* neither side has a srcline: fall back to symbol-based comparison below */
+		__fallthrough;
+	case CCKEY_FUNCTION:
+		if (node->sym && cnode->ms.sym) {
+			/*
+			 * Compare inlined frames based on their symbol name
+			 * because different inlined frames will have the same
+			 * symbol start. Otherwise do a faster comparison based
+			 * on the symbol start address.
+			 */
+			if (cnode->ms.sym->inlined || node->sym->inlined) {
+				match = match_chain_strings(cnode->ms.sym->name,
+							    node->sym->name);
+				if (match != MATCH_ERROR)
+					break;
+			} else {
+				match = match_chain_dso_addresses(cnode->ms.map, cnode->ms.sym->start,
+								  node->map, node->sym->start);
+				break;
+			}
+		}
+		/* a symbol is missing or the names were unusable: fall back to IP-based comparison below */
+		__fallthrough;
+	case CCKEY_ADDRESS:
+	default:
+		match = match_chain_dso_addresses(cnode->ms.map, cnode->ip, node->map, node->ip);
+		break;
+	}
+
+	if (match == MATCH_EQ && node->branch) { /* an equal entry also absorbs the sample's branch statistics */
+		cnode->branch_count++;
+
+		if (node->branch_from) {
+			/*
+			 * It's "to" of a branch
+			 */
+			cnode->brtype_stat.branch_to = true;
+
+			if (node->branch_flags.predicted)
+				cnode->predicted_count++;
+
+			if (node->branch_flags.abort)
+				cnode->abort_count++;
+
+			branch_type_count(&cnode->brtype_stat,
+					  &node->branch_flags,
+					  node->branch_from,
+					  node->ip);
+		} else {
+			/*
+			 * It's "from" of a branch
+			 */
+			cnode->brtype_stat.branch_to = false;
+			cnode->cycles_count += node->branch_flags.cycles;
+			cnode->iter_count += node->nr_loop_iter;
+			cnode->iter_cycles += node->iter_cycles;
+		}
+	}
+
+	return match;
+}
+
+/*
+ * Split the parent in two parts (a new child is created) and
+ * give a part of its callchain to the created child.
+ * Then, if the cursor still has entries left, create another child
+ * to host them as a new branch. Returns 0 on success, -1 on failure.
+ */
+static int
+split_add_child(struct callchain_node *parent,
+		struct callchain_cursor *cursor,
+		struct callchain_list *to_split,
+		u64 idx_parents, u64 idx_local, u64 period)
+{
+	struct callchain_node *new;
+	struct list_head *old_tail;
+	unsigned int idx_total = idx_parents + idx_local; /* cursor entries consumed so far */
+
+	/* split: the new node inherits all of the parent's current children */
+	new = create_child(parent, true);
+	if (new == NULL)
+		return -1;
+
+	/* split the callchain and move the part from to_split to the tail to the new child */
+	old_tail = parent->val.prev;
+	list_del_range(&to_split->list, old_tail);
+	new->val.next = &to_split->list;
+	new->val.prev = old_tail;
+	to_split->list.prev = &new->val;
+	old_tail->next = &new->val;
+
+	/* split the hits: the new child takes over the parent's previous totals */
+	new->hit = parent->hit;
+	new->children_hit = parent->children_hit;
+	parent->children_hit = callchain_cumul_hits(new);
+	new->val_nr = parent->val_nr - idx_local;
+	parent->val_nr = idx_local;
+	new->count = parent->count;
+	new->children_count = parent->children_count;
+	parent->children_count = callchain_cumul_counts(new);
+
+	/* create a new child for the new branch if any */
+	if (idx_total < cursor->nr) {
+		struct callchain_node *first;
+		struct callchain_list *cnode;
+		struct callchain_cursor_node *node;
+		struct rb_node *p, **pp;
+
+		parent->hit = 0; /* no chain ends at the parent any more */
+		parent->children_hit += period;
+		parent->count = 0;
+		parent->children_count += 1;
+
+		node = callchain_cursor_current(cursor); /* remembered before add_child() consumes the cursor */
+		new = add_child(parent, cursor, period);
+		if (new == NULL)
+			return -1; /* the split performed above is not undone */
+
+		/*
+		 * This is second child since we moved parent's children
+		 * to new (first) child above.
+		 */
+		p = parent->rb_root_in.rb_node;
+		first = rb_entry(p, struct callchain_node, rb_node_in);
+		cnode = list_first_entry(&first->val, struct callchain_list,
+					 list);
+
+		if (match_chain(node, cnode) == MATCH_LT)
+			pp = &p->rb_left;
+		else
+			pp = &p->rb_right;
+
+		rb_link_node(&new->rb_node_in, p, pp);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
+	} else {
+		parent->hit = period; /* the sampled chain ends exactly at the split point */
+		parent->count = 1;
+	}
+	return 0;
+}
+
+static enum match_result
+append_chain(struct callchain_node *root,
+	     struct callchain_cursor *cursor,
+	     u64 period);
+
+static int
+append_chain_children(struct callchain_node *root,
+		      struct callchain_cursor *cursor,
+		      u64 period)
+{
+	struct callchain_node *rnode;
+	struct callchain_cursor_node *node;
+	struct rb_node **p = &root->rb_root_in.rb_node;
+	struct rb_node *parent = NULL;
+
+	node = callchain_cursor_current(cursor);
+	if (!node)
+		return -1; /* nothing left in the cursor to append */
+
+	/* look the chain up among the children */
+	while (*p) {
+		enum match_result ret;
+
+		parent = *p;
+		rnode = rb_entry(parent, struct callchain_node, rb_node_in);
+
+		/* If at least the first entry matches, the child takes the chain over */
+		ret = append_chain(rnode, cursor, period);
+		if (ret == MATCH_EQ)
+			goto inc_children_hit;
+		if (ret == MATCH_ERROR)
+			return -1;
+
+		if (ret == MATCH_LT)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+	/* no child matched: add the chain as a new child of the current node */
+	rnode = add_child(root, cursor, period);
+	if (rnode == NULL)
+		return -1;
+
+	rb_link_node(&rnode->rb_node_in, parent, p);
+	rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
+
+inc_children_hit:
+	root->children_hit += period; /* either way the sample now lives below root */
+	root->children_count++;
+	return 0;
+}
+
+static enum match_result
+append_chain(struct callchain_node *root,
+	     struct callchain_cursor *cursor,
+	     u64 period)
+{
+	struct callchain_list *cnode;
+	u64 start = cursor->pos; /* cursor position on entry, used to count matched entries */
+	bool found = false;
+	u64 matches;
+	enum match_result cmp = MATCH_ERROR;
+
+	/*
+	 * Lookup in the current node
+	 * If we have a symbol, then compare the start to match
+	 * anywhere inside a function, unless function
+	 * mode is disabled.
+	 */
+	list_for_each_entry(cnode, &root->val, list) {
+		struct callchain_cursor_node *node;
+
+		node = callchain_cursor_current(cursor);
+		if (!node)
+			break;
+
+		cmp = match_chain(node, cnode);
+		if (cmp != MATCH_EQ)
+			break;
+
+		found = true;
+
+		callchain_cursor_advance(cursor);
+	}
+
+	/* not even the first entry matched: report the comparison result to the caller */
+	if (!found) {
+		WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n");
+		return cmp;
+	}
+
+	matches = cursor->pos - start;
+
+	/* we match only a part of the node. Split it and add the new chain */
+	if (matches < root->val_nr) {
+		if (split_add_child(root, cursor, cnode, start, matches,
+				    period) < 0)
+			return MATCH_ERROR;
+
+		return MATCH_EQ;
+	}
+
+	/* we match 100% of the path, increment the hit */
+	if (matches == root->val_nr && cursor->pos == cursor->nr) {
+		root->hit += period;
+		root->count++;
+		return MATCH_EQ;
+	}
+
+	/* We match the node and still have a part remaining */
+	if (append_chain_children(root, cursor, period) < 0)
+		return MATCH_ERROR;
+
+	return MATCH_EQ;
+}
+
+int callchain_append(struct callchain_root *root,
+		     struct callchain_cursor *cursor,
+		     u64 period)
+{
+	if (!cursor->nr)
+		return 0; /* an empty chain is silently accepted */
+
+	callchain_cursor_commit(cursor);
+
+	if (append_chain_children(&root->node, cursor, period) < 0)
+		return -1;
+
+	if (cursor->nr > root->max_depth)
+		root->max_depth = cursor->nr; /* track the longest chain appended so far */
+
+	return 0;
+}
+
+/*
+ * Replay @src onto @cursor, merge the chain and then @src's children below
+ * @dst.  The cursor length and tail are restored on every exit path.
+ */
+static int
+merge_chain_branch(struct callchain_cursor *cursor,
+		   struct callchain_node *dst, struct callchain_node *src)
+{
+	struct callchain_cursor_node **old_last = cursor->last;
+	struct callchain_list *list, *next_list;
+	struct callchain_node *child;
+	int old_pos = cursor->nr;
+	struct rb_node *n;
+	int err = 0;
+
+	list_for_each_entry_safe(list, next_list, &src->val, list) {
+		if (!err)	/* after a failure keep draining so no entry leaks */
+			err = callchain_cursor_append(cursor, list->ip,
+						list->ms.map, list->ms.sym,
+						false, NULL, 0, 0, 0, list->srcline);
+		list_del(&list->list);
+		map__zput(list->ms.map);
+		free(list);
+	}
+
+	if (!err && src->hit) {
+		callchain_cursor_commit(cursor);
+		if (append_chain_children(dst, cursor, src->hit) < 0)
+			err = -1;	/* fall through to the cursor restore below */
+	}
+
+	n = rb_first(&src->rb_root_in);
+	while (n && !err) {
+		child = container_of(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+		rb_erase(&child->rb_node_in, &src->rb_root_in);
+		err = merge_chain_branch(cursor, dst, child);
+		if (!err)
+			free(child);
+	}
+	cursor->nr = old_pos;
+	cursor->last = old_last;
+	return err;
+}
+
+int callchain_merge(struct callchain_cursor *cursor,
+		    struct callchain_root *dst, struct callchain_root *src)
+{
+	return merge_chain_branch(cursor, &dst->node, &src->node); /* merge starting at the two root nodes */
+}
+
+int callchain_cursor_append(struct callchain_cursor *cursor,
+			    u64 ip, struct map *map, struct symbol *sym,
+			    bool branch, struct branch_flags *flags,
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from,
+			    const char *srcline)
+{
+	struct callchain_cursor_node *node = *cursor->last; /* reuse a node already in this slot, if any */
+
+	if (!node) {
+		node = calloc(1, sizeof(*node));
+		if (!node)
+			return -ENOMEM;
+
+		*cursor->last = node; /* link the fresh node at the tail */
+	}
+
+	node->ip = ip;
+	map__zput(node->map); /* drop the reference left by the node's previous use */
+	node->map = map__get(map);
+	node->sym = sym;
+	node->branch = branch;
+	node->nr_loop_iter = nr_loop_iter;
+	node->iter_cycles = iter_cycles;
+	node->srcline = srcline; /* pointer stored as is, the string is not copied */
+
+	if (flags) /* with NULL flags the node keeps whatever branch_flags it had */
+		memcpy(&node->branch_flags, flags,
+			sizeof(struct branch_flags));
+
+	node->branch_from = branch_from;
+	cursor->nr++;
+
+	cursor->last = &node->next; /* the next append goes into (or reuses) node->next */
+
+	return 0;
+}
+
+int sample__resolve_callchain(struct perf_sample *sample,
+			      struct callchain_cursor *cursor, struct symbol **parent,
+			      struct perf_evsel *evsel, struct addr_location *al,
+			      int max_stack)
+{
+	/* no callchain data and no branch-flag counting: nothing to resolve */
+	if (sample->callchain == NULL && !symbol_conf.show_branchflag_count)
+		return 0;
+	/* resolving is skipped unless at least one consumer is enabled */
+	if (!symbol_conf.use_callchain && !symbol_conf.cumulate_callchain &&
+	    !perf_hpp_list.parent && !symbol_conf.show_branchflag_count)
+		return 0;
+	return thread__resolve_callchain(al->thread, cursor, evsel, sample,
+					 parent, al, max_stack);
+}
+
+int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample)
+{
+	if ((!symbol_conf.use_callchain || sample->callchain == NULL) &&
+		!symbol_conf.show_branchflag_count)
+		return 0; /* nothing to record for this sample */
+	return callchain_append(he->callchain, &callchain_cursor, sample->period); /* uses the per-thread cursor */
+}
+
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+			bool hide_unresolved)
+{
+	al->map = node->map;
+	al->sym = node->sym;
+	al->srcline = node->srcline;
+	al->addr = node->ip;
+
+	/* unresolved entries are rejected (0) when the caller hides them */
+	if (al->sym == NULL && hide_unresolved)
+		return 0;
+	/* without a map cpumode/level cannot be derived: leave them as they are */
+	if (al->map == NULL)
+		goto out;
+
+	if (al->map->groups == &al->machine->kmaps) { /* the map belongs to the machine's kernel maps */
+		if (machine__is_host(al->machine)) {
+			al->cpumode = PERF_RECORD_MISC_KERNEL;
+			al->level = 'k';
+		} else {
+			al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
+			al->level = 'g';
+		}
+	} else {
+		if (machine__is_host(al->machine)) {
+			al->cpumode = PERF_RECORD_MISC_USER;
+			al->level = '.';
+		} else if (perf_guest) {
+			al->cpumode = PERF_RECORD_MISC_GUEST_USER;
+			al->level = 'u';
+		} else {
+			al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
+			al->level = 'H';
+		}
+	}
+
+out:
+	return 1;
+}
+
+char *callchain_list__sym_name(struct callchain_list *cl,
+			       char *bf, size_t bfsize, bool show_dso)
+{
+	/* the srcline is appended for both the address and the srcline keys */
+	bool want_srcline = callchain_param.key == CCKEY_ADDRESS ||
+			    callchain_param.key == CCKEY_SRCLINE;
+	struct symbol *sym = cl->ms.sym;
+	int len;
+
+	if (!sym) {
+		/* unresolved entry: fall back to the raw address */
+		len = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
+	} else if (want_srcline && cl->srcline) {
+		len = scnprintf(bf, bfsize, "%s %s%s", sym->name, cl->srcline,
+				sym->inlined ? " (inlined)" : "");
+	} else {
+		len = scnprintf(bf, bfsize, "%s%s", sym->name,
+				sym->inlined ? " (inlined)" : "");
+	}
+
+	if (show_dso) {
+		const char *dso_name = cl->ms.map ? cl->ms.map->dso->short_name : "unknown";
+
+		scnprintf(bf + len, bfsize - len, " %s", dso_name);
+	}
+
+	return bf;
+}
+
+/*
+ * Format @node's value into @bf as a period, a sample count or a percentage
+ * of @total, as selected by callchain_param.value.  Returns @bf.
+ */
+char *callchain_node__scnprintf_value(struct callchain_node *node,
+				      char *bf, size_t bfsize, u64 total)
+{
+	u64 period = callchain_cumul_hits(node);
+	unsigned count = callchain_cumul_counts(node);
+
+	/* folded output reports the node's own numbers, not cumulative ones */
+	if (callchain_param.mode == CHAIN_FOLDED) {
+		period = node->hit;
+		count = node->count;
+	}
+
+	if (callchain_param.value == CCVAL_PERIOD) {
+		scnprintf(bf, bfsize, "%"PRIu64, period);
+	} else if (callchain_param.value == CCVAL_COUNT) {
+		scnprintf(bf, bfsize, "%u", count);
+	} else {
+		/* CCVAL_PERCENT and anything else; 0.00% when @total is 0 */
+		double percent = total ? period * 100.0 / total : 0.0;
+
+		scnprintf(bf, bfsize, "%.2f%%", percent);
+	}
+	return bf;
+}
+
+/*
+ * Print @node's value to @fp as a period, a sample count or a percentage
+ * of @total.  Returns whatever the print call used returned.
+ */
+int callchain_node__fprintf_value(struct callchain_node *node,
+				 FILE *fp, u64 total)
+{
+	u64 period = callchain_cumul_hits(node);
+	unsigned count = callchain_cumul_counts(node);
+	double percent = 0.0;
+
+	/* folded output reports the node's own numbers, not cumulative ones */
+	if (callchain_param.mode == CHAIN_FOLDED) {
+		period = node->hit;
+		count = node->count;
+	}
+
+	if (callchain_param.value == CCVAL_PERIOD)
+		return fprintf(fp, "%"PRIu64, period);
+	if (callchain_param.value == CCVAL_COUNT)
+		return fprintf(fp, "%u", count);
+	if (total)
+		percent = period * 100.0 / total;
+	return percent_color_fprintf(fp, "%.2f%%", percent);
+}
+
+static void callchain_counts_value(struct callchain_node *node,
+				   u64 *branch_count, u64 *predicted_count,
+				   u64 *abort_count, u64 *cycles_count)
+{
+	struct callchain_list *clist;
+
+	list_for_each_entry(clist, &node->val, list) { /* totals are accumulated, never reset here */
+		if (branch_count) /* a NULL output pointer means "not wanted" */
+			*branch_count += clist->branch_count;
+
+		if (predicted_count)
+			*predicted_count += clist->predicted_count;
+
+		if (abort_count)
+			*abort_count += clist->abort_count;
+
+		if (cycles_count)
+			*cycles_count += clist->cycles_count;
+	}
+}
+
+/* Sum the branch counters of all descendants of @node into the outputs. */
+static int callchain_node_branch_counts_cumul(struct callchain_node *node,
+					      u64 *branch_count,
+					      u64 *predicted_count,
+					      u64 *abort_count,
+					      u64 *cycles_count)
+{
+	struct rb_node *it;
+
+	for (it = rb_first(&node->rb_root_in); it; it = rb_next(it)) {
+		struct callchain_node *kid;
+
+		kid = rb_entry(it, struct callchain_node, rb_node_in);
+
+		/* grandchildren first, then the child's own entries */
+		callchain_node_branch_counts_cumul(kid, branch_count,
+						   predicted_count,
+						   abort_count, cycles_count);
+
+		callchain_counts_value(kid, branch_count, predicted_count,
+				       abort_count, cycles_count);
+	}
+
+	/* always 0; @node's own entries are not counted here */
+	return 0;
+}
+
+int callchain_branch_counts(struct callchain_root *root,
+			    u64 *branch_count, u64 *predicted_count,
+			    u64 *abort_count, u64 *cycles_count)
+{
+	if (branch_count) /* each requested total is reset before accumulation; NULL means "not wanted" */
+		*branch_count = 0;
+
+	if (predicted_count)
+		*predicted_count = 0;
+
+	if (abort_count)
+		*abort_count = 0;
+
+	if (cycles_count)
+		*cycles_count = 0;
+
+	return callchain_node_branch_counts_cumul(&root->node, /* walks everything below the root node */
+						  branch_count,
+						  predicted_count,
+						  abort_count,
+						  cycles_count);
+}
+
+/* Append "<sep><str>:<value>" to @bf; the first item (idx 0) opens with " (". */
+static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int bfsize)
+{
+	const char *sep = idx ? " " : " (";
+
+	/* the result is whatever scnprintf() reports */
+	return scnprintf(bf, bfsize, "%s%s:%" PRId64 "", sep, str, value);
+}
+
+/* Append "<sep><str>:<value>%" to @bf unless @value is below @threshold. */
+static int count_float_printf(int idx, const char *str, float value,
+			      char *bf, int bfsize, float threshold)
+{
+	const char *sep = idx ? " " : " (";
+
+	/* a zero threshold disables the filtering */
+	if (threshold != 0.0 && value < threshold)
+		return 0;
+
+	return scnprintf(bf, bfsize, "%s%s:%.1f%%", sep, str, value);
+}
+
+/* Format branch type and predicted/abort percentages; counts_str_build() passes branch_count != 0. */
+static int branch_to_str(char *bf, int bfsize,
+			 u64 branch_count, u64 predicted_count,
+			 u64 abort_count, struct branch_type_stat *brtype_stat)
+{
+	int printed = branch_type_str(brtype_stat, bf, bfsize);
+	int i = printed ? 1 : 0, len;
+
+	if (predicted_count < branch_count) {
+		printed += count_float_printf(i++, "predicted",
+				predicted_count * 100.0 / branch_count,
+				bf + printed, bfsize - printed, 0.0);
+	}
+
+	if (abort_count) {
+		/* Nothing is emitted below the 0.1% threshold: don't count it, */
+		/* or a lone ")" would be appended without an opening " (". */
+		len = count_float_printf(i, "abort",
+				abort_count * 100.0 / branch_count,
+				bf + printed, bfsize - printed, 0.1);
+		printed += len;
+		i += len > 0;
+	}
+
+	if (i)
+		printed += scnprintf(bf + printed, bfsize - printed, ")");
+	return printed;
+}
+
+/*
+ * Format the branch-source annotations: average cycles per branch and, when
+ * loop iterations were recorded, their count and average cycles.
+ * counts_str_build() only calls this with a non-zero branch_count.
+ */
+static int branch_from_str(char *bf, int bfsize,
+			   u64 branch_count,
+			   u64 cycles_count, u64 iter_count,
+			   u64 iter_cycles)
+{
+	u64 avg_cycles = cycles_count / branch_count;
+	int len = 0, nr_items = 0;
+
+	if (avg_cycles)
+		len += count_pri64_printf(nr_items++, "cycles", avg_cycles,
+					  bf + len, bfsize - len);
+
+	if (iter_count) {
+		len += count_pri64_printf(nr_items++, "iter", iter_count,
+					  bf + len, bfsize - len);
+		len += count_pri64_printf(nr_items++, "avg_cycles",
+					  iter_cycles / iter_count,
+					  bf + len, bfsize - len);
+	}
+
+	if (nr_items)
+		len += scnprintf(bf + len, bfsize - len, ")");
+
+	return len;
+}
+
+/* Build the annotation: " (calltrace)" without branch data, else the to/from form. */
+static int counts_str_build(char *bf, int bfsize,
+			     u64 branch_count, u64 predicted_count,
+			     u64 abort_count, u64 cycles_count,
+			     u64 iter_count, u64 iter_cycles,
+			     struct branch_type_stat *brtype_stat)
+{
+	int len;
+
+	if (!branch_count)
+		return scnprintf(bf, bfsize, " (calltrace)");
+
+	if (!brtype_stat->branch_to)
+		len = branch_from_str(bf, bfsize, branch_count,
+				      cycles_count, iter_count, iter_cycles);
+	else
+		len = branch_to_str(bf, bfsize, branch_count,
+				    predicted_count, abort_count, brtype_stat);
+
+	if (len == 0)
+		bf[0] = 0;
+
+	return len;
+}
+
+/*
+ * Build the annotation locally, then print it to @fp if set, else copy to @bf.
+ */
+static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
+				   u64 branch_count, u64 predicted_count,
+				   u64 abort_count, u64 cycles_count,
+				   u64 iter_count, u64 iter_cycles,
+				   struct branch_type_stat *brtype_stat)
+{
+	char text[256];
+
+	counts_str_build(text, sizeof(text), branch_count,
+			 predicted_count, abort_count, cycles_count,
+			 iter_count, iter_cycles, brtype_stat);
+
+	return fp ? fprintf(fp, "%s", text) : scnprintf(bf, bfsize, "%s", text);
+}
+
+/*
+ * Print the branch annotation of one callchain entry.
+ *
+ * The text goes to @fp when it is non-NULL, otherwise into @bf (at most
+ * @bfsize bytes).  Returns what the underlying fprintf()/scnprintf() call
+ * returned.
+ */
+int callchain_list_counts__printf_value(struct callchain_list *clist,
+					FILE *fp, char *bf, int bfsize)
+{
+	return callchain_counts_printf(fp, bf, bfsize,
+				       clist->branch_count,
+				       clist->predicted_count,
+				       clist->abort_count,
+				       clist->cycles_count,
+				       clist->iter_count,
+				       clist->iter_cycles,
+				       &clist->brtype_stat);
+}
+
+/*
+ * Free @node's parent_val/val entries and, recursively, all its children.
+ */
+static void free_callchain_node(struct callchain_node *node)
+{
+	struct list_head *heads[] = { &node->parent_val, &node->val };
+	struct callchain_list *entry, *next;
+	struct rb_node *pos = rb_first(&node->rb_root_in);
+	unsigned int i;
+
+	for (i = 0; i < sizeof(heads) / sizeof(heads[0]); i++) {
+		list_for_each_entry_safe(entry, next, heads[i], list) {
+			list_del(&entry->list);
+			map__zput(entry->ms.map);
+			free(entry);
+		}
+	}
+
+	while (pos) {
+		struct callchain_node *kid;
+
+		kid = container_of(pos, struct callchain_node, rb_node_in);
+		pos = rb_next(pos);	/* step on before the child is unlinked */
+		rb_erase(&kid->rb_node_in, &node->rb_root_in);
+		free_callchain_node(kid);
+		free(kid);
+	}
+}
+
+/* Free the chain tree below @root; the embedded root node itself is kept. */
+void free_callchain(struct callchain_root *root)
+{
+	/* Only act when callchains are enabled. */
+	if (symbol_conf.use_callchain)
+		free_callchain_node(&root->node);
+}
+
+/*
+ * Age @node's subtree: each node keeps 7/8 of its own hits.
+ * NOTE(review): the return value is the node's own hit, so children_hit
+ * ends up as the sum over direct children only -- confirm this is intended.
+ */
+static u64 decay_callchain_node(struct callchain_node *node)
+{
+	struct rb_node *pos;
+	u64 kids_hit = 0;
+
+	for (pos = rb_first(&node->rb_root_in); pos; pos = rb_next(pos))
+		kids_hit += decay_callchain_node(container_of(pos,
+					struct callchain_node, rb_node_in));
+
+	node->hit = (node->hit * 7) / 8;
+	node->children_hit = kids_hit;
+
+	return node->hit;
+}
+
+/* Age the hit counts of the whole chain tree below @root. */
+void decay_callchain(struct callchain_root *root)
+{
+	/* Only act when callchains are enabled. */
+	if (symbol_conf.use_callchain)
+		decay_callchain_node(&root->node);
+}
+
+int callchain_node__make_parent_list(struct callchain_node *node)
+{
+	struct callchain_node *parent = node->parent;
+	struct callchain_list *chain, *new;
+	LIST_HEAD(head);
+	/* Fill node->parent_val with copies of all ancestors' entries; 0 or -ENOMEM. */
+	while (parent) {
+		list_for_each_entry_reverse(chain, &parent->val, list) {
+			new = malloc(sizeof(*new));
+			if (new == NULL)
+				goto out;
+			*new = *chain;
+			new->has_children = false;
+			map__get(new->ms.map);
+			list_add_tail(&new->list, &head);
+		}
+		parent = parent->parent;
+	}
+	/* All copies were made: move them over, walking the private list backwards. */
+	list_for_each_entry_safe_reverse(chain, new, &head, list)
+		list_move_tail(&chain->list, &node->parent_val);
+
+	if (!list_empty(&node->parent_val)) {
+		chain = list_first_entry(&node->parent_val, struct callchain_list, list);
+		chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node);
+		/* ... and clear the flag on the node's own first entry. */
+		chain = list_first_entry(&node->val, struct callchain_list, list);
+		chain->has_children = false;
+	}
+	return 0;
+	/* Allocation failed: free the copies made so far and drop their map references. */
+out:
+	list_for_each_entry_safe(chain, new, &head, list) {
+		list_del(&chain->list);
+		map__zput(chain->ms.map);
+		free(chain);
+	}
+	return -ENOMEM;
+}
+
+/*
+ * Replace the contents of @dst with a copy of every entry in @src.
+ *
+ * @src is committed (rewound) first and left positioned after its last
+ * entry on success.  Returns 0, or the first non-zero value returned by
+ * callchain_cursor_append(), at which point copying stops.
+ */
+int callchain_cursor__copy(struct callchain_cursor *dst,
+			   struct callchain_cursor *src)
+{
+	struct callchain_cursor_node *node;
+	int rc = 0;
+
+	callchain_cursor_reset(dst);
+	callchain_cursor_commit(src);
+
+	while (!rc && (node = callchain_cursor_current(src)) != NULL) {
+		rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
+					     node->branch, &node->branch_flags,
+					     node->nr_loop_iter,
+					     node->iter_cycles,
+					     node->branch_from, node->srcline);
+		if (!rc)
+			callchain_cursor_advance(src);
+	}
+
+	return rc;
+}
diff --git a/util/callchain.h b/util/callchain.h
new file mode 100644
index 0000000..154560b
--- /dev/null
+++ b/util/callchain.h
@@ -0,0 +1,301 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CALLCHAIN_H
+#define __PERF_CALLCHAIN_H
+
+#include "../perf.h"
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include "event.h"
+#include "map.h"
+#include "symbol.h"
+#include "branch.h"
+
+#define HELP_PAD "\t\t\t\t"
+
+#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n"
+
+# define RECORD_MODE_HELP  HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n"
+
+#define RECORD_SIZE_HELP						\
+	HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording (<bytes>)\n" \
+	HELP_PAD "\t\tdefault: 8192 (bytes)\n"
+
+#define CALLCHAIN_RECORD_HELP  CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP
+
+#define CALLCHAIN_REPORT_HELP						\
+	HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|folded|none)\n" \
+	HELP_PAD "threshold:\tminimum call graph inclusion threshold (<percent>)\n" \
+	HELP_PAD "print_limit:\tmaximum number of call graph entry (<number>)\n" \
+	HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \
+	HELP_PAD "sort_key:\tcall graph sort key (function|address)\n"	\
+	HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" \
+	HELP_PAD "value:\t\tcall graph value (percent|period|count)\n"
+
+enum perf_call_graph_mode {
+	CALLCHAIN_NONE,
+	CALLCHAIN_FP,
+	CALLCHAIN_DWARF,
+	CALLCHAIN_LBR,
+	CALLCHAIN_MAX
+};
+
+enum chain_mode {
+	CHAIN_NONE,
+	CHAIN_FLAT,
+	CHAIN_GRAPH_ABS,
+	CHAIN_GRAPH_REL,
+	CHAIN_FOLDED,
+};
+
+enum chain_order {
+	ORDER_CALLER,
+	ORDER_CALLEE
+};
+
+struct callchain_node {
+	struct callchain_node	*parent;
+	struct list_head	val;
+	struct list_head	parent_val;
+	struct rb_node		rb_node_in; /* to insert nodes in an rbtree */
+	struct rb_node		rb_node;    /* to sort nodes in an output tree */
+	struct rb_root		rb_root_in; /* input tree of children */
+	struct rb_root		rb_root;    /* sorted output tree of children */
+	unsigned int		val_nr;
+	unsigned int		count;
+	unsigned int		children_count;
+	u64			hit;
+	u64			children_hit;
+};
+
+struct callchain_root {
+	u64			max_depth;
+	struct callchain_node	node;
+};
+
+struct callchain_param;
+
+typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *,
+				 u64, struct callchain_param *);
+
+enum chain_key {
+	CCKEY_FUNCTION,
+	CCKEY_ADDRESS,
+	CCKEY_SRCLINE
+};
+
+enum chain_value {
+	CCVAL_PERCENT,
+	CCVAL_PERIOD,
+	CCVAL_COUNT,
+};
+
+extern bool dwarf_callchain_users;
+
+struct callchain_param {
+	bool			enabled;
+	enum perf_call_graph_mode record_mode;
+	u32			dump_size;
+	enum chain_mode 	mode;
+	u16			max_stack;
+	u32			print_limit;
+	double			min_percent;
+	sort_chain_func_t	sort;
+	enum chain_order	order;
+	bool			order_set;
+	enum chain_key		key;
+	bool			branch_callstack;
+	enum chain_value	value;
+};
+
+extern struct callchain_param callchain_param;
+extern struct callchain_param callchain_param_default;
+
+struct callchain_list {
+	u64			ip;
+	struct map_symbol	ms;
+	struct /* for TUI */ {
+		bool		unfolded;
+		bool		has_children;
+	};
+	u64			branch_count;
+	u64			predicted_count;
+	u64			abort_count;
+	u64			cycles_count;
+	u64			iter_count;
+	u64			iter_cycles;
+	struct branch_type_stat brtype_stat;
+	const char		*srcline;
+	struct list_head	list;
+};
+
+/*
+ * A callchain cursor is a singly linked list that
+ * lets one feed a callchain progressively.
+ * It keeps persistently allocated entries to minimize
+ * allocations.
+ */
+struct callchain_cursor_node {
+	u64				ip;
+	struct map			*map;
+	struct symbol			*sym;
+	const char			*srcline;
+	bool				branch;
+	struct branch_flags		branch_flags;
+	u64				branch_from;
+	int				nr_loop_iter;
+	u64				iter_cycles;
+	struct callchain_cursor_node	*next;
+};
+
+struct callchain_cursor {
+	u64				nr;	/* entries in use; zeroed by callchain_cursor_reset() */
+	struct callchain_cursor_node	*first;	/* head of the entry list */
+	struct callchain_cursor_node	**last;	/* set to &first by callchain_cursor_reset() */
+	u64				pos;	/* read index; reading ends when pos == nr */
+	struct callchain_cursor_node	*curr;	/* entry returned by callchain_cursor_current() */
+};
+
+extern __thread struct callchain_cursor callchain_cursor;
+
+/* Reset @root to empty; rb_root, val_nr, count and children_count are left untouched. */
+static inline void callchain_init(struct callchain_root *root)
+{
+	root->max_depth = 0;
+	root->node.parent = NULL;
+	root->node.hit = 0;
+	root->node.children_hit = 0;
+	root->node.rb_root_in = RB_ROOT;
+	INIT_LIST_HEAD(&root->node.val);
+	INIT_LIST_HEAD(&root->node.parent_val);
+}
+
+static inline u64 callchain_cumul_hits(struct callchain_node *node)
+{
+	return node->hit + node->children_hit;
+}
+
+static inline unsigned callchain_cumul_counts(struct callchain_node *node)
+{
+	return node->count + node->children_count;
+}
+
+int callchain_register_param(struct callchain_param *param);
+int callchain_append(struct callchain_root *root,
+		     struct callchain_cursor *cursor,
+		     u64 period);
+
+int callchain_merge(struct callchain_cursor *cursor,
+		    struct callchain_root *dst, struct callchain_root *src);
+
+/*
+ * Initialize a cursor before adding entries inside, but keep
+ * the previously allocated entries as a cache.
+ */
+static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
+{
+	struct callchain_cursor_node *node = cursor->first;
+
+	cursor->nr = 0;
+	cursor->last = &cursor->first;
+	/* Entries stay allocated for reuse; only their map references are dropped. */
+	for (; node != NULL; node = node->next)
+		map__zput(node->map);
+}
+
+int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
+			    struct map *map, struct symbol *sym,
+			    bool branch, struct branch_flags *flags,
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from,
+			    const char *srcline);
+
+/* Close a cursor writing session. Initialize for the reader */
+static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
+{
+	cursor->curr = cursor->first;
+	cursor->pos = 0;
+}
+
+/* Cursor reading iteration helpers */
+/* Entry at the read position, or NULL once pos has reached cursor->nr. */
+static inline struct callchain_cursor_node *
+callchain_cursor_current(struct callchain_cursor *cursor)
+{
+	bool exhausted = cursor->pos == cursor->nr;
+
+	return exhausted ? NULL : cursor->curr;
+}
+
+static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
+{
+	cursor->curr = cursor->curr->next;
+	cursor->pos++;
+}
+
+int callchain_cursor__copy(struct callchain_cursor *dst,
+			   struct callchain_cursor *src);
+
+struct option;
+struct hist_entry;
+
+int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
+int record_callchain_opt(const struct option *opt, const char *arg, int unset);
+
+struct record_opts;
+
+int record_opts__parse_callchain(struct record_opts *record,
+				 struct callchain_param *callchain,
+				 const char *arg, bool unset);
+
+int sample__resolve_callchain(struct perf_sample *sample,
+			      struct callchain_cursor *cursor, struct symbol **parent,
+			      struct perf_evsel *evsel, struct addr_location *al,
+			      int max_stack);
+int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+			bool hide_unresolved);
+
+extern const char record_callchain_help[];
+int parse_callchain_record(const char *arg, struct callchain_param *param);
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
+int parse_callchain_report_opt(const char *arg);
+int parse_callchain_top_opt(const char *arg);
+int perf_callchain_config(const char *var, const char *value);
+
+static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
+					     struct callchain_cursor *src)
+{
+	*dest = *src;
+	/* Start dest at src's read position; pos/curr are copied as-is until dest is committed. */
+	dest->first = src->curr;
+	dest->nr -= src->pos;
+}
+
+#ifdef HAVE_SKIP_CALLCHAIN_IDX
+int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain);
+#else
+static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
+			struct ip_callchain *chain __maybe_unused)
+{
+	return -1;
+}
+#endif
+
+char *callchain_list__sym_name(struct callchain_list *cl,
+			       char *bf, size_t bfsize, bool show_dso);
+char *callchain_node__scnprintf_value(struct callchain_node *node,
+				      char *bf, size_t bfsize, u64 total);
+int callchain_node__fprintf_value(struct callchain_node *node,
+				  FILE *fp, u64 total);
+
+int callchain_list_counts__printf_value(struct callchain_list *clist,
+					FILE *fp, char *bf, int bfsize);
+
+void free_callchain(struct callchain_root *root);
+void decay_callchain(struct callchain_root *root);
+int callchain_node__make_parent_list(struct callchain_node *node);
+
+int callchain_branch_counts(struct callchain_root *root,
+			    u64 *branch_count, u64 *predicted_count,
+			    u64 *abort_count, u64 *cycles_count);
+
+#endif	/* __PERF_CALLCHAIN_H */
diff --git a/util/cgroup.c b/util/cgroup.c
new file mode 100644
index 0000000..decb91f
--- /dev/null
+++ b/util/cgroup.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include "../perf.h"
+#include <subcmd/parse-options.h>
+#include "evsel.h"
+#include "cgroup.h"
+#include "evlist.h"
+#include <linux/stringify.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+int nr_cgroups;
+
+static int
+cgroupfs_find_mountpoint(char *buf, size_t maxlen)
+{
+	FILE *fp;
+	char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
+	char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
+	char *token, *saved_ptr = NULL;
+	/* Copy the cgroup mount point to use into buf; returns 0, or -1 on failure. */
+	fp = fopen("/proc/mounts", "r");
+	if (!fp)
+		return -1;
+
+	/*
+	 * in order to handle split hierarchy, we need to scan /proc/mounts
+	 * and inspect every cgroupfs mount point to find one that has
+	 * perf_event subsystem
+	 */
+	path_v1[0] = '\0';
+	path_v2[0] = '\0';
+
+	while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
+				__stringify(PATH_MAX)"s %*d %*d\n",
+				mountpoint, type, tokens) == 3) {
+		/* per line: first field skipped, then mount point, type, options, two ints skipped */
+		if (!path_v1[0] && !strcmp(type, "cgroup")) {
+			/* v1: search the comma separated options for "perf_event" */
+			token = strtok_r(tokens, ",", &saved_ptr);
+
+			while (token != NULL) {
+				if (!strcmp(token, "perf_event")) {
+					strcpy(path_v1, mountpoint);
+					break;
+				}
+				token = strtok_r(NULL, ",", &saved_ptr);
+			}
+		}
+		/* v2: remember the first cgroup2 mount point seen */
+		if (!path_v2[0] && !strcmp(type, "cgroup2"))
+			strcpy(path_v2, mountpoint);
+
+		if (path_v1[0] && path_v2[0])
+			break;
+	}
+	fclose(fp);
+	/* the v1 perf_event hierarchy is preferred over cgroup2 */
+	if (path_v1[0])
+		path = path_v1;
+	else if (path_v2[0])
+		path = path_v2;
+	else
+		return -1;
+	/* copy only when the path and its NUL terminator fit in maxlen bytes */
+	if (strlen(path) < maxlen) {
+		strcpy(buf, path);
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * Open cgroup @name under the cgroupfs mount point; returns the fd or -1.
+ */
+static int open_cgroup(const char *name)
+{
+	char mnt[PATH_MAX + 1];
+	char path[PATH_MAX + 1];
+	int fd;
+
+	if (cgroupfs_find_mountpoint(mnt, sizeof(mnt)))
+		return -1;
+
+	scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		fprintf(stderr, "no access to cgroup %s\n", path);
+	return fd;
+}
+
+/*
+ * Check whether a cgroup named @str is already attached to an event of
+ * @evlist, so that it can be reused.  Returns the first match with an
+ * extra reference taken, or NULL if no event has such a cgroup.
+ */
+static struct cgroup *evlist__find_cgroup(struct perf_evlist *evlist, const char *str)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		struct cgroup *cand = evsel->cgrp;
+
+		if (cand && !strcmp(cand->name, str))
+			return cgroup__get(cand);
+	}
+
+	return NULL;
+}
+
+/* Create a cgroup for @name with one reference and an open fd; NULL on any failure. */
+static struct cgroup *cgroup__new(const char *name)
+{
+	struct cgroup *cgrp = zalloc(sizeof(*cgrp));
+
+	if (cgrp == NULL)
+		return NULL;
+
+	refcount_set(&cgrp->refcnt, 1);
+	cgrp->name = strdup(name);
+	if (cgrp->name == NULL)
+		goto out_free;
+	cgrp->fd = open_cgroup(name);
+	if (cgrp->fd == -1)
+		goto out_free;
+
+	return cgrp;
+
+out_free:
+	free(cgrp->name);	/* free(NULL) is a no-op when strdup() failed */
+	free(cgrp);
+	return NULL;
+}
+
+/* Reuse @evlist's cgroup called @name (taking a new reference) or create it. */
+struct cgroup *evlist__findnew_cgroup(struct perf_evlist *evlist, const char *name)
+{
+	struct cgroup *found = evlist__find_cgroup(evlist, name);
+	return found ? found : cgroup__new(name);
+}
+
+/*
+ * Attach cgroup @str to the event whose position in @evlist equals the
+ * current nr_cgroups.  Returns 0, or -1 if the cgroup cannot be obtained or
+ * no such event exists (the reference is then dropped again).
+ */
+static int add_cgroup(struct perf_evlist *evlist, const char *str)
+{
+	struct cgroup *cgrp = evlist__findnew_cgroup(evlist, str);
+	struct perf_evsel *evsel;
+	int idx = 0;
+
+	if (cgrp == NULL)
+		return -1;
+
+	/* if add cgroup N, then need to find event N */
+	evlist__for_each_entry(evlist, evsel) {
+		if (idx++ == nr_cgroups) {
+			evsel->cgrp = cgrp;
+			return 0;
+		}
+	}
+
+	cgroup__put(cgrp);
+	return -1;
+}
+
+static void cgroup__delete(struct cgroup *cgroup)
+{
+	close(cgroup->fd);
+	zfree(&cgroup->name);
+	free(cgroup);
+}
+
+/* Drop one reference to @cgrp (NULL is allowed); the last put deletes it. */
+void cgroup__put(struct cgroup *cgrp)
+{
+	if (cgrp && refcount_dec_and_test(&cgrp->refcnt))
+		cgroup__delete(cgrp);
+}
+
+struct cgroup *cgroup__get(struct cgroup *cgroup)
+{
+       if (cgroup)
+		refcount_inc(&cgroup->refcnt);
+       return cgroup;
+}
+
+/*
+ * Give every event in @evlist that has no cgroup yet a reference to @cgroup.
+ * Events that already have one are left alone.
+ */
+void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgroup)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->cgrp == NULL)
+			evsel->cgrp = cgroup__get(cgroup);
+	}
+}
+
+/* Option callback: attach cgroup N of the comma separated list @str to event N; 0 or -1. */
+int parse_cgroups(const struct option *opt, const char *str,
+		  int unset __maybe_unused)
+{
+	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
+	struct perf_evsel *counter;
+	struct cgroup *cgrp = NULL;
+	const char *p, *e, *eos = str + strlen(str);
+	char *s;
+	int ret, i = 0;
+
+	if (list_empty(&evlist->entries)) {
+		fprintf(stderr, "must define events before cgroups\n");
+		return -1;
+	}
+
+	for (;;) {
+		p = strchr(str, ',');
+		e = p ? p : eos;
+
+		/* allow empty cgroups, i.e., skip */
+		if (e - str) {
+			/* strndup() adds the NUL terminator */
+			s = strndup(str, e - str);
+			if (!s)
+				return -1;
+			ret = add_cgroup(evlist, s);
+			free(s);
+			if (ret)
+				return -1;
+		}
+		/* nr_cgroups is increased even for empty cgroups */
+		nr_cgroups++;
+		if (!p)
+			break;
+		str = p+1;
+	}
+
+	/*
+	 * A single cgroup applies to all events.  It is NULL when the list
+	 * was empty, which cgroup__get() tolerates but refcount_inc() does not.
+	 */
+	if (nr_cgroups == 1) {
+		evlist__for_each_entry(evlist, counter) {
+			if (i++ == 0)
+				cgrp = counter->cgrp;
+			else
+				counter->cgrp = cgroup__get(cgrp);
+		}
+	}
+	return 0;
+}
diff --git a/util/cgroup.h b/util/cgroup.h
new file mode 100644
index 0000000..f033a80
--- /dev/null
+++ b/util/cgroup.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CGROUP_H__
+#define __CGROUP_H__
+
+#include <linux/refcount.h>
+
+struct option;
+
+struct cgroup {
+	char *name;		/* heap copy of the cgroup name, freed on delete */
+	int fd;			/* open descriptor for the cgroup, closed on delete */
+	refcount_t refcnt;	/* changed via cgroup__get()/cgroup__put() */
+};
+
+
+extern int nr_cgroups; /* number of explicit cgroups defined */
+
+struct cgroup *cgroup__get(struct cgroup *cgroup);
+void cgroup__put(struct cgroup *cgroup);
+
+struct perf_evlist;
+
+struct cgroup *evlist__findnew_cgroup(struct perf_evlist *evlist, const char *name);
+
+void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgroup);
+
+int parse_cgroups(const struct option *opt, const char *str, int unset);
+
+#endif /* __CGROUP_H__ */
diff --git a/util/cloexec.c b/util/cloexec.c
new file mode 100644
index 0000000..ca0fff6
--- /dev/null
+++ b/util/cloexec.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <sched.h>
+#include "util.h"
+#include "../perf.h"
+#include "cloexec.h"
+#include "asm/bug.h"
+#include "debug.h"
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <sys/syscall.h>
+
+static unsigned long flag = PERF_FLAG_FD_CLOEXEC;
+
+/* Weak definition (presumably a fallback for libcs without it): use the getcpu syscall. */
+int __weak sched_getcpu(void)
+{
+#ifdef __NR_getcpu
+	unsigned cpu;
+	if (syscall(__NR_getcpu, &cpu, NULL, NULL) == 0)
+		return cpu;
+#else
+	errno = ENOSYS;
+#endif
+	return -1;
+}
+
+static int perf_flag_probe(void)
+{
+	/* use 'safest' configuration as used in perf_evsel__fallback() */
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_CPU_CLOCK,
+		.exclude_kernel = 1,
+	};
+	int fd;
+	int err;
+	int cpu;
+	pid_t pid = -1;
+	char sbuf[STRERR_BUFSIZE];
+	/* Returns 1 if the flag is accepted, -1 if even the flag-less open fails (not EBUSY), else 0. */
+	cpu = sched_getcpu();
+	if (cpu < 0)
+		cpu = 0;
+
+	/*
+	 * Using -1 for the pid is a workaround to avoid gratuitous jump label
+	 * changes.
+	 */
+	while (1) {
+		/* check cloexec flag */
+		fd = sys_perf_event_open(&attr, pid, cpu, -1,
+					 PERF_FLAG_FD_CLOEXEC);
+		if (fd < 0 && pid == -1 && errno == EACCES) {
+			pid = 0;	/* retry once with pid 0 instead of -1 */
+			continue;
+		}
+		break;
+	}
+	err = errno;	/* saved before close() or the warning below can change errno */
+
+	if (fd >= 0) {
+		close(fd);
+		return 1;
+	}
+	/* EINVAL and EBUSY are the failures expected here; warn about anything else */
+	WARN_ONCE(err != EINVAL && err != EBUSY,
+		  "perf_event_open(..., PERF_FLAG_FD_CLOEXEC) failed with unexpected error %d (%s)\n",
+		  err, str_error_r(err, sbuf, sizeof(sbuf)));
+
+	/* not supported, confirm error related to PERF_FLAG_FD_CLOEXEC */
+	while (1) {
+		fd = sys_perf_event_open(&attr, pid, cpu, -1, 0);
+		if (fd < 0 && pid == -1 && errno == EACCES) {
+			pid = 0;
+			continue;
+		}
+		break;
+	}
+	err = errno;
+
+	if (fd >= 0)
+		close(fd);
+
+	if (WARN_ONCE(fd < 0 && err != EBUSY,
+		      "perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n",
+		      err, str_error_r(err, sbuf, sizeof(sbuf))))
+		return -1;
+	/* the flag-less open worked (or was merely EBUSY): report the flag as unsupported */
+	return 0;
+}
+
+/* Return PERF_FLAG_FD_CLOEXEC, or 0 if the one-time probe did not confirm it. */
+unsigned long perf_event_open_cloexec_flag(void)
+{
+	static bool probed;
+
+	if (probed)
+		return flag;
+	if (perf_flag_probe() <= 0)
+		flag = 0;
+	probed = true;
+	return flag;
+}
diff --git a/util/cloexec.h b/util/cloexec.h
new file mode 100644
index 0000000..78216b1
--- /dev/null
+++ b/util/cloexec.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CLOEXEC_H
+#define __PERF_CLOEXEC_H
+
+unsigned long perf_event_open_cloexec_flag(void);
+
+#endif /* __PERF_CLOEXEC_H */
diff --git a/util/color.c b/util/color.c
new file mode 100644
index 0000000..39e628b
--- /dev/null
+++ b/util/color.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include "cache.h"
+#include "config.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include "color.h"
+#include <math.h>
+#include <unistd.h>
+
+int perf_use_color_default = -1;
+
+/*
+ * Map a color config value to 0 or 1.  "never"/"always" are literal; "auto"
+ * or any other true value means color only on a tty or under a pager, and
+ * only when $TERM is set to something other than "dumb".
+ */
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
+{
+	const char *term;
+
+	if (value && !strcasecmp(value, "never"))
+		return 0;
+	if (value && !strcasecmp(value, "always"))
+		return 1;
+
+	/* Missing or explicit false turns colorization off; "auto" skips this test */
+	if (!(value && !strcasecmp(value, "auto")) && !perf_config_bool(var, value))
+		return 0;
+
+	if (stdout_is_tty < 0)
+		stdout_is_tty = isatty(1);
+	if (!stdout_is_tty && !pager_in_use())
+		return 0;
+
+	term = getenv("TERM");
+	return term && strcmp(term, "dumb");
+}
+
+/* Config callback: record the "color.ui" setting; every other key is ignored. */
+int perf_color_default_config(const char *var, const char *value,
+			      void *cb __maybe_unused)
+{
+	/* -1: let perf_config_colorbool() query isatty(1) itself */
+	if (strcmp(var, "color.ui") == 0)
+		perf_use_color_default = perf_config_colorbool(var, value, -1);
+
+	return 0;
+}
+
+/*
+ * Format into bf, wrapped in @color and the reset sequence when coloring is
+ * on and @color is non-empty; @trail, if given, is appended.  Returns length.
+ */
+static int __color_vsnprintf(char *bf, size_t size, const char *color,
+			     const char *fmt, va_list args, const char *trail)
+{
+	int colored;
+	int len = 0;
+
+	/* First use: auto-detect from stdout or an active pager. */
+	if (perf_use_color_default < 0)
+		perf_use_color_default = isatty(1) || pager_in_use();
+
+	colored = perf_use_color_default && *color;
+	if (colored)
+		len += scnprintf(bf, size, "%s", color);
+	len += vscnprintf(bf + len, size - len, fmt, args);
+	if (colored)
+		len += scnprintf(bf + len, size - len, "%s", PERF_COLOR_RESET);
+	if (trail)
+		len += scnprintf(bf + len, size - len, "%s", trail);
+	return len;
+}
+
+/*
+ * Print to @fp, wrapped in @color and the reset sequence when coloring is on.
+ * Colors are not included in the return value.
+ */
+static int __color_vfprintf(FILE *fp, const char *color, const char *fmt,
+		va_list args)
+{
+	int colored, len;
+
+	/* First use: auto-detect from fp's descriptor or an active pager. */
+	if (perf_use_color_default < 0)
+		perf_use_color_default = isatty(fileno(fp)) || pager_in_use();
+
+	colored = perf_use_color_default && *color;
+	if (colored)
+		fprintf(fp, "%s", color);
+	len = vfprintf(fp, fmt, args);
+	if (colored)
+		fprintf(fp, "%s", PERF_COLOR_RESET);
+
+	/* Only the formatted text is counted. */
+	return len;
+}
+
+int color_vsnprintf(char *bf, size_t size, const char *color,
+		    const char *fmt, va_list args)
+{
+	return __color_vsnprintf(bf, size, color, fmt, args, NULL);
+}
+
+int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args)
+{
+	return __color_vfprintf(fp, color, fmt, args);
+}
+
+int color_snprintf(char *bf, size_t size, const char *color,
+		   const char *fmt, ...)
+{
+	va_list args;
+	int r;
+
+	va_start(args, fmt);
+	r = color_vsnprintf(bf, size, color, fmt, args);
+	va_end(args);
+	return r;
+}
+
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
+{
+	va_list args;
+	int r;
+
+	va_start(args, fmt);
+	r = color_vfprintf(fp, color, fmt, args);
+	va_end(args);
+	return r;
+}
+
+/*
+ * Write @count bytes of @buf to @fp, coloring each non-empty line separately.
+ * Returns 0 on success (an empty buffer included), non-zero on write error.
+ */
+int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf)
+{
+	if (!count)	/* fwrite() of 0 bytes returns 0: that is not an error */
+		return 0;
+	if (!*color)
+		return fwrite(buf, count, 1, fp) != 1;
+
+	while (count) {
+		const char *p = memchr(buf, '\n', count);
+
+		if (p != buf && (fputs(color, fp) < 0 ||
+				fwrite(buf, p ? (size_t)(p - buf) : count, 1, fp) != 1 ||
+				fputs(PERF_COLOR_RESET, fp) < 0))
+			return -1;
+		if (!p)
+			return 0;
+		if (fputc('\n', fp) < 0)
+			return -1;
+		count -= p + 1 - buf;
+		buf = p + 1;
+	}
+	return 0;
+}
+
+/*
+ * Pick the color for an overhead percentage, judged by its magnitude:
+ * high-overhead entries (>= MIN_RED) are red, mid-overhead entries
+ * (> MIN_GREEN) are green, and low-overhead ones keep the normal color.
+ * The sign of @percent is ignored.
+ */
+const char *get_percent_color(double percent)
+{
+	double magnitude = fabs(percent);
+
+	if (magnitude >= MIN_RED)
+		return PERF_COLOR_RED;
+	if (magnitude > MIN_GREEN)
+		return PERF_COLOR_GREEN;
+
+	return PERF_COLOR_NORMAL;
+}
+
+/*
+ * Print @percent to @fp using @fmt, in the color get_percent_color()
+ * selects for it.  @percent is the only argument handed to @fmt.
+ *
+ * Returns the value of color_fprintf().
+ */
+int percent_color_fprintf(FILE *fp, const char *fmt, double percent)
+{
+	return color_fprintf(fp, get_percent_color(percent), fmt, percent);
+}
+
+int value_color_snprintf(char *bf, size_t size, const char *fmt, double value)
+{
+	const char *color = get_percent_color(value);
+	return color_snprintf(bf, size, color, fmt, value);
+}
+
+int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...)
+{
+	va_list args;
+	double percent;
+
+	va_start(args, fmt);
+	percent = va_arg(args, double);
+	va_end(args);
+	return value_color_snprintf(bf, size, fmt, percent);
+}
+
+/*
+ * Colored snprintf of a percentage whose @fmt takes an int before the double.
+ */
+int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...)
+{
+	va_list ap;
+	double pct;
+	int width;
+
+	va_start(ap, fmt);
+	width = va_arg(ap, int);
+	pct = va_arg(ap, double);
+	va_end(ap);
+	return color_snprintf(bf, size, get_percent_color(pct), fmt, width, pct);
+}
diff --git a/util/color.h b/util/color.h
new file mode 100644
index 0000000..22777b1
--- /dev/null
+++ b/util/color.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_COLOR_H
+#define __PERF_COLOR_H
+
+#include <stdio.h>
+
+/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */
+#define COLOR_MAXLEN 24
+
+#define PERF_COLOR_NORMAL	""
+#define PERF_COLOR_RESET	"\033[m"
+#define PERF_COLOR_BOLD		"\033[1m"
+#define PERF_COLOR_RED		"\033[31m"
+#define PERF_COLOR_GREEN	"\033[32m"
+#define PERF_COLOR_YELLOW	"\033[33m"
+#define PERF_COLOR_BLUE		"\033[34m"
+#define PERF_COLOR_MAGENTA	"\033[35m"
+#define PERF_COLOR_CYAN		"\033[36m"
+#define PERF_COLOR_BG_RED	"\033[41m"
+
+#define MIN_GREEN	0.5
+#define MIN_RED		5.0
+
+/*
+ * This variable stores the value of color.ui
+ */
+extern int perf_use_color_default;
+
+
+/*
+ * Use this instead of perf_default_config if you need the value of color.ui.
+ */
+int perf_color_default_config(const char *var, const char *value, void *cb);
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty);
+int color_vsnprintf(char *bf, size_t size, const char *color,
+		    const char *fmt, va_list args);
+int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args);
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
+int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...);
+int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
+int value_color_snprintf(char *bf, size_t size, const char *fmt, double value);
+int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...);
+int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...);
+int percent_color_fprintf(FILE *fp, const char *fmt, double percent);
+const char *get_percent_color(double percent);
+
+#endif /* __PERF_COLOR_H */
diff --git a/util/comm.c b/util/comm.c
new file mode 100644
index 0000000..7798a2c
--- /dev/null
+++ b/util/comm.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "comm.h"
+#include "util.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/refcount.h>
+#include "rwsem.h"
+
+/*
+ * A command-name string stored in the comm_str_root rbtree, ordered by
+ * strcmp() on @str (see __comm_str__findnew()) and shared via @refcnt.
+ */
+struct comm_str {
+	char *str;		/* strdup()ed name, freed in comm_str__put() */
+	struct rb_node rb_node;	/* linkage into comm_str_root */
+	refcount_t refcnt;	/* set to 1 by comm_str__alloc() */
+};
+
+/*
+ * Tree of all live comm_str nodes and the rwsem taken (for writing) around
+ * lookups/insertions and erasures.
+ * Should perhaps be moved to struct machine
+ */
+static struct rb_root comm_str_root;
+static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,};
+
+/*
+ * Take one more reference on @cs and hand it back.
+ * A NULL @cs is returned untouched.
+ */
+static struct comm_str *comm_str__get(struct comm_str *cs)
+{
+	if (cs == NULL)
+		return NULL;
+
+	refcount_inc(&cs->refcnt);
+	return cs;
+}
+
+/*
+ * Drop one reference on @cs (NULL is ignored).  When the last reference
+ * goes away the node is unlinked from comm_str_root under comm_str_lock
+ * and both the string and the node are freed.
+ */
+static void comm_str__put(struct comm_str *cs)
+{
+	if (cs == NULL)
+		return;
+
+	/* Other holders remain: nothing to tear down. */
+	if (!refcount_dec_and_test(&cs->refcnt))
+		return;
+
+	down_write(&comm_str_lock);
+	rb_erase(&cs->rb_node, &comm_str_root);
+	up_write(&comm_str_lock);
+
+	zfree(&cs->str);
+	free(cs);
+}
+
+/*
+ * Allocate a node holding a private copy of @str with a reference count
+ * of one.  The node is not linked into any tree.
+ * Returns NULL when either allocation fails.
+ */
+static struct comm_str *comm_str__alloc(const char *str)
+{
+	struct comm_str *node = zalloc(sizeof(*node));
+
+	if (node == NULL)
+		return NULL;
+
+	node->str = strdup(str);
+	if (node->str != NULL) {
+		refcount_set(&node->refcnt, 1);
+		return node;
+	}
+
+	free(node);
+	return NULL;
+}
+
+/*
+ * Look @str up in @root; on a hit return the existing node with one more
+ * reference, otherwise allocate a node and insert it at the position the
+ * search ended on.  Returns NULL if the allocation fails.
+ * The caller is expected to serialize access to @root (comm_str__findnew()
+ * does so with comm_str_lock).
+ */
+static
+struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct comm_str *fresh;
+
+	while (*link != NULL) {
+		struct comm_str *cur;
+		int order;
+
+		parent = *link;
+		cur = rb_entry(parent, struct comm_str, rb_node);
+		order = strcmp(str, cur->str);
+
+		if (order == 0)
+			return comm_str__get(cur);
+
+		/* Smaller keys live to the left, larger ones to the right. */
+		link = order < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	fresh = comm_str__alloc(str);
+	if (fresh == NULL)
+		return NULL;
+
+	rb_link_node(&fresh->rb_node, parent, link);
+	rb_insert_color(&fresh->rb_node, root);
+
+	return fresh;
+}
+
+/*
+ * Locked wrapper around __comm_str__findnew(): holds comm_str_lock for
+ * writing for the duration of the lookup/insertion.
+ */
+static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
+{
+	struct comm_str *found;
+
+	down_write(&comm_str_lock);
+	found = __comm_str__findnew(str, root);
+	up_write(&comm_str_lock);
+
+	return found;
+}
+
+/*
+ * Create a comm for the name @str, recording @timestamp as its start and
+ * @exec as its exec flag.  The name is shared through comm_str_root.
+ * Returns NULL on allocation failure.
+ */
+struct comm *comm__new(const char *str, u64 timestamp, bool exec)
+{
+	struct comm_str *cs;
+	struct comm *comm;
+
+	comm = zalloc(sizeof(*comm));
+	if (comm == NULL)
+		return NULL;
+
+	cs = comm_str__findnew(str, &comm_str_root);
+	if (cs == NULL) {
+		free(comm);
+		return NULL;
+	}
+
+	comm->comm_str = cs;
+	comm->start = timestamp;
+	comm->exec = exec;
+
+	return comm;
+}
+
+/*
+ * Replace the name of @comm with @str and its start time with @timestamp.
+ * The exec flag is only ever raised here, never cleared.
+ * Returns 0 on success, -ENOMEM (leaving @comm untouched) if the new name
+ * cannot be obtained.
+ */
+int comm__override(struct comm *comm, const char *str, u64 timestamp, bool exec)
+{
+	struct comm_str *prev = comm->comm_str;
+	struct comm_str *next = comm_str__findnew(str, &comm_str_root);
+
+	if (next == NULL)
+		return -ENOMEM;
+
+	/* The new reference is already held, so dropping the old one is safe. */
+	comm_str__put(prev);
+	comm->comm_str = next;
+	comm->start = timestamp;
+
+	if (exec)
+		comm->exec = true;
+
+	return 0;
+}
+
+/* Release @comm: drop its reference on the shared name, then free it. */
+void comm__free(struct comm *comm)
+{
+	struct comm_str *cs = comm->comm_str;
+
+	comm_str__put(cs);
+	free(comm);
+}
+
+/* Return the name string of @comm; the string stays owned by the comm_str. */
+const char *comm__str(const struct comm *comm)
+{
+	const struct comm_str *cs = comm->comm_str;
+
+	return cs->str;
+}
diff --git a/util/comm.h b/util/comm.h
new file mode 100644
index 0000000..3e5c438
--- /dev/null
+++ b/util/comm.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_COMM_H
+#define __PERF_COMM_H
+
+#include "../perf.h"
+#include <linux/rbtree.h>
+#include <linux/list.h>
+
+/* Opaque here; the definition lives in comm.c. */
+struct comm_str;
+
+struct comm {
+	struct comm_str *comm_str;	/* shared name, see comm__str() */
+	u64 start;			/* timestamp given to comm__new()/comm__override() */
+	struct list_head list;		/* NOTE(review): list owner is outside this file */
+	bool exec;			/* set from the @exec argument; comm__override() only raises it */
+	union { /* Tool specific area */
+		void	*priv;
+		u64	db_id;
+	};
+};
+
+/* Drop the name reference held by @comm and free @comm. */
+void comm__free(struct comm *comm);
+/* Allocate a comm named @str; NULL on allocation failure. */
+struct comm *comm__new(const char *str, u64 timestamp, bool exec);
+/* Name of @comm; storage is owned by the comm. */
+const char *comm__str(const struct comm *comm);
+/* Rename @comm in place; 0 on success, -ENOMEM on failure. */
+int comm__override(struct comm *comm, const char *str, u64 timestamp,
+		   bool exec);
+
+#endif  /* __PERF_COMM_H */
diff --git a/util/compress.h b/util/compress.h
new file mode 100644
index 0000000..ecca688
--- /dev/null
+++ b/util/compress.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_COMPRESS_H
+#define PERF_COMPRESS_H
+
+/*
+ * Each prototype is only declared when the matching HAVE_*_SUPPORT macro is
+ * defined by the build.
+ * NOTE(review): presumably these decompress the file named @input into the
+ * descriptor @output_fd; the implementations are not visible here - confirm
+ * the return convention against them.
+ */
+#ifdef HAVE_ZLIB_SUPPORT
+int gzip_decompress_to_file(const char *input, int output_fd);
+#endif
+
+#ifdef HAVE_LZMA_SUPPORT
+int lzma_decompress_to_file(const char *input, int output_fd);
+#endif
+
+#endif /* PERF_COMPRESS_H */
diff --git a/util/config.c b/util/config.c
new file mode 100644
index 0000000..84eb939
--- /dev/null
+++ b/util/config.c
@@ -0,0 +1,828 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * config.c
+ *
+ * Helper functions for parsing config items.
+ * Originally copied from GIT source.
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ * Copyright (C) Johannes Schindelin, 2005
+ *
+ */
+#include <errno.h>
+#include <sys/param.h>
+#include "util.h"
+#include "cache.h"
+#include <subcmd/exec-cmd.h>
+#include "util/hist.h"  /* perf_hist_config */
+#include "util/llvm-utils.h"   /* perf_llvm_config */
+#include "config.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/string.h>
+
+#include "sane_ctype.h"
+
+/* Size of the "section.name" key buffer used by perf_parse_file(). */
+#define MAXNAME (256)
+
+/* Directory name used for the default buildid cache, see set_buildid_dir(). */
+#define DEBUG_CACHE_DIR ".debug"
+
+
+char buildid_dir[MAXPATHLEN]; /* root dir for buildid, binary cache */
+
+/* State of the file currently being parsed (set by perf_config_from_file()). */
+static FILE *config_file;
+static const char *config_file_name;
+static int config_linenr;		/* 1-based, bumped on every '\n' read */
+static int config_file_eof;		/* set once fgetc() returned EOF */
+/* Set loaded by perf_config__init() and walked by perf_config(). */
+static struct perf_config_set *config_set;
+
+/* When non-NULL, the only config file perf_config_set__init() reads. */
+const char *config_exclusive_filename;
+
+/*
+ * Fetch the next character of config_file.
+ *
+ * "\r\n" is folded into a single '\n' (a lone '\r' is returned as is),
+ * every '\n' bumps config_linenr, and end of file is reported as '\n'
+ * with config_file_eof set.  Without an open file '\n' is returned.
+ */
+static int get_next_char(void)
+{
+	FILE *f = config_file;
+	int c;
+
+	if (f == NULL)
+		return '\n';
+
+	c = fgetc(f);
+	if (c == '\r') {
+		/* DOS like systems */
+		int next = fgetc(f);
+
+		if (next == '\n')
+			c = next;
+		else
+			ungetc(next, f);
+	}
+
+	switch (c) {
+	case '\n':
+		config_linenr++;
+		break;
+	case EOF:
+		config_file_eof = 1;
+		c = '\n';
+		break;
+	default:
+		break;
+	}
+
+	return c;
+}
+
+/*
+ * Read the value part of a "name = value" line, up to the end of the line.
+ *
+ * Unquoted whitespace runs collapse to one space (leading/trailing ones are
+ * dropped), an unquoted ';' or '#' starts a comment, '"' toggles quoting and
+ * backslash escapes \t, \b, \n, \\ and \" are translated; a backslash at the
+ * end of a line continues the value on the next line.
+ *
+ * Returns a pointer to a static buffer (overwritten by the next call), or
+ * NULL on an unterminated quote, an unknown escape, or an over-long value.
+ */
+static char *parse_value(void)
+{
+	static char value[1024];
+	int quote = 0, comment = 0, space = 0;
+	size_t len = 0;
+
+	for (;;) {
+		int c = get_next_char();
+
+		/* One pass stores at most two bytes, so this keeps us in bounds. */
+		if (len >= sizeof(value) - 1)
+			return NULL;
+		if (c == '\n') {
+			/* A newline inside quotes is a syntax error. */
+			if (quote)
+				return NULL;
+			value[len] = 0;
+			return value;
+		}
+		/* Swallow everything up to the newline once in a comment. */
+		if (comment)
+			continue;
+		if (isspace(c) && !quote) {
+			/* Remember the gap; it is emitted lazily below. */
+			space = 1;
+			continue;
+		}
+		if (!quote) {
+			if (c == ';' || c == '#') {
+				comment = 1;
+				continue;
+			}
+		}
+		if (space) {
+			/* No separator before the first stored character. */
+			if (len)
+				value[len++] = ' ';
+			space = 0;
+		}
+		if (c == '\\') {
+			c = get_next_char();
+			switch (c) {
+			case '\n':
+				/* Line continuation. */
+				continue;
+			case 't':
+				c = '\t';
+				break;
+			case 'b':
+				c = '\b';
+				break;
+			case 'n':
+				c = '\n';
+				break;
+			/* Some characters escape as themselves */
+			case '\\': case '"':
+				break;
+			/* Reject unknown escape sequences */
+			default:
+				return NULL;
+			}
+			value[len++] = c;
+			continue;
+		}
+		if (c == '"') {
+			quote = 1-quote;
+			continue;
+		}
+		value[len++] = c;
+	}
+}
+
+/* Return 1 if @c may appear in a config key (alphanumeric, '-' or '_'), else 0. */
+static inline int iskeychar(int c)
+{
+	if (c == '-' || c == '_')
+		return 1;
+
+	return isalnum(c) != 0;
+}
+
+/*
+ * Finish reading a variable name into @name (which already holds @len
+ * characters), read its optional "= value" part and pass both to @fn.
+ *
+ * A name with no '=' before the end of the line is reported with a NULL
+ * value.  Returns -1 on a syntax error or an over-long name, otherwise
+ * whatever @fn returns.
+ */
+static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
+{
+	int c;
+	char *value;
+
+	/* Get the full name */
+	for (;;) {
+		c = get_next_char();
+		if (config_file_eof)
+			break;
+		if (!iskeychar(c))
+			break;
+		name[len++] = c;
+		/* Leave room for the terminating NUL written below. */
+		if (len >= MAXNAME)
+			return -1;
+	}
+	name[len] = 0;
+	/* Skip blanks between the name and '=' (or the end of the line). */
+	while (c == ' ' || c == '\t')
+		c = get_next_char();
+
+	value = NULL;
+	if (c != '\n') {
+		if (c != '=')
+			return -1;
+		value = parse_value();
+		if (!value)
+			return -1;
+	}
+	return fn(name, value, data);
+}
+
+/*
+ * Parse the quoted part of a '[base "extension"]' section header.
+ *
+ * @name already holds @baselen characters of the base name and @c is the
+ * whitespace character that ended it.  Appends '.' plus the extension
+ * (a backslash makes the following character literal) and returns the new
+ * length, or -1 on a malformed or over-long header.
+ */
+static int get_extended_base_var(char *name, int baselen, int c)
+{
+	/* Skip whitespace up to the opening quote; a newline is an error. */
+	do {
+		if (c == '\n')
+			return -1;
+		c = get_next_char();
+	} while (isspace(c));
+
+	/* We require the format to be '[base "extension"]' */
+	if (c != '"')
+		return -1;
+	name[baselen++] = '.';
+
+	for (;;) {
+		int ch = get_next_char();
+
+		if (ch == '\n')
+			return -1;
+		if (ch == '"')
+			break;
+		if (ch == '\\') {
+			ch = get_next_char();
+			if (ch == '\n')
+				return -1;
+		}
+		name[baselen++] = ch;
+		if (baselen > MAXNAME / 2)
+			return -1;
+	}
+
+	/* Final ']' */
+	if (get_next_char() != ']')
+		return -1;
+	return baselen;
+}
+
+/*
+ * Parse a section header after its opening '[' into @name, lower-casing
+ * the base name.  Whitespace hands over to get_extended_base_var().
+ *
+ * Returns the number of characters stored, or -1 on end of file, an
+ * invalid character or an over-long name.
+ */
+static int get_base_var(char *name)
+{
+	int baselen = 0;
+
+	for (;;) {
+		int c = get_next_char();
+		if (config_file_eof)
+			return -1;
+		if (c == ']')
+			return baselen;
+		if (isspace(c))
+			return get_extended_base_var(name, baselen, c);
+		if (!iskeychar(c) && c != '.')
+			return -1;
+		if (baselen > MAXNAME / 2)
+			return -1;
+		name[baselen++] = tolower(c);
+	}
+}
+
+/*
+ * Parse the file set up in config_file, calling @fn(key, value, @data) for
+ * every variable, where key is "<section>.<name>".
+ *
+ * Returns 0 when the end of the file is reached, -1 (after reporting the
+ * offending line) on a syntax error or when @fn returns a negative value.
+ */
+static int perf_parse_file(config_fn_t fn, void *data)
+{
+	int comment = 0;
+	int baselen = 0;
+	/* Holds "<section>." followed by the variable name being read. */
+	static char var[MAXNAME];
+
+	/* U+FEFF Byte Order Mark in UTF8 */
+	static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
+	const unsigned char *bomptr = utf8_bom;
+
+	for (;;) {
+		int line, c = get_next_char();
+
+		if (bomptr && *bomptr) {
+			/* We are at the file beginning; skip UTF8-encoded BOM
+			 * if present. Sane editors won't put this in on their
+			 * own, but e.g. Windows Notepad will do it happily. */
+			if ((unsigned char) c == *bomptr) {
+				bomptr++;
+				continue;
+			} else {
+				/* Do not tolerate partial BOM. */
+				if (bomptr != utf8_bom)
+					break;
+				/* No BOM at file beginning. Cool. */
+				bomptr = NULL;
+			}
+		}
+		if (c == '\n') {
+			/* get_next_char() reports EOF as '\n' plus this flag. */
+			if (config_file_eof)
+				return 0;
+			comment = 0;
+			continue;
+		}
+		if (comment || isspace(c))
+			continue;
+		if (c == '#' || c == ';') {
+			comment = 1;
+			continue;
+		}
+		if (c == '[') {
+			baselen = get_base_var(var);
+			if (baselen <= 0)
+				break;
+			var[baselen++] = '.';
+			var[baselen] = 0;
+			continue;
+		}
+		/* A variable name has to start with a letter. */
+		if (!isalpha(c))
+			break;
+		var[baselen] = tolower(c);
+
+		/*
+		 * The get_value function might or might not reach the '\n',
+		 * so saving the current line number for error reporting.
+		 */
+		line = config_linenr;
+		if (get_value(fn, data, var, baselen+1) < 0) {
+			config_linenr = line;
+			break;
+		}
+	}
+	pr_err("bad config file line %d in %s\n", config_linenr, config_file_name);
+	return -1;
+}
+
+/*
+ * Apply the unit suffix at @end to *@val.
+ *
+ * An empty suffix leaves *@val alone; "k", "m" and "g" (any case) multiply
+ * it by 1024, 1024^2 and 1024^3.  Returns 1 if the suffix was understood,
+ * 0 (with *@val untouched) otherwise.
+ */
+static int parse_unit_factor(const char *end, unsigned long *val)
+{
+	unsigned long scale;
+
+	if (*end == '\0')
+		return 1;
+
+	if (strcasecmp(end, "k") == 0)
+		scale = 1024;
+	else if (strcasecmp(end, "m") == 0)
+		scale = 1024 * 1024;
+	else if (strcasecmp(end, "g") == 0)
+		scale = 1024 * 1024 * 1024;
+	else
+		return 0;
+
+	*val *= scale;
+	return 1;
+}
+
+/*
+ * Parse @value as a number (base auto-detected by strtoll()) with an
+ * optional k/m/g suffix and store the scaled result in *@ret.
+ * Returns 1 on success, 0 for a NULL/empty string or an unknown suffix.
+ */
+static int perf_parse_llong(const char *value, long long *ret)
+{
+	unsigned long factor = 1;
+	long long val;
+	char *end;
+
+	if (value == NULL || *value == '\0')
+		return 0;
+
+	val = strtoll(value, &end, 0);
+	if (!parse_unit_factor(end, &factor))
+		return 0;
+
+	*ret = val * factor;
+	return 1;
+}
+
+/*
+ * Parse @value as a number (base auto-detected by strtol()) with an
+ * optional k/m/g suffix and store the scaled result in *@ret.
+ * Returns 1 on success, 0 for a NULL/empty string or an unknown suffix.
+ */
+static int perf_parse_long(const char *value, long *ret)
+{
+	unsigned long factor = 1;
+	long val;
+	char *end;
+
+	if (value == NULL || *value == '\0')
+		return 0;
+
+	val = strtol(value, &end, 0);
+	if (!parse_unit_factor(end, &factor))
+		return 0;
+
+	*ret = val * factor;
+	return 1;
+}
+
+/*
+ * Warn about an unparsable value for @name, naming the config file when one
+ * is currently being processed.
+ */
+static void bad_config(const char *name)
+{
+	if (config_file_name == NULL) {
+		pr_warning("bad config value for '%s', ignoring...\n", name);
+		return;
+	}
+
+	pr_warning("bad config value for '%s' in %s, ignoring...\n", name, config_file_name);
+}
+
+/*
+ * Parse @value into *@dest.  On failure a warning naming @name is printed,
+ * *@dest is left untouched and -1 is returned; 0 on success.
+ */
+int perf_config_u64(u64 *dest, const char *name, const char *value)
+{
+	long long parsed = 0;
+
+	if (perf_parse_llong(value, &parsed)) {
+		*dest = parsed;
+		return 0;
+	}
+
+	bad_config(name);
+	return -1;
+}
+
+/*
+ * Parse @value into *@dest.  On failure a warning naming @name is printed,
+ * *@dest is left untouched and -1 is returned; 0 on success.
+ */
+int perf_config_int(int *dest, const char *name, const char *value)
+{
+	long parsed = 0;
+
+	if (perf_parse_long(value, &parsed)) {
+		*dest = parsed;
+		return 0;
+	}
+
+	bad_config(name);
+	return -1;
+}
+
+/*
+ * Interpret @value as a boolean or, failing that, as an integer.
+ *
+ * *@is_bool is set to 1 for a missing value (true), an empty value (false)
+ * and the words true/yes/on and false/no/off (any case); otherwise it is
+ * set to 0 and the integer value is returned, or -1 if it does not parse.
+ */
+static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
+{
+	int number;
+
+	*is_bool = 1;
+	if (value == NULL)
+		return 1;
+	if (*value == '\0')
+		return 0;
+
+	if (strcasecmp(value, "true") == 0 ||
+	    strcasecmp(value, "yes") == 0 ||
+	    strcasecmp(value, "on") == 0)
+		return 1;
+
+	if (strcasecmp(value, "false") == 0 ||
+	    strcasecmp(value, "no") == 0 ||
+	    strcasecmp(value, "off") == 0)
+		return 0;
+
+	*is_bool = 0;
+	if (perf_config_int(&number, name, value) < 0)
+		return -1;
+
+	return number;
+}
+
+/*
+ * Return @value as 0 or 1.  Any non-zero result of
+ * perf_config_bool_or_int(), including its -1 error value, reads as 1.
+ */
+int perf_config_bool(const char *name, const char *value)
+{
+	int is_bool;
+	int v = perf_config_bool_or_int(name, value, &is_bool);
+
+	return v != 0;
+}
+
+/* Return @value unchanged, or NULL when @name is NULL. */
+static const char *perf_config_dirname(const char *name, const char *value)
+{
+	return name ? value : NULL;
+}
+
+/*
+ * Handler for "buildid.*" variables: "buildid.dir" is copied (truncated to
+ * fit) into buildid_dir.  Returns -1 if that variable has no value, 0 in
+ * every other case.
+ */
+static int perf_buildid_config(const char *var, const char *value)
+{
+	const char *dir;
+
+	/* same dir for all commands */
+	if (strcmp(var, "buildid.dir") != 0)
+		return 0;
+
+	dir = perf_config_dirname(var, value);
+	if (dir == NULL) {
+		pr_err("Invalid buildid directory!\n");
+		return -1;
+	}
+
+	strncpy(buildid_dir, dir, MAXPATHLEN-1);
+	buildid_dir[MAXPATHLEN-1] = '\0';
+
+	return 0;
+}
+
+/*
+ * Handler for "core.*" variables.  No variable is recognised yet, so every
+ * key is accepted and 0 is returned.
+ */
+static int perf_default_core_config(const char *var __maybe_unused,
+				    const char *value __maybe_unused)
+{
+	/* Add other config variables here. */
+	return 0;
+}
+
+/*
+ * Handler for "ui.*" variables: "ui.show-headers" sets
+ * symbol_conf.show_hist_headers.  Always returns 0.
+ */
+static int perf_ui_config(const char *var, const char *value)
+{
+	if (strcmp(var, "ui.show-headers") == 0) {
+		int enabled = perf_config_bool(var, value);
+
+		symbol_conf.show_hist_headers = enabled;
+	}
+
+	/* Add other config variables here. */
+	return 0;
+}
+
+/*
+ * Default config_fn_t: route @var to the handler of its section prefix and
+ * return that handler's result.  Variables of other sections yield 0.
+ */
+int perf_default_config(const char *var, const char *value,
+			void *dummy __maybe_unused)
+{
+	int ret = 0;
+
+	if (strstarts(var, "core."))
+		ret = perf_default_core_config(var, value);
+	else if (strstarts(var, "hist."))
+		ret = perf_hist_config(var, value);
+	else if (strstarts(var, "ui."))
+		ret = perf_ui_config(var, value);
+	else if (strstarts(var, "call-graph."))
+		ret = perf_callchain_config(var, value);
+	else if (strstarts(var, "llvm."))
+		ret = perf_llvm_config(var, value);
+	else if (strstarts(var, "buildid."))
+		ret = perf_buildid_config(var, value);
+
+	/* Add other config variables here. */
+	return ret;
+}
+
+/*
+ * Parse @filename, passing every variable to @fn with @data.
+ * Returns -1 if the file cannot be opened, otherwise the result of
+ * perf_parse_file().
+ */
+static int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
+{
+	FILE *f = fopen(filename, "r");
+	int ret;
+
+	if (f == NULL)
+		return -1;
+
+	/* Prime the parser state read by get_next_char() and friends. */
+	config_file = f;
+	config_file_name = filename;
+	config_linenr = 1;
+	config_file_eof = 0;
+
+	ret = perf_parse_file(fn, data);
+
+	fclose(f);
+	config_file_name = NULL;
+
+	return ret;
+}
+
+/*
+ * Return the path of the system-wide config file.  It is computed with
+ * system_path(ETC_PERFCONFIG) on first use and cached afterwards.
+ */
+const char *perf_etc_perfconfig(void)
+{
+	static const char *cached_path;
+
+	if (cached_path == NULL)
+		cached_path = system_path(ETC_PERFCONFIG);
+
+	return cached_path;
+}
+
+/*
+ * Read environment variable @k as a boolean; @def is returned when the
+ * variable is not set.
+ */
+static int perf_env_bool(const char *k, int def)
+{
+	const char *v = getenv(k);
+
+	if (v == NULL)
+		return def;
+
+	return perf_config_bool(k, v);
+}
+
+/* Non-zero unless PERF_CONFIG_NOSYSTEM is set to a true value. */
+static int perf_config_system(void)
+{
+	int disabled = perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);
+
+	return !disabled;
+}
+
+/* Non-zero unless PERF_CONFIG_NOGLOBAL is set to a true value. */
+static int perf_config_global(void)
+{
+	int disabled = perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);
+
+	return !disabled;
+}
+
+/* Return the section of @sections named exactly @section_name, or NULL. */
+static struct perf_config_section *find_section(struct list_head *sections,
+						const char *section_name)
+{
+	struct perf_config_section *cur;
+
+	list_for_each_entry(cur, sections, node) {
+		if (strcmp(cur->name, section_name) == 0)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/* Return the item of @section named exactly @name, or NULL. */
+static struct perf_config_item *find_config_item(const char *name,
+						 struct perf_config_section *section)
+{
+	struct perf_config_item *cur;
+
+	list_for_each_entry(cur, &section->items, node) {
+		if (strcmp(cur->name, name) == 0)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Append a new, empty section named @section_name (copied) to @sections.
+ * Returns the section, or NULL on allocation failure.
+ */
+static struct perf_config_section *add_section(struct list_head *sections,
+					       const char *section_name)
+{
+	struct perf_config_section *section;
+	char *name_copy;
+
+	section = zalloc(sizeof(*section));
+	if (section == NULL)
+		return NULL;
+
+	name_copy = strdup(section_name);
+	if (name_copy == NULL) {
+		pr_debug("%s: strdup failed\n", __func__);
+		free(section);
+		return NULL;
+	}
+
+	section->name = name_copy;
+	INIT_LIST_HEAD(&section->items);
+	list_add_tail(&section->node, sections);
+
+	return section;
+}
+
+/*
+ * Append a new item named @name (copied) and without a value to @section.
+ * Returns the item, or NULL on allocation failure.
+ */
+static struct perf_config_item *add_config_item(struct perf_config_section *section,
+						const char *name)
+{
+	struct perf_config_item *item;
+	char *name_copy;
+
+	item = zalloc(sizeof(*item));
+	if (item == NULL)
+		return NULL;
+
+	name_copy = strdup(name);
+	if (name_copy == NULL) {
+		pr_debug("%s: strdup failed\n", __func__);
+		free(item);
+		return NULL;
+	}
+
+	item->name = name_copy;
+	list_add_tail(&item->node, &section->items);
+
+	return item;
+}
+
+/*
+ * Replace the value of @item with a copy of @value.
+ * Returns 0 on success; -1 (keeping the old value) if the copy fails.
+ */
+static int set_value(struct perf_config_item *item, const char *value)
+{
+	char *copy = strdup(value);
+
+	if (copy == NULL)
+		return -1;
+
+	/* Only drop the previous value once the replacement is secured. */
+	zfree(&item->value);
+	item->value = copy;
+
+	return 0;
+}
+
+/*
+ * config_fn_t callback that records "section.name" = @value in the
+ * perf_config_set passed as @perf_config_set, creating the section and the
+ * item on demand and overwriting an existing value.
+ *
+ * @var is split at its first '.'; a @var without '.' or a NULL @value is
+ * rejected.  Returns 0 on success and -1 on any failure.
+ */
+static int collect_config(const char *var, const char *value,
+			  void *perf_config_set)
+{
+	int ret = -1;
+	char *ptr, *key;
+	char *section_name, *name;
+	struct perf_config_section *section = NULL;
+	struct perf_config_item *item = NULL;
+	struct perf_config_set *set = perf_config_set;
+	struct list_head *sections;
+
+	if (set == NULL)
+		return -1;
+
+	sections = &set->sections;
+	/* strsep() modifies its argument, so work on a private copy. */
+	key = ptr = strdup(var);
+	if (!key) {
+		pr_debug("%s: strdup failed\n", __func__);
+		return -1;
+	}
+
+	section_name = strsep(&ptr, ".");
+	name = ptr;
+	if (name == NULL || value == NULL)
+		goto out_free;
+
+	section = find_section(sections, section_name);
+	if (!section) {
+		section = add_section(sections, section_name);
+		if (!section)
+			goto out_free;
+	}
+
+	item = find_config_item(name, section);
+	if (!item) {
+		item = add_config_item(section, name);
+		if (!item)
+			goto out_free;
+	}
+
+	/* perf_config_set can contain both user and system config items.
+	 * So we should know where each value is from.
+	 * The classification would be needed when a particular config file
+	 * is overwritten by the setting feature, i.e. set_config().
+	 */
+	if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) {
+		section->from_system_config = true;
+		item->from_system_config = true;
+	} else {
+		section->from_system_config = false;
+		item->from_system_config = false;
+	}
+
+	ret = set_value(item, value);
+
+out_free:
+	/*
+	 * key is only scratch space: add_section() and add_config_item()
+	 * keep their own copies of the names, so it must be released on the
+	 * success path too.
+	 */
+	free(key);
+	return ret;
+}
+
+/*
+ * Record @var = @value in @set as if it had been read from @file_name.
+ * Note that this overwrites the file-scope config_file_name.
+ * Returns the result of collect_config().
+ */
+int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
+			     const char *var, const char *value)
+{
+	int ret;
+
+	config_file_name = file_name;
+	ret = collect_config(var, value, set);
+
+	return ret;
+}
+
+/*
+ * Fill @set from the config files: only config_exclusive_filename when it
+ * is set, otherwise the system-wide file followed by $HOME/.perfconfig.
+ *
+ * Returns 0 on success or when the user file is skipped or missing, and a
+ * negative value when a file that was read failed to parse, on allocation
+ * failure, or when stat() fails for a reason other than ENOENT.
+ */
+static int perf_config_set__init(struct perf_config_set *set)
+{
+	int ret = -1;
+	const char *home = NULL;
+	char *user_config;
+	struct stat st;
+
+	/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
+	if (config_exclusive_filename)
+		return perf_config_from_file(collect_config, config_exclusive_filename, set);
+	/* System file: only when allowed by the environment and readable. */
+	if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
+		if (perf_config_from_file(collect_config, perf_etc_perfconfig(), set) < 0)
+			goto out;
+	}
+
+	home = getenv("HOME");
+
+	/*
+	 * Skip reading user config if:
+	 *   - there is no place to read it from (HOME)
+	 *   - we are asked not to (PERF_CONFIG_NOGLOBAL=1)
+	 */
+	if (!home || !*home || !perf_config_global())
+		return 0;
+
+	user_config = strdup(mkpath("%s/.perfconfig", home));
+	if (user_config == NULL) {
+		pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home);
+		goto out;
+	}
+
+	if (stat(user_config, &st) < 0) {
+		/* A missing user file is not an error. */
+		if (errno == ENOENT)
+			ret = 0;
+		goto out_free;
+	}
+
+	ret = 0;
+
+	/* Ignore (with a warning) a file owned by neither root nor the caller. */
+	if (st.st_uid && (st.st_uid != geteuid())) {
+		pr_warning("File %s not owned by current user or root, ignoring it.", user_config);
+		goto out_free;
+	}
+
+	/* An empty file has nothing to parse. */
+	if (st.st_size)
+		ret = perf_config_from_file(collect_config, user_config, set);
+
+out_free:
+	free(user_config);
+out:
+	return ret;
+}
+
+/*
+ * Allocate a config set and load the config files into it.  A failure to
+ * load is not reported: the (possibly partially filled) set is returned.
+ * Returns NULL only if the set itself cannot be allocated.
+ */
+struct perf_config_set *perf_config_set__new(void)
+{
+	struct perf_config_set *set = zalloc(sizeof(*set));
+
+	if (set == NULL)
+		return NULL;
+
+	INIT_LIST_HEAD(&set->sections);
+	perf_config_set__init(set);
+
+	return set;
+}
+
+/*
+ * Call @fn("section.name", value, @data) for every item of the loaded
+ * config set that has a value.
+ *
+ * Returns -1 if no config set has been loaded, the first negative value
+ * returned by @fn (iteration stops there), or otherwise the result of the
+ * last @fn call (0 if there was none).
+ */
+int perf_config(config_fn_t fn, void *data)
+{
+	int ret = 0;
+	char key[BUFSIZ];
+	struct perf_config_section *section;
+	struct perf_config_item *item;
+
+	if (config_set == NULL)
+		return -1;
+
+	perf_config_set__for_each_entry(config_set, section, item) {
+		char *value = item->value;
+
+		if (value) {
+			scnprintf(key, sizeof(key), "%s.%s",
+				  section->name, item->name);
+			ret = fn(key, value, data);
+			if (ret < 0) {
+				pr_err("Error: wrong config key-value pair %s=%s\n",
+				       key, value);
+				/*
+				 * perf_config_set__for_each_entry() expands to
+				 * two nested loops, so a plain 'break' would
+				 * only leave the inner one and let later
+				 * sections overwrite the error.
+				 */
+				goto out;
+			}
+		}
+	}
+out:
+	return ret;
+}
+
+/* Load the global config set once; later calls are no-ops while it exists. */
+void perf_config__init(void)
+{
+	if (config_set != NULL)
+		return;
+
+	config_set = perf_config_set__new();
+}
+
+/* Free the global config set (if any) and forget it. */
+void perf_config__exit(void)
+{
+	struct perf_config_set *set = config_set;
+
+	perf_config_set__delete(set);
+	config_set = NULL;
+}
+
+/* Throw the global config set away and load it again from the files. */
+void perf_config__refresh(void)
+{
+	perf_config__exit();
+	perf_config__init();
+}
+
+/* Free @item together with its name and value; it must already be unlinked. */
+static void perf_config_item__delete(struct perf_config_item *item)
+{
+	zfree(&item->value);
+	zfree(&item->name);
+	free(item);
+}
+
+/* Unlink and free every item of @section, leaving the section itself alive. */
+static void perf_config_section__purge(struct perf_config_section *section)
+{
+	struct perf_config_item *pos, *next;
+
+	list_for_each_entry_safe(pos, next, &section->items, node) {
+		list_del_init(&pos->node);
+		perf_config_item__delete(pos);
+	}
+}
+
+/* Free @section, its name and all of its items; it must already be unlinked. */
+static void perf_config_section__delete(struct perf_config_section *section)
+{
+	perf_config_section__purge(section);
+	zfree(&section->name);
+	free(section);
+}
+
+/* Unlink and free every section of @set, leaving the set itself alive. */
+static void perf_config_set__purge(struct perf_config_set *set)
+{
+	struct perf_config_section *pos, *next;
+
+	list_for_each_entry_safe(pos, next, &set->sections, node) {
+		list_del_init(&pos->node);
+		perf_config_section__delete(pos);
+	}
+}
+
+/* Free @set and everything it contains.  A NULL @set is ignored. */
+void perf_config_set__delete(struct perf_config_set *set)
+{
+	if (set != NULL) {
+		perf_config_set__purge(set);
+		free(set);
+	}
+}
+
+/*
+ * Report that @var was given without a value although it is not a boolean
+ * (a bare "[my] var" line means "true").  Always returns -1 so that a
+ * handler can simply "return config_error_nonbool(var);".
+ */
+int config_error_nonbool(const char *var)
+{
+	const int err = -1;
+
+	pr_err("Missing value for '%s'", var);
+	return err;
+}
+
+/*
+ * Select the buildid cache directory and export it as PERF_BUILDID_DIR.
+ *
+ * A non-NULL @dir is copied into buildid_dir.  If buildid_dir is still
+ * empty afterwards it defaults to "$HOME/.debug", or to ".debug" when HOME
+ * is not set.
+ */
+void set_buildid_dir(const char *dir)
+{
+	if (dir != NULL)
+		scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir);
+
+	/* default to $HOME/.debug */
+	if (buildid_dir[0] == '\0') {
+		const char *home = getenv("HOME");
+
+		if (home == NULL)
+			strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1);
+		else
+			snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s",
+				 home, DEBUG_CACHE_DIR);
+
+		buildid_dir[MAXPATHLEN-1] = '\0';
+	}
+
+	/* for communicating with external commands */
+	setenv("PERF_BUILDID_DIR", buildid_dir, 1);
+}
diff --git a/util/config.h b/util/config.h
new file mode 100644
index 0000000..baf82bf
--- /dev/null
+++ b/util/config.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CONFIG_H
+#define __PERF_CONFIG_H
+
+#include <stdbool.h>
+#include <linux/list.h>
+
+/* One "name = value" entry of a section. */
+struct perf_config_item {
+	char *name;
+	char *value;
+	bool from_system_config;	/* last set from the system-wide config file */
+	struct list_head node;		/* linkage in perf_config_section::items */
+};
+
+/* One "[section]" with its items. */
+struct perf_config_section {
+	char *name;
+	struct list_head items;		/* list of struct perf_config_item */
+	bool from_system_config;	/* last touched by the system-wide config file */
+	struct list_head node;		/* linkage in perf_config_set::sections */
+};
+
+/* All sections collected from the config files. */
+struct perf_config_set {
+	struct list_head sections;	/* list of struct perf_config_section */
+};
+
+extern const char *config_exclusive_filename;
+
+/* Callback type: (key, value, data); a negative return reports an error. */
+typedef int (*config_fn_t)(const char *, const char *, void *);
+int perf_default_config(const char *, const char *, void *);
+int perf_config(config_fn_t fn, void *);
+int perf_config_int(int *dest, const char *, const char *);
+int perf_config_u64(u64 *dest, const char *, const char *);
+int perf_config_bool(const char *, const char *);
+int config_error_nonbool(const char *);
+const char *perf_etc_perfconfig(void);
+
+struct perf_config_set *perf_config_set__new(void);
+void perf_config_set__delete(struct perf_config_set *set);
+int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
+			     const char *var, const char *value);
+void perf_config__init(void);
+void perf_config__exit(void);
+void perf_config__refresh(void);
+
+/**
+ * perf_config_sections__for_each_entry - iterate thru all the sections
+ * @list: list_head instance to iterate
+ * @section: struct perf_config_section iterator
+ */
+#define perf_config_sections__for_each_entry(list, section)	\
+        list_for_each_entry(section, list, node)
+
+/**
+ * perf_config_items__for_each_entry - iterate thru all the items
+ * @list: list_head instance to iterate
+ * @item: struct perf_config_item iterator
+ */
+#define perf_config_items__for_each_entry(list, item)	\
+        list_for_each_entry(item, list, node)
+
+/**
+ * perf_config_set__for_each_entry - iterate thru all the config section-item pairs
+ * @set: struct perf_config_set instance to iterate
+ * @section: struct perf_config_section iterator
+ * @item: struct perf_config_item iterator
+ *
+ * This expands to two nested loops, so a 'break' in the body only leaves
+ * the inner (items) loop.
+ */
+#define perf_config_set__for_each_entry(set, section, item)			\
+	perf_config_sections__for_each_entry(&set->sections, section)		\
+	perf_config_items__for_each_entry(&section->items, item)
+
+#endif /* __PERF_CONFIG_H */
diff --git a/util/counts.c b/util/counts.c
new file mode 100644
index 0000000..03032b4
--- /dev/null
+++ b/util/counts.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include "evsel.h"
+#include "counts.h"
+#include "util.h"
+
+/*
+ * Allocate a zeroed perf_counts whose values array has @ncpus x @nthreads
+ * entries of struct perf_counts_values.  Returns NULL on allocation failure.
+ */
+struct perf_counts *perf_counts__new(int ncpus, int nthreads)
+{
+	struct perf_counts *counts = zalloc(sizeof(*counts));
+
+	if (counts == NULL)
+		return NULL;
+
+	counts->values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values));
+	if (counts->values == NULL) {
+		free(counts);
+		return NULL;
+	}
+
+	return counts;
+}
+
+/* Free @counts and its values array.  A NULL @counts is ignored. */
+void perf_counts__delete(struct perf_counts *counts)
+{
+	if (counts == NULL)
+		return;
+
+	xyarray__delete(counts->values);
+	free(counts);
+}
+
+/* Reset the values array of @counts via xyarray__reset(); @counts must not be NULL. */
+static void perf_counts__reset(struct perf_counts *counts)
+{
+	xyarray__reset(counts->values);
+}
+
+/* Reset the counts attached to @evsel; evsel->counts must have been allocated. */
+void perf_evsel__reset_counts(struct perf_evsel *evsel)
+{
+	perf_counts__reset(evsel->counts);
+}
+
+/*
+ * Attach a fresh @ncpus x @nthreads perf_counts to @evsel.
+ * Returns 0 on success, -ENOMEM (with evsel->counts set to NULL) otherwise.
+ */
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+	struct perf_counts *counts = perf_counts__new(ncpus, nthreads);
+
+	evsel->counts = counts;
+	if (counts == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Free the counts attached to @evsel and clear the pointer. */
+void perf_evsel__free_counts(struct perf_evsel *evsel)
+{
+	struct perf_counts *counts = evsel->counts;
+
+	perf_counts__delete(counts);
+	evsel->counts = NULL;
+}
diff --git a/util/counts.h b/util/counts.h
new file mode 100644
index 0000000..0d1050c
--- /dev/null
+++ b/util/counts.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_COUNTS_H
+#define __PERF_COUNTS_H
+
+#include "xyarray.h"
+
+/* One counter reading; the three fields alias values[0..2]. */
+struct perf_counts_values {
+	union {
+		struct {
+			u64 val;
+			u64 ena;	/* NOTE(review): presumably time enabled - confirm */
+			u64 run;	/* NOTE(review): presumably time running - confirm */
+		};
+		u64 values[3];
+	};
+	bool	loaded;
+};
+
+struct perf_counts {
+	s8			  scaled;
+	struct perf_counts_values aggr;
+	/* 2-D array indexed by (cpu, thread), see perf_counts(). */
+	struct xyarray		  *values;
+};
+
+
+/* Return the entry of @counts for (@cpu, @thread). */
+static inline struct perf_counts_values*
+perf_counts(struct perf_counts *counts, int cpu, int thread)
+{
+	return xyarray__entry(counts->values, cpu, thread);
+}
+
+struct perf_counts *perf_counts__new(int ncpus, int nthreads);
+void perf_counts__delete(struct perf_counts *counts);
+
+void perf_evsel__reset_counts(struct perf_evsel *evsel);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads);
+void perf_evsel__free_counts(struct perf_evsel *evsel);
+
+#endif /* __PERF_COUNTS_H */
diff --git a/util/cpumap.c b/util/cpumap.c
new file mode 100644
index 0000000..1ccbd33
--- /dev/null
+++ b/util/cpumap.c
@@ -0,0 +1,725 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include <api/fs/fs.h>
+#include "../perf.h"
+#include "cpumap.h"
+#include <assert.h>
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include "asm/bug.h"
+
+#include "sane_ctype.h"
+
+/*
+ * File-scope state; nothing in the visible part of this file sets or reads
+ * these.
+ * NOTE(review): presumably lazily initialised caches of system limits and
+ * of the cpu -> node mapping - confirm against their users.
+ */
+static int max_cpu_num;
+static int max_present_cpu_num;
+static int max_node_num;
+static int *cpunode_map;
+
+/*
+ * Build a map holding CPUs 0 .. N-1, N being sysconf(_SC_NPROCESSORS_ONLN),
+ * with a reference count of one.
+ * Returns NULL if sysconf() fails or memory cannot be allocated.
+ */
+static struct cpu_map *cpu_map__default_new(void)
+{
+	struct cpu_map *cpus;
+	int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	int i;
+
+	if (nr_cpus < 0)
+		return NULL;
+
+	cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int));
+	if (cpus == NULL)
+		return NULL;
+
+	for (i = 0; i < nr_cpus; ++i)
+		cpus->map[i] = i;
+
+	cpus->nr = nr_cpus;
+	refcount_set(&cpus->refcnt, 1);
+
+	return cpus;
+}
+
+/*
+ * Build an exactly sized map from the first @nr_cpus entries of @tmp_cpus,
+ * with a reference count of one.  @tmp_cpus stays owned by the caller.
+ * Returns NULL on allocation failure.
+ */
+static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
+{
+	size_t payload_size = nr_cpus * sizeof(int);
+	struct cpu_map *cpus;
+
+	cpus = malloc(sizeof(*cpus) + payload_size);
+	if (cpus == NULL)
+		return NULL;
+
+	memcpy(cpus->map, tmp_cpus, payload_size);
+	cpus->nr = nr_cpus;
+	refcount_set(&cpus->refcnt, 1);
+
+	return cpus;
+}
+
+/*
+ * Build a cpu_map from a CPU list read from @file.
+ *
+ * The input is consumed as "<number><separator>" pairs: a '-' separator
+ * marks the start of a range that is filled in when the next number is
+ * read; a '\n' separator, a number without a separator, or a failed read
+ * ends the list.
+ *
+ * Returns the new map, the default map (cpu_map__default_new()) if no CPU
+ * was read, or NULL on allocation failure.
+ */
+struct cpu_map *cpu_map__read(FILE *file)
+{
+	struct cpu_map *cpus = NULL;
+	int nr_cpus = 0;
+	int *tmp_cpus = NULL, *tmp;
+	int max_entries = 0;
+	int n, cpu, prev;
+	char sep;
+
+	sep = 0;
+	/* prev >= 0 means the previous number opened a range ("prev-"). */
+	prev = -1;
+	for (;;) {
+		n = fscanf(file, "%u%c", &cpu, &sep);
+		if (n <= 0)
+			break;
+		if (prev >= 0) {
+			/* Room needed for the CPUs strictly between prev and cpu. */
+			int new_max = nr_cpus + cpu - prev - 1;
+
+			if (new_max >= max_entries) {
+				max_entries = new_max + MAX_NR_CPUS / 2;
+				tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+				if (tmp == NULL)
+					goto out_free_tmp;
+				tmp_cpus = tmp;
+			}
+
+			while (++prev < cpu)
+				tmp_cpus[nr_cpus++] = prev;
+		}
+		if (nr_cpus == max_entries) {
+			max_entries += MAX_NR_CPUS;
+			tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+			if (tmp == NULL)
+				goto out_free_tmp;
+			tmp_cpus = tmp;
+		}
+
+		tmp_cpus[nr_cpus++] = cpu;
+		if (n == 2 && sep == '-')
+			prev = cpu;
+		else
+			prev = -1;
+		if (n == 1 || sep == '\n')
+			break;
+	}
+
+	if (nr_cpus > 0)
+		cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
+	else
+		cpus = cpu_map__default_new();
+out_free_tmp:
+	/* On the allocation-failure path cpus is still NULL here. */
+	free(tmp_cpus);
+	return cpus;
+}
+
+/*
+ * Build a map from /sys/devices/system/cpu/online, falling back to
+ * cpu_map__default_new() when that file cannot be opened.
+ */
+static struct cpu_map *cpu_map__read_all_cpu_map(void)
+{
+	FILE *onlnf = fopen("/sys/devices/system/cpu/online", "r");
+	struct cpu_map *cpus;
+
+	if (onlnf == NULL)
+		return cpu_map__default_new();
+
+	cpus = cpu_map__read(onlnf);
+	fclose(onlnf);
+
+	return cpus;
+}
+
+/*
+ * Build a cpu_map from @cpu_list, a comma separated list of CPU numbers
+ * and "first-last" ranges.  A NULL @cpu_list yields the map read by
+ * cpu_map__read_all_cpu_map().
+ *
+ * Returns NULL if the list does not start with a digit, contains a
+ * malformed or too large number, a reversed range or a duplicate CPU, or
+ * if memory cannot be allocated.
+ */
+struct cpu_map *cpu_map__new(const char *cpu_list)
+{
+	struct cpu_map *cpus = NULL;
+	unsigned long start_cpu, end_cpu = 0;
+	char *p = NULL;
+	int i, nr_cpus = 0;
+	int *tmp_cpus = NULL, *tmp;
+	int max_entries = 0;
+
+	if (!cpu_list)
+		return cpu_map__read_all_cpu_map();
+
+	if (!isdigit(*cpu_list))
+		goto out;
+
+	/* One iteration per comma separated element. */
+	while (isdigit(*cpu_list)) {
+		p = NULL;
+		start_cpu = strtoul(cpu_list, &p, 0);
+		if (start_cpu >= INT_MAX
+		    || (*p != '\0' && *p != ',' && *p != '-'))
+			goto invalid;
+
+		if (*p == '-') {
+			cpu_list = ++p;
+			p = NULL;
+			end_cpu = strtoul(cpu_list, &p, 0);
+
+			if (end_cpu >= INT_MAX || (*p != '\0' && *p != ','))
+				goto invalid;
+
+			if (end_cpu < start_cpu)
+				goto invalid;
+		} else {
+			end_cpu = start_cpu;
+		}
+
+		for (; start_cpu <= end_cpu; start_cpu++) {
+			/* check for duplicates */
+			for (i = 0; i < nr_cpus; i++)
+				if (tmp_cpus[i] == (int)start_cpu)
+					goto invalid;
+
+			if (nr_cpus == max_entries) {
+				max_entries += MAX_NR_CPUS;
+				tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+				if (tmp == NULL)
+					goto invalid;
+				tmp_cpus = tmp;
+			}
+			tmp_cpus[nr_cpus++] = (int)start_cpu;
+		}
+		/* Step over the ',' that ended this element, if any. */
+		if (*p)
+			++p;
+
+		cpu_list = p;
+	}
+
+	if (nr_cpus > 0)
+		cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
+	else
+		cpus = cpu_map__default_new();
+invalid:
+	/* Reached on errors with cpus still NULL, and on success for cleanup. */
+	free(tmp_cpus);
+out:
+	return cpus;
+}
+
+/*
+ * Build a cpu_map from the entry array in @cpus.
+ * Returns NULL on allocation failure.
+ */
+static struct cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
+{
+	struct cpu_map *map = cpu_map__empty_new(cpus->nr);
+	unsigned i;
+
+	if (map == NULL)
+		return NULL;
+
+	for (i = 0; i < cpus->nr; i++) {
+		/*
+		 * Special treatment for -1, which is not real cpu number,
+		 * and we need to use (int) -1 to initialize map[i],
+		 * otherwise it would become 65535.
+		 */
+		map->map[i] = (cpus->cpu[i] == (u16) -1) ? -1 : (int) cpus->cpu[i];
+	}
+
+	return map;
+}
+
+/*
+ * Build a cpu_map holding, in ascending order, the index of every bit set
+ * in @mask.  Returns NULL on allocation failure.
+ */
+static struct cpu_map *cpu_map__from_mask(struct cpu_map_mask *mask)
+{
+	int nbits = mask->nr * mask->long_size * BITS_PER_BYTE;
+	int nr = bitmap_weight(mask->mask, nbits);
+	struct cpu_map *map = cpu_map__empty_new(nr);
+	int cpu, i = 0;
+
+	if (map == NULL)
+		return NULL;
+
+	for_each_set_bit(cpu, mask->mask, nbits)
+		map->map[i++] = cpu;
+
+	return map;
+}
+
+/*
+ * Build a cpu_map from @data: PERF_CPU_MAP__CPUS payloads are decoded as an
+ * entry array, every other type as a bit mask.
+ */
+struct cpu_map *cpu_map__new_data(struct cpu_map_data *data)
+{
+	if (data->type != PERF_CPU_MAP__CPUS)
+		return cpu_map__from_mask((struct cpu_map_mask *)data->data);
+
+	return cpu_map__from_entries((struct cpu_map_entries *)data->data);
+}
+
+/*
+ * Print @map, as formatted by cpu_map__snprint(), followed by a newline to
+ * @fp.  Returns the value returned by fprintf().
+ */
+size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp)
+{
+#define BUFSIZE 1024
+	char buf[BUFSIZE];
+	size_t printed;
+
+	cpu_map__snprint(map, buf, sizeof(buf));
+	printed = fprintf(fp, "%s\n", buf);
+#undef BUFSIZE
+	return printed;
+}
+
+/*
+ * Build a one-entry map whose only CPU is -1, with a reference count of
+ * one.  Returns NULL on allocation failure.
+ */
+struct cpu_map *cpu_map__dummy_new(void)
+{
+	struct cpu_map *cpus;
+
+	cpus = malloc(sizeof(*cpus) + sizeof(int));
+	if (cpus == NULL)
+		return NULL;
+
+	cpus->nr = 1;
+	cpus->map[0] = -1;
+	refcount_set(&cpus->refcnt, 1);
+
+	return cpus;
+}
+
+/*
+ * Allocate a map with room for @nr entries, every entry preset to -1, and a
+ * reference count of 1. Returns NULL when malloc() fails.
+ */
+struct cpu_map *cpu_map__empty_new(int nr)
+{
+	struct cpu_map *fresh = malloc(sizeof(*fresh) + sizeof(int) * nr);
+	int slot;
+
+	if (fresh == NULL)
+		return NULL;
+
+	fresh->nr = nr;
+	for (slot = 0; slot < nr; slot++)
+		fresh->map[slot] = -1;
+
+	refcount_set(&fresh->refcnt, 1);
+
+	return fresh;
+}
+
+/*
+ * Free @map (NULL is accepted and ignored). Warns once if the reference
+ * count is not zero at this point.
+ */
+static void cpu_map__delete(struct cpu_map *map)
+{
+	if (map == NULL)
+		return;
+
+	WARN_ONCE(refcount_read(&map->refcnt) != 0,
+		  "cpu_map refcnt unbalanced\n");
+	free(map);
+}
+
+/*
+ * Take a reference on @map and return it. A NULL @map is returned
+ * unchanged.
+ */
+struct cpu_map *cpu_map__get(struct cpu_map *map)
+{
+	if (map == NULL)
+		return NULL;
+
+	refcount_inc(&map->refcnt);
+	return map;
+}
+
+/*
+ * Drop a reference on @map; the map is deleted when refcount_dec_and_test()
+ * reports that this was the last one. NULL is accepted and ignored.
+ */
+void cpu_map__put(struct cpu_map *map)
+{
+	if (map == NULL)
+		return;
+
+	if (refcount_dec_and_test(&map->refcnt))
+		cpu_map__delete(map);
+}
+
+/*
+ * Read the integer stored in the sysfs file
+ * devices/system/cpu/cpu<cpu>/topology/<name> into *@value.
+ * Returns whatever sysfs__read_int() returns.
+ */
+static int cpu__get_topology_int(int cpu, const char *name, int *value)
+{
+	char topo_path[PATH_MAX];
+
+	snprintf(topo_path, sizeof(topo_path),
+		"devices/system/cpu/cpu%d/topology/%s", cpu, name);
+
+	return sysfs__read_int(topo_path, value);
+}
+
+/*
+ * Read topology/physical_package_id for @cpu. Returns the result of the
+ * read when it is non-zero, otherwise the value that was read.
+ */
+int cpu_map__get_socket_id(int cpu)
+{
+	int value;
+	int ret = cpu__get_topology_int(cpu, "physical_package_id", &value);
+
+	if (ret)
+		return ret;
+
+	return value;
+}
+
+/*
+ * Return the socket id, as read by cpu_map__get_socket_id(), of the cpu
+ * stored at position @idx of @map, or -1 when @idx is not a valid position.
+ * @data is unused; it is present so the function fits the callback type of
+ * cpu_map__build_map().
+ */
+int cpu_map__get_socket(struct cpu_map *map, int idx, void *data __maybe_unused)
+{
+	/* valid positions are 0 .. nr - 1; idx == nr is already past the end */
+	if (idx < 0 || idx >= map->nr)
+		return -1;
+
+	return cpu_map__get_socket_id(map->map[idx]);
+}
+
+/*
+ * qsort() comparator ordering ints in increasing order. Returns a negative,
+ * zero or positive value when *a is less than, equal to or greater than *b.
+ * Uses comparisons rather than subtraction so that widely separated values
+ * cannot overflow.
+ */
+static int cmp_ids(const void *a, const void *b)
+{
+	const int lhs = *(const int *)a;
+	const int rhs = *(const int *)b;
+
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Store in *@res a newly allocated map holding the distinct values that @f
+ * returns for each position of @cpus, sorted in increasing order. The new
+ * map starts with a reference count of 1.
+ *
+ * Returns 0 on success, or -1 when the allocation fails, in which case
+ * *@res is not written.
+ */
+int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
+		       int (*f)(struct cpu_map *map, int cpu, void *data),
+		       void *data)
+{
+	int total = cpus->nr;
+	struct cpu_map *uniq;
+	int idx, pos, id;
+
+	/* allocate as much as possible */
+	uniq = calloc(1, sizeof(*uniq) + total * sizeof(int));
+	if (uniq == NULL)
+		return -1;
+
+	for (idx = 0; idx < total; idx++) {
+		id = f(cpus, idx, data);
+
+		/* linear search: has this id been recorded already? */
+		pos = 0;
+		while (pos < uniq->nr && uniq->map[pos] != id)
+			pos++;
+
+		if (pos == uniq->nr) {
+			uniq->map[pos] = id;
+			uniq->nr = pos + 1;
+		}
+	}
+
+	/* ensure we process id in increasing order */
+	qsort(uniq->map, uniq->nr, sizeof(int), cmp_ids);
+
+	refcount_set(&uniq->refcnt, 1);
+	*res = uniq;
+	return 0;
+}
+
+/*
+ * Read topology/core_id for @cpu. Returns the result of the read when it is
+ * non-zero, otherwise the value that was read.
+ */
+int cpu_map__get_core_id(int cpu)
+{
+	int value;
+	int ret = cpu__get_topology_int(cpu, "core_id", &value);
+
+	if (ret)
+		return ret;
+
+	return value;
+}
+
+/*
+ * Return a core identifier for the cpu at position @idx of @map, built
+ * from its socket id and core id, or -1 when @idx is not a valid position
+ * or the socket lookup yields -1.
+ */
+int cpu_map__get_core(struct cpu_map *map, int idx, void *data)
+{
+	int core, s;
+
+	/* valid positions are 0 .. nr - 1; idx == nr is already past the end */
+	if (idx < 0 || idx >= map->nr)
+		return -1;
+
+	core = cpu_map__get_core_id(map->map[idx]);
+
+	s = cpu_map__get_socket(map, idx, data);
+	if (s == -1)
+		return -1;
+
+	/*
+	 * encode socket in upper 16 bits
+	 * core_id is relative to socket, and
+	 * we need a global id. So we combine
+	 * socket + core id
+	 */
+	return (s << 16) | (core & 0xffff);
+}
+
+/*
+ * Build in *@sockp the map of distinct cpu_map__get_socket() values for
+ * @cpus. Returns the result of cpu_map__build_map().
+ */
+int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
+{
+	int err = cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL);
+
+	return err;
+}
+
+/*
+ * Build in *@corep the map of distinct cpu_map__get_core() values for
+ * @cpus. Returns the result of cpu_map__build_map().
+ */
+int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
+{
+	int err = cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL);
+
+	return err;
+}
+
+/* setup simple routines to easily access node numbers given a cpu number */
+/*
+ * Read the file at @path, locate the number that follows the last ',' or
+ * '-' in it (or the number at the start when there is no separator) and
+ * store that number plus one in *@max.
+ *
+ * Returns 0 on success, -1 when the file cannot be read, is empty, or no
+ * number can be parsed.
+ */
+static int get_max_num(char *path, int *max)
+{
+	size_t num;
+	char *buf;
+	int err = 0;
+
+	if (filename__read_str(path, &buf, &num))
+		return -1;
+
+	/*
+	 * Nothing to parse. Bail out before the backwards scan, whose
+	 * "--num" would wrap around for a zero length and index far
+	 * outside buf.
+	 */
+	if (num == 0) {
+		err = -1;
+		goto out;
+	}
+
+	/* NOTE(review): assumes filename__read_str() leaves room for the terminator - confirm */
+	buf[num] = '\0';
+
+	/* start on the right, to find highest node num */
+	while (--num) {
+		if ((buf[num] == ',') || (buf[num] == '-')) {
+			num++;
+			break;
+		}
+	}
+	if (sscanf(&buf[num], "%d", max) < 1) {
+		err = -1;
+		goto out;
+	}
+
+	/* convert from 0-based to 1-based */
+	(*max)++;
+
+out:
+	free(buf);
+	return err;
+}
+
+/*
+ * Determine highest possible cpu in the system for sparse allocation.
+ *
+ * Sets max_cpu_num from <sysfs>/devices/system/cpu/possible and
+ * max_present_cpu_num from <sysfs>/devices/system/cpu/present. Both start
+ * at a default of 4096, which is kept for any value that cannot be read;
+ * an error is logged in that case.
+ */
+static void set_max_cpu_num(void)
+{
+	const char *mnt;
+	char path[PATH_MAX];
+	int ret = -1;
+
+	/* set up default */
+	max_cpu_num = 4096;
+	max_present_cpu_num = 4096;
+
+	mnt = sysfs__mountpoint();
+	if (!mnt)
+		goto out;
+
+	/* get the highest possible cpu number for a sparse allocation */
+	ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt);
+	/* snprintf() returns the untruncated length: >= size means truncated */
+	if (ret >= PATH_MAX) {
+		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+		goto out;
+	}
+
+	ret = get_max_num(path, &max_cpu_num);
+	if (ret)
+		goto out;
+
+	/* get the highest present cpu number for a sparse allocation */
+	ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
+	if (ret >= PATH_MAX) {
+		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+		goto out;
+	}
+
+	ret = get_max_num(path, &max_present_cpu_num);
+
+out:
+	if (ret)
+		pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num);
+}
+
+/*
+ * Determine highest possible node in the system for sparse allocation.
+ *
+ * Sets max_node_num from <sysfs>/devices/system/node/possible. It starts
+ * at a default of 8, which is kept when the value cannot be read; an error
+ * is logged in that case.
+ */
+static void set_max_node_num(void)
+{
+	const char *mnt;
+	char path[PATH_MAX];
+	int ret = -1;
+
+	/* set up default */
+	max_node_num = 8;
+
+	mnt = sysfs__mountpoint();
+	if (!mnt)
+		goto out;
+
+	/* get the highest possible node number for a sparse allocation */
+	ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt);
+	/* snprintf() returns the untruncated length: >= size means truncated */
+	if (ret >= PATH_MAX) {
+		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+		goto out;
+	}
+
+	ret = get_max_num(path, &max_node_num);
+
+out:
+	if (ret)
+		pr_err("Failed to read max nodes, using default of %d\n", max_node_num);
+}
+
+/*
+ * Return max_node_num, calling set_max_node_num() first when it is still
+ * zero.
+ */
+int cpu__max_node(void)
+{
+	if (unlikely(max_node_num == 0))
+		set_max_node_num();
+
+	return max_node_num;
+}
+
+/*
+ * Return max_cpu_num, calling set_max_cpu_num() first when it is still
+ * zero.
+ */
+int cpu__max_cpu(void)
+{
+	if (unlikely(max_cpu_num == 0))
+		set_max_cpu_num();
+
+	return max_cpu_num;
+}
+
+/*
+ * Return max_present_cpu_num, calling set_max_cpu_num() first when it is
+ * still zero.
+ */
+int cpu__max_present_cpu(void)
+{
+	if (unlikely(max_present_cpu_num == 0))
+		set_max_cpu_num();
+
+	return max_present_cpu_num;
+}
+
+
+/*
+ * Return the node recorded for @cpu in cpunode_map, or -1 when the map has
+ * not been set up or @cpu lies outside it. A slot that was never assigned
+ * a node also reads as -1.
+ */
+int cpu__get_node(int cpu)
+{
+	if (unlikely(cpunode_map == NULL)) {
+		pr_debug("cpu_map not initialized\n");
+		return -1;
+	}
+
+	/* cpunode_map is allocated with max_cpu_num slots in init_cpunode_map() */
+	if (cpu < 0 || cpu >= max_cpu_num)
+		return -1;
+
+	return cpunode_map[cpu];
+}
+
+/*
+ * Refresh max_cpu_num / max_node_num and allocate cpunode_map with
+ * max_cpu_num slots, each preset to -1. Returns 0 on success, -1 when the
+ * allocation fails.
+ */
+static int init_cpunode_map(void)
+{
+	int slot;
+
+	set_max_cpu_num();
+	set_max_node_num();
+
+	cpunode_map = calloc(max_cpu_num, sizeof(int));
+	if (cpunode_map == NULL) {
+		pr_err("%s: calloc failed\n", __func__);
+		return -1;
+	}
+
+	for (slot = max_cpu_num - 1; slot >= 0; slot--)
+		cpunode_map[slot] = -1;
+
+	return 0;
+}
+
+/*
+ * Fill cpunode_map by walking <sysfs>/devices/system/node: for every
+ * "node<N>" directory, each "cpu<M>" symlink inside it records node N for
+ * cpu M.
+ *
+ * Returns -1 when the map cannot be allocated or the base path does not
+ * fit in PATH_MAX, 0 otherwise (including when sysfs or the node directory
+ * is unavailable, in which case every slot keeps its -1 preset).
+ */
+int cpu__setup_cpunode_map(void)
+{
+	struct dirent *dent1, *dent2;
+	DIR *dir1, *dir2;
+	unsigned int cpu, mem;
+	char buf[PATH_MAX];
+	char path[PATH_MAX];
+	const char *mnt;
+	int n;
+
+	/* initialize globals */
+	if (init_cpunode_map())
+		return -1;
+
+	mnt = sysfs__mountpoint();
+	if (!mnt)
+		return 0;
+
+	n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt);
+	/* snprintf() returns the untruncated length: >= size means truncated */
+	if (n >= PATH_MAX) {
+		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+		return -1;
+	}
+
+	dir1 = opendir(path);
+	if (!dir1)
+		return 0;
+
+	/* walk tree and setup map */
+	while ((dent1 = readdir(dir1)) != NULL) {
+		if (dent1->d_type != DT_DIR || sscanf(dent1->d_name, "node%u", &mem) < 1)
+			continue;
+
+		n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name);
+		if (n >= PATH_MAX) {
+			pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+			continue;
+		}
+
+		dir2 = opendir(buf);
+		if (!dir2)
+			continue;
+		while ((dent2 = readdir(dir2)) != NULL) {
+			if (dent2->d_type != DT_LNK || sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+				continue;
+			/* never write past the max_cpu_num slots allocated above */
+			if (cpu >= (unsigned int)max_cpu_num)
+				continue;
+			cpunode_map[cpu] = mem;
+		}
+		closedir(dir2);
+	}
+	closedir(dir1);
+	return 0;
+}
+
+/* Return true when @cpu appears in @cpus, i.e. cpu_map__idx() finds it. */
+bool cpu_map__has(struct cpu_map *cpus, int cpu)
+{
+	int pos = cpu_map__idx(cpus, cpu);
+
+	return pos != -1;
+}
+
+/*
+ * Return the position of the first entry of @cpus equal to @cpu, or -1
+ * when there is none.
+ */
+int cpu_map__idx(struct cpu_map *cpus, int cpu)
+{
+	int pos = 0;
+
+	while (pos < cpus->nr) {
+		if (cpus->map[pos] == cpu)
+			return pos;
+		++pos;
+	}
+
+	return -1;
+}
+
+/* Return the entry at position @idx of @cpus; @idx is not range checked. */
+int cpu_map__cpu(struct cpu_map *cpus, int idx)
+{
+	const int *slots = cpus->map;
+
+	return slots[idx];
+}
+
+/*
+ * Format @map into @buf (at most @size bytes) as a comma separated list in
+ * which runs of consecutive values are collapsed into "first-last" ranges,
+ * e.g. the entries 0,1,2,3,8 are printed as "0-3,8".
+ *
+ * Returns the sum of the snprintf() return values, i.e. the length the
+ * text would have had without truncation.
+ *
+ * NOTE(review): ret is accumulated without clamping, so once the output is
+ * truncated "size - ret" can wrap around - confirm callers pass a buffer
+ * large enough for their maps.
+ */
+size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size)
+{
+	int i, cpu, start = -1;
+	bool first = true;
+	size_t ret = 0;
+
+/* separator placed in front of every item except the first one printed */
+#define COMMA first ? "" : ","
+
+	/* one extra iteration (i == map->nr) flushes the run still open */
+	for (i = 0; i < map->nr + 1; i++) {
+		bool last = i == map->nr;
+
+		/* sentinel value for the extra iteration */
+		cpu = last ? INT_MAX : map->map[i];
+
+		if (start == -1) {
+			/* very first iteration: open the first run */
+			start = i;
+			if (last) {
+				/*
+				 * Only reached when map->nr is 0.
+				 * NOTE(review): this prints map->map[0] of an
+				 * empty map - confirm that is intended.
+				 */
+				ret += snprintf(buf + ret, size - ret,
+						"%s%d", COMMA,
+						map->map[i]);
+			}
+		} else if (((i - start) != (cpu - map->map[start])) || last) {
+			/* entry i does not continue the run [start, i - 1]: emit it */
+			int end = i - 1;
+
+			if (start == end) {
+				ret += snprintf(buf + ret, size - ret,
+						"%s%d", COMMA,
+						map->map[start]);
+			} else {
+				ret += snprintf(buf + ret, size - ret,
+						"%s%d-%d", COMMA,
+						map->map[start], map->map[end]);
+			}
+			first = false;
+			start = i;
+		}
+	}
+
+#undef COMMA
+
+	pr_debug("cpumask list: %s\n", buf);
+	return ret;
+}
+
+/*
+ * Return the lower-case hexadecimal digit for @val (0-15), or '?' when
+ * @val does not fit in one digit.
+ */
+static char hex_char(unsigned char val)
+{
+	if (val >= 16)
+		return '?';
+
+	return val < 10 ? val + '0' : val - 10 + 'a';
+}
+
+/*
+ * Format @map into @buf as a hexadecimal bitmask: one hex digit per four
+ * cpus, highest digits first, with a ',' separating groups of 32 cpus.
+ *
+ * Returns the number of characters written, not counting the terminator,
+ * or 0 (with @buf set to "") when the scratch bitmap cannot be allocated.
+ *
+ * NOTE(review): the last map entry is used as the highest cpu number, so
+ * the map is presumably sorted - confirm. The digits are written without
+ * checking @size; only the final buf[size - 1] store refers to it.
+ */
+size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size)
+{
+	int i, cpu;
+	char *ptr = buf;
+	unsigned char *bitmap;
+	int last_cpu = cpu_map__cpu(map, map->nr - 1);
+
+	/*
+	 * One bit per cpu number 0 .. last_cpu needs last_cpu / 8 + 1 bytes.
+	 * (last_cpu + 7) / 8 would be one byte short whenever last_cpu is a
+	 * multiple of 8, and the loops below would run off the allocation.
+	 */
+	bitmap = zalloc(last_cpu / 8 + 1);
+	if (bitmap == NULL) {
+		buf[0] = '\0';
+		return 0;
+	}
+
+	for (i = 0; i < map->nr; i++) {
+		cpu = cpu_map__cpu(map, i);
+		bitmap[cpu / 8] |= 1 << (cpu % 8);
+	}
+
+	/* walk the nibbles from the one holding last_cpu down to cpu 0 */
+	for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) {
+		unsigned char bits = bitmap[cpu / 8];
+
+		/* cpu is a multiple of 4: pick the high or low nibble of the byte */
+		if (cpu % 8)
+			bits >>= 4;
+		else
+			bits &= 0xf;
+
+		*ptr++ = hex_char(bits);
+		if ((cpu % 32) == 0 && cpu > 0)
+			*ptr++ = ',';
+	}
+	*ptr = '\0';
+	free(bitmap);
+
+	buf[size - 1] = '\0';
+	return ptr - buf;
+}
diff --git a/util/cpumap.h b/util/cpumap.h
new file mode 100644
index 0000000..ed8999d
--- /dev/null
+++ b/util/cpumap.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CPUMAP_H
+#define __PERF_CPUMAP_H
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/refcount.h>
+
+#include "perf.h"
+#include "util/debug.h"
+
+/*
+ * Reference counted, variable length array of ints. The allocation is
+ * sized for nr trailing entries (see cpu_map__empty_new()).
+ */
+struct cpu_map {
+	/* taken with cpu_map__get(), dropped with cpu_map__put() */
+	refcount_t refcnt;
+	/* number of valid entries in map[] */
+	int nr;
+	/* cpu numbers, or ids produced by cpu_map__build_map(); -1 is used as "no cpu" */
+	int map[];
+};
+
+struct cpu_map *cpu_map__new(const char *cpu_list);
+struct cpu_map *cpu_map__empty_new(int nr);
+struct cpu_map *cpu_map__dummy_new(void);
+struct cpu_map *cpu_map__new_data(struct cpu_map_data *data);
+struct cpu_map *cpu_map__read(FILE *file);
+size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size);
+size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size);
+size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
+int cpu_map__get_socket_id(int cpu);
+int cpu_map__get_socket(struct cpu_map *map, int idx, void *data);
+int cpu_map__get_core_id(int cpu);
+int cpu_map__get_core(struct cpu_map *map, int idx, void *data);
+int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
+int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
+
+struct cpu_map *cpu_map__get(struct cpu_map *map);
+void cpu_map__put(struct cpu_map *map);
+
+/*
+ * Return the entry at position @s of @sock, or 0 when @sock is NULL or @s
+ * is not a valid position.
+ */
+static inline int cpu_map__socket(struct cpu_map *sock, int s)
+{
+	/* valid positions are 0 .. nr - 1; s == nr is already past the end */
+	if (!sock || s >= sock->nr || s < 0)
+		return 0;
+	return sock->map[s];
+}
+
+/* Extract the socket part of @id: everything above the low 16 bits. */
+static inline int cpu_map__id_to_socket(int id)
+{
+	const int socket = id >> 16;
+
+	return socket;
+}
+
+/* Extract the low 16 bits of @id. */
+static inline int cpu_map__id_to_cpu(int id)
+{
+	const int low = id & 0xffff;
+
+	return low;
+}
+
+/* Return the number of entries in @map; a NULL map counts as 1. */
+static inline int cpu_map__nr(const struct cpu_map *map)
+{
+	if (!map)
+		return 1;
+
+	return map->nr;
+}
+
+/* Return true when @map is NULL or its first entry is -1. */
+static inline bool cpu_map__empty(const struct cpu_map *map)
+{
+	if (!map)
+		return true;
+
+	return map->map[0] == -1;
+}
+
+int cpu__setup_cpunode_map(void);
+
+int cpu__max_node(void);
+int cpu__max_cpu(void);
+int cpu__max_present_cpu(void);
+int cpu__get_node(int cpu);
+
+int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
+		       int (*f)(struct cpu_map *map, int cpu, void *data),
+		       void *data);
+
+int cpu_map__cpu(struct cpu_map *cpus, int idx);
+bool cpu_map__has(struct cpu_map *cpus, int cpu);
+int cpu_map__idx(struct cpu_map *cpus, int cpu);
+#endif /* __PERF_CPUMAP_H */
diff --git a/util/cs-etm-decoder/Build b/util/cs-etm-decoder/Build
new file mode 100644
index 0000000..bc22c39
--- /dev/null
+++ b/util/cs-etm-decoder/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o
diff --git a/util/cs-etm-decoder/cs-etm-decoder.c b/util/cs-etm-decoder/cs-etm-decoder.c
new file mode 100644
index 0000000..c8b98fa
--- /dev/null
+++ b/util/cs-etm-decoder/cs-etm-decoder.c
@@ -0,0 +1,554 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015-2018 Linaro Limited.
+ *
+ * Author: Tor Jeremiassen <tor@ti.com>
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/err.h>
+#include <linux/list.h>
+#include <stdlib.h>
+#include <opencsd/c_api/opencsd_c_api.h>
+#include <opencsd/etmv4/trc_pkt_types_etmv4.h>
+#include <opencsd/ocsd_if_types.h>
+
+#include "cs-etm.h"
+#include "cs-etm-decoder.h"
+#include "intlist.h"
+#include "util.h"
+
+#define MAX_BUFFER 1024
+
+/* use raw logging */
+#ifdef CS_DEBUG_RAW
+#define CS_LOG_RAW_FRAMES
+#ifdef CS_RAW_PACKED
+#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT | \
+			    OCSD_DFRMTR_PACKED_RAW_OUT)
+#else
+#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT)
+#endif
+#endif
+
+/*
+ * State of one decoder instance: the library decode tree, the callbacks
+ * installed by the user and a ring of decoded packets awaiting
+ * cs_etm_decoder__get_packet().
+ */
+struct cs_etm_decoder {
+	/* copied from d_params->data; passed as first argument to mem_access */
+	void *data;
+	/* receives the strings handed to cs_etm_decoder__print_str_cb() */
+	void (*packet_printer)(const char *msg);
+	/* set on a TRACE_ON element, cleared on a NO_SYNC element */
+	bool trace_on;
+	/* handle returned by ocsd_create_dcd_tree() */
+	dcd_tree_handle_t dcd_tree;
+	/* installed by cs_etm_decoder__add_mem_access_cb() */
+	cs_etm_mem_cb_type mem_access;
+	/* last response saved by cs_etm_decoder__process_data_block() */
+	ocsd_datapath_resp_t prev_return;
+	/* number of packets currently queued in packet_buffer */
+	u32 packet_count;
+	/* consumer index into packet_buffer */
+	u32 head;
+	/* producer index into packet_buffer; advanced before each store */
+	u32 tail;
+	/* indices wrap with "& (MAX_BUFFER - 1)" */
+	struct cs_etm_packet packet_buffer[MAX_BUFFER];
+};
+
+/*
+ * Memory access callback registered with the decode tree. @context is the
+ * cs_etm_decoder; the request is forwarded to the user supplied mem_access
+ * callback and its result returned. @mem_space is ignored.
+ */
+static u32
+cs_etm_decoder__mem_access(const void *context,
+			   const ocsd_vaddr_t address,
+			   const ocsd_mem_space_acc_t mem_space __maybe_unused,
+			   const u32 req_size,
+			   u8 *buffer)
+{
+	struct cs_etm_decoder *self = (struct cs_etm_decoder *) context;
+	u32 copied;
+
+	copied = self->mem_access(self->data, address, req_size, buffer);
+
+	return copied;
+}
+
+/*
+ * Remember @cb_func as the decoder's memory access callback and register
+ * the address range @start..@end with the decode tree.
+ * Returns 0 on success, -1 when the library rejects the registration.
+ */
+int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
+				      u64 start, u64 end,
+				      cs_etm_mem_cb_type cb_func)
+{
+	int err;
+
+	decoder->mem_access = cb_func;
+
+	err = ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, start, end,
+					   OCSD_MEM_SPACE_ANY,
+					   cs_etm_decoder__mem_access, decoder);
+
+	return err ? -1 : 0;
+}
+
+/*
+ * Send OCSD_OP_RESET to the decode tree and set the saved response back to
+ * OCSD_RESP_CONT. Returns 0, or -1 when the library reports a fatal
+ * response.
+ */
+int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
+{
+	ocsd_datapath_resp_t resp;
+
+	decoder->prev_return = OCSD_RESP_CONT;
+
+	resp = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET,
+				    0, 0, NULL, NULL);
+
+	return OCSD_DATA_RESP_IS_FATAL(resp) ? -1 : 0;
+}
+
+/*
+ * Copy the oldest queued packet into *@packet and remove it from the queue.
+ *
+ * Returns 1 when a packet was copied, 0 when the queue is empty and
+ * -EINVAL when @decoder or @packet is NULL.
+ */
+int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
+			       struct cs_etm_packet *packet)
+{
+	if (!decoder || !packet)
+		return -EINVAL;
+
+	/* Nothing to do, might as well just return */
+	if (decoder->packet_count == 0)
+		return 0;
+
+	/*
+	 * cs_etm_decoder__buffer_packet() advances the tail *before* storing
+	 * a packet, so the first packet lands in slot 1, not slot 0. The head
+	 * has to follow the same rule and be advanced before it is used,
+	 * otherwise a never written slot is returned first and every later
+	 * read lags one packet behind.
+	 */
+	decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
+
+	*packet = decoder->packet_buffer[decoder->head];
+
+	decoder->packet_count--;
+
+	return 1;
+}
+
+/*
+ * Fill the library ETMv4 configuration @config from @params. The registers
+ * recorded in params->etmv4 are copied over, reg_idr9 .. reg_idr13 are set
+ * to zero, and the architecture / core profile are fixed to ARCH_V8 and
+ * profile_CortexA.
+ */
+static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params,
+					     ocsd_etmv4_cfg *config)
+{
+	const struct cs_etmv4_trace_params *regs = &params->etmv4;
+
+	/* registers captured with the trace */
+	config->reg_configr = regs->reg_configr;
+	config->reg_traceidr = regs->reg_traceidr;
+	config->reg_idr0 = regs->reg_idr0;
+	config->reg_idr1 = regs->reg_idr1;
+	config->reg_idr2 = regs->reg_idr2;
+	config->reg_idr8 = regs->reg_idr8;
+
+	/* registers that are not recorded */
+	config->reg_idr9 = 0;
+	config->reg_idr10 = 0;
+	config->reg_idr11 = 0;
+	config->reg_idr12 = 0;
+	config->reg_idr13 = 0;
+
+	config->arch_ver = ARCH_V8;
+	config->core_prof = profile_CortexA;
+}
+
+/*
+ * String callback for the library logger: forwards @msg to the decoder's
+ * packet_printer. Nothing happens when @p_context is NULL or @str_len is 0.
+ */
+static void cs_etm_decoder__print_str_cb(const void *p_context,
+					 const char *msg,
+					 const int str_len)
+{
+	const struct cs_etm_decoder *self = p_context;
+
+	if (!self || !str_len)
+		return;
+
+	self->packet_printer(msg);
+}
+
+/*
+ * Route the library's default error logger to d_params->packet_printer.
+ *
+ * Returns 0 on success, -1 when no packet printer was supplied or any of
+ * the library set-up calls fails.
+ */
+static int
+cs_etm_decoder__init_def_logger_printing(struct cs_etm_decoder_params *d_params,
+					 struct cs_etm_decoder *decoder)
+{
+	int ret = 0;
+
+	if (d_params->packet_printer == NULL)
+		return -1;
+
+	decoder->packet_printer = d_params->packet_printer;
+
+	/*
+	 * Set up a library default logger to process any printers
+	 * (packet/raw frame) we add later.
+	 */
+	ret = ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1);
+	if (ret != 0)
+		return -1;
+
+	/* no stdout / err / file output */
+	ret = ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL);
+	if (ret != 0)
+		return -1;
+
+	/*
+	 * Set the string CB for the default logger, passes strings to
+	 * perf print logger.
+	 */
+	ret = ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree,
+					      (void *)decoder,
+					      cs_etm_decoder__print_str_cb);
+	/* report this failure like the two above instead of dropping it */
+	if (ret != 0)
+		return -1;
+
+	return 0;
+}
+
+#ifdef CS_LOG_RAW_FRAMES
+/*
+ * Enable the library's built in raw frame printer on the decode tree. Only
+ * done for CS_ETM_OPERATION_PRINT; any other operation leaves the decoder
+ * untouched. Return values of the library calls are not checked.
+ */
+static void
+cs_etm_decoder__init_raw_frame_logging(struct cs_etm_decoder_params *d_params,
+				       struct cs_etm_decoder *decoder)
+{
+	/* Only log these during a --dump operation */
+	if (d_params->operation != CS_ETM_OPERATION_PRINT)
+		return;
+
+	/*
+	 * set up a library default logger to process the
+	 * raw frame printer we add later
+	 */
+	ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1);
+
+	/* no stdout / err / file output */
+	ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL);
+
+	/*
+	 * set the string CB for the default logger,
+	 * passes strings to perf print logger.
+	 */
+	ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree,
+					(void *)decoder,
+					cs_etm_decoder__print_str_cb);
+
+	/* use the built in library printer for the raw frames */
+	ocsd_dt_set_raw_frame_printer(decoder->dcd_tree,
+				      CS_RAW_DEBUG_FLAGS);
+}
+#else
+/* Raw frame logging is compiled out: nothing to set up. */
+static void
+cs_etm_decoder__init_raw_frame_logging(
+		struct cs_etm_decoder_params *d_params __maybe_unused,
+		struct cs_etm_decoder *decoder __maybe_unused)
+{
+}
+#endif
+
+/*
+ * Create a packet-processing decoder named @decoder_name on the decode
+ * tree and attach the protocol packet printer to the trace ID the library
+ * assigned to it. Returns 0 on success, -1 when either step fails.
+ */
+static int cs_etm_decoder__create_packet_printer(struct cs_etm_decoder *decoder,
+						 const char *decoder_name,
+						 void *trace_config)
+{
+	u8 trace_id;
+
+	/* the printer is only attached once the decoder exists */
+	if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name,
+				   OCSD_CREATE_FLG_PACKET_PROC,
+				   trace_config, &trace_id) ||
+	    ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, trace_id, 0))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Create a packet printer for the trace source described by @t_params.
+ * Only CS_ETM_PROTO_ETMV4i is handled; any other protocol yields -1.
+ * Returns the result of cs_etm_decoder__create_packet_printer() otherwise.
+ */
+static int
+cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params,
+					  struct cs_etm_decoder *decoder)
+{
+	ocsd_etmv4_cfg etmv4_cfg;
+
+	if (t_params->protocol != CS_ETM_PROTO_ETMV4i)
+		return -1;
+
+	cs_etm_decoder__gen_etmv4_config(t_params, &etmv4_cfg);
+
+	return cs_etm_decoder__create_packet_printer(decoder,
+						     OCSD_BUILTIN_DCD_ETMV4I,
+						     &etmv4_cfg);
+}
+
+/*
+ * Empty the packet queue: head, tail and packet_count go back to zero and
+ * every slot is filled with placeholder values (0xdeadbeef... addresses,
+ * cleared flags, cpu INT_MIN).
+ */
+static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
+{
+	struct cs_etm_packet *slot = decoder->packet_buffer;
+	struct cs_etm_packet *const end = slot + MAX_BUFFER;
+
+	decoder->head = 0;
+	decoder->tail = 0;
+	decoder->packet_count = 0;
+
+	for (; slot != end; slot++) {
+		slot->start_addr = 0xdeadbeefdeadbeefUL;
+		slot->end_addr = 0xdeadbeefdeadbeefUL;
+		slot->last_instr_taken_branch = false;
+		slot->exc = false;
+		slot->exc_ret = false;
+		slot->cpu = INT_MIN;
+	}
+}
+
+/*
+ * Append a packet of type @sample_type to the queue. Its cpu is looked up
+ * from @trace_chan_id in traceid_list; the addresses get placeholder
+ * values for the caller to overwrite.
+ *
+ * Returns OCSD_RESP_CONT when there is still room afterwards,
+ * OCSD_RESP_WAIT when this packet filled the queue, and
+ * OCSD_RESP_FATAL_SYS_ERR when the queue was already full or the trace ID
+ * is unknown.
+ */
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
+			      const u8 trace_chan_id,
+			      enum cs_etm_sample_type sample_type)
+{
+	struct cs_etm_packet *slot;
+	struct int_node *inode;
+	u32 next;
+
+	if (decoder->packet_count >= MAX_BUFFER - 1)
+		return OCSD_RESP_FATAL_SYS_ERR;
+
+	/* Search the RB tree for the cpu associated with this traceID */
+	inode = intlist__find(traceid_list, trace_chan_id);
+	if (inode == NULL)
+		return OCSD_RESP_FATAL_SYS_ERR;
+
+	/* the tail is advanced first, then the new slot is written */
+	next = (decoder->tail + 1) & (MAX_BUFFER - 1);
+	decoder->tail = next;
+	decoder->packet_count++;
+
+	slot = &decoder->packet_buffer[next];
+	slot->sample_type = sample_type;
+	slot->exc = false;
+	slot->exc_ret = false;
+	slot->cpu = *((int *)inode->priv);
+	slot->start_addr = 0xdeadbeefdeadbeefUL;
+	slot->end_addr = 0xdeadbeefdeadbeefUL;
+
+	return decoder->packet_count == MAX_BUFFER - 1 ? OCSD_RESP_WAIT
+							: OCSD_RESP_CONT;
+}
+
+/*
+ * Queue a CS_ETM_RANGE packet for the instruction range in @elem: start
+ * and end address are copied, and last_instr_taken_branch records whether
+ * the last instruction was an executed direct or indirect branch.
+ * Returns the response of cs_etm_decoder__buffer_packet().
+ */
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
+			     const ocsd_generic_trace_elem *elem,
+			     const uint8_t trace_chan_id)
+{
+	struct cs_etm_packet *packet;
+	int resp;
+
+	resp = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+					     CS_ETM_RANGE);
+	if (resp != OCSD_RESP_CONT && resp != OCSD_RESP_WAIT)
+		return resp;
+
+	/* the packet just queued sits at the tail */
+	packet = &decoder->packet_buffer[decoder->tail];
+	packet->start_addr = elem->st_addr;
+	packet->end_addr = elem->en_addr;
+
+	if (elem->last_i_type == OCSD_INSTR_BR ||
+	    elem->last_i_type == OCSD_INSTR_BR_INDIRECT)
+		packet->last_instr_taken_branch = elem->last_instr_exec;
+	else
+		packet->last_instr_taken_branch = false;
+
+	return resp;
+}
+
+/*
+ * Queue a CS_ETM_TRACE_ON packet for @trace_chan_id. Returns the response
+ * of cs_etm_decoder__buffer_packet().
+ */
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_trace_on(struct cs_etm_decoder *decoder,
+				const uint8_t trace_chan_id)
+{
+	ocsd_datapath_resp_t resp;
+
+	resp = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+					     CS_ETM_TRACE_ON);
+	return resp;
+}
+
+/*
+ * Generic trace element callback registered with the decode tree; @context
+ * is the cs_etm_decoder.
+ *
+ * TRACE_ON and INSTR_RANGE elements are queued as packets and the queueing
+ * response is returned. NO_SYNC clears trace_on; EXCEPTION and
+ * EXCEPTION_RET flag the packet at the tail of the queue. Every other
+ * element is ignored. All of those return OCSD_RESP_CONT.
+ */
+static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
+				const void *context,
+				const ocsd_trc_index_t indx __maybe_unused,
+				const u8 trace_chan_id __maybe_unused,
+				const ocsd_generic_trace_elem *elem)
+{
+	struct cs_etm_decoder *self = (struct cs_etm_decoder *) context;
+	ocsd_datapath_resp_t queued;
+
+	switch (elem->elem_type) {
+	case OCSD_GEN_TRC_ELEM_NO_SYNC:
+		self->trace_on = false;
+		return OCSD_RESP_CONT;
+	case OCSD_GEN_TRC_ELEM_TRACE_ON:
+		queued = cs_etm_decoder__buffer_trace_on(self, trace_chan_id);
+		self->trace_on = true;
+		return queued;
+	case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
+		return cs_etm_decoder__buffer_range(self, elem, trace_chan_id);
+	case OCSD_GEN_TRC_ELEM_EXCEPTION:
+		self->packet_buffer[self->tail].exc = true;
+		return OCSD_RESP_CONT;
+	case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
+		self->packet_buffer[self->tail].exc_ret = true;
+		return OCSD_RESP_CONT;
+	/* elements that are deliberately not acted upon */
+	case OCSD_GEN_TRC_ELEM_UNKNOWN:
+	case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
+	case OCSD_GEN_TRC_ELEM_EO_TRACE:
+	case OCSD_GEN_TRC_ELEM_ADDR_NACC:
+	case OCSD_GEN_TRC_ELEM_TIMESTAMP:
+	case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
+	case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
+	case OCSD_GEN_TRC_ELEM_EVENT:
+	case OCSD_GEN_TRC_ELEM_SWTRACE:
+	case OCSD_GEN_TRC_ELEM_CUSTOM:
+	default:
+		return OCSD_RESP_CONT;
+	}
+}
+
+/*
+ * Create a full decoder for the trace source described by @t_params and
+ * register cs_etm_decoder__gen_trace_elem_printer() as the tree's generic
+ * element output. Only CS_ETM_PROTO_ETMV4i is handled.
+ * Returns 0 on success, -1 on an unsupported protocol or library failure.
+ */
+static int cs_etm_decoder__create_etm_packet_decoder(
+					struct cs_etm_trace_params *t_params,
+					struct cs_etm_decoder *decoder)
+{
+	ocsd_etmv4_cfg etmv4_cfg;
+	u8 trace_id;
+
+	if (t_params->protocol != CS_ETM_PROTO_ETMV4i)
+		return -1;
+
+	cs_etm_decoder__gen_etmv4_config(t_params, &etmv4_cfg);
+
+	if (ocsd_dt_create_decoder(decoder->dcd_tree,
+				   OCSD_BUILTIN_DCD_ETMV4I,
+				   OCSD_CREATE_FLG_FULL_DECODER,
+				   &etmv4_cfg, &trace_id))
+		return -1;
+
+	if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree,
+				       cs_etm_decoder__gen_trace_elem_printer,
+				       decoder))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Create either a packet printer or a full packet decoder for @t_params,
+ * depending on d_params->operation. Returns -1 for any other operation.
+ */
+static int
+cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
+				   struct cs_etm_trace_params *t_params,
+				   struct cs_etm_decoder *decoder)
+{
+	switch (d_params->operation) {
+	case CS_ETM_OPERATION_PRINT:
+		return cs_etm_decoder__create_etm_packet_printer(t_params,
+								 decoder);
+	case CS_ETM_OPERATION_DECODE:
+		return cs_etm_decoder__create_etm_packet_decoder(t_params,
+								 decoder);
+	default:
+		return -1;
+	}
+}
+
+/*
+ * Allocate a decoder, create its decode tree, hook up logging and create
+ * one ETM decoder per entry of @t_params (@num_cpu entries).
+ *
+ * Returns the new decoder, or NULL when an argument is NULL, memory is
+ * exhausted or any set-up step fails; everything created so far is
+ * released in that case.
+ */
+struct cs_etm_decoder *
+cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
+		    struct cs_etm_trace_params t_params[])
+{
+	struct cs_etm_decoder *decoder;
+	ocsd_dcd_tree_src_t format;
+	u32 flags;
+	int cpu;
+
+	if (!t_params || !d_params)
+		return NULL;
+
+	decoder = zalloc(sizeof(*decoder));
+	if (!decoder)
+		return NULL;
+
+	decoder->data = d_params->data;
+	decoder->prev_return = OCSD_RESP_CONT;
+	cs_etm_decoder__clear_buffer(decoder);
+
+	if (d_params->formatted)
+		format = OCSD_TRC_SRC_FRAME_FORMATTED;
+	else
+		format = OCSD_TRC_SRC_SINGLE;
+
+	/*
+	 * Drivers may add barrier frames when used with perf, set up to
+	 * handle this. Barriers consist of an FSYNC packet repeated 4 times.
+	 */
+	flags = OCSD_DFRMTR_RESET_ON_4X_FSYNC;
+	if (d_params->fsyncs)
+		flags |= OCSD_DFRMTR_HAS_FSYNCS;
+	if (d_params->hsyncs)
+		flags |= OCSD_DFRMTR_HAS_HSYNCS;
+	if (d_params->frame_aligned)
+		flags |= OCSD_DFRMTR_FRAME_MEM_ALIGN;
+
+	/* Create decode tree for the data source */
+	decoder->dcd_tree = ocsd_create_dcd_tree(format, flags);
+	if (decoder->dcd_tree == 0)
+		goto err_free_decoder;
+
+	/* init library print logging support */
+	if (cs_etm_decoder__init_def_logger_printing(d_params, decoder) != 0)
+		goto err_free_decoder_tree;
+
+	/* init raw frame logging if required */
+	cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
+
+	for (cpu = 0; cpu < num_cpu; cpu++) {
+		if (cs_etm_decoder__create_etm_decoder(d_params,
+						       &t_params[cpu],
+						       decoder) != 0)
+			goto err_free_decoder_tree;
+	}
+
+	return decoder;
+
+err_free_decoder_tree:
+	ocsd_destroy_dcd_tree(decoder->dcd_tree);
+err_free_decoder:
+	free(decoder);
+	return NULL;
+}
+
+/*
+ * Feed @len bytes of trace data from @buf to the decode tree; @indx is
+ * the index of buf[0] that is passed on to the library.
+ *
+ * What is sent depends on the previous response: after a WAIT the tree is
+ * flushed, after a CONT the next unprocessed bytes are handed over. The
+ * loop stops once all data is processed, the library answers WAIT, or the
+ * previous response is neither of the two.
+ *
+ * *@consumed receives the number of bytes processed and the last response
+ * is saved in decoder->prev_return for the next call.
+ * Returns 0, or -EINVAL when the previous response was neither WAIT nor
+ * CONT.
+ */
+int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
+				       u64 indx, const u8 *buf,
+				       size_t len, size_t *consumed)
+{
+	int ret = 0;
+	ocsd_datapath_resp_t cur = OCSD_RESP_CONT;
+	ocsd_datapath_resp_t prev_return = decoder->prev_return;
+	size_t processed = 0;
+	u32 count;
+
+	while (processed < len) {
+		if (OCSD_DATA_RESP_IS_WAIT(prev_return)) {
+			/* no new data is passed while flushing */
+			cur = ocsd_dt_process_data(decoder->dcd_tree,
+						   OCSD_OP_FLUSH,
+						   0,
+						   0,
+						   NULL,
+						   NULL);
+		} else if (OCSD_DATA_RESP_IS_CONT(prev_return)) {
+			/* NOTE(review): count is assumed to be written by the library on every call - confirm */
+			cur = ocsd_dt_process_data(decoder->dcd_tree,
+						   OCSD_OP_DATA,
+						   indx + processed,
+						   len - processed,
+						   &buf[processed],
+						   &count);
+			processed += count;
+		} else {
+			ret = -EINVAL;
+			break;
+		}
+
+		/*
+		 * Return to the input code if the packet buffer is full.
+		 * Flushing will get done once the packet buffer has been
+		 * processed.
+		 */
+		if (OCSD_DATA_RESP_IS_WAIT(cur))
+			break;
+
+		prev_return = cur;
+	}
+
+	decoder->prev_return = cur;
+	*consumed = processed;
+
+	return ret;
+}
+
+/*
+ * Destroy the decode tree owned by @decoder and free the decoder itself.
+ * NULL is accepted and ignored.
+ */
+void cs_etm_decoder__free(struct cs_etm_decoder *decoder)
+{
+	if (decoder == NULL)
+		return;
+
+	ocsd_destroy_dcd_tree(decoder->dcd_tree);
+	decoder->dcd_tree = NULL;
+
+	free(decoder);
+}
diff --git a/util/cs-etm-decoder/cs-etm-decoder.h b/util/cs-etm-decoder/cs-etm-decoder.h
new file mode 100644
index 0000000..743f5f4
--- /dev/null
+++ b/util/cs-etm-decoder/cs-etm-decoder.h
@@ -0,0 +1,107 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright(C) 2015-2018 Linaro Limited.
+ *
+ * Author: Tor Jeremiassen <tor@ti.com>
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef INCLUDE__CS_ETM_DECODER_H__
+#define INCLUDE__CS_ETM_DECODER_H__
+
+#include <linux/types.h>
+#include <stdio.h>
+
+struct cs_etm_decoder;
+
+/*
+ * Describes a chunk of trace data.
+ * NOTE(review): not used by the decoder code shown alongside this header;
+ * the field notes below are read off the names only - confirm with users.
+ */
+struct cs_etm_buffer {
+	const unsigned char *buf;	/* presumably the start of the data */
+	size_t len;			/* presumably the number of bytes at buf */
+	u64 offset;
+	u64 ref_timestamp;
+};
+
+/* Kind of a queued cs_etm_packet; the values are distinct single bits. */
+enum cs_etm_sample_type {
+	/* queued for an instruction range element */
+	CS_ETM_RANGE = 1 << 0,
+	/* queued for a trace-on element */
+	CS_ETM_TRACE_ON = 1 << 1,
+};
+
+/* One decoded packet as handed out by cs_etm_decoder__get_packet(). */
+struct cs_etm_packet {
+	enum cs_etm_sample_type sample_type;
+	/* range start; 0xdeadbeefdeadbeef until a range is recorded */
+	u64 start_addr;
+	/* range end; 0xdeadbeefdeadbeef until a range is recorded */
+	u64 end_addr;
+	/* set when the range ended in a branch that was executed */
+	u8 last_instr_taken_branch;
+	/* set when an exception element follows this packet */
+	u8 exc;
+	/* set when an exception return element follows this packet */
+	u8 exc_ret;
+	/* cpu looked up from the packet's trace ID */
+	int cpu;
+};
+
+struct cs_etm_queue;
+
+/*
+ * Memory access callback supplied by the user of the decoder. It is called
+ * with the queue, an address, a byte count and a destination buffer; its
+ * u32 result is handed straight back to the decoder library.
+ */
+typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64,
+				  size_t, u8 *);
+
+/*
+ * ETMv4 register values describing one trace source; copied into the
+ * library configuration by cs_etm_decoder__gen_etmv4_config().
+ */
+struct cs_etmv4_trace_params {
+	u32 reg_idr0;
+	u32 reg_idr1;
+	u32 reg_idr2;
+	u32 reg_idr8;
+	u32 reg_configr;
+	u32 reg_traceidr;
+};
+
+/* Parameters of one trace source; protocol selects the union member. */
+struct cs_etm_trace_params {
+	/* one of the CS_ETM_PROTO_* values */
+	int protocol;
+	union {
+		/* used when protocol is CS_ETM_PROTO_ETMV4i */
+		struct cs_etmv4_trace_params etmv4;
+	};
+};
+
+/* Options passed to cs_etm_decoder__new(). */
+struct cs_etm_decoder_params {
+	/* CS_ETM_OPERATION_PRINT or CS_ETM_OPERATION_DECODE */
+	int operation;
+	/* receives the library's log strings; must not be NULL */
+	void (*packet_printer)(const char *msg);
+	/* NOTE(review): not read by cs_etm_decoder__new() - confirm its user */
+	cs_etm_mem_cb_type mem_acc_cb;
+	/* non-zero: frame formatted source, zero: single source */
+	u8 formatted;
+	/* non-zero adds OCSD_DFRMTR_HAS_FSYNCS to the deformatter flags */
+	u8 fsyncs;
+	/* non-zero adds OCSD_DFRMTR_HAS_HSYNCS to the deformatter flags */
+	u8 hsyncs;
+	/* non-zero adds OCSD_DFRMTR_FRAME_MEM_ALIGN to the deformatter flags */
+	u8 frame_aligned;
+	/* opaque pointer passed to the memory access callback */
+	void *data;
+};
+
+/*
+ * The following enums are indexed starting with 1 to align with the
+ * open source coresight trace decoder library.
+ */
+/* Trace protocols; the decoder code only handles CS_ETM_PROTO_ETMV4i. */
+enum {
+	CS_ETM_PROTO_ETMV3 = 1,
+	CS_ETM_PROTO_ETMV4i,
+	CS_ETM_PROTO_ETMV4d,
+};
+
+/* What a decoder is created for: printing packets or full decode. */
+enum {
+	CS_ETM_OPERATION_PRINT = 1,
+	CS_ETM_OPERATION_DECODE,
+};
+
+int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
+				       u64 indx, const u8 *buf,
+				       size_t len, size_t *consumed);
+
+struct cs_etm_decoder *
+cs_etm_decoder__new(int num_cpu,
+		    struct cs_etm_decoder_params *d_params,
+		    struct cs_etm_trace_params t_params[]);
+
+void cs_etm_decoder__free(struct cs_etm_decoder *decoder);
+
+int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
+				      u64 start, u64 end,
+				      cs_etm_mem_cb_type cb_func);
+
+int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
+			       struct cs_etm_packet *packet);
+
+int cs_etm_decoder__reset(struct cs_etm_decoder *decoder);
+
+#endif /* INCLUDE__CS_ETM_DECODER_H__ */
diff --git a/util/cs-etm.c b/util/cs-etm.c
new file mode 100644
index 0000000..40020b1
--- /dev/null
+++ b/util/cs-etm.c
@@ -0,0 +1,1400 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015-2018 Linaro Limited.
+ *
+ * Author: Tor Jeremiassen <tor@ti.com>
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+
+#include <stdlib.h>
+
+#include "auxtrace.h"
+#include "color.h"
+#include "cs-etm.h"
+#include "cs-etm-decoder/cs-etm-decoder.h"
+#include "debug.h"
+#include "evlist.h"
+#include "intlist.h"
+#include "machine.h"
+#include "map.h"
+#include "perf.h"
+#include "thread.h"
+#include "thread_map.h"
+#include "thread-stack.h"
+#include "util.h"
+
+#define MAX_TIMESTAMP (~0ULL)
+
+/*
+ * A64 instructions are always 4 bytes
+ *
+ * Only A64 is supported, so can use this constant for converting between
+ * addresses and instruction counts, calculating offsets etc
+ */
+#define A64_INSTR_SIZE 4
+
+struct cs_etm_auxtrace {
+	struct auxtrace auxtrace;
+	struct auxtrace_queues queues;
+	struct auxtrace_heap heap;
+	struct itrace_synth_opts synth_opts;
+	struct perf_session *session;
+	struct machine *machine;
+	struct thread *unknown_thread;
+
+	u8 timeless_decoding;
+	u8 snapshot_mode;
+	u8 data_queued;
+	u8 sample_branches;
+	u8 sample_instructions;
+
+	int num_cpu;
+	u32 auxtrace_type;
+	u64 branches_sample_type;
+	u64 branches_id;
+	u64 instructions_sample_type;
+	u64 instructions_sample_period;
+	u64 instructions_id;
+	u64 **metadata;
+	u64 kernel_start;
+	unsigned int pmu_type;
+};
+
+struct cs_etm_queue {
+	struct cs_etm_auxtrace *etm;
+	struct thread *thread;
+	struct cs_etm_decoder *decoder;
+	struct auxtrace_buffer *buffer;
+	const struct cs_etm_state *state;
+	union perf_event *event_buf;
+	unsigned int queue_nr;
+	pid_t pid, tid;
+	int cpu;
+	u64 time;
+	u64 timestamp;
+	u64 offset;
+	u64 period_instructions;
+	struct branch_stack *last_branch;
+	struct branch_stack *last_branch_rb;
+	size_t last_branch_pos;
+	struct cs_etm_packet *prev_packet;
+	struct cs_etm_packet *packet;
+};
+
+static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
+static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
+					   pid_t tid, u64 time_);
+
+static void cs_etm__packet_dump(const char *pkt_string)
+{
+	int n = strlen(pkt_string);
+
+	/* Print the packet in blue, adding a newline only if it lacks one */
+	if (!n || pkt_string[n - 1] != '\n')
+		color_fprintf(stdout, PERF_COLOR_BLUE, "	%s\n", pkt_string);
+	else
+		color_fprintf(stdout, PERF_COLOR_BLUE, "	%s", pkt_string);
+
+	fflush(stdout);
+}
+
+/* Print every trace packet found in @buffer to stdout (dump_trace mode) */
+static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
+			       struct auxtrace_buffer *buffer)
+{
+	int i, ret;
+	const char *color = PERF_COLOR_BLUE;
+	struct cs_etm_decoder_params d_params = { .data = NULL, };
+	struct cs_etm_trace_params *t_params;
+	struct cs_etm_decoder *decoder;
+	size_t buffer_used = 0;
+
+	fprintf(stdout, "\n");
+	color_fprintf(stdout, color,
+		     ". ... CoreSight ETM Trace data: size %zu bytes\n",
+		     buffer->size);
+
+	/* Use metadata to fill in trace parameters for trace decoder */
+	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
+	if (!t_params)
+		return;	/* out of memory, nothing can be decoded */
+	for (i = 0; i < etm->num_cpu; i++) {
+		t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
+		t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0];
+		t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1];
+		t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2];
+		t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8];
+		t_params[i].etmv4.reg_configr =
+					etm->metadata[i][CS_ETMV4_TRCCONFIGR];
+		t_params[i].etmv4.reg_traceidr =
+					etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
+	}
+
+	/* Set decoder parameters to simply print the trace packets */
+	d_params.packet_printer = cs_etm__packet_dump;
+	d_params.operation = CS_ETM_OPERATION_PRINT;
+	d_params.formatted = true;
+	d_params.fsyncs = false;
+	d_params.hsyncs = false;
+	d_params.frame_aligned = true;
+
+	decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
+	zfree(&t_params);
+	if (!decoder)
+		return;
+	do {
+		size_t consumed;
+
+		ret = cs_etm_decoder__process_data_block(
+				decoder, buffer->offset,
+				&((u8 *)buffer->data)[buffer_used],
+				buffer->size - buffer_used, &consumed);
+		if (ret)
+			break;
+
+		buffer_used += consumed;
+	} while (buffer_used < buffer->size);
+	cs_etm_decoder__free(decoder);
+}
+
+static int cs_etm__flush_events(struct perf_session *session,
+				struct perf_tool *tool)
+{
+	struct cs_etm_auxtrace *etm;
+	int err;
+
+	/* Nothing to synthesize when only dumping the raw trace */
+	if (dump_trace)
+		return 0;
+
+	/* Only ordered events and timeless decoding are handled here */
+	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
+	if (!tool->ordered_events || !etm->timeless_decoding)
+		return -EINVAL;
+
+	/* Pick up any queue that received data since the last call */
+	err = cs_etm__update_queues(etm);
+	if (err < 0)
+		return err;
+
+	/* tid -1 selects every queue, see cs_etm__process_timeless_queues() */
+	return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1);
+}
+
+/* Drop a queue's thread ref, decoder and buffers, then free the queue */
+static void cs_etm__free_queue(void *priv)
+{
+	struct cs_etm_queue *etmq = priv;
+
+	if (!etmq)
+		return;
+	thread__zput(etmq->thread);
+	cs_etm_decoder__free(etmq->decoder);
+	zfree(&etmq->event_buf);
+	zfree(&etmq->last_branch);
+	zfree(&etmq->last_branch_rb);
+	zfree(&etmq->prev_packet);
+	zfree(&etmq->packet);
+	free(etmq);
+}
+
+static void cs_etm__free_events(struct perf_session *session)
+{
+	struct cs_etm_auxtrace *etm;
+	struct auxtrace_queue *queue;
+	unsigned int nr;
+
+	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
+	/* Drop the per-queue decoder state before the queues themselves */
+	for (nr = 0; nr < etm->queues.nr_queues; nr++) {
+		queue = &etm->queues.queue_array[nr];
+		cs_etm__free_queue(queue->priv);
+		queue->priv = NULL;
+	}
+	auxtrace_queues__free(&etm->queues);
+}
+
+/* Tear down all CoreSight decoding state attached to @session */
+static void cs_etm__free(struct perf_session *session)
+{
+	int i;
+	struct int_node *inode, *tmp;
+	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+						   struct cs_etm_auxtrace,
+						   auxtrace);
+	cs_etm__free_events(session);
+	session->auxtrace = NULL;
+
+	/* First remove all traceID/CPU# nodes from the RB tree */
+	intlist__for_each_entry_safe(inode, tmp, traceid_list)
+		intlist__remove(traceid_list, inode);
+	/* Then the RB tree itself */
+	intlist__delete(traceid_list);
+
+	for (i = 0; i < aux->num_cpu; i++)
+		zfree(&aux->metadata[i]);
+	zfree(&aux->metadata);
+	zfree(&aux);
+}
+
+/*
+ * Decoder memory callback: read up to @size bytes at @address into @buffer.
+ * Returns the bytes read; 0 on any failure as the return type is unsigned.
+ */
+static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
+			      size_t size, u8 *buffer)
+{
+	u8  cpumode;
+	u64 offset;
+	int len;
+	struct thread *thread;
+	struct machine *machine;
+	struct addr_location al;
+
+	if (!etmq)
+		return 0;
+
+	machine = etmq->etm->machine;
+	if (address >= etmq->etm->kernel_start)
+		cpumode = PERF_RECORD_MISC_KERNEL;
+	else
+		cpumode = PERF_RECORD_MISC_USER;
+
+	thread = etmq->thread;
+	if (!thread) {
+		if (cpumode != PERF_RECORD_MISC_KERNEL)
+			return 0;
+		thread = etmq->etm->unknown_thread;
+	}
+
+	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, address, &al);
+	if (!al.map || !al.map->dso)
+		return 0;
+
+	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
+	    dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE))
+		return 0;
+
+	offset = al.map->map_ip(al.map, address);
+	map__load(al.map);
+	len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size);
+	if (len <= 0)
+		return 0;
+
+	return len;
+}
+
+/* Allocate and initialise the decoding state of one queue; NULL on failure */
+static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
+						unsigned int queue_nr)
+{
+	int i;
+	struct cs_etm_decoder_params d_params = { .mem_acc_cb = NULL, };
+	struct cs_etm_trace_params  *t_params;
+	struct cs_etm_queue *etmq;
+	size_t szp = sizeof(struct cs_etm_packet);
+
+	etmq = zalloc(sizeof(*etmq));
+	if (!etmq)
+		return NULL;
+
+	etmq->packet = zalloc(szp);
+	if (!etmq->packet)
+		goto out_free;
+
+	if (etm->synth_opts.last_branch || etm->sample_branches) {
+		etmq->prev_packet = zalloc(szp);
+		if (!etmq->prev_packet)
+			goto out_free;
+	}
+
+	if (etm->synth_opts.last_branch) {
+		size_t sz = sizeof(struct branch_stack);
+
+		sz += etm->synth_opts.last_branch_sz *
+		      sizeof(struct branch_entry);
+		etmq->last_branch = zalloc(sz);
+		if (!etmq->last_branch)
+			goto out_free;
+		etmq->last_branch_rb = zalloc(sz);
+		if (!etmq->last_branch_rb)
+			goto out_free;
+	}
+
+	etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+	if (!etmq->event_buf)
+		goto out_free;
+
+	etmq->etm = etm;
+	etmq->queue_nr = queue_nr;
+	etmq->pid = -1;
+	etmq->tid = -1;
+	etmq->cpu = -1;
+
+	/* Use metadata to fill in trace parameters for trace decoder */
+	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
+	if (!t_params)
+		goto out_free;
+
+	for (i = 0; i < etm->num_cpu; i++) {
+		t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
+		t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0];
+		t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1];
+		t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2];
+		t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8];
+		t_params[i].etmv4.reg_configr =
+					etm->metadata[i][CS_ETMV4_TRCCONFIGR];
+		t_params[i].etmv4.reg_traceidr =
+					etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
+	}
+
+	/* Set decoder parameters to decode the trace packets */
+	d_params.packet_printer = cs_etm__packet_dump;
+	d_params.operation = CS_ETM_OPERATION_DECODE;
+	d_params.formatted = true;
+	d_params.fsyncs = false;
+	d_params.hsyncs = false;
+	d_params.frame_aligned = true;
+	d_params.data = etmq;
+
+	etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
+
+	zfree(&t_params);
+
+	if (!etmq->decoder)
+		goto out_free;
+
+	/*
+	 * Register a function to handle all memory accesses required by
+	 * the trace decoder library.
+	 */
+	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
+					      0x0L, ((u64) -1L),
+					      cs_etm__mem_access))
+		goto out_free_decoder;
+
+	etmq->offset = 0;
+	etmq->period_instructions = 0;
+
+	return etmq;
+
+out_free_decoder:
+	cs_etm_decoder__free(etmq->decoder);
+out_free:
+	zfree(&etmq->event_buf);
+	zfree(&etmq->last_branch);
+	zfree(&etmq->last_branch_rb);
+	zfree(&etmq->prev_packet);
+	zfree(&etmq->packet);
+	free(etmq);
+
+	return NULL;
+}
+
+static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
+			       struct auxtrace_queue *queue,
+			       unsigned int queue_nr)
+{
+	struct cs_etm_queue *new_etmq;
+
+	/* Skip queues that are already set up or hold no buffer yet */
+	if (queue->priv)
+		return 0;
+	if (list_empty(&queue->head))
+		return 0;
+
+	new_etmq = cs_etm__alloc_queue(etm, queue_nr);
+	if (!new_etmq)
+		return -ENOMEM;
+
+	/* cs_etm__alloc_queue() left cpu at -1, keep it if the queue has none */
+	new_etmq->tid = queue->tid;
+	if (queue->cpu != -1)
+		new_etmq->cpu = queue->cpu;
+	queue->priv = new_etmq;
+	return 0;
+}
+
+static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
+{
+	struct auxtrace_queue *queues = etm->queues.queue_array;
+	unsigned int nr = 0;
+	int err = 0;
+
+	/* Stop at the first queue that fails to set up */
+	while (!err && nr < etm->queues.nr_queues) {
+		err = cs_etm__setup_queue(etm, &queues[nr], nr);
+		nr++;
+	}
+	return err;
+}
+
+static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
+{
+	/* Queues only need (re)setting up after new data was added */
+	if (!etm->queues.new_data)
+		return 0;
+
+	etm->queues.new_data = false;
+	return cs_etm__setup_queues(etm);
+}
+
+static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
+{
+	struct branch_stack *bs_src = etmq->last_branch_rb;
+	struct branch_stack *bs_dst = etmq->last_branch;
+	size_t nr = 0;
+
+	/*
+	 * Set the number of records before early exit: ->nr is used to
+	 * determine how many branches to copy from ->entries.
+	 */
+	bs_dst->nr = bs_src->nr;
+
+	/*
+	 * Early exit when there is nothing to copy.
+	 */
+	if (!bs_src->nr)
+		return;
+
+	/*
+	 * As bs_src->entries is a circular buffer, we need to copy from it in
+	 * two steps.  First, copy the branches from the most recently inserted
+	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
+	 */
+	nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos;
+	memcpy(&bs_dst->entries[0],
+	       &bs_src->entries[etmq->last_branch_pos],
+	       sizeof(struct branch_entry) * nr);
+
+	/*
+	 * If we wrapped around at least once, the branches from the beginning
+	 * of the bs_src->entries buffer and until the ->last_branch_pos element
+	 * are older valid branches: copy them over.  The total number of
+	 * branches copied over will be equal to the number of branches asked by
+	 * the user in last_branch_sz.
+	 */
+	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
+		memcpy(&bs_dst->entries[nr],
+		       &bs_src->entries[0],
+		       sizeof(struct branch_entry) * etmq->last_branch_pos);
+	}
+}
+
+static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
+{
+	etmq->last_branch_pos = 0;
+	etmq->last_branch_rb->nr = 0;
+}
+
+static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
+{
+	/*
+	 * The packet records the execution range with an exclusive end address
+	 *
+	 * A64 instructions are constant size, so the last executed
+	 * instruction is A64_INSTR_SIZE before the end address
+	 * Will need to do instruction level decode for T32 instructions as
+	 * they can be variable size (not yet supported).
+	 */
+	return packet->end_addr - A64_INSTR_SIZE;
+}
+
+static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
+{
+	/*
+	 * Only A64 instructions are currently supported, so can get
+	 * instruction count by dividing.
+	 * Will need to do instruction level decode for T32 instructions as
+	 * they can be variable size (not yet supported).
+	 */
+	return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
+}
+
+static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
+				     u64 offset)
+{
+	/*
+	 * Only A64 instructions are currently supported, so can get
+	 * instruction address by multiplying.
+	 * Will need to do instruction level decode for T32 instructions as
+	 * they can be variable size (not yet supported).
+	 */
+	return packet->start_addr + offset * A64_INSTR_SIZE;
+}
+
+static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
+{
+	struct branch_stack *bs = etmq->last_branch_rb;
+	struct branch_entry *be;
+
+	/*
+	 * The branches are recorded in a circular buffer in reverse
+	 * chronological order: we start recording from the last element of the
+	 * buffer down.  After writing the first element of the stack, move the
+	 * insert position back to the end of the buffer.
+	 */
+	if (!etmq->last_branch_pos)
+		etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
+
+	etmq->last_branch_pos -= 1;
+
+	be       = &bs->entries[etmq->last_branch_pos];
+	be->from = cs_etm__last_executed_instr(etmq->prev_packet);
+	be->to	 = etmq->packet->start_addr;
+	/* No support for mispredict */
+	be->flags.mispred = 0;
+	be->flags.predicted = 1;
+
+	/*
+	 * Increment bs->nr until reaching the number of last branches asked by
+	 * the user on the command line.
+	 */
+	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
+		bs->nr += 1;
+}
+
+static int cs_etm__inject_event(union perf_event *event,
+			       struct perf_sample *sample, u64 type)
+{
+	event->header.size = perf_event__sample_event_size(sample, type, 0);
+	return perf_event__synthesize_sample(event, type, 0, sample);
+}
+
+
+static int
+cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
+{
+	struct auxtrace_buffer *aux_buffer = etmq->buffer;
+	struct auxtrace_buffer *old_buffer = aux_buffer;
+	struct auxtrace_queue *queue;
+
+	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
+
+	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
+
+	/* If no more data, drop the previous auxtrace_buffer and return */
+	if (!aux_buffer) {
+		if (old_buffer)
+			auxtrace_buffer__drop_data(old_buffer);
+		buff->len = 0;
+		return 0;
+	}
+
+	etmq->buffer = aux_buffer;
+
+	/* If the aux_buffer doesn't have data associated, try to load it */
+	if (!aux_buffer->data) {
+		/* get the file desc associated with the perf data file */
+		int fd = perf_data__fd(etmq->etm->session->data);
+
+		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
+		if (!aux_buffer->data)
+			return -ENOMEM;
+	}
+
+	/* If valid, drop the previous buffer */
+	if (old_buffer)
+		auxtrace_buffer__drop_data(old_buffer);
+
+	buff->offset = aux_buffer->offset;
+	buff->len = aux_buffer->size;
+	buff->buf = aux_buffer->data;
+
+	buff->ref_timestamp = aux_buffer->reference;
+
+	return buff->len;
+}
+
+static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
+				    struct auxtrace_queue *queue)
+{
+	struct cs_etm_queue *etmq = queue->priv;
+
+	/* CPU-wide tracing isn't supported yet */
+	if (queue->tid == -1)
+		return;
+
+	if (!etmq->thread && etmq->tid != -1)
+		etmq->thread = machine__find_thread(etm->machine, -1,
+						    etmq->tid);
+	if (!etmq->thread)
+		return;
+
+	etmq->pid = etmq->thread->pid_;
+	if (queue->cpu == -1)
+		etmq->cpu = etmq->thread->cpu;
+}
+
+static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
+					    u64 addr, u64 period)
+{
+	int ret = 0;
+	struct cs_etm_auxtrace *etm = etmq->etm;
+	union perf_event *event = etmq->event_buf;
+	struct perf_sample sample = {.ip = 0,};
+
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.size = sizeof(struct perf_event_header);
+
+	sample.ip = addr;
+	sample.pid = etmq->pid;
+	sample.tid = etmq->tid;
+	sample.id = etmq->etm->instructions_id;
+	sample.stream_id = etmq->etm->instructions_id;
+	sample.period = period;
+	sample.cpu = etmq->packet->cpu;
+	sample.flags = 0;
+	sample.insn_len = 1;
+	sample.cpumode = event->header.misc;
+
+	if (etm->synth_opts.last_branch) {
+		cs_etm__copy_last_branch_rb(etmq);
+		sample.branch_stack = etmq->last_branch;
+	}
+
+	if (etm->synth_opts.inject) {
+		ret = cs_etm__inject_event(event, &sample,
+					   etm->instructions_sample_type);
+		if (ret)
+			return ret;
+	}
+
+	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
+
+	if (ret)
+		pr_err(
+			"CS ETM Trace: failed to deliver instruction event, error %d\n",
+			ret);
+
+	if (etm->synth_opts.last_branch)
+		cs_etm__reset_last_branch_rb(etmq);
+
+	return ret;
+}
+
+/*
+ * The cs etm packet encodes an instruction range between a branch target
+ * and the next taken branch. Generate sample accordingly.
+ */
+static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
+{
+	int ret = 0;
+	struct cs_etm_auxtrace *etm = etmq->etm;
+	struct perf_sample sample = {.ip = 0,};
+	union perf_event *event = etmq->event_buf;
+	struct dummy_branch_stack {
+		u64			nr;
+		struct branch_entry	entries;
+	} dummy_bs;
+
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.size = sizeof(struct perf_event_header);
+
+	sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
+	sample.pid = etmq->pid;
+	sample.tid = etmq->tid;
+	sample.addr = etmq->packet->start_addr;
+	sample.id = etmq->etm->branches_id;
+	sample.stream_id = etmq->etm->branches_id;
+	sample.period = 1;
+	sample.cpu = etmq->packet->cpu;
+	sample.flags = 0;
+	sample.cpumode = PERF_RECORD_MISC_USER;
+
+	/*
+	 * perf report cannot handle events without a branch stack
+	 */
+	if (etm->synth_opts.last_branch) {
+		dummy_bs = (struct dummy_branch_stack){
+			.nr = 1,
+			.entries = {
+				.from = sample.ip,
+				.to = sample.addr,
+			},
+		};
+		sample.branch_stack = (struct branch_stack *)&dummy_bs;
+	}
+
+	if (etm->synth_opts.inject) {
+		ret = cs_etm__inject_event(event, &sample,
+					   etm->branches_sample_type);
+		if (ret)
+			return ret;
+	}
+
+	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
+
+	if (ret)
+		pr_err(
+		"CS ETM Trace: failed to deliver branch event, error %d\n",
+		ret);
+
+	return ret;
+}
+
+struct cs_etm_synth {
+	struct perf_tool dummy_tool;
+	struct perf_session *session;
+};
+
+static int cs_etm__event_synth(struct perf_tool *tool,
+			       union perf_event *event,
+			       struct perf_sample *sample __maybe_unused,
+			       struct machine *machine __maybe_unused)
+{
+	struct cs_etm_synth *cs_etm_synth =
+		      container_of(tool, struct cs_etm_synth, dummy_tool);
+
+	return perf_session__deliver_synth_event(cs_etm_synth->session,
+						 event, NULL);
+}
+
+static int cs_etm__synth_event(struct perf_session *session,
+			       struct perf_event_attr *attr, u64 id)
+{
+	struct cs_etm_synth cs_etm_synth;
+
+	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
+	cs_etm_synth.session = session;
+
+	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
+					   &id, cs_etm__event_synth);
+}
+
+static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
+				struct perf_session *session)
+{
+	struct perf_evlist *evlist = session->evlist;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
+	bool found = false;
+	u64 id;
+	int err;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == etm->pmu_type) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		pr_debug("No selected events with CoreSight Trace data\n");
+		return 0;
+	}
+
+	memset(&attr, 0, sizeof(struct perf_event_attr));
+	attr.size = sizeof(struct perf_event_attr);
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
+	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+			    PERF_SAMPLE_PERIOD;
+	if (etm->timeless_decoding)
+		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+	else
+		attr.sample_type |= PERF_SAMPLE_TIME;
+
+	attr.exclude_user = evsel->attr.exclude_user;
+	attr.exclude_kernel = evsel->attr.exclude_kernel;
+	attr.exclude_hv = evsel->attr.exclude_hv;
+	attr.exclude_host = evsel->attr.exclude_host;
+	attr.exclude_guest = evsel->attr.exclude_guest;
+	attr.sample_id_all = evsel->attr.sample_id_all;
+	attr.read_format = evsel->attr.read_format;
+
+	/* create new id val to be a fixed offset from evsel id */
+	id = evsel->id[0] + 1000000000;
+
+	if (!id)
+		id = 1;
+
+	if (etm->synth_opts.branches) {
+		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+		attr.sample_period = 1;
+		attr.sample_type |= PERF_SAMPLE_ADDR;
+		err = cs_etm__synth_event(session, &attr, id);
+		if (err)
+			return err;
+		etm->sample_branches = true;
+		etm->branches_sample_type = attr.sample_type;
+		etm->branches_id = id;
+		id += 1;
+		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
+	}
+
+	if (etm->synth_opts.last_branch)
+		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+
+	if (etm->synth_opts.instructions) {
+		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+		attr.sample_period = etm->synth_opts.period;
+		etm->instructions_sample_period = attr.sample_period;
+		err = cs_etm__synth_event(session, &attr, id);
+		if (err)
+			return err;
+		etm->sample_instructions = true;
+		etm->instructions_sample_type = attr.sample_type;
+		etm->instructions_id = id;
+		id += 1;
+	}
+
+	return 0;
+}
+
+static int cs_etm__sample(struct cs_etm_queue *etmq)
+{
+	struct cs_etm_auxtrace *etm = etmq->etm;
+	struct cs_etm_packet *tmp;
+	int ret;
+	u64 instrs_executed;
+
+	instrs_executed = cs_etm__instr_count(etmq->packet);
+	etmq->period_instructions += instrs_executed;
+
+	/*
+	 * Record a branch when the last instruction in
+	 * PREV_PACKET is a branch.
+	 */
+	if (etm->synth_opts.last_branch &&
+	    etmq->prev_packet &&
+	    etmq->prev_packet->sample_type == CS_ETM_RANGE &&
+	    etmq->prev_packet->last_instr_taken_branch)
+		cs_etm__update_last_branch_rb(etmq);
+
+	if (etm->sample_instructions &&
+	    etmq->period_instructions >= etm->instructions_sample_period) {
+		/*
+		 * Emit instruction sample periodically
+		 * TODO: allow period to be defined in cycles and clock time
+		 */
+
+		/* Get number of instructions executed after the sample point */
+		u64 instrs_over = etmq->period_instructions -
+			etm->instructions_sample_period;
+
+		/*
+		 * Calculate the address of the sampled instruction (-1 as
+		 * sample is reported as though instruction has just been
+		 * executed, but PC has not advanced to next instruction)
+		 */
+		u64 offset = (instrs_executed - instrs_over - 1);
+		u64 addr = cs_etm__instr_addr(etmq->packet, offset);
+
+		ret = cs_etm__synth_instruction_sample(
+			etmq, addr, etm->instructions_sample_period);
+		if (ret)
+			return ret;
+
+		/* Carry remaining instructions into next sample period */
+		etmq->period_instructions = instrs_over;
+	}
+
+	if (etm->sample_branches &&
+	    etmq->prev_packet &&
+	    etmq->prev_packet->sample_type == CS_ETM_RANGE &&
+	    etmq->prev_packet->last_instr_taken_branch) {
+		ret = cs_etm__synth_branch_sample(etmq);
+		if (ret)
+			return ret;
+	}
+
+	if (etm->sample_branches || etm->synth_opts.last_branch) {
+		/*
+		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+		 * the next incoming packet.
+		 */
+		tmp = etmq->packet;
+		etmq->packet = etmq->prev_packet;
+		etmq->prev_packet = tmp;
+	}
+
+	return 0;
+}
+
+static int cs_etm__flush(struct cs_etm_queue *etmq)
+{
+	int err = 0;
+	struct cs_etm_packet *tmp;
+
+	if (etmq->etm->synth_opts.last_branch &&
+	    etmq->prev_packet &&
+	    etmq->prev_packet->sample_type == CS_ETM_RANGE) {
+		/*
+		 * Generate a last branch event for the branches left in the
+		 * circular buffer at the end of the trace.
+		 *
+		 * Use the address of the end of the last reported execution
+		 * range
+		 */
+		u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);
+
+		err = cs_etm__synth_instruction_sample(
+			etmq, addr,
+			etmq->period_instructions);
+		etmq->period_instructions = 0;
+
+		/*
+		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+		 * the next incoming packet.
+		 */
+		tmp = etmq->packet;
+		etmq->packet = etmq->prev_packet;
+		etmq->prev_packet = tmp;
+	}
+
+	return err;
+}
+
+/* Decode every pending buffer of @etmq, synthesizing samples on the way */
+static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
+{
+	struct cs_etm_auxtrace *etm = etmq->etm;
+	struct cs_etm_buffer buffer;
+	size_t buffer_used, processed;
+	int err = 0;
+
+	if (!etm->kernel_start)
+		etm->kernel_start = machine__kernel_start(etm->machine);
+	/* Go through each buffer in the queue and decode them one by one */
+	while (1) {
+		buffer_used = 0;
+		memset(&buffer, 0, sizeof(buffer));
+		err = cs_etm__get_trace(&buffer, etmq);
+		if (err <= 0)
+			return err;
+		/*
+		 * We cannot assume consecutive blocks in the data file are
+		 * contiguous, reset the decoder to force re-sync.
+		 */
+		err = cs_etm_decoder__reset(etmq->decoder);
+		if (err != 0)
+			return err;
+
+		/* Run trace decoder until buffer consumed or end of trace */
+		do {
+			processed = 0;
+			err = cs_etm_decoder__process_data_block(
+				etmq->decoder,
+				etmq->offset,
+				&buffer.buf[buffer_used],
+				buffer.len - buffer_used,
+				&processed);
+			if (err)
+				return err;
+
+			etmq->offset += processed;
+			buffer_used += processed;
+
+			/* Process each packet in this chunk */
+			while (1) {
+				err = cs_etm_decoder__get_packet(etmq->decoder,
+								 etmq->packet);
+				if (err <= 0)
+					/*
+					 * Stop processing this chunk on
+					 * end of data or error
+					 */
+					break;
+
+				switch (etmq->packet->sample_type) {
+				case CS_ETM_RANGE:
+					/*
+					 * Instruction range: emit samples.
+					 * NOTE(review): the return value of
+					 * cs_etm__sample() is ignored here.
+					 */
+					cs_etm__sample(etmq);
+					break;
+				case CS_ETM_TRACE_ON:
+					/*
+					 * Trace discontinuity: flush previous
+					 * branch stack (result is ignored)
+					 */
+					cs_etm__flush(etmq);
+					break;
+				default:
+					break;
+				}
+			}
+		} while (buffer.len > buffer_used);
+
+		if (err == 0)
+			/* Flush any remaining branch stack entries */
+			err = cs_etm__flush(etmq);
+	}
+
+	return err;
+}
+
+static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
+					   pid_t tid, u64 time_)
+{
+	unsigned int i;
+	struct auxtrace_queues *queues = &etm->queues;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
+		struct cs_etm_queue *etmq = queue->priv;
+		/* A tid of -1 selects every queue that has been set up */
+		if (etmq && ((tid == -1) || (etmq->tid == tid))) {
+			etmq->time = time_;
+			cs_etm__set_pid_tid_cpu(etm, queue);
+			cs_etm__run_decoder(etmq);
+		}
+	}
+	/* NOTE(review): cs_etm__run_decoder() errors are dropped here */
+	return 0;
+}
+
+static int cs_etm__process_event(struct perf_session *session,
+				 union perf_event *event,
+				 struct perf_sample *sample,
+				 struct perf_tool *tool)
+{
+	int err = 0;
+	u64 timestamp;
+	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+						   struct cs_etm_auxtrace,
+						   auxtrace);
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events) {
+		pr_err("CoreSight ETM Trace requires ordered events\n");
+		return -EINVAL;
+	}
+
+	if (!etm->timeless_decoding)
+		return -EINVAL;
+
+	if (sample->time && (sample->time != (u64) -1))
+		timestamp = sample->time;
+	else
+		timestamp = 0;
+
+	if (timestamp || etm->timeless_decoding) {
+		err = cs_etm__update_queues(etm);
+		if (err)
+			return err;
+	}
+
+	if (event->header.type == PERF_RECORD_EXIT)
+		return cs_etm__process_timeless_queues(etm,
+						       event->fork.tid,
+						       sample->time);
+
+	return 0;
+}
+
+static int cs_etm__process_auxtrace_event(struct perf_session *session,
+					  union perf_event *event,
+					  struct perf_tool *tool __maybe_unused)
+{
+	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+						   struct cs_etm_auxtrace,
+						   auxtrace);
+	if (!etm->data_queued) {
+		struct auxtrace_buffer *buffer;
+		off_t  data_offset;
+		int fd = perf_data__fd(session->data);
+		bool is_pipe = perf_data__is_pipe(session->data);
+		int err;
+
+		if (is_pipe)
+			data_offset = 0;
+		else {
+			data_offset = lseek(fd, 0, SEEK_CUR);
+			if (data_offset == -1)
+				return -errno;
+		}
+
+		err = auxtrace_queues__add_event(&etm->queues, session,
+						 event, data_offset, &buffer);
+		if (err)
+			return err;
+
+		if (dump_trace)
+			if (auxtrace_buffer__get_data(buffer, fd)) {
+				cs_etm__dump_event(etm, buffer);
+				auxtrace_buffer__put_data(buffer);
+			}
+	}
+
+	return 0;
+}
+
+static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
+{
+	struct perf_evlist *events = etm->session->evlist;
+	struct perf_evsel *pos;
+
+	/*
+	 * Decoding is timeless only when no event in the session samples
+	 * time: a single event with PERF_SAMPLE_TIME set is enough to
+	 * answer false.
+	 */
+	evlist__for_each_entry(events, pos) {
+		if (pos->attr.sample_type & PERF_SAMPLE_TIME)
+			return false;
+	}
+
+	return true;
+}
+
+static const char * const cs_etm_global_header_fmts[] = {
+	[CS_HEADER_VERSION_0]	= "	Header version		       %llx\n",
+	[CS_PMU_TYPE_CPUS]	= "	PMU type/num cpus	       %llx\n",
+	[CS_ETM_SNAPSHOT]	= "	Snapshot		       %llx\n",
+};
+
+static const char * const cs_etm_priv_fmts[] = {
+	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
+	[CS_ETM_CPU]		= "	CPU			       %lld\n",
+	[CS_ETM_ETMCR]		= "	ETMCR			       %llx\n",
+	[CS_ETM_ETMTRACEIDR]	= "	ETMTRACEIDR		       %llx\n",
+	[CS_ETM_ETMCCER]	= "	ETMCCER			       %llx\n",
+	[CS_ETM_ETMIDR]		= "	ETMIDR			       %llx\n",
+};
+
+static const char * const cs_etmv4_priv_fmts[] = {
+	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
+	[CS_ETM_CPU]		= "	CPU			       %lld\n",
+	[CS_ETMV4_TRCCONFIGR]	= "	TRCCONFIGR		       %llx\n",
+	[CS_ETMV4_TRCTRACEIDR]	= "	TRCTRACEIDR		       %llx\n",
+	[CS_ETMV4_TRCIDR0]	= "	TRCIDR0			       %llx\n",
+	[CS_ETMV4_TRCIDR1]	= "	TRCIDR1			       %llx\n",
+	[CS_ETMV4_TRCIDR2]	= "	TRCIDR2			       %llx\n",
+	[CS_ETMV4_TRCIDR8]	= "	TRCIDR8			       %llx\n",
+	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		       %llx\n",
+};
+
+static void cs_etm__print_auxtrace_info(u64 *val, int num)
+{
+	int i, j, cpu = 0;
+
+	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
+		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);
+
+	for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) {
+		if (val[i] == __perf_cs_etmv3_magic)
+			for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++)
+				fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
+		else if (val[i] == __perf_cs_etmv4_magic)
+			for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++)
+				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
+		else
+			/* failure.. return */
+			return;
+	}
+}
+
+/*
+ * cs_etm__process_auxtrace_info - parse a CoreSight AUXTRACE_INFO record and
+ * attach a cs_etm_auxtrace context to @session.
+ *
+ * The private payload is read as an array of 64-bit words: a global header of
+ * CS_HEADER_VERSION_0_MAX words followed by one block per CPU.  A block starts
+ * with a magic number selecting its layout, CS_ETM_PRIV_MAX words for ETMv3 or
+ * CS_ETMV4_PRIV_MAX words for ETMv4.  Every word is bounds-checked against the
+ * record size before it is read.
+ *
+ * Returns 0 on success, otherwise a negative error code: -EINVAL for a
+ * malformed record, -ENOMEM on allocation failure, or the error reported by
+ * one of the helpers that were called.
+ */
+int cs_etm__process_auxtrace_info(union perf_event *event,
+				  struct perf_session *session)
+{
+	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+	struct cs_etm_auxtrace *etm = NULL;
+	struct int_node *inode;
+	unsigned int pmu_type;
+	int event_header_size = sizeof(struct perf_event_header);
+	int info_header_size;
+	int total_size = auxtrace_info->header.size;
+	int priv_size = 0;
+	int nr_words;
+	int num_cpu;
+	int err = 0, idx = -1;
+	int i, j, k;
+	u64 *ptr, *hdr = NULL;
+	u64 **metadata = NULL;
+
+	/*
+	 * sizeof(auxtrace_info_event::type) +
+	 * sizeof(auxtrace_info_event::reserved) == 8
+	 */
+	info_header_size = 8;
+
+	if (total_size < (event_header_size + info_header_size))
+		return -EINVAL;
+
+	priv_size = total_size - event_header_size - info_header_size;
+
+	/* Number of complete 64-bit words available in the private area */
+	nr_words = priv_size / (int)sizeof(u64);
+
+	/* The global header must be entirely present before it is read */
+	if (nr_words < CS_HEADER_VERSION_0_MAX)
+		return -EINVAL;
+
+	/* First the global part */
+	ptr = (u64 *) auxtrace_info->priv;
+
+	/* Look for version '0' of the header */
+	if (ptr[0] != 0)
+		return -EINVAL;
+
+	hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX);
+	if (!hdr)
+		return -ENOMEM;
+
+	/* Extract header information - see cs-etm.h for format */
+	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
+		hdr[i] = ptr[i];
+	num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
+	pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
+				    0xffffffff);
+
+	/*
+	 * Every CPU block needs at least its magic word, so a CPU count that
+	 * is negative or larger than the remaining payload cannot be valid.
+	 * Rejecting it here also bounds the allocation below.
+	 */
+	if (num_cpu < 0 || num_cpu > nr_words - CS_HEADER_VERSION_0_MAX) {
+		err = -EINVAL;
+		goto err_free_hdr;
+	}
+
+	/*
+	 * Create an RB tree for traceID-CPU# tuple. Since the conversion has
+	 * to be made for each packet that gets decoded, optimizing access in
+	 * anything other than a sequential array is worth doing.
+	 */
+	traceid_list = intlist__new(NULL);
+	if (!traceid_list) {
+		err = -ENOMEM;
+		goto err_free_hdr;
+	}
+
+	metadata = zalloc(sizeof(*metadata) * num_cpu);
+	if (!metadata) {
+		err = -ENOMEM;
+		goto err_free_traceid_list;
+	}
+
+	/*
+	 * The metadata is stored in the auxtrace_info section and encodes
+	 * the configuration of the ARM embedded trace macrocell which is
+	 * required by the trace decoder to properly decode the trace due
+	 * to its highly compressed nature.
+	 */
+	for (j = 0; j < num_cpu; j++) {
+		int nr_params, traceid_idx;
+
+		/* The magic word itself must lie inside the payload */
+		if (i >= nr_words) {
+			err = -EINVAL;
+			goto err_free_metadata;
+		}
+
+		if (ptr[i] == __perf_cs_etmv3_magic) {
+			nr_params = CS_ETM_PRIV_MAX;
+			traceid_idx = CS_ETM_ETMTRACEIDR;
+		} else if (ptr[i] == __perf_cs_etmv4_magic) {
+			nr_params = CS_ETMV4_PRIV_MAX;
+			traceid_idx = CS_ETMV4_TRCTRACEIDR;
+		} else {
+			/* Unknown block layout: nothing sensible to decode */
+			err = -EINVAL;
+			goto err_free_metadata;
+		}
+
+		/* The whole block must lie inside the payload */
+		if (nr_params > nr_words - i) {
+			err = -EINVAL;
+			goto err_free_metadata;
+		}
+
+		metadata[j] = zalloc(sizeof(*metadata[j]) * nr_params);
+		if (!metadata[j]) {
+			err = -ENOMEM;
+			goto err_free_metadata;
+		}
+		for (k = 0; k < nr_params; k++)
+			metadata[j][k] = ptr[i + k];
+
+		/* The traceID is our handle */
+		idx = metadata[j][traceid_idx];
+		i += nr_params;
+
+		/* Get an RB node for this CPU */
+		inode = intlist__findnew(traceid_list, idx);
+
+		/*
+		 * Something went wrong, no need to continue.  A NULL node
+		 * carries no error code, so report the allocation failure
+		 * explicitly (PTR_ERR(NULL) would be 0, i.e. success).
+		 */
+		if (!inode) {
+			err = -ENOMEM;
+			goto err_free_metadata;
+		}
+
+		/*
+		 * The node for that CPU should not be taken.
+		 * Back out if that's the case.
+		 */
+		if (inode->priv) {
+			err = -EINVAL;
+			goto err_free_metadata;
+		}
+		/* All good, associate the traceID with the CPU# */
+		inode->priv = &metadata[j][CS_ETM_CPU];
+	}
+
+	/*
+	 * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and
+	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
+	 * global metadata, and each cpu's metadata respectively.
+	 * The following tests if the correct number of double words was
+	 * present in the auxtrace info section.
+	 */
+	if (i * 8 != priv_size) {
+		err = -EINVAL;
+		goto err_free_metadata;
+	}
+
+	etm = zalloc(sizeof(*etm));
+
+	if (!etm) {
+		err = -ENOMEM;
+		goto err_free_metadata;
+	}
+
+	err = auxtrace_queues__init(&etm->queues);
+	if (err)
+		goto err_free_etm;
+
+	etm->session = session;
+	etm->machine = &session->machines.host;
+
+	etm->num_cpu = num_cpu;
+	etm->pmu_type = pmu_type;
+	etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0);
+	etm->metadata = metadata;
+	etm->auxtrace_type = auxtrace_info->type;
+	etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);
+
+	/*
+	 * The header copy is not referenced past this point; release it now
+	 * so the success paths do not leak it.  zfree() leaves @hdr NULL, so
+	 * the error labels below remain safe.
+	 */
+	zfree(&hdr);
+
+	etm->auxtrace.process_event = cs_etm__process_event;
+	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
+	etm->auxtrace.flush_events = cs_etm__flush_events;
+	etm->auxtrace.free_events = cs_etm__free_events;
+	etm->auxtrace.free = cs_etm__free;
+	session->auxtrace = &etm->auxtrace;
+
+	if (dump_trace) {
+		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
+		return 0;
+	}
+
+	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
+		etm->synth_opts = *session->itrace_synth_opts;
+	} else {
+		itrace_synth_opts__set_default(&etm->synth_opts);
+		etm->synth_opts.callchain = false;
+	}
+
+	err = cs_etm__synth_events(etm, session);
+	if (err)
+		goto err_free_queues;
+
+	err = auxtrace_queues__process_index(&etm->queues, session);
+	if (err)
+		goto err_free_queues;
+
+	etm->data_queued = etm->queues.populated;
+
+	return 0;
+
+err_free_queues:
+	auxtrace_queues__free(&etm->queues);
+	session->auxtrace = NULL;
+err_free_etm:
+	zfree(&etm);
+err_free_metadata:
+	/* No need to check @metadata[j], free(NULL) is supported */
+	for (j = 0; j < num_cpu; j++)
+		free(metadata[j]);
+	zfree(&metadata);
+err_free_traceid_list:
+	intlist__delete(traceid_list);
+err_free_hdr:
+	zfree(&hdr);
+
+	/* Propagate the real cause instead of a blanket -EINVAL */
+	return err;
+}
diff --git a/util/cs-etm.h b/util/cs-etm.h
new file mode 100644
index 0000000..37f8d48
--- /dev/null
+++ b/util/cs-etm.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef INCLUDE__UTIL_PERF_CS_ETM_H__
+#define INCLUDE__UTIL_PERF_CS_ETM_H__
+
+#include "util/event.h"
+#include "util/session.h"
+
+/*
+ * Versioning header in case things need to change in the future.  That way
+ * decoding of old snapshots is still possible.
+ *
+ * These enumerators index the 64-bit words of the global part of the
+ * auxtrace_info payload; CS_HEADER_VERSION_0_MAX is the number of such words.
+ */
+enum {
+	/* Starting with 0x0 */
+	CS_HEADER_VERSION_0,
+	/*
+	 * PMU->type (32 bit), total # of CPUs (32 bit).  The reader in
+	 * cs-etm.c takes the PMU type from the upper half and the CPU count
+	 * from the lower half of this word.
+	 */
+	CS_PMU_TYPE_CPUS,
+	/* Non-zero when the session was recorded in snapshot mode */
+	CS_ETM_SNAPSHOT,
+	CS_HEADER_VERSION_0_MAX,
+};
+
+/* Beginning of header common to both ETMv3 and V4 */
+enum {
+	/* Magic number identifying the layout of the per-CPU block */
+	CS_ETM_MAGIC,
+	CS_ETM_CPU,
+};
+
+/* ETMv3/PTM metadata; CS_ETM_PRIV_MAX is the block size in 64-bit words */
+enum {
+	/* Dynamic, configurable parameters */
+	CS_ETM_ETMCR = CS_ETM_CPU + 1,
+	CS_ETM_ETMTRACEIDR,
+	/* RO, taken from sysFS */
+	CS_ETM_ETMCCER,
+	CS_ETM_ETMIDR,
+	CS_ETM_PRIV_MAX,
+};
+
+/* ETMv4 metadata; CS_ETMV4_PRIV_MAX is the block size in 64-bit words */
+enum {
+	/* Dynamic, configurable parameters */
+	CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1,
+	CS_ETMV4_TRCTRACEIDR,
+	/* RO, taken from sysFS */
+	CS_ETMV4_TRCIDR0,
+	CS_ETMV4_TRCIDR1,
+	CS_ETMV4_TRCIDR2,
+	CS_ETMV4_TRCIDR8,
+	CS_ETMV4_TRCAUTHSTATUS,
+	CS_ETMV4_PRIV_MAX,
+};
+
+/*
+ * RB tree for quick conversion between traceID and CPUs
+ *
+ * NOTE(review): this is a (tentative) definition placed in a header rather
+ * than an extern declaration, so every translation unit including this file
+ * defines the symbol.  That only links when the toolchain merges common
+ * symbols (it fails with -fno-common); consider moving the definition into a
+ * single .c file and declaring it extern here.
+ */
+struct intlist *traceid_list;
+
+/* Byte counts for @x kibibytes / mebibytes */
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+
+/* Size in bytes of the global header described above */
+#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
+
+/* Values stored in the CS_ETM_MAGIC word to tell ETMv3 and ETMv4 blocks apart */
+static const u64 __perf_cs_etmv3_magic   = 0x3030303030303030ULL;
+static const u64 __perf_cs_etmv4_magic   = 0x4040404040404040ULL;
+/* Size in bytes of one per-CPU metadata block of each flavour */
+#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))
+#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
+
+/*
+ * Entry point for AUXTRACE_INFO records carrying CoreSight ETM metadata.
+ * The real implementation is only declared when HAVE_CSTRACE_SUPPORT is
+ * defined; otherwise an inline stub that always returns -1 is provided so
+ * callers need no conditional compilation of their own.
+ */
+#ifdef HAVE_CSTRACE_SUPPORT
+int cs_etm__process_auxtrace_info(union perf_event *event,
+				  struct perf_session *session);
+#else
+static inline int
+cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused,
+			      struct perf_session *session __maybe_unused)
+{
+	return -1;
+}
+#endif
+
+#endif
diff --git a/util/ctype.c b/util/ctype.c
new file mode 100644
index 0000000..ee4c1e8
--- /dev/null
+++ b/util/ctype.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Sane locale-independent, ASCII ctype.
+ *
+ * No surprises, and works with signed and unsigned chars.
+ */
+#include "sane_ctype.h"
+
+/*
+ * One- and two-letter aliases for the GIT_* character-class bits (declared
+ * outside this file), used only to keep the table below readable.
+ */
+enum {
+	S = GIT_SPACE,
+	A = GIT_ALPHA,
+	D = GIT_DIGIT,
+	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
+	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | * */
+	P = GIT_PRINT_EXTRA,	/* printable - alpha - digit - glob - regex */
+
+	/* The space character is both whitespace and printable */
+	PS = GIT_SPACE | GIT_PRINT_EXTRA,
+};
+
+/*
+ * Character-class bits for every possible unsigned char value, sixteen
+ * entries per row.  Only the first 128 entries are spelled out; the rest of
+ * the array has no initializer and is therefore zero.
+ */
+unsigned char sane_ctype[256] = {
+/*	0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F			    */
+
+	0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,		/*   0.. 15 */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		/*  16.. 31 */
+	PS,P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
+	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */
+	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
+	A, A, A, A, A, A, A, A, A, A, A, G, G, P, R, P,		/*  80.. 95 */
+	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
+	A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0,		/* 112..127 */
+	/* Nothing in the 128.. range */
+};
+
+/*
+ * Long strings made of a single repeated character (underscore, dash, space
+ * and dot respectively).
+ * NOTE(review): presumably callers print a prefix of these with a precision
+ * specifier to draw rulers and padding - confirm against the users.
+ */
+const char *graph_line =
+	"_____________________________________________________________________"
+	"_____________________________________________________________________"
+	"_____________________________________________________________________";
+const char *graph_dotted_line =
+	"---------------------------------------------------------------------"
+	"---------------------------------------------------------------------"
+	"---------------------------------------------------------------------";
+const char *spaces =
+	"                                                                     "
+	"                                                                     "
+	"                                                                     ";
+const char *dots =
+	"....................................................................."
+	"....................................................................."
+	".....................................................................";
diff --git a/util/data-convert-bt.c b/util/data-convert-bt.c
new file mode 100644
index 0000000..5744c12
--- /dev/null
+++ b/util/data-convert-bt.c
@@ -0,0 +1,1677 @@
+/*
+ * CTF writing support via babeltrace.
+ *
+ * Copyright (C) 2014, Jiri Olsa <jolsa@redhat.com>
+ * Copyright (C) 2014, Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <babeltrace/ctf-writer/writer.h>
+#include <babeltrace/ctf-writer/clock.h>
+#include <babeltrace/ctf-writer/stream.h>
+#include <babeltrace/ctf-writer/event.h>
+#include <babeltrace/ctf-writer/event-types.h>
+#include <babeltrace/ctf-writer/event-fields.h>
+#include <babeltrace/ctf-ir/utils.h>
+#include <babeltrace/ctf/events.h>
+#include <traceevent/event-parse.h>
+#include "asm/bug.h"
+#include "data-convert-bt.h"
+#include "session.h"
+#include "util.h"
+#include "debug.h"
+#include "tool.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "machine.h"
+#include "config.h"
+#include "sane_ctype.h"
+
+/*
+ * Logging helpers for the converter.  pr_N() forwards to eprintf() with
+ * level @n and the debug_data_convert variable; pr() and pr2() are the
+ * level-1 and level-2 shorthands.  pr_time2() is the level-2 variant built on
+ * pr_time_N(), which additionally takes a timestamp @t.
+ * NOTE(review): presumably a message is emitted only when debug_data_convert
+ * is at least the given level - confirm against eprintf().
+ */
+#define pr_N(n, fmt, ...) \
+	eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
+
+#define pr(fmt, ...)  pr_N(1, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr2(fmt, ...) pr_N(2, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_time2(t, fmt, ...) pr_time_N(2, debug_data_convert, t, pr_fmt(fmt), ##__VA_ARGS__)
+
+/* Converter data hung off perf_evsel::priv: the CTF event class of the evsel */
+struct evsel_priv {
+	struct bt_ctf_event_class *event_class;
+};
+
+/* NOTE(review): presumably the fallback number of per-CPU streams - confirm */
+#define MAX_CPUS	4096
+
+/* One CTF output stream together with its bookkeeping */
+struct ctf_stream {
+	struct bt_ctf_stream *stream;
+	/* CPU number written into the stream's packet context */
+	int cpu;
+	/* Events appended since the last flush */
+	u32 count;
+};
+
+/*
+ * State of the CTF writer: the babeltrace writer objects, the lazily created
+ * per-CPU streams, the field types used for payloads and the event classes
+ * of the non-sample events.
+ */
+struct ctf_writer {
+	/* writer primitives */
+	struct bt_ctf_writer		 *writer;
+	/* Array of @stream_cnt stream slots indexed by CPU number */
+	struct ctf_stream		**stream;
+	int				  stream_cnt;
+	struct bt_ctf_stream_class	 *stream_class;
+	struct bt_ctf_clock		 *clock;
+
+	/*
+	 * data types
+	 *
+	 * The named members and @array alias the same storage so the types
+	 * can be handled either by name or in a loop.  @array must therefore
+	 * have exactly as many slots as there are named members; it used to
+	 * have only six, which left u64_hex unreachable through the array.
+	 * The union already had the size of seven pointers, so the layout of
+	 * the structure is unchanged.
+	 */
+	union {
+		struct {
+			struct bt_ctf_field_type	*s64;
+			struct bt_ctf_field_type	*u64;
+			struct bt_ctf_field_type	*s32;
+			struct bt_ctf_field_type	*u32;
+			struct bt_ctf_field_type	*string;
+			struct bt_ctf_field_type	*u32_hex;
+			struct bt_ctf_field_type	*u64_hex;
+		};
+		struct bt_ctf_field_type *array[7];
+	} data;
+	/* Event classes of the non-sample events */
+	struct bt_ctf_event_class	*comm_class;
+	struct bt_ctf_event_class	*exit_class;
+	struct bt_ctf_event_class	*fork_class;
+	struct bt_ctf_event_class	*mmap_class;
+	struct bt_ctf_event_class	*mmap2_class;
+};
+
+/*
+ * Top-level converter context.  The event callbacks receive &tool and get
+ * back to this structure with container_of().
+ */
+struct convert {
+	struct perf_tool	tool;
+	struct ctf_writer	writer;
+
+	/* Sum of header.size over all processed events */
+	u64			events_size;
+	/* Number of processed sample events */
+	u64			events_count;
+	/* Number of processed non-sample events */
+	u64			non_sample_count;
+
+	/* Ordered events configured queue size. */
+	u64			queue_size;
+};
+
+/*
+ * Create an integer field of @type holding @val and attach it to @event as
+ * payload @name.  Whether the signed or the unsigned setter is used depends
+ * on the signedness of @type.
+ *
+ * Returns 0 on success, -1 if the field cannot be created, otherwise the
+ * non-zero value returned by the babeltrace call that failed.
+ */
+static int value_set(struct bt_ctf_field_type *type,
+		     struct bt_ctf_event *event,
+		     const char *name, u64 val)
+{
+	bool is_signed = bt_ctf_field_type_integer_get_signed(type);
+	struct bt_ctf_field *fld = bt_ctf_field_create(type);
+	int ret;
+
+	if (!fld) {
+		pr_err("failed to create a field %s\n", name);
+		return -1;
+	}
+
+	if (is_signed)
+		ret = bt_ctf_field_signed_integer_set_value(fld, val);
+	else
+		ret = bt_ctf_field_unsigned_integer_set_value(fld, val);
+
+	if (ret) {
+		pr_err("failed to set field value %s\n", name);
+		goto out_put;
+	}
+
+	ret = bt_ctf_event_set_payload(event, name, fld);
+	if (ret)
+		pr_err("failed to set payload %s\n", name);
+	else
+		pr2("  SET [%s = %" PRIu64 "]\n", name, val);
+
+out_put:
+	/* Our reference is dropped on success and on failure alike */
+	bt_ctf_field_put(fld);
+	return ret;
+}
+
+/*
+ * Generate value_set_<_name>(cw, event, name, val): a thin wrapper that picks
+ * the field type cw->data.<_name> and forwards to value_set() with @val cast
+ * to u64.  @_val_type is the C type of the wrapper's @val parameter.
+ * The wrappers are marked __maybe_unused because not all of them need to be
+ * referenced.
+ */
+#define __FUNC_VALUE_SET(_name, _val_type)				\
+static __maybe_unused int value_set_##_name(struct ctf_writer *cw,	\
+			     struct bt_ctf_event *event,		\
+			     const char *name,				\
+			     _val_type val)				\
+{									\
+	struct bt_ctf_field_type *type = cw->data._name;		\
+	return value_set(type, event, name, (u64) val);			\
+}
+
+/* Shorthand for when the field-type member and the C type share one name */
+#define FUNC_VALUE_SET(_name) __FUNC_VALUE_SET(_name, _name)
+
+FUNC_VALUE_SET(s32)
+FUNC_VALUE_SET(u32)
+FUNC_VALUE_SET(s64)
+FUNC_VALUE_SET(u64)
+/* Takes a u64 value but uses the u64_hex field type */
+__FUNC_VALUE_SET(u64_hex, u64)
+
+static int string_set_value(struct bt_ctf_field *field, const char *string);
+
+/*
+ * Create a string field holding @string and attach it to @event as payload
+ * @name.
+ *
+ * Returns 0 on success, -1 if the field cannot be created, otherwise the
+ * non-zero value returned by the step that failed.
+ */
+static __maybe_unused int
+value_set_string(struct ctf_writer *cw, struct bt_ctf_event *event,
+		 const char *name, const char *string)
+{
+	struct bt_ctf_field *fld = bt_ctf_field_create(cw->data.string);
+	int ret;
+
+	if (!fld) {
+		pr_err("failed to create a field %s\n", name);
+		return -1;
+	}
+
+	ret = string_set_value(fld, string);
+	if (ret) {
+		pr_err("failed to set value %s\n", name);
+	} else {
+		ret = bt_ctf_event_set_payload(event, name, fld);
+		if (ret)
+			pr_err("failed to set payload %s\n", name);
+	}
+
+	/* Our reference is dropped on success and on failure alike */
+	bt_ctf_field_put(fld);
+	return ret;
+}
+
+/*
+ * Choose the CTF field type used for tracepoint field @field:
+ *  - strings use the string type;
+ *  - signed values use s64 when the field is 8 bytes wide, s32 otherwise;
+ *  - unsigned longs and pointers use u64_hex;
+ *  - other unsigned values use u64 when 8 bytes wide, u32 otherwise.
+ */
+static struct bt_ctf_field_type*
+get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field)
+{
+	const unsigned long flags = field->flags;
+	const bool is_wide = field->size == 8;
+
+	if (flags & FIELD_IS_STRING)
+		return cw->data.string;
+
+	if (flags & FIELD_IS_SIGNED)
+		return is_wide ? cw->data.s64 : cw->data.s32;
+
+	/* unsigned long are mostly pointers */
+	if (flags & (FIELD_IS_LONG | FIELD_IS_POINTER))
+		return cw->data.u64_hex;
+
+	return is_wide ? cw->data.u64 : cw->data.u32;
+}
+
+/*
+ * Sign-extend @value_int, read from a field of @size bytes, to the full
+ * width of unsigned long long.
+ *
+ * Only sizes 1, 2 and 4 are extended.  An 8-byte value needs no extension and
+ * any other size is unexpected; in both cases the value is returned as is.
+ */
+static unsigned long long adjust_signedness(unsigned long long value_int, int size)
+{
+	unsigned long long low_bits;
+
+	if (size != 1 && size != 2 && size != 4)
+		return value_int;
+
+	/* Every bit below the sign bit of a @size byte integer */
+	low_bits = (1ULL << (size * 8 - 1)) - 1;
+
+	/* If it is a positive value, don't adjust. */
+	if (!(value_int & ~low_bits))
+		return value_int;
+
+	/* Fill upper part of value_int with 1 to make it a negative long long. */
+	return value_int | ~low_bits;
+}
+
+/*
+ * Store @string in @field, replacing every non-printable byte by a four
+ * character "\xHH" escape.
+ *
+ * A string that is entirely printable is handed over unchanged and without
+ * any allocation.  If the escape buffer cannot be allocated the placeholder
+ * "UNPRINTABLE-STRING" is stored instead.
+ *
+ * Returns whatever bt_ctf_field_string_set_value() returns.
+ */
+static int string_set_value(struct bt_ctf_field *field, const char *string)
+{
+	const size_t len = strlen(string);
+	char *escaped = NULL;
+	size_t in, out = 0;
+	int err;
+
+	for (in = 0; in < len; in++) {
+		const char c = string[in];
+
+		if (isprint(c)) {
+			/* Nothing to copy until the first escape is needed */
+			if (escaped)
+				escaped[out] = c;
+			out++;
+			continue;
+		}
+
+		if (!escaped) {
+			/*
+			 * Worst case: each remaining byte becomes four
+			 * characters; what came before is copied verbatim.
+			 */
+			escaped = zalloc(in + (len - in) * 4 + 2);
+			if (!escaped) {
+				pr_err("failed to set unprintable string '%s'\n", string);
+				return bt_ctf_field_string_set_value(field, "UNPRINTABLE-STRING");
+			}
+			if (in > 0)
+				strncpy(escaped, string, in);
+		}
+
+		/* Four characters plus the terminating NUL */
+		snprintf(escaped + out, 5, "\\x%02x",
+			 (unsigned int)c & 0xff);
+		out += 4;
+	}
+
+	if (!escaped)
+		return bt_ctf_field_string_set_value(field, string);
+
+	err = bt_ctf_field_string_set_value(field, escaped);
+	free(escaped);
+	return err;
+}
+
+/*
+ * Copy the value of tracepoint field @fmtf out of the raw data of @sample
+ * into the payload of @event.
+ *
+ * Strings are stored as a single string field.  For a dynamic field the word
+ * found at the field's offset is decoded into the real offset (low 16 bits)
+ * and length (next 16 bits).  Arrays are filled element by element and then
+ * attached as one payload; every other value becomes a single integer field.
+ *
+ * All babeltrace references taken here are released again before returning,
+ * on the error paths as well.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int add_tracepoint_field_value(struct ctf_writer *cw,
+				      struct bt_ctf_event_class *event_class,
+				      struct bt_ctf_event *event,
+				      struct perf_sample *sample,
+				      struct format_field *fmtf)
+{
+	struct bt_ctf_field_type *type;
+	struct bt_ctf_field *array_field = NULL;
+	struct bt_ctf_field *field;
+	const char *name = fmtf->alias;
+	void *data = sample->raw_data;
+	unsigned long flags = fmtf->flags;
+	unsigned int n_items = 1;
+	unsigned int i;
+	unsigned int offset = fmtf->offset;
+	unsigned int len = fmtf->size;
+	int ret;
+
+	/* A character array is written as one string, not per element */
+	if (flags & FIELD_IS_STRING)
+		flags &= ~FIELD_IS_ARRAY;
+
+	if (flags & FIELD_IS_DYNAMIC) {
+		unsigned long long tmp_val;
+
+		tmp_val = pevent_read_number(fmtf->event->pevent,
+				data + offset, len);
+		offset = tmp_val;
+		len = offset >> 16;
+		offset &= 0xffff;
+	}
+
+	if (flags & FIELD_IS_ARRAY) {
+		/* The element size below is size / arraylen */
+		if (!fmtf->arraylen) {
+			pr_err("Invalid zero array length for %s\n", name);
+			return -1;
+		}
+
+		type = bt_ctf_event_class_get_field_by_name(
+				event_class, name);
+		array_field = bt_ctf_field_create(type);
+		bt_ctf_field_type_put(type);
+		if (!array_field) {
+			pr_err("Failed to create array type %s\n", name);
+			return -1;
+		}
+
+		len = fmtf->size / fmtf->arraylen;
+		n_items = fmtf->arraylen;
+	}
+
+	type = get_tracepoint_field_type(cw, fmtf);
+
+	for (i = 0; i < n_items; i++) {
+		if (flags & FIELD_IS_ARRAY)
+			field = bt_ctf_field_array_get_field(array_field, i);
+		else
+			field = bt_ctf_field_create(type);
+
+		if (!field) {
+			pr_err("failed to create a field %s\n", name);
+			goto err_put_array;
+		}
+
+		if (flags & FIELD_IS_STRING)
+			ret = string_set_value(field, data + offset + i * len);
+		else {
+			unsigned long long value_int;
+
+			value_int = pevent_read_number(
+					fmtf->event->pevent,
+					data + offset + i * len, len);
+
+			if (!(flags & FIELD_IS_SIGNED))
+				ret = bt_ctf_field_unsigned_integer_set_value(
+						field, value_int);
+			else
+				ret = bt_ctf_field_signed_integer_set_value(
+						field, adjust_signedness(value_int, len));
+		}
+
+		if (ret) {
+			pr_err("failed to set file value %s\n", name);
+			goto err_put_field;
+		}
+		if (!(flags & FIELD_IS_ARRAY)) {
+			ret = bt_ctf_event_set_payload(event, name, field);
+			if (ret) {
+				pr_err("failed to set payload %s\n", name);
+				goto err_put_field;
+			}
+		}
+		bt_ctf_field_put(field);
+	}
+	if (flags & FIELD_IS_ARRAY) {
+		ret = bt_ctf_event_set_payload(event, name, array_field);
+		/* Drop our reference whether or not the payload was accepted */
+		bt_ctf_field_put(array_field);
+		if (ret) {
+			pr_err("Failed add payload array %s\n", name);
+			return -1;
+		}
+	}
+	return 0;
+
+err_put_field:
+	bt_ctf_field_put(field);
+err_put_array:
+	/* Only set for array fields; it used to leak on these paths */
+	if (array_field)
+		bt_ctf_field_put(array_field);
+	return -1;
+}
+
+/*
+ * Add the value of every field on the linked list @fields to @event,
+ * stopping at the first failure.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int add_tracepoint_fields_values(struct ctf_writer *cw,
+					struct bt_ctf_event_class *event_class,
+					struct bt_ctf_event *event,
+					struct format_field *fields,
+					struct perf_sample *sample)
+{
+	struct format_field *cur = fields;
+
+	while (cur) {
+		if (add_tracepoint_field_value(cw, event_class, event, sample,
+					       cur))
+			return -1;
+		cur = cur->next;
+	}
+
+	return 0;
+}
+
+/*
+ * Add all tracepoint field values of @sample to @event: first the common
+ * fields of the tracepoint format, then its specific fields.  The specific
+ * fields are skipped when the common ones fail.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int add_tracepoint_values(struct ctf_writer *cw,
+				 struct bt_ctf_event_class *event_class,
+				 struct bt_ctf_event *event,
+				 struct perf_evsel *evsel,
+				 struct perf_sample *sample)
+{
+	int ret;
+
+	ret = add_tracepoint_fields_values(cw, event_class, event,
+			evsel->tp_format->format.common_fields, sample);
+	if (ret)
+		return ret;
+
+	return add_tracepoint_fields_values(cw, event_class, event,
+			evsel->tp_format->format.fields, sample);
+}
+
+/*
+ * Store the raw data of a BPF output @sample in @event as a sequence of u32
+ * words: "raw_len" receives the number of words and "raw_data" the words
+ * themselves.  Trailing bytes that do not make up a whole word are dropped
+ * after a warning.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+add_bpf_output_values(struct bt_ctf_event_class *event_class,
+		      struct bt_ctf_event *event,
+		      struct perf_sample *sample)
+{
+	struct bt_ctf_field_type *len_type, *seq_type;
+	struct bt_ctf_field *len_field, *seq_field;
+	unsigned int raw_size = sample->raw_size;
+	unsigned int nr_elements = raw_size / sizeof(u32);
+	unsigned int i;
+	int ret;
+
+	/*
+	 * nr_elements * sizeof(u32) never exceeds raw_size, so the number of
+	 * skipped bytes has to be computed as raw_size minus the words that
+	 * are kept; the opposite order wraps around to a huge value.
+	 */
+	if (nr_elements * sizeof(u32) != raw_size)
+		pr_warning("Incorrect raw_size (%u) in bpf output event, skip %zu bytes\n",
+			   raw_size, raw_size - nr_elements * sizeof(u32));
+
+	len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
+	len_field = bt_ctf_field_create(len_type);
+	if (!len_field) {
+		pr_err("failed to create 'raw_len' for bpf output event\n");
+		ret = -1;
+		goto put_len_type;
+	}
+
+	ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+	if (ret) {
+		pr_err("failed to set field value for raw_len\n");
+		goto put_len_field;
+	}
+	ret = bt_ctf_event_set_payload(event, "raw_len", len_field);
+	if (ret) {
+		pr_err("failed to set payload to raw_len\n");
+		goto put_len_field;
+	}
+
+	seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data");
+	seq_field = bt_ctf_field_create(seq_type);
+	if (!seq_field) {
+		pr_err("failed to create 'raw_data' for bpf output event\n");
+		ret = -1;
+		goto put_seq_type;
+	}
+
+	ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+	if (ret) {
+		pr_err("failed to set length of 'raw_data'\n");
+		goto put_seq_field;
+	}
+
+	for (i = 0; i < nr_elements; i++) {
+		struct bt_ctf_field *elem_field =
+			bt_ctf_field_sequence_get_field(seq_field, i);
+
+		ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+				((u32 *)(sample->raw_data))[i]);
+
+		bt_ctf_field_put(elem_field);
+		if (ret) {
+			/* @i is unsigned, hence %u */
+			pr_err("failed to set raw_data[%u]\n", i);
+			goto put_seq_field;
+		}
+	}
+
+	ret = bt_ctf_event_set_payload(event, "raw_data", seq_field);
+	if (ret)
+		pr_err("failed to set payload for raw_data\n");
+
+	/* Success falls through: every local reference is released below */
+put_seq_field:
+	bt_ctf_field_put(seq_field);
+put_seq_type:
+	bt_ctf_field_type_put(seq_type);
+put_len_field:
+	bt_ctf_field_put(len_field);
+put_len_type:
+	bt_ctf_field_type_put(len_type);
+	return ret;
+}
+
+/*
+ * Store @callchain in @event: "perf_callchain_size" receives the number of
+ * entries and "perf_callchain" the instruction pointers as a sequence of
+ * unsigned integers.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+add_callchain_output_values(struct bt_ctf_event_class *event_class,
+		      struct bt_ctf_event *event,
+		      struct ip_callchain *callchain)
+{
+	struct bt_ctf_field_type *len_type, *seq_type;
+	struct bt_ctf_field *len_field, *seq_field;
+	unsigned int nr_elements = callchain->nr;
+	unsigned int i;
+	int ret;
+
+	len_type = bt_ctf_event_class_get_field_by_name(
+			event_class, "perf_callchain_size");
+	len_field = bt_ctf_field_create(len_type);
+	if (!len_field) {
+		pr_err("failed to create 'perf_callchain_size' for callchain output event\n");
+		ret = -1;
+		goto put_len_type;
+	}
+
+	ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+	if (ret) {
+		pr_err("failed to set field value for perf_callchain_size\n");
+		goto put_len_field;
+	}
+	ret = bt_ctf_event_set_payload(event, "perf_callchain_size", len_field);
+	if (ret) {
+		pr_err("failed to set payload to perf_callchain_size\n");
+		goto put_len_field;
+	}
+
+	seq_type = bt_ctf_event_class_get_field_by_name(
+			event_class, "perf_callchain");
+	seq_field = bt_ctf_field_create(seq_type);
+	if (!seq_field) {
+		pr_err("failed to create 'perf_callchain' for callchain output event\n");
+		ret = -1;
+		goto put_seq_type;
+	}
+
+	ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+	if (ret) {
+		pr_err("failed to set length of 'perf_callchain'\n");
+		goto put_seq_field;
+	}
+
+	for (i = 0; i < nr_elements; i++) {
+		struct bt_ctf_field *elem_field =
+			bt_ctf_field_sequence_get_field(seq_field, i);
+
+		ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+				((u64 *)(callchain->ips))[i]);
+
+		bt_ctf_field_put(elem_field);
+		if (ret) {
+			/* @i is unsigned, hence %u */
+			pr_err("failed to set callchain[%u]\n", i);
+			goto put_seq_field;
+		}
+	}
+
+	ret = bt_ctf_event_set_payload(event, "perf_callchain", seq_field);
+	if (ret)
+		/* Name the payload that actually failed, not "raw_data" */
+		pr_err("failed to set payload for perf_callchain\n");
+
+	/* Success falls through: every local reference is released below */
+put_seq_field:
+	bt_ctf_field_put(seq_field);
+put_seq_type:
+	bt_ctf_field_type_put(seq_type);
+put_len_field:
+	bt_ctf_field_put(len_field);
+put_len_type:
+	bt_ctf_field_type_put(len_type);
+	return ret;
+}
+
+/*
+ * Add to @event the generic sample values selected by the sample_type of
+ * @evsel.  Each value is stored under a "perf_" prefixed payload name.
+ *
+ * Returns 0 on success, -1 as soon as one value cannot be set.
+ */
+static int add_generic_values(struct ctf_writer *cw,
+			      struct bt_ctf_event *event,
+			      struct perf_evsel *evsel,
+			      struct perf_sample *sample)
+{
+	const u64 type = evsel->attr.sample_type;
+
+	/*
+	 * missing:
+	 *   PERF_SAMPLE_TIME         - not needed as we have it in
+	 *                              ctf event header
+	 *   PERF_SAMPLE_READ         - TODO
+	 *   PERF_SAMPLE_RAW          - tracepoint fields are handled separately
+	 *   PERF_SAMPLE_BRANCH_STACK - TODO
+	 *   PERF_SAMPLE_REGS_USER    - TODO
+	 *   PERF_SAMPLE_STACK_USER   - TODO
+	 */
+
+	if ((type & PERF_SAMPLE_IP) &&
+	    value_set_u64_hex(cw, event, "perf_ip", sample->ip))
+		return -1;
+
+	/* tid first, then pid; the second is skipped if the first fails */
+	if ((type & PERF_SAMPLE_TID) &&
+	    (value_set_s32(cw, event, "perf_tid", sample->tid) ||
+	     value_set_s32(cw, event, "perf_pid", sample->pid)))
+		return -1;
+
+	if ((type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) &&
+	    value_set_u64(cw, event, "perf_id", sample->id))
+		return -1;
+
+	if ((type & PERF_SAMPLE_STREAM_ID) &&
+	    value_set_u64(cw, event, "perf_stream_id", sample->stream_id))
+		return -1;
+
+	if ((type & PERF_SAMPLE_PERIOD) &&
+	    value_set_u64(cw, event, "perf_period", sample->period))
+		return -1;
+
+	if ((type & PERF_SAMPLE_WEIGHT) &&
+	    value_set_u64(cw, event, "perf_weight", sample->weight))
+		return -1;
+
+	if ((type & PERF_SAMPLE_DATA_SRC) &&
+	    value_set_u64(cw, event, "perf_data_src", sample->data_src))
+		return -1;
+
+	if ((type & PERF_SAMPLE_TRANSACTION) &&
+	    value_set_u64(cw, event, "perf_transaction", sample->transaction))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Flush stream @cs and reset its event counter.  A NULL @cs is accepted and
+ * treated as nothing to do.
+ *
+ * Returns 0 for a NULL stream, otherwise the result of bt_ctf_stream_flush().
+ */
+static int ctf_stream__flush(struct ctf_stream *cs)
+{
+	int err;
+
+	if (!cs)
+		return 0;
+
+	err = bt_ctf_stream_flush(cs->stream);
+	if (err)
+		pr_err("CTF stream %d flush failed\n", cs->cpu);
+
+	pr("Flush stream for cpu %d (%u samples)\n",
+	   cs->cpu, cs->count);
+
+	/* The counter is reset even when the flush reported an error */
+	cs->count = 0;
+
+	return err;
+}
+
+/*
+ * Allocate a ctf_stream for @cpu: create a babeltrace stream of the writer's
+ * stream class and record @cpu in the "cpu_id" field of its packet context.
+ *
+ * Returns the new stream, or NULL on failure with everything acquired so far
+ * released again.
+ */
+static struct ctf_stream *ctf_stream__create(struct ctf_writer *cw, int cpu)
+{
+	struct ctf_stream *cs;
+	struct bt_ctf_field *pkt_ctx;
+	struct bt_ctf_field *cpu_field;
+	struct bt_ctf_stream *stream;
+
+	cs = zalloc(sizeof(*cs));
+	if (!cs) {
+		pr_err("Failed to allocate ctf stream\n");
+		return NULL;
+	}
+
+	stream = bt_ctf_writer_create_stream(cw->writer, cw->stream_class);
+	if (!stream) {
+		pr_err("Failed to create CTF stream\n");
+		goto err_free;
+	}
+
+	pkt_ctx = bt_ctf_stream_get_packet_context(stream);
+	if (!pkt_ctx) {
+		pr_err("Failed to obtain packet context\n");
+		goto err_put_stream;
+	}
+
+	/* The packet context is only needed to look up the cpu_id field */
+	cpu_field = bt_ctf_field_structure_get_field(pkt_ctx, "cpu_id");
+	bt_ctf_field_put(pkt_ctx);
+	if (!cpu_field) {
+		pr_err("Failed to obtain cpu field\n");
+		goto err_put_stream;
+	}
+
+	if (bt_ctf_field_unsigned_integer_set_value(cpu_field, (u32) cpu)) {
+		pr_err("Failed to update CPU number\n");
+		goto err_put_cpu_field;
+	}
+
+	bt_ctf_field_put(cpu_field);
+
+	cs->cpu    = cpu;
+	cs->stream = stream;
+	return cs;
+
+err_put_cpu_field:
+	bt_ctf_field_put(cpu_field);
+err_put_stream:
+	bt_ctf_stream_put(stream);
+err_free:
+	free(cs);
+	return NULL;
+}
+
+/* Release the babeltrace stream of @cs and free @cs; NULL is a no-op. */
+static void ctf_stream__delete(struct ctf_stream *cs)
+{
+	if (!cs)
+		return;
+
+	bt_ctf_stream_put(cs->stream);
+	free(cs);
+}
+
+/*
+ * Return the stream for @cpu, creating it on first use.  @cpu is used as an
+ * index into cw->stream without any check here.
+ *
+ * Returns NULL if the stream does not exist yet and cannot be created.
+ */
+static struct ctf_stream *ctf_stream(struct ctf_writer *cw, int cpu)
+{
+	struct ctf_stream **slot = &cw->stream[cpu];
+
+	if (!*slot)
+		*slot = ctf_stream__create(cw, cpu);
+
+	return *slot;
+}
+
+/*
+ * Return the CPU whose stream should receive @sample.
+ *
+ * CPU 0 is used when the event does not record the CPU.  It is also used,
+ * after an error message, when the recorded CPU is not a valid index into
+ * the cw->stream array, which the result is used to index.  Valid indices
+ * are 0 .. stream_cnt - 1, so a CPU equal to stream_cnt must be rejected as
+ * well, and so must a negative value resulting from the conversion to int.
+ */
+static int get_sample_cpu(struct ctf_writer *cw, struct perf_sample *sample,
+			  struct perf_evsel *evsel)
+{
+	int cpu = 0;
+
+	if (evsel->attr.sample_type & PERF_SAMPLE_CPU)
+		cpu = sample->cpu;
+
+	if (cpu < 0 || cpu >= cw->stream_cnt) {
+		pr_err("Event was recorded for CPU %d, limit is at %d.\n",
+			cpu, cw->stream_cnt);
+		cpu = 0;
+	}
+
+	return cpu;
+}
+
+/* Number of events appended to a stream before it gets flushed */
+#define STREAM_FLUSH_COUNT 100000
+
+/*
+ * There is currently no better way to decide when a stream should be
+ * flushed than counting the events appended to it and comparing that
+ * count against a fixed threshold.
+ */
+static bool is_flush_needed(struct ctf_stream *cs)
+{
+	if (cs->count < STREAM_FLUSH_COUNT)
+		return false;
+
+	return true;
+}
+
+/*
+ * perf_tool sample callback: convert one sample into a CTF event and append
+ * it to the stream of the CPU the sample was taken on.
+ *
+ * The event gets the generic sample values, the tracepoint fields for
+ * tracepoint events, the callchain when one was sampled and the raw data of
+ * BPF output events.  The reference on the CTF event is dropped on every
+ * path once the event has been created.
+ *
+ * Returns 0 on success, and also when the evsel has no converter data, which
+ * is only warned about.  Returns -1 on failure.
+ */
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *_event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine __maybe_unused)
+{
+	struct convert *c = container_of(tool, struct convert, tool);
+	struct evsel_priv *priv = evsel->priv;
+	struct ctf_writer *cw = &c->writer;
+	struct ctf_stream *cs;
+	struct bt_ctf_event_class *event_class;
+	struct bt_ctf_event *event;
+	int ret;
+	unsigned long type = evsel->attr.sample_type;
+
+	if (WARN_ONCE(!priv, "Failed to setup all events.\n"))
+		return 0;
+
+	event_class = priv->event_class;
+
+	/* update stats */
+	c->events_count++;
+	c->events_size += _event->header.size;
+
+	pr_time2(sample->time, "sample %" PRIu64 "\n", c->events_count);
+
+	event = bt_ctf_event_create(event_class);
+	if (!event) {
+		pr_err("Failed to create an CTF event\n");
+		return -1;
+	}
+
+	bt_ctf_clock_set_time(cw->clock, sample->time);
+
+	ret = add_generic_values(cw, event, evsel, sample);
+	if (ret)
+		goto err_put_event;
+
+	if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+		ret = add_tracepoint_values(cw, event_class, event,
+					    evsel, sample);
+		if (ret)
+			goto err_put_event;
+	}
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		ret = add_callchain_output_values(event_class,
+				event, sample->callchain);
+		if (ret)
+			goto err_put_event;
+	}
+
+	if (perf_evsel__is_bpf_output(evsel)) {
+		ret = add_bpf_output_values(event_class, event, sample);
+		if (ret)
+			goto err_put_event;
+	}
+
+	cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
+	if (cs) {
+		if (is_flush_needed(cs))
+			ctf_stream__flush(cs);
+
+		cs->count++;
+		bt_ctf_stream_append_event(cs->stream, event);
+	}
+
+	bt_ctf_event_put(event);
+	return cs ? 0 : -1;
+
+err_put_event:
+	/* The event was created but never handed to a stream */
+	bt_ctf_event_put(event);
+	return -1;
+}
+
+#define __NON_SAMPLE_SET_FIELD(_name, _type, _field) 	\
+do {							\
+	ret = value_set_##_type(cw, event, #_field, _event->_name._field);\
+	if (ret)					\
+		return -1;				\
+} while(0)
+
+/*
+ * Generate process_<_name>_event(), a perf_tool callback that mirrors one
+ * non-sample record into the CTF trace and then hands the record to the
+ * regular perf_event__process_<_name>() handler, whose result is returned.
+ *
+ * The generated function bumps the non-sample counter and the byte total,
+ * creates an event from cw-><_name>_class, sets the CTF clock to
+ * sample->time, runs 'body' (a list of __NON_SAMPLE_SET_FIELD invocations)
+ * and appends the event to the stream returned by ctf_stream(cw, 0),
+ * flushing that stream first when is_flush_needed() says so.  If no stream
+ * is returned the event is dropped without reporting an error.
+ */
+#define __FUNC_PROCESS_NON_SAMPLE(_name, body) 	\
+static int process_##_name##_event(struct perf_tool *tool,	\
+				   union perf_event *_event,	\
+				   struct perf_sample *sample,	\
+				   struct machine *machine)	\
+{								\
+	struct convert *c = container_of(tool, struct convert, tool);\
+	struct ctf_writer *cw = &c->writer;			\
+	struct bt_ctf_event_class *event_class = cw->_name##_class;\
+	struct bt_ctf_event *event;				\
+	struct ctf_stream *cs;					\
+	int ret;						\
+								\
+	c->non_sample_count++;					\
+	c->events_size += _event->header.size;			\
+	event = bt_ctf_event_create(event_class);		\
+	if (!event) {						\
+		pr_err("Failed to create an CTF event\n");	\
+		return -1;					\
+	}							\
+								\
+	bt_ctf_clock_set_time(cw->clock, sample->time);		\
+	body							\
+	cs = ctf_stream(cw, 0);					\
+	if (cs) {						\
+		if (is_flush_needed(cs))			\
+			ctf_stream__flush(cs);			\
+								\
+		cs->count++;					\
+		bt_ctf_stream_append_event(cs->stream, event);	\
+	}							\
+	bt_ctf_event_put(event);				\
+								\
+	return perf_event__process_##_name(tool, _event, sample, machine);\
+}
+
+/* process_comm_event(): exports pid, tid and comm of a COMM record. */
+__FUNC_PROCESS_NON_SAMPLE(comm,
+	__NON_SAMPLE_SET_FIELD(comm, u32, pid);
+	__NON_SAMPLE_SET_FIELD(comm, u32, tid);
+	__NON_SAMPLE_SET_FIELD(comm, string, comm);
+)
+/* process_fork_event(): exports pid, ppid, tid, ptid and time of a FORK record. */
+__FUNC_PROCESS_NON_SAMPLE(fork,
+	__NON_SAMPLE_SET_FIELD(fork, u32, pid);
+	__NON_SAMPLE_SET_FIELD(fork, u32, ppid);
+	__NON_SAMPLE_SET_FIELD(fork, u32, tid);
+	__NON_SAMPLE_SET_FIELD(fork, u32, ptid);
+	__NON_SAMPLE_SET_FIELD(fork, u64, time);
+)
+
+/*
+ * process_exit_event(): the payload is read through the 'fork' member of
+ * union perf_event.
+ * NOTE(review): presumably EXIT records share the fork record layout and the
+ * union has no separate 'exit' member -- confirm against union perf_event.
+ */
+__FUNC_PROCESS_NON_SAMPLE(exit,
+	__NON_SAMPLE_SET_FIELD(fork, u32, pid);
+	__NON_SAMPLE_SET_FIELD(fork, u32, ppid);
+	__NON_SAMPLE_SET_FIELD(fork, u32, tid);
+	__NON_SAMPLE_SET_FIELD(fork, u32, ptid);
+	__NON_SAMPLE_SET_FIELD(fork, u64, time);
+)
+/* process_mmap_event(): exports pid, tid, start and filename of an MMAP record. */
+__FUNC_PROCESS_NON_SAMPLE(mmap,
+	__NON_SAMPLE_SET_FIELD(mmap, u32, pid);
+	__NON_SAMPLE_SET_FIELD(mmap, u32, tid);
+	__NON_SAMPLE_SET_FIELD(mmap, u64_hex, start);
+	__NON_SAMPLE_SET_FIELD(mmap, string, filename);
+)
+/* process_mmap2_event(): same four fields, taken from the mmap2 member. */
+__FUNC_PROCESS_NON_SAMPLE(mmap2,
+	__NON_SAMPLE_SET_FIELD(mmap2, u32, pid);
+	__NON_SAMPLE_SET_FIELD(mmap2, u32, tid);
+	__NON_SAMPLE_SET_FIELD(mmap2, u64_hex, start);
+	__NON_SAMPLE_SET_FIELD(mmap2, string, filename);
+)
+/* The generator macros are only needed for the handlers above. */
+#undef __NON_SAMPLE_SET_FIELD
+#undef __FUNC_PROCESS_NON_SAMPLE
+
+/*
+ * Build a replacement field name in freshly allocated memory.
+ *
+ * @name:      current candidate name, or NULL to start from @orig_name.
+ *             When it is not @orig_name itself it is freed before returning.
+ * @orig_name: the untouched field name; never freed here.
+ * @dup:       negative: return "_<name>" (keyword escaping);
+ *             0..9:     return "<orig_name>_dupl_<dup>";
+ *             >= 10:    give up.
+ *
+ * Returns the new name (caller owns it) or NULL when @dup is out of range or
+ * the allocation fails.
+ *
+ * The '_' prefix is used for potential keywords.  According to Mathieu
+ * Desnoyers (https://lkml.org/lkml/2015/1/23/652), further CTF spec updates
+ * may require us to use '$' instead.
+ */
+static char *change_name(char *name, char *orig_name, int dup)
+{
+	const int want_prefix = dup < 0;
+	char *base = name ? name : orig_name;
+	char *result = NULL;
+
+	/* Only a single decimal digit fits the "_dupl_X" suffix. */
+	if (dup < 10) {
+		size_t size;
+
+		/* sizeof() of the literals already accounts for the NUL. */
+		if (want_prefix)
+			size = strlen(base) + sizeof("_");
+		else
+			size = strlen(orig_name) + sizeof("_dupl_X");
+
+		result = malloc(size);
+		if (result) {
+			if (want_prefix)
+				snprintf(result, size, "_%s", base);
+			else
+				snprintf(result, size, "%s_dupl_%d",
+					 orig_name, dup);
+		}
+	}
+
+	/* An intermediate candidate is ours to release. */
+	if (base != orig_name)
+		free(base);
+
+	return result;
+}
+
+/*
+ * Add @field to @event_class under a name that CTF accepts and that is not
+ * already used in the class.
+ *
+ * If the field already carries an alias (alias != name) that alias is used
+ * directly.  Otherwise the name is prefixed when bt_ctf_validate_identifier()
+ * rejects it, and "_dupl_N" suffixes are tried while the class already has a
+ * field of that name.  On success the chosen name is remembered in
+ * field->alias; on failure any name allocated here is released.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int event_class_add_field(struct bt_ctf_event_class *event_class,
+		struct bt_ctf_field_type *type,
+		struct format_field *field)
+{
+	struct bt_ctf_field_type *t = NULL;
+	char *name;
+	int dup = 1;
+	int ret;
+
+	/* alias was already assigned */
+	if (field->alias != field->name)
+		return bt_ctf_event_class_add_field(event_class, type,
+				(char *)field->alias);
+
+	name = field->name;
+
+	/* If 'name' is a keyword, add prefix. */
+	if (bt_ctf_validate_identifier(name))
+		name = change_name(name, field->name, -1);
+
+	if (!name) {
+		pr_err("Failed to fix invalid identifier.\n");
+		return -1;
+	}
+	while ((t = bt_ctf_event_class_get_field_by_name(event_class, name))) {
+		/* The lookup returned a reference we do not need. */
+		bt_ctf_field_type_put(t);
+		name = change_name(name, field->name, dup++);
+		if (!name) {
+			pr_err("Failed to create dup name for '%s'\n", field->name);
+			return -1;
+		}
+	}
+
+	ret = bt_ctf_event_class_add_field(event_class, type, name);
+	if (!ret)
+		field->alias = name;
+	else if (name != field->name)
+		free(name);	/* allocated by change_name(), nobody owns it */
+
+	return ret;
+}
+
+/*
+ * Walk the @fields list and register a CTF field type in @event_class for
+ * every tracepoint field.  Returns 0 on success, -1 as soon as one field
+ * cannot be typed or added.
+ */
+static int add_tracepoint_fields_types(struct ctf_writer *cw,
+				       struct format_field *fields,
+				       struct bt_ctf_event_class *event_class)
+{
+	struct format_field *cur = fields;
+
+	while (cur) {
+		const unsigned long flags = cur->flags;
+		struct bt_ctf_field_type *ftype;
+		bool is_array;
+		int err;
+
+		pr2("  field '%s'\n", cur->name);
+
+		ftype = get_tracepoint_field_type(cw, cur);
+		if (!ftype)
+			return -1;
+
+		/*
+		 * A string is an array of chars, so it is emitted with the
+		 * string type and its array flag is ignored.  Arrays of
+		 * strings are not supported.
+		 */
+		is_array = (flags & FIELD_IS_ARRAY) &&
+			   !(flags & FIELD_IS_STRING);
+
+		if (is_array)
+			ftype = bt_ctf_field_type_array_create(ftype,
+							       cur->arraylen);
+
+		err = event_class_add_field(event_class, ftype, cur);
+
+		/* Drop the reference on the array type created above. */
+		if (is_array)
+			bt_ctf_field_type_put(ftype);
+
+		if (err) {
+			pr_err("Failed to add field '%s': %d\n",
+					cur->name, err);
+			return -1;
+		}
+
+		cur = cur->next;
+	}
+
+	return 0;
+}
+
+/*
+ * Register the tracepoint's common fields first, then its event-specific
+ * fields, in @class.  Stops at the first failure and returns its code.
+ */
+static int add_tracepoint_types(struct ctf_writer *cw,
+				struct perf_evsel *evsel,
+				struct bt_ctf_event_class *class)
+{
+	int err;
+
+	err = add_tracepoint_fields_types(cw,
+			evsel->tp_format->format.common_fields, class);
+	if (err)
+		return err;
+
+	return add_tracepoint_fields_types(cw,
+			evsel->tp_format->format.fields, class);
+}
+
+/*
+ * Describe BPF output samples in @class: a u32 "raw_len" field followed by a
+ * "raw_data" sequence of hex u32 values whose length is given by "raw_len".
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int add_bpf_output_types(struct ctf_writer *cw,
+				struct bt_ctf_event_class *class)
+{
+	struct bt_ctf_field_type *len_type = cw->data.u32;
+	struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex;
+	struct bt_ctf_field_type *seq_type;
+	int ret;
+
+	ret = bt_ctf_event_class_add_field(class, len_type, "raw_len");
+	if (ret)
+		return ret;
+
+	seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len");
+	if (!seq_type)
+		return -1;
+
+	ret = bt_ctf_event_class_add_field(class, seq_type, "raw_data");
+	/*
+	 * Drop our creation reference, as is done for array types in
+	 * add_tracepoint_fields_types(); otherwise the sequence type leaks.
+	 */
+	bt_ctf_field_type_put(seq_type);
+
+	return ret;
+}
+
+/*
+ * Add to @event_class one CTF field for every generic sample member selected
+ * in evsel->attr.sample_type.  Returns 0 on success, -1 on the first field
+ * that cannot be added.
+ */
+static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
+			     struct bt_ctf_event_class *event_class)
+{
+	u64 type = evsel->attr.sample_type;
+
+	/*
+	 * missing:
+	 *   PERF_SAMPLE_TIME         - not needed as we have it in
+	 *                              ctf event header
+	 *   PERF_SAMPLE_READ         - TODO
+	 *   PERF_SAMPLE_RAW          - tracepoint fields and BPF output
+	 *                              are handled separately
+	 *   PERF_SAMPLE_BRANCH_STACK - TODO
+	 *   PERF_SAMPLE_REGS_USER    - TODO
+	 *   PERF_SAMPLE_STACK_USER   - TODO
+	 */
+
+#define ADD_FIELD(cl, t, n)						\
+	do {								\
+		pr2("  field '%s'\n", n);				\
+		if (bt_ctf_event_class_add_field(cl, t, n)) {		\
+			pr_err("Failed to add field '%s';\n", n);	\
+			return -1;					\
+		}							\
+	} while (0)
+
+	if (type & PERF_SAMPLE_IP)
+		ADD_FIELD(event_class, cw->data.u64_hex, "perf_ip");
+
+	if (type & PERF_SAMPLE_TID) {
+		ADD_FIELD(event_class, cw->data.s32, "perf_tid");
+		ADD_FIELD(event_class, cw->data.s32, "perf_pid");
+	}
+
+	if ((type & PERF_SAMPLE_ID) ||
+	    (type & PERF_SAMPLE_IDENTIFIER))
+		ADD_FIELD(event_class, cw->data.u64, "perf_id");
+
+	if (type & PERF_SAMPLE_STREAM_ID)
+		ADD_FIELD(event_class, cw->data.u64, "perf_stream_id");
+
+	if (type & PERF_SAMPLE_PERIOD)
+		ADD_FIELD(event_class, cw->data.u64, "perf_period");
+
+	if (type & PERF_SAMPLE_WEIGHT)
+		ADD_FIELD(event_class, cw->data.u64, "perf_weight");
+
+	if (type & PERF_SAMPLE_DATA_SRC)
+		ADD_FIELD(event_class, cw->data.u64, "perf_data_src");
+
+	if (type & PERF_SAMPLE_TRANSACTION)
+		ADD_FIELD(event_class, cw->data.u64, "perf_transaction");
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		struct bt_ctf_field_type *seq_type;
+		int ret;
+
+		ADD_FIELD(event_class, cw->data.u32, "perf_callchain_size");
+
+		/*
+		 * The sequence type is created here, so this function owns a
+		 * reference that must be dropped once the class has it.
+		 */
+		pr2("  field '%s'\n", "perf_callchain");
+		seq_type = bt_ctf_field_type_sequence_create(
+				cw->data.u64_hex, "perf_callchain_size");
+		if (!seq_type) {
+			pr_err("Failed to add field '%s';\n", "perf_callchain");
+			return -1;
+		}
+
+		ret = bt_ctf_event_class_add_field(event_class, seq_type,
+						   "perf_callchain");
+		bt_ctf_field_type_put(seq_type);
+		if (ret) {
+			pr_err("Failed to add field '%s';\n", "perf_callchain");
+			return -1;
+		}
+	}
+
+#undef ADD_FIELD
+	return 0;
+}
+
+/*
+ * Create the CTF event class for @evsel, fill in its fields, attach it to
+ * the writer's stream class and remember it in a freshly allocated
+ * evsel->priv.  Returns 0 on success, -1 on failure (the event class
+ * reference taken here is dropped in that case).
+ */
+static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel)
+{
+	const char *name = perf_evsel__name(evsel);
+	struct bt_ctf_event_class *event_class;
+	struct evsel_priv *priv;
+
+	pr("Adding event '%s' (type %d)\n", name, evsel->attr.type);
+
+	event_class = bt_ctf_event_class_create(name);
+	if (!event_class)
+		return -1;
+
+	/* Fields common to every sample. */
+	if (add_generic_types(cw, evsel, event_class))
+		goto err;
+
+	/* Tracepoint payload fields. */
+	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
+	    add_tracepoint_types(cw, evsel, event_class))
+		goto err;
+
+	/* Raw BPF output payload. */
+	if (perf_evsel__is_bpf_output(evsel) &&
+	    add_bpf_output_types(cw, event_class))
+		goto err;
+
+	if (bt_ctf_stream_class_add_event_class(cw->stream_class, event_class)) {
+		pr("Failed to add event class into stream.\n");
+		goto err;
+	}
+
+	priv = malloc(sizeof(*priv));
+	if (!priv)
+		goto err;
+
+	priv->event_class = event_class;
+	evsel->priv       = priv;
+	return 0;
+
+err:
+	bt_ctf_event_class_put(event_class);
+	pr_err("Failed to add event '%s'.\n", name);
+	return -1;
+}
+
+/*
+ * Call add_event() for every evsel of the session's evlist, stopping at the
+ * first failure.  Returns 0 when all events were added, otherwise the
+ * failing add_event() result.
+ */
+static int setup_events(struct ctf_writer *cw, struct perf_session *session)
+{
+	struct perf_evlist *evlist = session->evlist;
+	struct perf_evsel *evsel;
+	int ret = 0;
+
+	evlist__for_each_entry(evlist, evsel) {
+		ret = add_event(cw, evsel);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Add a field named #n of type cw->data.t to 'event_class'.  Only usable in
+ * the add_<name>_event() functions generated by
+ * __FUNC_ADD_NON_SAMPLE_EVENT_CLASS, which provide 'cw' and 'event_class'.
+ * On failure the event class reference is dropped before returning -1 so
+ * the partially built class is not leaked.
+ */
+#define __NON_SAMPLE_ADD_FIELD(t, n)						\
+	do {							\
+		pr2("  field '%s'\n", #n);			\
+		if (bt_ctf_event_class_add_field(event_class, cw->data.t, #n)) {\
+			pr_err("Failed to add field '%s';\n", #n);\
+			bt_ctf_event_class_put(event_class);	\
+			return -1;				\
+		}						\
+	} while(0)
+
+/*
+ * Generate add_<_name>_event(): create the "perf_<_name>" event class, run
+ * 'body' (a list of __NON_SAMPLE_ADD_FIELD invocations) to populate it, add
+ * it to cw->stream_class and record the pointer in cw-><_name>_class.
+ *
+ * The creation reference is dropped on every exit path; after success the
+ * pointer stored in cw is used without a reference of its own.
+ * NOTE(review): presumably the stream class keeps the event class alive --
+ * confirm against the babeltrace reference-counting rules.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+#define __FUNC_ADD_NON_SAMPLE_EVENT_CLASS(_name, body) 		\
+static int add_##_name##_event(struct ctf_writer *cw)		\
+{								\
+	struct bt_ctf_event_class *event_class;			\
+	int ret;						\
+								\
+	pr("Adding "#_name" event\n");				\
+	event_class = bt_ctf_event_class_create("perf_" #_name);\
+	if (!event_class)					\
+		return -1;					\
+	body							\
+								\
+	ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);\
+	if (ret) {						\
+		pr("Failed to add event class '"#_name"' into stream.\n");\
+		bt_ctf_event_class_put(event_class);		\
+		return ret;					\
+	}							\
+								\
+	cw->_name##_class = event_class;			\
+	bt_ctf_event_class_put(event_class);			\
+	return 0;						\
+}
+
+/* add_comm_event(): "perf_comm" class with pid, tid and comm fields. */
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(comm,
+	__NON_SAMPLE_ADD_FIELD(u32, pid);
+	__NON_SAMPLE_ADD_FIELD(u32, tid);
+	__NON_SAMPLE_ADD_FIELD(string, comm);
+)
+
+/* add_fork_event(): "perf_fork" class with pid, ppid, tid, ptid and time. */
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(fork,
+	__NON_SAMPLE_ADD_FIELD(u32, pid);
+	__NON_SAMPLE_ADD_FIELD(u32, ppid);
+	__NON_SAMPLE_ADD_FIELD(u32, tid);
+	__NON_SAMPLE_ADD_FIELD(u32, ptid);
+	__NON_SAMPLE_ADD_FIELD(u64, time);
+)
+
+/* add_exit_event(): "perf_exit" class, same field list as the fork class. */
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(exit,
+	__NON_SAMPLE_ADD_FIELD(u32, pid);
+	__NON_SAMPLE_ADD_FIELD(u32, ppid);
+	__NON_SAMPLE_ADD_FIELD(u32, tid);
+	__NON_SAMPLE_ADD_FIELD(u32, ptid);
+	__NON_SAMPLE_ADD_FIELD(u64, time);
+)
+
+/* add_mmap_event(): "perf_mmap" class with pid, tid, start and filename. */
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(mmap,
+	__NON_SAMPLE_ADD_FIELD(u32, pid);
+	__NON_SAMPLE_ADD_FIELD(u32, tid);
+	__NON_SAMPLE_ADD_FIELD(u64_hex, start);
+	__NON_SAMPLE_ADD_FIELD(string, filename);
+)
+
+/* add_mmap2_event(): "perf_mmap2" class, same field list as the mmap class. */
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(mmap2,
+	__NON_SAMPLE_ADD_FIELD(u32, pid);
+	__NON_SAMPLE_ADD_FIELD(u32, tid);
+	__NON_SAMPLE_ADD_FIELD(u64_hex, start);
+	__NON_SAMPLE_ADD_FIELD(string, filename);
+)
+/* The generator macros are only needed for the functions above. */
+#undef __NON_SAMPLE_ADD_FIELD
+#undef __FUNC_ADD_NON_SAMPLE_EVENT_CLASS
+
+/*
+ * Register the comm, exit, fork, mmap and mmap2 event classes, in that
+ * order.  The first failing step ends the sequence and its code is returned;
+ * 0 means every class was added.
+ */
+static int setup_non_sample_events(struct ctf_writer *cw,
+				   struct perf_session *session __maybe_unused)
+{
+	int ret;
+
+	ret = add_comm_event(cw);
+	if (!ret)
+		ret = add_exit_event(cw);
+	if (!ret)
+		ret = add_fork_event(cw);
+	if (!ret)
+		ret = add_mmap_event(cw);
+	if (!ret)
+		ret = add_mmap2_event(cw);
+
+	return ret;
+}
+
+/*
+ * Undo setup_events(): drop the event class reference held through each
+ * evsel->priv, free the priv, then delete the evlist and clear the session's
+ * pointer to it.  Every evsel is expected to have a priv.
+ */
+static void cleanup_events(struct perf_session *session)
+{
+	struct perf_evlist *list = session->evlist;
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(list, pos) {
+		struct evsel_priv *priv = pos->priv;
+
+		bt_ctf_event_class_put(priv->event_class);
+		zfree(&pos->priv);
+	}
+
+	perf_evlist__delete(list);
+	session->evlist = NULL;
+}
+
+/*
+ * Allocate the zeroed per-cpu stream pointer table of the writer.
+ * Returns 0 on success, -ENOMEM when the table cannot be allocated.
+ */
+static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
+{
+	struct ctf_stream **table;
+	int ncpus;
+
+	/*
+	 * Prefer the cpu count recorded in the data file header and fall
+	 * back to MAX_CPUS when the header does not carry one.
+	 */
+	ncpus = session->header.env.nr_cpus_avail;
+	if (!ncpus)
+		ncpus = MAX_CPUS;
+
+	table = zalloc(sizeof(*table) * ncpus);
+	if (!table) {
+		pr_err("Failed to allocate streams.\n");
+		return -ENOMEM;
+	}
+
+	cw->stream_cnt = ncpus;
+	cw->stream     = table;
+	return 0;
+}
+
+/* Delete every per-cpu stream slot, then free the slot table itself. */
+static void free_streams(struct ctf_writer *cw)
+{
+	int idx = 0;
+
+	while (idx < cw->stream_cnt) {
+		ctf_stream__delete(cw->stream[idx]);
+		idx++;
+	}
+
+	free(cw->stream);
+}
+
+/*
+ * Publish the recording environment (host, OS release/version, architecture
+ * and fixed tracer identification) as CTF environment fields.
+ * Returns 0 on success, -1 as soon as one field is rejected.
+ */
+static int ctf_writer__setup_env(struct ctf_writer *cw,
+				 struct perf_session *session)
+{
+	struct perf_header *header = &session->header;
+	struct bt_ctf_writer *writer = cw->writer;
+	/* Fields are added in table order. */
+	const struct {
+		const char *name;
+		const char *value;
+	} env[] = {
+		{ "host",        header->env.hostname   },
+		{ "sysname",     "Linux"                },
+		{ "release",     header->env.os_release },
+		{ "version",     header->env.version    },
+		{ "machine",     header->env.arch       },
+		{ "domain",      "kernel"               },
+		{ "tracer_name", "perf"                 },
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(env); i++) {
+		if (bt_ctf_writer_add_environment_field(writer, env[i].name,
+							env[i].value))
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Describe the perf clock to CTF: 1 GHz frequency, zero offsets, precision
+ * 10 and not absolute.  The setters run in that order and the first one to
+ * fail makes the function return -1; 0 means all of them succeeded.
+ */
+static int ctf_writer__setup_clock(struct ctf_writer *cw)
+{
+	struct bt_ctf_clock *clk = cw->clock;
+
+	bt_ctf_clock_set_description(clk, "perf clock");
+
+	if (bt_ctf_clock_set_frequency(clk, 1000000000) ||
+	    bt_ctf_clock_set_offset_s(clk, 0) ||
+	    bt_ctf_clock_set_offset(clk, 0) ||
+	    bt_ctf_clock_set_precision(clk, 10) ||
+	    bt_ctf_clock_set_is_absolute(clk, 0))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Create a CTF integer field type.
+ *
+ * @size: width in bits.
+ * @sign: true for a signed integer.
+ * @hex:  true to have values displayed in hexadecimal.
+ *
+ * The byte order is set to the build's __BYTE_ORDER.  Returns the new type
+ * (the caller owns the reference) or NULL on failure.
+ */
+static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex)
+{
+	struct bt_ctf_field_type *type;
+
+	type = bt_ctf_field_type_integer_create(size);
+	if (!type)
+		return NULL;
+
+	if (sign &&
+	    bt_ctf_field_type_integer_set_signed(type, 1))
+		goto err;
+
+	if (hex &&
+	    bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
+		goto err;
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+	bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN);
+#else
+	bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN);
+#endif
+
+	/* Prefix "un" only for unsigned types so the log matches the type. */
+	pr2("Created type: INTEGER %d-bit %ssigned %s\n",
+	    size, sign ? "" : "un", hex ? "hex" : "");
+	return type;
+
+err:
+	bt_ctf_field_type_put(type);
+	return NULL;
+}
+
+/* Drop the reference held on every basic field type in cw->data. */
+static void ctf_writer__cleanup_data(struct ctf_writer *cw)
+{
+	unsigned int idx = 0;
+
+	while (idx < ARRAY_SIZE(cw->data.array)) {
+		bt_ctf_field_type_put(cw->data.array[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Create the basic field types shared by all events: six integer flavours
+ * and the string type.  On any failure everything created so far is released
+ * through ctf_writer__cleanup_data() and -1 is returned; 0 means success.
+ */
+static int ctf_writer__init_data(struct ctf_writer *cw)
+{
+	/* Integer types, created in table order. */
+	const struct {
+		struct bt_ctf_field_type **slot;
+		int size;
+		bool sign;
+		bool hex;
+	} ints[] = {
+		{ &cw->data.s64,     64, true,  false },
+		{ &cw->data.u64,     64, false, false },
+		{ &cw->data.s32,     32, true,  false },
+		{ &cw->data.u32,     32, false, false },
+		{ &cw->data.u32_hex, 32, false, true  },
+		{ &cw->data.u64_hex, 64, false, true  },
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(ints); i++) {
+		*ints[i].slot = create_int_type(ints[i].size, ints[i].sign,
+						ints[i].hex);
+		if (!*ints[i].slot)
+			goto err;
+	}
+
+	cw->data.string  = bt_ctf_field_type_string_create();
+	if (cw->data.string)
+		return 0;
+
+err:
+	ctf_writer__cleanup_data(cw);
+	pr_err("Failed to create data types.\n");
+	return -1;
+}
+
+/*
+ * Release everything the writer holds: the basic field types, the clock,
+ * the per-cpu streams, the stream class and the babeltrace writer itself.
+ * The structure is zeroed afterwards so no stale pointer survives.
+ */
+static void ctf_writer__cleanup(struct ctf_writer *cw)
+{
+	ctf_writer__cleanup_data(cw);
+
+	bt_ctf_clock_put(cw->clock);
+	free_streams(cw);
+	bt_ctf_stream_class_put(cw->stream_class);
+	bt_ctf_writer_put(cw->writer);
+
+	/* and NULL all the pointers */
+	memset(cw, 0, sizeof(*cw));
+}
+
+/*
+ * Set up @cw for writing a CTF trace into @path: create the babeltrace
+ * writer, the "perf_clock" clock, the "perf_stream" stream class and the
+ * basic field types, add a u32 "cpu_id" field to the packet context and
+ * register the clock with the writer.
+ *
+ * Returns 0 on success and -1 on failure.  Apart from a failed writer
+ * creation (nothing to undo yet), every failure goes through
+ * ctf_writer__cleanup(), which releases what was created and zeroes @cw.
+ */
+static int ctf_writer__init(struct ctf_writer *cw, const char *path)
+{
+	struct bt_ctf_writer		*writer;
+	struct bt_ctf_stream_class	*stream_class;
+	struct bt_ctf_clock		*clock;
+	struct bt_ctf_field_type	*pkt_ctx_type;
+	int				ret;
+
+	/* CTF writer */
+	writer = bt_ctf_writer_create(path);
+	if (!writer)
+		goto err;
+
+	cw->writer = writer;
+
+	/* CTF clock */
+	clock = bt_ctf_clock_create("perf_clock");
+	if (!clock) {
+		pr("Failed to create CTF clock.\n");
+		goto err_cleanup;
+	}
+
+	cw->clock = clock;
+
+	if (ctf_writer__setup_clock(cw)) {
+		pr("Failed to setup CTF clock.\n");
+		goto err_cleanup;
+	}
+
+	/* CTF stream class */
+	stream_class = bt_ctf_stream_class_create("perf_stream");
+	if (!stream_class) {
+		pr("Failed to create CTF stream class.\n");
+		goto err_cleanup;
+	}
+
+	cw->stream_class = stream_class;
+
+	/* CTF clock stream setup */
+	if (bt_ctf_stream_class_set_clock(stream_class, clock)) {
+		pr("Failed to assign CTF clock to stream class.\n");
+		goto err_cleanup;
+	}
+
+	if (ctf_writer__init_data(cw))
+		goto err_cleanup;
+
+	/* Add cpu_id for packet context */
+	pkt_ctx_type = bt_ctf_stream_class_get_packet_context_type(stream_class);
+	if (!pkt_ctx_type)
+		goto err_cleanup;
+
+	ret = bt_ctf_field_type_structure_add_field(pkt_ctx_type, cw->data.u32, "cpu_id");
+	/* The getter's reference is dropped whether or not the add worked. */
+	bt_ctf_field_type_put(pkt_ctx_type);
+	if (ret)
+		goto err_cleanup;
+
+	/* CTF clock writer setup */
+	if (bt_ctf_writer_add_clock(writer, clock)) {
+		pr("Failed to assign CTF clock to writer.\n");
+		goto err_cleanup;
+	}
+
+	return 0;
+
+err_cleanup:
+	ctf_writer__cleanup(cw);
+err:
+	pr_err("Failed to setup CTF writer.\n");
+	return -1;
+}
+
+/*
+ * Flush every per-cpu stream slot in index order.  Stops at the first flush
+ * that fails and returns its code; returns 0 when all flushes succeeded.
+ */
+static int ctf_writer__flush_streams(struct ctf_writer *cw)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < cw->stream_cnt; cpu++) {
+		int err = ctf_stream__flush(cw->stream[cpu]);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * perf_config() callback: picks up "convert.queue-size" into the queue_size
+ * member of the struct convert passed as @cb.  Any other variable is
+ * accepted and ignored.
+ */
+static int convert__config(const char *var, const char *value, void *cb)
+{
+	struct convert *conv = cb;
+
+	if (strcmp(var, "convert.queue-size") != 0)
+		return 0;
+
+	return perf_config_u64(&conv->queue_size, var, value);
+}
+
+/*
+ * Convert the perf data file @input into a CTF trace written to @path.
+ *
+ * @opts->force is forwarded to the perf_data reader; @opts->all additionally
+ * exports comm, exit, fork, mmap and mmap2 records as CTF events.
+ *
+ * Returns 0 on success; otherwise the perf_config() error, -1 for a setup
+ * failure, or the error from event processing / stream flushing.
+ */
+int bt_convert__perf2ctf(const char *input, const char *path,
+			 struct perf_data_convert_opts *opts)
+{
+	struct perf_session *session;
+	struct perf_data data = {
+		.file.path = input,
+		.mode      = PERF_DATA_MODE_READ,
+		.force     = opts->force,
+	};
+	struct convert c = {
+		.tool = {
+			.sample          = process_sample_event,
+			.mmap            = perf_event__process_mmap,
+			.mmap2           = perf_event__process_mmap2,
+			.comm            = perf_event__process_comm,
+			.exit            = perf_event__process_exit,
+			.fork            = perf_event__process_fork,
+			.lost            = perf_event__process_lost,
+			.tracing_data    = perf_event__process_tracing_data,
+			.build_id        = perf_event__process_build_id,
+			.namespaces      = perf_event__process_namespaces,
+			.ordered_events  = true,
+			.ordering_requires_timestamps = true,
+		},
+	};
+	struct ctf_writer *cw = &c.writer;
+	int err;
+
+	/* With --all, swap in the handlers that also emit CTF events. */
+	if (opts->all) {
+		c.tool.comm = process_comm_event;
+		c.tool.exit = process_exit_event;
+		c.tool.fork = process_fork_event;
+		c.tool.mmap = process_mmap_event;
+		c.tool.mmap2 = process_mmap2_event;
+	}
+
+	err = perf_config(convert__config, &c);
+	if (err)
+		return err;
+
+	/* CTF writer */
+	if (ctf_writer__init(cw, path))
+		return -1;
+
+	/* Every setup failure below reports -1. */
+	err = -1;
+	/* perf.data session */
+	session = perf_session__new(&data, 0, &c.tool);
+	if (!session)
+		goto free_writer;
+
+	/* Only override the ordered-events allocation size when configured. */
+	if (c.queue_size) {
+		ordered_events__set_alloc_size(&session->ordered_events,
+					       c.queue_size);
+	}
+
+	/* CTF writer env/clock setup  */
+	if (ctf_writer__setup_env(cw, session))
+		goto free_session;
+
+	/* CTF events setup */
+	if (setup_events(cw, session))
+		goto free_session;
+
+	if (opts->all && setup_non_sample_events(cw, session))
+		goto free_session;
+
+	if (setup_streams(cw, session))
+		goto free_session;
+
+	err = perf_session__process_events(session);
+	if (!err)
+		err = ctf_writer__flush_streams(cw);
+	else
+		pr_err("Error during conversion.\n");
+
+	/*
+	 * NOTE(review): the summary below is printed even when processing or
+	 * flushing failed -- confirm that this is intended.
+	 */
+	fprintf(stderr,
+		"[ perf data convert: Converted '%s' into CTF data '%s' ]\n",
+		data.file.path, path);
+
+	fprintf(stderr,
+		"[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples",
+		(double) c.events_size / 1024.0 / 1024.0,
+		c.events_count);
+
+	if (!c.non_sample_count)
+		fprintf(stderr, ") ]\n");
+	else
+		fprintf(stderr, ", %" PRIu64 " non-samples) ]\n", c.non_sample_count);
+
+	cleanup_events(session);
+	perf_session__delete(session);
+	ctf_writer__cleanup(cw);
+
+	return err;
+
+	/*
+	 * NOTE(review): the setup error path does not call cleanup_events(),
+	 * so evsel->priv allocated by add_event() is presumably not released
+	 * here -- verify against perf_session__delete().
+	 */
+free_session:
+	perf_session__delete(session);
+free_writer:
+	ctf_writer__cleanup(cw);
+	pr_err("Error during conversion setup.\n");
+	return err;
+}
diff --git a/util/data-convert-bt.h b/util/data-convert-bt.h
new file mode 100644
index 0000000..821674d
--- /dev/null
+++ b/util/data-convert-bt.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DATA_CONVERT_BT_H
+#define __DATA_CONVERT_BT_H
+#include "data-convert.h"
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+
+/*
+ * Convert the perf data file @input_name into CTF data stored at @to_ctf,
+ * honouring the options in @opts.
+ */
+int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
+			 struct perf_data_convert_opts *opts);
+
+#endif /* HAVE_LIBBABELTRACE_SUPPORT */
+#endif /* __DATA_CONVERT_BT_H */
diff --git a/util/data-convert.h b/util/data-convert.h
new file mode 100644
index 0000000..af90b60
--- /dev/null
+++ b/util/data-convert.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DATA_CONVERT_H
+#define __DATA_CONVERT_H
+
+/* Options controlling a perf.data conversion. */
+struct perf_data_convert_opts {
+	bool force;	/* passed on as the 'force' flag of the input perf_data */
+	bool all;	/* also convert non-sample events (comm, fork, exit, mmap, mmap2) */
+};
+
+#endif /* __DATA_CONVERT_H */
diff --git a/util/data.c b/util/data.c
new file mode 100644
index 0000000..d8cfc19
--- /dev/null
+++ b/util/data.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "data.h"
+#include "util.h"
+#include "debug.h"
+
+/*
+ * Decide whether @data is a pipe.  A path of "-" always means pipe; with no
+ * path at all, stdin (read mode) or stdout (write mode) is probed for being
+ * a FIFO.  The result is stored in data->is_pipe and, for a pipe, the
+ * standard descriptor becomes data->file.fd.
+ */
+static bool check_pipe(struct perf_data *data)
+{
+	const int std_fd = perf_data__is_read(data) ?
+			   STDIN_FILENO : STDOUT_FILENO;
+	bool piped;
+
+	if (data->file.path) {
+		piped = strcmp(data->file.path, "-") == 0;
+	} else {
+		struct stat st;
+
+		piped = fstat(std_fd, &st) == 0 && S_ISFIFO(st.st_mode);
+	}
+
+	if (piped)
+		data->file.fd = std_fd;
+
+	data->is_pipe = piped;
+	return piped;
+}
+
+/*
+ * If the output file already exists and is not empty, move it aside to
+ * "<path>.old", replacing any previous backup.  Failures of unlink() and
+ * rename() are ignored.  Always returns 0.
+ */
+static int check_backup(struct perf_data *data)
+{
+	char backup[PATH_MAX];
+	struct stat st;
+
+	/* Nothing to preserve: no such file, or it is empty. */
+	if (stat(data->file.path, &st) || !st.st_size)
+		return 0;
+
+	/* TODO check errors properly */
+	snprintf(backup, sizeof(backup), "%s.old", data->file.path);
+	unlink(backup);
+	rename(data->file.path, backup);
+
+	return 0;
+}
+
+/*
+ * Open data->file.path read-only and validate it.
+ *
+ * Returns the descriptor and records the file size in data->size on success.
+ * Returns -errno when open() fails, and -1 (after closing the descriptor)
+ * when fstat() fails, when the file is owned by neither root nor the current
+ * effective user (unless data->force is set), or when the file is empty.
+ */
+static int open_file_read(struct perf_data *data)
+{
+	struct stat st;
+	int fd;
+	char sbuf[STRERR_BUFSIZE];
+
+	fd = open(data->file.path, O_RDONLY);
+	if (fd < 0) {
+		/* Save errno before the logging calls can change it. */
+		int err = errno;
+
+		/* The message is assembled from several pr_err() calls. */
+		pr_err("failed to open %s: %s", data->file.path,
+			str_error_r(err, sbuf, sizeof(sbuf)));
+		if (err == ENOENT && !strcmp(data->file.path, "perf.data"))
+			pr_err("  (try 'perf record' first)");
+		pr_err("\n");
+		return -err;
+	}
+
+	/* NOTE(review): an fstat() failure is not reported to the user. */
+	if (fstat(fd, &st) < 0)
+		goto out_close;
+
+	if (!data->force && st.st_uid && (st.st_uid != geteuid())) {
+		pr_err("File %s not owned by current user or root (use -f to override)\n",
+		       data->file.path);
+		goto out_close;
+	}
+
+	if (!st.st_size) {
+		pr_info("zero-sized data (%s), nothing to do!\n",
+			data->file.path);
+		goto out_close;
+	}
+
+	data->size = st.st_size;
+	return fd;
+
+ out_close:
+	close(fd);
+	return -1;
+}
+
+/*
+ * Back up any existing non-empty output file, then create or truncate
+ * data->file.path for reading and writing with owner-only permissions.
+ * Returns the new descriptor, or a negative value on failure (the open()
+ * error is logged).
+ */
+static int open_file_write(struct perf_data *data)
+{
+	char errbuf[STRERR_BUFSIZE];
+	int fd;
+
+	if (check_backup(data) != 0)
+		return -1;
+
+	fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC,
+		  S_IRUSR|S_IWUSR);
+	if (fd >= 0)
+		return fd;
+
+	pr_err("failed to open %s : %s\n", data->file.path,
+		str_error_r(errno, errbuf, sizeof(errbuf)));
+	return fd;
+}
+
+/*
+ * Open the data file according to its mode and store the resulting
+ * descriptor (negative on failure) in data->file.fd.
+ * Returns 0 on success, -1 on failure.
+ */
+static int open_file(struct perf_data *data)
+{
+	int fd;
+
+	if (perf_data__is_read(data))
+		fd = open_file_read(data);
+	else
+		fd = open_file_write(data);
+
+	data->file.fd = fd;
+
+	if (fd < 0)
+		return -1;
+	return 0;
+}
+
+/*
+ * Open @data.  A pipe needs no opening and yields 0 right away; otherwise
+ * the path defaults to "perf.data" when unset and the file is opened.
+ * Returns 0 on success, -1 on failure.
+ */
+int perf_data__open(struct perf_data *data)
+{
+	if (!check_pipe(data)) {
+		if (!data->file.path)
+			data->file.path = "perf.data";
+
+		return open_file(data);
+	}
+
+	return 0;
+}
+
+/* Close the descriptor of @data; the result of close() is not checked. */
+void perf_data__close(struct perf_data *data)
+{
+	close(data->file.fd);
+}
+
+/* Write @size bytes from @buf to @file's descriptor via writen(). */
+ssize_t perf_data_file__write(struct perf_data_file *file,
+			      void *buf, size_t size)
+{
+	return writen(file->fd, buf, size);
+}
+
+/* Write @size bytes from @buf to the file backing @data. */
+ssize_t perf_data__write(struct perf_data *data,
+			      void *buf, size_t size)
+{
+	return perf_data_file__write(&data->file, buf, size);
+}
+
+/*
+ * Rename the current output file to "<path>.<postfix>" and, unless @at_exit
+ * is set, reopen the original path and seek to @pos so writing can continue
+ * there.
+ *
+ * Returns the descriptor now in use on success, -EINVAL for pipes or
+ * read-mode data, -ENOMEM when the new path cannot be built, a negative
+ * value when reopening fails, or -errno when the seek fails.
+ */
+int perf_data__switch(struct perf_data *data,
+			   const char *postfix,
+			   size_t pos, bool at_exit)
+{
+	char *new_filepath;
+	int ret;
+
+	if (check_pipe(data))
+		return -EINVAL;
+	if (perf_data__is_read(data))
+		return -EINVAL;
+
+	if (asprintf(&new_filepath, "%s.%s", data->file.path, postfix) < 0)
+		return -ENOMEM;
+
+	/*
+	 * Only fire a warning, don't return an error: keep filling the
+	 * original file.
+	 */
+	if (rename(data->file.path, new_filepath))
+		pr_warning("Failed to rename %s to %s\n", data->file.path, new_filepath);
+
+	if (!at_exit) {
+		close(data->file.fd);
+		ret = perf_data__open(data);
+		if (ret < 0)
+			goto out;
+
+		if (lseek(data->file.fd, pos, SEEK_SET) == (off_t)-1) {
+			ret = -errno;
+			/* Terminate the line like the other messages here. */
+			pr_debug("Failed to lseek to %zu: %s\n",
+				 pos, strerror(errno));
+			goto out;
+		}
+	}
+	ret = data->file.fd;
+out:
+	free(new_filepath);
+	return ret;
+}
diff --git a/util/data.h b/util/data.h
new file mode 100644
index 0000000..4828f7f
--- /dev/null
+++ b/util/data.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DATA_H
+#define __PERF_DATA_H
+
+#include <stdbool.h>
+
+/* Direction in which a perf_data object is used. */
+enum perf_data_mode {
+	PERF_DATA_MODE_WRITE,
+	PERF_DATA_MODE_READ,
+};
+
+/* A data file: its path and the descriptor it is open on. */
+struct perf_data_file {
+	const char	*path;
+	int		 fd;
+};
+
+/* State of one perf data input or output. */
+struct perf_data {
+	struct perf_data_file	 file;
+	bool			 is_pipe;	/* set by the pipe check done on open */
+	bool			 force;		/* skip the file ownership check when reading */
+	unsigned long		 size;		/* file size, recorded when opened for reading */
+	enum perf_data_mode	 mode;
+};
+
+/* True when @data is in read mode. */
+static inline bool perf_data__is_read(struct perf_data *data)
+{
+	return data->mode == PERF_DATA_MODE_READ;
+}
+
+/* True when @data is in write mode. */
+static inline bool perf_data__is_write(struct perf_data *data)
+{
+	return data->mode == PERF_DATA_MODE_WRITE;
+}
+
+/* Non-zero when @data has been flagged as a pipe. */
+static inline int perf_data__is_pipe(struct perf_data *data)
+{
+	return data->is_pipe;
+}
+
+/* Descriptor currently stored for @data's file. */
+static inline int perf_data__fd(struct perf_data *data)
+{
+	return data->file.fd;
+}
+
+/* Size recorded for @data. */
+static inline unsigned long perf_data__size(struct perf_data *data)
+{
+	return data->size;
+}
+
+/* Open/close the data file (or attach to the pipe) described by @data. */
+int perf_data__open(struct perf_data *data);
+void perf_data__close(struct perf_data *data);
+/* Write @size bytes from @buf to the data file. */
+ssize_t perf_data__write(struct perf_data *data,
+			      void *buf, size_t size);
+ssize_t perf_data_file__write(struct perf_data_file *file,
+			      void *buf, size_t size);
+/*
+ * Rename the current perf.data to perf.data.<postfix>.
+ *
+ * If at_exit is set, only the rename is done and writing continues on the
+ * already open descriptor; set at_exit when flushing the last output.
+ * Otherwise the original path is reopened and positioned at pos.
+ *
+ * The return value is the fd of the output in use, or a negative error.
+ */
+int perf_data__switch(struct perf_data *data,
+			   const char *postfix,
+			   size_t pos, bool at_exit);
+#endif /* __PERF_DATA_H */
diff --git a/util/db-export.c b/util/db-export.c
new file mode 100644
index 0000000..b0c2b5c
--- /dev/null
+++ b/util/db-export.c
@@ -0,0 +1,508 @@
+/*
+ * db-export.c: Support for exporting data suitable for import to a database
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <errno.h>
+
+#include "evsel.h"
+#include "machine.h"
+#include "thread.h"
+#include "comm.h"
+#include "symbol.h"
+#include "event.h"
+#include "util.h"
+#include "thread-stack.h"
+#include "callchain.h"
+#include "call-path.h"
+#include "db-export.h"
+
+/* A comm whose export was postponed; queued on db_export::deferred. */
+struct deferred_export {
+	struct list_head node;
+	struct comm *comm;
+};
+
+/*
+ * Export the queued comms in FIFO order, freeing each queue entry once it
+ * has been handed to export_comm().  Stops after the first export that
+ * fails (that entry is still removed) and returns its error; returns 0 when
+ * the queue was drained.
+ */
+static int db_export__deferred(struct db_export *dbe)
+{
+	int err = 0;
+
+	while (!err && !list_empty(&dbe->deferred)) {
+		struct deferred_export *entry;
+
+		entry = list_entry(dbe->deferred.next, struct deferred_export,
+				   node);
+		err = dbe->export_comm(dbe, entry->comm);
+		list_del(&entry->node);
+		free(entry);
+	}
+
+	return err;
+}
+
+/* Discard every queued deferred export without exporting it. */
+static void db_export__free_deferred(struct db_export *dbe)
+{
+	struct deferred_export *de;
+
+	/* Pop from the head until the list is empty. */
+	while (!list_empty(&dbe->deferred)) {
+		de = list_entry(dbe->deferred.next, struct deferred_export,
+				node);
+		list_del(&de->node);
+		free(de);
+	}
+}
+
+/*
+ * Queue @comm at the tail of the deferred list so it is exported later by
+ * db_export__deferred().  Returns 0, or -ENOMEM if the queue entry cannot
+ * be allocated.
+ */
+static int db_export__defer_comm(struct db_export *dbe, struct comm *comm)
+{
+	struct deferred_export *entry = zalloc(sizeof(*entry));
+
+	if (!entry)
+		return -ENOMEM;
+
+	entry->comm = comm;
+	list_add_tail(&entry->node, &dbe->deferred);
+
+	return 0;
+}
+
+/* Reset @dbe to all zeroes and give it an empty deferred list.  Returns 0. */
+int db_export__init(struct db_export *dbe)
+{
+	memset(dbe, 0, sizeof(*dbe));
+	INIT_LIST_HEAD(&dbe->deferred);
+
+	return 0;
+}
+
+/* Export everything that was deferred; returns the first export error or 0. */
+int db_export__flush(struct db_export *dbe)
+{
+	return db_export__deferred(dbe);
+}
+
+/*
+ * Tear down @dbe: drop still-deferred exports without exporting them and
+ * free the call return processor.
+ */
+void db_export__exit(struct db_export *dbe)
+{
+	db_export__free_deferred(dbe);
+	call_return_processor__free(dbe->crp);
+	dbe->crp = NULL;
+}
+
+/*
+ * Export @evsel once: an evsel that already has a db_id is skipped.
+ * Otherwise it receives the next id and is passed to the export_evsel
+ * callback when one is installed.  Returns the callback's result, else 0.
+ */
+int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel)
+{
+	if (evsel->db_id)
+		return 0;
+
+	evsel->db_id = ++dbe->evsel_last_db_id;
+
+	return dbe->export_evsel ? dbe->export_evsel(dbe, evsel) : 0;
+}
+
+/*
+ * Export @machine once: a machine that already has a db_id is skipped.
+ * Otherwise it receives the next id and is passed to the export_machine
+ * callback when one is installed.  Returns the callback's result, else 0.
+ */
+int db_export__machine(struct db_export *dbe, struct machine *machine)
+{
+	if (machine->db_id)
+		return 0;
+
+	machine->db_id = ++dbe->machine_last_db_id;
+
+	return dbe->export_machine ? dbe->export_machine(dbe, machine) : 0;
+}
+
+/*
+ * Export @thread once (threads that already have a db_id are skipped).
+ *
+ * When the thread's pid is known and differs from its tid, the main thread
+ * of the process is looked up (or created) and exported first, and, if
+ * @comm is given, a comm/thread link is exported for @thread.  The main
+ * thread's db_id is then handed to the export_thread callback.
+ *
+ * Returns 0 on success, -ENOMEM if the main thread cannot be obtained, or
+ * the error of a failed export.
+ */
+int db_export__thread(struct db_export *dbe, struct thread *thread,
+		      struct machine *machine, struct comm *comm)
+{
+	struct thread *main_thread;
+	u64 main_thread_db_id = 0;
+	int err;
+
+	if (thread->db_id)
+		return 0;
+
+	/* The id is assigned before anything that may fail below. */
+	thread->db_id = ++dbe->thread_last_db_id;
+
+	if (thread->pid_ != -1) {
+		if (thread->pid_ == thread->tid) {
+			main_thread = thread;
+		} else {
+			main_thread = machine__findnew_thread(machine,
+							      thread->pid_,
+							      thread->pid_);
+			if (!main_thread)
+				return -ENOMEM;
+			err = db_export__thread(dbe, main_thread, machine,
+						comm);
+			if (err)
+				goto out_put;
+			if (comm) {
+				err = db_export__comm_thread(dbe, comm, thread);
+				if (err)
+					goto out_put;
+			}
+		}
+		main_thread_db_id = main_thread->db_id;
+		/* Only a looked-up main thread carries a reference to drop. */
+		if (main_thread != thread)
+			thread__put(main_thread);
+	}
+
+	if (dbe->export_thread)
+		return dbe->export_thread(dbe, thread, main_thread_db_id,
+					  machine);
+
+	return 0;
+
+out_put:
+	thread__put(main_thread);
+	return err;
+}
+
+/*
+ * Export @comm once (comms that already have a db_id are skipped).  With an
+ * export_comm callback installed, the comm is exported right away when
+ * @main_thread has its comm set, and queued for later otherwise.  Finally
+ * the comm/thread link for @main_thread is exported.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+int db_export__comm(struct db_export *dbe, struct comm *comm,
+		    struct thread *main_thread)
+{
+	if (comm->db_id)
+		return 0;
+
+	comm->db_id = ++dbe->comm_last_db_id;
+
+	if (dbe->export_comm) {
+		int err = main_thread->comm_set ?
+			  dbe->export_comm(dbe, comm) :
+			  db_export__defer_comm(dbe, comm);
+
+		if (err)
+			return err;
+	}
+
+	return db_export__comm_thread(dbe, comm, main_thread);
+}
+
+/*
+ * Export a comm/thread link.  A new link id is consumed on every call, even
+ * when no export_comm_thread callback is installed.  Returns the callback's
+ * result, else 0.
+ */
+int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
+			   struct thread *thread)
+{
+	const u64 link_id = ++dbe->comm_thread_last_db_id;
+
+	if (!dbe->export_comm_thread)
+		return 0;
+
+	return dbe->export_comm_thread(dbe, link_id, comm, thread);
+}
+
+/*
+ * Export @dso once: a dso that already has a db_id is skipped.  Otherwise
+ * it receives the next id and is passed, with @machine, to the export_dso
+ * callback when one is installed.  Returns the callback's result, else 0.
+ */
+int db_export__dso(struct db_export *dbe, struct dso *dso,
+		   struct machine *machine)
+{
+	if (dso->db_id)
+		return 0;
+
+	dso->db_id = ++dbe->dso_last_db_id;
+
+	return dbe->export_dso ? dbe->export_dso(dbe, dso, machine) : 0;
+}
+
+/*
+ * Export @sym once.  The symbol's id lives in its private area
+ * (symbol__priv()); a non-zero id means it was already exported.  Otherwise
+ * the next id is stored there and the export_symbol callback, when
+ * installed, is invoked.  Returns the callback's result, else 0.
+ */
+int db_export__symbol(struct db_export *dbe, struct symbol *sym,
+		      struct dso *dso)
+{
+	u64 *id_slot = symbol__priv(sym);
+
+	if (*id_slot)
+		return 0;
+
+	*id_slot = ++dbe->symbol_last_db_id;
+
+	return dbe->export_symbol ? dbe->export_symbol(dbe, sym, dso) : 0;
+}
+
+/*
+ * Export the dso and symbol referenced by @al and report their database
+ * ids.  Without a map nothing is exported and the outputs are left alone.
+ * With a map, *dso_db_id is always set; if the location has no symbol an
+ * "unknown" one is created at al->addr and inserted into the dso.  When a
+ * symbol is available, *sym_db_id and *offset (address minus symbol start)
+ * are set as well.
+ *
+ * Returns 0 on success or the error of a failed export.
+ */
+static int db_ids_from_al(struct db_export *dbe, struct addr_location *al,
+			  u64 *dso_db_id, u64 *sym_db_id, u64 *offset)
+{
+	struct dso *dso;
+	u64 *sym_id;
+	int err;
+
+	if (!al->map)
+		return 0;
+
+	dso = al->map->dso;
+
+	err = db_export__dso(dbe, dso, al->machine);
+	if (err)
+		return err;
+	*dso_db_id = dso->db_id;
+
+	if (!al->sym) {
+		al->sym = symbol__new(al->addr, 0, 0, "unknown");
+		if (al->sym)
+			dso__insert_symbol(dso, al->map->type, al->sym);
+	}
+
+	/* A failed symbol allocation is tolerated: only the dso is reported. */
+	if (!al->sym)
+		return 0;
+
+	sym_id = symbol__priv(al->sym);
+
+	err = db_export__symbol(dbe, al->sym, dso);
+	if (err)
+		return err;
+
+	*sym_db_id = *sym_id;
+	*offset = al->addr - al->sym->start;
+
+	return 0;
+}
+
+/*
+ * Build (or look up) the call path for a sample's callchain and export the
+ * dso and symbol of every callchain node along the way.
+ *
+ * Returns the call_path node for the last callchain entry, or NULL when
+ * callchains are not in use, the sample has no callchain, callchain
+ * resolution fails, or the resolved callchain is empty.
+ *
+ * callchain_param.order is temporarily switched to ORDER_CALLER and put
+ * back to its previous value before returning on every path that changed
+ * it.
+ */
+static struct call_path *call_path_from_sample(struct db_export *dbe,
+					       struct machine *machine,
+					       struct thread *thread,
+					       struct perf_sample *sample,
+					       struct perf_evsel *evsel)
+{
+	u64 kernel_start = machine__kernel_start(machine);
+	struct call_path *current = &dbe->cpr->call_path;
+	enum chain_order saved_order = callchain_param.order;
+	int err;
+
+	if (!symbol_conf.use_callchain || !sample->callchain)
+		return NULL;
+
+	/*
+	 * Since the call path tree must be built starting with the root, we
+	 * must use ORDER_CALLER for call chain resolution, in order to process
+	 * the callchain starting with the root node and ending with the leaf.
+	 */
+	callchain_param.order = ORDER_CALLER;
+	err = thread__resolve_callchain(thread, &callchain_cursor, evsel,
+					sample, NULL, NULL, PERF_MAX_STACK_DEPTH);
+	if (err) {
+		callchain_param.order = saved_order;
+		return NULL;
+	}
+	callchain_cursor_commit(&callchain_cursor);
+
+	while (1) {
+		struct callchain_cursor_node *node;
+		struct addr_location al;
+		/* filled in by db_ids_from_al() but not used afterwards */
+		u64 dso_db_id = 0, sym_db_id = 0, offset = 0;
+
+		memset(&al, 0, sizeof(al));
+
+		node = callchain_cursor_current(&callchain_cursor);
+		if (!node)
+			break;
+		/*
+		 * Handle export of symbol and dso for this node by
+		 * constructing an addr_location struct and then passing it to
+		 * db_ids_from_al() to perform the export.
+		 */
+		al.sym = node->sym;
+		al.map = node->map;
+		al.machine = machine;
+		al.addr = node->ip;
+
+		if (al.map && !al.sym)
+			al.sym = dso__find_symbol(al.map->dso, MAP__FUNCTION,
+						  al.addr);
+
+		/* NOTE(review): the return value is ignored here */
+		db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset);
+
+		/* add node to the call path tree if it doesn't exist */
+		current = call_path__findnew(dbe->cpr, current,
+					     al.sym, node->ip,
+					     kernel_start);
+
+		callchain_cursor_advance(&callchain_cursor);
+	}
+
+	/* Reset the callchain order to its prior value. */
+	callchain_param.order = saved_order;
+
+	if (current == &dbe->cpr->call_path) {
+		/* Bail because the callchain was empty. */
+		return NULL;
+	}
+
+	return current;
+}
+
+/*
+ * Export a single branch type / name pair through the export_branch_type
+ * callback.
+ *
+ * Return: the callback's result, or 0 when no callback is set.
+ */
+int db_export__branch_type(struct db_export *dbe, u32 branch_type,
+			   const char *name)
+{
+	if (!dbe->export_branch_type)
+		return 0;
+
+	return dbe->export_branch_type(dbe, branch_type, name);
+}
+
+/*
+ * Export one sample together with everything it refers to.
+ *
+ * In order, this exports the evsel, the machine, the thread, the comm of
+ * the main thread (when one is found), the dso/symbol of the sample ip,
+ * the call path (when dbe->cpr is set) and, for samples whose address
+ * correlates with a symbol, the dso/symbol of the sample address. Finally
+ * the export_sample callback, if any, is invoked with the collected ids.
+ *
+ * Return: 0 on success, otherwise the first error reported by one of the
+ * export steps.
+ */
+int db_export__sample(struct db_export *dbe, union perf_event *event,
+		      struct perf_sample *sample, struct perf_evsel *evsel,
+		      struct addr_location *al)
+{
+	struct thread* thread = al->thread;
+	/* unnamed members (all the *_db_id / offset fields) start out as 0 */
+	struct export_sample es = {
+		.event = event,
+		.sample = sample,
+		.evsel = evsel,
+		.al = al,
+	};
+	struct thread *main_thread;
+	struct comm *comm = NULL;
+	int err;
+
+	err = db_export__evsel(dbe, evsel);
+	if (err)
+		return err;
+
+	err = db_export__machine(dbe, al->machine);
+	if (err)
+		return err;
+
+	/*
+	 * From here on every exit goes through out_put, which calls
+	 * thread__put() on main_thread (presumably dropping a reference taken
+	 * by thread__main_thread() - confirm against its definition).
+	 */
+	main_thread = thread__main_thread(al->machine, thread);
+	if (main_thread)
+		comm = machine__thread_exec_comm(al->machine, main_thread);
+
+	err = db_export__thread(dbe, thread, al->machine, comm);
+	if (err)
+		goto out_put;
+
+	if (comm) {
+		err = db_export__comm(dbe, comm, main_thread);
+		if (err)
+			goto out_put;
+		es.comm_db_id = comm->db_id;
+	}
+
+	/* the sample id is consumed even if a later step fails */
+	es.db_id = ++dbe->sample_last_db_id;
+
+	err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset);
+	if (err)
+		goto out_put;
+
+	if (dbe->cpr) {
+		struct call_path *cp = call_path_from_sample(dbe, al->machine,
+							     thread, sample,
+							     evsel);
+		if (cp) {
+			/* NOTE(review): the return value is ignored here */
+			db_export__call_path(dbe, cp);
+			es.call_path_id = cp->db_id;
+		}
+	}
+
+	if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
+	    sample_addr_correlates_sym(&evsel->attr)) {
+		struct addr_location addr_al;
+
+		/* resolve the sample's address into addr_al */
+		thread__resolve(thread, &addr_al, sample);
+		err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id,
+				     &es.addr_sym_db_id, &es.addr_offset);
+		if (err)
+			goto out_put;
+		if (dbe->crp) {
+			err = thread_stack__process(thread, comm, sample, al,
+						    &addr_al, es.db_id,
+						    dbe->crp);
+			if (err)
+				goto out_put;
+		}
+	}
+
+	if (dbe->export_sample)
+		err = dbe->export_sample(dbe, &es);
+
+	/* reached on success as well as on error */
+out_put:
+	thread__put(main_thread);
+	return err;
+}
+
+/*
+ * Branch type flag combinations and the human readable names exported for
+ * them by db_export__branch_types(). The table ends with an entry whose
+ * name is NULL.
+ */
+static struct {
+	u32 branch_type;
+	const char *name;
+} branch_types[] = {
+	{0, "no branch"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "conditional jump"},
+	{PERF_IP_FLAG_BRANCH, "unconditional jump"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT,
+	 "software interrupt"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT,
+	 "return from interrupt"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET,
+	 "system call"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET,
+	 "return from system call"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "asynchronous branch"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+	 PERF_IP_FLAG_INTERRUPT, "hardware interrupt"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "transaction abort"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "trace begin"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "trace end"},
+	/* sentinel: name == NULL ends the table */
+	{0, NULL}
+};
+
+/*
+ * Export every entry of the branch_types[] table, stopping at the first
+ * failure.
+ *
+ * Return: 0 when all entries were exported, otherwise the first error.
+ */
+int db_export__branch_types(struct db_export *dbe)
+{
+	int idx;
+
+	for (idx = 0; branch_types[idx].name; idx++) {
+		int ret = db_export__branch_type(dbe,
+						 branch_types[idx].branch_type,
+						 branch_types[idx].name);
+
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Export a call path node, making sure its ancestors are exported first.
+ *
+ * A node with a non-zero db_id has been exported already. The parent chain
+ * is exported recursively before this node receives its own id, so parents
+ * always get lower ids than their children.
+ *
+ * Return: 0 on success, otherwise the first error from a callback.
+ */
+int db_export__call_path(struct db_export *dbe, struct call_path *cp)
+{
+	if (cp->db_id != 0)
+		return 0;
+
+	if (cp->parent) {
+		int ret = db_export__call_path(dbe, cp->parent);
+
+		if (ret)
+			return ret;
+	}
+
+	dbe->call_path_last_db_id += 1;
+	cp->db_id = dbe->call_path_last_db_id;
+
+	return dbe->export_call_path ? dbe->export_call_path(dbe, cp) : 0;
+}
+
+/*
+ * Export a call/return record after exporting the call path it refers to.
+ *
+ * A record with a non-zero db_id has been exported already.
+ *
+ * Return: 0 on success, otherwise the first error from the call path
+ * export or the export_call_return callback.
+ */
+int db_export__call_return(struct db_export *dbe, struct call_return *cr)
+{
+	int ret;
+
+	if (cr->db_id != 0)
+		return 0;
+
+	ret = db_export__call_path(dbe, cr->cp);
+	if (ret)
+		return ret;
+
+	dbe->call_return_last_db_id += 1;
+	cr->db_id = dbe->call_return_last_db_id;
+
+	if (!dbe->export_call_return)
+		return 0;
+
+	return dbe->export_call_return(dbe, cr);
+}
diff --git a/util/db-export.h b/util/db-export.h
new file mode 100644
index 0000000..67bc6b8
--- /dev/null
+++ b/util/db-export.h
@@ -0,0 +1,109 @@
+/*
+ * db-export.h: Support for exporting data suitable for import to a database
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_DB_EXPORT_H
+#define __PERF_DB_EXPORT_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+struct perf_evsel;
+struct machine;
+struct thread;
+struct comm;
+struct dso;
+struct perf_sample;
+struct addr_location;
+struct call_return_processor;
+struct call_path_root;
+struct call_path;
+struct call_return;
+
+/*
+ * Everything handed to the export_sample callback for one sample: the raw
+ * inputs plus the database ids assigned while exporting the objects the
+ * sample refers to. An id of 0 means the corresponding object was not
+ * exported for this sample.
+ */
+struct export_sample {
+	union perf_event	*event;
+	struct perf_sample	*sample;
+	struct perf_evsel	*evsel;
+	struct addr_location	*al;
+	u64			db_id;		/* id of the sample itself */
+	u64			comm_db_id;
+	u64			dso_db_id;
+	u64			sym_db_id;
+	u64			offset; /* ip offset from symbol start */
+	u64			addr_dso_db_id;
+	u64			addr_sym_db_id;
+	u64			addr_offset; /* addr offset from symbol start */
+	u64			call_path_id;	/* db_id of the sample's call path */
+};
+
+/*
+ * State of one database export session.
+ *
+ * The export_* members are callbacks supplied by the user of this
+ * interface. The db_export__*() helpers visible in db-export.c test a
+ * callback for NULL before calling it, so a callback may be left unset.
+ * The *_last_db_id members hold the most recently assigned id of each
+ * object kind; ids are handed out by pre-incrementing them, so the first
+ * id of every kind is 1 when the counters start at 0.
+ */
+struct db_export {
+	int (*export_evsel)(struct db_export *dbe, struct perf_evsel *evsel);
+	int (*export_machine)(struct db_export *dbe, struct machine *machine);
+	int (*export_thread)(struct db_export *dbe, struct thread *thread,
+			     u64 main_thread_db_id, struct machine *machine);
+	int (*export_comm)(struct db_export *dbe, struct comm *comm);
+	int (*export_comm_thread)(struct db_export *dbe, u64 db_id,
+				  struct comm *comm, struct thread *thread);
+	int (*export_dso)(struct db_export *dbe, struct dso *dso,
+			  struct machine *machine);
+	int (*export_symbol)(struct db_export *dbe, struct symbol *sym,
+			     struct dso *dso);
+	int (*export_branch_type)(struct db_export *dbe, u32 branch_type,
+				  const char *name);
+	int (*export_sample)(struct db_export *dbe, struct export_sample *es);
+	int (*export_call_path)(struct db_export *dbe, struct call_path *cp);
+	int (*export_call_return)(struct db_export *dbe,
+				  struct call_return *cr);
+	/* when set, samples with a correlating address feed thread_stack__process() */
+	struct call_return_processor *crp;
+	/* when set, a call path is built from each sample's callchain */
+	struct call_path_root *cpr;
+	u64 evsel_last_db_id;
+	u64 machine_last_db_id;
+	u64 thread_last_db_id;
+	u64 comm_last_db_id;
+	u64 comm_thread_last_db_id;
+	u64 dso_last_db_id;
+	u64 symbol_last_db_id;
+	u64 sample_last_db_id;
+	u64 call_path_last_db_id;
+	u64 call_return_last_db_id;
+	/* presumably the queue of comms deferred by db_export__defer_comm() - confirm */
+	struct list_head deferred;
+};
+
+int db_export__init(struct db_export *dbe);
+int db_export__flush(struct db_export *dbe);
+void db_export__exit(struct db_export *dbe);
+int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel);
+int db_export__machine(struct db_export *dbe, struct machine *machine);
+int db_export__thread(struct db_export *dbe, struct thread *thread,
+		      struct machine *machine, struct comm *comm);
+int db_export__comm(struct db_export *dbe, struct comm *comm,
+		    struct thread *main_thread);
+int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
+			   struct thread *thread);
+int db_export__dso(struct db_export *dbe, struct dso *dso,
+		   struct machine *machine);
+int db_export__symbol(struct db_export *dbe, struct symbol *sym,
+		      struct dso *dso);
+int db_export__branch_type(struct db_export *dbe, u32 branch_type,
+			   const char *name);
+int db_export__sample(struct db_export *dbe, union perf_event *event,
+		      struct perf_sample *sample, struct perf_evsel *evsel,
+		      struct addr_location *al);
+
+int db_export__branch_types(struct db_export *dbe);
+
+int db_export__call_path(struct db_export *dbe, struct call_path *cp);
+int db_export__call_return(struct db_export *dbe, struct call_return *cr);
+
+#endif
diff --git a/util/debug.c b/util/debug.c
new file mode 100644
index 0000000..3d64596
--- /dev/null
+++ b/util/debug.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/* For general debugging purposes */
+
+#include "../perf.h"
+
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/wait.h>
+#include <api/debug.h>
+#include <linux/time64.h>
+#ifdef HAVE_BACKTRACE_SUPPORT
+#include <execinfo.h>
+#endif
+#include "cache.h"
+#include "color.h"
+#include "event.h"
+#include "debug.h"
+#include "print_binary.h"
+#include "util.h"
+#include "target.h"
+
+#include "sane_ctype.h"
+
+int verbose;
+bool dump_trace = false, quiet = false;
+int debug_ordered_events;
+static int redirect_to_stderr;
+int debug_data_convert;
+
+/*
+ * Print a message when 'var' has reached 'level'.
+ *
+ * With a browser UI active (use_browser >= 1) and no redirection to stderr
+ * requested, the message goes to the UI help line; otherwise it is written
+ * to stderr.
+ *
+ * Return: the vfprintf() result when written to stderr, 0 in all other
+ * cases.
+ */
+int veprintf(int level, int var, const char *fmt, va_list args)
+{
+	if (var < level)
+		return 0;
+
+	if (use_browser >= 1 && !redirect_to_stderr) {
+		ui_helpline__vshow(fmt, args);
+		return 0;
+	}
+
+	return vfprintf(stderr, fmt, args);
+}
+
+/*
+ * Variadic front end for veprintf(): prints the message when 'var' has
+ * reached 'level' and returns whatever veprintf() returns.
+ */
+int eprintf(int level, int var, const char *fmt, ...)
+{
+	va_list ap;
+	int printed;
+
+	va_start(ap, fmt);
+	printed = veprintf(level, var, fmt, ap);
+	va_end(ap);
+
+	return printed;
+}
+
+/*
+ * Write "[secs.usecs] " followed by the formatted message to stderr, where
+ * the timestamp is derived from 't' given in nanoseconds.
+ *
+ * Return: the sum of the fprintf() and vfprintf() results.
+ */
+static int veprintf_time(u64 t, const char *fmt, va_list args)
+{
+	u64 whole_secs = t / NSEC_PER_SEC;
+	u64 rest_usecs = (t - whole_secs * NSEC_PER_SEC) / NSEC_PER_USEC;
+	int printed;
+
+	printed  = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ",
+			   whole_secs, rest_usecs);
+	printed += vfprintf(stderr, fmt, args);
+
+	return printed;
+}
+
+/*
+ * Print a timestamped message to stderr when 'var' has reached 'level'.
+ *
+ * Return: the veprintf_time() result, or 0 when nothing was printed.
+ */
+int eprintf_time(int level, int var, u64 t, const char *fmt, ...)
+{
+	va_list ap;
+	int printed;
+
+	if (var < level)
+		return 0;
+
+	va_start(ap, fmt);
+	printed = veprintf_time(t, fmt, ap);
+	va_end(ap);
+
+	return printed;
+}
+
+/*
+ * Replacement for libtraceevent's standard info print function.
+ * The message and a trailing newline are emitted at verbosity level 1,
+ * i.e. they show up with -v in perf.
+ */
+void pr_stat(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	veprintf(1, verbose, fmt, ap);
+	va_end(ap);
+
+	/* terminate the line under the same verbosity condition */
+	eprintf(1, verbose, "\n");
+}
+
+/*
+ * printf() to stdout, but only while dump_trace is enabled.
+ *
+ * Return: the vprintf() result, or 0 when dump_trace is off.
+ */
+int dump_printf(const char *fmt, ...)
+{
+	va_list ap;
+	int printed;
+
+	if (!dump_trace)
+		return 0;
+
+	va_start(ap, fmt);
+	printed = vprintf(fmt, ap);
+	va_end(ap);
+
+	return printed;
+}
+
+/*
+ * print_binary() callback used by trace_event() to render a raw event as a
+ * colored hex/ASCII dump.
+ *
+ * 'op' selects which piece of the dump to emit, 'val' carries the address
+ * or byte for the pieces that need one and 'extra' is the event being
+ * dumped.
+ *
+ * Return: the sum of the print call results for this piece, 0 for an
+ * unknown op.
+ */
+static int trace_event_printer(enum binary_printer_ops op,
+			       unsigned int val, void *extra, FILE *fp)
+{
+	const char *color = PERF_COLOR_BLUE;
+	int n;
+
+	switch (op) {
+	case BINARY_PRINT_DATA_BEGIN: {
+		union perf_event *event = (union perf_event *)extra;
+
+		n  = fprintf(fp, ".");
+		n += color_fprintf(fp, color, "\n. ... raw event: size %d bytes\n",
+				   event->header.size);
+		return n;
+	}
+	case BINARY_PRINT_LINE_BEGIN:
+		return fprintf(fp, ".");
+	case BINARY_PRINT_ADDR:
+		return color_fprintf(fp, color, "  %04x: ", val);
+	case BINARY_PRINT_NUM_DATA:
+		return color_fprintf(fp, color, " %02x", val);
+	case BINARY_PRINT_NUM_PAD:
+		return color_fprintf(fp, color, "   ");
+	case BINARY_PRINT_SEP:
+		return color_fprintf(fp, color, "  ");
+	case BINARY_PRINT_CHAR_DATA: {
+		unsigned char ch = (unsigned char)val;
+
+		/* non-printable bytes are shown as '.' */
+		return color_fprintf(fp, color, "%c",
+				     isprint(ch) ? ch : '.');
+	}
+	case BINARY_PRINT_CHAR_PAD:
+		return color_fprintf(fp, color, " ");
+	case BINARY_PRINT_LINE_END:
+		return color_fprintf(fp, color, "\n");
+	case BINARY_PRINT_DATA_END:
+		return fprintf(fp, "\n");
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Dump the raw bytes of an event, 16 per line, while dump_trace is
+ * enabled. The number of bytes dumped is event->header.size.
+ */
+void trace_event(union perf_event *event)
+{
+	unsigned char *bytes;
+
+	if (dump_trace) {
+		bytes = (void *)event;
+		print_binary(bytes, event->header.size, 16,
+			     trace_event_printer, event);
+	}
+}
+
+/*
+ * Debug variables that can be set by name through perf_debug_option() and
+ * are all forced to -1 by perf_quiet_option(). The table ends with an
+ * entry whose name is NULL.
+ */
+static struct debug_variable {
+	const char *name;	/* name accepted by perf_debug_option() */
+	int *ptr;		/* the int that receives the level */
+} debug_variables[] = {
+	{ .name = "verbose",		.ptr = &verbose },
+	{ .name = "ordered-events",	.ptr = &debug_ordered_events},
+	{ .name = "stderr",		.ptr = &redirect_to_stderr},
+	{ .name = "data-convert",	.ptr = &debug_data_convert },
+	{ .name = NULL, }
+};
+
+/*
+ * Parse one debug option of the form "name" or "name=value" and store the
+ * resulting level in the matching debug_variables[] entry.
+ *
+ * A bare "name" selects level 1. A value outside [0, 10] is replaced by 0.
+ * While the global 'quiet' flag is set, the variable is forced to -1
+ * regardless of the requested value.
+ *
+ * Return: 0 on success, -1 when the name is unknown or the option string
+ * could not be duplicated.
+ */
+int perf_debug_option(const char *str)
+{
+	struct debug_variable *var = &debug_variables[0];
+	char *vstr, *s = strdup(str);
+	int v = 1;
+
+	/* strdup() returns NULL when out of memory; do not dereference it */
+	if (!s) {
+		pr_err("Not enough memory to parse debug option '%s'\n", str);
+		return -1;
+	}
+
+	/* split "name=value" in place; vstr is NULL for a bare name */
+	vstr = strchr(s, '=');
+	if (vstr)
+		*vstr++ = 0;
+
+	while (var->name) {
+		if (!strcmp(s, var->name))
+			break;
+		var++;
+	}
+
+	if (!var->name) {
+		pr_err("Unknown debug variable name '%s'\n", s);
+		free(s);
+		return -1;
+	}
+
+	if (vstr) {
+		v = atoi(vstr);
+		/*
+		 * Allow only values in the range [0, 10],
+		 * otherwise set 0.
+		 */
+		v = (v < 0) || (v > 10) ? 0 : v;
+	}
+
+	if (quiet)
+		v = -1;
+
+	*var->ptr = v;
+	free(s);
+	return 0;
+}
+
+/*
+ * Silence all debug output by setting every variable listed in
+ * debug_variables[] to -1.
+ *
+ * Return: always 0.
+ */
+int perf_quiet_option(void)
+{
+	struct debug_variable *entry;
+
+	for (entry = &debug_variables[0]; entry->name; entry++)
+		*entry->ptr = -1;
+
+	return 0;
+}
+
+/*
+ * DEBUG_WRAPPER(name, level) defines a static variadic function
+ * pr_<name>_wrapper() that forwards its arguments to veprintf() with the
+ * given fixed level and the global 'verbose' as the variable to compare
+ * against. The two instances below are handed to libapi_set_print() in
+ * perf_debug_setup().
+ */
+#define DEBUG_WRAPPER(__n, __l)				\
+static int pr_ ## __n ## _wrapper(const char *fmt, ...)	\
+{							\
+	va_list args;					\
+	int ret;					\
+							\
+	va_start(args, fmt);				\
+	ret = veprintf(__l, verbose, fmt, args);	\
+	va_end(args);					\
+	return ret;					\
+}
+
+DEBUG_WRAPPER(warning, 0);
+DEBUG_WRAPPER(debug, 1);
+
+/*
+ * Hook perf's print wrappers into libapi: the level-0 wrapper is passed
+ * for the first two arguments of libapi_set_print() and the level-1
+ * wrapper for the third.
+ * NOTE(review): presumably these are the warning, info and debug slots -
+ * confirm against libapi_set_print()'s prototype.
+ */
+void perf_debug_setup(void)
+{
+	libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper);
+}
+
+/*
+ * Obtain a backtrace of at most 16 frames and print it to stdout.
+ *
+ * The frame count is always printed. The symbolic frame descriptions are
+ * only printed when backtrace_symbols() managed to allocate them.
+ * Without HAVE_BACKTRACE_SUPPORT this is a no-op.
+ */
+#ifdef HAVE_BACKTRACE_SUPPORT
+void dump_stack(void)
+{
+	void *array[16];
+	size_t size = backtrace(array, ARRAY_SIZE(array));
+	char **strings = backtrace_symbols(array, size);
+	size_t i;
+
+	/* size is a size_t, so %zu rather than %zd */
+	printf("Obtained %zu stack frames.\n", size);
+
+	/* backtrace_symbols() returns NULL on failure; nothing to list then */
+	if (!strings)
+		return;
+
+	for (i = 0; i < size; i++)
+		printf("%s\n", strings[i]);
+
+	free(strings);
+}
+#else
+void dump_stack(void) {}
+#endif
+
+/*
+ * Signal handler that reports the signal via psignal() with the prefix
+ * "perf", dumps the current stack, then restores the default disposition
+ * for the signal and raises it again.
+ */
+void sighandler_dump_stack(int sig)
+{
+	psignal(sig, "perf");
+	dump_stack();
+	/* reset to the default action before re-raising the same signal */
+	signal(sig, SIG_DFL);
+	raise(sig);
+}
diff --git a/util/debug.h b/util/debug.h
new file mode 100644
index 0000000..77445df
--- /dev/null
+++ b/util/debug.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* For general debugging purposes */
+#ifndef __PERF_DEBUG_H
+#define __PERF_DEBUG_H
+
+#include <stdbool.h>
+#include <string.h>
+#include <linux/compiler.h>
+#include "event.h"
+#include "../ui/helpline.h"
+#include "../ui/progress.h"
+#include "../ui/util.h"
+
+extern int verbose;
+extern bool quiet, dump_trace;
+extern int debug_ordered_events;
+extern int debug_data_convert;
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+/*
+ * Message macros built on eprintf(level, verbose, ...): a message is
+ * printed when 'verbose' is at least the macro's level. pr_err, pr_warning
+ * and pr_info use level 0, pr_debug level 1 and pr_debugN the level given
+ * as its first argument.
+ *
+ * NOTE(review): pr_debug2/3/4 wrap their format in pr_fmt() and then pass
+ * it to pr_debugN, which applies pr_fmt() again. That is harmless with the
+ * identity pr_fmt defined above, but a file that overrides pr_fmt gets it
+ * applied twice for these three macros.
+ */
+#define pr_err(fmt, ...) \
+	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning(fmt, ...) \
+	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...) \
+	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug(fmt, ...) \
+	eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debugN(n, fmt, ...) \
+	eprintf(n, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_time_N(n, var, t, fmt, ...) \
+	eprintf_time(n, var, t, fmt, ##__VA_ARGS__)
+
+#define pr_oe_time(t, fmt, ...)  pr_time_N(1, debug_ordered_events, t, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_oe_time2(t, fmt, ...) pr_time_N(2, debug_ordered_events, t, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define STRERR_BUFSIZE	128	/* For the buffer size of str_error_r */
+
+int dump_printf(const char *fmt, ...) __printf(1, 2);
+void trace_event(union perf_event *event);
+
+int ui__error(const char *format, ...) __printf(1, 2);
+int ui__warning(const char *format, ...) __printf(1, 2);
+
+void pr_stat(const char *fmt, ...);
+
+int eprintf(int level, int var, const char *fmt, ...) __printf(3, 4);
+int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5);
+int veprintf(int level, int var, const char *fmt, va_list args);
+
+int perf_debug_option(const char *str);
+void perf_debug_setup(void);
+int perf_quiet_option(void);
+
+void dump_stack(void);
+void sighandler_dump_stack(int sig);
+
+#endif	/* __PERF_DEBUG_H */
diff --git a/util/demangle-java.c b/util/demangle-java.c
new file mode 100644
index 0000000..e4c4867
--- /dev/null
+++ b/util/demangle-java.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include "util.h"
+#include "debug.h"
+#include "symbol.h"
+
+#include "demangle-java.h"
+
+#include "sane_ctype.h"
+
+/*
+ * Parser modes of __demangle_java_sym().
+ *
+ * NOTE(review): MODE_TYPE and MODE_CTYPE have the same value, so every
+ * comparison against one of them also matches the other. Confirm whether
+ * that is intended before relying on them being distinct.
+ */
+enum {
+	MODE_PREFIX = 0,
+	MODE_CLASS  = 1,
+	MODE_FUNC   = 2,
+	MODE_TYPE   = 3,
+	MODE_CTYPE  = 3, /* class arg */
+};
+
+/*
+ * Names printed for the single-letter base type codes, indexed by
+ * (letter - 'A'). Letters without an entry are NULL.
+ */
+#define BASE_ENT(c, n)	[c - 'A']=n
+static const char *base_types['Z' - 'A' + 1] = {
+	BASE_ENT('B', "byte" ),
+	BASE_ENT('C', "char" ),
+	BASE_ENT('D', "double" ),
+	BASE_ENT('F', "float" ),
+	BASE_ENT('I', "int" ),
+	BASE_ENT('J', "long" ),
+	BASE_ENT('S', "short" ),
+	BASE_ENT('Z', "bool" ),
+};
+
+/*
+ * demangle Java symbol between str and end positions and stores
+ * up to maxlen characters into buf. The parser starts in mode.
+ *
+ * Use MODE_PREFIX to process entire prototype till end position
+ * Use MODE_TYPE to process return type if str starts on return type char
+ *
+ * If end is NULL, parsing runs up to the terminating NUL of str.
+ * Parsing stops early, without reporting an error, once maxlen - 1
+ * characters have been produced; the result is then truncated.
+ *
+ * NOTE(review): the truncation check compares against maxlen - 1, so it
+ * only works for maxlen >= 1; callers must not pass a smaller value.
+ *
+ *  Return:
+ *	success: buf
+ *	error  : NULL ('[', '(', ')', ';' or '/' seen in a mode that does
+ *		 not accept it)
+ */
+static char *
+__demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int mode)
+{
+	int rlen = 0;	/* number of characters written to buf so far */
+	int array = 0;	/* pending '[' count for the next type */
+	int narg = 0;	/* number of arguments already printed */
+	const char *q;
+
+	if (!end)
+		end = str + strlen(str);
+
+	for (q = str; q != end; q++) {
+
+		/* keep room for the terminating NUL */
+		if (rlen == (maxlen - 1))
+			break;
+
+		switch (*q) {
+		case 'L':
+			/* start of a class name, or a plain 'L' elsewhere */
+			if (mode == MODE_PREFIX || mode == MODE_CTYPE) {
+				if (mode == MODE_CTYPE) {
+					if (narg)
+						rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+					narg++;
+				}
+				rlen += scnprintf(buf + rlen, maxlen - rlen, "class ");
+				if (mode == MODE_PREFIX)
+					mode = MODE_CLASS;
+			} else
+				buf[rlen++] = *q;
+			break;
+		case 'B':
+		case 'C':
+		case 'D':
+		case 'F':
+		case 'I':
+		case 'J':
+		case 'S':
+		case 'Z':
+			/* base type code inside a type list, else a literal letter */
+			if (mode == MODE_TYPE) {
+				if (narg)
+					rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+				rlen += scnprintf(buf + rlen, maxlen - rlen, "%s", base_types[*q - 'A']);
+				while (array--)
+					rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+				/* the loop above leaves array at -1 */
+				array = 0;
+				narg++;
+			} else
+				buf[rlen++] = *q;
+			break;
+		case 'V':
+			if (mode == MODE_TYPE) {
+				rlen += scnprintf(buf + rlen, maxlen - rlen, "void");
+				while (array--)
+					rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+				array = 0;
+			} else
+				buf[rlen++] = *q;
+			break;
+		case '[':
+			/* array dimension; printed as "[]" after the element type */
+			if (mode != MODE_TYPE)
+				goto error;
+			array++;
+			break;
+		case '(':
+			/* argument list follows the function name */
+			if (mode != MODE_FUNC)
+				goto error;
+			buf[rlen++] = *q;
+			mode = MODE_TYPE;
+			break;
+		case ')':
+			if (mode != MODE_TYPE)
+				goto error;
+			buf[rlen++] = *q;
+			narg = 0;
+			break;
+		case ';':
+			/* end of a class name */
+			if (mode != MODE_CLASS && mode != MODE_CTYPE)
+				goto error;
+			/* safe because at least one other char to process */
+			if (isalpha(*(q + 1)))
+				rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+			if (mode == MODE_CLASS)
+				mode = MODE_FUNC;
+			else if (mode == MODE_CTYPE)
+				mode = MODE_TYPE;
+			break;
+		case '/':
+			/* package separator, printed as '.' */
+			if (mode != MODE_CLASS && mode != MODE_CTYPE)
+				goto error;
+			rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+			break;
+		default :
+			buf[rlen++] = *q;
+		}
+	}
+	buf[rlen] = '\0';
+	return buf;
+error:
+	return NULL;
+}
+
+/*
+ * Demangle Java function signature (openJDK, not GCJ)
+ * input:
+ * 	str: string to parse. String is not modified
+ *    flags: combination of JAVA_DEMANGLE_* flags to modify demangling
+ * return:
+ *	if input can be demangled, then a newly allocated string is returned.
+ *	if input cannot be demangled, then NULL is returned
+ *
+ * The output is "<return type> <prototype>", or just "<prototype>" when
+ * JAVA_DEMANGLE_NORET is set. Input without a ')' is rejected.
+ *
+ * Note: caller is responsible for freeing demangled string
+ */
+char *
+java_demangle_sym(const char *str, int flags)
+{
+	char *buf, *ptr;
+	char *p;
+	size_t len, l1 = 0;
+
+	if (!str)
+		return NULL;
+
+	/* find start of return type */
+	p = strrchr(str, ')');
+	if (!p)
+		return NULL;
+
+	/*
+	 * expansion factor estimated to 3x
+	 */
+	len = strlen(str) * 3 + 1;
+	buf = malloc(len);
+	if (!buf)
+		return NULL;
+
+	buf[0] = '\0';
+	if (!(flags & JAVA_DEMANGLE_NORET)) {
+		/*
+		 * get return type first
+		 */
+		ptr = __demangle_java_sym(p + 1, NULL, buf, len, MODE_TYPE);
+		if (!ptr)
+			goto error;
+
+		l1 = strlen(buf);
+
+		/*
+		 * The 3x estimate is not a hard bound (a single 'D' expands
+		 * to "double"), so the return type may have filled the
+		 * buffer. Without room for the separator and a terminator
+		 * the prototype pass would be given a zero-sized window and
+		 * write past the allocation, so give up instead.
+		 */
+		if (l1 + 1 >= len)
+			goto error;
+
+		/* add space between return type and function prototype */
+		buf[l1++] = ' ';
+	}
+
+	/* process function up to return type */
+	ptr = __demangle_java_sym(str, p + 1, buf + l1, len - l1, MODE_PREFIX);
+	if (!ptr)
+		goto error;
+
+	return buf;
+error:
+	free(buf);
+	return NULL;
+}
diff --git a/util/demangle-java.h b/util/demangle-java.h
new file mode 100644
index 0000000..f936c8e
--- /dev/null
+++ b/util/demangle-java.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DEMANGLE_JAVA
+#define __PERF_DEMANGLE_JAVA 1
+/*
+ * demangle function flags
+ */
+#define JAVA_DEMANGLE_NORET	0x1 /* do not process return type */
+
+char * java_demangle_sym(const char *str, int flags);
+
+#endif /* __PERF_DEMANGLE_JAVA */
diff --git a/util/demangle-rust.c b/util/demangle-rust.c
new file mode 100644
index 0000000..423afbb
--- /dev/null
+++ b/util/demangle-rust.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "util.h"
+#include "debug.h"
+
+#include "demangle-rust.h"
+
+/*
+ * Mangled Rust symbols look like this:
+ *
+ *     _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
+ *
+ * The original symbol is:
+ *
+ *     <std::sys::fd::FileDesc as core::ops::Drop>::drop
+ *
+ * The last component of the path is a 64-bit hash in lowercase hex, prefixed
+ * with "h". Rust does not have a global namespace between crates, an illusion
+ * which Rust maintains by using the hash to distinguish things that would
+ * otherwise have the same symbol.
+ *
+ * Any path component not starting with a XID_Start character is prefixed with
+ * "_".
+ *
+ * The following escape sequences are used:
+ *
+ *     ","  =>  $C$
+ *     "@"  =>  $SP$
+ *     "*"  =>  $BP$
+ *     "&"  =>  $RF$
+ *     "<"  =>  $LT$
+ *     ">"  =>  $GT$
+ *     "("  =>  $LP$
+ *     ")"  =>  $RP$
+ *     " "  =>  $u20$
+ *     "'"  =>  $u27$
+ *     "["  =>  $u5b$
+ *     "]"  =>  $u5d$
+ *     "~"  =>  $u7e$
+ *
+ * A double ".." means "::" and a single "." means "-".
+ *
+ * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
+ */
+
+static const char *hash_prefix = "::h";
+static const size_t hash_prefix_len = 3;
+static const size_t hash_len = 16;
+
+static bool is_prefixed_hash(const char *start);
+static bool looks_like_rust(const char *sym, size_t len);
+static bool unescape(const char **in, char **out, const char *seq, char value);
+
+/*
+ * INPUT:
+ *     sym: symbol that has been through BFD-demangling
+ *
+ * Decide whether sym looks like a mangled Rust symbol. All of the
+ * following must hold:
+ *
+ *  1. The symbol ends in "::h" followed by 16 lowercase hex digits, and
+ *     there is at least one character in front of that suffix.
+ *
+ *  2. The hash uses between 5 and 15 of the 16 possible hex digits. This
+ *     is true of 99.9998% of hashes, so a real Rust symbol is very rarely
+ *     rejected. The point is to reject path components that merely look
+ *     like a hash, e.g. "haaaaaaaaaaaaaaaa": stripping an important path
+ *     component from a non-Rust symbol (false positive) is worse than
+ *     leaving the occasional Rust symbol mangled (false negative).
+ *
+ *  3. The part before the hash contains only a-zA-Z0-9 and _.:$
+ *
+ *  4. Every $-sign sequence is a recognized escape.
+ *
+ *  5. There is no run of three or more dots ("...").
+ */
+bool
+rust_is_mangled(const char *sym)
+{
+	const size_t suffix_len = hash_prefix_len + hash_len;
+	size_t total_len, path_len;
+
+	if (!sym)
+		return false;
+
+	total_len = strlen(sym);
+
+	/* Not long enough to contain "::h" + hash + something else */
+	if (total_len <= suffix_len)
+		return false;
+
+	path_len = total_len - suffix_len;
+
+	return is_prefixed_hash(sym + path_len) &&
+	       looks_like_rust(sym, path_len);
+}
+
+/*
+ * Check for a Rust hash suffix: the prefix "::h" followed by 16 lowercase
+ * hex digits, of which between 5 and 15 (inclusive) must be distinct.
+ */
+static bool is_prefixed_hash(const char *str)
+{
+	bool used[16] = { false };
+	const char *digits;
+	size_t i;
+	int distinct = 0;
+
+	if (strncmp(str, hash_prefix, hash_prefix_len) != 0)
+		return false;
+
+	digits = str + hash_prefix_len;
+
+	/* record which hex digits occur; anything else disqualifies */
+	for (i = 0; i < hash_len; i++) {
+		char c = digits[i];
+
+		if (c >= '0' && c <= '9')
+			used[c - '0'] = true;
+		else if (c >= 'a' && c <= 'f')
+			used[c - 'a' + 10] = true;
+		else
+			return false;
+	}
+
+	for (i = 0; i < 16; i++)
+		distinct += used[i] ? 1 : 0;
+
+	return distinct >= 5 && distinct <= 15;
+}
+
+/*
+ * Check that the first len bytes of str contain only what a mangled Rust
+ * path may contain: a-zA-Z0-9, '_', ':', '.', and recognized $-escapes,
+ * with no run of three dots.
+ */
+static bool looks_like_rust(const char *str, size_t len)
+{
+	static const char * const escapes[] = {
+		"$C$",
+		"$SP$", "$BP$", "$RF$", "$LT$", "$GT$", "$LP$", "$RP$",
+		"$u20$", "$u27$", "$u5b$", "$u5d$", "$u7e$",
+	};
+	const char *end = str + len;
+
+	while (str < end) {
+		char c = *str;
+
+		if (c == '$') {
+			size_t skip = 0;
+			size_t i;
+
+			for (i = 0; i < sizeof(escapes) / sizeof(escapes[0]); i++) {
+				size_t esc_len = strlen(escapes[i]);
+
+				if (!strncmp(str, escapes[i], esc_len)) {
+					skip = esc_len;
+					break;
+				}
+			}
+			/* unrecognized $-sequence */
+			if (!skip)
+				return false;
+			str += skip;
+			continue;
+		}
+
+		if (c == '.') {
+			/* Do not allow three or more consecutive dots */
+			if (!strncmp(str, "...", 3))
+				return false;
+		} else if (!((c >= 'a' && c <= 'z') ||
+			     (c >= 'A' && c <= 'Z') ||
+			     (c >= '0' && c <= '9') ||
+			     c == '_' || c == ':')) {
+			return false;
+		}
+
+		str++;
+	}
+
+	return true;
+}
+
+/*
+ * INPUT:
+ *     sym: symbol for which rust_is_mangled(sym) returns true
+ *
+ * The input is demangled in-place because the mangled name is always longer
+ * than the demangled one. The trailing "::h" + hash is dropped.
+ *
+ * A NULL sym, or one too short to carry the hash suffix, is left untouched.
+ * If an unexpected escape sequence or character is met, an error is printed
+ * and the output is terminated at that point.
+ */
+void
+rust_demangle_sym(char *sym)
+{
+	const char *in;
+	char *out;
+	const char *end;
+	size_t len;
+
+	if (!sym)
+		return;
+
+	/*
+	 * Without this check 'end' below would point before 'sym' for an
+	 * input that does not satisfy the rust_is_mangled() precondition.
+	 */
+	len = strlen(sym);
+	if (len < hash_prefix_len + hash_len)
+		return;
+
+	in = sym;
+	out = sym;
+	end = sym + len - (hash_prefix_len + hash_len);
+
+	while (in < end)
+		switch (*in) {
+		case '$':
+			if (!(unescape(&in, &out, "$C$", ',')
+					|| unescape(&in, &out, "$SP$", '@')
+					|| unescape(&in, &out, "$BP$", '*')
+					|| unescape(&in, &out, "$RF$", '&')
+					|| unescape(&in, &out, "$LT$", '<')
+					|| unescape(&in, &out, "$GT$", '>')
+					|| unescape(&in, &out, "$LP$", '(')
+					|| unescape(&in, &out, "$RP$", ')')
+					|| unescape(&in, &out, "$u20$", ' ')
+					|| unescape(&in, &out, "$u27$", '\'')
+					|| unescape(&in, &out, "$u5b$", '[')
+					|| unescape(&in, &out, "$u5d$", ']')
+					|| unescape(&in, &out, "$u7e$", '~'))) {
+				/* newline added to match the other error message */
+				pr_err("demangle-rust: unexpected escape sequence\n");
+				goto done;
+			}
+			break;
+		case '_':
+			/*
+			 * If this is the start of a path component and the next
+			 * character is an escape sequence, ignore the
+			 * underscore. The mangler inserts an underscore to make
+			 * sure the path component begins with a XID_Start
+			 * character.
+			 */
+			if ((in == sym || in[-1] == ':') && in[1] == '$')
+				in++;
+			else
+				*out++ = *in++;
+			break;
+		case '.':
+			if (in[1] == '.') {
+				/* ".." becomes "::" */
+				*out++ = ':';
+				*out++ = ':';
+				in += 2;
+			} else {
+				/* "." becomes "-" */
+				*out++ = '-';
+				in++;
+			}
+			break;
+		case 'a' ... 'z':
+		case 'A' ... 'Z':
+		case '0' ... '9':
+		case ':':
+			*out++ = *in++;
+			break;
+		default:
+			pr_err("demangle-rust: unexpected character '%c' in symbol\n",
+				*in);
+			goto done;
+		}
+
+done:
+	*out = '\0';
+}
+
+/*
+ * If the input at *in starts with the escape sequence seq, write value to
+ * **out, advance *in past the sequence and *out by one character, and
+ * return true. Otherwise leave both cursors alone and return false.
+ */
+static bool unescape(const char **in, char **out, const char *seq, char value)
+{
+	const size_t seq_len = strlen(seq);
+	char *dst = *out;
+
+	if (strncmp(*in, seq, seq_len) != 0)
+		return false;
+
+	*dst = value;
+	*out = dst + 1;
+	*in = *in + seq_len;
+
+	return true;
+}
diff --git a/util/demangle-rust.h b/util/demangle-rust.h
new file mode 100644
index 0000000..2fca618
--- /dev/null
+++ b/util/demangle-rust.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DEMANGLE_RUST
+#define __PERF_DEMANGLE_RUST 1
+
+bool rust_is_mangled(const char *str);
+void rust_demangle_sym(char *str);
+
+#endif /* __PERF_DEMANGLE_RUST */
diff --git a/util/drv_configs.c b/util/drv_configs.c
new file mode 100644
index 0000000..eec7542
--- /dev/null
+++ b/util/drv_configs.c
@@ -0,0 +1,78 @@
+/*
+ * drv_configs.c: Apply PMU specific configuration
+ * Copyright (c) 2016-2018, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include "drv_configs.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "pmu.h"
+#include <errno.h>
+
+static int
+perf_evsel__apply_drv_configs(struct perf_evsel *evsel,
+			      struct perf_evsel_config_term **err_term)
+{
+	bool found = false;
+	int err = 0;
+	struct perf_evsel_config_term *term;
+	struct perf_pmu *pmu = NULL;
+
+	while ((pmu = perf_pmu__scan(pmu)) != NULL)
+		if (pmu->type == evsel->attr.type) {
+			found = true;
+			break;
+		}
+
+	list_for_each_entry(term, &evsel->config_terms, list) {
+		if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
+			continue;
+
+		/*
+		 * We have a configuration term, report an error if we
+		 * can't find the PMU or if the PMU driver doesn't support
+		 * cmd line driver configuration.
+		 */
+		if (!found || !pmu->set_drv_config) {
+			err = -EINVAL;
+			*err_term = term;
+			break;
+		}
+
+		err = pmu->set_drv_config(term);
+		if (err) {
+			*err_term = term;
+			break;
+		}
+	}
+
+	return err;
+}
+
+int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
+				   struct perf_evsel **err_evsel,
+				   struct perf_evsel_config_term **err_term)
+{
+	struct perf_evsel *evsel;
+	int err = 0;
+
+	evlist__for_each_entry(evlist, evsel) {
+		err = perf_evsel__apply_drv_configs(evsel, err_term);
+		if (err) {
+			*err_evsel = evsel;
+			break;
+		}
+	}
+
+	return err;
+}
diff --git a/util/drv_configs.h b/util/drv_configs.h
new file mode 100644
index 0000000..32bc9ba
--- /dev/null
+++ b/util/drv_configs.h
@@ -0,0 +1,26 @@
+/*
+ * drv_configs.h: Interface to apply PMU specific configuration
+ * Copyright (c) 2016-2018, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_DRV_CONFIGS_H
+#define __PERF_DRV_CONFIGS_H
+
+#include "drv_configs.h"
+#include "evlist.h"
+#include "evsel.h"
+
+int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
+				   struct perf_evsel **err_evsel,
+				   struct perf_evsel_config_term **term);
+#endif
diff --git a/util/dso.c b/util/dso.c
new file mode 100644
index 0000000..36ef45b
--- /dev/null
+++ b/util/dso.c
@@ -0,0 +1,1534 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/bug.h>
+#include <linux/kernel.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include "compress.h"
+#include "path.h"
+#include "symbol.h"
+#include "srcline.h"
+#include "dso.h"
+#include "machine.h"
+#include "auxtrace.h"
+#include "util.h"
+#include "debug.h"
+#include "string2.h"
+#include "vdso.h"
+
+static const char * const debuglink_paths[] = {
+	"%.0s%s",
+	"%s/%s",
+	"%s/.debug/%s",
+	"/usr/lib/debug%s/%s"
+};
+
+char dso__symtab_origin(const struct dso *dso)
+{
+	static const char origin[] = {
+		[DSO_BINARY_TYPE__KALLSYMS]			= 'k',
+		[DSO_BINARY_TYPE__VMLINUX]			= 'v',
+		[DSO_BINARY_TYPE__JAVA_JIT]			= 'j',
+		[DSO_BINARY_TYPE__DEBUGLINK]			= 'l',
+		[DSO_BINARY_TYPE__BUILD_ID_CACHE]		= 'B',
+		[DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO]	= 'D',
+		[DSO_BINARY_TYPE__FEDORA_DEBUGINFO]		= 'f',
+		[DSO_BINARY_TYPE__UBUNTU_DEBUGINFO]		= 'u',
+		[DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO]	= 'o',
+		[DSO_BINARY_TYPE__BUILDID_DEBUGINFO]		= 'b',
+		[DSO_BINARY_TYPE__SYSTEM_PATH_DSO]		= 'd',
+		[DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE]		= 'K',
+		[DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP]	= 'm',
+		[DSO_BINARY_TYPE__GUEST_KALLSYMS]		= 'g',
+		[DSO_BINARY_TYPE__GUEST_KMODULE]		= 'G',
+		[DSO_BINARY_TYPE__GUEST_KMODULE_COMP]		= 'M',
+		[DSO_BINARY_TYPE__GUEST_VMLINUX]		= 'V',
+	};
+
+	if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND)
+		return '!';
+	return origin[dso->symtab_type];
+}
+
+int dso__read_binary_type_filename(const struct dso *dso,
+				   enum dso_binary_type type,
+				   char *root_dir, char *filename, size_t size)
+{
+	char build_id_hex[SBUILD_ID_SIZE];
+	int ret = 0;
+	size_t len;
+
+	switch (type) {
+	case DSO_BINARY_TYPE__DEBUGLINK:
+	{
+		const char *last_slash;
+		char dso_dir[PATH_MAX];
+		char symfile[PATH_MAX];
+		unsigned int i;
+
+		len = __symbol__join_symfs(filename, size, dso->long_name);
+		last_slash = filename + len;
+		while (last_slash != filename && *last_slash != '/')
+			last_slash--;
+
+		strncpy(dso_dir, filename, last_slash - filename);
+		dso_dir[last_slash-filename] = '\0';
+
+		if (!is_regular_file(filename)) {
+			ret = -1;
+			break;
+		}
+
+		ret = filename__read_debuglink(filename, symfile, PATH_MAX);
+		if (ret)
+			break;
+
+		/* Check predefined locations where debug file might reside */
+		ret = -1;
+		for (i = 0; i < ARRAY_SIZE(debuglink_paths); i++) {
+			snprintf(filename, size,
+					debuglink_paths[i], dso_dir, symfile);
+			if (is_regular_file(filename)) {
+				ret = 0;
+				break;
+			}
+		}
+
+		break;
+	}
+	case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+		if (dso__build_id_filename(dso, filename, size, false) == NULL)
+			ret = -1;
+		break;
+
+	case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+		if (dso__build_id_filename(dso, filename, size, true) == NULL)
+			ret = -1;
+		break;
+
+	case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+		len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+		snprintf(filename + len, size - len, "%s.debug", dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+		len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+		snprintf(filename + len, size - len, "%s", dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+	{
+		const char *last_slash;
+		size_t dir_size;
+
+		last_slash = dso->long_name + dso->long_name_len;
+		while (last_slash != dso->long_name && *last_slash != '/')
+			last_slash--;
+
+		len = __symbol__join_symfs(filename, size, "");
+		dir_size = last_slash - dso->long_name + 2;
+		if (dir_size > (size - len)) {
+			ret = -1;
+			break;
+		}
+		len += scnprintf(filename + len, dir_size, "%s",  dso->long_name);
+		len += scnprintf(filename + len , size - len, ".debug%s",
+								last_slash);
+		break;
+	}
+
+	case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+		if (!dso->has_build_id) {
+			ret = -1;
+			break;
+		}
+
+		build_id__sprintf(dso->build_id,
+				  sizeof(dso->build_id),
+				  build_id_hex);
+		len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/");
+		snprintf(filename + len, size - len, "%.2s/%s.debug",
+			 build_id_hex, build_id_hex + 2);
+		break;
+
+	case DSO_BINARY_TYPE__VMLINUX:
+	case DSO_BINARY_TYPE__GUEST_VMLINUX:
+	case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+		__symbol__join_symfs(filename, size, dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__GUEST_KMODULE:
+	case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+		path__join3(filename, size, symbol_conf.symfs,
+			    root_dir, dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+		__symbol__join_symfs(filename, size, dso->long_name);
+		break;
+
+	case DSO_BINARY_TYPE__KCORE:
+	case DSO_BINARY_TYPE__GUEST_KCORE:
+		snprintf(filename, size, "%s", dso->long_name);
+		break;
+
+	default:
+	case DSO_BINARY_TYPE__KALLSYMS:
+	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+	case DSO_BINARY_TYPE__JAVA_JIT:
+	case DSO_BINARY_TYPE__NOT_FOUND:
+		ret = -1;
+		break;
+	}
+
+	return ret;
+}
+
+static const struct {
+	const char *fmt;
+	int (*decompress)(const char *input, int output);
+} compressions[] = {
+#ifdef HAVE_ZLIB_SUPPORT
+	{ "gz", gzip_decompress_to_file },
+#endif
+#ifdef HAVE_LZMA_SUPPORT
+	{ "xz", lzma_decompress_to_file },
+#endif
+	{ NULL, NULL },
+};
+
+bool is_supported_compression(const char *ext)
+{
+	unsigned i;
+
+	for (i = 0; compressions[i].fmt; i++) {
+		if (!strcmp(ext, compressions[i].fmt))
+			return true;
+	}
+	return false;
+}
+
+bool is_kernel_module(const char *pathname, int cpumode)
+{
+	struct kmod_path m;
+	int mode = cpumode & PERF_RECORD_MISC_CPUMODE_MASK;
+
+	WARN_ONCE(mode != cpumode,
+		  "Internal error: passing unmasked cpumode (%x) to is_kernel_module",
+		  cpumode);
+
+	switch (mode) {
+	case PERF_RECORD_MISC_USER:
+	case PERF_RECORD_MISC_HYPERVISOR:
+	case PERF_RECORD_MISC_GUEST_USER:
+		return false;
+	/* Treat PERF_RECORD_MISC_CPUMODE_UNKNOWN as kernel */
+	default:
+		if (kmod_path__parse(&m, pathname)) {
+			pr_err("Failed to check whether %s is a kernel module or not. Assume it is.",
+					pathname);
+			return true;
+		}
+	}
+
+	return m.kmod;
+}
+
+bool decompress_to_file(const char *ext, const char *filename, int output_fd)
+{
+	unsigned i;
+
+	for (i = 0; compressions[i].fmt; i++) {
+		if (!strcmp(ext, compressions[i].fmt))
+			return !compressions[i].decompress(filename,
+							   output_fd);
+	}
+	return false;
+}
+
+bool dso__needs_decompress(struct dso *dso)
+{
+	return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
+		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
+}
+
+static int decompress_kmodule(struct dso *dso, const char *name, char *tmpbuf)
+{
+	int fd = -1;
+	struct kmod_path m;
+
+	if (!dso__needs_decompress(dso))
+		return -1;
+
+	if (kmod_path__parse_ext(&m, dso->long_name))
+		return -1;
+
+	if (!m.comp)
+		goto out;
+
+	fd = mkstemp(tmpbuf);
+	if (fd < 0) {
+		dso->load_errno = errno;
+		goto out;
+	}
+
+	if (!decompress_to_file(m.ext, name, fd)) {
+		dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
+		close(fd);
+		fd = -1;
+	}
+
+out:
+	free(m.ext);
+	return fd;
+}
+
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name)
+{
+	char tmpbuf[] = KMOD_DECOMP_NAME;
+	int fd;
+
+	fd = decompress_kmodule(dso, name, tmpbuf);
+	unlink(tmpbuf);
+	return fd;
+}
+
+/* Decompress @name to a temp file and store that file's path in @pathname. */
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+				 char *pathname, size_t len)
+{
+	char tmpbuf[] = KMOD_DECOMP_NAME;
+	int fd = decompress_kmodule(dso, name, tmpbuf);
+
+	if (fd < 0) {
+		unlink(tmpbuf);
+		return -1;
+	}
+
+	snprintf(pathname, len, "%s", tmpbuf);	/* always NUL-terminated */
+	close(fd);
+	return 0;
+}
+
+/*
+ * Parses kernel module specified in @path and updates
+ * @m argument like:
+ *
+ *    @comp - true if @path contains supported compression suffix,
+ *            false otherwise
+ *    @kmod - true if @path contains '.ko' suffix in right position,
+ *            false otherwise
+ *    @name - if (@alloc_name && @kmod) is true, it contains strdup-ed base name
+ *            of the kernel module without suffixes, otherwise strdup-ed
+ *            base name of @path
+ *    @ext  - if (@alloc_ext && @comp) is true, it contains strdup-ed string
+ *            the compression suffix
+ *
+ * Returns 0 if there's no strdup error, -ENOMEM otherwise.
+ */
+int __kmod_path__parse(struct kmod_path *m, const char *path,
+		       bool alloc_name, bool alloc_ext)
+{
+	const char *name = strrchr(path, '/');
+	const char *ext  = strrchr(path, '.');
+	bool is_simple_name = false;
+
+	memset(m, 0x0, sizeof(*m));
+	name = name ? name + 1 : path;
+
+	/*
+	 * '.' is also a valid character for module name. For example:
+	 * [aaa.bbb] is a valid module name. '[' should have higher
+	 * priority than '.ko' suffix.
+	 *
+	 * The kernel names are from machine__mmap_name. Such
+	 * name should belong to kernel itself, not kernel module.
+	 */
+	if (name[0] == '[') {
+		is_simple_name = true;
+		if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) ||
+		    (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) ||
+		    (strncmp(name, "[vdso]", 6) == 0) ||
+		    (strncmp(name, "[vsyscall]", 10) == 0)) {
+			m->kmod = false;
+
+		} else
+			m->kmod = true;
+	}
+
+	/* No extension, just return name. */
+	if ((ext == NULL) || is_simple_name) {
+		if (alloc_name) {
+			m->name = strdup(name);
+			return m->name ? 0 : -ENOMEM;
+		}
+		return 0;
+	}
+
+	if (is_supported_compression(ext + 1)) {
+		m->comp = true;
+		ext -= 3;
+	}
+
+	/* Check .ko extension only if there's enough name left. */
+	if (ext > name)
+		m->kmod = !strncmp(ext, ".ko", 3);
+
+	if (alloc_name) {
+		if (m->kmod) {
+			if (asprintf(&m->name, "[%.*s]", (int) (ext - name), name) == -1)
+				return -ENOMEM;
+		} else {
+			if (asprintf(&m->name, "%s", name) == -1)
+				return -ENOMEM;
+		}
+
+		strxfrchar(m->name, '-', '_');
+	}
+
+	if (alloc_ext && m->comp) {
+		m->ext = strdup(ext + 4);
+		if (!m->ext) {
+			free((void *) m->name);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+			  struct machine *machine)
+{
+	if (machine__is_host(machine))
+		dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
+	else
+		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
+
+	/* _KMODULE_COMP should be next to _KMODULE */
+	if (m->kmod && m->comp)
+		dso->symtab_type++;
+
+	dso__set_short_name(dso, strdup(m->name), true);
+}
+
+/*
+ * Global list of open DSOs and the counter.
+ */
+static LIST_HEAD(dso__data_open);
+static long dso__data_open_cnt;
+static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void dso__list_add(struct dso *dso)
+{
+	list_add_tail(&dso->data.open_entry, &dso__data_open);
+	dso__data_open_cnt++;
+}
+
+static void dso__list_del(struct dso *dso)
+{
+	list_del(&dso->data.open_entry);
+	WARN_ONCE(dso__data_open_cnt <= 0,
+		  "DSO data fd counter out of bounds.");
+	dso__data_open_cnt--;
+}
+
+static void close_first_dso(void);
+
+static int do_open(char *name)
+{
+	int fd;
+	char sbuf[STRERR_BUFSIZE];
+
+	do {
+		fd = open(name, O_RDONLY|O_CLOEXEC);
+		if (fd >= 0)
+			return fd;
+
+		pr_debug("dso open failed: %s\n",
+			 str_error_r(errno, sbuf, sizeof(sbuf)));
+		if (!dso__data_open_cnt || errno != EMFILE)
+			break;
+
+		close_first_dso();
+	} while (1);
+
+	return -1;
+}
+
+static int __open_dso(struct dso *dso, struct machine *machine)
+{
+	int fd = -EINVAL;
+	char *root_dir = (char *)"";
+	char *name = malloc(PATH_MAX);
+
+	if (!name)
+		return -ENOMEM;
+
+	if (machine)
+		root_dir = machine->root_dir;
+
+	if (dso__read_binary_type_filename(dso, dso->binary_type,
+					    root_dir, name, PATH_MAX))
+		goto out;
+
+	if (!is_regular_file(name))
+		goto out;
+
+	if (dso__needs_decompress(dso)) {
+		char newpath[KMOD_DECOMP_LEN];
+		size_t len = sizeof(newpath);
+
+		if (dso__decompress_kmodule_path(dso, name, newpath, len) < 0) {
+			fd = -dso->load_errno;
+			goto out;
+		}
+
+		strcpy(name, newpath);
+	}
+
+	fd = do_open(name);
+
+	if (dso__needs_decompress(dso))
+		unlink(name);
+
+out:
+	free(name);
+	return fd;
+}
+
+static void check_data_close(void);
+
+/**
+ * open_dso - Open DSO data file
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * Open @dso's data file and update the list/count of open DSO objects.
+ */
+static int open_dso(struct dso *dso, struct machine *machine)
+{
+	int fd;
+	struct nscookie nsc;
+
+	if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
+		nsinfo__mountns_enter(dso->nsinfo, &nsc);
+	fd = __open_dso(dso, machine);
+	if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
+		nsinfo__mountns_exit(&nsc);
+
+	if (fd >= 0) {
+		dso__list_add(dso);
+		/*
+		 * Check if we crossed the allowed number
+		 * of opened DSOs and close one if needed.
+		 */
+		check_data_close();
+	}
+
+	return fd;
+}
+
+static void close_data_fd(struct dso *dso)
+{
+	if (dso->data.fd >= 0) {
+		close(dso->data.fd);
+		dso->data.fd = -1;
+		dso->data.file_size = 0;
+		dso__list_del(dso);
+	}
+}
+
+/**
+ * close_dso - Close DSO data file
+ * @dso: dso object
+ *
+ * Close @dso's data file descriptor and updates
+ * list/count of open DSO objects.
+ */
+static void close_dso(struct dso *dso)
+{
+	close_data_fd(dso);
+}
+
+static void close_first_dso(void)
+{
+	struct dso *dso;
+
+	dso = list_first_entry(&dso__data_open, struct dso, data.open_entry);
+	close_dso(dso);
+}
+
+static rlim_t get_fd_limit(void)
+{
+	struct rlimit l;
+	rlim_t limit = 0;
+
+	/* Allow half of the current open fd limit. */
+	if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
+		if (l.rlim_cur == RLIM_INFINITY)
+			limit = l.rlim_cur;
+		else
+			limit = l.rlim_cur / 2;
+	} else {
+		pr_err("failed to get fd limit\n");
+		limit = 1;
+	}
+
+	return limit;
+}
+
+static rlim_t fd_limit;
+
+/*
+ * Used only by tests/dso-data.c to reset the environment
+ * for tests. I don't expect we should change this during
+ * standard runtime.
+ */
+void reset_fd_limit(void)
+{
+	fd_limit = 0;
+}
+
+static bool may_cache_fd(void)
+{
+	if (!fd_limit)
+		fd_limit = get_fd_limit();
+
+	if (fd_limit == RLIM_INFINITY)
+		return true;
+
+	return fd_limit > (rlim_t) dso__data_open_cnt;
+}
+
+/*
+ * Check and close LRU dso if we crossed allowed limit
+ * for opened dso file descriptors. The limit is half
+ * of the RLIMIT_NOFILE files opened.
+*/
+static void check_data_close(void)
+{
+	bool cache_fd = may_cache_fd();
+
+	if (!cache_fd)
+		close_first_dso();
+}
+
+/**
+ * dso__data_close - Close DSO data file
+ * @dso: dso object
+ *
+ * External interface to close @dso's data file descriptor.
+ */
+void dso__data_close(struct dso *dso)
+{
+	pthread_mutex_lock(&dso__data_open_lock);
+	close_dso(dso);
+	pthread_mutex_unlock(&dso__data_open_lock);
+}
+
+static void try_to_open_dso(struct dso *dso, struct machine *machine)
+{
+	enum dso_binary_type binary_type_data[] = {
+		DSO_BINARY_TYPE__BUILD_ID_CACHE,
+		DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+		DSO_BINARY_TYPE__NOT_FOUND,
+	};
+	int i = 0;
+
+	if (dso->data.fd >= 0)
+		return;
+
+	if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) {
+		dso->data.fd = open_dso(dso, machine);
+		goto out;
+	}
+
+	do {
+		dso->binary_type = binary_type_data[i++];
+
+		dso->data.fd = open_dso(dso, machine);
+		if (dso->data.fd >= 0)
+			goto out;
+
+	} while (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND);
+out:
+	if (dso->data.fd >= 0)
+		dso->data.status = DSO_DATA_STATUS_OK;
+	else
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+}
+
+/**
+ * dso__data_get_fd - Get dso's data file descriptor
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * Find and open dso's file.  On success the fd (>= 0) is returned with
+ * dso__data_open_lock held, so pair it with dso__data_put_fd().  On
+ * failure a negative value is returned and the lock is not held.
+ */
+int dso__data_get_fd(struct dso *dso, struct machine *machine)
+{
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
+		return -1;
+
+	if (pthread_mutex_lock(&dso__data_open_lock) != 0)
+		return -1;
+
+	try_to_open_dso(dso, machine);
+
+	if (dso->data.fd < 0)
+		pthread_mutex_unlock(&dso__data_open_lock);
+
+	return dso->data.fd;
+}
+
+void dso__data_put_fd(struct dso *dso __maybe_unused)
+{
+	pthread_mutex_unlock(&dso__data_open_lock);
+}
+
+bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
+{
+	u32 flag = 1 << by;
+
+	if (dso->data.status_seen & flag)
+		return true;
+
+	dso->data.status_seen |= flag;
+
+	return false;
+}
+
+static void
+dso_cache__free(struct dso *dso)
+{
+	struct rb_root *root = &dso->data.cache;
+	struct rb_node *next = rb_first(root);
+
+	pthread_mutex_lock(&dso->lock);
+	while (next) {
+		struct dso_cache *cache;
+
+		cache = rb_entry(next, struct dso_cache, rb_node);
+		next = rb_next(&cache->rb_node);
+		rb_erase(&cache->rb_node, root);
+		free(cache);
+	}
+	pthread_mutex_unlock(&dso->lock);
+}
+
+static struct dso_cache *dso_cache__find(struct dso *dso, u64 offset)
+{
+	const struct rb_root *root = &dso->data.cache;
+	struct rb_node * const *p = &root->rb_node;
+	const struct rb_node *parent = NULL;
+	struct dso_cache *cache;
+
+	while (*p != NULL) {
+		u64 end;
+
+		parent = *p;
+		cache = rb_entry(parent, struct dso_cache, rb_node);
+		end = cache->offset + DSO__DATA_CACHE_SIZE;
+
+		if (offset < cache->offset)
+			p = &(*p)->rb_left;
+		else if (offset >= end)
+			p = &(*p)->rb_right;
+		else
+			return cache;
+	}
+
+	return NULL;
+}
+
+static struct dso_cache *
+dso_cache__insert(struct dso *dso, struct dso_cache *new)
+{
+	struct rb_root *root = &dso->data.cache;
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct dso_cache *cache;
+	u64 offset = new->offset;
+
+	pthread_mutex_lock(&dso->lock);
+	while (*p != NULL) {
+		u64 end;
+
+		parent = *p;
+		cache = rb_entry(parent, struct dso_cache, rb_node);
+		end = cache->offset + DSO__DATA_CACHE_SIZE;
+
+		if (offset < cache->offset)
+			p = &(*p)->rb_left;
+		else if (offset >= end)
+			p = &(*p)->rb_right;
+		else
+			goto out;
+	}
+
+	rb_link_node(&new->rb_node, parent, p);
+	rb_insert_color(&new->rb_node, root);
+
+	cache = NULL;
+out:
+	pthread_mutex_unlock(&dso->lock);
+	return cache;
+}
+
+static ssize_t
+dso_cache__memcpy(struct dso_cache *cache, u64 offset,
+		  u8 *data, u64 size)
+{
+	u64 cache_offset = offset - cache->offset;
+	u64 cache_size   = min(cache->size - cache_offset, size);
+
+	memcpy(data, cache->data + cache_offset, cache_size);
+	return cache_size;
+}
+
+/* Read the chunk holding @offset into a new cache entry and copy from it. */
+static ssize_t
+dso_cache__read(struct dso *dso, struct machine *machine,
+		u64 offset, u8 *data, ssize_t size)
+{
+	struct dso_cache *cache, *old;
+	ssize_t ret;
+
+	do {
+		u64 cache_offset;
+
+		cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
+		if (!cache)
+			return -ENOMEM;
+
+		pthread_mutex_lock(&dso__data_open_lock);
+
+		/*
+		 * dso->data.fd might be closed if other thread opened another
+		 * file (dso) due to open file limit (RLIMIT_NOFILE).
+		 */
+		try_to_open_dso(dso, machine);
+
+		if (dso->data.fd < 0) {
+			ret = -errno;
+			dso->data.status = DSO_DATA_STATUS_ERROR;
+			break;
+		}
+
+		cache_offset = offset & DSO__DATA_CACHE_MASK;
+
+		ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset);
+		if (ret <= 0)
+			break;
+
+		cache->offset = cache_offset;
+		cache->size   = ret;
+	} while (0);
+
+	pthread_mutex_unlock(&dso__data_open_lock);
+
+	if (ret > 0) {
+		old = dso_cache__insert(dso, cache);
+		if (old) {
+			/* we lose the race */
+			free(cache);
+			cache = old;
+		}
+
+		/* cache is in the tree now: it must not be freed here */
+		return dso_cache__memcpy(cache, offset, data, size);
+	}
+
+	/* Error or EOF: the chunk was never inserted, so free it. */
+	free(cache);
+	return ret;
+}
+
+static ssize_t dso_cache_read(struct dso *dso, struct machine *machine,
+			      u64 offset, u8 *data, ssize_t size)
+{
+	struct dso_cache *cache;
+
+	cache = dso_cache__find(dso, offset);
+	if (cache)
+		return dso_cache__memcpy(cache, offset, data, size);
+	else
+		return dso_cache__read(dso, machine, offset, data, size);
+}
+
+/*
+ * Reads and caches dso data DSO__DATA_CACHE_SIZE size chunks
+ * in the rb_tree. Any read to already cached data is served
+ * by cached data.
+ */
+static ssize_t cached_read(struct dso *dso, struct machine *machine,
+			   u64 offset, u8 *data, ssize_t size)
+{
+	ssize_t r = 0;
+	u8 *p = data;
+
+	do {
+		ssize_t ret;
+
+		ret = dso_cache_read(dso, machine, offset, p, size);
+		if (ret < 0)
+			return ret;
+
+		/* Reached EOF, return what we have. */
+		if (!ret)
+			break;
+
+		BUG_ON(ret > size);
+
+		r      += ret;
+		p      += ret;
+		offset += ret;
+		size   -= ret;
+
+	} while (size);
+
+	return r;
+}
+
+static int data_file_size(struct dso *dso, struct machine *machine)
+{
+	int ret = 0;
+	struct stat st;
+	char sbuf[STRERR_BUFSIZE];
+
+	if (dso->data.file_size)
+		return 0;
+
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
+		return -1;
+
+	pthread_mutex_lock(&dso__data_open_lock);
+
+	/*
+	 * dso->data.fd might be closed if other thread opened another
+	 * file (dso) due to open file limit (RLIMIT_NOFILE).
+	 */
+	try_to_open_dso(dso, machine);
+
+	if (dso->data.fd < 0) {
+		ret = -errno;
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+		goto out;
+	}
+
+	if (fstat(dso->data.fd, &st) < 0) {
+		ret = -errno;
+		pr_err("dso cache fstat failed: %s\n",
+		       str_error_r(errno, sbuf, sizeof(sbuf)));
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+		goto out;
+	}
+	dso->data.file_size = st.st_size;
+
+out:
+	pthread_mutex_unlock(&dso__data_open_lock);
+	return ret;
+}
+
+/**
+ * dso__data_size - Return dso data size
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * Return: dso data size
+ */
+off_t dso__data_size(struct dso *dso, struct machine *machine)
+{
+	if (data_file_size(dso, machine))
+		return -1;
+
+	/* For now just estimate dso data size is close to file size */
+	return dso->data.file_size;
+}
+
+static ssize_t data_read_offset(struct dso *dso, struct machine *machine,
+				u64 offset, u8 *data, ssize_t size)
+{
+	if (data_file_size(dso, machine))
+		return -1;
+
+	/* Check the offset sanity. */
+	if (offset > dso->data.file_size)
+		return -1;
+
+	if (offset + size < offset)
+		return -1;
+
+	return cached_read(dso, machine, offset, data, size);
+}
+
+/**
+ * dso__data_read_offset - Read data from dso file offset
+ * @dso: dso object
+ * @machine: machine object
+ * @offset: file offset
+ * @data: buffer to store data
+ * @size: size of the @data buffer
+ *
+ * External interface to read data from dso file offset. Open
+ * dso data file and use cached_read to get the data.
+ */
+ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
+			      u64 offset, u8 *data, ssize_t size)
+{
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
+		return -1;
+
+	return data_read_offset(dso, machine, offset, data, size);
+}
+
+/**
+ * dso__data_read_addr - Read data from dso address
+ * @dso: dso object
+ * @machine: machine object
+ * @addr: virtual memory address
+ * @data: buffer to store data
+ * @size: size of the @data buffer
+ *
+ * External interface to read data from dso address.
+ */
+ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
+			    struct machine *machine, u64 addr,
+			    u8 *data, ssize_t size)
+{
+	u64 offset = map->map_ip(map, addr);
+	return dso__data_read_offset(dso, machine, offset, data, size);
+}
+
+struct map *dso__new_map(const char *name)
+{
+	struct map *map = NULL;
+	struct dso *dso = dso__new(name);
+
+	if (dso)
+		map = map__new2(0, dso, MAP__FUNCTION);
+
+	return map;
+}
+
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+				    const char *short_name, int dso_type)
+{
+	/*
+	 * The kernel dso could be created by build_id processing.
+	 */
+	struct dso *dso = machine__findnew_dso(machine, name);
+
+	/*
+	 * We need to run this in all cases, since during the build_id
+	 * processing we had no idea this was the kernel dso.
+	 */
+	if (dso != NULL) {
+		dso__set_short_name(dso, short_name, false);
+		dso->kernel = dso_type;
+	}
+
+	return dso;
+}
+
+/*
+ * Find a matching entry and/or link current entry to RB tree.
+ * Either one of the dso or name parameter must be non-NULL or the
+ * function will not work.
+ */
+static struct dso *__dso__findlink_by_longname(struct rb_root *root,
+					       struct dso *dso, const char *name)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node  *parent = NULL;
+
+	if (!name)
+		name = dso->long_name;
+	/*
+	 * Find node with the matching name
+	 */
+	while (*p) {
+		struct dso *this = rb_entry(*p, struct dso, rb_node);
+		int rc = strcmp(name, this->long_name);
+
+		parent = *p;
+		if (rc == 0) {
+			/*
+			 * Same long name: a pure lookup (no dso given) or
+			 * the dso itself is a match and is returned as is.
+			 * Otherwise fall through to the short name check.
+			 */
+			if (!dso || (dso == this))
+				return this;	/* Find matching dso */
+			/*
+			 * The core kernel DSOs may have duplicated long name.
+			 * In this case, the short name should be different.
+			 * Comparing the short names to differentiate the DSOs.
+			 */
+			rc = strcmp(dso->short_name, this->short_name);
+			if (rc == 0) {
+				pr_err("Duplicated dso name: %s\n", name);
+				return NULL;
+			}
+		}
+		if (rc < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+	if (dso) {
+		/* Add new node and rebalance tree */
+		rb_link_node(&dso->rb_node, parent, p);
+		rb_insert_color(&dso->rb_node, root);
+		dso->root = root;
+	}
+	return NULL;
+}
+
+static inline struct dso *__dso__find_by_longname(struct rb_root *root,
+						  const char *name)
+{
+	return __dso__findlink_by_longname(root, NULL, name);
+}
+
+void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
+{
+	struct rb_root *root = dso->root;
+
+	if (name == NULL)
+		return;
+
+	if (dso->long_name_allocated)
+		free((char *)dso->long_name);
+
+	if (root) {
+		rb_erase(&dso->rb_node, root);
+		/*
+		 * __dso__findlink_by_longname() isn't guaranteed to add it
+		 * back, so a clean removal is required here.
+		 */
+		RB_CLEAR_NODE(&dso->rb_node);
+		dso->root = NULL;
+	}
+
+	dso->long_name		 = name;
+	dso->long_name_len	 = strlen(name);
+	dso->long_name_allocated = name_allocated;
+
+	if (root)
+		__dso__findlink_by_longname(root, dso, NULL);
+}
+
+void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated)
+{
+	if (name == NULL)
+		return;
+
+	if (dso->short_name_allocated)
+		free((char *)dso->short_name);
+
+	dso->short_name		  = name;
+	dso->short_name_len	  = strlen(name);
+	dso->short_name_allocated = name_allocated;
+}
+
+static void dso__set_basename(struct dso *dso)
+{
+       /*
+        * basename() may modify path buffer, so we must pass
+        * a copy.
+        */
+       char *base, *lname = strdup(dso->long_name);
+
+       if (!lname)
+               return;
+
+       /*
+        * basename() may return a pointer to internal
+        * storage which is reused in subsequent calls
+        * so copy the result.
+        */
+       base = strdup(basename(lname));
+
+       free(lname);
+
+       if (!base)
+               return;
+
+       dso__set_short_name(dso, base, true);
+}
+
+int dso__name_len(const struct dso *dso)
+{
+	if (!dso)
+		return strlen("[unknown]");
+	if (verbose > 0)
+		return dso->long_name_len;
+
+	return dso->short_name_len;
+}
+
+bool dso__loaded(const struct dso *dso, enum map_type type)
+{
+	return dso->loaded & (1 << type);
+}
+
+bool dso__sorted_by_name(const struct dso *dso, enum map_type type)
+{
+	return dso->sorted_by_name & (1 << type);
+}
+
+void dso__set_sorted_by_name(struct dso *dso, enum map_type type)
+{
+	dso->sorted_by_name |= (1 << type);
+}
+
+struct dso *dso__new(const char *name)
+{
+	struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1);
+
+	if (dso != NULL) {
+		int i;
+		strcpy(dso->name, name);
+		dso__set_long_name(dso, dso->name, false);
+		dso__set_short_name(dso, dso->name, false);
+		for (i = 0; i < MAP__NR_TYPES; ++i)
+			dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
+		dso->data.cache = RB_ROOT;
+		dso->inlined_nodes = RB_ROOT;
+		dso->srclines = RB_ROOT;
+		dso->data.fd = -1;
+		dso->data.status = DSO_DATA_STATUS_UNKNOWN;
+		dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
+		dso->binary_type = DSO_BINARY_TYPE__NOT_FOUND;
+		dso->is_64_bit = (sizeof(void *) == 8);
+		dso->loaded = 0;
+		dso->rel = 0;
+		dso->sorted_by_name = 0;
+		dso->has_build_id = 0;
+		dso->has_srcline = 1;
+		dso->a2l_fails = 1;
+		dso->kernel = DSO_TYPE_USER;
+		dso->needs_swap = DSO_SWAP__UNSET;
+		RB_CLEAR_NODE(&dso->rb_node);
+		dso->root = NULL;
+		INIT_LIST_HEAD(&dso->node);
+		INIT_LIST_HEAD(&dso->data.open_entry);
+		pthread_mutex_init(&dso->lock, NULL);
+		refcount_set(&dso->refcnt, 1);
+	}
+
+	return dso;
+}
+
+void dso__delete(struct dso *dso)
+{
+	int i;
+
+	if (!RB_EMPTY_NODE(&dso->rb_node))	/* reported only; teardown continues */
+		pr_err("DSO %s is still in rbtree when being deleted!\n",
+		       dso->long_name);
+
+	/* free inlines first, as they reference symbols */
+	inlines__tree_delete(&dso->inlined_nodes);
+	srcline__tree_delete(&dso->srclines);
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		symbols__delete(&dso->symbols[i]);
+
+	if (dso->short_name_allocated) {	/* free only a name flagged as allocated */
+		zfree((char **)&dso->short_name);
+		dso->short_name_allocated = false;
+	}
+
+	if (dso->long_name_allocated) {		/* likewise for the long name */
+		zfree((char **)&dso->long_name);
+		dso->long_name_allocated = false;
+	}
+
+	dso__data_close(dso);
+	auxtrace_cache__free(dso->auxtrace_cache);
+	dso_cache__free(dso);
+	dso__free_a2l(dso);
+	zfree(&dso->symsrc_filename);
+	nsinfo__zput(dso->nsinfo);
+	pthread_mutex_destroy(&dso->lock);
+	free(dso);	/* also releases the name[] storage allocated by dso__new() */
+}
+
+struct dso *dso__get(struct dso *dso)
+{
+	if (dso != NULL)	/* NULL is passed straight through */
+		refcount_inc(&dso->refcnt);
+	return dso;
+}
+
+void dso__put(struct dso *dso)
+{
+	if (dso != NULL && refcount_dec_and_test(&dso->refcnt))
+		dso__delete(dso);	/* that was the last reference */
+}
+
+void dso__set_build_id(struct dso *dso, void *build_id)
+{
+	dso->has_build_id = 1;
+	memcpy(dso->build_id, build_id, sizeof(dso->build_id));	/* copies a full-size id */
+}
+
+bool dso__build_id_equal(const struct dso *dso, u8 *build_id)
+{
+	return !memcmp(dso->build_id, build_id, sizeof(dso->build_id));	/* full-size compare */
+}
+
+void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
+{
+	char path[PATH_MAX];	/* "<root_dir>/sys/kernel/notes"; snprintf() keeps it bounded */
+
+	if (machine__is_default_guest(machine))
+		return;
+	snprintf(path, sizeof(path), "%s/sys/kernel/notes", machine->root_dir);
+	if (sysfs__read_build_id(path, dso->build_id,
+				 sizeof(dso->build_id)) == 0)
+		dso->has_build_id = true;
+}
+
+int dso__kernel_module_get_build_id(struct dso *dso,
+				    const char *root_dir)
+{
+	/*
+	 * The short name of a kernel module looks like "[module]": skip the
+	 * leading '[' here and trim the trailing ']' via the precision below.
+	 */
+	const char *modname = dso->short_name + 1;
+	int modlen = (int)strlen(modname) - 1;
+	char notes[PATH_MAX];
+
+	snprintf(notes, sizeof(notes),
+		 "%s/sys/module/%.*s/notes/.note.gnu.build-id",
+		 root_dir, modlen, modname);
+	if (!sysfs__read_build_id(notes, dso->build_id, sizeof(dso->build_id)))
+		dso->has_build_id = true;
+
+	/* Always 0: a missing or unreadable note is not reported. */
+	return 0;
+}
+
+bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
+{
+	struct nscookie cookie;
+	struct dso *dso;
+	bool found = false;
+
+	list_for_each_entry(dso, head, node) {
+		/* With with_hits set, only hit or vdso DSOs are considered. */
+		if (with_hits && !(dso->hit || dso__is_vdso(dso)))
+			continue;
+
+		if (!dso->has_build_id) {
+			/* Read the id between nsinfo__mountns_enter()/exit(). */
+			nsinfo__mountns_enter(dso->nsinfo, &cookie);
+			if (filename__read_build_id(dso->long_name, dso->build_id,
+						    sizeof(dso->build_id)) > 0)
+				dso->has_build_id = true;
+			nsinfo__mountns_exit(&cookie);
+		}
+		if (dso->has_build_id)
+			found = true;
+	}
+	return found;
+}
+
+void __dsos__add(struct dsos *dsos, struct dso *dso)
+{
+	list_add_tail(&dso->node, &dsos->head);
+	__dso__findlink_by_longname(&dsos->root, dso, NULL);
+	/*
+	 * It is now in the linked list, grab a reference, then garbage collect
+	 * this when needing memory, by looking at LRU dso instances in the
+	 * list with refcount_read(&dso->refcnt) == 1, i.e. no references
+	 * anywhere besides the one for the list, do, under a lock for the
+	 * list: remove it from the list, then a dso__put(), that probably will
+	 * be the last and will then call dso__delete(), end of life.
+	 *
+	 * That, or at the end of the 'struct machine' lifetime, when all
+	 * 'struct dso' instances will be removed from the list, in
+	 * dsos__exit(), if they have no other reference from some other data
+	 * structure.
+	 *
+	 * E.g.: after processing a 'perf.data' file and storing references
+	 * to objects instantiated while processing events, we will have
+	 * references to the 'thread', 'map', 'dso' structs all from 'struct
+	 * hist_entry' instances, but we may not need anything not referenced,
+	 * so we might as well call machines__exit()/machines__delete() and
+	 * garbage collect it.
+	 */
+	dso__get(dso);
+}
+
+void dsos__add(struct dsos *dsos, struct dso *dso)
+{
+	down_write(&dsos->lock);	/* write side: __dsos__add() modifies the list and rbtree */
+	__dsos__add(dsos, dso);
+	up_write(&dsos->lock);
+}
+
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
+{
+	struct dso *dso;
+
+	/* Long names go through the rbtree; short names need a list walk. */
+	if (!cmp_short)
+		return __dso__find_by_longname(&dsos->root, name);
+	list_for_each_entry(dso, &dsos->head, node)
+		if (!strcmp(dso->short_name, name))
+			return dso;
+	return NULL;
+}
+
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
+{
+	struct dso *found;
+	down_read(&dsos->lock);
+	found = __dsos__find(dsos, name, cmp_short);
+	up_read(&dsos->lock);
+	return found;	/* note: no reference is taken on behalf of the caller */
+}
+
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
+{
+	struct dso *dso = dso__new(name);
+
+	if (dso == NULL)
+		return NULL;
+	__dsos__add(dsos, dso);
+	dso__set_basename(dso);
+	/* Drop the dso__new() reference: __dsos__add() already took its own. */
+	dso__put(dso);
+	return dso;
+}
+
+struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
+{
+	struct dso *hit = __dsos__find(dsos, name, false);
+
+	return hit ?: __dsos__addnew(dsos, name);	/* create it on a miss */
+}
+
+struct dso *dsos__findnew(struct dsos *dsos, const char *name)
+{
+	struct dso *dso;
+	down_write(&dsos->lock);	/* write side: a miss inserts a new dso */
+	dso = dso__get(__dsos__findnew(dsos, name));	/* extra reference for the caller */
+	up_write(&dsos->lock);
+	return dso;
+}
+
+size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+			       bool (skip)(struct dso *dso, int parm), int parm)
+{
+	size_t printed = 0;
+	struct dso *dso;
+
+	list_for_each_entry(dso, head, node) {
+		if (!skip || !skip(dso, parm)) {	/* NULL filter: print all */
+			printed += dso__fprintf_buildid(dso, fp);
+			printed += fprintf(fp, " %s\n", dso->long_name);
+		}
+	}
+	return printed;
+}
+
+size_t __dsos__fprintf(struct list_head *head, FILE *fp)
+{
+	size_t printed = 0;
+	struct dso *dso;
+	int type;
+
+	/* Dump every DSO once per map type. */
+	list_for_each_entry(dso, head, node) {
+		for (type = 0; type < MAP__NR_TYPES; ++type)
+			printed += dso__fprintf(dso, type, fp);
+	}
+	return printed;
+}
+
+size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
+{
+	char text[SBUILD_ID_SIZE];	/* filled in by build_id__sprintf() */
+
+	build_id__sprintf(dso->build_id, sizeof(dso->build_id), text);
+	return fprintf(fp, "%s", text);
+}
+
+size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
+{
+	const char *state = dso__loaded(dso, type) ? "" : "NOT ";
+	size_t printed = fprintf(fp, "dso: %s (", dso->short_name);
+	struct rb_node *node;
+
+	/* Show the long name only when it is a different pointer. */
+	if (dso->long_name != dso->short_name)
+		printed += fprintf(fp, "%s, ", dso->long_name);
+	printed += fprintf(fp, "%s, %sloaded, ", map_type__name[type], state);
+	printed += dso__fprintf_buildid(dso, fp);
+	printed += fprintf(fp, ")\n");
+
+	for (node = rb_first(&dso->symbols[type]); node; node = rb_next(node))
+		printed += symbol__fprintf(rb_entry(node, struct symbol, rb_node), fp);
+
+	return printed;
+}
+
+enum dso_type dso__type(struct dso *dso, struct machine *machine)
+{
+	enum dso_type type;
+	int fd = dso__data_get_fd(dso, machine);
+
+	/* Without a data fd the type cannot be probed. */
+	if (fd < 0)
+		return DSO__TYPE_UNKNOWN;
+
+	type = dso__type_fd(fd);
+	dso__data_put_fd(dso);
+	return type;
+}
+
+int dso__strerror_load(struct dso *dso, char *buf, size_t buflen)
+{
+	/*
+	 * Indexed by (errnum - __DSO_LOAD_ERRNO__START), so the entries must
+	 * stay in the same order as enum dso_load_errno.
+	 */
+	static const char *dso_load__error_str[] = {
+	"Internal tools/perf/ library error",
+	"Invalid ELF file",
+	"Can not read build id",
+	"Mismatching build id",
+	"Decompression failure",
+	};
+	int errnum = dso->load_errno;
+	const char *msg;
+
+	BUG_ON(buflen == 0);
+
+	if (errnum >= 0) {
+		/* Plain errno value: let str_error_r() describe it. */
+		msg = str_error_r(errnum, buf, buflen);
+		if (msg == buf)
+			return 0;
+	} else if (errnum >= __DSO_LOAD_ERRNO__START &&
+		   errnum < __DSO_LOAD_ERRNO__END) {
+		msg = dso_load__error_str[errnum - __DSO_LOAD_ERRNO__START];
+	} else {
+		return -1;
+	}
+	scnprintf(buf, buflen, "%s", msg);
+	return 0;
+}
diff --git a/util/dso.h b/util/dso.h
new file mode 100644
index 0000000..c229dbe
--- /dev/null
+++ b/util/dso.h
@@ -0,0 +1,381 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DSO
+#define __PERF_DSO
+
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include <sys/types.h>
+#include <stdbool.h>
+#include "rwsem.h"
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include "map.h"
+#include "namespaces.h"
+#include "build-id.h"
+
+enum dso_binary_type {
+	DSO_BINARY_TYPE__KALLSYMS = 0,
+	DSO_BINARY_TYPE__GUEST_KALLSYMS,
+	DSO_BINARY_TYPE__VMLINUX,	/* tested by dso__is_vmlinux() */
+	DSO_BINARY_TYPE__GUEST_VMLINUX,	/* tested by dso__is_vmlinux() */
+	DSO_BINARY_TYPE__JAVA_JIT,
+	DSO_BINARY_TYPE__DEBUGLINK,
+	DSO_BINARY_TYPE__BUILD_ID_CACHE,
+	DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
+	DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+	DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+	DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+	DSO_BINARY_TYPE__GUEST_KMODULE,
+	DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
+	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
+	DSO_BINARY_TYPE__KCORE,		/* tested by dso__is_kcore() */
+	DSO_BINARY_TYPE__GUEST_KCORE,	/* tested by dso__is_kcore() */
+	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+	DSO_BINARY_TYPE__NOT_FOUND,	/* initial symtab_type/binary_type in dso__new() */
+};
+
+enum dso_kernel_type {
+	DSO_TYPE_USER = 0,	/* dso__new() default; 0 so 'dso->kernel' is false for user DSOs */
+	DSO_TYPE_KERNEL,
+	DSO_TYPE_GUEST_KERNEL
+};
+
+enum dso_swap_type {
+	DSO_SWAP__UNSET,	/* dso__new() default; DSO__SWAP() BUG_ON()s while still unset */
+	DSO_SWAP__NO,		/* DSO__SWAP() returns the value unchanged */
+	DSO_SWAP__YES,		/* DSO__SWAP() byte-swaps 2, 4 and 8 byte values */
+};
+
+enum dso_data_status {
+	DSO_DATA_STATUS_ERROR	= -1,
+	DSO_DATA_STATUS_UNKNOWN	= 0,	/* initial data.status set by dso__new() */
+	DSO_DATA_STATUS_OK	= 1,
+};
+
+enum dso_data_status_seen {
+	DSO_DATA_STATUS_SEEN_ITRACE,
+};
+
+enum dso_type {
+	DSO__TYPE_UNKNOWN,	/* also what dso__type() returns when no data fd is available */
+	DSO__TYPE_64BIT,
+	DSO__TYPE_32BIT,
+	DSO__TYPE_X32BIT,
+};
+
+enum dso_load_errno {
+	DSO_LOAD_ERRNO__SUCCESS		= 0,
+
+	/*
+	 * Choose an arbitrary negative big number not to clash with standard
+	 * errno since SUS requires the errno has distinct positive values.
+	 * See 'Issue 6' in the link below.
+	 *
+	 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+	 */
+	__DSO_LOAD_ERRNO__START		= -10000,
+
+	DSO_LOAD_ERRNO__INTERNAL_ERROR	= __DSO_LOAD_ERRNO__START,
+
+	/* for symsrc__init() */
+	DSO_LOAD_ERRNO__INVALID_ELF,
+	DSO_LOAD_ERRNO__CANNOT_READ_BUILDID,
+	DSO_LOAD_ERRNO__MISMATCHING_BUILDID,
+
+	/* for decompress_kmodule */
+	DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE,
+
+	__DSO_LOAD_ERRNO__END,	/* exclusive upper bound used by dso__strerror_load() */
+};
+
+#define DSO__SWAP(dso, type, val)			\
+({							\
+	type ____r = (val);	/* val evaluated once */	\
+	BUG_ON((dso)->needs_swap == DSO_SWAP__UNSET);	\
+	if ((dso)->needs_swap == DSO_SWAP__YES) {	\
+		switch (sizeof(____r)) {		\
+		case 2:					\
+			____r = bswap_16(____r);	\
+			break;				\
+		case 4:					\
+			____r = bswap_32(____r);	\
+			break;				\
+		case 8:					\
+			____r = bswap_64(____r);	\
+			break;				\
+		default:				\
+			BUG_ON(1);			\
+		}					\
+	}						\
+	____r;						\
+})
+
+#define DSO__DATA_CACHE_SIZE 4096
+#define DSO__DATA_CACHE_MASK (~(DSO__DATA_CACHE_SIZE - 1))	/* clears the in-chunk offset bits */
+
+struct dso_cache {
+	struct rb_node	rb_node;
+	u64 offset;
+	u64 size;
+	char data[];	/* C99 flexible array member instead of the GNU [0] form */
+};
+
+/*
+ * DSOs are put into both a list for fast iteration and rbtree for fast
+ * long name lookup.
+ */
+struct dsos {
+	struct list_head head;	/* dso->node entries, appended by __dsos__add() */
+	struct rb_root	 root;	/* rbtree root sorted by long name */
+	struct rw_semaphore lock;	/* taken by dsos__add(), dsos__find() and dsos__findnew() */
+};
+
+struct auxtrace_cache;
+
+struct dso {
+	pthread_mutex_t	 lock;
+	struct list_head node;		/* entry in dsos->head */
+	struct rb_node	 rb_node;	/* rbtree node sorted by long name */
+	struct rb_root	 *root;		/* root of rbtree that rb_node is in */
+	struct rb_root	 symbols[MAP__NR_TYPES];
+	struct rb_root	 symbol_names[MAP__NR_TYPES];
+	struct rb_root	 inlined_nodes;
+	struct rb_root	 srclines;
+	struct {
+		u64		addr;
+		struct symbol	*symbol;
+	} last_find_result[MAP__NR_TYPES];
+	void		 *a2l;
+	char		 *symsrc_filename;
+	unsigned int	 a2l_fails;
+	enum dso_kernel_type	kernel;
+	enum dso_swap_type	needs_swap;
+	enum dso_binary_type	symtab_type;
+	enum dso_binary_type	binary_type;
+	enum dso_load_errno	load_errno;
+	u8		 adjust_symbols:1;
+	u8		 has_build_id:1;
+	u8		 has_srcline:1;
+	u8		 hit:1;
+	u8		 annotate_warned:1;
+	u8		 short_name_allocated:1;	/* dso__delete() frees short_name */
+	u8		 long_name_allocated:1;		/* dso__delete() frees long_name */
+	u8		 is_64_bit:1;
+	u8		 sorted_by_name;	/* bitmask, one bit per enum map_type */
+	u8		 loaded;		/* bitmask, one bit per enum map_type */
+	u8		 rel;
+	u8		 build_id[BUILD_ID_SIZE];
+	u64		 text_offset;
+	const char	 *short_name;
+	const char	 *long_name;
+	u16		 long_name_len;
+	u16		 short_name_len;
+	void		*dwfl;			/* DWARF debug info */
+	struct auxtrace_cache *auxtrace_cache;
+
+	/* dso data file */
+	struct {
+		struct rb_root	 cache;
+		int		 fd;
+		int		 status;
+		u32		 status_seen;
+		size_t		 file_size;
+		struct list_head open_entry;
+		u64		 debug_frame_offset;
+		u64		 eh_frame_hdr_offset;
+	} data;
+
+	union { /* Tool specific area */
+		void	 *priv;
+		u64	 db_id;
+	};
+	struct nsinfo	*nsinfo;
+	refcount_t	 refcnt;	/* set to 1 by dso__new() */
+	char		 name[];	/* flexible array; dso__new() allocates and fills it */
+};
+
+/* dso__for_each_symbol - iterate over the symbols of given type
+ *
+ * @dso: the 'struct dso *' whose symbols are iterated
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @n: the 'struct rb_node *' to use as a temporary storage
+ * @type: the 'enum map_type' type of symbols
+ */
+#define dso__for_each_symbol(dso, pos, n, type)	\
+	symbols__for_each_entry(&(dso)->symbols[(type)], pos, n)
+
+static inline void dso__set_loaded(struct dso *dso, enum map_type type)
+{
+	dso->loaded = dso->loaded | (1 << type);	/* set bit 'type' of the mask */
+}
+
+struct dso *dso__new(const char *name);
+void dso__delete(struct dso *dso);
+
+void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated);
+void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
+
+int dso__name_len(const struct dso *dso);
+
+struct dso *dso__get(struct dso *dso);
+void dso__put(struct dso *dso);
+
+static inline void __dso__zput(struct dso **dso)
+{
+	dso__put(*dso);		/* drop the reference (dso__put() accepts NULL)... */
+	*dso = NULL;		/* ...and clear the caller's pointer */
+}
+
+#define dso__zput(dso) __dso__zput(&dso)
+
+bool dso__loaded(const struct dso *dso, enum map_type type);
+
+bool dso__sorted_by_name(const struct dso *dso, enum map_type type);
+void dso__set_sorted_by_name(struct dso *dso, enum map_type type);
+void dso__sort_by_name(struct dso *dso, enum map_type type);
+
+void dso__set_build_id(struct dso *dso, void *build_id);
+bool dso__build_id_equal(const struct dso *dso, u8 *build_id);
+void dso__read_running_kernel_build_id(struct dso *dso,
+				       struct machine *machine);
+int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir);
+
+char dso__symtab_origin(const struct dso *dso);
+int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type,
+				   char *root_dir, char *filename, size_t size);
+bool is_supported_compression(const char *ext);
+bool is_kernel_module(const char *pathname, int cpumode);
+bool decompress_to_file(const char *ext, const char *filename, int output_fd);
+bool dso__needs_decompress(struct dso *dso);
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name);
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+				 char *pathname, size_t len);
+
+#define KMOD_DECOMP_NAME  "/tmp/perf-kmod-XXXXXX"
+#define KMOD_DECOMP_LEN   sizeof(KMOD_DECOMP_NAME)
+
+struct kmod_path {
+	char *name;
+	char *ext;
+	bool  comp;
+	bool  kmod;
+};
+
+int __kmod_path__parse(struct kmod_path *m, const char *path,
+		     bool alloc_name, bool alloc_ext);
+
+#define kmod_path__parse(__m, __p)      __kmod_path__parse(__m, __p, false, false)
+#define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false)
+#define kmod_path__parse_ext(__m, __p)  __kmod_path__parse(__m, __p, false, true)
+
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+			  struct machine *machine);
+
+/*
+ * The dso__data_* external interface provides following functions:
+ *   dso__data_get_fd
+ *   dso__data_put_fd
+ *   dso__data_close
+ *   dso__data_size
+ *   dso__data_read_offset
+ *   dso__data_read_addr
+ *
+ * Please refer to the dso.c object code for each function and
+ * arguments documentation. Following text tries to explain the
+ * dso file descriptor caching.
+ *
+ * The dso__data* interface allows caching of opened file descriptors
+ * to speed up the dso data accesses. The idea is to leave the file
+ * descriptor opened ideally for the whole life of the dso object.
+ *
+ * The current usage of the dso__data_* interface is as follows:
+ *
+ * Get DSO's fd:
+ *   int fd = dso__data_get_fd(dso, machine);
+ *   if (fd >= 0) {
+ *       USE 'fd' SOMEHOW
+ *       dso__data_put_fd(dso);
+ *   }
+ *
+ * Read DSO's data:
+ *   n = dso__data_read_offset(dso_0, &machine, 0, buf, BUFSIZE);
+ *   n = dso__data_read_addr(dso_0, &machine, 0, buf, BUFSIZE);
+ *
+ * Eventually close DSO's fd:
+ *   dso__data_close(dso);
+ *
+ * It is not necessary to close the DSO object data file. Each time new
+ * DSO data file is opened, the limit (RLIMIT_NOFILE/2) is checked. Once
+ * it is crossed, the oldest opened DSO object is closed.
+ *
+ * The dso__delete function calls close_dso function to ensure the
+ * data file descriptor gets closed/unmapped before the dso object
+ * is freed.
+ *
+ * TODO
+*/
+int dso__data_get_fd(struct dso *dso, struct machine *machine);
+void dso__data_put_fd(struct dso *dso);
+void dso__data_close(struct dso *dso);
+
+off_t dso__data_size(struct dso *dso, struct machine *machine);
+ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
+			      u64 offset, u8 *data, ssize_t size);
+ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
+			    struct machine *machine, u64 addr,
+			    u8 *data, ssize_t size);
+bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by);
+
+struct map *dso__new_map(const char *name);
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+				    const char *short_name, int dso_type);
+
+void __dsos__add(struct dsos *dsos, struct dso *dso);
+void dsos__add(struct dsos *dsos, struct dso *dso);
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name);
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
+struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
+struct dso *dsos__findnew(struct dsos *dsos, const char *name);
+bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
+
+void dso__reset_find_symbol_cache(struct dso *dso);
+
+size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+			       bool (skip)(struct dso *dso, int parm), int parm);
+size_t __dsos__fprintf(struct list_head *head, FILE *fp);
+
+size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
+size_t dso__fprintf_symbols_by_name(struct dso *dso,
+				    enum map_type type, FILE *fp);
+size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp);
+
+static inline bool dso__is_vmlinux(struct dso *dso)
+{
+	return dso->binary_type == DSO_BINARY_TYPE__GUEST_VMLINUX ||	/* guest */
+	       dso->binary_type == DSO_BINARY_TYPE__VMLINUX;		/* or host */
+}
+
+static inline bool dso__is_kcore(struct dso *dso)
+{
+	return dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE ||	/* guest */
+	       dso->binary_type == DSO_BINARY_TYPE__KCORE;		/* or host */
+}
+
+static inline bool dso__is_kallsyms(struct dso *dso)
+{
+	return dso->kernel != DSO_TYPE_USER && dso->long_name[0] != '/';	/* kernel dso, non-absolute name */
+}
+
+void dso__free_a2l(struct dso *dso);
+
+enum dso_type dso__type(struct dso *dso, struct machine *machine);
+
+int dso__strerror_load(struct dso *dso, char *buf, size_t buflen);
+
+void reset_fd_limit(void);
+
+#endif /* __PERF_DSO */
diff --git a/util/dump-insn.c b/util/dump-insn.c
new file mode 100644
index 0000000..10988d3
--- /dev/null
+++ b/util/dump-insn.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include "dump-insn.h"
+
+/* Fallback code */
+
+__weak
+const char *dump_insn(struct perf_insn *x __maybe_unused,
+		      u64 ip __maybe_unused, u8 *inbuf __maybe_unused,
+		      int inlen __maybe_unused, int *lenp)
+{
+	if (lenp != NULL)	/* the length out-parameter is optional */
+		*lenp = 0;
+	return "?";		/* weak fallback: nothing is decoded */
+}
diff --git a/util/dump-insn.h b/util/dump-insn.h
new file mode 100644
index 0000000..0e06280
--- /dev/null
+++ b/util/dump-insn.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DUMP_INSN_H
+#define __PERF_DUMP_INSN_H 1
+
+#define MAXINSN 15
+
+#include <linux/types.h>
+
+struct thread;
+
+struct perf_insn {
+	/* Initialized by callers: */
+	struct thread *thread;
+	u8	      cpumode;
+	bool	      is64bit;
+	int	      cpu;
+	/* Temporary */
+	char	      out[256];	/* NOTE(review): presumably backs the string dump_insn() returns — confirm */
+};
+
+const char *dump_insn(struct perf_insn *x, u64 ip,
+		      u8 *inbuf, int inlen, int *lenp);
+#endif
diff --git a/util/dwarf-aux.c b/util/dwarf-aux.c
new file mode 100644
index 0000000..7eb7de5
--- /dev/null
+++ b/util/dwarf-aux.c
@@ -0,0 +1,1293 @@
+/*
+ * dwarf-aux.c : libdw auxiliary interfaces
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include "util.h"
+#include "debug.h"
+#include "dwarf-aux.h"
+#include "string2.h"
+
+/**
+ * cu_find_realpath - Find the realpath of the target file
+ * @cu_die: A DIE(dwarf information entry) of CU(compilation Unit)
+ * @fname:  The tail filename of the target file
+ *
+ * Find the real(long) path of @fname in @cu_die.
+ */
+const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
+{
+	Dwarf_Files *files;
+	size_t nfiles, i;
+	const char *src;
+
+	if (!fname)
+		return NULL;
+
+	if (dwarf_getsrcfiles(cu_die, &files, &nfiles) != 0)
+		return NULL;
+
+	/* The first entry whose path ends with @fname wins. */
+	for (i = 0; i < nfiles; i++) {
+		src = dwarf_filesrc(files, i, NULL, NULL);
+		/* dwarf_filesrc() can return NULL; do not pass that on. */
+		if (src && strtailcmp(src, fname) == 0)
+			return src;
+	}
+
+	/* No source file of this CU ends with @fname. */
+	return NULL;
+}
+
+/**
+ * cu_get_comp_dir - Get the path of compilation directory
+ * @cu_die: a CU DIE
+ *
+ * Get the path of compilation directory of given @cu_die.
+ * Since this depends on DW_AT_comp_dir, older gcc versions may not
+ * embed it. In that case, this returns NULL.
+ */
+const char *cu_get_comp_dir(Dwarf_Die *cu_die)
+{
+	Dwarf_Attribute comp_dir;
+	if (!dwarf_attr(cu_die, DW_AT_comp_dir, &comp_dir))
+		return NULL;	/* the CU has no DW_AT_comp_dir */
+	return dwarf_formstring(&comp_dir);
+}
+
+/**
+ * cu_find_lineinfo - Get a line number and file name for given address
+ * @cu_die: a CU DIE
+ * @addr: An address
+ * @fname: a pointer which returns the file name string
+ * @lineno: a pointer which returns the line number
+ *
+ * Find a line number and file name for @addr in @cu_die.
+ */
+int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr,
+		    const char **fname, int *lineno)
+{
+	Dwarf_Line *line = dwarf_getsrc_die(cu_die, (Dwarf_Addr)addr);
+	Dwarf_Addr laddr;
+
+	/* A hit needs an exact address match plus both file name and line. */
+	if (line && dwarf_lineaddr(line, &laddr) == 0 &&
+	    addr == (unsigned long)laddr && dwarf_lineno(line, lineno) == 0 &&
+	    (*fname = dwarf_linesrc(line, NULL, NULL)) != NULL && *lineno != 0)
+		return *lineno;
+
+	/* Never report a stale or caller-uninitialized *lineno as a hit. */
+	*lineno = 0;
+
+	return -ENOENT;
+}
+
+static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data);
+
+/**
+ * cu_walk_functions_at - Walk on function DIEs at given address
+ * @cu_die: A CU DIE
+ * @addr: An address
+ * @callback: A callback which is called with each found DIE
+ * @data: A user data
+ *
+ * Walk on function DIEs at given @addr in @cu_die. Passed DIEs
+ * should be subprogram or inlined-subroutines.
+ */
+int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+		    int (*callback)(Dwarf_Die *, void *), void *data)
+{
+	Dwarf_Die die_mem;
+	Dwarf_Die *scope = die_find_realfunc(cu_die, addr, &die_mem);
+	int ret = -ENOENT;
+
+	/*
+	 * Start at the real function covering @addr, then descend through
+	 * nested inlined instances until the search or the callback stops.
+	 */
+	while (scope != NULL) {
+		ret = callback(scope, data);
+		if (ret != 0)
+			break;
+		scope = die_find_child(scope, __die_find_inline_cb, &addr,
+				       &die_mem);
+	}
+	return ret;
+}
+
+/**
+ * die_get_linkage_name - Get the linkage name of the object
+ * @dw_die: A DIE of the object
+ *
+ * Get the linkage name attribute of given @dw_die.
+ * For C++ binary, the linkage name will be the mangled symbol.
+ */
+const char *die_get_linkage_name(Dwarf_Die *dw_die)
+{
+	Dwarf_Attribute linkage;
+
+	/* NULL when no DW_AT_linkage_name attribute can be found. */
+	return dwarf_attr_integrate(dw_die, DW_AT_linkage_name, &linkage) ?
+		dwarf_formstring(&linkage) : NULL;
+}
+
+/**
+ * die_compare_name - Compare diename and tname
+ * @dw_die: a DIE
+ * @tname: a string of target name
+ *
+ * Compare the name of @dw_die and @tname. Return false if @dw_die has no name.
+ */
+bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
+{
+	const char *die_name = dwarf_diename(dw_die);
+
+	/* An unnamed DIE never matches. */
+	return die_name != NULL && !strcmp(tname, die_name);
+}
+
+/**
+ * die_match_name - Match diename/linkage name and glob
+ * @dw_die: a DIE
+ * @glob: a string of target glob pattern
+ *
+ * Glob matching the name of @dw_die and @glob. Return false if matching fail.
+ * This also match linkage name.
+ */
+bool die_match_name(Dwarf_Die *dw_die, const char *glob)
+{
+	const char *candidate = dwarf_diename(dw_die);
+
+	/* Try the plain DIE name first... */
+	if (candidate != NULL && strglobmatch(candidate, glob))
+		return true;
+
+	/* ...then fall back to the linkage name, if there is one. */
+	candidate = die_get_linkage_name(dw_die);
+	if (candidate == NULL)
+		return false;
+	return strglobmatch(candidate, glob);
+}
+
+/**
+ * die_get_call_lineno - Get callsite line number of inline-function instance
+ * @in_die: a DIE of an inlined function instance
+ *
+ * Get call-site line number of @in_die. This means from where the inline
+ * function is called.
+ */
+int die_get_call_lineno(Dwarf_Die *in_die)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Word ret;
+
+	/* A missing or undecodable DW_AT_call_line must not yield garbage. */
+	if (!dwarf_attr(in_die, DW_AT_call_line, &attr) ||
+	    dwarf_formudata(&attr, &ret) != 0)
+		return -ENOENT;
+	return (int)ret;
+}
+
+/**
+ * die_get_type - Get type DIE
+ * @vr_die: a DIE of a variable
+ * @die_mem: where to store a type DIE
+ *
+ * Get a DIE of the type of given variable (@vr_die), and store
+ * it to die_mem. Return NULL if fails to get a type DIE.
+ */
+Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+	Dwarf_Attribute type_attr;
+
+	/* Both the DW_AT_type lookup and its dereference must succeed. */
+	if (!dwarf_attr_integrate(vr_die, DW_AT_type, &type_attr))
+		return NULL;
+
+	return dwarf_formref_die(&type_attr, die_mem) ? die_mem : NULL;
+}
+
+/* Get a type die, but skip qualifiers */
+static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+	/* Follow DW_AT_type links for as long as they name a qualifier. */
+	while ((vr_die = die_get_type(vr_die, die_mem)) != NULL) {
+		switch (dwarf_tag(vr_die)) {
+		case DW_TAG_const_type:
+		case DW_TAG_restrict_type:
+		case DW_TAG_volatile_type:
+		case DW_TAG_shared_type:
+			continue;
+		default:
+			return vr_die;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * die_get_real_type - Get a type die, but skip qualifiers and typedef
+ * @vr_die: a DIE of a variable
+ * @die_mem: where to store a type DIE
+ *
+ * Get a DIE of the type of given variable (@vr_die), and store
+ * it to die_mem. Return NULL if fails to get a type DIE.
+ * If the type is qualifiers (e.g. const) or typedef, this skips it
+ * and tries to find real type (structure or basic types, e.g. int).
+ */
+Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+	for (;;) {	/* strip qualifiers; repeat for as long as a typedef is hit */
+		vr_die = __die_get_real_type(vr_die, die_mem);
+		if (!vr_die || dwarf_tag(vr_die) != DW_TAG_typedef)
+			return vr_die;
+	}
+}
+
+/* Get attribute and translate it as a udata */
+static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+			      Dwarf_Word *result)
+{
+	Dwarf_Attribute attr;
+
+	/* An absent attribute and a failed decode both give -ENOENT. */
+	if (!dwarf_attr(tp_die, attr_name, &attr))
+		return -ENOENT;
+
+	return dwarf_formudata(&attr, result) ? -ENOENT : 0;
+}
+
+/* Get attribute and translate it as a sdata */
+static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
+			      Dwarf_Sword *result)
+{
+	Dwarf_Attribute attr;
+
+	/* An absent attribute and a failed decode both give -ENOENT. */
+	if (!dwarf_attr(tp_die, attr_name, &attr))
+		return -ENOENT;
+
+	return dwarf_formsdata(&attr, result) ? -ENOENT : 0;
+}
+
+/**
+ * die_is_signed_type - Check whether a type DIE is signed or not
+ * @tp_die: a DIE of a type
+ *
+ * Get the encoding of @tp_die and return true if the encoding
+ * is signed.
+ */
+bool die_is_signed_type(Dwarf_Die *tp_die)
+{
+	Dwarf_Word enc;
+
+	/* No readable DW_AT_encoding: treated as not signed. */
+	if (die_get_attr_udata(tp_die, DW_AT_encoding, &enc) != 0)
+		return false;
+	return enc == DW_ATE_signed || enc == DW_ATE_signed_char ||
+	       enc == DW_ATE_signed_fixed;
+}
+
+/**
+ * die_is_func_def - Ensure that this DIE is a subprogram and definition
+ * @dw_die: a DIE
+ *
+ * Ensure that this DIE is a subprogram and NOT a declaration. This
+ * returns true if @dw_die is a function definition.
+ **/
+bool die_is_func_def(Dwarf_Die *dw_die)
+{
+	Dwarf_Attribute decl;
+	if (dwarf_tag(dw_die) != DW_TAG_subprogram)
+		return false;
+	return !dwarf_attr(dw_die, DW_AT_declaration, &decl);	/* definition: no DW_AT_declaration */
+}
+
+/**
+ * die_is_func_instance - Ensure that this DIE is an instance of a subprogram
+ * @dw_die: a DIE
+ *
+ * Ensure that this DIE is an instance (which has an entry address).
+ * This returns true if @dw_die is a function instance. If not, you need to
+ * call die_walk_instances() to find actual instances.
+ **/
+bool die_is_func_instance(Dwarf_Die *dw_die)
+{
+	Dwarf_Addr entry;
+
+	/* gcc may optimize a non-inline function just as if it were inlined */
+	return !dwarf_func_inline(dw_die) && !dwarf_entrypc(dw_die, &entry);
+}
+/**
+ * die_get_data_member_location - Get the data-member offset
+ * @mb_die: a DIE of a member of a data structure
+ * @offs: The offset of the member in the data structure
+ *
+ * Get the offset of @mb_die in the data structure including @mb_die, and
+ * store it in @offs. Returns 0 on success or a negative errno on error.
+ */
+int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Op *expr;
+	size_t nexpr;
+	int ret;
+
+	if (dwarf_attr(mb_die, DW_AT_data_member_location, &attr) == NULL)
+		return -ENOENT;
+
+	if (dwarf_formudata(&attr, offs) != 0) {
+		/* Not a constant: accept only a lone DW_OP_plus_uconst expression */
+		ret = dwarf_getlocation(&attr, &expr, &nexpr);
+		if (ret < 0 || nexpr == 0)
+			return -ENOENT;
+
+		if (expr[0].atom != DW_OP_plus_uconst || nexpr != 1) {
+			pr_debug("Unable to get offset:Unexpected OP %x (%zu)\n",
+				 expr[0].atom, nexpr);	/* nexpr is a size_t: %zu */
+			return -ENOTSUP;
+		}
+		*offs = (Dwarf_Word)expr[0].number;
+	}
+	return 0;
+}
+
+/* Get the call file index number in CU DIE */
+static int die_get_call_fileno(Dwarf_Die *in_die)
+{
+	Dwarf_Sword file_idx;
+
+	/* Any failure to read DW_AT_call_file maps to -ENOENT. */
+	if (die_get_attr_sdata(in_die, DW_AT_call_file, &file_idx) != 0)
+		return -ENOENT;
+	return (int)file_idx;
+}
+
+/* Get the declared file index number in CU DIE */
+static int die_get_decl_fileno(Dwarf_Die *pdie)
+{
+	Dwarf_Sword file_idx;
+
+	/* Any failure to read DW_AT_decl_file maps to -ENOENT. */
+	if (die_get_attr_sdata(pdie, DW_AT_decl_file, &file_idx) != 0)
+		return -ENOENT;
+	return (int)file_idx;
+}
+
+/**
+ * die_get_call_file - Get callsite file name of inlined function instance
+ * @in_die: a DIE of an inlined function instance
+ *
+ * Get call-site file name of @in_die. This means from which file the inline
+ * function is called.
+ */
+const char *die_get_call_file(Dwarf_Die *in_die)
+{
+	Dwarf_Files *srcfiles;
+	Dwarf_Die cu;
+	int idx = die_get_call_fileno(in_die);
+
+	/* Need the file index, the owning CU and that CU's file table. */
+	if (idx < 0 || !dwarf_diecu(in_die, &cu, NULL, NULL))
+		return NULL;
+	if (dwarf_getsrcfiles(&cu, &srcfiles, NULL) != 0)
+		return NULL;
+	return dwarf_filesrc(srcfiles, idx, NULL, NULL);
+}
+
+
+/**
+ * die_find_child - Generic DIE search function in DIE tree
+ * @rt_die: a root DIE
+ * @callback: a callback function
+ * @data: a user data passed to the callback function
+ * @die_mem: a buffer for result DIE
+ *
+ * Walk the DIE tree below @rt_die and call @callback for each child DIE.
+ * If @callback returns DIE_FIND_CB_END, the current DIE is stored in
+ * @die_mem and returned. If @callback returns DIE_FIND_CB_CONTINUE, the
+ * walk goes on through both children and siblings. @callback can also
+ * return DIE_FIND_CB_CHILD or DIE_FIND_CB_SIBLING to restrict the walk to
+ * only the children or only the siblings of the current DIE, respectively.
+ * Returns NULL if @callback did not accept any DIE.
+ */
+Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+			  int (*callback)(Dwarf_Die *, void *),
+			  void *data, Dwarf_Die *die_mem)
+{
+	Dwarf_Die nested;
+	int verdict;
+
+	/* No first child could be fetched: nothing to search */
+	if (dwarf_child(rt_die, die_mem) != 0)
+		return NULL;
+
+	do {
+		verdict = callback(die_mem, data);
+		if (verdict == DIE_FIND_CB_END)
+			return die_mem;
+
+		if ((verdict & DIE_FIND_CB_CHILD) &&
+		    die_find_child(die_mem, callback, data, &nested)) {
+			memcpy(die_mem, &nested, sizeof(Dwarf_Die));
+			return die_mem;
+		}
+	} while ((verdict & DIE_FIND_CB_SIBLING) &&
+		 dwarf_siblingof(die_mem, die_mem) == 0);
+
+	return NULL;
+}
+
+struct __addr_die_search_param {
+	Dwarf_Addr	addr;		/* address the callbacks match against */
+	Dwarf_Die	*die_mem;	/* buffer receiving the matching DIE */
+};
+
+static int __die_search_func_tail_cb(Dwarf_Die *fn_die, void *data)
+{
+	struct __addr_die_search_param *param = data;
+	Dwarf_Addr highpc = 0;
+
+	if (dwarf_tag(fn_die) != DW_TAG_subprogram ||
+	    dwarf_highpc(fn_die, &highpc) || highpc != param->addr)
+		return DWARF_CB_OK;
+
+	/* The subprogram's high pc is exactly the target address: report it */
+	memcpy(param->die_mem, fn_die, sizeof(Dwarf_Die));
+	return DWARF_CB_ABORT;
+}
+
+/**
+ * die_find_tailfunc - Search for a non-inlined function with tail call at
+ * given address
+ * @cu_die: a CU DIE which includes @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search @cu_die for a subprogram DIE whose high pc equals @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+				    Dwarf_Die *die_mem)
+{
+	struct __addr_die_search_param param = { .addr = addr,
+						 .die_mem = die_mem };
+
+	/* dwarf_getscopes can't find subprogram. */
+	if (dwarf_getfuncs(cu_die, __die_search_func_tail_cb, &param, 0))
+		return die_mem;
+
+	return NULL;
+}
+
+/* dwarf_getfuncs() callback for non-inlined function search */
+static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
+{
+	struct __addr_die_search_param *param = data;
+
+	if (dwarf_tag(fn_die) != DW_TAG_subprogram ||
+	    !dwarf_haspc(fn_die, param->addr))
+		return DWARF_CB_OK;
+
+	/*
+	 * Since a declaration entry doesn't have a given pc, a DIE reaching
+	 * this point is always a function definition entry.
+	 */
+	memcpy(param->die_mem, fn_die, sizeof(Dwarf_Die));
+	return DWARF_CB_ABORT;
+}
+
+/**
+ * die_find_realfunc - Search a non-inlined function at given address
+ * @cu_die: a CU DIE which includes @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search @cu_die for a subprogram DIE which includes @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ */
+Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+				    Dwarf_Die *die_mem)
+{
+	struct __addr_die_search_param param = { .addr = addr,
+						 .die_mem = die_mem };
+
+	/* dwarf_getscopes can't find subprogram. */
+	if (dwarf_getfuncs(cu_die, __die_search_func_cb, &param, 0))
+		return die_mem;
+
+	return NULL;
+}
+
+/* die_find_child() callback for inline function search */
+static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data)
+{
+	Dwarf_Addr *target = data;
+
+	if (dwarf_tag(die_mem) != DW_TAG_inlined_subroutine)
+		return DIE_FIND_CB_CONTINUE;
+	if (!dwarf_haspc(die_mem, *target))
+		return DIE_FIND_CB_CONTINUE;
+	return DIE_FIND_CB_END;
+}
+
+/**
+ * die_find_top_inlinefunc - Search the top inlined function at given address
+ * @sp_die: a subprogram DIE which includes @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search an inlined function DIE which includes @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ * Even if several inlined functions are nested at @addr, this doesn't
+ * trace them down, and returns the topmost one.
+ */
+Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+				   Dwarf_Die *die_mem)
+{
+	return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
+}
+
+/**
+ * die_find_inlinefunc - Search an inlined function at given address
+ * @sp_die: a subprogram DIE which includes @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search an inlined function DIE which includes @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ * If several inlined functions are nested at @addr, this follows them
+ * down and returns the deepest one.
+ */
+Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+			       Dwarf_Die *die_mem)
+{
+	Dwarf_Die scratch, *cur;
+
+	cur = die_find_child(sp_die, __die_find_inline_cb, &addr, &scratch);
+	if (cur == NULL)
+		return NULL;
+
+	/* Inlined function could be nested. Descend until the search fails */
+	do {
+		memcpy(die_mem, cur, sizeof(Dwarf_Die));
+		cur = die_find_child(cur, __die_find_inline_cb, &addr,
+				     &scratch);
+	} while (cur != NULL);
+
+	return die_mem;
+}
+
+struct __instance_walk_param {
+	void    *addr;		/* ->addr of the abstract origin DIE to match */
+	int	(*callback)(Dwarf_Die *, void *);	/* called per instance */
+	void    *data;		/* opaque argument handed to @callback */
+	int	retval;		/* last value returned by @callback */
+};
+
+static int __die_walk_instances_cb(Dwarf_Die *inst, void *data)
+{
+	struct __instance_walk_param *iwp = data;
+	Dwarf_Attribute attr_mem;
+	Dwarf_Die origin_mem;
+	Dwarf_Attribute *attr;
+	Dwarf_Die *origin;
+	int tmp;
+
+	attr = dwarf_attr(inst, DW_AT_abstract_origin, &attr_mem);
+	if (attr == NULL)	/* no abstract origin: not an instance */
+		return DIE_FIND_CB_CONTINUE;
+
+	origin = dwarf_formref_die(attr, &origin_mem);
+	if (origin == NULL || origin->addr != iwp->addr)
+		return DIE_FIND_CB_CONTINUE;	/* instance of another DIE */
+
+	/* Ignore redundant instances; tmp is only read if decl_line succeeded */
+	if (dwarf_tag(inst) == DW_TAG_inlined_subroutine &&
+	    dwarf_decl_line(origin, &tmp) == 0) {
+		if (die_get_call_lineno(inst) == tmp) {
+			tmp = die_get_decl_fileno(origin);
+			if (die_get_call_fileno(inst) == tmp)
+				return DIE_FIND_CB_CONTINUE;
+		}
+	}
+
+	iwp->retval = iwp->callback(inst, iwp->data);
+
+	return (iwp->retval) ? DIE_FIND_CB_END : DIE_FIND_CB_CONTINUE;
+}
+
+/**
+ * die_walk_instances - Walk on instances of given DIE
+ * @or_die: an abstract original DIE
+ * @callback: a callback function which is called with instance DIE
+ * @data: user data
+ *
+ * Walk on the instances of given @or_die. @or_die must be an inlined function
+ * declaration. This returns the return value of @callback if it returns
+ * non-zero value, or -ENOENT if there is no instance.
+ */
+int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *),
+		       void *data)
+{
+	struct __instance_walk_param param;
+	Dwarf_Die cu_mem, found_mem;
+
+	if (!dwarf_diecu(or_die, &cu_mem, NULL, NULL))
+		return -ENOENT;
+
+	param.addr = or_die->addr;
+	param.callback = callback;
+	param.data = data;
+	/* Reported as-is when no instance ever reaches @callback */
+	param.retval = -ENOENT;
+
+	/* Only param.retval matters; the DIE found is not used */
+	die_find_child(&cu_mem, __die_walk_instances_cb, &param, &found_mem);
+	return param.retval;
+}
+
+/* Line walker internal parameters */
+struct __line_walk_param {
+	bool recursive;		/* also descend into nested inlined functions */
+	line_walk_callback_t callback;	/* invoked for each reported line */
+	void *data;		/* opaque argument handed to @callback */
+	int retval;		/* last value returned by @callback */
+};
+
+static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
+{
+	struct __line_walk_param *lw = data;
+	Dwarf_Addr addr = 0;	/* only written by dwarf_entrypc() below */
+	const char *fname;
+	int lineno;
+
+	if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
+		fname = die_get_call_file(in_die);
+		lineno = die_get_call_lineno(in_die);
+		if (fname && lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) {
+			lw->retval = lw->callback(fname, lineno, addr, lw->data);
+			if (lw->retval != 0)
+				return DIE_FIND_CB_END;
+		}
+	}
+	if (!lw->recursive)
+		/* Don't need to search recursively */
+		return DIE_FIND_CB_SIBLING;
+
+	if (addr) {	/* also report the declaration line at the same address */
+		fname = dwarf_decl_file(in_die);
+		if (fname && dwarf_decl_line(in_die, &lineno) == 0) {
+			lw->retval = lw->callback(fname, lineno, addr, lw->data);
+			if (lw->retval != 0)
+				return DIE_FIND_CB_END;
+		}
+	}
+
+	/* Continue to search nested inlined function call-sites */
+	return DIE_FIND_CB_CONTINUE;
+}
+
+/* Report the declaration line of @sp_die, then walk the DIEs inside it */
+static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive,
+				line_walk_callback_t callback, void *data)
+{
+	struct __line_walk_param param = {
+		.recursive = recursive,
+		.callback = callback,
+		.data = data,
+		.retval = 0,
+	};
+	const char *decl_file = dwarf_decl_file(sp_die);
+	Dwarf_Die found_mem;
+	Dwarf_Addr entry;
+	int decl_line;
+
+	/* Handle function declaration line */
+	if (decl_file && dwarf_decl_line(sp_die, &decl_line) == 0 &&
+	    dwarf_entrypc(sp_die, &entry) == 0) {
+		param.retval = callback(decl_file, decl_line, entry, data);
+		if (param.retval != 0)
+			return param.retval;
+	}
+
+	/* The walker callback leaves its result in param.retval */
+	die_find_child(sp_die, __die_walk_funclines_cb, &param, &found_mem);
+	return param.retval;
+}
+
+static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
+{
+	struct __line_walk_param *param = data;
+	int rc;
+
+	rc = __die_walk_funclines(sp_die, true, param->callback, param->data);
+	param->retval = rc;
+
+	return rc != 0 ? DWARF_CB_ABORT : DWARF_CB_OK;
+}
+
+/**
+ * die_walk_lines - Walk on lines inside given DIE
+ * @rt_die: a root DIE (CU, subprogram or inlined_subroutine)
+ * @callback: callback routine
+ * @data: user data
+ *
+ * Call @callback on each line inside @rt_die: the lines of the function if
+ * @rt_die is a function, otherwise @rt_die must be a CU DIE. Besides the
+ * dwarf line list, function entries and inline call-sites are walked too.
+ * Returns the first non-zero @callback result, -EINVAL if the CU of @rt_die
+ * or -ENOENT if its line list is unavailable, and 0 otherwise.
+ */
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
+{
+	Dwarf_Lines *lines;
+	Dwarf_Line *line;
+	Dwarf_Addr addr;
+	const char *fname, *decf = NULL;
+	int lineno, ret = 0;
+	int decl = 0, inl;
+	Dwarf_Die die_mem, *cu_die;
+	size_t nlines, i;
+
+	/* Get the CU die (die_mem backs it until the loop reuses die_mem) */
+	if (dwarf_tag(rt_die) != DW_TAG_compile_unit) {
+		cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
+		dwarf_decl_line(rt_die, &decl);
+		decf = dwarf_decl_file(rt_die);
+	} else
+		cu_die = rt_die;
+	if (!cu_die) {
+		pr_debug2("Failed to get CU from given DIE.\n");
+		return -EINVAL;
+	}
+
+	/* Get lines list in the CU */
+	if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
+		pr_debug2("Failed to get source lines on this CU.\n");
+		return -ENOENT;
+	}
+	pr_debug2("Get %zu lines from this CU\n", nlines);
+
+	/* Walk on the lines on lines list */
+	for (i = 0; i < nlines; i++) {
+		line = dwarf_onesrcline(lines, i);
+		if (line == NULL ||
+		    dwarf_lineno(line, &lineno) != 0 ||
+		    dwarf_lineaddr(line, &addr) != 0) {
+			pr_debug2("Failed to get line info. "
+				  "Possible error in debuginfo.\n");
+			continue;
+		}
+		/* Filter lines based on address */
+		if (rt_die != cu_die) {
+			/*
+			 * Address filtering
+			 * The line is included in given function, and
+			 * no inline block includes it.
+			 */
+			if (!dwarf_haspc(rt_die, addr))
+				continue;
+			if (die_find_inlinefunc(rt_die, addr, &die_mem)) {
+				if (dwarf_decl_line(&die_mem, &inl) != 0 ||
+				    inl != decl ||
+				    decf != dwarf_decl_file(&die_mem))
+					continue;
+			}
+		}
+		/* Get source line */
+		fname = dwarf_linesrc(line, NULL, NULL);
+
+		ret = callback(fname, lineno, addr, data);
+		if (ret != 0)
+			return ret;
+	}
+
+	/*
+	 * Dwarf lines doesn't include function declarations and inlined
+	 * subroutines. We have to check functions list or given function.
+	 */
+	if (rt_die != cu_die)
+		/*
+		 * Don't need walk functions recursively, because nested
+		 * inlined functions don't have lines of the specified DIE.
+		 */
+		ret = __die_walk_funclines(rt_die, false, callback, data);
+	else {
+		struct __line_walk_param param = {
+			.callback = callback,
+			.data = data,
+			.retval = 0,
+		};
+		dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
+		ret = param.retval;
+	}
+
+	return ret;
+}
+
+struct __find_variable_param {
+	const char *name;	/* variable name to match */
+	Dwarf_Addr addr;	/* address used to prune DIEs via dwarf_haspc() */
+};
+
+static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
+{
+	struct __find_variable_param *param = data;
+	Dwarf_Attribute attr_mem;
+	int tag = dwarf_tag(die_mem);
+	bool is_var = tag == DW_TAG_formal_parameter || tag == DW_TAG_variable;
+
+	/* Does the DIE have location information or external instance? */
+	if (is_var && die_compare_name(die_mem, param->name) &&
+	    (dwarf_attr(die_mem, DW_AT_external, &attr_mem) ||
+	     dwarf_attr(die_mem, DW_AT_location, &attr_mem)))
+		return DIE_FIND_CB_END;
+
+	/* Descend only into DIEs that cover the target address */
+	if (dwarf_haspc(die_mem, param->addr))
+		return DIE_FIND_CB_CONTINUE;
+
+	return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_variable_at - Find a given name variable at given address
+ * @sp_die: a function DIE
+ * @name: variable name
+ * @addr: address
+ * @die_mem: a buffer for result DIE
+ *
+ * Find a variable or parameter DIE called @name at @addr in @sp_die.
+ */
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+				Dwarf_Addr addr, Dwarf_Die *die_mem)
+{
+	struct __find_variable_param fvp = { .name = name, .addr = addr};
+
+	return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
+			      die_mem);
+}
+
+static int __die_find_member_cb(Dwarf_Die *die_mem, void *data)
+{
+	const char *name = data;
+	Dwarf_Die type_die, tmp_die;
+
+	if (dwarf_tag(die_mem) != DW_TAG_member)
+		return DIE_FIND_CB_SIBLING;
+	if (die_compare_name(die_mem, name))
+		return DIE_FIND_CB_END;
+	/* Unnamed structure: look for @name among the members of its type */
+	if (!dwarf_diename(die_mem) && die_get_type(die_mem, &type_die) &&
+	    die_find_member(&type_die, name, &tmp_die))
+		return DIE_FIND_CB_END;
+
+	return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_member - Find a given name member in a data structure
+ * @st_die: a data structure type DIE
+ * @name: member name
+ * @die_mem: a buffer for result DIE
+ *
+ * Find a member DIE called @name among the direct children of @st_die.
+ */
+Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+			   Dwarf_Die *die_mem)
+{
+	return die_find_child(st_die, __die_find_member_cb, (void *)name,
+			      die_mem);
+}
+
+/**
+ * die_get_typename - Get the type name of given variable DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for result type name
+ *
+ * Get the type name of @vr_die and store it to @buf. Return 0 if succeeded,
+ * and return -ENOENT if failed to find type name.
+ * Note that the result will store the typedef name if possible, and stores
+ * "(function_type)*" if the type is a function pointer.
+ */
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
+{
+	const char *prefix = "";
+	const char *base;
+	Dwarf_Die type;
+	int tag, ret;
+
+	if (!__die_get_real_type(vr_die, &type))
+		return -ENOENT;
+
+	tag = dwarf_tag(&type);
+	if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type) {
+		/* Emit the element/pointee type first, then the "*" suffix */
+		ret = die_get_typename(&type, buf);
+		return ret ? ret : strbuf_addstr(buf, "*");
+	}
+	if (tag == DW_TAG_subroutine_type)	/* Function pointer */
+		return strbuf_add(buf, "(function_type)", 15);
+
+	base = dwarf_diename(&type);
+	if (!base)
+		return -ENOENT;
+	if (tag == DW_TAG_union_type)
+		prefix = "union ";
+	else if (tag == DW_TAG_structure_type)
+		prefix = "struct ";
+	else if (tag == DW_TAG_enumeration_type)
+		prefix = "enum ";
+	/* Write a base name */
+	return strbuf_addf(buf, "%s%s", prefix, base);
+}
+
+/**
+ * die_get_varname - Get the name and type of given variable DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for type and variable name
+ *
+ * Stores "type\tname" in @buf, using "(unknown_type)" if the type is unknown.
+ */
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
+{
+	int ret;
+
+	ret = die_get_typename(vr_die, buf);
+	if (ret < 0) {
+		pr_debug("Failed to get type, make it unknown.\n");
+		ret = strbuf_addstr(buf, "(unknown_type)");
+	}
+
+	return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
+}
+
+#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
+/**
+ * die_get_var_innermost_scope - Get innermost scope range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for variable byte offset range
+ *
+ * Get the innermost scope range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+				struct strbuf *buf)
+{
+	Dwarf_Die *scopes = NULL;	/* freed at out: even if lookup failed */
+	int count;
+	ptrdiff_t offset = 0;		/* signed: dwarf_ranges() may return < 0 */
+	Dwarf_Addr base;
+	Dwarf_Addr start, end;
+	Dwarf_Addr entry;
+	int ret;
+	bool first = true;
+	const char *name;
+
+	ret = dwarf_entrypc(sp_die, &entry);
+	if (ret)
+		return ret;
+
+	name = dwarf_diename(sp_die);
+	if (!name)
+		return -ENOENT;
+
+	count = dwarf_getscopes_die(vr_die, &scopes);
+
+	/* (*SCOPES)[1] is the DIE for the scope containing that scope */
+	if (count <= 1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	while ((offset = dwarf_ranges(&scopes[1], offset, &base,
+					&start, &end)) > 0) {
+		start -= entry;
+		end -= entry;
+
+		if (first) {
+			ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+					  name, start, end);
+			first = false;
+		} else {
+			ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+					  start, end);
+		}
+		if (ret < 0)
+			goto out;
+	}
+
+	if (!first)
+		ret = strbuf_add(buf, "]>", 2);
+
+out:
+	free(scopes);
+	return ret;
+}
+
+/**
+ * die_get_var_range - Get byte offset range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for type and variable name and byte offset range
+ *
+ * Get the byte offset range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
+{
+	int ret = 0;
+	Dwarf_Addr base;
+	Dwarf_Addr start, end;
+	Dwarf_Addr entry;
+	Dwarf_Op *op;
+	size_t nops;
+	ptrdiff_t offset = 0;	/* signed: dwarf_getlocations() may return < 0 */
+	Dwarf_Attribute attr;
+	bool first = true;
+	const char *name;
+
+	ret = dwarf_entrypc(sp_die, &entry);
+	if (ret)
+		return ret;
+
+	name = dwarf_diename(sp_die);
+	if (!name)
+		return -ENOENT;
+
+	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+		return -EINVAL;
+
+	while ((offset = dwarf_getlocations(&attr, offset, &base,
+					&start, &end, &op, &nops)) > 0) {
+		if (start == 0) {
+			/* Single Location Descriptions */
+			ret = die_get_var_innermost_scope(sp_die, vr_die, buf);
+			goto out;
+		}
+
+		/* Location Lists */
+		start -= entry;
+		end -= entry;
+		if (first) {
+			ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+					  name, start, end);
+			first = false;
+		} else {
+			ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+					  start, end);
+		}
+		if (ret < 0)
+			goto out;
+	}
+
+	if (!first)
+		ret = strbuf_add(buf, "]>", 2);
+out:
+	return ret;
+}
+#else
+int die_get_var_range(Dwarf_Die *sp_die __maybe_unused,
+		      Dwarf_Die *vr_die __maybe_unused,
+		      struct strbuf *buf __maybe_unused)
+{
+	return -ENOTSUP;	/* HAVE_DWARF_GETLOCATIONS_SUPPORT is not defined */
+}
+#endif
+
+/*
+ * die_has_loclist - Check if DW_AT_location of @vr_die is a location list
+ * @vr_die: a variable DIE
+ */
+static bool die_has_loclist(Dwarf_Die *vr_die)
+{
+	Dwarf_Attribute attr_mem;
+	int tag = dwarf_tag(vr_die);
+	bool is_var = tag == DW_TAG_formal_parameter || tag == DW_TAG_variable;
+
+	if (!is_var ||
+	    !dwarf_attr_integrate(vr_die, DW_AT_location, &attr_mem))
+		return false;
+
+	return dwarf_whatform(&attr_mem) == DW_FORM_sec_offset;
+}
+
+/*
+ * die_is_optimized_target - Check if target program is compiled with
+ * optimization
+ * @cu_die: a CU DIE
+ *
+ * The target is considered optimized if any variable or parameter DIE in
+ * the given CU has a location list as its DW_AT_location. This is
+ * applicable to clang as well.
+ */
+bool die_is_optimized_target(Dwarf_Die *cu_die)
+{
+	Dwarf_Die next;
+
+	/* This DIE itself uses a location list */
+	if (die_has_loclist(cu_die))
+		return true;
+
+	/* ... or a DIE below it does */
+	if (dwarf_child(cu_die, &next) == 0 &&
+	    die_is_optimized_target(&next))
+		return true;
+
+	/* ... or one of the DIEs following it does */
+	return dwarf_siblingof(cu_die, &next) == 0 &&
+	       die_is_optimized_target(&next);
+}
+
+/*
+ * die_search_idx - Search index of given line address
+ * @lines: Line records of single CU
+ * @nr_lines: Number of @lines
+ * @addr: address we are looking for
+ * @idx: index to be set by this function (return value)
+ *
+ * Scan the line records of the CU in order and store in @idx the index of
+ * the first record whose address equals @addr. Returns false if no record
+ * matches or a record's address cannot be read. Note that a single source
+ * line can have multiple line records, i.e. multiple indexes.
+ */
+static bool die_search_idx(Dwarf_Lines *lines, unsigned long nr_lines,
+			   Dwarf_Addr addr, unsigned long *idx)
+{
+	unsigned long pos = 0;
+	Dwarf_Addr line_addr;
+
+	while (pos < nr_lines) {
+		if (dwarf_lineaddr(dwarf_onesrcline(lines, pos), &line_addr))
+			break;
+		if (line_addr == addr) {
+			*idx = pos;
+			return true;
+		}
+		pos++;
+	}
+	return false;
+}
+
+/*
+ * die_get_postprologue_addr - Search next address after function prologue
+ * @entrypc_idx: entrypc index
+ * @lines: Line records of single CU
+ * @nr_lines: Number of @lines
+ * @highpc: high PC address of function
+ * @postprologue_addr: Next address after function prologue (return value)
+ *
+ * Look for prologue-end marker. If there is no explicit marker, return
+ * address of next line record or next source line.
+ */
+static bool die_get_postprologue_addr(unsigned long entrypc_idx,
+				      Dwarf_Lines *lines,
+				      unsigned long nr_lines,
+				      Dwarf_Addr highpc,
+				      Dwarf_Addr *postprologue_addr)
+{
+	unsigned long i;
+	int entrypc_lno, lno;
+	Dwarf_Line *line;
+	Dwarf_Addr addr;
+	bool p_end;
+
+	/* entrypc_lno is actual source line number */
+	line = dwarf_onesrcline(lines, entrypc_idx);
+	if (dwarf_lineno(line, &entrypc_lno))
+		return false;
+
+	for (i = entrypc_idx; i < nr_lines; i++) {
+		line = dwarf_onesrcline(lines, i);
+
+		if (dwarf_lineaddr(line, &addr) ||
+		    dwarf_lineno(line, &lno)    ||
+		    dwarf_lineprologueend(line, &p_end))
+			return false;
+
+		/* highpc is exclusive. [entrypc,highpc) */
+		if (addr >= highpc)
+			break;
+
+		/* clang supports prologue-end marker */
+		if (p_end)
+			break;
+
+		/* Actual next line in source */
+		if (lno != entrypc_lno)
+			break;
+
+		/*
+		 * Single source line can have multiple line records.
+		 * For Example,
+		 *     void foo() { printf("hello\n"); }
+		 * contains two line records. One points to declaration and
+		 * other points to printf() line. Variable 'lno' won't get
+		 * incremented in this case but 'i' will.
+		 */
+		if (i != entrypc_idx)
+			break;
+	}
+
+	dwarf_lineaddr(line, postprologue_addr);
+	if (*postprologue_addr >= highpc)	/* past the function: step back */
+		dwarf_lineaddr(dwarf_onesrcline(lines, i - 1),
+			       postprologue_addr);
+
+	return true;
+}
+
+/*
+ * die_skip_prologue - Use next address after prologue as probe location
+ * @sp_die: a subprogram DIE
+ * @cu_die: a CU DIE
+ * @entrypc: entrypc of the function
+ *
+ * Function prologue prepares stack and registers before executing function
+ * logic. When target program is compiled without optimization, function
+ * parameter information is only valid after prologue, so a probe at entrypc
+ * that records function parameters sees garbage values. On success @entrypc
+ * is moved past the prologue; on any lookup failure it is left unchanged.
+ */
+void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
+		       Dwarf_Addr *entrypc)
+{
+	Dwarf_Addr highpc, postprologue_addr;
+	unsigned long entrypc_idx = 0;
+	Dwarf_Lines *lines = NULL;
+	size_t nr_lines = 0;
+	bool found;
+
+	if (dwarf_highpc(sp_die, &highpc) != 0)
+		return;
+
+	if (dwarf_getsrclines(cu_die, &lines, &nr_lines) != 0)
+		return;
+
+	found = die_search_idx(lines, nr_lines, *entrypc, &entrypc_idx);
+	if (!found)
+		return;
+
+	found = die_get_postprologue_addr(entrypc_idx, lines, nr_lines,
+					  highpc, &postprologue_addr);
+	if (found)
+		*entrypc = postprologue_addr;
+}
diff --git a/util/dwarf-aux.h b/util/dwarf-aux.h
new file mode 100644
index 0000000..8ac53bf
--- /dev/null
+++ b/util/dwarf-aux.h
@@ -0,0 +1,142 @@
+#ifndef _DWARF_AUX_H
+#define _DWARF_AUX_H
+/*
+ * dwarf-aux.h : libdw auxiliary interfaces
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <elfutils/version.h>
+
+/* Only pointers to it are used below, so a forward declaration suffices */
+struct strbuf;
+
+/* Find the realpath of the target file */
+const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname);
+
+/* Get DW_AT_comp_dir (should be NULL with older gcc) */
+const char *cu_get_comp_dir(Dwarf_Die *cu_die);
+
+/* Get a line number and file name for given address */
+int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
+		     const char **fname, int *lineno);
+
+/* Walk on functions at given address */
+int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+			 int (*callback)(Dwarf_Die *, void *), void *data);
+
+/* Get DW_AT_linkage_name (should be NULL for C binary) */
+const char *die_get_linkage_name(Dwarf_Die *dw_die);
+
+/* Ensure that this DIE is a subprogram and definition (not declaration) */
+bool die_is_func_def(Dwarf_Die *dw_die);
+
+/* Ensure that this DIE is an instance of a subprogram */
+bool die_is_func_instance(Dwarf_Die *dw_die);
+
+/* Compare diename and tname */
+bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
+
+/* Matching diename with glob pattern */
+bool die_match_name(Dwarf_Die *dw_die, const char *glob);
+
+/* Get callsite line number of inline-function instance */
+int die_get_call_lineno(Dwarf_Die *in_die);
+
+/* Get callsite file name of inlined function instance */
+const char *die_get_call_file(Dwarf_Die *in_die);
+
+/* Get type die */
+Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+
+/* Get a type die, but skip qualifiers and typedef */
+Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+
+/* Check whether the DIE is signed or not */
+bool die_is_signed_type(Dwarf_Die *tp_die);
+
+/* Get data_member_location offset */
+int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs);
+
+/* Return values for die_find_child() callbacks (CHILD/SIBLING are bits) */
+enum {
+	DIE_FIND_CB_END = 0,		/* End of Search */
+	DIE_FIND_CB_CHILD = 1,		/* Search only children */
+	DIE_FIND_CB_SIBLING = 2,	/* Search only siblings */
+	DIE_FIND_CB_CONTINUE = 3,	/* CHILD | SIBLING: search both */
+};
+
+/* Search child DIEs */
+Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+			 int (*callback)(Dwarf_Die *, void *),
+			 void *data, Dwarf_Die *die_mem);
+
+/* Search a non-inlined function including given address */
+Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+			     Dwarf_Die *die_mem);
+
+/* Search a non-inlined function with tail call at given address */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+				    Dwarf_Die *die_mem);
+
+/* Search the top inlined function including given address */
+Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+				   Dwarf_Die *die_mem);
+
+/* Search the deepest inlined function including given address */
+Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+			       Dwarf_Die *die_mem);
+
+/* Walk on the instances of given DIE */
+int die_walk_instances(Dwarf_Die *in_die,
+		       int (*callback)(Dwarf_Die *, void *), void *data);
+
+/* Walker on lines (Note: line number will not be sorted) */
+typedef int (* line_walk_callback_t) (const char *fname, int lineno,
+				      Dwarf_Addr addr, void *data);
+
+/*
+ * Walk on lines inside given DIE. If the DIE is a subprogram, walk only on
+ * the lines inside the subprogram, otherwise the DIE must be a CU DIE.
+ */
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data);
+
+/* Find a variable called 'name' at given address */
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+				Dwarf_Addr addr, Dwarf_Die *die_mem);
+
+/* Find a member called 'name' */
+Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+			   Dwarf_Die *die_mem);
+
+/* Get the type name of given variable DIE */
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
+
+/* Get the name and type of given variable DIE, stored as "type\tname" */
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
+
+/* Check if target program is compiled with optimization */
+bool die_is_optimized_target(Dwarf_Die *cu_die);
+
+/* Use next address after prologue as probe location */
+void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
+		       Dwarf_Addr *entrypc);
+
+#endif
diff --git a/util/dwarf-regs.c b/util/dwarf-regs.c
new file mode 100644
index 0000000..db55edd
--- /dev/null
+++ b/util/dwarf-regs.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ *
+ * Written by: Masami Hiramatsu <mhiramat@kernel.org>
+ */
+
+#include <util.h>
+#include <debug.h>
+#include <dwarf-regs.h>
+#include <elf.h>
+#include <linux/kernel.h>
+
+#ifndef EM_AARCH64
+#define EM_AARCH64	183  /* ARM 64 bit */
+#endif
+
+/* Define const char * {arch}_register_tbl[] */
+#define DEFINE_DWARF_REGSTR_TABLE
+#include "../arch/x86/include/dwarf-regs-table.h"
+#include "../arch/arm/include/dwarf-regs-table.h"
+#include "../arch/arm64/include/dwarf-regs-table.h"
+#include "../arch/sh/include/dwarf-regs-table.h"
+#include "../arch/powerpc/include/dwarf-regs-table.h"
+#include "../arch/s390/include/dwarf-regs-table.h"
+#include "../arch/sparc/include/dwarf-regs-table.h"
+#include "../arch/xtensa/include/dwarf-regs-table.h"
+
+#define __get_dwarf_regstr(t, i) (((i) < ARRAY_SIZE(t)) ? (t)[(i)] : NULL)
+
+/* Return architecture dependent register string (for kprobe-tracer) */
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
+{
+	if (machine == EM_NONE)	/* Generic arch - use host arch */
+		return get_arch_regstr(n);
+
+	/* Explicit ELF machine: bounds-checked lookup in that arch's table */
+	if (machine == EM_386)
+		return __get_dwarf_regstr(x86_32_regstr_tbl, n);
+	if (machine == EM_X86_64)
+		return __get_dwarf_regstr(x86_64_regstr_tbl, n);
+	if (machine == EM_ARM)
+		return __get_dwarf_regstr(arm_regstr_tbl, n);
+	if (machine == EM_AARCH64)
+		return __get_dwarf_regstr(aarch64_regstr_tbl, n);
+	if (machine == EM_SH)
+		return __get_dwarf_regstr(sh_regstr_tbl, n);
+	if (machine == EM_S390)
+		return __get_dwarf_regstr(s390_regstr_tbl, n);
+	if (machine == EM_PPC || machine == EM_PPC64)
+		return __get_dwarf_regstr(powerpc_regstr_tbl, n);
+	if (machine == EM_SPARC || machine == EM_SPARCV9)
+		return __get_dwarf_regstr(sparc_regstr_tbl, n);
+	if (machine == EM_XTENSA)
+		return __get_dwarf_regstr(xtensa_regstr_tbl, n);
+
+	pr_err("ELF MACHINE %x is not supported.\n", machine);
+	return NULL;
+}
diff --git a/util/env.c b/util/env.c
new file mode 100644
index 0000000..4c84276
--- /dev/null
+++ b/util/env.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cpumap.h"
+#include "env.h"
+#include "sane_ctype.h"
+#include "util.h"
+#include <errno.h>
+#include <sys/utsname.h>
+
+struct perf_env perf_env;
+
+/* Free the dynamically allocated members of @env (not @env itself). */
+void perf_env__exit(struct perf_env *env)
+{
+	int idx;
+
+	for (idx = 0; idx < env->nr_memory_nodes; idx++)
+		free(env->memory_nodes[idx].set);
+	zfree(&env->memory_nodes);
+
+	for (idx = 0; idx < env->caches_cnt; idx++)
+		cpu_cache_level__free(&env->caches[idx]);
+	zfree(&env->caches);
+
+	for (idx = 0; idx < env->nr_numa_nodes; idx++)
+		cpu_map__put(env->numa_nodes[idx].map);
+	zfree(&env->numa_nodes);
+	zfree(&env->cpu);
+	zfree(&env->pmu_mappings);
+	zfree(&env->sibling_threads);
+	zfree(&env->sibling_cores);
+	zfree(&env->cmdline_argv);
+	zfree(&env->cmdline);
+	zfree(&env->cpuid);
+	zfree(&env->cpu_desc);
+	zfree(&env->arch);
+	zfree(&env->version);
+	zfree(&env->os_release);
+	zfree(&env->hostname);
+}
+
+/*
+ * Record the command line in @env.  Only the argv[] pointer array is
+ * duplicated; the strings themselves are not copied.  Returns 0, or
+ * -ENOMEM when the allocation fails or an argv[] entry is NULL.
+ */
+int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
+{
+	int i = 0;
+
+	/* do not include NULL termination */
+	env->cmdline_argv = calloc(argc, sizeof(char *));
+	if (!env->cmdline_argv)
+		return -ENOMEM;
+
+	while (i < argc) {
+		if (argv[i] == NULL) {
+			zfree(&env->cmdline_argv);
+			return -ENOMEM;
+		}
+		env->cmdline_argv[i] = argv[i];
+		i++;
+	}
+
+	env->nr_cmdline = argc;
+
+	return 0;
+}
+
+/* Build env->cpu[] (core and socket id per CPU) unless it already exists. */
+int perf_env__read_cpu_topology_map(struct perf_env *env)
+{
+	int nr_cpus, idx;
+
+	if (env->cpu)
+		return 0;
+
+	if (!env->nr_cpus_avail)
+		env->nr_cpus_avail = cpu__max_present_cpu();
+	nr_cpus = env->nr_cpus_avail;
+	if (nr_cpus == -1)
+		return -EINVAL;
+
+	env->cpu = calloc(nr_cpus, sizeof(env->cpu[0]));
+	if (!env->cpu)
+		return -ENOMEM;
+
+	for (idx = 0; idx < nr_cpus; idx++) {
+		env->cpu[idx].core_id	= cpu_map__get_core_id(idx);
+		env->cpu[idx].socket_id	= cpu_map__get_socket_id(idx);
+	}
+
+	env->nr_cpus_avail = nr_cpus;
+	return 0;
+}
+
+void cpu_cache_level__free(struct cpu_cache_level *cache)
+{
+	free(cache->size);	/* members only: *cache is not freed */
+	free(cache->map);
+	free(cache->type);
+}
+
+/*
+ * Return architecture name in a normalized form (logic from the Makefile).
+ * Unrecognized names are returned as-is, i.e. the result may alias @arch.
+ */
+static const char *normalize_arch(char *arch)
+{
+	if (!strcmp(arch, "x86_64"))
+		return "x86";
+	/* the arch[1] test keeps a one-character name from being read past its NUL */
+	if (arch[0] == 'i' && arch[1] && arch[2] == '8' && arch[3] == '6')
+		return "x86";
+	if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
+		return "sparc";
+	if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64"))
+		return "arm64";
+	if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
+		return "arm";
+	if (!strncmp(arch, "s390", 4))
+		return "s390";
+	if (!strncmp(arch, "parisc", 6))
+		return "parisc";
+	if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
+		return "powerpc";
+	if (!strncmp(arch, "mips", 4))
+		return "mips";
+	if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
+		return "sh";
+	return arch;
+}
+
+/* Return the normalized arch name recorded in @env, or that of this host. */
+const char *perf_env__arch(struct perf_env *env)
+{
+	/* static: normalize_arch() may return a pointer into uts.machine */
+	static struct utsname uts;
+
+	if (env && env->arch)
+		return normalize_arch(env->arch);
+
+	/* No recorded arch: assume local operation and ask uname(). */
+	if (uname(&uts) < 0)
+		return NULL;
+	return normalize_arch(uts.machine);
+}
diff --git a/util/env.h b/util/env.h
new file mode 100644
index 0000000..c4ef2e5
--- /dev/null
+++ b/util/env.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ENV_H
+#define __PERF_ENV_H
+
+#include <linux/types.h>
+#include "cpumap.h"
+
+struct cpu_topology_map {
+	int	socket_id;	/* set from cpu_map__get_socket_id() in env.c */
+	int	core_id;	/* set from cpu_map__get_core_id() in env.c */
+};
+
+struct cpu_cache_level {
+	u32	level;
+	u32	line_size;
+	u32	sets;
+	u32	ways;
+	char	*type;	/* free()d by cpu_cache_level__free() */
+	char	*size;	/* free()d by cpu_cache_level__free() */
+	char	*map;	/* free()d by cpu_cache_level__free() */
+};
+
+struct numa_node {
+	u32		 node;
+	u64		 mem_total;
+	u64		 mem_free;
+	struct cpu_map	*map;	/* released via cpu_map__put() in perf_env__exit() */
+};
+
+struct memory_node {
+	u64		 node;
+	u64		 size;
+	unsigned long	*set;	/* free()d in perf_env__exit() */
+};
+
+struct perf_env {	/* pointer members are released by perf_env__exit() */
+	char			*hostname;
+	char			*os_release;
+	char			*version;
+	char			*arch;	/* raw name; see perf_env__arch() */
+	int			nr_cpus_online;
+	int			nr_cpus_avail;	/* number of entries in cpu[] */
+	char			*cpu_desc;
+	char			*cpuid;
+	unsigned long long	total_mem;
+	unsigned int		msr_pmu_type;
+
+	int			nr_cmdline;	/* number of entries in cmdline_argv[] */
+	int			nr_sibling_cores;
+	int			nr_sibling_threads;
+	int			nr_numa_nodes;	/* number of entries in numa_nodes[] */
+	int			nr_memory_nodes; /* number of entries in memory_nodes[] */
+	int			nr_pmu_mappings;
+	int			nr_groups;
+	char			*cmdline;
+	const char		**cmdline_argv;	/* array is owned, strings are not copied */
+	char			*sibling_cores;
+	char			*sibling_threads;
+	char			*pmu_mappings;
+	struct cpu_topology_map	*cpu;	/* see perf_env__read_cpu_topology_map() */
+	struct cpu_cache_level	*caches;
+	int			 caches_cnt;	/* number of entries in caches[] */
+	struct numa_node	*numa_nodes;
+	struct memory_node	*memory_nodes;
+	unsigned long long	 memory_bsize;
+};
+
+extern struct perf_env perf_env;
+
+void perf_env__exit(struct perf_env *env);
+
+int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
+
+int perf_env__read_cpu_topology_map(struct perf_env *env);
+
+void cpu_cache_level__free(struct cpu_cache_level *cache);
+
+const char *perf_env__arch(struct perf_env *env);
+#endif /* __PERF_ENV_H */
diff --git a/util/event.c b/util/event.c
new file mode 100644
index 0000000..98ff3a6
--- /dev/null
+++ b/util/event.c
@@ -0,0 +1,1682 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include <api/fs/fs.h>
+#include <linux/perf_event.h>
+#include "event.h"
+#include "debug.h"
+#include "hist.h"
+#include "machine.h"
+#include "sort.h"
+#include "string2.h"
+#include "strlist.h"
+#include "thread.h"
+#include "thread_map.h"
+#include "sane_ctype.h"
+#include "symbol/kallsyms.h"
+#include "asm/bug.h"
+#include "stat.h"
+
+static const char *perf_event__names[] = {	/* indexed by record type */
+	[0]					= "TOTAL",
+	[PERF_RECORD_MMAP]			= "MMAP",
+	[PERF_RECORD_MMAP2]			= "MMAP2",
+	[PERF_RECORD_LOST]			= "LOST",
+	[PERF_RECORD_COMM]			= "COMM",
+	[PERF_RECORD_EXIT]			= "EXIT",
+	[PERF_RECORD_THROTTLE]			= "THROTTLE",
+	[PERF_RECORD_UNTHROTTLE]		= "UNTHROTTLE",
+	[PERF_RECORD_FORK]			= "FORK",
+	[PERF_RECORD_READ]			= "READ",
+	[PERF_RECORD_SAMPLE]			= "SAMPLE",
+	[PERF_RECORD_AUX]			= "AUX",
+	[PERF_RECORD_ITRACE_START]		= "ITRACE_START",
+	[PERF_RECORD_LOST_SAMPLES]		= "LOST_SAMPLES",
+	[PERF_RECORD_SWITCH]			= "SWITCH",
+	[PERF_RECORD_SWITCH_CPU_WIDE]		= "SWITCH_CPU_WIDE",
+	[PERF_RECORD_NAMESPACES]		= "NAMESPACES",
+	[PERF_RECORD_HEADER_ATTR]		= "ATTR",
+	[PERF_RECORD_HEADER_EVENT_TYPE]		= "EVENT_TYPE",
+	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
+	[PERF_RECORD_HEADER_BUILD_ID]		= "BUILD_ID",
+	[PERF_RECORD_FINISHED_ROUND]		= "FINISHED_ROUND",
+	[PERF_RECORD_ID_INDEX]			= "ID_INDEX",
+	[PERF_RECORD_AUXTRACE_INFO]		= "AUXTRACE_INFO",
+	[PERF_RECORD_AUXTRACE]			= "AUXTRACE",
+	[PERF_RECORD_AUXTRACE_ERROR]		= "AUXTRACE_ERROR",
+	[PERF_RECORD_THREAD_MAP]		= "THREAD_MAP",
+	[PERF_RECORD_CPU_MAP]			= "CPU_MAP",
+	[PERF_RECORD_STAT_CONFIG]		= "STAT_CONFIG",
+	[PERF_RECORD_STAT]			= "STAT",
+	[PERF_RECORD_STAT_ROUND]		= "STAT_ROUND",
+	[PERF_RECORD_EVENT_UPDATE]		= "EVENT_UPDATE",
+	[PERF_RECORD_TIME_CONV]			= "TIME_CONV",
+	[PERF_RECORD_HEADER_FEATURE]		= "FEATURE",
+};
+
+static const char *perf_ns__names[] = {	/* used as the <ns> in /proc/<pid>/ns/<ns> */
+	[NET_NS_INDEX]		= "net",
+	[UTS_NS_INDEX]		= "uts",
+	[IPC_NS_INDEX]		= "ipc",
+	[PID_NS_INDEX]		= "pid",
+	[USER_NS_INDEX]		= "user",
+	[MNT_NS_INDEX]		= "mnt",
+	[CGROUP_NS_INDEX]	= "cgroup",
+};
+
+/* Name of record type @id: "INVALID" if out of range, "UNKNOWN" if unnamed. */
+const char *perf_event__name(unsigned int id)
+{
+	if (id >= ARRAY_SIZE(perf_event__names))
+		return "INVALID";
+
+	return perf_event__names[id] ? perf_event__names[id] : "UNKNOWN";
+}
+
+/* Map a namespace index (e.g. NET_NS_INDEX) to its short name. */
+static const char *perf_ns__name(unsigned int id)
+{
+	/* anything past the end of the table is reported as "UNKNOWN" */
+	return id < ARRAY_SIZE(perf_ns__names) ? perf_ns__names[id] : "UNKNOWN";
+}
+
+/*
+ * Pass @event to @process together with a stub sample: ids, time and cpu
+ * are -1, period is 1 and cpumode is taken from the event header.
+ */
+static int perf_tool__process_synth_event(struct perf_tool *tool,
+					  union perf_event *event,
+					  struct machine *machine,
+					  perf_event__handler_t process)
+{
+	struct perf_sample synth_sample = {
+		.pid = -1, .tid = -1, .time = -1, .stream_id = -1, .cpu = -1,
+		.period = 1,
+		.cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
+	};
+
+	return process(tool, event, &synth_sample, machine);
+}
+
+/*
+ * Read comm, tgid and ppid of @pid from /proc/<pid>/status.  Assumes that
+ * all three fields lie within the first 4095 bytes of that file.  A missing
+ * field is only logged: @comm is left alone and the ids stay at -1.
+ * Returns 0, or -1 when the file cannot be opened or read.
+ */
+static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
+				    pid_t *tgid, pid_t *ppid)
+{
+	char path[PATH_MAX];
+	char buf[4096];
+	char *name, *tgids, *ppids;
+	ssize_t nread;
+	int fd;
+
+	*tgid = -1;
+	*ppid = -1;
+
+	snprintf(path, sizeof(path), "/proc/%d/status", pid);
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		pr_debug("couldn't open %s\n", path);
+		return -1;
+	}
+
+	nread = read(fd, buf, sizeof(buf) - 1);
+	close(fd);
+	if (nread <= 0) {
+		pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n",
+			   pid);
+		return -1;
+	}
+	buf[nread] = '\0';
+
+	/* find all three tags first: the name is NUL-terminated in place below */
+	name = strstr(buf, "Name:");
+	tgids = strstr(buf, "Tgid:");
+	ppids = strstr(buf, "PPid:");
+
+	if (!name) {
+		pr_debug("Name: string not found for pid %d\n", pid);
+	} else {
+		size_t size;
+		char *eol;
+
+		name = ltrim(name + 5);	/* 5 == strlen("Name:") */
+		eol = strchr(name, '\n');
+		if (eol)
+			*eol = '\0';
+
+		size = strlen(name);
+		if (size >= len)
+			size = len - 1;	/* truncate, leaving room for the NUL */
+		memcpy(comm, name, size);
+		comm[size] = '\0';
+	}
+
+	/* the ids are parsed with atoi(), i.e. without any error detection */
+	if (tgids) {
+		*tgid = atoi(tgids + 5);  /* strlen("Tgid:") */
+	} else {
+		pr_debug("Tgid: string not found for pid %d\n", pid);
+	}
+
+	if (ppids) {
+		*ppid = atoi(ppids + 5);  /* strlen("PPid:") */
+	} else {
+		pr_debug("PPid: string not found for pid %d\n", pid);
+	}
+
+	return 0;
+}
+
+/*
+ * Fill @event as a PERF_RECORD_COMM for thread @pid, without delivering it.
+ * Host: comm, *@tgid, *@ppid come from /proc; else *@tgid = machine->pid.
+ */
+static int perf_event__prepare_comm(union perf_event *event, pid_t pid,
+				    struct machine *machine,
+				    pid_t *tgid, pid_t *ppid)
+{
+	size_t len;
+
+	*ppid = -1;
+	memset(&event->comm, 0, sizeof(event->comm));
+
+	if (!machine__is_host(machine))
+		*tgid = machine->pid;
+	else if (perf_event__get_comm_ids(pid, event->comm.comm,
+					  sizeof(event->comm.comm),
+					  tgid, ppid) != 0)
+		return -1;
+
+	if (*tgid < 0)
+		return -1;
+
+	/* name length including its NUL, rounded up to a multiple of u64 */
+	len = PERF_ALIGN(strlen(event->comm.comm) + 1, sizeof(u64));
+	/* clear the id_hdr_size bytes that follow the shortened comm */
+	memset(event->comm.comm + len, 0, machine->id_hdr_size);
+
+	event->comm.header.type = PERF_RECORD_COMM;
+	event->comm.header.size = (sizeof(event->comm) -
+				(sizeof(event->comm.comm) - len) +
+				machine->id_hdr_size);
+	event->comm.pid = *tgid;
+	event->comm.tid = pid;
+	return 0;
+}
+
+/* Build and deliver a COMM event for @pid; returns its tgid, or -1. */
+pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+					 union perf_event *event, pid_t pid,
+					 perf_event__handler_t process,
+					 struct machine *machine)
+{
+	pid_t tgid, ppid;
+
+	/* a failure in either step aborts the synthesis */
+	if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0 ||
+	    perf_tool__process_synth_event(tool, event, machine, process) != 0)
+		return -1;
+
+	return tgid;
+}
+
+static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
+					 struct perf_ns_link_info *ns_link_info)
+{
+	char path[128];
+	struct stat64 sb;
+
+	sprintf(path, "/proc/%u/ns/%s", pid, ns);
+	if (stat64(path, &sb) != 0)
+		return;	/* *ns_link_info is left untouched */
+	ns_link_info->dev = sb.st_dev;
+	ns_link_info->ino = sb.st_ino;
+}
+
+/*
+ * Emit a PERF_RECORD_NAMESPACES event for thread @pid of process @tgid.
+ * Does nothing (returns 0) unless @tool has namespace_events set.
+ */
+int perf_event__synthesize_namespaces(struct perf_tool *tool,
+				      union perf_event *event,
+				      pid_t pid, pid_t tgid,
+				      perf_event__handler_t process,
+				      struct machine *machine)
+{
+	struct perf_ns_link_info *link;
+	size_t size;
+	u32 idx;
+
+	if (!tool || !tool->namespace_events)
+		return 0;
+
+	/* fixed part + NR_NAMESPACES link_info slots + id_hdr_size bytes */
+	size = sizeof(event->namespaces) +
+	       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+	       machine->id_hdr_size;
+	memset(&event->namespaces, 0, size);
+
+	event->namespaces.header.type = PERF_RECORD_NAMESPACES;
+	event->namespaces.header.size = size;
+	event->namespaces.pid = tgid;
+	event->namespaces.tid = pid;
+	event->namespaces.nr_namespaces = NR_NAMESPACES;
+
+	link = event->namespaces.link_info;
+	for (idx = 0; idx < event->namespaces.nr_namespaces; idx++, link++)
+		perf_event__get_ns_link_info(pid, perf_ns__name(idx), link);
+
+	if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Emit a PERF_RECORD_FORK describing thread @pid of process @tgid.
+ * @ppid is only used for the main thread (@pid == @tgid).
+ * Returns 0, or -1 if delivering the event fails.
+ */
+static int perf_event__synthesize_fork(struct perf_tool *tool,
+				       union perf_event *event,
+				       pid_t pid, pid_t tgid, pid_t ppid,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	/*
+	 * The main thread's parent is the ppid from the status file; every
+	 * other thread is assumed to have been spawned by the main thread.
+	 */
+	pid_t parent = (tgid == pid) ? ppid : tgid;
+
+	memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
+
+	event->fork.header.type = PERF_RECORD_FORK;
+	event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
+	event->fork.pid  = tgid;
+	event->fork.tid  = pid;
+	event->fork.ppid = parent;
+	event->fork.ptid = parent;
+
+	if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Emit a PERF_RECORD_MMAP2 for each mapping in <root_dir>/proc/<pid>/task/
+ * <pid>/maps that is executable, or readable when @mmap_data is set.  When
+ * reading outlasts @proc_map_timeout a final event flagged with
+ * PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT is sent and parsing stops.
+ */
+int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+				       union perf_event *event,
+				       pid_t pid, pid_t tgid,
+				       perf_event__handler_t process,
+				       struct machine *machine,
+				       bool mmap_data,
+				       unsigned int proc_map_timeout)
+{
+	char filename[PATH_MAX];
+	FILE *fp;
+	unsigned long long t;
+	bool truncation = false;
+	unsigned long long timeout = proc_map_timeout * 1000000ULL;
+	int rc = 0;
+	const char *hugetlbfs_mnt = hugetlbfs__mountpoint();
+	int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;
+
+	if (machine__is_default_guest(machine))
+		return 0;
+
+	snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps",
+		 machine->root_dir, pid, pid);
+
+	fp = fopen(filename, "r");
+	if (fp == NULL) {
+		/* We raced with a task exiting - just return: */
+		pr_debug("couldn't open %s\n", filename);
+		return -1;
+	}
+
+	event->header.type = PERF_RECORD_MMAP2;
+	t = rdclock();
+
+	while (1) {
+		char bf[BUFSIZ];
+		char prot[5];
+		char execname[PATH_MAX];
+		char anonstr[] = "//anon";
+		unsigned int ino;
+		size_t size;
+		ssize_t n;
+
+		if (fgets(bf, sizeof(bf), fp) == NULL)
+			break;
+
+		if ((rdclock() - t) > timeout) {
+			pr_warning("Reading %s time out. "
+				   "You may want to increase "
+				   "the time limit by --proc-map-timeout\n",
+				   filename);
+			truncation = true;
+			goto out;
+		}
+
+		/* ensure null termination since stack will be reused. */
+		strcpy(execname, "");
+
+		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
+		/* NOTE(review): %s and %[^\n] below carry no field width - confirm */
+		n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n",
+		       &event->mmap2.start, &event->mmap2.len, prot,
+		       &event->mmap2.pgoff, &event->mmap2.maj,
+		       &event->mmap2.min,
+		       &ino, execname);
+
+		/* Anon maps don't have the execname, so 7 converted fields suffice. */
+		if (n < 7)
+			continue;
+
+		event->mmap2.ino = (u64)ino;
+
+		/* Just like __perf_event_mmap in kernel/perf_event.c */
+		if (machine__is_host(machine))
+			event->header.misc = PERF_RECORD_MISC_USER;
+		else
+			event->header.misc = PERF_RECORD_MISC_GUEST_USER;
+
+		/* map protection and flags bits */
+		event->mmap2.prot = 0;
+		event->mmap2.flags = 0;
+		if (prot[0] == 'r')
+			event->mmap2.prot |= PROT_READ;
+		if (prot[1] == 'w')
+			event->mmap2.prot |= PROT_WRITE;
+		if (prot[2] == 'x')
+			event->mmap2.prot |= PROT_EXEC;
+
+		if (prot[3] == 's')
+			event->mmap2.flags |= MAP_SHARED;
+		else
+			event->mmap2.flags |= MAP_PRIVATE;
+
+		if (prot[2] != 'x') {
+			if (!mmap_data || prot[0] != 'r')
+				continue;
+
+			event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
+		}
+
+out:
+		if (truncation)
+			event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
+
+		if (!strcmp(execname, ""))
+			strcpy(execname, anonstr);
+
+		if (hugetlbfs_mnt_len &&
+		    !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
+			strcpy(execname, anonstr);
+			event->mmap2.flags |= MAP_HUGETLB;
+		}
+
+		size = strlen(execname) + 1;
+		memcpy(event->mmap2.filename, execname, size);
+		size = PERF_ALIGN(size, sizeof(u64));
+		event->mmap2.len -= event->mmap.start;	/* end address -> length */
+		event->mmap2.header.size = (sizeof(event->mmap2) -
+					(sizeof(event->mmap2.filename) - size));
+		memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+		event->mmap2.header.size += machine->id_hdr_size;
+		event->mmap2.pid = tgid;
+		event->mmap2.tid = pid;
+
+		if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
+			rc = -1;
+			break;
+		}
+
+		if (truncation)
+			break;
+	}
+
+	fclose(fp);
+	return rc;
+}
+
+/*
+ * Emit a PERF_RECORD_MMAP for each non-kernel map in the function maps of
+ * @machine.  Returns 0, or -1 on allocation or delivery failure.
+ */
+int perf_event__synthesize_modules(struct perf_tool *tool,
+				   perf_event__handler_t process,
+				   struct machine *machine)
+{
+	struct maps *maps = &machine->kmaps.maps[MAP__FUNCTION];
+	union perf_event *event;
+	struct map *pos;
+	int rc = 0;
+
+	event = zalloc(sizeof(event->mmap) + machine->id_hdr_size);
+	if (event == NULL) {
+		pr_debug("Not enough memory synthesizing mmap event "
+			 "for kernel modules\n");
+		return -1;
+	}
+
+	/*
+	 * kernel uses 0 for user space maps, see kernel/perf_event.c
+	 * __perf_event_mmap
+	 */
+	event->header.misc = machine__is_host(machine) ?
+			     PERF_RECORD_MISC_KERNEL :
+			     PERF_RECORD_MISC_GUEST_KERNEL;
+
+	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+		size_t name_len, size;
+
+		if (__map__is_kernel(pos))
+			continue;
+
+		name_len = pos->dso->long_name_len + 1;	/* with the NUL */
+		size = PERF_ALIGN(name_len, sizeof(u64));
+		event->mmap.header.type = PERF_RECORD_MMAP;
+		event->mmap.header.size = (sizeof(event->mmap) -
+				(sizeof(event->mmap.filename) - size)) +
+				machine->id_hdr_size;
+		memset(event->mmap.filename + size, 0, machine->id_hdr_size);
+		event->mmap.start = pos->start;
+		event->mmap.len   = pos->end - pos->start;
+		event->mmap.pid   = machine->pid;
+		memcpy(event->mmap.filename, pos->dso->long_name, name_len);
+		if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
+			rc = -1;
+			break;
+		}
+	}
+
+	free(event);
+	return rc;
+}
+
+/*
+ * @full == 0: events for @pid alone; else one set per /proc/<pid>/task entry.
+ */
+static int __event__synthesize_thread(union perf_event *comm_event,
+				      union perf_event *mmap_event,
+				      union perf_event *fork_event,
+				      union perf_event *namespaces_event,
+				      pid_t pid, int full,
+				      perf_event__handler_t process,
+				      struct perf_tool *tool,
+				      struct machine *machine,
+				      bool mmap_data,
+				      unsigned int proc_map_timeout)
+{
+	char filename[PATH_MAX];
+	DIR *tasks;
+	struct dirent *dirent;
+	pid_t tgid, ppid;
+	int rc = 0;
+
+	/* special case: only send one comm event using passed in pid */
+	if (!full) {
+		tgid = perf_event__synthesize_comm(tool, comm_event, pid,
+						   process, machine);
+
+		if (tgid == -1)
+			return -1;
+
+		if (perf_event__synthesize_namespaces(tool, namespaces_event, pid,
+						      tgid, process, machine) < 0)
+			return -1;
+
+		return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+							  process, machine, mmap_data,
+							  proc_map_timeout);
+	}
+
+	if (machine__is_default_guest(machine))
+		return 0;
+
+	snprintf(filename, sizeof(filename), "%s/proc/%d/task",
+		 machine->root_dir, pid);
+
+	tasks = opendir(filename);
+	if (tasks == NULL) {
+		pr_debug("couldn't open %s\n", filename);
+		return 0;	/* reported as success */
+	}
+
+	while ((dirent = readdir(tasks)) != NULL) {
+		char *end;
+		pid_t _pid;
+
+		_pid = strtol(dirent->d_name, &end, 10);
+		if (*end)
+			continue;
+
+		rc = -1;
+		if (perf_event__prepare_comm(comm_event, _pid, machine,
+					     &tgid, &ppid) != 0)
+			break;
+
+		if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
+						ppid, process, machine) < 0)
+			break;
+
+		if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid,
+						      tgid, process, machine) < 0)
+			break;
+
+		/* Send the prepared comm event, after the fork and namespaces ones */
+		if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0)
+			break;
+
+		rc = 0;
+		if (_pid == pid) {
+			/* process the parent's maps too */
+			rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+						process, machine, mmap_data, proc_map_timeout);
+			if (rc)
+				break;
+		}
+	}
+
+	closedir(tasks);
+	return rc;
+}
+
+/*
+ * Synthesize COMM, NAMESPACES and mmap events for every thread in @threads,
+ * plus the same for a thread's group leader when that leader is not itself
+ * part of @threads.  Returns 0 on success, -1 on any failure.
+ */
+int perf_event__synthesize_thread_map(struct perf_tool *tool,
+				      struct thread_map *threads,
+				      perf_event__handler_t process,
+				      struct machine *machine,
+				      bool mmap_data,
+				      unsigned int proc_map_timeout)
+{
+	union perf_event *comm_event, *mmap_event, *fork_event;
+	union perf_event *namespaces_event;
+	int err = -1, thread, j;
+
+	comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
+	if (comm_event == NULL)
+		goto out;
+
+	mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
+	if (mmap_event == NULL)
+		goto out_free_comm;
+
+	fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
+	if (fork_event == NULL)
+		goto out_free_mmap;
+
+	namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+				  (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+				  machine->id_hdr_size);
+	if (namespaces_event == NULL)
+		goto out_free_fork;
+
+	for (thread = 0; thread < threads->nr; ++thread) {
+		int leader;
+
+		if (__event__synthesize_thread(comm_event, mmap_event,
+					       fork_event, namespaces_event,
+					       thread_map__pid(threads, thread), 0,
+					       process, tool, machine,
+					       mmap_data, proc_map_timeout))
+			goto out_free_namespaces;
+
+		/* comm.pid now holds the thread group id of that thread */
+		leader = (int) comm_event->comm.pid;
+		if (leader == thread_map__pid(threads, thread))
+			continue;
+
+		/* is thread group leader in thread_map? */
+		for (j = 0; j < threads->nr; ++j) {
+			if (leader == thread_map__pid(threads, j))
+				break;
+		}
+
+		if (j < threads->nr)
+			continue;
+
+		/* if not, generate events for it */
+		if (__event__synthesize_thread(comm_event, mmap_event,
+					       fork_event, namespaces_event,
+					       leader, 0, process, tool, machine,
+					       mmap_data, proc_map_timeout))
+			goto out_free_namespaces;
+	}
+	err = 0;
+out_free_namespaces:
+	free(namespaces_event);
+out_free_fork:
+	free(fork_event);
+out_free_mmap:
+	free(mmap_event);
+out_free_comm:
+	free(comm_event);
+out:
+	return err;
+}
+
+/*
+ * Synthesize events for the numeric entries of @dirent[@start, @start + @num).
+ */
+static int __perf_event__synthesize_threads(struct perf_tool *tool,
+					    perf_event__handler_t process,
+					    struct machine *machine,
+					    bool mmap_data,
+					    unsigned int proc_map_timeout,
+					    struct dirent **dirent,
+					    int start,
+					    int num)
+{
+	union perf_event *comm_event, *mmap_event, *fork_event;
+	union perf_event *namespaces_event;
+	int err = -1;
+	int i;
+
+	comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
+	if (comm_event == NULL)
+		goto out;
+
+	mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
+	if (mmap_event == NULL)
+		goto out_free_comm;
+
+	fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
+	if (fork_event == NULL)
+		goto out_free_mmap;
+
+	namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+				  (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+				  machine->id_hdr_size);
+	if (namespaces_event == NULL)
+		goto out_free_fork;
+
+	for (i = start; i < start + num; i++) {
+		char *end;
+		pid_t pid;
+
+		/* only interested in proper numerical dirents */
+		if (!isdigit(dirent[i]->d_name[0]))
+			continue;
+		pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
+		if (*end)
+			continue;
+		/* may race with an exiting thread: failures here are ignored */
+		__event__synthesize_thread(comm_event, mmap_event, fork_event,
+					   namespaces_event, pid, 1, process,
+					   tool, machine, mmap_data,
+					   proc_map_timeout);
+	}
+	err = 0;
+
+	free(namespaces_event);
+out_free_fork:
+	free(fork_event);
+out_free_mmap:
+	free(mmap_event);
+out_free_comm:
+	free(comm_event);
+out:
+	return err;
+}
+
+struct synthesize_threads_arg {	/* arguments of one synthesize_threads_worker() */
+	struct perf_tool *tool;
+	perf_event__handler_t process;
+	struct machine *machine;
+	bool mmap_data;
+	unsigned int proc_map_timeout;
+	struct dirent **dirent;	/* the whole listing; not owned by the worker */
+	int num;		/* how many dirent[] entries this worker handles */
+	int start;		/* index of its first dirent[] entry */
+};
+
+/* pthread entry point: synthesize the dirent[] slice described by @arg. */
+static void *synthesize_threads_worker(void *arg)
+{
+	struct synthesize_threads_arg *w = arg;
+
+	__perf_event__synthesize_threads(w->tool, w->process, w->machine,
+					 w->mmap_data, w->proc_map_timeout,
+					 w->dirent, w->start, w->num);
+	return NULL;	/* the helper's status is dropped */
+}
+
+/*
+ * Synthesize events for every process found in <root_dir>/proc, spreading
+ * the directory entries over up to @nr_threads_synthesize worker threads
+ * (UINT_MAX: one per online CPU).  Returns 0 on success, -1 on failure.
+ */
+int perf_event__synthesize_threads(struct perf_tool *tool,
+				   perf_event__handler_t process,
+				   struct machine *machine,
+				   bool mmap_data,
+				   unsigned int proc_map_timeout,
+				   unsigned int nr_threads_synthesize)
+{
+	struct synthesize_threads_arg *args = NULL;
+	pthread_t *synthesize_threads = NULL;
+	char proc_path[PATH_MAX];
+	struct dirent **dirent;
+	int num_per_thread;
+	int m, n, i, j;
+	int thread_nr;
+	int base = 0;
+	int err = -1;
+
+	if (machine__is_default_guest(machine))
+		return 0;
+
+	snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
+	n = scandir(proc_path, &dirent, 0, alphasort);
+	if (n < 0)
+		return err;
+
+	if (nr_threads_synthesize == UINT_MAX)
+		thread_nr = sysconf(_SC_NPROCESSORS_ONLN);
+	else
+		thread_nr = nr_threads_synthesize;
+
+	/* never more workers than entries; also keeps the divisor below != 0 */
+	if (thread_nr > n)
+		thread_nr = n;
+
+	if (thread_nr <= 1) {
+		err = __perf_event__synthesize_threads(tool, process,
+						       machine, mmap_data,
+						       proc_map_timeout,
+						       dirent, base, n);
+		goto free_dirent;
+	}
+
+	synthesize_threads = calloc(sizeof(pthread_t), thread_nr);
+	if (synthesize_threads == NULL)
+		goto free_dirent;
+
+	args = calloc(sizeof(*args), thread_nr);
+	if (args == NULL)
+		goto free_threads;
+
+	num_per_thread = n / thread_nr;
+	m = n % thread_nr;
+	for (i = 0; i < thread_nr; i++) {
+		args[i].tool = tool;
+		args[i].process = process;
+		args[i].machine = machine;
+		args[i].mmap_data = mmap_data;
+		args[i].proc_map_timeout = proc_map_timeout;
+		args[i].dirent = dirent;
+		/* the first m workers each take one extra entry */
+		args[i].num = num_per_thread + (i < m ? 1 : 0);
+		args[i].start = base;
+		base += args[i].num;
+	}
+
+	for (i = 0; i < thread_nr; i++) {
+		if (pthread_create(&synthesize_threads[i], NULL,
+				   synthesize_threads_worker, &args[i]))
+			goto out_join;
+	}
+	err = 0;
+out_join:
+	for (j = 0; j < i; j++)	/* join only the i threads that were started */
+		pthread_join(synthesize_threads[j], NULL);
+	free(args);
+free_threads:
+	free(synthesize_threads);
+free_dirent:
+	for (i = 0; i < n; i++)
+		free(dirent[i]);
+	free(dirent);
+
+	return err;
+}
+
+struct process_symbol_args {
+	const char *name;	/* symbol name find_symbol_cb() compares against */
+	u64	   start;	/* address stored by find_symbol_cb() on a match */
+};
+
+/* kallsyms__parse() callback: returns 1 and records @start on a match. */
+static int find_symbol_cb(void *arg, const char *name, char type, u64 start)
+{
+	struct process_symbol_args *args = arg;
+
+	/*
+	 * Must be a function or at least an alias, as in PARISC64, where "_text" is
+	 * an 'A' to the same address as "_stext".
+	 */
+	if (!symbol_type__is_a(type, MAP__FUNCTION) && type != 'A')
+		return 0;
+	if (strcmp(name, args->name))
+		return 0;
+	args->start = start;
+	return 1;
+}
+
+/* Look up @symbol_name in @kallsyms_filename: 0 and *@addr set, else -1. */
+int kallsyms__get_function_start(const char *kallsyms_filename,
+				 const char *symbol_name, u64 *addr)
+{
+	struct process_symbol_args search = { .name = symbol_name, };
+
+	if (kallsyms__parse(kallsyms_filename, &search, find_symbol_cb) <= 0)
+		return -1;
+	*addr = search.start;
+	return 0;
+}
+
+/*
+ * Emit the PERF_RECORD_MMAP that describes the kernel map of @machine.
+ * Returns -1 without emitting when symbol_conf.kptr_restrict is set, the
+ * machine has no kernel map, or allocation fails; else @process' result.
+ */
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	struct map *map = machine__kernel_map(machine);
+	union perf_event *event;
+	struct kmap *kmap;
+	size_t size;
+	int err;
+
+	if (symbol_conf.kptr_restrict || map == NULL)
+		return -1;
+
+	/*
+	 * We should get this from /sys/kernel/sections/.text, but till that is
+	 * available use this, and after it is use this as a fallback for older
+	 * kernels.
+	 */
+	event = zalloc((sizeof(event->mmap) + machine->id_hdr_size));
+	if (event == NULL) {
+		pr_debug("Not enough memory synthesizing mmap event "
+			 "for kernel modules\n");
+		return -1;
+	}
+
+	/*
+	 * kernel uses PERF_RECORD_MISC_USER for user space maps,
+	 * see kernel/perf_event.c __perf_event_mmap
+	 */
+	event->header.misc = machine__is_host(machine) ?
+			     PERF_RECORD_MISC_KERNEL :
+			     PERF_RECORD_MISC_GUEST_KERNEL;
+
+	kmap = map__kmap(map);
+	size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
+			"%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
+	size = PERF_ALIGN(size, sizeof(u64));
+	event->mmap.header.type = PERF_RECORD_MMAP;
+	event->mmap.header.size = (sizeof(event->mmap) -
+			(sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
+	event->mmap.pid   = machine->pid;
+	event->mmap.pgoff = kmap->ref_reloc_sym->addr;
+	event->mmap.start = map->start;
+	event->mmap.len   = map->end - map->start;
+
+	err = perf_tool__process_synth_event(tool, event, machine, process);
+	free(event);
+	return err;
+}
+
+/* Emit a PERF_RECORD_THREAD_MAP carrying the pid and comm of each thread. */
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+				      struct thread_map *threads,
+				      perf_event__handler_t process,
+				      struct machine *machine)
+{
+	union perf_event *event;
+	int i, err, size;
+
+	size = sizeof(event->thread_map) +
+	       threads->nr * sizeof(event->thread_map.entries[0]);
+
+	event = zalloc(size);
+	if (!event)
+		return -ENOMEM;
+
+	event->header.type = PERF_RECORD_THREAD_MAP;
+	event->header.size = size;
+	event->thread_map.nr = threads->nr;
+
+	for (i = 0; i < threads->nr; i++) {
+		struct thread_map_event_entry *entry = &event->thread_map.entries[i];
+		const char *comm = thread_map__comm(threads, i);
+
+		entry->pid = thread_map__pid(threads, i);
+		/* threads without a recorded comm get an empty string */
+		strncpy((char *) &entry->comm, comm ? comm : "",
+			sizeof(entry->comm));
+	}
+
+	err = process(tool, event, NULL, machine);
+	free(event);
+
+	return err;
+}
+
+/* Copy the cpu list of @map, and its length, into the array form @cpus. */
+static void synthesize_cpus(struct cpu_map_entries *cpus, struct cpu_map *map)
+{
+	int idx;
+
+	for (idx = 0; idx < map->nr; idx++)
+		cpus->cpu[idx] = map->map[idx];
+
+	cpus->nr = map->nr;
+}
+
+/* Encode @map as a bitmap in @mask: one bit per cpu, sized for @max bits. */
+static void synthesize_mask(struct cpu_map_mask *mask,
+			    struct cpu_map *map, int max)
+{
+	int idx;
+
+	mask->long_size = sizeof(long);
+	mask->nr = BITS_TO_LONGS(max);
+	for (idx = 0; idx < map->nr; idx++)
+		set_bit(map->map[idx], mask->mask);
+}
+
+/* Bytes needed for the array encoding: the header plus a u16 per cpu. */
+static size_t cpus_size(struct cpu_map *map)
+{
+	return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16);
+}
+
+/* Bytes needed for the bitmap encoding; *@max receives highest cpu + 1. */
+static size_t mask_size(struct cpu_map *map, int *max)
+{
+	int i, top = 0;
+
+	for (i = 0; i < map->nr; i++) {
+		/* bit position of the cpu is + 1 */
+		if (map->map[i] + 1 > top)
+			top = map->map[i] + 1;
+	}
+
+	*max = top;
+
+	return sizeof(struct cpu_map_mask) + BITS_TO_LONGS(top) * sizeof(long);
+}
+
+/*
+ * Allocate a zeroed buffer for a cpu map event payload, picking whichever
+ * encoding of @map is smaller:
+ *
+ *   array = size of 'struct cpu_map_entries' +
+ *           number of cpus * sizeof(u16)
+ *
+ *   mask  = size of 'struct cpu_map_mask' +
+ *           maximum cpu bit converted to size of longs
+ *
+ * and finally + the size of 'struct cpu_map_data'.  A map for which
+ * cpu_map__empty() holds always uses the array form.
+ *
+ * @size: in/out, incremented by the payload size that gets allocated
+ * @type: out, PERF_CPU_MAP__CPUS or PERF_CPU_MAP__MASK
+ * @max:  out, highest cpu + 1 as computed by mask_size()
+ *
+ * Returns the zalloc()ed buffer of *@size bytes (NULL if that failed).
+ */
+void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max)
+{
+	bool is_dummy = cpu_map__empty(map);
+	size_t size_cpus = cpus_size(map);
+	size_t size_mask = mask_size(map, max);
+	bool as_array = is_dummy || size_cpus < size_mask;
+
+	*type  = as_array ? PERF_CPU_MAP__CPUS : PERF_CPU_MAP__MASK;
+	*size += as_array ? size_cpus : size_mask;
+	*size += sizeof(struct cpu_map_data);
+
+	return zalloc(*size);
+}
+
+/* Write @map into @data using the layout named by @type (array of cpus or bitmap of @max bits). */
+void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
+			      u16 type, int max)
+{
+	void *payload = data->data;
+
+	data->type = type;
+
+	/* Any other @type value leaves the payload as it was. */
+	if (type == PERF_CPU_MAP__CPUS) {
+		synthesize_cpus(payload, map);
+	} else if (type == PERF_CPU_MAP__MASK) {
+		synthesize_mask(payload, map, max);
+	}
+}
+
+/* Allocate and fill a PERF_RECORD_CPU_MAP event describing @map; NULL if allocation fails. */
+static struct cpu_map_event* cpu_map_event__new(struct cpu_map *map)
+{
+	size_t len = sizeof(struct cpu_map_event);	/* grown by cpu_map_data__alloc() */
+	struct cpu_map_event *ev;
+	u16 encoding;
+	int nbits;
+
+	ev = cpu_map_data__alloc(map, &len, &encoding, &nbits);
+	if (ev != NULL) {
+		ev->header.type = PERF_RECORD_CPU_MAP;
+		ev->header.size = len;
+		ev->data.type   = encoding;
+		cpu_map_data__synthesize(&ev->data, map, encoding, nbits);
+	}
+
+	return ev;
+}
+
+/*
+ * Build a PERF_RECORD_CPU_MAP event for @map and pass it to @process.
+ * Returns the callback's result, or -ENOMEM if the event cannot be allocated.
+ */
+int perf_event__synthesize_cpu_map(struct perf_tool *tool, struct cpu_map *map,
+				   perf_event__handler_t process,
+				   struct machine *machine)
+{
+	struct cpu_map_event *cpu_ev = cpu_map_event__new(map);
+	int ret = -ENOMEM;
+
+	if (cpu_ev != NULL) {
+		ret = process(tool, (union perf_event *) cpu_ev, NULL, machine);
+		free(cpu_ev);	/* released here once the callback has returned */
+	}
+	return ret;
+}
+
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+				       struct perf_stat_config *config,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{	/* Emit a PERF_RECORD_STAT_CONFIG event holding @config as (tag, val) pairs; -ENOMEM on allocation failure. */
+	struct stat_config_event *event;
+	int size, i = 0, err;
+
+	size  = sizeof(*event);	/* fixed part; one data[] slot per term is added next */
+	size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0]));
+
+	event = zalloc(size);
+	if (!event)
+		return -ENOMEM;
+
+	event->header.type = PERF_RECORD_STAT_CONFIG;
+	event->header.size = size;
+	event->nr          = PERF_STAT_CONFIG_TERM__MAX;
+
+#define ADD(__term, __val)					\
+	event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term;	\
+	event->data[i].val = __val;				\
+	i++;
+	/* ADD() expands to bare statements: it fills data[i] and then advances i. */
+	ADD(AGGR_MODE,	config->aggr_mode)
+	ADD(INTERVAL,	config->interval)
+	ADD(SCALE,	config->scale)
+	/* event->nr was set to TERM__MAX above, so exactly that many slots must have been filled. */
+	WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX,
+		  "stat config terms unbalanced\n");
+#undef ADD
+
+	err = process(tool, (union perf_event *) event, NULL, machine);
+
+	free(event);
+	return err;
+}
+
+/* Emit a PERF_RECORD_STAT event with one counter reading (@count) for @id on @cpu/@thread. */
+int perf_event__synthesize_stat(struct perf_tool *tool,
+				u32 cpu, u32 thread, u64 id,
+				struct perf_counts_values *count,
+				perf_event__handler_t process,
+				struct machine *machine)
+{
+	struct stat_event ev;	/* fixed size, so built on the stack */
+
+	ev.header.type = PERF_RECORD_STAT;
+	ev.header.misc = 0;
+	ev.header.size = sizeof(ev);
+
+	ev.id     = id;
+	ev.cpu    = cpu;
+	ev.thread = thread;
+	ev.val    = count->val;
+	ev.ena    = count->ena;
+	ev.run    = count->run;
+	return process(tool, (union perf_event *) &ev, NULL, machine);
+}
+
+/* Emit a PERF_RECORD_STAT_ROUND event stamped with @evtime and the round @type. */
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+				      u64 evtime, u64 type,
+				      perf_event__handler_t process,
+				      struct machine *machine)
+{
+	struct stat_round_event round;
+	struct perf_event_header *hdr = &round.header;
+
+	hdr->type = PERF_RECORD_STAT_ROUND;
+	hdr->misc = 0;
+	hdr->size = sizeof(round);
+	round.type = type;
+	round.time = evtime;
+	return process(tool, (union perf_event *) &round, NULL, machine);
+}
+
+void perf_event__read_stat_config(struct perf_stat_config *config,
+				  struct stat_config_event *event)
+{	/* Copy every recognized (tag, val) entry of @event into the matching @config field. */
+	unsigned i;
+
+	for (i = 0; i < event->nr; i++) {
+		/* CASE() expands to "case <tag>: config-><field> = val; break;". */
+		switch (event->data[i].tag) {
+#define CASE(__term, __val)					\
+		case PERF_STAT_CONFIG_TERM__##__term:		\
+			config->__val = event->data[i].val;	\
+			break;
+
+		CASE(AGGR_MODE, aggr_mode)
+		CASE(SCALE,     scale)
+		CASE(INTERVAL,  interval)
+#undef CASE
+		default:	/* unknown tags are logged; the entry is otherwise ignored */
+			pr_warning("unknown stat config term %" PRIu64 "\n",
+				   event->data[i].tag);
+		}
+	}
+}
+
+/* Print the COMM-specific tail of a record line: " exec" marker (if flagged), comm and pid/tid. */
+size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
+{
+	bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC;
+
+	/* Returns whatever fprintf() reports for this one line. */
+	return fprintf(fp, "%s: %s:%d/%d\n",
+		       exec ? " exec" : "",
+		       event->comm.comm,
+		       event->comm.pid, event->comm.tid);
+}
+
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
+{	/* Print pid/tid, the namespace count, then "idx/name: dev/ino" per entry, four entries per line. */
+	size_t ret = 0;
+	struct perf_ns_link_info *ns_link_info;
+	u32 nr_namespaces, idx;
+
+	ns_link_info = event->namespaces.link_info;
+	nr_namespaces = event->namespaces.nr_namespaces;	/* u64 in the event, narrowed to u32 here */
+
+	ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[",
+		       event->namespaces.pid,
+		       event->namespaces.tid,
+		       nr_namespaces);
+
+	for (idx = 0; idx < nr_namespaces; idx++) {	/* with zero entries the '[' above is never closed */
+		if (idx && (idx % 4 == 0))	/* start a new output line after every fourth entry */
+			ret += fprintf(fp, "\n\t\t ");
+
+		ret  += fprintf(fp, "%u/%s: %" PRIu64 "/%#" PRIx64 "%s", idx,
+				perf_ns__name(idx), (u64)ns_link_info[idx].dev,
+				(u64)ns_link_info[idx].ino,
+				((idx + 1) != nr_namespaces) ? ", " : "]\n");	/* the last entry closes the bracket */
+	}
+
+	return ret;
+}
+
+int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{	/* perf_event__handler_t-shaped callback: @tool is unused, the event goes to the machine layer. */
+	return machine__process_comm_event(machine, event, sample);
+}
+
+int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{	/* perf_event__handler_t-shaped callback: @tool is unused, the event goes to the machine layer. */
+	return machine__process_namespaces_event(machine, event, sample);
+}
+
+int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{	/* perf_event__handler_t-shaped callback: @tool is unused, the event goes to the machine layer. */
+	return machine__process_lost_event(machine, event, sample);
+}
+
+int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
+			    union perf_event *event,
+			    struct perf_sample *sample __maybe_unused,
+			    struct machine *machine)
+{	/* perf_event__handler_t-shaped callback: neither @tool nor @sample is used. */
+	return machine__process_aux_event(machine, event);
+}
+
+int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine)
+{	/* perf_event__handler_t-shaped callback: neither @tool nor @sample is used. */
+	return machine__process_itrace_start_event(machine, event);
+}
+
+int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct machine *machine)
+{	/* perf_event__handler_t-shaped callback: @tool is unused, the event goes to the machine layer. */
+	return machine__process_lost_samples_event(machine, event, sample);
+}
+
+int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
+			       union perf_event *event,
+			       struct perf_sample *sample __maybe_unused,
+			       struct machine *machine)
+{	/* perf_event__handler_t-shaped callback: neither @tool nor @sample is used. */
+	return machine__process_switch_event(machine, event);
+}
+
+size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
+{	/* Print " pid/tid: [start(len) @ pgoff]: <r|x> filename" from the event's mmap member. */
+	return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
+		       event->mmap.pid, event->mmap.tid, event->mmap.start,
+		       event->mmap.len, event->mmap.pgoff,
+		       (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) ? 'r' : 'x',	/* 'r' when the MMAP_DATA misc bit is set */
+		       event->mmap.filename);
+}
+
+size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
+{	/* MMAP2 printer: address range, maj:min, inode, inode generation, then "rwx" and shared/private flags. */
+	return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64
+			   " %02x:%02x %"PRIu64" %"PRIu64"]: %c%c%c%c %s\n",
+		       event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
+		       event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj,
+		       event->mmap2.min, event->mmap2.ino,
+		       event->mmap2.ino_generation,
+		       (event->mmap2.prot & PROT_READ) ? 'r' : '-',
+		       (event->mmap2.prot & PROT_WRITE) ? 'w' : '-',
+		       (event->mmap2.prot & PROT_EXEC) ? 'x' : '-',
+		       (event->mmap2.flags & MAP_SHARED) ? 's' : 'p',	/* 'p' whenever MAP_SHARED is clear */
+		       event->mmap2.filename);
+}
+
+/*
+ * Print a THREAD_MAP event by rebuilding a temporary thread_map from it.
+ * Returns the accumulated results of the print calls.
+ */
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp)
+{
+	struct thread_map *tmap = thread_map__new_event(&event->thread_map);
+	size_t printed = fprintf(fp, " nr: ");
+
+	printed += tmap ? thread_map__fprintf(tmap, fp) :
+			  fprintf(fp, "failed to get threads from event\n");
+
+	thread_map__put(tmap);	/* reached on the failure path too, with tmap == NULL */
+	return printed;
+}
+
+/*
+ * Print a CPU_MAP event by rebuilding a temporary cpu_map from its data.
+ * Returns the accumulated results of the print calls.
+ */
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp)
+{
+	struct cpu_map *cmap = cpu_map__new_data(&event->cpu_map.data);
+	size_t printed = fprintf(fp, ": ");
+
+	printed += cmap ? cpu_map__fprintf(cmap, fp) :
+			  fprintf(fp, "failed to get cpumap from event\n");
+
+	cpu_map__put(cmap);	/* reached on the failure path too, with cmap == NULL */
+	return printed;
+}
+
+int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{	/* perf_event__handler_t-shaped callback: @tool is unused, the event goes to the machine layer. */
+	return machine__process_mmap_event(machine, event, sample);
+}
+
+int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{	/* perf_event__handler_t-shaped callback: @tool is unused, the event goes to the machine layer. */
+	return machine__process_mmap2_event(machine, event, sample);
+}
+
+size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
+{	/* Print "(pid:tid):(ppid:ptid)"; perf_event__fprintf() uses this for FORK and EXIT records. */
+	return fprintf(fp, "(%d:%d):(%d:%d)\n",
+		       event->fork.pid, event->fork.tid,
+		       event->fork.ppid, event->fork.ptid);
+}
+
+int perf_event__process_fork(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{	/* perf_event__handler_t-shaped callback: @tool is unused, the event goes to the machine layer. */
+	return machine__process_fork_event(machine, event, sample);
+}
+
+int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine)
+{	/* perf_event__handler_t-shaped callback: @tool is unused, the event goes to the machine layer. */
+	return machine__process_exit_event(machine, event, sample);
+}
+
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
+{	/* Print the AUX offset, size and raw flags, then one letter per set flag: T, O, P. */
+	return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s%s]\n",
+		       event->aux.aux_offset, event->aux.aux_size,
+		       event->aux.flags,
+		       event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "",
+		       event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "",
+		       event->aux.flags & PERF_AUX_FLAG_PARTIAL   ? "P" : "");
+}
+
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
+{	/* Print the pid and tid stored in the event's itrace_start member. */
+	return fprintf(fp, " pid: %u tid: %u\n",
+		       event->itrace_start.pid, event->itrace_start.tid);
+}
+
+size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
+{	/* Print the switch direction; records other than PERF_RECORD_SWITCH also get the next/prev pid/tid. */
+	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+	const char *in_out = !out ? "IN         " :	/* all three labels are padded to the same width */
+		!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) ?
+				    "OUT        " : "OUT preempt";
+
+	if (event->header.type == PERF_RECORD_SWITCH)	/* plain SWITCH: only the direction is printed */
+		return fprintf(fp, " %s\n", in_out);
+
+	return fprintf(fp, " %s  %s pid/tid: %5u/%-5u\n",
+		       in_out, out ? "next" : "prev",	/* switching out reports the next task, switching in the previous one */
+		       event->context_switch.next_prev_pid,
+		       event->context_switch.next_prev_tid);
+}
+
+static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
+{	/* Print the 'lost' counter carried by the event's lost member. */
+	return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost);
+}
+
+size_t perf_event__fprintf(union perf_event *event, FILE *fp)
+{	/* Print "PERF_RECORD_<name>" followed by the type-specific details; returns the summed print results. */
+	size_t ret = fprintf(fp, "PERF_RECORD_%s",
+			     perf_event__name(event->header.type));
+
+	switch (event->header.type) {
+	case PERF_RECORD_COMM:
+		ret += perf_event__fprintf_comm(event, fp);
+		break;
+	case PERF_RECORD_FORK:
+	case PERF_RECORD_EXIT:	/* FORK and EXIT share one printer */
+		ret += perf_event__fprintf_task(event, fp);
+		break;
+	case PERF_RECORD_MMAP:
+		ret += perf_event__fprintf_mmap(event, fp);
+		break;
+	case PERF_RECORD_NAMESPACES:
+		ret += perf_event__fprintf_namespaces(event, fp);
+		break;
+	case PERF_RECORD_MMAP2:
+		ret += perf_event__fprintf_mmap2(event, fp);
+		break;
+	case PERF_RECORD_AUX:
+		ret += perf_event__fprintf_aux(event, fp);
+		break;
+	case PERF_RECORD_ITRACE_START:
+		ret += perf_event__fprintf_itrace_start(event, fp);
+		break;
+	case PERF_RECORD_SWITCH:
+	case PERF_RECORD_SWITCH_CPU_WIDE:	/* the printer itself tells the two apart */
+		ret += perf_event__fprintf_switch(event, fp);
+		break;
+	case PERF_RECORD_LOST:
+		ret += perf_event__fprintf_lost(event, fp);
+		break;
+	default:	/* no dedicated printer: just terminate the line */
+		ret += fprintf(fp, "\n");
+	}
+
+	return ret;
+}
+
+int perf_event__process(struct perf_tool *tool __maybe_unused,
+			union perf_event *event,
+			struct perf_sample *sample,
+			struct machine *machine)
+{	/* perf_event__handler_t-shaped callback for any event type: defers to machine__process_event(). */
+	return machine__process_event(machine, event, sample);
+}
+
+void thread__find_addr_map(struct thread *thread, u8 cpumode,
+			   enum map_type type, u64 addr,
+			   struct addr_location *al)
+{	/* Fill @al for @addr: choose the map group from @cpumode, look up the map, translate the address. */
+	struct map_groups *mg = thread->mg;
+	struct machine *machine = mg->machine;
+	bool load_map = false;	/* set whenever the lookup goes through machine->kmaps */
+
+	al->machine = machine;
+	al->thread = thread;
+	al->addr = addr;
+	al->cpumode = cpumode;
+	al->filtered = 0;
+
+	if (machine == NULL) {	/* nothing to search: al->map is cleared, al->level is not written */
+		al->map = NULL;
+		return;
+	}
+
+	if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
+		al->level = 'k';
+		mg = &machine->kmaps;
+		load_map = true;
+	} else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
+		al->level = '.';
+	} else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
+		al->level = 'g';
+		mg = &machine->kmaps;
+		load_map = true;
+	} else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) {
+		al->level = 'u';
+	} else {	/* any other cpumode, or a host/guest mode whose perf_host/perf_guest switch is off */
+		al->level = 'H';
+		al->map = NULL;
+
+		if ((cpumode == PERF_RECORD_MISC_GUEST_USER ||
+			cpumode == PERF_RECORD_MISC_GUEST_KERNEL) &&
+			!perf_guest)
+			al->filtered |= (1 << HIST_FILTER__GUEST);
+		if ((cpumode == PERF_RECORD_MISC_USER ||
+			cpumode == PERF_RECORD_MISC_KERNEL) &&
+			!perf_host)
+			al->filtered |= (1 << HIST_FILTER__HOST);
+
+		return;
+	}
+try_again:
+	al->map = map_groups__find(mg, type, al->addr);
+	if (al->map == NULL) {
+		/*
+		 * If this is outside of all known maps, and is a negative
+		 * address, try to look it up in the kernel dso, as it might be
+		 * a vsyscall or vdso (which executes in user-mode).
+		 *
+		 * XXX This is nasty, we should have a symbol list in the
+		 * "[vdso]" dso, but for now lets use the old trick of looking
+		 * in the whole kernel symbol list.
+		 */
+		if (cpumode == PERF_RECORD_MISC_USER && machine &&
+		    mg != &machine->kmaps &&
+		    machine__kernel_ip(machine, al->addr)) {
+			mg = &machine->kmaps;
+			load_map = true;
+			goto try_again;	/* retried at most once: mg now equals &machine->kmaps */
+		}
+	} else {
+		/*
+		 * Kernel maps might be changed when loading symbols so loading
+		 * must be done prior to using kernel maps.
+		 */
+		if (load_map)
+			map__load(al->map);
+		al->addr = al->map->map_ip(al->map, al->addr);	/* al->addr is replaced by the map_ip() translation */
+	}
+}
+
+/* Resolve @addr to a map and, when a map is found, to a symbol; al->sym is NULL otherwise. */
+void thread__find_addr_location(struct thread *thread,
+				u8 cpumode, enum map_type type, u64 addr,
+				struct addr_location *al)
+{
+	thread__find_addr_map(thread, cpumode, type, addr, al);
+
+	/* on success al->addr already holds the map_ip()-translated address */
+	al->sym = al->map ? map__find_symbol(al->map, al->addr) : NULL;
+}
+
+/*
+ * Callers need to drop the reference to al->thread, obtained in
+ * machine__findnew_thread()
+ */
+int machine__resolve(struct machine *machine, struct addr_location *al,
+		     struct perf_sample *sample)
+{	/* Resolve sample->ip into @al (map, symbol, cpu, socket) and set filter bits; -1 if no thread is found. */
+	struct thread *thread = machine__findnew_thread(machine, sample->pid,
+							sample->tid);
+
+	if (thread == NULL)
+		return -1;
+
+	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
+	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al);
+	dump_printf(" ...... dso: %s\n",
+		    al->map ? al->map->dso->long_name :
+			al->level == 'H' ? "[hypervisor]" : "<not found>");
+
+	if (thread__is_filtered(thread))
+		al->filtered |= (1 << HIST_FILTER__THREAD);
+
+	al->sym = NULL;
+	al->cpu = sample->cpu;
+	al->socket = -1;	/* stays -1 unless the per-cpu table below supplies a socket id */
+	al->srcline = NULL;
+
+	if (al->cpu >= 0) {	/* NOTE(review): al->cpu indexes env->cpu[] with no bounds check visible here */
+		struct perf_env *env = machine->env;
+
+		if (env && env->cpu)
+			al->socket = env->cpu[al->cpu].socket_id;
+	}
+
+	if (al->map) {
+		struct dso *dso = al->map->dso;
+		/* With a dso list configured, keep only dsos whose short or long name is listed. */
+		if (symbol_conf.dso_list &&
+		    (!dso || !(strlist__has_entry(symbol_conf.dso_list,
+						  dso->short_name) ||
+			       (dso->short_name != dso->long_name &&
+				strlist__has_entry(symbol_conf.dso_list,
+						   dso->long_name))))) {
+			al->filtered |= (1 << HIST_FILTER__DSO);
+		}
+
+		al->sym = map__find_symbol(al->map, al->addr);
+	}
+	/* With a symbol list configured, unresolved or unlisted symbols are filtered as well. */
+	if (symbol_conf.sym_list &&
+		(!al->sym || !strlist__has_entry(symbol_conf.sym_list,
+						al->sym->name))) {
+		al->filtered |= (1 << HIST_FILTER__SYMBOL);
+	}
+
+	return 0;
+}
+
+/*
+ * The preprocess_sample method will return with reference counts for the
+ * entries in it; when done using them (and perhaps getting ref counts if
+ * needing to keep a pointer to one of those entries) it must be paired with
+ * addr_location__put(), so that the refcounts can be decremented.
+ */
+void addr_location__put(struct addr_location *al)
+{
+	thread__zput(al->thread);	/* al->thread is the only member released here */
+}
+
+bool is_bts_event(struct perf_event_attr *attr)
+{	/* True for a hardware event with sample_period 1 whose config has the BRANCH_INSTRUCTIONS bit(s) set. */
+	return attr->type == PERF_TYPE_HARDWARE &&
+	       (attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&	/* NOTE(review): bitwise test, not '==' — confirm other config ids should match */
+	       attr->sample_period == 1;
+}
+
+/*
+ * True for the software page-fault events (any, minor, major) and for
+ * events accepted by is_bts_event().
+ */
+bool sample_addr_correlates_sym(struct perf_event_attr *attr)
+{
+	bool sw_page_fault = attr->type == PERF_TYPE_SOFTWARE &&
+			     (attr->config == PERF_COUNT_SW_PAGE_FAULTS ||
+			      attr->config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
+			      attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ);
+
+	return sw_page_fault || is_bts_event(attr);
+}
+
+/* Resolve sample->addr: try function maps first, then variable maps, then look up the symbol. */
+void thread__resolve(struct thread *thread, struct addr_location *al,
+		     struct perf_sample *sample)
+{
+	const u8 mode = sample->cpumode;
+	const u64 addr = sample->addr;
+
+	thread__find_addr_map(thread, mode, MAP__FUNCTION, addr, al);
+	if (al->map == NULL)
+		thread__find_addr_map(thread, mode, MAP__VARIABLE, addr, al);
+
+	al->cpu = sample->cpu;
+	al->sym = al->map ? map__find_symbol(al->map, al->addr) : NULL;
+}
diff --git a/util/event.h b/util/event.h
new file mode 100644
index 0000000..0f79474
--- /dev/null
+++ b/util/event.h
@@ -0,0 +1,825 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_RECORD_H
+#define __PERF_RECORD_H
+
+#include <limits.h>
+#include <stdio.h>
+#include <linux/kernel.h>
+
+#include "../perf.h"
+#include "build-id.h"
+#include "perf_regs.h"
+
+struct mmap_event {	/* read through event->mmap by perf_event__fprintf_mmap() */
+	struct perf_event_header header;
+	u32 pid, tid;
+	u64 start;
+	u64 len;
+	u64 pgoff;
+	char filename[PATH_MAX];	/* printed with %s, so expected to be NUL-terminated */
+};
+
+struct mmap2_event {	/* read through event->mmap2 by perf_event__fprintf_mmap2() */
+	struct perf_event_header header;
+	u32 pid, tid;
+	u64 start;
+	u64 len;
+	u64 pgoff;
+	u32 maj;
+	u32 min;
+	u64 ino;
+	u64 ino_generation;
+	u32 prot;	/* tested against PROT_READ/PROT_WRITE/PROT_EXEC when printed */
+	u32 flags;	/* tested against MAP_SHARED when printed */
+	char filename[PATH_MAX];
+};
+
+struct comm_event {	/* read through event->comm by perf_event__fprintf_comm() */
+	struct perf_event_header header;
+	u32 pid, tid;
+	char comm[16];	/* printed with %s, so expected to be NUL-terminated */
+};
+
+struct namespaces_event {	/* read through event->namespaces by perf_event__fprintf_namespaces() */
+	struct perf_event_header header;
+	u32 pid, tid;
+	u64 nr_namespaces;	/* number of link_info[] entries the printer walks */
+	struct perf_ns_link_info link_info[];
+};
+
+struct fork_event {	/* printed by perf_event__fprintf_task() for both FORK and EXIT records */
+	struct perf_event_header header;
+	u32 pid, ppid;	/* note the member order: pid, ppid, then tid, ptid */
+	u32 tid, ptid;
+	u64 time;
+};
+
+struct lost_event {	/* PERF_RECORD_LOST body */
+	struct perf_event_header header;
+	u64 id;
+	u64 lost;	/* printed by perf_event__fprintf_lost() */
+};
+
+struct lost_samples_event {	/* body of the 'lost_samples' member of union perf_event */
+	struct perf_event_header header;
+	u64 lost;
+};
+
+/*
+ * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
+ */
+struct read_event {	/* NOTE(review): presumably the layout when all three read_format bits above are set — confirm */
+	struct perf_event_header header;
+	u32 pid, tid;
+	u64 value;
+	u64 time_enabled;
+	u64 time_running;
+	u64 id;
+};
+
+struct throttle_event {	/* NOTE(review): presumably shared by throttle and unthrottle records — confirm */
+	struct perf_event_header header;
+	u64 time;
+	u64 id;
+	u64 stream_id;
+};
+
+#define PERF_SAMPLE_MASK				\
+	(PERF_SAMPLE_IP | PERF_SAMPLE_TID |		\
+	 PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR |		\
+	PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID |	\
+	 PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD |		\
+	 PERF_SAMPLE_IDENTIFIER)
+
+/* The size of a perf sample is limited by a 16-bit size field. */
+#define PERF_SAMPLE_MAX_SIZE (1 << 16)
+
+struct sample_event {	/* header followed by a variable number of u64 words */
+	struct perf_event_header        header;
+	u64 array[];
+};
+
+struct regs_dump {	/* used for both user_regs and intr_regs in struct perf_sample */
+	u64 abi;
+	u64 mask;
+	u64 *regs;
+
+	/* Cached values/mask filled by first register access. */
+	u64 cache_regs[PERF_REGS_MAX];
+	u64 cache_mask;
+};
+
+struct stack_dump {	/* type of perf_sample.user_stack */
+	u16 offset;
+	u64 size;
+	char *data;	/* NOTE(review): presumably 'size' bytes long — confirm against the parser */
+};
+
+struct sample_read_value {	/* one (value, id) pair */
+	u64 value;
+	u64 id;
+};
+
+struct sample_read {
+	u64 time_enabled;
+	u64 time_running;
+	union {	/* either a counted array of values or a single inline value */
+		struct {
+			u64 nr;
+			struct sample_read_value *values;
+		} group;
+		struct sample_read_value one;
+	};
+};
+
+struct ip_callchain {	/* NOTE(review): presumably 'nr' counts the entries of ips[] — confirm */
+	u64 nr;
+	u64 ips[0];
+};
+
+struct branch_flags {	/* the bitfield widths below add up to 64 bits */
+	u64 mispred:1;
+	u64 predicted:1;
+	u64 in_tx:1;
+	u64 abort:1;
+	u64 cycles:16;
+	u64 type:4;
+	u64 reserved:40;
+};
+
+struct branch_entry {	/* element type of branch_stack.entries[] */
+	u64			from;
+	u64			to;
+	struct branch_flags	flags;
+};
+
+struct branch_stack {	/* NOTE(review): presumably 'nr' counts entries[] — confirm */
+	u64			nr;
+	struct branch_entry	entries[0];
+};
+
+enum {	/* single-bit flags occupying bits 0..10 */
+	PERF_IP_FLAG_BRANCH		= 1ULL << 0,
+	PERF_IP_FLAG_CALL		= 1ULL << 1,
+	PERF_IP_FLAG_RETURN		= 1ULL << 2,
+	PERF_IP_FLAG_CONDITIONAL	= 1ULL << 3,
+	PERF_IP_FLAG_SYSCALLRET		= 1ULL << 4,
+	PERF_IP_FLAG_ASYNC		= 1ULL << 5,
+	PERF_IP_FLAG_INTERRUPT		= 1ULL << 6,
+	PERF_IP_FLAG_TX_ABORT		= 1ULL << 7,
+	PERF_IP_FLAG_TRACE_BEGIN	= 1ULL << 8,
+	PERF_IP_FLAG_TRACE_END		= 1ULL << 9,
+	PERF_IP_FLAG_IN_TX		= 1ULL << 10,
+};
+/* Eleven characters for the eleven flags above; presumably one per flag in bit order — confirm. */
+#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
+/* Every flag above except PERF_IP_FLAG_IN_TX. */
+#define PERF_BRANCH_MASK		(\
+	PERF_IP_FLAG_BRANCH		|\
+	PERF_IP_FLAG_CALL		|\
+	PERF_IP_FLAG_RETURN		|\
+	PERF_IP_FLAG_CONDITIONAL	|\
+	PERF_IP_FLAG_SYSCALLRET		|\
+	PERF_IP_FLAG_ASYNC		|\
+	PERF_IP_FLAG_INTERRUPT		|\
+	PERF_IP_FLAG_TX_ABORT		|\
+	PERF_IP_FLAG_TRACE_BEGIN	|\
+	PERF_IP_FLAG_TRACE_END)
+
+#define MAX_INSN 16	/* capacity of perf_sample.insn[] */
+
+struct perf_sample {	/* the sample argument of every perf_event__handler_t callback */
+	u64 ip;
+	u32 pid, tid;
+	u64 time;
+	u64 addr;
+	u64 id;
+	u64 stream_id;
+	u64 period;
+	u64 weight;
+	u64 transaction;
+	u32 cpu;
+	u32 raw_size;	/* compared against the struct size minus 4 by perf_sample__bad_synth_size() */
+	u64 data_src;
+	u64 phys_addr;
+	u32 flags;
+	u16 insn_len;
+	u8  cpumode;
+	u16 misc;
+	char insn[MAX_INSN];
+	void *raw_data;	/* perf_sample__synth_ptr() steps back 4 bytes from this pointer */
+	struct ip_callchain *callchain;
+	struct branch_stack *branch_stack;
+	struct regs_dump  user_regs;
+	struct regs_dump  intr_regs;
+	struct stack_dump user_stack;
+	struct sample_read read;
+};
+
+#define PERF_MEM_DATA_SRC_NONE \
+	(PERF_MEM_S(OP, NA) |\
+	 PERF_MEM_S(LVL, NA) |\
+	 PERF_MEM_S(SNOOP, NA) |\
+	 PERF_MEM_S(LOCK, NA) |\
+	 PERF_MEM_S(TLB, NA))
+
+struct build_id_event {	/* 'build_id' member of union perf_event */
+	struct perf_event_header header;
+	pid_t			 pid;
+	u8			 build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))];	/* padded up to a multiple of 8 bytes */
+	char			 filename[];
+};
+
+enum perf_user_event_type { /* above any possible kernel type */
+	PERF_RECORD_USER_TYPE_START		= 64,
+	PERF_RECORD_HEADER_ATTR			= 64,	/* same value as USER_TYPE_START */
+	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* deprecated */
+	PERF_RECORD_HEADER_TRACING_DATA		= 66,
+	PERF_RECORD_HEADER_BUILD_ID		= 67,
+	PERF_RECORD_FINISHED_ROUND		= 68,
+	PERF_RECORD_ID_INDEX			= 69,
+	PERF_RECORD_AUXTRACE_INFO		= 70,
+	PERF_RECORD_AUXTRACE			= 71,
+	PERF_RECORD_AUXTRACE_ERROR		= 72,
+	PERF_RECORD_THREAD_MAP			= 73,
+	PERF_RECORD_CPU_MAP			= 74,
+	PERF_RECORD_STAT_CONFIG			= 75,
+	PERF_RECORD_STAT			= 76,
+	PERF_RECORD_STAT_ROUND			= 77,
+	PERF_RECORD_EVENT_UPDATE		= 78,
+	PERF_RECORD_TIME_CONV			= 79,
+	PERF_RECORD_HEADER_FEATURE		= 80,
+	PERF_RECORD_HEADER_MAX	/* 81: one past the last type; sizes events_stats.nr_events[] */
+};
+
+enum auxtrace_error_type {
+	PERF_AUXTRACE_ERROR_ITRACE  = 1,
+	PERF_AUXTRACE_ERROR_MAX	/* 2; sizes events_stats.nr_auxtrace_errors[] */
+};
+
+/* Attribute type for custom synthesized events: the unsigned value one above INT_MAX */
+#define PERF_TYPE_SYNTH		(INT_MAX + 1U)
+
+/* Attribute config for custom synthesized events (implicit values 0..5) */
+enum perf_synth_id {
+	PERF_SYNTH_INTEL_PTWRITE,
+	PERF_SYNTH_INTEL_MWAIT,
+	PERF_SYNTH_INTEL_PWRE,
+	PERF_SYNTH_INTEL_EXSTOP,
+	PERF_SYNTH_INTEL_PWRX,
+	PERF_SYNTH_INTEL_CBR,
+};
+
+/*
+ * Raw data formats for synthesized events. Note that 4 bytes of padding are
+ * present to match the 'size' member of PERF_SAMPLE_RAW data which is always
+ * 8-byte aligned. That means we must dereference raw_data with an offset of 4.
+ * Refer to perf_sample__synth_ptr() and perf_synth__raw_data().  It also means
+ * the structure sizes are 4 bytes bigger than the raw_size, refer to
+ * perf_synth__raw_size().
+ */
+
+struct perf_synth_intel_ptwrite {
+	u32 padding;	/* the 4 leading bytes described in the comment above */
+	union {
+		struct {
+			u32	ip		:  1,
+				reserved	: 31;
+		};
+		u32	flags;	/* the same 32 bits as the bitfields, as one word */
+	};
+	u64	payload;
+};
+
+struct perf_synth_intel_mwait {
+	u32 padding;	/* leading 4 bytes that are not part of the raw data */
+	u32 reserved;
+	union {
+		struct {	/* widths add up to 64 bits, overlaying 'payload' */
+			u64	hints		:  8,
+				reserved1	: 24,
+				extensions	:  2,
+				reserved2	: 30;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_pwre {
+	u32 padding;	/* leading 4 bytes that are not part of the raw data */
+	u32 reserved;
+	union {
+		struct {	/* widths add up to 64 bits, overlaying 'payload' */
+			u64	reserved1	:  7,
+				hw		:  1,
+				subcstate	:  4,
+				cstate		:  4,
+				reserved2	: 48;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_exstop {
+	u32 padding;	/* leading 4 bytes that are not part of the raw data */
+	union {
+		struct {	/* widths add up to 32 bits, overlaying 'flags' */
+			u32	ip		:  1,
+				reserved	: 31;
+		};
+		u32	flags;
+	};
+};
+
+struct perf_synth_intel_pwrx {
+	u32 padding;	/* leading 4 bytes that are not part of the raw data */
+	u32 reserved;
+	union {
+		struct {	/* widths add up to 64 bits, overlaying 'payload' */
+			u64	deepest_cstate	:  4,
+				last_cstate	:  4,
+				wake_reason	:  4,
+				reserved1	: 52;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_cbr {
+	u32 padding;	/* leading 4 bytes that are not part of the raw data */
+	union {
+		struct {	/* widths add up to 32 bits, overlaying 'flags' */
+			u32	cbr		:  8,
+				reserved1	:  8,
+				max_nonturbo	:  8,
+				reserved2	:  8;
+		};
+		u32	flags;
+	};
+	u32 freq;
+	u32 reserved3;
+};
+
+/*
+ * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
+ * 8-byte alignment.
+ */
+static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
+{
+	return sample->raw_data - 4;	/* arithmetic on a void pointer (GNU C extension) */
+}
+
+static inline void *perf_synth__raw_data(void *p)
+{	/* Inverse of perf_sample__synth_ptr(): skip the 4 padding bytes at the start of @p. */
+	return p + 4;
+}
+/* Raw size of synth struct @d: its sizeof without the 4 padding bytes. */
+#define perf_synth__raw_size(d) (sizeof(d) - 4)
+/* True when sample @s carries fewer raw bytes than synth struct @d needs. */
+#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4)
+
+/*
+ * The kernel collects the number of events it couldn't send in a stretch and
+ * when possible sends this number in a PERF_RECORD_LOST event. The number of
+ * such "chunks" of lost events is stored in .nr_events[PERF_RECORD_LOST] while
+ * total_lost tells exactly how many events the kernel in fact lost, i.e. it is
+ * the sum of all struct lost_event.lost fields reported.
+ *
+ * The kernel discards mixed up samples and sends the number in a
+ * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
+ * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
+ * exactly how many samples the kernel in fact dropped, i.e. it is the sum of
+ * all struct lost_samples_event.lost fields reported.
+ *
+ * The total_period is needed because by default auto-freq is used, so
+ * multiplying nr_events[PERF_RECORD_SAMPLE] by a frequency isn't possible to
+ * get the total number of low level events; it is necessary to sum all struct
+ * perf_sample.period values and stash the result in total_period.
+ */
+struct events_stats {
+	u64 total_period;
+	u64 total_non_filtered_period;
+	u64 total_lost;
+	u64 total_lost_samples;
+	u64 total_aux_lost;
+	u64 total_aux_partial;
+	u64 total_invalid_chains;
+	u32 nr_events[PERF_RECORD_HEADER_MAX];	/* one counter per record type value */
+	u32 nr_non_filtered_samples;
+	u32 nr_lost_warned;
+	u32 nr_unknown_events;
+	u32 nr_invalid_chains;
+	u32 nr_unknown_id;
+	u32 nr_unprocessable_samples;
+	u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX];	/* one counter per auxtrace error type value */
+	u32 nr_proc_map_timeout;
+};
+
+enum {	/* values stored in cpu_map_data.type */
+	PERF_CPU_MAP__CPUS = 0,
+	PERF_CPU_MAP__MASK = 1,
+};
+
+struct cpu_map_entries {	/* payload for PERF_CPU_MAP__CPUS */
+	u16	nr;
+	u16	cpu[];
+};
+
+struct cpu_map_mask {	/* payload for PERF_CPU_MAP__MASK */
+	u16	nr;	/* number of longs in mask[] */
+	u16	long_size;	/* sizeof(long) of the writer */
+	unsigned long mask[];
+};
+
+struct cpu_map_data {
+	u16	type;
+	char	data[];	/* cast to cpu_map_entries or cpu_map_mask according to 'type' */
+};
+
+struct cpu_map_event {	/* PERF_RECORD_CPU_MAP */
+	struct perf_event_header	header;
+	struct cpu_map_data		data;
+};
+
+struct attr_event {	/* 'attr' member of union perf_event */
+	struct perf_event_header header;
+	struct perf_event_attr attr;
+	u64 id[];
+};
+
+enum {	/* NOTE(review): presumably the values of event_update_event.type — confirm */
+	PERF_EVENT_UPDATE__UNIT  = 0,
+	PERF_EVENT_UPDATE__SCALE = 1,
+	PERF_EVENT_UPDATE__NAME  = 2,
+	PERF_EVENT_UPDATE__CPUS  = 3,
+};
+
+struct event_update_event_cpus {	/* wraps the same cpu_map_data used by cpu_map_event */
+	struct cpu_map_data cpus;
+};
+
+struct event_update_event_scale {
+	double scale;
+};
+
+struct event_update_event {	/* 'event_update' member of union perf_event */
+	struct perf_event_header header;
+	u64 type;
+	u64 id;
+
+	char data[];	/* NOTE(review): presumably interpreted according to 'type' — confirm */
+};
+
+#define MAX_EVENT_NAME 64	/* capacity of perf_trace_event_type.name[] */
+
+struct perf_trace_event_type {
+	u64	event_id;
+	char	name[MAX_EVENT_NAME];
+};
+
+struct event_type_event {	/* 'event_type' member of union perf_event */
+	struct perf_event_header header;
+	struct perf_trace_event_type event_type;
+};
+
+struct tracing_data_event {	/* 'tracing_data' member of union perf_event */
+	struct perf_event_header header;
+	u32 size;
+};
+
+struct id_index_entry {	/* element type of id_index_event.entries[] */
+	u64 id;
+	u64 idx;
+	u64 cpu;
+	u64 tid;
+};
+
+struct id_index_event {	/* NOTE(review): presumably 'nr' counts entries[] — confirm */
+	struct perf_event_header header;
+	u64 nr;
+	struct id_index_entry entries[0];
+};
+
+struct auxtrace_info_event {	/* 'auxtrace_info' member of union perf_event */
+	struct perf_event_header header;
+	u32 type;
+	u32 reserved__; /* For alignment */
+	u64 priv[];
+};
+
+struct auxtrace_event {	/* 'auxtrace' member of union perf_event */
+	struct perf_event_header header;
+	u64 size;
+	u64 offset;
+	u64 reference;
+	u32 idx;
+	u32 tid;
+	u32 cpu;
+	u32 reserved__; /* For alignment */
+};
+
+#define MAX_AUXTRACE_ERROR_MSG 64	/* capacity of auxtrace_error_event.msg[] */
+
+struct auxtrace_error_event {	/* 'auxtrace_error' member of union perf_event */
+	struct perf_event_header header;
+	u32 type;
+	u32 code;
+	u32 cpu;
+	u32 pid;
+	u32 tid;
+	u32 reserved__; /* For alignment */
+	u64 ip;
+	char msg[MAX_AUXTRACE_ERROR_MSG];
+};
+
+struct aux_event {	/* printed by perf_event__fprintf_aux() */
+	struct perf_event_header header;
+	u64	aux_offset;
+	u64	aux_size;
+	u64	flags;	/* tested against the PERF_AUX_FLAG_* bits when printed */
+};
+
+struct itrace_start_event {	/* printed by perf_event__fprintf_itrace_start() */
+	struct perf_event_header header;
+	u32 pid, tid;
+};
+
+struct context_switch_event {	/* read by perf_event__fprintf_switch() for non-PERF_RECORD_SWITCH types */
+	struct perf_event_header header;
+	u32 next_prev_pid;	/* labelled "next" when switching out, "prev" when switching in */
+	u32 next_prev_tid;
+};
+
+struct thread_map_event_entry {	/* one thread: filled by perf_event__synthesize_thread_map2() */
+	u64	pid;
+	char	comm[16];
+};
+
+struct thread_map_event {	/* PERF_RECORD_THREAD_MAP */
+	struct perf_event_header	header;
+	u64				nr;	/* number of entries[] that follow */
+	struct thread_map_event_entry	entries[];
+};
+
+enum {	/* tags stored in stat_config_event_entry.tag */
+	PERF_STAT_CONFIG_TERM__AGGR_MODE	= 0,
+	PERF_STAT_CONFIG_TERM__INTERVAL		= 1,
+	PERF_STAT_CONFIG_TERM__SCALE		= 2,
+	PERF_STAT_CONFIG_TERM__MAX		= 3,	/* number of terms; the synthesizer emits this many entries */
+};
+
+struct stat_config_event_entry {	/* one (tag, value) pair */
+	u64	tag;
+	u64	val;
+};
+
+struct stat_config_event {	/* PERF_RECORD_STAT_CONFIG */
+	struct perf_event_header	header;
+	u64				nr;	/* number of data[] entries */
+	struct stat_config_event_entry	data[];
+};
+
+struct stat_event {	/* PERF_RECORD_STAT, built by perf_event__synthesize_stat() */
+	struct perf_event_header	header;
+
+	u64	id;
+	u32	cpu;
+	u32	thread;
+
+	union {	/* val/ena/run are the same storage as values[0..2] */
+		struct {
+			u64 val;
+			u64 ena;
+			u64 run;
+		};
+		u64 values[3];
+	};
+};
+
+enum {	/* NOTE(review): presumably the values of stat_round_event.type — confirm */
+	PERF_STAT_ROUND_TYPE__INTERVAL	= 0,
+	PERF_STAT_ROUND_TYPE__FINAL	= 1,
+};
+
+struct stat_round_event {	/* PERF_RECORD_STAT_ROUND, built by perf_event__synthesize_stat_round() */
+	struct perf_event_header	header;
+	u64				type;
+	u64				time;
+};
+
+struct time_conv_event {	/* 'time_conv' member of union perf_event */
+	struct perf_event_header header;
+	u64 time_shift;
+	u64 time_mult;
+	u64 time_zero;
+};
+
+struct feature_event {	/* 'feat' member of union perf_event */
+	struct perf_event_header 	header;
+	u64				feat_id;
+	char				data[];
+};
+
+union perf_event {	/* every member struct starts with a perf_event_header, so 'header' is always readable */
+	struct perf_event_header	header;	/* header.type tells which member applies */
+	struct mmap_event		mmap;
+	struct mmap2_event		mmap2;
+	struct comm_event		comm;
+	struct namespaces_event		namespaces;
+	struct fork_event		fork;
+	struct lost_event		lost;
+	struct lost_samples_event	lost_samples;
+	struct read_event		read;
+	struct throttle_event		throttle;
+	struct sample_event		sample;
+	struct attr_event		attr;
+	struct event_update_event	event_update;
+	struct event_type_event		event_type;
+	struct tracing_data_event	tracing_data;
+	struct build_id_event		build_id;
+	struct id_index_event		id_index;
+	struct auxtrace_info_event	auxtrace_info;
+	struct auxtrace_event		auxtrace;
+	struct auxtrace_error_event	auxtrace_error;
+	struct aux_event		aux;
+	struct itrace_start_event	itrace_start;
+	struct context_switch_event	context_switch;
+	struct thread_map_event		thread_map;
+	struct cpu_map_event		cpu_map;
+	struct stat_config_event	stat_config;
+	struct stat_event		stat;
+	struct stat_round_event		stat_round;
+	struct time_conv_event		time_conv;
+	struct feature_event		feat;
+};
+
+void perf_event__print_totals(void);
+
+struct perf_tool;
+struct thread_map;
+struct cpu_map;
+struct perf_stat_config;
+struct perf_counts_values;
+
+typedef int (*perf_event__handler_t)(struct perf_tool *tool,	/* callback type taken by the synthesize helpers */
+				     union perf_event *event,
+				     struct perf_sample *sample,	/* the synthesize helpers in event.c pass NULL here */
+				     struct machine *machine);
+
+/*
+ * Event synthesis entry points.  Each takes the tool, a perf_event__handler_t
+ * callback (@process) and the machine.  NOTE(review): presumably every
+ * synthesized event is handed to @process -- the definitions are not in this
+ * header, confirm there.
+ */
+int perf_event__synthesize_thread_map(struct perf_tool *tool,
+				      struct thread_map *threads,
+				      perf_event__handler_t process,
+				      struct machine *machine, bool mmap_data,
+				      unsigned int proc_map_timeout);
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+				      struct thread_map *threads,
+				      perf_event__handler_t process,
+				      struct machine *machine);
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+				   struct cpu_map *cpus,
+				   perf_event__handler_t process,
+				   struct machine *machine);
+int perf_event__synthesize_threads(struct perf_tool *tool,
+				   perf_event__handler_t process,
+				   struct machine *machine, bool mmap_data,
+				   unsigned int proc_map_timeout,
+				   unsigned int nr_threads_synthesize);
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine);
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+				       struct perf_stat_config *config,
+				       perf_event__handler_t process,
+				       struct machine *machine);
+/* Unlike its neighbours this one takes no callback: it fills @config from @event. */
+void perf_event__read_stat_config(struct perf_stat_config *config,
+				  struct stat_config_event *event);
+int perf_event__synthesize_stat(struct perf_tool *tool,
+				u32 cpu, u32 thread, u64 id,
+				struct perf_counts_values *count,
+				perf_event__handler_t process,
+				struct machine *machine);
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+				      u64 time, u64 type,
+				      perf_event__handler_t process,
+				      struct machine *machine);
+int perf_event__synthesize_modules(struct perf_tool *tool,
+				   perf_event__handler_t process,
+				   struct machine *machine);
+
+/*
+ * Per-record-type handlers.  All of them share the perf_event__handler_t
+ * signature, so any of them can be passed where that callback type is
+ * expected.
+ */
+int perf_event__process_comm(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+int perf_event__process_lost(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+int perf_event__process_lost_samples(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct machine *machine);
+int perf_event__process_aux(struct perf_tool *tool,
+			    union perf_event *event,
+			    struct perf_sample *sample,
+			    struct machine *machine);
+int perf_event__process_itrace_start(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct machine *machine);
+int perf_event__process_switch(struct perf_tool *tool,
+			       union perf_event *event,
+			       struct perf_sample *sample,
+			       struct machine *machine);
+int perf_event__process_namespaces(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine);
+int perf_event__process_mmap(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+int perf_event__process_mmap2(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+int perf_event__process_fork(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+int perf_event__process_exit(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct machine *machine);
+/* NOTE(review): by its name, presumably a generic dispatcher over the handlers above -- confirm. */
+int perf_event__process(struct perf_tool *tool,
+			union perf_event *event,
+			struct perf_sample *sample,
+			struct machine *machine);
+
+struct addr_location;
+
+int machine__resolve(struct machine *machine, struct addr_location *al,
+		     struct perf_sample *sample);
+
+/* NOTE(review): presumably drops what machine__resolve() attached to @al -- confirm. */
+void addr_location__put(struct addr_location *al);
+
+struct thread;
+
+bool is_bts_event(struct perf_event_attr *attr);
+bool sample_addr_correlates_sym(struct perf_event_attr *attr);
+void thread__resolve(struct thread *thread, struct addr_location *al,
+		     struct perf_sample *sample);
+
+const char *perf_event__name(unsigned int id);
+
+/* Sample sizing/serialisation; both take the sample type and read format bit masks. */
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
+				     u64 read_format);
+int perf_event__synthesize_sample(union perf_event *event, u64 type,
+				  u64 read_format,
+				  const struct perf_sample *sample);
+
+/* Per-task synthesis helpers; the caller supplies the @event buffer. */
+pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+				  union perf_event *event, pid_t pid,
+				  perf_event__handler_t process,
+				  struct machine *machine);
+
+int perf_event__synthesize_namespaces(struct perf_tool *tool,
+				      union perf_event *event,
+				      pid_t pid, pid_t tgid,
+				      perf_event__handler_t process,
+				      struct machine *machine);
+
+int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+				       union perf_event *event,
+				       pid_t pid, pid_t tgid,
+				       perf_event__handler_t process,
+				       struct machine *machine,
+				       bool mmap_data,
+				       unsigned int proc_map_timeout);
+
+/* Printers: each writes @event to @fp and returns a size_t (presumably the bytes written -- confirm). */
+size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf(union perf_event *event, FILE *fp);
+
+/* Looks up @symbol_name in @kallsyms_filename; the address is returned through @addr. */
+int kallsyms__get_function_start(const char *kallsyms_filename,
+				 const char *symbol_name, u64 *addr);
+
+void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max);
+void  cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
+			       u16 type, int max);
+
+void event_attr_init(struct perf_event_attr *attr);
+
+int perf_event_paranoid(void);
+
+/* Defined elsewhere; by name these mirror the kernel sysctls of the same names (confirm). */
+extern int sysctl_perf_event_max_stack;
+extern int sysctl_perf_event_max_contexts_per_stack;
+
+#endif /* __PERF_RECORD_H */
diff --git a/util/evlist.c b/util/evlist.c
new file mode 100644
index 0000000..a59281d
--- /dev/null
+++ b/util/evlist.c
@@ -0,0 +1,1797 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include "util.h"
+#include <api/fs/fs.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <poll.h>
+#include "cpumap.h"
+#include "thread_map.h"
+#include "target.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "debug.h"
+#include "units.h"
+#include "asm/bug.h"
+#include <signal.h>
+#include <unistd.h>
+
+#include "parse-events.h"
+#include <subcmd/parse-options.h>
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/bitops.h>
+#include <linux/hash.h>
+#include <linux/log2.h>
+#include <linux/err.h>
+
+/* The (x, y) entry of e->fd, read as an int lvalue (a file descriptor). */
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+/* The (x, y) entry of e->sample_id; users in this file treat it as a struct perf_sample_id *. */
+#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+
+/*
+ * Initialise an already allocated evlist: empty id hash buckets, empty entry
+ * list, the given cpu/thread maps, a poll fd array, no workload pid and the
+ * backward mmap state set to "not ready".
+ */
+void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
+		       struct thread_map *threads)
+{
+	int bucket = 0;
+
+	while (bucket < PERF_EVLIST__HLIST_SIZE) {
+		INIT_HLIST_HEAD(&evlist->heads[bucket]);
+		bucket++;
+	}
+
+	INIT_LIST_HEAD(&evlist->entries);
+	perf_evlist__set_maps(evlist, cpus, threads);
+	fdarray__init(&evlist->pollfd, 64);
+	evlist->workload.pid = -1;
+	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
+}
+
+/* Allocate a zeroed evlist and initialise it without maps; NULL on allocation failure. */
+struct perf_evlist *perf_evlist__new(void)
+{
+	struct perf_evlist *evlist;
+
+	evlist = zalloc(sizeof(*evlist));
+	if (!evlist)
+		return NULL;
+
+	perf_evlist__init(evlist, NULL, NULL);
+	return evlist;
+}
+
+/*
+ * Create an evlist holding the default event.  Returns NULL if either the
+ * allocation or adding the default event fails; in the latter case the
+ * partially built list is deleted.
+ */
+struct perf_evlist *perf_evlist__new_default(void)
+{
+	struct perf_evlist *evlist = perf_evlist__new();
+
+	if (evlist == NULL)
+		return NULL;
+
+	if (perf_evlist__add_default(evlist) == 0)
+		return evlist;
+
+	perf_evlist__delete(evlist);
+	return NULL;
+}
+
+/*
+ * Create an evlist holding a single dummy event.  Returns NULL if either the
+ * allocation or adding the dummy event fails; in the latter case the
+ * partially built list is deleted.
+ */
+struct perf_evlist *perf_evlist__new_dummy(void)
+{
+	struct perf_evlist *evlist = perf_evlist__new();
+
+	if (evlist == NULL)
+		return NULL;
+
+	if (perf_evlist__add_dummy(evlist) == 0)
+		return evlist;
+
+	perf_evlist__delete(evlist);
+	return NULL;
+}
+
+/**
+ * perf_evlist__set_id_pos - cache the event id positions on the evlist.
+ * @evlist: selected event list
+ *
+ * Events with compatible sample types all share the same id_pos and is_pos,
+ * so the values of the first evsel are copied onto the evlist for
+ * convenience.
+ */
+void perf_evlist__set_id_pos(struct perf_evlist *evlist)
+{
+	const struct perf_evsel *head = perf_evlist__first(evlist);
+
+	evlist->is_pos = head->is_pos;
+	evlist->id_pos = head->id_pos;
+}
+
+/* Recompute the id positions of every evsel, then refresh the copy kept on the evlist. */
+static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		perf_evsel__calc_id_pos(pos);
+	}
+
+	perf_evlist__set_id_pos(evlist);
+}
+
+/* Unlink and delete every evsel on the list, leaving it empty. */
+static void perf_evlist__purge(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *next;
+
+	/* The _safe iterator is needed because each entry is unlinked as we go. */
+	evlist__for_each_entry_safe(evlist, next, evsel) {
+		evsel->evlist = NULL;
+		list_del_init(&evsel->node);
+		perf_evsel__delete(evsel);
+	}
+
+	evlist->nr_entries = 0;
+}
+
+/*
+ * Free the two mmap descriptor arrays (resetting the pointers to NULL) and
+ * tear down the poll fd array.  The evlist itself is not freed.
+ */
+void perf_evlist__exit(struct perf_evlist *evlist)
+{
+	zfree(&evlist->overwrite_mmap);
+	zfree(&evlist->mmap);
+	fdarray__exit(&evlist->pollfd);
+}
+
+/*
+ * Destroy an evlist: unmap, close, drop the cpu/thread map references,
+ * delete all evsels and free the list.  A NULL @evlist is a no-op.
+ */
+void perf_evlist__delete(struct perf_evlist *evlist)
+{
+	if (!evlist)
+		return;
+
+	perf_evlist__munmap(evlist);
+	perf_evlist__close(evlist);
+
+	cpu_map__put(evlist->cpus);
+	evlist->cpus = NULL;
+	thread_map__put(evlist->threads);
+	evlist->threads = NULL;
+
+	perf_evlist__purge(evlist);
+	perf_evlist__exit(evlist);
+	free(evlist);
+}
+
+/*
+ * Point @evsel at the cpu and thread maps it should use.
+ *
+ * An evsel that already has its own cpus (via PMU sysfs) keeps them unless
+ * the user supplied a target cpu list; otherwise the evlist's cpus are used.
+ * The thread map always comes from the evlist.
+ */
+static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
+					  struct perf_evsel *evsel)
+{
+	struct cpu_map *wanted = NULL;
+	bool replace = false;
+
+	if (!evsel->own_cpus || evlist->has_user_cpus) {
+		wanted = evlist->cpus;
+		replace = true;
+	} else if (evsel->cpus != evsel->own_cpus) {
+		wanted = evsel->own_cpus;
+		replace = true;
+	}
+
+	if (replace) {
+		cpu_map__put(evsel->cpus);
+		evsel->cpus = cpu_map__get(wanted);
+	}
+
+	thread_map__put(evsel->threads);
+	evsel->threads = thread_map__get(evlist->threads);
+}
+
+/* Apply __perf_evlist__propagate_maps() to every evsel on the list. */
+static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		__perf_evlist__propagate_maps(evlist, pos);
+	}
+}
+
+/*
+ * Append @entry to @evlist.  The entry's index is the number of entries
+ * before the insertion; the entry at index 0 is flagged as the tracking
+ * event.  Adding the first entry also refreshes the evlist's id positions.
+ */
+void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
+{
+	bool was_empty;
+
+	list_add_tail(&entry->node, &evlist->entries);
+	entry->evlist = evlist;
+	entry->idx = evlist->nr_entries;
+	entry->tracking = !entry->idx;
+
+	was_empty = !evlist->nr_entries;
+	evlist->nr_entries++;
+	if (was_empty)
+		perf_evlist__set_id_pos(evlist);
+
+	__perf_evlist__propagate_maps(evlist, entry);
+}
+
+/* Unlink @evsel from @evlist without deleting it; indexes of other entries are left alone. */
+void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
+{
+	list_del_init(&evsel->node);
+	evsel->evlist = NULL;
+	evlist->nr_entries--;
+}
+
+/* Move every evsel from @list to the tail of @evlist, in order, via perf_evlist__add(). */
+void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
+				   struct list_head *list)
+{
+	struct perf_evsel *pos, *next;
+
+	__evlist__for_each_entry_safe(list, next, pos) {
+		list_del_init(&pos->node);
+		perf_evlist__add(evlist, pos);
+	}
+}
+
+/*
+ * Make the first evsel on @list the group leader of every evsel on it
+ * (itself included).  The member count is derived from the idx span between
+ * the first and the last entry.
+ */
+void __perf_evlist__set_leader(struct list_head *list)
+{
+	struct perf_evsel *first = list_entry(list->next, struct perf_evsel, node);
+	struct perf_evsel *last = list_entry(list->prev, struct perf_evsel, node);
+	struct perf_evsel *pos;
+
+	first->nr_members = last->idx - first->idx + 1;
+
+	__evlist__for_each_entry(list, pos)
+		pos->leader = first;
+}
+
+/*
+ * Group all events of a non-empty evlist under its first event.  A single
+ * event does not count as a group.
+ */
+void perf_evlist__set_leader(struct perf_evlist *evlist)
+{
+	if (!evlist->nr_entries)
+		return;
+
+	if (evlist->nr_entries > 1)
+		evlist->nr_groups = 1;
+	else
+		evlist->nr_groups = 0;
+
+	__perf_evlist__set_leader(&evlist->entries);
+}
+
+/*
+ * Probe for the highest precise_ip level that can be opened: start at 3 and
+ * step down until sys_perf_event_open() succeeds or the level reaches 0.
+ * The probe fd is closed straight away; the result is left in @attr.
+ */
+void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
+{
+	for (attr->precise_ip = 3; attr->precise_ip != 0; --attr->precise_ip) {
+		int probe_fd = sys_perf_event_open(attr, 0, -1, -1, 0);
+
+		if (probe_fd == -1)
+			continue;
+
+		close(probe_fd);
+		break;
+	}
+}
+
+/* Add a cycles event (created with the given @precise setting); 0 or -ENOMEM. */
+int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise)
+{
+	struct perf_evsel *cycles;
+
+	cycles = perf_evsel__new_cycles(precise);
+	if (!cycles)
+		return -ENOMEM;
+
+	perf_evlist__add(evlist, cycles);
+	return 0;
+}
+
+/* Add a software dummy event at the next free index; 0 or -ENOMEM. */
+int perf_evlist__add_dummy(struct perf_evlist *evlist)
+{
+	struct perf_evsel *dummy;
+	struct perf_event_attr attr = {
+		.size	= sizeof(attr), /* to capture ABI version */
+		.type	= PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_DUMMY,
+	};
+
+	dummy = perf_evsel__new_idx(&attr, evlist->nr_entries);
+	if (!dummy)
+		return -ENOMEM;
+
+	perf_evlist__add(evlist, dummy);
+	return 0;
+}
+
+/*
+ * Create one evsel per attr and append them all to @evlist.  The evsels are
+ * collected on a private list first, so that on an allocation failure the
+ * ones created so far are deleted and the evlist is left untouched.
+ * Returns 0 on success, -1 on failure.
+ */
+static int perf_evlist__add_attrs(struct perf_evlist *evlist,
+				  struct perf_event_attr *attrs, size_t nr_attrs)
+{
+	LIST_HEAD(pending);
+	struct perf_evsel *pos, *tmp;
+	size_t n = 0;
+
+	while (n < nr_attrs) {
+		pos = perf_evsel__new_idx(attrs + n, evlist->nr_entries + n);
+		if (pos == NULL) {
+			__evlist__for_each_entry_safe(&pending, tmp, pos)
+				perf_evsel__delete(pos);
+			return -1;
+		}
+		list_add_tail(&pos->node, &pending);
+		n++;
+	}
+
+	perf_evlist__splice_list_tail(evlist, &pending);
+	return 0;
+}
+
+/* Run event_attr_init() over each attr in place, then add them all to @evlist. */
+int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
+				     struct perf_event_attr *attrs, size_t nr_attrs)
+{
+	struct perf_event_attr *attr;
+
+	for (attr = attrs; attr < attrs + nr_attrs; attr++)
+		event_attr_init(attr);
+
+	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
+}
+
+/* Return the first tracepoint evsel whose attr.config equals @id, or NULL. */
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (pos->attr.type != PERF_TYPE_TRACEPOINT)
+			continue;
+		if ((int)pos->attr.config == id)
+			return pos;
+	}
+
+	return NULL;
+}
+
+/* Return the first tracepoint evsel whose name is exactly @name, or NULL. */
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
+				     const char *name)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (pos->attr.type != PERF_TYPE_TRACEPOINT)
+			continue;
+		if (!strcmp(pos->name, name))
+			return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * Create the tracepoint evsel @sys:@name, attach @handler to it and add it
+ * to @evlist.  Returns 0, or -1 if the evsel could not be created.
+ */
+int perf_evlist__add_newtp(struct perf_evlist *evlist,
+			   const char *sys, const char *name, void *handler)
+{
+	struct perf_evsel *tp = perf_evsel__newtp(sys, name);
+
+	/* perf_evsel__newtp() reports failure as an ERR_PTR, not as NULL. */
+	if (!IS_ERR(tp)) {
+		tp->handler = handler;
+		perf_evlist__add(evlist, tp);
+		return 0;
+	}
+
+	return -1;
+}
+
+/* Number of per-thread slots @evsel uses: one if it is system wide, else the evlist's thread count. */
+static int perf_evlist__nr_threads(struct perf_evlist *evlist,
+				   struct perf_evsel *evsel)
+{
+	return evsel->system_wide ? 1 : thread_map__nr(evlist->threads);
+}
+
+/*
+ * Disable every group leader that has an fd array, then record the evlist
+ * as disabled.  Non-leaders are skipped.
+ */
+void perf_evlist__disable(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (perf_evsel__is_group_leader(evsel) && evsel->fd)
+			perf_evsel__disable(evsel);
+	}
+
+	evlist->enabled = false;
+}
+
+/*
+ * Enable every group leader that has an fd array, then record the evlist
+ * as enabled.  Non-leaders are skipped.
+ */
+void perf_evlist__enable(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (perf_evsel__is_group_leader(evsel) && evsel->fd)
+			perf_evsel__enable(evsel);
+	}
+
+	evlist->enabled = true;
+}
+
+/* Flip the evlist between enabled and disabled, based on its recorded state. */
+void perf_evlist__toggle_enable(struct perf_evlist *evlist)
+{
+	if (evlist->enabled)
+		perf_evlist__disable(evlist);
+	else
+		perf_evlist__enable(evlist);
+}
+
+/*
+ * Enable @evsel on cpu index @cpu for each of its threads.  Returns -EINVAL
+ * if the evsel has no fd array, otherwise 0 or the first non-zero ioctl()
+ * result (later threads are then left untouched).
+ */
+static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
+					 struct perf_evsel *evsel, int cpu)
+{
+	int nr_threads = perf_evlist__nr_threads(evlist, evsel);
+	int t = 0;
+
+	if (!evsel->fd)
+		return -EINVAL;
+
+	while (t < nr_threads) {
+		int rc = ioctl(FD(evsel, cpu, t), PERF_EVENT_IOC_ENABLE, 0);
+
+		if (rc)
+			return rc;
+		t++;
+	}
+
+	return 0;
+}
+
+/*
+ * Enable @evsel for thread index @thread on each cpu of the evlist.  Returns
+ * -EINVAL if the evsel has no fd array, otherwise 0 or the first non-zero
+ * ioctl() result (later cpus are then left untouched).
+ */
+static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
+					    struct perf_evsel *evsel,
+					    int thread)
+{
+	int nr_cpus = cpu_map__nr(evlist->cpus);
+	int c = 0;
+
+	if (!evsel->fd)
+		return -EINVAL;
+
+	while (c < nr_cpus) {
+		int rc = ioctl(FD(evsel, c, thread), PERF_EVENT_IOC_ENABLE, 0);
+
+		if (rc)
+			return rc;
+		c++;
+	}
+
+	return 0;
+}
+
+/*
+ * Enable @evsel for mmap index @idx.  @idx is taken as a cpu index when the
+ * evlist has a non-empty cpu map, and as a thread index otherwise.
+ */
+int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
+				  struct perf_evsel *evsel, int idx)
+{
+	if (cpu_map__empty(evlist->cpus))
+		return perf_evlist__enable_event_thread(evlist, evsel, idx);
+
+	return perf_evlist__enable_event_cpu(evlist, evsel, idx);
+}
+
+/*
+ * Make sure the poll fd array has room for every fd the evlist may open:
+ * one per cpu for system wide evsels, one per cpu and thread for the rest.
+ * Returns 0, or -ENOMEM if the array could not be grown.
+ */
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
+{
+	int nr_cpus = cpu_map__nr(evlist->cpus);
+	int nr_threads = thread_map__nr(evlist->threads);
+	struct perf_evsel *pos;
+	int needed = 0;
+
+	evlist__for_each_entry(evlist, pos)
+		needed += pos->system_wide ? nr_cpus : nr_cpus * nr_threads;
+
+	if (fdarray__available_entries(&evlist->pollfd) < needed) {
+		if (fdarray__grow(&evlist->pollfd, needed) < 0)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Add @fd to the poll set, always watching for POLLERR and POLLHUP on top of
+ * @revent.  Returns the slot fdarray__add() gave us (negative on failure).
+ */
+static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
+				     struct perf_mmap *map, short revent)
+{
+	short events = revent | POLLERR | POLLHUP;
+	int slot = fdarray__add(&evlist->pollfd, fd, events);
+
+	if (slot < 0)
+		return slot;
+
+	/*
+	 * Remember the map behind this fd so that, when POLLHUP'ed fds are
+	 * filtered out, the associated mmap can be released.
+	 */
+	evlist->pollfd.priv[slot].ptr = map;
+
+	fcntl(fd, F_SETFL, O_NONBLOCK);
+
+	return slot;
+}
+
+/* Add an fd that has no mmap behind it to the poll set, watching for POLLIN. */
+int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
+{
+	struct perf_mmap *no_map = NULL;
+
+	return __perf_evlist__add_pollfd(evlist, fd, no_map, POLLIN);
+}
+
+/*
+ * fdarray__filter() callback: drop the reference on the mmap recorded for
+ * the filtered slot, if one was recorded.  Note that @fd is used as an index
+ * into fda->priv[].
+ */
+static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
+					 void *arg __maybe_unused)
+{
+	struct perf_mmap *map = fda->priv[fd].ptr;
+
+	if (!map)
+		return;
+
+	perf_mmap__put(map);
+}
+
+/*
+ * Filter the poll set by @revents_and_mask, releasing the mmap of each
+ * filtered entry; returns whatever fdarray__filter() returns.
+ */
+int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
+{
+	struct fdarray *fda = &evlist->pollfd;
+
+	return fdarray__filter(fda, revents_and_mask,
+			       perf_evlist__munmap_filtered, NULL);
+}
+
+/* Poll the evlist's fd set with the given @timeout; returns fdarray__poll()'s result. */
+int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
+{
+	struct fdarray *fda = &evlist->pollfd;
+
+	return fdarray__poll(fda, timeout);
+}
+
+/*
+ * Fill in the sample id slot of @evsel for (@cpu, @thread) with @id and hook
+ * it into the evlist's id hash table so it can be found by id later.
+ */
+static void perf_evlist__id_hash(struct perf_evlist *evlist,
+				 struct perf_evsel *evsel,
+				 int cpu, int thread, u64 id)
+{
+	struct perf_sample_id *sid = SID(evsel, cpu, thread);
+	struct hlist_head *bucket;
+
+	sid->evsel = evsel;
+	sid->id = id;
+
+	bucket = &evlist->heads[hash_64(id, PERF_EVLIST__HLIST_BITS)];
+	hlist_add_head(&sid->node, bucket);
+}
+
+/*
+ * Register @id for @evsel at (@cpu, @thread): hash it on the evlist and
+ * append it to the evsel's id array.  The array is not bounds checked here.
+ */
+void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
+			 int cpu, int thread, u64 id)
+{
+	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
+
+	evsel->id[evsel->ids] = id;
+	evsel->ids++;
+}
+
+/*
+ * Find out the event id behind @fd and register it for @evsel at
+ * (@cpu, @thread).
+ *
+ * The id is first requested with the PERF_EVENT_IOC_ID ioctl.  If that fails
+ * with ENOTTY, the id is instead dug out of a read() of the counter, which
+ * only works when the evsel reads with PERF_FORMAT_ID and the evlist does
+ * not use PERF_FORMAT_GROUP.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+			   struct perf_evsel *evsel,
+			   int cpu, int thread, int fd)
+{
+	/* Room for value, time enabled, time running and id. */
+	u64 read_data[4] = { 0, };
+	int id_idx = 1; /* The first entry is the counter value */
+	u64 id;
+	int ret;
+
+	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
+	if (!ret)
+		goto add;
+
+	/* Any error other than "ioctl not supported" is final. */
+	if (errno != ENOTTY)
+		return -1;
+
+	/* Legacy way to get event id.. All hail to old kernels! */
+
+	/*
+	 * This way does not work with group format read, so bail
+	 * out in that case.
+	 */
+	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
+		return -1;
+
+	/*
+	 * NOTE(review): only a read() result of -1 is treated as failure; a
+	 * short read leaves the zero-initialised tail of read_data in place.
+	 */
+	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
+	    read(fd, &read_data, sizeof(read_data)) == -1)
+		return -1;
+
+	/* Each optional time field present pushes the id one slot further. */
+	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		++id_idx;
+	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		++id_idx;
+
+	id = read_data[id_idx];
+
+ add:
+	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
+	return 0;
+}
+
+/*
+ * Record on the sample id slot of @evsel for (@cpu, @thread) which mmap
+ * index it belongs to, together with the real cpu number and tid.  The cpu
+ * or the tid is set to -1 when it cannot be determined (no map, negative
+ * index or, for the tid, a system wide evsel).
+ */
+static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
+				     struct perf_evsel *evsel, int idx, int cpu,
+				     int thread)
+{
+	struct perf_sample_id *sid = SID(evsel, cpu, thread);
+
+	sid->idx = idx;
+
+	sid->cpu = -1;
+	if (evlist->cpus && cpu >= 0)
+		sid->cpu = evlist->cpus->map[cpu];
+
+	sid->tid = -1;
+	if (!evsel->system_wide && evlist->threads && thread >= 0)
+		sid->tid = thread_map__pid(evlist->threads, thread);
+}
+
+/* Look @id up in the evlist's id hash table; returns its sample id entry or NULL. */
+struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
+{
+	int bucket = hash_64(id, PERF_EVLIST__HLIST_BITS);
+	struct hlist_head *chain = &evlist->heads[bucket];
+	struct perf_sample_id *pos;
+
+	hlist_for_each_entry(pos, chain, node) {
+		if (pos->id == id)
+			return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * Map an event id to its evsel, leniently: a single-entry evlist or an id of
+ * zero always yields the first evsel, and so does an unknown id when the
+ * evlist does not use sample_id_all.  Otherwise an unknown id yields NULL.
+ */
+struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
+{
+	struct perf_sample_id *sid;
+
+	if (evlist->nr_entries == 1 || id == 0)
+		return perf_evlist__first(evlist);
+
+	sid = perf_evlist__id2sid(evlist, id);
+	if (sid != NULL)
+		return sid->evsel;
+
+	if (perf_evlist__sample_id_all(evlist))
+		return NULL;
+
+	return perf_evlist__first(evlist);
+}
+
+/*
+ * Map an event id to its evsel with no fallbacks: an id of zero or an id
+ * that is not in the hash table yields NULL.
+ */
+struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
+						u64 id)
+{
+	struct perf_sample_id *sid = NULL;
+
+	if (id)
+		sid = perf_evlist__id2sid(evlist, id);
+
+	return sid ? sid->evsel : NULL;
+}
+
+/*
+ * Extract the event id from a raw @event.
+ *
+ * The payload after the header is treated as an array of 64-bit words.  For
+ * a sample the id sits id_pos words from the start; for any other record it
+ * sits is_pos words back from the end.  Returns 0 with *@id set, or -1 when
+ * the record is too short to hold the id.
+ */
+static int perf_evlist__event2id(struct perf_evlist *evlist,
+				 union perf_event *event, u64 *id)
+{
+	const u64 *array = event->sample.array;
+	ssize_t n;
+
+	/* Number of 64-bit words that follow the header. */
+	n = (event->header.size - sizeof(event->header)) >> 3;
+
+	if (event->header.type == PERF_RECORD_SAMPLE) {
+		if (evlist->id_pos >= n)
+			return -1;
+		*id = array[evlist->id_pos];
+		return 0;
+	}
+
+	if (evlist->is_pos > n)
+		return -1;
+
+	n -= evlist->is_pos;
+	*id = array[n];
+	return 0;
+}
+
+/*
+ * Find the evsel a raw @event belongs to.
+ *
+ * The first evsel is returned without looking at the id when the evlist has
+ * a single entry, when a non-sample record arrives and sample_id_all is off,
+ * or when the id is zero (synthesized events).  Otherwise the id is looked
+ * up in the hash table; NULL means the id could not be extracted or is
+ * unknown.
+ */
+struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
+					    union perf_event *event)
+{
+	struct perf_evsel *first = perf_evlist__first(evlist);
+	struct perf_sample_id *sid;
+	u64 id;
+
+	if (evlist->nr_entries == 1)
+		return first;
+
+	if (!first->attr.sample_id_all &&
+	    event->header.type != PERF_RECORD_SAMPLE)
+		return first;
+
+	if (perf_evlist__event2id(evlist, event, &id) != 0)
+		return NULL;
+
+	/* Synthesized events have an id of zero */
+	if (id == 0)
+		return first;
+
+	sid = perf_evlist__id2sid(evlist, id);
+	return sid ? sid->evsel : NULL;
+}
+
+/*
+ * Pause (@value true) or resume (@value false) output on every overwrite
+ * mmap that has an fd.  Nothing to do when there are no overwrite mmaps.
+ * Returns 0, or the first non-zero ioctl() result.
+ */
+static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
+{
+	int idx;
+
+	if (evlist->overwrite_mmap == NULL)
+		return 0;
+
+	for (idx = 0; idx < evlist->nr_mmaps; idx++) {
+		int fd = evlist->overwrite_mmap[idx].fd;
+
+		if (fd >= 0) {
+			int rc = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
+
+			if (rc)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+/* Pause output on the overwrite mmaps. */
+static int perf_evlist__pause(struct perf_evlist *evlist)
+{
+	const bool paused = true;
+
+	return perf_evlist__set_paused(evlist, paused);
+}
+
+/* Resume output on the overwrite mmaps. */
+static int perf_evlist__resume(struct perf_evlist *evlist)
+{
+	const bool paused = false;
+
+	return perf_evlist__set_paused(evlist, paused);
+}
+
+/*
+ * Unmap every ring buffer of both mmap sets while keeping the descriptor
+ * arrays themselves allocated.
+ */
+static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
+{
+	int idx;
+
+	if (evlist->mmap) {
+		for (idx = 0; idx < evlist->nr_mmaps; idx++)
+			perf_mmap__munmap(&evlist->mmap[idx]);
+	}
+
+	if (evlist->overwrite_mmap) {
+		for (idx = 0; idx < evlist->nr_mmaps; idx++)
+			perf_mmap__munmap(&evlist->overwrite_mmap[idx]);
+	}
+}
+
+/* Unmap all ring buffers and free both descriptor arrays (pointers end up NULL). */
+void perf_evlist__munmap(struct perf_evlist *evlist)
+{
+	perf_evlist__munmap_nofree(evlist);
+
+	zfree(&evlist->overwrite_mmap);
+	zfree(&evlist->mmap);
+}
+
+/*
+ * Allocate the array of mmap descriptors for @evlist and return it (NULL on
+ * allocation failure).  As a side effect evlist->nr_mmaps is set: the number
+ * of cpus, or the number of threads when the cpu map is empty.
+ *
+ * Every descriptor starts out with no fd, the requested @overwrite mode and
+ * a zero reference count; no ring buffer is mapped here.
+ */
+static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist,
+						 bool overwrite)
+{
+	int i;
+	struct perf_mmap *map;
+
+	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
+	if (cpu_map__empty(evlist->cpus))
+		evlist->nr_mmaps = thread_map__nr(evlist->threads);
+	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+	if (!map)
+		return NULL;
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		map[i].fd = -1;
+		map[i].overwrite = overwrite;
+		/*
+		 * The count starts at zero here.  The references described
+		 * next are taken later, not by this function:
+		 *
+		 * When the perf_mmap() call is made we grab one refcount, plus
+		 * one extra to let perf_mmap__consume() get the last
+		 * events after all real references (perf_mmap__get()) are
+		 * dropped.
+		 *
+		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
+		 * thus does perf_mmap__get() on it.
+		 */
+		refcount_set(&map[i].refcnt, 0);
+	}
+	return map;
+}
+
+/* Events that write backward are not polled; everything else is. */
+static bool
+perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
+			 struct perf_evsel *evsel)
+{
+	return !evsel->attr.write_backward;
+}
+
+/*
+ * Attach the fd of every evsel for one (cpu index, thread) pair to the ring
+ * buffer at mmap index @idx.
+ *
+ * @_output and @_output_overwrite carry, across calls for the same @idx, the
+ * fd that owns the normal and the backward (overwrite) ring buffer; -1 means
+ * no buffer has been mapped yet.  The first fd seen for a buffer gets
+ * mmapped, later ones are redirected into it with PERF_EVENT_IOC_SET_OUTPUT
+ * and take a reference on the map.
+ *
+ * Returns 0 on success and -1 on failure; nothing is undone here on failure
+ * (the per-cpu/per-thread callers unmap everything).
+ */
+static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
+				       struct mmap_params *mp, int cpu_idx,
+				       int thread, int *_output, int *_output_overwrite)
+{
+	struct perf_evsel *evsel;
+	int revent;
+	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);
+
+	evlist__for_each_entry(evlist, evsel) {
+		struct perf_mmap *maps = evlist->mmap;
+		int *output = _output;
+		int fd;
+		int cpu;
+
+		/* prot is recomputed per evsel: backward buffers are mapped read-only. */
+		mp->prot = PROT_READ | PROT_WRITE;
+		if (evsel->attr.write_backward) {
+			output = _output_overwrite;
+			maps = evlist->overwrite_mmap;
+
+			/* The overwrite set is allocated lazily, on the first backward evsel. */
+			if (!maps) {
+				maps = perf_evlist__alloc_mmap(evlist, true);
+				if (!maps)
+					return -1;
+				evlist->overwrite_mmap = maps;
+				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
+					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+			}
+			mp->prot &= ~PROT_WRITE;
+		}
+
+		/* A system wide evsel is only handled for thread index 0. */
+		if (evsel->system_wide && thread)
+			continue;
+
+		/* Translate the evlist's cpu into the evsel's own cpu index; skip if absent. */
+		cpu = cpu_map__idx(evsel->cpus, evlist_cpu);
+		if (cpu == -1)
+			continue;
+
+		fd = FD(evsel, cpu, thread);
+
+		if (*output == -1) {
+			*output = fd;
+
+			if (perf_mmap__mmap(&maps[idx], mp, *output)  < 0)
+				return -1;
+		} else {
+			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
+				return -1;
+
+			perf_mmap__get(&maps[idx]);
+		}
+
+		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;
+
+		/*
+		 * The system_wide flag causes a selected event to be opened
+		 * always without a pid.  Consequently it will never get a
+		 * POLLHUP, but it is used for tracking in combination with
+		 * other events, so it should not need to be polled anyway.
+		 * Therefore don't add it for polling.
+		 */
+		if (!evsel->system_wide &&
+		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
+			/* Give back the reference taken just above for this fd. */
+			perf_mmap__put(&maps[idx]);
+			return -1;
+		}
+
+		/* With PERF_FORMAT_ID, record the id and where it maps to. */
+		if (evsel->attr.read_format & PERF_FORMAT_ID) {
+			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
+						   fd) < 0)
+				return -1;
+			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
+						 thread);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Map one ring buffer (plus one overwrite buffer if needed) per cpu, shared
+ * by all threads on that cpu.  On any failure everything mapped so far is
+ * unmapped and -1 is returned; 0 on success.
+ */
+static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
+				     struct mmap_params *mp)
+{
+	const int nr_cpus = cpu_map__nr(evlist->cpus);
+	const int nr_threads = thread_map__nr(evlist->threads);
+	int c, t;
+
+	pr_debug2("perf event ring buffer mmapped per cpu\n");
+
+	for (c = 0; c < nr_cpus; c++) {
+		/* Owning fds of this cpu's buffers, filled in by the first evsel mapped. */
+		int out_fd = -1;
+		int out_overwrite_fd = -1;
+
+		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, c, true);
+
+		for (t = 0; t < nr_threads; t++) {
+			if (!perf_evlist__mmap_per_evsel(evlist, c, mp, c, t,
+							 &out_fd, &out_overwrite_fd))
+				continue;
+
+			perf_evlist__munmap_nofree(evlist);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Map one ring buffer (plus one overwrite buffer if needed) per thread,
+ * always using cpu index 0.  On any failure everything mapped so far is
+ * unmapped and -1 is returned; 0 on success.
+ */
+static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
+					struct mmap_params *mp)
+{
+	const int nr_threads = thread_map__nr(evlist->threads);
+	int t;
+
+	pr_debug2("perf event ring buffer mmapped per thread\n");
+
+	for (t = 0; t < nr_threads; t++) {
+		/* Owning fds of this thread's buffers, filled in by the first evsel mapped. */
+		int out_fd = -1;
+		int out_overwrite_fd = -1;
+
+		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, t, false);
+
+		if (!perf_evlist__mmap_per_evsel(evlist, t, mp, 0, t,
+						 &out_fd, &out_overwrite_fd))
+			continue;
+
+		perf_evlist__munmap_nofree(evlist);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Work out how many pages fit within the kernel/perf_event_mlock_kb sysctl
+ * limit, rounded down to a power of two.
+ *
+ * When the sysctl can be read, one page worth of kilobytes is taken off the
+ * limit first; when it cannot, 512kB is assumed.
+ */
+unsigned long perf_event_mlock_kb_in_pages(void)
+{
+	unsigned long pages;
+	int max;
+
+	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
+		/*
+		 * Pick a once upon a time good value, i.e. things look
+		 * strange since we can't read a sysctl value, but lets not
+		 * die yet...
+		 */
+		max = 512;
+	} else {
+		/*
+		 * NOTE(review): a sysctl value smaller than one page makes
+		 * this negative, and nothing below guards against that.
+		 */
+		max -= (page_size / 1024);
+	}
+
+	/*
+	 * NOTE(review): "max * 1024" is evaluated in int before the division,
+	 * so very large sysctl values can overflow -- confirm the expected
+	 * range.  A result of 0 pages also reaches rounddown_pow_of_two().
+	 */
+	pages = (max * 1024) / page_size;
+	if (!is_power_of_2(pages))
+		pages = rounddown_pow_of_two(pages);
+
+	return pages;
+}
+
+/*
+ * Byte size of a ring buffer mapping of @pages data pages: the data pages
+ * plus one extra page.  UINT_MAX means "use the mlock limit"; any other
+ * count that is not a power of two is rejected by returning 0.
+ */
+size_t perf_evlist__mmap_size(unsigned long pages)
+{
+	if (pages != UINT_MAX) {
+		if (!is_power_of_2(pages))
+			return 0;
+	} else {
+		pages = perf_event_mlock_kb_in_pages();
+	}
+
+	return (pages + 1) * page_size;
+}
+
+/*
+ * Parse a ring buffer size argument into a number of pages.
+ *
+ * @str is either a byte size carrying a B/K/M/G tag, which is converted to
+ * whole pages (rounding up), or a plain decimal page count.  A count that is
+ * not a power of two is rounded up to the next one and the rounding is
+ * reported with pr_info().
+ *
+ * @min: only consulted to decide whether 0 pages is acceptable (@min == 0).
+ * @max: largest page count accepted.
+ *
+ * Return: the page count, or -EINVAL if @str is NULL, has trailing garbage,
+ * asks for 0 pages while @min is non-zero, or exceeds @max.
+ */
+static long parse_pages_arg(const char *str, unsigned long min,
+			    unsigned long max)
+{
+	unsigned long pages, val;
+	static struct parse_tag tags[] = {
+		{ .tag  = 'B', .mult = 1       },
+		{ .tag  = 'K', .mult = 1 << 10 },
+		{ .tag  = 'M', .mult = 1 << 20 },
+		{ .tag  = 'G', .mult = 1 << 30 },
+		{ .tag  = 0 },
+	};
+
+	if (str == NULL)
+		return -EINVAL;
+
+	val = parse_tag_value(str, tags);
+	if (val != (unsigned long) -1) {
+		/* we got file size value */
+		pages = PERF_ALIGN(val, page_size) / page_size;
+	} else {
+		/* we got pages count value */
+		char *eptr;
+		pages = strtoul(str, &eptr, 10);
+		if (*eptr != '\0')
+			return -EINVAL;
+	}
+
+	if (pages == 0 && min == 0) {
+		/* leave number of pages at 0 */
+	} else if (pages == 0 || pages > max ||
+		   pages > (~0UL >> 1) + 1) {
+		/*
+		 * roundup_pow_of_two() is undefined for 0 and overflows for
+		 * counts above the largest representable power of two, so
+		 * such values must be rejected before rounding.  A count that
+		 * is already above @max can only stay above it once rounded
+		 * up.
+		 */
+		return -EINVAL;
+	} else if (!is_power_of_2(pages)) {
+		char buf[100];
+
+		/* round pages up to next power of 2 */
+		pages = roundup_pow_of_two(pages);
+		if (!pages)
+			return -EINVAL;
+
+		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
+		pr_info("rounding mmap pages size to %s (%lu pages)\n",
+			buf, pages);
+	}
+
+	/* Rounding up may have pushed an acceptable count past the limit. */
+	if (pages > max)
+		return -EINVAL;
+
+	return pages;
+}
+
+/*
+ * Parse the --mmap_pages/-m argument @str into *@mmap_pages.  The accepted
+ * maximum is UINT_MAX pages, lowered if that many pages would not fit in a
+ * size_t byte count.  Returns 0 on success; on a bad argument an error is
+ * printed, *@mmap_pages is left untouched and -1 is returned.
+ */
+int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
+{
+	const unsigned long addressable = SIZE_MAX / page_size;
+	unsigned long limit = UINT_MAX;
+	long parsed;
+
+	if (limit > addressable)
+		limit = addressable;
+
+	parsed = parse_pages_arg(str, 1, limit);
+	if (parsed >= 0) {
+		*mmap_pages = parsed;
+		return 0;
+	}
+
+	pr_err("Invalid argument for --mmap_pages/-m\n");
+	return -1;
+}
+
+/* Option parser callback: opt->value is the unsigned int that receives the page count. */
+int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
+				  int unset __maybe_unused)
+{
+	unsigned int *mmap_pages = opt->value;
+
+	return __perf_evlist__parse_mmap_pages(mmap_pages, str);
+}
+
+/**
+ * perf_evlist__mmap_ex - Create mmaps to receive events.
+ * @evlist: list of events
+ * @pages: map length in pages
+ * @auxtrace_pages - auxtrace map length in pages
+ * @auxtrace_overwrite - overwrite older auxtrace data?
+ *
+ * There is no overwrite argument: the normal mmap set is allocated here in
+ * non-overwrite mode, and the per-evsel mapping code uses a separate
+ * overwrite set for events with attr.write_backward set.
+ *
+ * For maps that are not in overwrite mode the user needs to signal event
+ * consumption using perf_mmap__write_tail().  Using perf_evlist__mmap_read()
+ * does this automatically.
+ *
+ * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
+ * consumption using auxtrace_mmap__write_tail().
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
+			 unsigned int auxtrace_pages,
+			 bool auxtrace_overwrite)
+{
+	struct perf_evsel *evsel;
+	const struct cpu_map *cpus = evlist->cpus;
+	const struct thread_map *threads = evlist->threads;
+	/*
+	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
+	 * Its value is decided by evsel's write_backward.
+	 * So &mp should not be passed through const pointer.
+	 */
+	struct mmap_params mp;
+
+	/* The descriptor array may be left over from an earlier call; reuse it. */
+	if (!evlist->mmap)
+		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
+	if (!evlist->mmap)
+		return -ENOMEM;
+
+	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
+		return -ENOMEM;
+
+	/*
+	 * NOTE(review): perf_evlist__mmap_size() returns 0 for a page count
+	 * that is not a power of two; that case is not checked here, so the
+	 * mask below would wrap.
+	 */
+	evlist->mmap_len = perf_evlist__mmap_size(pages);
+	pr_debug("mmap size %zuB\n", evlist->mmap_len);
+	/* The data area is mmap_len minus one page; its size minus one is the mask. */
+	mp.mask = evlist->mmap_len - page_size - 1;
+
+	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
+				   auxtrace_pages, auxtrace_overwrite);
+
+	/* Events read with PERF_FORMAT_ID need id storage before mapping starts. */
+	evlist__for_each_entry(evlist, evsel) {
+		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+		    evsel->sample_id == NULL &&
+		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
+			return -ENOMEM;
+	}
+
+	/* An empty cpu map selects one buffer per thread instead of one per cpu. */
+	if (cpu_map__empty(cpus))
+		return perf_evlist__mmap_per_thread(evlist, &mp);
+
+	return perf_evlist__mmap_per_cpu(evlist, &mp);
+}
+
+/*
+ * Convenience wrapper around perf_evlist__mmap_ex(): maps @pages per buffer
+ * with no auxtrace area (0 auxtrace pages, auxtrace overwrite off).
+ */
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
+{
+	return perf_evlist__mmap_ex(evlist, pages, 0, false);
+}
+
+/*
+ * Build the thread map and cpu map described by @target and install them on
+ * @evlist.
+ *
+ * Returns 0 on success, -1 if either map could not be created.  On success
+ * the evlist holds its own references on both maps; the references obtained
+ * at creation time are dropped here so that the maps are freed once the
+ * evlist (and its events) let go of them.
+ */
+int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
+{
+	bool all_threads = (target->per_thread && target->system_wide);
+	struct cpu_map *cpus;
+	struct thread_map *threads;
+
+	/*
+	 * If specify '-a' and '--per-thread' to perf record, perf record
+	 * will override '--per-thread'. target->per_thread = false and
+	 * target->system_wide = true.
+	 *
+	 * If specify '--per-thread' only to perf record,
+	 * target->per_thread = true and target->system_wide = false.
+	 *
+	 * So target->per_thread && target->system_wide is false.
+	 * For perf record, thread_map__new_str doesn't call
+	 * thread_map__new_all_cpus. That will keep perf record's
+	 * current behavior.
+	 *
+	 * For perf stat, it allows the case that target->per_thread and
+	 * target->system_wide are all true. It means to collect system-wide
+	 * per-thread data. thread_map__new_str will call
+	 * thread_map__new_all_cpus to enumerate all threads.
+	 */
+	threads = thread_map__new_str(target->pid, target->tid, target->uid,
+				      all_threads);
+
+	if (!threads)
+		return -1;
+
+	if (target__uses_dummy_map(target))
+		cpus = cpu_map__dummy_new();
+	else
+		cpus = cpu_map__new(target->cpu_list);
+
+	if (!cpus)
+		goto out_delete_threads;
+
+	evlist->has_user_cpus = !!target->cpu_list;
+
+	perf_evlist__set_maps(evlist, cpus, threads);
+
+	/*
+	 * perf_evlist__set_maps() took its own references with
+	 * cpu_map__get()/thread_map__get(); put the creation references here,
+	 * otherwise both maps are leaked when the evlist is deleted.
+	 */
+	cpu_map__put(cpus);
+	thread_map__put(threads);
+
+	return 0;
+
+out_delete_threads:
+	thread_map__put(threads);
+	return -1;
+}
+
+/*
+ * Install @cpus and/or @threads as the maps of @evlist and propagate them
+ * with perf_evlist__propagate_maps().
+ */
+void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
+			   struct thread_map *threads)
+{
+	/*
+	 * Allow for the possibility that one or another of the maps isn't being
+	 * changed i.e. don't put it.  When a map does change, the evlist puts
+	 * the map it held before and obtains the new one through
+	 * cpu_map__get()/thread_map__get().
+	 * NOTE(review): that means the caller's own reference is not consumed
+	 * here and still has to be put by the caller - confirm against the
+	 * callers, an earlier version of this comment claimed the opposite.
+	 */
+	if (cpus != evlist->cpus) {
+		cpu_map__put(evlist->cpus);
+		evlist->cpus = cpu_map__get(cpus);
+	}
+
+	if (threads != evlist->threads) {
+		thread_map__put(evlist->threads);
+		evlist->threads = thread_map__get(threads);
+	}
+
+	perf_evlist__propagate_maps(evlist);
+}
+
+/* Apply __perf_evsel__set_sample_bit(@bit) to every event on @evlist. */
+void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
+				   enum perf_event_sample_format bit)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		__perf_evsel__set_sample_bit(pos, bit);
+	}
+}
+
+/* Apply __perf_evsel__reset_sample_bit(@bit) to every event on @evlist. */
+void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
+				     enum perf_event_sample_format bit)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		__perf_evsel__reset_sample_bit(pos, bit);
+	}
+}
+
+/*
+ * Apply the filter string stored on each event that has one.
+ *
+ * Stops at the first failure: *@err_evsel is set to the offending event and
+ * the error from perf_evsel__apply_filter() is returned.  Returns 0 when
+ * every filter was applied (or no event had one).
+ */
+int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		int ret;
+
+		if (!pos->filter)
+			continue;
+
+		/*
+		 * filters only work for tracepoint event, which doesn't have cpu limit.
+		 * So evlist and evsel should always be same.
+		 */
+		ret = perf_evsel__apply_filter(pos, pos->filter);
+		if (ret != 0) {
+			*err_evsel = pos;
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Set @filter on every PERF_TYPE_TRACEPOINT event of @evlist; other event
+ * types are skipped.  Returns 0 on success or the first error reported by
+ * perf_evsel__set_filter().
+ */
+int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		int ret;
+
+		if (pos->attr.type == PERF_TYPE_TRACEPOINT) {
+			ret = perf_evsel__set_filter(pos, filter);
+			if (ret != 0)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Build a filter of the form
+ *   "common_pid != P0 && common_pid != P1 && ..."
+ * from the @npids entries of @pids and set it on @evlist through
+ * perf_evlist__set_filter().
+ *
+ * Returns the result of perf_evlist__set_filter(), or -1 when the filter
+ * string cannot be allocated or when @npids is 0 (there is nothing to build
+ * a filter from).
+ */
+int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
+{
+	char *filter = NULL;
+	int ret = -1;
+	size_t i;
+
+	/*
+	 * Without this check an empty pid list would hand an uninitialized
+	 * pointer to perf_evlist__set_filter() and free().
+	 */
+	if (npids == 0)
+		return -1;
+
+	for (i = 0; i < npids; ++i) {
+		if (i == 0) {
+			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
+				return -1;
+		} else {
+			char *tmp;
+
+			/* On failure the previous string is still owned by us. */
+			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
+				goto out_free;
+
+			free(filter);
+			filter = tmp;
+		}
+	}
+
+	ret = perf_evlist__set_filter(evlist, filter);
+out_free:
+	free(filter);
+	return ret;
+}
+
+/* Single-pid form of perf_evlist__set_filter_pids(): passes a one-element list holding @pid. */
+int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
+{
+	return perf_evlist__set_filter_pids(evlist, 1, &pid);
+}
+
+/*
+ * Check that the id_pos/is_pos of every event match the ones recorded on the
+ * evlist.  A list with exactly one entry is always accepted; negative
+ * evlist positions are rejected.
+ */
+bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	if (evlist->nr_entries == 1)
+		return true;
+
+	if (evlist->id_pos < 0)
+		return false;
+	if (evlist->is_pos < 0)
+		return false;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->id_pos != evlist->id_pos)
+			return false;
+		if (evsel->is_pos != evlist->is_pos)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Return the OR of attr.sample_type over all events.  The result is cached
+ * in evlist->combined_sample_type and only recomputed while that cache is 0.
+ */
+u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
+{
+	struct perf_evsel *pos;
+
+	if (!evlist->combined_sample_type) {
+		evlist__for_each_entry(evlist, pos) {
+			evlist->combined_sample_type |= pos->attr.sample_type;
+		}
+	}
+
+	return evlist->combined_sample_type;
+}
+
+/*
+ * Like __perf_evlist__combined_sample_type(), but clears the cached value
+ * first so the combined sample type is recomputed from the current events.
+ */
+u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
+{
+	evlist->combined_sample_type = 0;
+	return __perf_evlist__combined_sample_type(evlist);
+}
+
+/* Return the OR of attr.branch_sample_type over all events (not cached). */
+u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
+{
+	struct perf_evsel *pos;
+	u64 combined = 0;
+
+	evlist__for_each_entry(evlist, pos) {
+		combined |= pos->attr.branch_sample_type;
+	}
+
+	return combined;
+}
+
+/*
+ * Check that all events share the read_format of the first event and that,
+ * if the first event samples PERF_SAMPLE_READ, PERF_FORMAT_ID is set too.
+ */
+bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
+{
+	struct perf_evsel *head = perf_evlist__first(evlist);
+	struct perf_evsel *cur;
+	u64 read_format = head->attr.read_format;
+	u64 sample_type = head->attr.sample_type;
+
+	evlist__for_each_entry(evlist, cur) {
+		if (cur->attr.read_format != read_format)
+			return false;
+	}
+
+	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
+	if (!(sample_type & PERF_SAMPLE_READ))
+		return true;
+
+	return !!(read_format & PERF_FORMAT_ID);
+}
+
+/* Return attr.read_format of the first event on @evlist. */
+u64 perf_evlist__read_format(struct perf_evlist *evlist)
+{
+	return perf_evlist__first(evlist)->attr.read_format;
+}
+
+/*
+ * Sum up the sizes of the sample-id fields selected by the first event's
+ * attr.sample_type.  Returns 0 when that event has sample_id_all unset.
+ */
+u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
+{
+	struct perf_evsel *first = perf_evlist__first(evlist);
+	struct perf_sample *data;	/* only used as an operand of sizeof */
+	u64 type;
+	u16 len = 0;
+
+	if (!first->attr.sample_id_all)
+		return len;
+
+	type = first->attr.sample_type;
+
+	if (type & PERF_SAMPLE_TID)
+		len += sizeof(data->tid) * 2;
+
+	if (type & PERF_SAMPLE_TIME)
+		len += sizeof(data->time);
+
+	if (type & PERF_SAMPLE_ID)
+		len += sizeof(data->id);
+
+	if (type & PERF_SAMPLE_STREAM_ID)
+		len += sizeof(data->stream_id);
+
+	if (type & PERF_SAMPLE_CPU)
+		len += sizeof(data->cpu) * 2;
+
+	if (type & PERF_SAMPLE_IDENTIFIER)
+		len += sizeof(data->id);
+
+	return len;
+}
+
+/*
+ * Check that every event after the first one has the same
+ * attr.sample_id_all setting as the first one.
+ */
+bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
+{
+	struct perf_evsel *head = perf_evlist__first(evlist);
+	struct perf_evsel *cur = head;
+
+	/* continue from the first entry, i.e. start comparing at the second */
+	evlist__for_each_entry_continue(evlist, cur) {
+		if (cur->attr.sample_id_all != head->attr.sample_id_all)
+			return false;
+	}
+
+	return true;
+}
+
+/* Return attr.sample_id_all of the first event on @evlist. */
+bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
+{
+	return perf_evlist__first(evlist)->attr.sample_id_all;
+}
+
+/* Record @evsel as the evlist's currently selected event (evlist->selected). */
+void perf_evlist__set_selected(struct perf_evlist *evlist,
+			       struct perf_evsel *evsel)
+{
+	evlist->selected = evsel;
+}
+
+/* Call perf_evsel__close() on every event, walking the list back to front. */
+void perf_evlist__close(struct perf_evlist *evlist)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry_reverse(evlist, pos) {
+		perf_evsel__close(pos);
+	}
+}
+
+/*
+ * Install an "all cpus" cpu map and a dummy thread map on @evlist.
+ *
+ * Returns 0 on success and -ENOMEM when either map cannot be created.
+ */
+static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
+{
+	struct cpu_map	  *cpus;
+	struct thread_map *threads;
+
+	/*
+	 * Try reading /sys/devices/system/cpu/online to get
+	 * an all cpus map.
+	 *
+	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
+	 * code needs an overhaul to properly forward the
+	 * error, and we may not want to do that fallback to a
+	 * default cpu identity map :-\
+	 */
+	cpus = cpu_map__new(NULL);
+	if (!cpus)
+		return -ENOMEM;
+
+	threads = thread_map__new_dummy();
+	if (!threads) {
+		cpu_map__put(cpus);
+		return -ENOMEM;
+	}
+
+	perf_evlist__set_maps(evlist, cpus, threads);
+
+	/*
+	 * perf_evlist__set_maps() took its own references with
+	 * cpu_map__get()/thread_map__get(), so drop the creation references.
+	 */
+	cpu_map__put(cpus);
+	thread_map__put(threads);
+
+	/* Success must be reported as 0: the caller treats err < 0 as failure. */
+	return 0;
+}
+
+/*
+ * Open every event on @evlist with its own cpu and thread maps.
+ *
+ * On failure all events are closed again, errno is set to -err and the
+ * negative error is returned; on success 0 is returned.
+ */
+int perf_evlist__open(struct perf_evlist *evlist)
+{
+	struct perf_evsel *pos;
+	int err;
+
+	/*
+	 * Default: one fd per CPU, all threads, aka systemwide
+	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
+	 */
+	if (!evlist->threads && !evlist->cpus) {
+		err = perf_evlist__create_syswide_maps(evlist);
+		if (err < 0)
+			goto out_err;
+	}
+
+	perf_evlist__update_id_pos(evlist);
+
+	evlist__for_each_entry(evlist, pos) {
+		err = perf_evsel__open(pos, pos->cpus, pos->threads);
+		if (err < 0)
+			goto out_err;
+	}
+
+	return 0;
+
+out_err:
+	perf_evlist__close(evlist);
+	errno = -err;
+	return err;
+}
+
+/*
+ * Fork the workload described by @argv and leave it "corked": the child
+ * waits on a pipe until the parent writes one byte to workload.cork_fd
+ * (see perf_evlist__start_workload()) and only then calls execvp().
+ *
+ * @target:      when target__none() is true, the child's pid is stored in
+ *               slot 0 of evlist->threads, which must already be set.
+ * @pipe_output: when true the child redirects stdout to stderr (dup2(2, 1)).
+ * @exec_error:  optional SIGUSR1 handler; when set, an execvp() failure in
+ *               the child is reported to the parent via sigqueue() carrying
+ *               errno in sival_int.
+ *
+ * Returns 0 on success with evlist->workload.pid and workload.cork_fd set,
+ * -1 on failure.
+ */
+int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
+				  const char *argv[], bool pipe_output,
+				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
+{
+	int child_ready_pipe[2], go_pipe[2];
+	char bf;
+
+	if (pipe(child_ready_pipe) < 0) {
+		perror("failed to create 'ready' pipe");
+		return -1;
+	}
+
+	if (pipe(go_pipe) < 0) {
+		perror("failed to create 'go' pipe");
+		goto out_close_ready_pipe;
+	}
+
+	evlist->workload.pid = fork();
+	if (evlist->workload.pid < 0) {
+		perror("failed to fork");
+		goto out_close_pipes;
+	}
+
+	if (!evlist->workload.pid) {
+		int ret;
+
+		if (pipe_output)
+			dup2(2, 1);
+
+		signal(SIGTERM, SIG_DFL);
+
+		close(child_ready_pipe[0]);
+		close(go_pipe[1]);
+		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+
+		/*
+		 * Tell the parent we're ready to go
+		 */
+		close(child_ready_pipe[1]);
+
+		/*
+		 * Wait until the parent tells us to go.
+		 */
+		ret = read(go_pipe[0], &bf, 1);
+		/*
+		 * The parent will ask for the execvp() to be performed by
+		 * writing exactly one byte, in workload.cork_fd, usually via
+		 * perf_evlist__start_workload().
+		 *
+		 * For cancelling the workload without actually running it,
+		 * the parent will just close workload.cork_fd, without writing
+		 * anything, i.e. read will return zero and we just exit()
+		 * here.
+		 */
+		if (ret != 1) {
+			if (ret == -1)
+				perror("unable to read pipe");
+			exit(ret);
+		}
+
+		execvp(argv[0], (char **)argv);
+
+		/* Only reached when execvp() failed. */
+		if (exec_error) {
+			union sigval val;
+
+			val.sival_int = errno;
+			if (sigqueue(getppid(), SIGUSR1, val))
+				perror(argv[0]);
+		} else
+			perror(argv[0]);
+		exit(-1);
+	}
+
+	if (exec_error) {
+		struct sigaction act = {
+			.sa_flags     = SA_SIGINFO,
+			.sa_sigaction = exec_error,
+		};
+		sigaction(SIGUSR1, &act, NULL);
+	}
+
+	if (target__none(target)) {
+		if (evlist->threads == NULL) {
+			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
+				__func__, __LINE__);
+			goto out_close_pipes;
+		}
+		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
+	}
+
+	/*
+	 * The parent only keeps the read end of the 'ready' pipe and the write
+	 * end of the 'go' pipe.  Mark the ends closed here as such, so that the
+	 * error path below does not close() the same descriptor number twice.
+	 */
+	close(child_ready_pipe[1]);
+	child_ready_pipe[1] = -1;
+	close(go_pipe[0]);
+	go_pipe[0] = -1;
+	/*
+	 * wait for child to settle
+	 */
+	if (read(child_ready_pipe[0], &bf, 1) == -1) {
+		perror("unable to read pipe");
+		goto out_close_pipes;
+	}
+
+	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
+	evlist->workload.cork_fd = go_pipe[1];
+	close(child_ready_pipe[0]);
+	return 0;
+
+out_close_pipes:
+	if (go_pipe[0] != -1)
+		close(go_pipe[0]);
+	close(go_pipe[1]);
+out_close_ready_pipe:
+	close(child_ready_pipe[0]);
+	if (child_ready_pipe[1] != -1)
+		close(child_ready_pipe[1]);
+	return -1;
+}
+
+/*
+ * Release a workload prepared by perf_evlist__prepare_workload() by writing
+ * one byte to workload.cork_fd and closing it.
+ *
+ * Returns 0 when there is no cork fd (cork_fd <= 0), otherwise the return
+ * value of write().
+ */
+int perf_evlist__start_workload(struct perf_evlist *evlist)
+{
+	char bf = 0;
+	int ret;
+
+	if (evlist->workload.cork_fd <= 0)
+		return 0;
+
+	/*
+	 * Remove the cork, let it rip!
+	 */
+	ret = write(evlist->workload.cork_fd, &bf, 1);
+	if (ret < 0)
+		perror("unable to write to pipe");
+
+	close(evlist->workload.cork_fd);
+	return ret;
+}
+
+/*
+ * Find the event @event belongs to and parse it into @sample.
+ * Returns -EFAULT when no matching event is found on @evlist.
+ */
+int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
+			      struct perf_sample *sample)
+{
+	struct perf_evsel *owner = perf_evlist__event2evsel(evlist, event);
+
+	return owner ? perf_evsel__parse_sample(owner, event, sample) : -EFAULT;
+}
+
+/*
+ * Find the event @event belongs to and extract its timestamp into
+ * *@timestamp.  Returns -EFAULT when no matching event is found on @evlist.
+ */
+int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist,
+					union perf_event *event,
+					u64 *timestamp)
+{
+	struct perf_evsel *owner = perf_evlist__event2evsel(evlist, event);
+
+	return owner ? perf_evsel__parse_sample_timestamp(owner, event, timestamp)
+		     : -EFAULT;
+}
+
+/*
+ * Print the event names to @fp on one line, separated by ", " (the separator
+ * is emitted before every event whose idx is non-zero), followed by a
+ * newline.  Returns the sum of the fprintf() return values.
+ */
+size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
+{
+	struct perf_evsel *pos;
+	size_t total = 0;
+
+	evlist__for_each_entry(evlist, pos) {
+		const char *sep = pos->idx ? ", " : "";
+
+		total += fprintf(fp, "%s%s", sep, perf_evsel__name(pos));
+	}
+
+	total += fprintf(fp, "\n");
+	return total;
+}
+
+/*
+ * Format an error message with hints for @err into @buf (at most @size
+ * bytes).  NOTE(review): presumably meant for errors from
+ * perf_evlist__open() - confirm against the callers.
+ *
+ * @err is matched against the positive errno values EACCES, EPERM and
+ * EINVAL; any other value only yields the str_error_r() text.
+ * Always returns 0.
+ */
+int perf_evlist__strerror_open(struct perf_evlist *evlist,
+			       int err, char *buf, size_t size)
+{
+	int printed, value;
+	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
+
+	switch (err) {
+	case EACCES:
+	case EPERM:
+		/* 'printed' tracks the offset at which the next hint is appended. */
+		printed = scnprintf(buf, size,
+				    "Error:\t%s.\n"
+				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);
+
+		value = perf_event_paranoid();
+
+		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");
+
+		if (value >= 2) {
+			printed += scnprintf(buf + printed, size - printed,
+					     "For your workloads it needs to be <= 1\nHint:\t");
+		}
+		printed += scnprintf(buf + printed, size - printed,
+				     "For system wide tracing it needs to be set to -1.\n");
+
+		printed += scnprintf(buf + printed, size - printed,
+				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
+				    "Hint:\tThe current value is %d.", value);
+		break;
+	case EINVAL: {
+		struct perf_evsel *first = perf_evlist__first(evlist);
+		int max_freq;
+
+		/* Without the sysctl value there is nothing specific to say. */
+		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
+			goto out_default;
+
+		/* Only give the sample rate hint when the first event requests at least the maximum. */
+		if (first->attr.sample_freq < (u64)max_freq)
+			goto out_default;
+
+		printed = scnprintf(buf, size,
+				    "Error:\t%s.\n"
+				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
+				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
+				    emsg, max_freq, first->attr.sample_freq);
+		break;
+	}
+	default:
+out_default:
+		/* Plain error string, also used as fallback by the EINVAL case. */
+		scnprintf(buf, size, "%s", emsg);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Format an error message with hints for an mmap failure @err into @buf (at
+ * most @size bytes).
+ *
+ * For EPERM the message reports the kernel/perf_event_mlock_kb sysctl and the
+ * attempted buffer size; any other value only yields the str_error_r() text.
+ * Always returns 0.
+ */
+int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
+{
+	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
+	/* Despite the "pages" in their names, both values below are in kB. */
+	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user = 0, printed = 0;
+
+	switch (err) {
+	case EPERM:
+		/* Report 0 rather than an indeterminate value if the sysctl is unreadable. */
+		if (sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user) < 0)
+			pages_max_per_user = 0;
+
+		/* pages_attempted is an int, so it must be printed with %d, not %zd. */
+		printed += scnprintf(buf + printed, size - printed,
+				     "Error:\t%s.\n"
+				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
+				     "Hint:\tTried using %d kB.\n",
+				     emsg, pages_max_per_user, pages_attempted);
+
+		if (pages_attempted >= pages_max_per_user) {
+			printed += scnprintf(buf + printed, size - printed,
+					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
+					     pages_max_per_user + pages_attempted);
+		}
+
+		printed += scnprintf(buf + printed, size - printed,
+				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
+		break;
+	default:
+		scnprintf(buf, size, "%s", emsg);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Move every event that shares @move_evsel's leader to the front of
+ * @evlist, keeping their relative order.  Nothing is done when @move_evsel
+ * already is the first event.
+ */
+void perf_evlist__to_front(struct perf_evlist *evlist,
+			   struct perf_evsel *move_evsel)
+{
+	struct perf_evsel *pos, *next;
+	LIST_HEAD(group);
+
+	if (perf_evlist__first(evlist) == move_evsel)
+		return;
+
+	/* Collect the matching events on a private list first... */
+	evlist__for_each_entry_safe(evlist, next, pos) {
+		if (pos->leader != move_evsel->leader)
+			continue;
+
+		list_move_tail(&pos->node, &group);
+	}
+
+	/* ...then splice that list back in at the head. */
+	list_splice(&group, &evlist->entries);
+}
+
+/*
+ * Make @tracking_evsel the only event with ->tracking set: the flag is
+ * cleared on every other event of @evlist.  Nothing is done when
+ * @tracking_evsel is already marked as tracking.
+ */
+void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
+				     struct perf_evsel *tracking_evsel)
+{
+	struct perf_evsel *pos;
+
+	if (tracking_evsel->tracking)
+		return;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (pos == tracking_evsel)
+			continue;
+
+		pos->tracking = false;
+	}
+
+	tracking_evsel->tracking = true;
+}
+
+/*
+ * Return the first event whose ->name equals @str, or NULL when there is
+ * none.  Events without a name are skipped.
+ */
+struct perf_evsel *
+perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
+			       const char *str)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (pos->name && strcmp(str, pos->name) == 0)
+			return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * Advance the bkw_mmap state machine of @evlist to @state.
+ *
+ * Accepted transitions:
+ *   NOTREADY     -> RUNNING
+ *   RUNNING      -> DATA_PENDING  (pauses the evlist)
+ *   DATA_PENDING -> EMPTY
+ *   EMPTY        -> RUNNING       (resumes the evlist)
+ *
+ * Any other request from one of these states is silently ignored.  Nothing
+ * is done either when the evlist has no overwrite_mmap.
+ */
+void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
+				  enum bkw_mmap_state state)
+{
+	enum bkw_mmap_state prev = evlist->bkw_mmap_state;
+	enum {
+		NONE,
+		PAUSE,
+		RESUME,
+	} todo = NONE;
+
+	if (!evlist->overwrite_mmap)
+		return;
+
+	if (prev == BKW_MMAP_NOTREADY) {
+		if (state != BKW_MMAP_RUNNING)
+			return;
+	} else if (prev == BKW_MMAP_RUNNING) {
+		if (state != BKW_MMAP_DATA_PENDING)
+			return;
+		todo = PAUSE;
+	} else if (prev == BKW_MMAP_DATA_PENDING) {
+		if (state != BKW_MMAP_EMPTY)
+			return;
+	} else if (prev == BKW_MMAP_EMPTY) {
+		if (state != BKW_MMAP_RUNNING)
+			return;
+		todo = RESUME;
+	} else {
+		/* Unknown previous state: warn, but still record the new one. */
+		WARN_ONCE(1, "Shouldn't get there\n");
+	}
+
+	evlist->bkw_mmap_state = state;
+
+	if (todo == PAUSE)
+		perf_evlist__pause(evlist);
+	else if (todo == RESUME)
+		perf_evlist__resume(evlist);
+}
+
+/*
+ * Return true only when every event on @evlist has attr.exclude_kernel set
+ * (which includes the case of an empty list).
+ */
+bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
+{
+	struct perf_evsel *pos;
+	bool all_excluded = true;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (!pos->attr.exclude_kernel) {
+			all_excluded = false;
+			break;
+		}
+	}
+
+	return all_excluded;
+}
diff --git a/util/evlist.h b/util/evlist.h
new file mode 100644
index 0000000..6c41b2f
--- /dev/null
+++ b/util/evlist.h
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVLIST_H
+#define __PERF_EVLIST_H 1
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/refcount.h>
+#include <linux/list.h>
+#include <api/fd/array.h>
+#include <stdio.h>
+#include "../perf.h"
+#include "event.h"
+#include "evsel.h"
+#include "mmap.h"
+#include "util.h"
+#include <signal.h>
+#include <unistd.h>
+
+struct pollfd;
+struct thread_map;
+struct cpu_map;
+struct record_opts;
+
+#define PERF_EVLIST__HLIST_BITS 8
+#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
+
+/*
+ * struct perf_evlist - a list of events (struct perf_evsel) together with the
+ * state shared between them: cpu/thread maps, mmaps, poll fds and an optional
+ * forked workload.
+ */
+struct perf_evlist {
+	/* list of struct perf_evsel, linked through evsel->node */
+	struct list_head entries;
+	struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
+	/* NOTE(review): presumably the number of evsels on 'entries' */
+	int		 nr_entries;
+	int		 nr_groups;
+	int		 nr_mmaps;
+	bool		 enabled;
+	/* true when the target supplied an explicit cpu list (perf_evlist__create_maps) */
+	bool		 has_user_cpus;
+	/* size in bytes of one mmap, set by perf_evlist__mmap_ex() */
+	size_t		 mmap_len;
+	/* compared against each evsel's id_pos/is_pos in perf_evlist__valid_sample_type() */
+	int		 id_pos;
+	int		 is_pos;
+	/* cached OR of all evsels' attr.sample_type; 0 means "not computed yet" */
+	u64		 combined_sample_type;
+	/* state machine driven by perf_evlist__toggle_bkw_mmap() */
+	enum bkw_mmap_state bkw_mmap_state;
+	struct {
+		/* write end of the 'go' pipe; one byte written here starts the workload */
+		int	cork_fd;
+		/* pid of the child forked by perf_evlist__prepare_workload() */
+		pid_t	pid;
+	} workload;
+	struct fdarray	 pollfd;
+	struct perf_mmap *mmap;
+	/* perf_evlist__toggle_bkw_mmap() is a no-op while this is NULL */
+	struct perf_mmap *overwrite_mmap;
+	/* installed by perf_evlist__set_maps() */
+	struct thread_map *threads;
+	struct cpu_map	  *cpus;
+	/* set by perf_evlist__set_selected() */
+	struct perf_evsel *selected;
+	struct events_stats stats;
+	struct perf_env	*env;
+	u64		first_sample_time;
+	u64		last_sample_time;
+};
+
+/*
+ * Pairs a name with an opaque handler pointer.
+ * NOTE(review): presumably used to attach handlers to events looked up by
+ * name - confirm against the users of this struct.
+ */
+struct perf_evsel_str_handler {
+	const char *name;
+	void	   *handler;
+};
+
+struct perf_evlist *perf_evlist__new(void);
+struct perf_evlist *perf_evlist__new_default(void);
+struct perf_evlist *perf_evlist__new_dummy(void);
+void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
+		       struct thread_map *threads);
+void perf_evlist__exit(struct perf_evlist *evlist);
+void perf_evlist__delete(struct perf_evlist *evlist);
+
+void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
+void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel);
+
+int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise);
+
+/* Shorthand for __perf_evlist__add_default() with 'precise' set to true. */
+static inline int perf_evlist__add_default(struct perf_evlist *evlist)
+{
+	return __perf_evlist__add_default(evlist, true);
+}
+
+int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
+				     struct perf_event_attr *attrs, size_t nr_attrs);
+
+#define perf_evlist__add_default_attrs(evlist, array) \
+	__perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
+
+int perf_evlist__add_dummy(struct perf_evlist *evlist);
+
+int perf_evlist__add_newtp(struct perf_evlist *evlist,
+			   const char *sys, const char *name, void *handler);
+
+void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
+				   enum perf_event_sample_format bit);
+void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
+				     enum perf_event_sample_format bit);
+
+#define perf_evlist__set_sample_bit(evlist, bit) \
+	__perf_evlist__set_sample_bit(evlist, PERF_SAMPLE_##bit)
+
+#define perf_evlist__reset_sample_bit(evlist, bit) \
+	__perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit)
+
+int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter);
+int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid);
+int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids);
+
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id);
+
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
+				     const char *name);
+
+void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
+			 int cpu, int thread, u64 id);
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+			   struct perf_evsel *evsel,
+			   int cpu, int thread, int fd);
+
+int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
+int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask);
+
+int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
+
+struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
+struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
+						u64 id);
+
+struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
+
+void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state);
+
+void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
+
+int perf_evlist__open(struct perf_evlist *evlist);
+void perf_evlist__close(struct perf_evlist *evlist);
+
+struct callchain_param;
+
+void perf_evlist__set_id_pos(struct perf_evlist *evlist);
+bool perf_can_sample_identifier(void);
+bool perf_can_record_switch_events(void);
+bool perf_can_record_cpu_wide(void);
+void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
+			 struct callchain_param *callchain);
+int record_opts__config(struct record_opts *opts);
+
+int perf_evlist__prepare_workload(struct perf_evlist *evlist,
+				  struct target *target,
+				  const char *argv[], bool pipe_output,
+				  void (*exec_error)(int signo, siginfo_t *info,
+						     void *ucontext));
+int perf_evlist__start_workload(struct perf_evlist *evlist);
+
+struct option;
+
+int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
+int perf_evlist__parse_mmap_pages(const struct option *opt,
+				  const char *str,
+				  int unset);
+
+unsigned long perf_event_mlock_kb_in_pages(void);
+
+int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
+			 unsigned int auxtrace_pages,
+			 bool auxtrace_overwrite);
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
+void perf_evlist__munmap(struct perf_evlist *evlist);
+
+size_t perf_evlist__mmap_size(unsigned long pages);
+
+void perf_evlist__disable(struct perf_evlist *evlist);
+void perf_evlist__enable(struct perf_evlist *evlist);
+void perf_evlist__toggle_enable(struct perf_evlist *evlist);
+
+int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
+				  struct perf_evsel *evsel, int idx);
+
+void perf_evlist__set_selected(struct perf_evlist *evlist,
+			       struct perf_evsel *evsel);
+
+void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
+			   struct thread_map *threads);
+int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target);
+int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel);
+
+void __perf_evlist__set_leader(struct list_head *list);
+void perf_evlist__set_leader(struct perf_evlist *evlist);
+
+u64 perf_evlist__read_format(struct perf_evlist *evlist);
+u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist);
+u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist);
+u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist);
+bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
+u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
+
+int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
+			      struct perf_sample *sample);
+
+int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist,
+					union perf_event *event,
+					u64 *timestamp);
+
+bool perf_evlist__valid_sample_type(struct perf_evlist *evlist);
+bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist);
+bool perf_evlist__valid_read_format(struct perf_evlist *evlist);
+
+void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
+				   struct list_head *list);
+
+/* Return true when no event is linked on evlist->entries. */
+static inline bool perf_evlist__empty(struct perf_evlist *evlist)
+{
+	return list_empty(&evlist->entries);
+}
+
+/*
+ * Return the event at the head of evlist->entries.
+ * NOTE(review): there is no empty-list check here; callers presumably make
+ * sure the list is not empty before using the result.
+ */
+static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
+{
+	return list_entry(evlist->entries.next, struct perf_evsel, node);
+}
+
+/*
+ * Return the event at the tail of evlist->entries.
+ * NOTE(review): there is no empty-list check here; callers presumably make
+ * sure the list is not empty before using the result.
+ */
+static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist)
+{
+	return list_entry(evlist->entries.prev, struct perf_evsel, node);
+}
+
+size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp);
+
+int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size);
+int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size);
+
+bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str);
+void perf_evlist__to_front(struct perf_evlist *evlist,
+			   struct perf_evsel *move_evsel);
+
+/**
+ * __evlist__for_each_entry - iterate thru all the evsels
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry(list, evsel) \
+        list_for_each_entry(evsel, list, node)
+
+/**
+ * evlist__for_each_entry - iterate thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry(evlist, evsel) \
+	__evlist__for_each_entry(&(evlist)->entries, evsel)
+
+/**
+ * __evlist__for_each_entry_continue - continue iteration thru all the evsels
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_continue(list, evsel) \
+        list_for_each_entry_continue(evsel, list, node)
+
+/**
+ * evlist__for_each_entry_continue - continue iteration thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_continue(evlist, evsel) \
+	__evlist__for_each_entry_continue(&(evlist)->entries, evsel)
+
+/**
+ * __evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_reverse(list, evsel) \
+        list_for_each_entry_reverse(evsel, list, node)
+
+/**
+ * evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_reverse(evlist, evsel) \
+	__evlist__for_each_entry_reverse(&(evlist)->entries, evsel)
+
+/**
+ * __evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @list: list_head instance to iterate
+ * @tmp: struct evsel temp iterator
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_safe(list, tmp, evsel) \
+        list_for_each_entry_safe(evsel, tmp, list, node)
+
+/**
+ * evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @tmp: struct evsel temp iterator
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_safe(evlist, tmp, evsel) \
+	__evlist__for_each_entry_safe(&(evlist)->entries, tmp, evsel)
+
+void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
+				     struct perf_evsel *tracking_evsel);
+
+void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
+
+struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
+					    union perf_event *event);
+
+bool perf_evlist__exclude_kernel(struct perf_evlist *evlist);
+#endif /* __PERF_EVLIST_H */
diff --git a/util/evsel.c b/util/evsel.c
new file mode 100644
index 0000000..4cd2cf9
--- /dev/null
+++ b/util/evsel.c
@@ -0,0 +1,2916 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <byteswap.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bitops.h>
+#include <api/fs/fs.h>
+#include <api/fs/tracing_path.h>
+#include <traceevent/event-parse.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include "asm/bug.h"
+#include "callchain.h"
+#include "cgroup.h"
+#include "event.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "util.h"
+#include "cpumap.h"
+#include "thread_map.h"
+#include "target.h"
+#include "perf_regs.h"
+#include "debug.h"
+#include "trace-event.h"
+#include "stat.h"
+#include "memswap.h"
+#include "util/parse-branch-options.h"
+
+#include "sane_ctype.h"
+
+struct perf_missing_features perf_missing_features;
+
+static clockid_t clockid;
+
+static int perf_evsel__no_extra_init(struct perf_evsel *evsel __maybe_unused)
+{
+	return 0;
+}
+
+void __weak test_attr__ready(void) { }
+
+static void perf_evsel__no_extra_fini(struct perf_evsel *evsel __maybe_unused)
+{
+}
+
+static struct {
+	size_t	size;
+	int	(*init)(struct perf_evsel *evsel);
+	void	(*fini)(struct perf_evsel *evsel);
+} perf_evsel__object = {
+	.size = sizeof(struct perf_evsel),
+	.init = perf_evsel__no_extra_init,
+	.fini = perf_evsel__no_extra_fini,
+};
+
+int perf_evsel__object_config(size_t object_size,
+			      int (*init)(struct perf_evsel *evsel),
+			      void (*fini)(struct perf_evsel *evsel))
+{
+
+	if (object_size == 0)
+		goto set_methods;
+
+	if (perf_evsel__object.size > object_size)
+		return -EINVAL;
+
+	perf_evsel__object.size = object_size;
+
+set_methods:
+	if (init != NULL)
+		perf_evsel__object.init = init;
+
+	if (fini != NULL)
+		perf_evsel__object.fini = fini;
+
+	return 0;
+}
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
+int __perf_evsel__sample_size(u64 sample_type)
+{
+	u64 mask = sample_type & PERF_SAMPLE_MASK;
+	int size = 0;
+	int i;
+
+	for (i = 0; i < 64; i++) {
+		if (mask & (1ULL << i))
+			size++;
+	}
+
+	size *= sizeof(u64);
+
+	return size;
+}
+
+/**
+ * __perf_evsel__calc_id_pos - calculate id_pos.
+ * @sample_type: sample type
+ *
+ * This function returns the position of the event id (PERF_SAMPLE_ID or
+ * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
+ * sample_event.
+ */
+static int __perf_evsel__calc_id_pos(u64 sample_type)
+{
+	int idx = 0;
+
+	if (sample_type & PERF_SAMPLE_IDENTIFIER)
+		return 0;
+
+	if (!(sample_type & PERF_SAMPLE_ID))
+		return -1;
+
+	if (sample_type & PERF_SAMPLE_IP)
+		idx += 1;
+
+	if (sample_type & PERF_SAMPLE_TID)
+		idx += 1;
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		idx += 1;
+
+	if (sample_type & PERF_SAMPLE_ADDR)
+		idx += 1;
+
+	return idx;
+}
+
+/**
+ * __perf_evsel__calc_is_pos - calculate is_pos.
+ * @sample_type: sample type
+ *
+ * This function returns the position (counting backwards) of the event id
+ * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
+ * sample_id_all is used there is an id sample appended to non-sample events.
+ */
+static int __perf_evsel__calc_is_pos(u64 sample_type)
+{
+	int idx = 1;
+
+	if (sample_type & PERF_SAMPLE_IDENTIFIER)
+		return 1;
+
+	if (!(sample_type & PERF_SAMPLE_ID))
+		return -1;
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		idx += 1;
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		idx += 1;
+
+	return idx;
+}
+
+void perf_evsel__calc_id_pos(struct perf_evsel *evsel)
+{
+	evsel->id_pos = __perf_evsel__calc_id_pos(evsel->attr.sample_type);
+	evsel->is_pos = __perf_evsel__calc_is_pos(evsel->attr.sample_type);
+}
+
+void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
+				  enum perf_event_sample_format bit)
+{
+	if (!(evsel->attr.sample_type & bit)) {
+		evsel->attr.sample_type |= bit;
+		evsel->sample_size += sizeof(u64);
+		perf_evsel__calc_id_pos(evsel);
+	}
+}
+
+void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
+				    enum perf_event_sample_format bit)
+{
+	if (evsel->attr.sample_type & bit) {
+		evsel->attr.sample_type &= ~bit;
+		evsel->sample_size -= sizeof(u64);
+		perf_evsel__calc_id_pos(evsel);
+	}
+}
+
+void perf_evsel__set_sample_id(struct perf_evsel *evsel,
+			       bool can_sample_identifier)
+{
+	if (can_sample_identifier) {
+		perf_evsel__reset_sample_bit(evsel, ID);
+		perf_evsel__set_sample_bit(evsel, IDENTIFIER);
+	} else {
+		perf_evsel__set_sample_bit(evsel, ID);
+	}
+	evsel->attr.read_format |= PERF_FORMAT_ID;
+}
+
+/**
+ * perf_evsel__is_function_event - Return whether given evsel is a function
+ * trace event
+ *
+ * @evsel - evsel selector to be tested
+ *
+ * Return %true if event is function trace event
+ */
+bool perf_evsel__is_function_event(struct perf_evsel *evsel)
+{
+#define FUNCTION_EVENT "ftrace:function"
+
+	return evsel->name &&
+	       !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT));
+
+#undef FUNCTION_EVENT
+}
+
+void perf_evsel__init(struct perf_evsel *evsel,
+		      struct perf_event_attr *attr, int idx)
+{
+	evsel->idx	   = idx;
+	evsel->tracking	   = !idx;
+	evsel->attr	   = *attr;
+	evsel->leader	   = evsel;
+	evsel->unit	   = "";
+	evsel->scale	   = 1.0;
+	evsel->evlist	   = NULL;
+	evsel->bpf_fd	   = -1;
+	INIT_LIST_HEAD(&evsel->node);
+	INIT_LIST_HEAD(&evsel->config_terms);
+	perf_evsel__object.init(evsel);
+	evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
+	perf_evsel__calc_id_pos(evsel);
+	evsel->cmdline_group_boundary = false;
+	evsel->metric_expr   = NULL;
+	evsel->metric_name   = NULL;
+	evsel->metric_events = NULL;
+	evsel->collect_stat  = false;
+	evsel->pmu_name      = NULL;
+}
+
+struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
+{
+	struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
+
+	if (evsel == NULL)	/* allocation failed: nothing to initialise */
+		return NULL;
+	perf_evsel__init(evsel, attr, idx);
+	if (perf_evsel__is_bpf_output(evsel)) {
+		evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+					    PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD);
+		evsel->attr.sample_period = 1;
+	}
+
+	return evsel;
+}
+
+static bool perf_event_can_profile_kernel(void)
+{
+	return geteuid() == 0 || perf_event_paranoid() == -1;
+}
+
+struct perf_evsel *perf_evsel__new_cycles(bool precise)
+{
+	struct perf_event_attr attr = {
+		.type	= PERF_TYPE_HARDWARE,
+		.config	= PERF_COUNT_HW_CPU_CYCLES,
+		.exclude_kernel	= !perf_event_can_profile_kernel(),
+	};
+	struct perf_evsel *evsel;
+
+	event_attr_init(&attr);
+
+	if (!precise)
+		goto new_event;
+	/*
+	 * Unnamed union member, not supported as struct member named
+	 * initializer in older compilers such as gcc 4.4.7
+	 *
+	 * Just for probing the precise_ip:
+	 */
+	attr.sample_period = 1;
+
+	perf_event_attr__set_max_precise_ip(&attr);
+	/*
+	 * Now let the usual logic to set up the perf_event_attr defaults
+	 * to kick in when we return and before perf_evsel__open() is called.
+	 */
+	attr.sample_period = 0;
+new_event:
+	evsel = perf_evsel__new(&attr);
+	if (evsel == NULL)
+		goto out;
+
+	/* heap-allocated name (evsel exit frees it); one 'p' per precise_ip level */
+	if (asprintf(&evsel->name, "cycles%s%s%.*s",
+		     (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
+		     attr.exclude_kernel ? "u" : "",
+		     (int)attr.precise_ip, "ppp") < 0)
+		goto error_free;
+out:
+	return evsel;
+error_free:
+	evsel->name = NULL;	/* asprintf() leaves it undefined on failure */
+	perf_evsel__delete(evsel);
+	return NULL;
+}
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
+{
+	struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
+	int err = -ENOMEM;
+
+	if (evsel == NULL) {
+		goto out_err;
+	} else {
+		struct perf_event_attr attr = {
+			.type	       = PERF_TYPE_TRACEPOINT,
+			.sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+					  PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
+		};
+
+		if (asprintf(&evsel->name, "%s:%s", sys, name) < 0) {
+			evsel->name = NULL; /* undefined after a failed asprintf() */
+			goto out_free;
+		}
+		evsel->tp_format = trace_event__tp_format(sys, name);
+		if (IS_ERR(evsel->tp_format)) {
+			err = PTR_ERR(evsel->tp_format);
+			goto out_free;
+		}
+		event_attr_init(&attr);
+		attr.config = evsel->tp_format->id;
+		attr.sample_period = 1;
+		perf_evsel__init(evsel, &attr, idx);
+	}
+
+	return evsel;
+
+out_free:
+	zfree(&evsel->name);
+	free(evsel);
+out_err:
+	return ERR_PTR(err);
+}
+
+const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
+	"cycles",
+	"instructions",
+	"cache-references",
+	"cache-misses",
+	"branches",
+	"branch-misses",
+	"bus-cycles",
+	"stalled-cycles-frontend",
+	"stalled-cycles-backend",
+	"ref-cycles",
+};
+
+static const char *__perf_evsel__hw_name(u64 config)
+{
+	if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
+		return perf_evsel__hw_names[config];
+
+	return "unknown-hardware";
+}
+
+static int perf_evsel__add_modifiers(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	int colon = 0, r = 0;
+	struct perf_event_attr *attr = &evsel->attr;
+	bool exclude_guest_default = false;
+
+#define MOD_PRINT(context, mod)	do {					\
+		if (!attr->exclude_##context) {				\
+			if (!colon) colon = ++r;			\
+			r += scnprintf(bf + r, size - r, "%c", mod);	\
+		} } while(0)
+
+	if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
+		MOD_PRINT(kernel, 'k');
+		MOD_PRINT(user, 'u');
+		MOD_PRINT(hv, 'h');
+		exclude_guest_default = true;
+	}
+
+	if (attr->precise_ip) {
+		if (!colon)
+			colon = ++r;
+		r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
+		exclude_guest_default = true;
+	}
+
+	if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
+		MOD_PRINT(host, 'H');
+		MOD_PRINT(guest, 'G');
+	}
+#undef MOD_PRINT
+	if (colon)
+		bf[colon - 1] = ':';
+	return r;
+}
+
+static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->attr.config));
+	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
+	"cpu-clock",
+	"task-clock",
+	"page-faults",
+	"context-switches",
+	"cpu-migrations",
+	"minor-faults",
+	"major-faults",
+	"alignment-faults",
+	"emulation-faults",
+	"dummy",
+};
+
+static const char *__perf_evsel__sw_name(u64 config)
+{
+	if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
+		return perf_evsel__sw_names[config];
+	return "unknown-software";
+}
+
+static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->attr.config));
+	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
+{
+	int r;
+
+	r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);
+
+	if (type & HW_BREAKPOINT_R)
+		r += scnprintf(bf + r, size - r, "r");
+
+	if (type & HW_BREAKPOINT_W)
+		r += scnprintf(bf + r, size - r, "w");
+
+	if (type & HW_BREAKPOINT_X)
+		r += scnprintf(bf + r, size - r, "x");
+
+	return r;
+}
+
+static int perf_evsel__bp_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
+	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_EVSEL__MAX_ALIASES] = {
+ { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
+ { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	},
+ { "LLC",	"L2",							},
+ { "dTLB",	"d-tlb",	"Data-TLB",				},
+ { "iTLB",	"i-tlb",	"Instruction-TLB",			},
+ { "branch",	"branches",	"bpu",		"btb",		"bpc",	},
+ { "node",								},
+};
+
+const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
+				   [PERF_EVSEL__MAX_ALIASES] = {
+ { "load",	"loads",	"read",					},
+ { "store",	"stores",	"write",				},
+ { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},
+};
+
+const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
+				       [PERF_EVSEL__MAX_ALIASES] = {
+ { "refs",	"Reference",	"ops",		"access",		},
+ { "misses",	"miss",							},
+};
+
+#define C(x)		PERF_COUNT_HW_CACHE_##x
+#define CACHE_READ	(1 << C(OP_READ))
+#define CACHE_WRITE	(1 << C(OP_WRITE))
+#define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
+#define COP(x)		(1 << x)
+
+/*
+ * cache operation stat
+ * L1I : Read and prefetch only
+ * ITLB and BPU : Read-only
+ */
+static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
+ [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH),
+ [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(ITLB)]	= (CACHE_READ),
+ [C(BPU)]	= (CACHE_READ),
+ [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+};
+
+bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
+{
+	/* Reject out-of-range indices before the table lookup and shift. */
+	if (type >= C(MAX) || op >= C(OP_MAX))
+		return false;
+	return (perf_evsel__hw_cache_stat[type] & COP(op)) != 0;
+}
+
+int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
+					    char *bf, size_t size)
+{
+	if (result) {
+		return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
+				 perf_evsel__hw_cache_op[op][0],
+				 perf_evsel__hw_cache_result[result][0]);
+	}
+
+	return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
+			 perf_evsel__hw_cache_op[op][1]);
+}
+
+static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
+{
+	u8 op, result, type = (config >>  0) & 0xff;
+	const char *err = "unknown-ext-hardware-cache-type";
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX)
+		goto out_err;
+
+	op = (config >>  8) & 0xff;
+	err = "unknown-ext-hardware-cache-op";
+	if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		goto out_err;
+
+	result = (config >> 16) & 0xff;
+	err = "unknown-ext-hardware-cache-result";
+	if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		goto out_err;
+
+	err = "invalid-cache";
+	if (!perf_evsel__is_cache_op_valid(type, op))
+		goto out_err;
+
+	return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
+out_err:
+	return scnprintf(bf, size, "%s", err);
+}
+
+static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	int ret = __perf_evsel__hw_cache_name(evsel->attr.config, bf, size);
+	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
+}
+
+static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config);
+	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
+}
+
+const char *perf_evsel__name(struct perf_evsel *evsel)
+{
+	char bf[128];
+
+	if (evsel->name)
+		return evsel->name;
+
+	switch (evsel->attr.type) {
+	case PERF_TYPE_RAW:
+		perf_evsel__raw_name(evsel, bf, sizeof(bf));
+		break;
+
+	case PERF_TYPE_HARDWARE:
+		perf_evsel__hw_name(evsel, bf, sizeof(bf));
+		break;
+
+	case PERF_TYPE_HW_CACHE:
+		perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
+		break;
+
+	case PERF_TYPE_SOFTWARE:
+		perf_evsel__sw_name(evsel, bf, sizeof(bf));
+		break;
+
+	case PERF_TYPE_TRACEPOINT:
+		scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
+		break;
+
+	case PERF_TYPE_BREAKPOINT:
+		perf_evsel__bp_name(evsel, bf, sizeof(bf));
+		break;
+
+	default:
+		scnprintf(bf, sizeof(bf), "unknown attr type: %d",
+			  evsel->attr.type);
+		break;
+	}
+
+	evsel->name = strdup(bf);
+
+	return evsel->name ?: "unknown";
+}
+
+const char *perf_evsel__group_name(struct perf_evsel *evsel)
+{
+	return evsel->group_name ?: "anon group";
+}
+
+/*
+ * Returns the group details for the specified leader,
+ * with following rules.
+ *
+ *  For record -e '{cycles,instructions}'
+ *    'anon group { cycles:u, instructions:u }'
+ *
+ *  For record -e 'cycles,instructions' and report --group
+ *    'cycles:u, instructions:u'
+ */
+int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
+{
+	int ret = 0;
+	struct perf_evsel *pos;
+	const char *group_name = perf_evsel__group_name(evsel);
+
+	if (!evsel->forced_leader)
+		ret = scnprintf(buf, size, "%s { ", group_name);
+
+	ret += scnprintf(buf + ret, size - ret, "%s",
+			 perf_evsel__name(evsel));
+
+	for_each_group_member(pos, evsel)
+		ret += scnprintf(buf + ret, size - ret, ", %s",
+				 perf_evsel__name(pos));
+
+	if (!evsel->forced_leader)
+		ret += scnprintf(buf + ret, size - ret, " }");
+
+	return ret;
+}
+
+static void __perf_evsel__config_callchain(struct perf_evsel *evsel,
+					   struct record_opts *opts,
+					   struct callchain_param *param)
+{
+	bool function = perf_evsel__is_function_event(evsel);
+	struct perf_event_attr *attr = &evsel->attr;
+
+	perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+
+	attr->sample_max_stack = param->max_stack;
+
+	if (param->record_mode == CALLCHAIN_LBR) {
+		if (!opts->branch_stack) {
+			if (attr->exclude_user) {
+				pr_warning("LBR callstack option is only available "
+					   "to get user callchain information. "
+					   "Falling back to framepointers.\n");
+			} else {
+				perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+				attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+							PERF_SAMPLE_BRANCH_CALL_STACK |
+							PERF_SAMPLE_BRANCH_NO_CYCLES |
+							PERF_SAMPLE_BRANCH_NO_FLAGS;
+			}
+		} else
+			 pr_warning("Cannot use LBR callstack with branch stack. "
+				    "Falling back to framepointers.\n");
+	}
+
+	if (param->record_mode == CALLCHAIN_DWARF) {
+		if (!function) {
+			perf_evsel__set_sample_bit(evsel, REGS_USER);
+			perf_evsel__set_sample_bit(evsel, STACK_USER);
+			attr->sample_regs_user |= PERF_REGS_MASK;
+			attr->sample_stack_user = param->dump_size;
+			attr->exclude_callchain_user = 1;
+		} else {
+			pr_info("Cannot use DWARF unwind for function trace event,"
+				" falling back to framepointers.\n");
+		}
+	}
+
+	if (function) {
+		pr_info("Disabling user space callchains for function trace event.\n");
+		attr->exclude_callchain_user = 1;
+	}
+}
+
+void perf_evsel__config_callchain(struct perf_evsel *evsel,
+				  struct record_opts *opts,
+				  struct callchain_param *param)
+{
+	if (param->enabled)	/* no-op unless callchains were requested */
+		__perf_evsel__config_callchain(evsel, opts, param);
+}
+
+static void
+perf_evsel__reset_callgraph(struct perf_evsel *evsel,
+			    struct callchain_param *param)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+
+	perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+	if (param->record_mode == CALLCHAIN_LBR) {
+		perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+		attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
+					      PERF_SAMPLE_BRANCH_CALL_STACK);
+	}
+	if (param->record_mode == CALLCHAIN_DWARF) {
+		perf_evsel__reset_sample_bit(evsel, REGS_USER);
+		perf_evsel__reset_sample_bit(evsel, STACK_USER);
+	}
+}
+
+static void apply_config_terms(struct perf_evsel *evsel,
+			       struct record_opts *opts, bool track)
+{
+	struct perf_evsel_config_term *term;
+	struct list_head *config_terms = &evsel->config_terms;
+	struct perf_event_attr *attr = &evsel->attr;
+	/* callgraph default */
+	struct callchain_param param = {
+		.record_mode = callchain_param.record_mode,
+	};
+	u32 dump_size = 0;
+	int max_stack = 0;
+	const char *callgraph_buf = NULL;
+
+	list_for_each_entry(term, config_terms, list) {
+		switch (term->type) {
+		case PERF_EVSEL__CONFIG_TERM_PERIOD:
+			if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
+				attr->sample_period = term->val.period;
+				attr->freq = 0;
+				perf_evsel__reset_sample_bit(evsel, PERIOD);
+			}
+			break;
+		case PERF_EVSEL__CONFIG_TERM_FREQ:
+			if (!(term->weak && opts->user_freq != UINT_MAX)) {
+				attr->sample_freq = term->val.freq;
+				attr->freq = 1;
+				perf_evsel__set_sample_bit(evsel, PERIOD);
+			}
+			break;
+		case PERF_EVSEL__CONFIG_TERM_TIME:
+			if (term->val.time)
+				perf_evsel__set_sample_bit(evsel, TIME);
+			else
+				perf_evsel__reset_sample_bit(evsel, TIME);
+			break;
+		case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
+			callgraph_buf = term->val.callgraph;
+			break;
+		case PERF_EVSEL__CONFIG_TERM_BRANCH:
+			if (term->val.branch && strcmp(term->val.branch, "no")) {
+				perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+				parse_branch_str(term->val.branch,
+						 &attr->branch_sample_type);
+			} else
+				perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+			break;
+		case PERF_EVSEL__CONFIG_TERM_STACK_USER:
+			dump_size = term->val.stack_user;
+			break;
+		case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
+			max_stack = term->val.max_stack;
+			break;
+		case PERF_EVSEL__CONFIG_TERM_INHERIT:
+			/*
+			 * attr->inherit should have already been set by
+			 * perf_evsel__config. If the user explicitly set
+			 * inherit using config terms, override the global
+			 * opts->no_inherit setting.
+			 */
+			attr->inherit = term->val.inherit ? 1 : 0;
+			break;
+		case PERF_EVSEL__CONFIG_TERM_OVERWRITE:
+			attr->write_backward = term->val.overwrite ? 1 : 0;
+			break;
+		case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* User explicitly set per-event callgraph, clear the old setting and reset. */
+	if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
+		bool sample_address = false;
+
+		if (max_stack) {
+			param.max_stack = max_stack;
+			if (callgraph_buf == NULL)
+				callgraph_buf = "fp";
+		}
+
+		/* parse callgraph parameters */
+		if (callgraph_buf != NULL) {
+			if (!strcmp(callgraph_buf, "no")) {
+				param.enabled = false;
+				param.record_mode = CALLCHAIN_NONE;
+			} else {
+				param.enabled = true;
+				if (parse_callchain_record(callgraph_buf, &param)) {
+					pr_err("per-event callgraph setting for %s failed. "
+					       "Apply callgraph global setting for it\n",
+					       perf_evsel__name(evsel));
+					return;
+				}
+				if (param.record_mode == CALLCHAIN_DWARF)
+					sample_address = true;
+			}
+		}
+		if (dump_size > 0) {
+			dump_size = round_up(dump_size, sizeof(u64));
+			param.dump_size = dump_size;
+		}
+
+		/* If global callgraph set, clear it */
+		if (callchain_param.enabled)
+			perf_evsel__reset_callgraph(evsel, &callchain_param);
+
+		/* set perf-event callgraph */
+		if (param.enabled) {
+			if (sample_address) {
+				perf_evsel__set_sample_bit(evsel, ADDR);
+				perf_evsel__set_sample_bit(evsel, DATA_SRC);
+				evsel->attr.mmap_data = track;
+			}
+			perf_evsel__config_callchain(evsel, opts, &param);
+		}
+	}
+}
+
+/*
+ * The enable_on_exec/disabled value strategy:
+ *
+ *  1) For any type of traced program:
+ *    - all independent events and group leaders are disabled
+ *    - all group members are enabled
+ *
+ *     Group members are ruled by group leaders. They need to
+ *     be enabled, because the group scheduling relies on that.
+ *
+ *  2) For traced programs executed by perf:
+ *     - all independent events and group leaders have
+ *       enable_on_exec set
+ *     - we don't specifically enable or disable any event during
+ *       the record command
+ *
+ *     Independent events and group leaders are initially disabled
+ *     and get enabled by exec. Group members are ruled by group
+ *     leaders as stated in 1).
+ *
+ *  3) For traced programs attached by perf (pid/tid):
+ *     - we specifically enable or disable all events during
+ *       the record command
+ *
+ *     When attaching events to already running traced programs we
+ *     enable/disable events specifically, as there's no
+ *     initial traced exec call.
+ */
+void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
+			struct callchain_param *callchain)
+{
+	struct perf_evsel *leader = evsel->leader;
+	struct perf_event_attr *attr = &evsel->attr;
+	int track = evsel->tracking;
+	bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;
+
+	attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
+	attr->inherit	    = !opts->no_inherit;
+	attr->write_backward = opts->overwrite ? 1 : 0;
+
+	perf_evsel__set_sample_bit(evsel, IP);
+	perf_evsel__set_sample_bit(evsel, TID);
+
+	if (evsel->sample_read) {
+		perf_evsel__set_sample_bit(evsel, READ);
+
+		/*
+		 * We need ID even in case of single event, because
+		 * PERF_SAMPLE_READ process ID specific data.
+		 */
+		perf_evsel__set_sample_id(evsel, false);
+
+		/*
+		 * Apply group format only if we belong to group
+		 * with more than one members.
+		 */
+		if (leader->nr_members > 1) {
+			attr->read_format |= PERF_FORMAT_GROUP;
+			attr->inherit = 0;
+		}
+	}
+
+	/*
+	 * We default some events to have a default interval. But keep
+	 * it a weak assumption overridable by the user.
+	 */
+	if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
+				     opts->user_interval != ULLONG_MAX)) {
+		if (opts->freq) {
+			perf_evsel__set_sample_bit(evsel, PERIOD);
+			attr->freq		= 1;
+			attr->sample_freq	= opts->freq;
+		} else {
+			attr->sample_period = opts->default_interval;
+		}
+	}
+
+	/*
+	 * Disable sampling for all group members other
+	 * than leader in case leader 'leads' the sampling.
+	 */
+	if ((leader != evsel) && leader->sample_read) {
+		attr->freq           = 0;
+		attr->sample_freq    = 0;
+		attr->sample_period  = 0;
+		attr->write_backward = 0;
+		attr->sample_id_all  = 0;
+	}
+
+	if (opts->no_samples)
+		attr->sample_freq = 0;
+
+	if (opts->inherit_stat) {
+		evsel->attr.read_format |=
+			PERF_FORMAT_TOTAL_TIME_ENABLED |
+			PERF_FORMAT_TOTAL_TIME_RUNNING |
+			PERF_FORMAT_ID;
+		attr->inherit_stat = 1;
+	}
+
+	if (opts->sample_address) {
+		perf_evsel__set_sample_bit(evsel, ADDR);
+		attr->mmap_data = track;
+	}
+
+	/*
+	 * We don't allow user space callchains for function trace
+	 * events, due to issues with page faults while tracing the page
+	 * fault handler and its overall tricky nature.
+	 */
+	if (perf_evsel__is_function_event(evsel))
+		evsel->attr.exclude_callchain_user = 1;
+
+	if (callchain && callchain->enabled && !evsel->no_aux_samples)
+		perf_evsel__config_callchain(evsel, opts, callchain);
+
+	if (opts->sample_intr_regs) {
+		attr->sample_regs_intr = opts->sample_intr_regs;
+		perf_evsel__set_sample_bit(evsel, REGS_INTR);
+	}
+
+	if (opts->sample_user_regs) {
+		attr->sample_regs_user |= opts->sample_user_regs;
+		perf_evsel__set_sample_bit(evsel, REGS_USER);
+	}
+
+	if (target__has_cpu(&opts->target) || opts->sample_cpu)
+		perf_evsel__set_sample_bit(evsel, CPU);
+
+	/*
+	 * When the user explicitly disabled time don't force it here.
+	 */
+	if (opts->sample_time &&
+	    (!perf_missing_features.sample_id_all &&
+	    (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
+	     opts->sample_time_set)))
+		perf_evsel__set_sample_bit(evsel, TIME);
+
+	if (opts->raw_samples && !evsel->no_aux_samples) {
+		perf_evsel__set_sample_bit(evsel, TIME);
+		perf_evsel__set_sample_bit(evsel, RAW);
+		perf_evsel__set_sample_bit(evsel, CPU);
+	}
+
+	if (opts->sample_address)
+		perf_evsel__set_sample_bit(evsel, DATA_SRC);
+
+	if (opts->sample_phys_addr)
+		perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
+
+	if (opts->no_buffering) {
+		attr->watermark = 0;
+		attr->wakeup_events = 1;
+	}
+	if (opts->branch_stack && !evsel->no_aux_samples) {
+		perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+		attr->branch_sample_type = opts->branch_stack;
+	}
+
+	if (opts->sample_weight)
+		perf_evsel__set_sample_bit(evsel, WEIGHT);
+
+	attr->task  = track;
+	attr->mmap  = track;
+	attr->mmap2 = track && !perf_missing_features.mmap2;
+	attr->comm  = track;
+
+	if (opts->record_namespaces)
+		attr->namespaces  = track;
+
+	if (opts->record_switch_events)
+		attr->context_switch = track;
+
+	if (opts->sample_transaction)
+		perf_evsel__set_sample_bit(evsel, TRANSACTION);
+
+	if (opts->running_time) {
+		evsel->attr.read_format |=
+			PERF_FORMAT_TOTAL_TIME_ENABLED |
+			PERF_FORMAT_TOTAL_TIME_RUNNING;
+	}
+
+	/*
+	 * XXX see the function comment above
+	 *
+	 * Disabling only independent events or group leaders,
+	 * keeping group members enabled.
+	 */
+	if (perf_evsel__is_group_leader(evsel))
+		attr->disabled = 1;
+
+	/*
+	 * Setting enable_on_exec for independent events and
+	 * group leaders for traced executed by perf.
+	 */
+	if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) &&
+		!opts->initial_delay)
+		attr->enable_on_exec = 1;
+
+	if (evsel->immediate) {
+		attr->disabled = 0;
+		attr->enable_on_exec = 0;
+	}
+
+	clockid = opts->clockid;
+	if (opts->use_clockid) {
+		attr->use_clockid = 1;
+		attr->clockid = opts->clockid;
+	}
+
+	if (evsel->precise_max)
+		perf_event_attr__set_max_precise_ip(attr);
+
+	if (opts->all_user) {
+		attr->exclude_kernel = 1;
+		attr->exclude_user   = 0;
+	}
+
+	if (opts->all_kernel) {
+		attr->exclude_kernel = 0;
+		attr->exclude_user   = 1;
+	}
+
+	/*
+	 * Apply event specific term settings,
+	 * it overloads any global configuration.
+	 */
+	apply_config_terms(evsel, opts, track);
+
+	evsel->ignore_missing_thread = opts->ignore_missing_thread;
+
+	/* The --period option takes the precedence. */
+	if (opts->period_set) {
+		if (opts->period)
+			perf_evsel__set_sample_bit(evsel, PERIOD);
+		else
+			perf_evsel__reset_sample_bit(evsel, PERIOD);
+	}
+}
+
+static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+	if (evsel->system_wide)
+		nthreads = 1;
+
+	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
+
+	if (evsel->fd) {
+		int cpu, thread;
+		for (cpu = 0; cpu < ncpus; cpu++) {
+			for (thread = 0; thread < nthreads; thread++) {
+				FD(evsel, cpu, thread) = -1;
+			}
+		}
+	}
+
+	return evsel->fd != NULL ? 0 : -ENOMEM;
+}
+
+static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
+			  int ioc,  void *arg)
+{
+	int cpu, thread;
+
+	for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+		for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+			int fd = FD(evsel, cpu, thread),
+			    err = ioctl(fd, ioc, arg);
+
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
+{
+	return perf_evsel__run_ioctl(evsel,
+				     PERF_EVENT_IOC_SET_FILTER,
+				     (void *)filter);
+}
+
+int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter)
+{
+	char *new_filter = strdup(filter);
+
+	if (new_filter != NULL) {
+		free(evsel->filter);
+		evsel->filter = new_filter;
+		return 0;
+	}
+
+	return -1;
+}
+
+static int perf_evsel__append_filter(struct perf_evsel *evsel,
+				     const char *fmt, const char *filter)
+{
+	char *new_filter;
+
+	if (evsel->filter == NULL)
+		return perf_evsel__set_filter(evsel, filter);
+
+	if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
+		free(evsel->filter);
+		evsel->filter = new_filter;
+		return 0;
+	}
+
+	return -1;
+}
+
+int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter)
+{
+	return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
+}
+
+int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
+{
+	return perf_evsel__append_filter(evsel, "%s,%s", filter);
+}
+
+int perf_evsel__enable(struct perf_evsel *evsel)
+{
+	return perf_evsel__run_ioctl(evsel,
+				     PERF_EVENT_IOC_ENABLE,
+				     0);
+}
+
+int perf_evsel__disable(struct perf_evsel *evsel)
+{
+	return perf_evsel__run_ioctl(evsel,
+				     PERF_EVENT_IOC_DISABLE,
+				     0);
+}
+
+/*
+ * Allocate the sample_id table and the flat id array of @evsel, sized for
+ * @ncpus x @nthreads entries.  A system wide evsel always uses one thread
+ * slot.  Nothing is allocated when either dimension is zero.
+ *
+ * Returns 0 on success or -ENOMEM; on failure both pointers are left NULL
+ * or unallocated.
+ */
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+	if (!ncpus || !nthreads)
+		return 0;
+
+	if (evsel->system_wide)
+		nthreads = 1;
+
+	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
+	if (!evsel->sample_id)
+		return -ENOMEM;
+
+	evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
+	if (evsel->id)
+		return 0;
+
+	/* Second allocation failed: undo the first one. */
+	xyarray__delete(evsel->sample_id);
+	evsel->sample_id = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * Release the fd table of @evsel and clear the pointer.  The descriptors
+ * stored in the table are not closed here.
+ */
+static void perf_evsel__free_fd(struct perf_evsel *evsel)
+{
+	xyarray__delete(evsel->fd);
+	evsel->fd = NULL;
+}
+
+/* Release both id containers of @evsel and clear the pointers. */
+static void perf_evsel__free_id(struct perf_evsel *evsel)
+{
+	zfree(&evsel->id);
+
+	xyarray__delete(evsel->sample_id);
+	evsel->sample_id = NULL;
+}
+
+/* Unlink and free every entry on the config_terms list of @evsel. */
+static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
+{
+	struct perf_evsel_config_term *pos, *next;
+
+	/* The _safe variant is needed because entries are freed while walking. */
+	list_for_each_entry_safe(pos, next, &evsel->config_terms, list) {
+		list_del(&pos->list);
+		free(pos);
+	}
+}
+
+/*
+ * Close every descriptor in the fd table of @evsel and mark each slot
+ * as -1.  The table itself stays allocated.
+ */
+void perf_evsel__close_fd(struct perf_evsel *evsel)
+{
+	int cpu, thread;
+
+	for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+		for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+			int *slot = &FD(evsel, cpu, thread);
+
+			close(*slot);
+			*slot = -1;
+		}
+	}
+}
+
+/*
+ * Tear down everything owned by @evsel without freeing @evsel itself.
+ * The evsel must already be unlinked from its list and detached from any
+ * evlist; both conditions are asserted.
+ */
+void perf_evsel__exit(struct perf_evsel *evsel)
+{
+	assert(list_empty(&evsel->node));
+	assert(evsel->evlist == NULL);
+	perf_evsel__free_fd(evsel);
+	perf_evsel__free_id(evsel);
+	perf_evsel__free_config_terms(evsel);
+	cgroup__put(evsel->cgrp);
+	cpu_map__put(evsel->cpus);
+	cpu_map__put(evsel->own_cpus);
+	thread_map__put(evsel->threads);
+	zfree(&evsel->group_name);
+	zfree(&evsel->name);
+	/*
+	 * The filter string is heap allocated by perf_evsel__set_filter() and
+	 * perf_evsel__append_filter(), so it has to be released here too.
+	 */
+	zfree(&evsel->filter);
+	perf_evsel__object.fini(evsel);
+}
+
+/* Tear down @evsel with perf_evsel__exit() and free the object itself. */
+void perf_evsel__delete(struct perf_evsel *evsel)
+{
+	perf_evsel__exit(evsel);
+	free(evsel);
+}
+
+/*
+ * Turn the raw reading in @count into a delta against the previous raw
+ * reading, and remember the new raw reading for the next call.
+ *
+ * @cpu == -1 selects the aggregated slot, otherwise the (@cpu, @thread)
+ * slot is used.  Does nothing when @evsel keeps no previous raw counts.
+ */
+void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
+				struct perf_counts_values *count)
+{
+	struct perf_counts_values *prev;
+	struct perf_counts_values saved;
+
+	if (evsel->prev_raw_counts == NULL)
+		return;
+
+	if (cpu == -1)
+		prev = &evsel->prev_raw_counts->aggr;
+	else
+		prev = perf_counts(evsel->prev_raw_counts, cpu, thread);
+
+	/* Swap in the new raw values before they get turned into deltas. */
+	saved = *prev;
+	*prev = *count;
+
+	count->val -= saved.val;
+	count->ena -= saved.ena;
+	count->run -= saved.run;
+}
+
+/*
+ * Scale @count->val by enabled/running time.
+ *
+ * With @scale set:
+ *   - run == 0:   the value is zeroed and *@pscaled becomes -1;
+ *   - run < ena:  the value is extrapolated as val * ena / run (rounded)
+ *                 and *@pscaled becomes 1;
+ *   - otherwise:  the value is kept and *@pscaled becomes 0.
+ * Without @scale the ena/run fields are cleared and *@pscaled becomes 0.
+ *
+ * @pscaled may be NULL.
+ */
+void perf_counts_values__scale(struct perf_counts_values *count,
+			       bool scale, s8 *pscaled)
+{
+	s8 scaled = 0;
+
+	if (!scale) {
+		count->ena = count->run = 0;
+	} else if (count->run == 0) {
+		scaled = -1;
+		count->val = 0;
+	} else if (count->run < count->ena) {
+		scaled = 1;
+		count->val = (u64)((double) count->val * count->ena / count->run + 0.5);
+	}
+
+	if (pscaled)
+		*pscaled = scaled;
+}
+
+/*
+ * Compute how many bytes a read() on an event fd of @evsel yields, based
+ * on attr.read_format: optional time_enabled / time_running / nr words
+ * plus one entry (value, optionally followed by an id) per group member.
+ */
+static int perf_evsel__read_size(struct perf_evsel *evsel)
+{
+	u64 read_format = evsel->attr.read_format;
+	int per_entry = sizeof(u64); /* the counter value */
+	int fixed = 0;
+	int nr_entries = 1;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		fixed += sizeof(u64);
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		fixed += sizeof(u64);
+
+	if (read_format & PERF_FORMAT_ID)
+		per_entry += sizeof(u64);
+
+	if (read_format & PERF_FORMAT_GROUP) {
+		/* A group read is prefixed by the number of entries. */
+		nr_entries = evsel->nr_members;
+		fixed += sizeof(u64);
+	}
+
+	return fixed + per_entry * nr_entries;
+}
+
+/*
+ * Read the counter of @evsel for (@cpu, @thread) into @count, which is
+ * zeroed first.
+ *
+ * Returns 0 on success, -EINVAL if the slot holds no valid fd, or -errno
+ * when readn() does not return a positive byte count.
+ */
+int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+		     struct perf_counts_values *count)
+{
+	size_t size = perf_evsel__read_size(evsel);
+	int fd;
+
+	memset(count, 0, sizeof(*count));
+
+	fd = FD(evsel, cpu, thread);
+	if (fd < 0)
+		return -EINVAL;
+
+	if (readn(fd, count->values, size) <= 0)
+		return -errno;
+
+	return 0;
+}
+
+/* Read one counter straight into the (@cpu, @thread) slot of evsel->counts. */
+static int
+perf_evsel__read_one(struct perf_evsel *evsel, int cpu, int thread)
+{
+	return perf_evsel__read(evsel, cpu, thread,
+				perf_counts(evsel->counts, cpu, thread));
+}
+
+/*
+ * Store @val/@ena/@run in the (@cpu, @thread) slot of counter->counts and
+ * flag the slot as loaded.
+ */
+static void
+perf_evsel__set_count(struct perf_evsel *counter, int cpu, int thread,
+		      u64 val, u64 ena, u64 run)
+{
+	struct perf_counts_values *slot = perf_counts(counter->counts, cpu, thread);
+
+	slot->val    = val;
+	slot->ena    = ena;
+	slot->run    = run;
+	slot->loaded = true;
+}
+
+/*
+ * Distribute the result of a group read (@data) over the group members.
+ *
+ * @data starts with the entry count, optionally followed by time_enabled
+ * and time_running (per the leader's read_format), then the value/id
+ * entries.  Entry 0 is stored on @leader; every other entry is stored on
+ * the evsel that its id resolves to in the leader's evlist.
+ *
+ * Returns 0 on success, -EINVAL if the entry count does not match
+ * leader->nr_members or an id cannot be resolved.
+ */
+static int
+perf_evsel__process_group_data(struct perf_evsel *leader,
+			       int cpu, int thread, u64 *data)
+{
+	u64 read_format = leader->attr.read_format;
+	struct sample_read_value *values;
+	u64 nr = data[0];
+	u64 ena = 0, run = 0;
+	u64 idx;
+
+	data++;
+
+	if (nr != (u64) leader->nr_members)
+		return -EINVAL;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		ena = *data++;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		run = *data++;
+
+	values = (struct sample_read_value *) data;
+
+	perf_evsel__set_count(leader, cpu, thread, values[0].value, ena, run);
+
+	for (idx = 1; idx < nr; idx++) {
+		struct perf_evsel *counter =
+			perf_evlist__id2evsel(leader->evlist, values[idx].id);
+
+		if (counter == NULL)
+			return -EINVAL;
+
+		perf_evsel__set_count(counter, cpu, thread,
+				      values[idx].value, ena, run);
+	}
+
+	return 0;
+}
+
+/*
+ * Read a whole group through the fd of @leader for (@cpu, @thread) and
+ * store the values on every member.
+ *
+ * The read buffer is allocated on first use and cached in
+ * leader->stats->group_data.
+ *
+ * Returns 0 on success, -EINVAL when PERF_FORMAT_ID is not set, @leader is
+ * not a group leader or the fd is invalid, -ENOMEM on allocation failure,
+ * -errno on a failed read, or the result of processing the data.
+ */
+static int
+perf_evsel__read_group(struct perf_evsel *leader, int cpu, int thread)
+{
+	struct perf_stat_evsel *ps = leader->stats;
+	u64 read_format = leader->attr.read_format;
+	int size = perf_evsel__read_size(leader);
+	u64 *data = ps->group_data;
+	int fd;
+
+	if (!(read_format & PERF_FORMAT_ID))
+		return -EINVAL;
+
+	if (!perf_evsel__is_group_leader(leader))
+		return -EINVAL;
+
+	if (data == NULL) {
+		data = zalloc(size);
+		if (data == NULL)
+			return -ENOMEM;
+
+		ps->group_data = data;
+	}
+
+	fd = FD(leader, cpu, thread);
+	if (fd < 0)
+		return -EINVAL;
+
+	if (readn(fd, data, size) <= 0)
+		return -errno;
+
+	return perf_evsel__process_group_data(leader, cpu, thread, data);
+}
+
+/*
+ * Read the counter for (@cpu, @thread), using a group read when the
+ * read_format of @evsel has PERF_FORMAT_GROUP set and a plain read
+ * otherwise.
+ */
+int perf_evsel__read_counter(struct perf_evsel *evsel, int cpu, int thread)
+{
+	if (!(evsel->attr.read_format & PERF_FORMAT_GROUP))
+		return perf_evsel__read_one(evsel, cpu, thread);
+
+	return perf_evsel__read_group(evsel, cpu, thread);
+}
+
+/*
+ * Read the counter of @evsel for (@cpu, @thread), convert it to a delta
+ * against the previous raw reading (if those are tracked), optionally scale
+ * it, and store the result in evsel->counts.
+ *
+ * The counts table is allocated on demand when missing.
+ *
+ * Returns 0 on success, -EINVAL for an invalid fd, -ENOMEM if the counts
+ * table cannot be allocated, or -errno on a failed read.
+ */
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+			      int cpu, int thread, bool scale)
+{
+	struct perf_counts_values count;
+	/* With scaling the kernel is asked for val, ena and run; else val only. */
+	size_t nv = scale ? 3 : 1;
+
+	if (FD(evsel, cpu, thread) < 0)
+		return -EINVAL;
+
+	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
+		return -ENOMEM;
+
+	/*
+	 * Only the first @nv words are filled by the read below.  Zero the
+	 * rest so that the delta computation and the final copy never see
+	 * uninitialised stack contents.
+	 */
+	memset(&count, 0, sizeof(count));
+
+	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
+		return -errno;
+
+	perf_evsel__compute_deltas(evsel, cpu, thread, &count);
+	perf_counts_values__scale(&count, scale, NULL);
+	*perf_counts(evsel->counts, cpu, thread) = count;
+	return 0;
+}
+
+/*
+ * Return the group_fd to use when opening @evsel for (@cpu, @thread):
+ * -1 for a group leader, otherwise the leader's fd for the same slot.
+ */
+static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
+{
+	int group_fd;
+
+	if (perf_evsel__is_group_leader(evsel))
+		return -1;
+
+	/*
+	 * The leader has to be processed/opened before its members;
+	 * anything else is a bug.
+	 */
+	BUG_ON(!evsel->leader->fd);
+
+	group_fd = FD(evsel->leader, cpu, thread);
+	BUG_ON(group_fd == -1);
+
+	return group_fd;
+}
+
+/*
+ * One entry of a bit -> name table used by __p_bits().  Tables are
+ * terminated by an entry whose name is NULL.
+ */
+struct bit_names {
+	int bit;		/* bit mask tested against the value */
+	const char *name;	/* text printed when the bit is set */
+};
+
+/*
+ * Format into @buf the names of all bits of @value that appear in @bits,
+ * separated by '|'.
+ *
+ * @buf:   output buffer
+ * @size:  capacity of @buf in bytes
+ * @value: bit mask to decode
+ * @bits:  table terminated by an entry with a NULL name
+ *
+ * @buf always ends up NUL terminated (given @size > 0); it is the empty
+ * string when no known bit is set.  Output that does not fit is truncated.
+ */
+static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
+{
+	bool first_bit = true;
+	int i;
+
+	if (size == 0)
+		return;
+
+	/* Do not leak a previous field's text when nothing matches. */
+	buf[0] = '\0';
+
+	for (i = 0; bits[i].name != NULL; i++) {
+		int printed;
+
+		if (!(value & bits[i].bit))
+			continue;
+
+		printed = scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name);
+		/* Keep the remaining capacity in sync with the write position. */
+		buf  += printed;
+		size -= printed;
+		first_bit = false;
+	}
+}
+
+/*
+ * Format the PERF_SAMPLE_* bits set in @value into @buf (capacity @size)
+ * as a '|' separated list of names.
+ */
+static void __p_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_##n, #n }
+	struct bit_names bits[] = {
+		bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
+		bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
+		bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
+		bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
+		bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
+		/* TRANSACTION is parsed by this file, so it must be printable too. */
+		bit_name(WEIGHT), bit_name(TRANSACTION), bit_name(PHYS_ADDR),
+		{ .name = NULL, }
+	};
+#undef bit_name
+	__p_bits(buf, size, value, bits);
+}
+
+/*
+ * Format the PERF_SAMPLE_BRANCH_* bits set in @value into @buf (capacity
+ * @size) as a '|' separated list of names.
+ */
+static void __p_branch_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
+	struct bit_names bits[] = {
+		bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY),
+		bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL),
+		bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
+		bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
+		bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
+		{ .name = NULL, }
+	};
+#undef bit_name
+	__p_bits(buf, size, value, bits);
+}
+
+/*
+ * Format the PERF_FORMAT_* bits set in @value into @buf (capacity @size)
+ * as a '|' separated list of names.
+ */
+static void __p_read_format(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_FORMAT_##n, #n }
+	struct bit_names bits[] = {
+		bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
+		bit_name(ID), bit_name(GROUP),
+		{ .name = NULL, }
+	};
+#undef bit_name
+	__p_bits(buf, size, value, bits);
+}
+
+/* Capacity of the scratch buffer the p_*() formatters below write into. */
+#define BUF_SIZE		1024
+
+/*
+ * Value formatters.  Each one writes a textual form of its argument into
+ * a local array named "buf" of BUF_SIZE bytes, which the user of the macro
+ * has to provide.
+ */
+#define p_hex(val)		snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
+#define p_unsigned(val)		snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
+#define p_signed(val)		snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
+#define p_sample_type(val)	__p_sample_type(buf, BUF_SIZE, val)
+#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
+#define p_read_format(val)	__p_read_format(buf, BUF_SIZE, val)
+
+/*
+ * Print attr->_f under the label _n using formatter _p, but only when the
+ * field is non-zero.  Relies on "attr", "buf", "fp", "priv", "ret" and the
+ * "attr__fprintf" callback being in scope; the callback's return value is
+ * added to "ret".
+ */
+#define PRINT_ATTRn(_n, _f, _p)				\
+do {							\
+	if (attr->_f) {					\
+		_p(attr->_f);				\
+		ret += attr__fprintf(fp, _n, buf, priv);\
+	}						\
+} while (0)
+
+/* Same as PRINT_ATTRn() with the field name itself used as the label. */
+#define PRINT_ATTRf(_f, _p)	PRINT_ATTRn(#_f, _f, _p)
+
+/*
+ * Report the non-zero fields of @attr, one at a time, through the
+ * @attr__fprintf callback.
+ *
+ * @fp:            stream handed to the callback
+ * @attr:          attribute to dump
+ * @attr__fprintf: callback invoked with (fp, field label, formatted value,
+ *                 priv) for every non-zero field
+ * @priv:          opaque pointer handed to the callback
+ *
+ * Returns the sum of the callback's return values.
+ */
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+			     attr__fprintf_f attr__fprintf, void *priv)
+{
+	/* Scratch buffer the p_*() formatters write into. */
+	char buf[BUF_SIZE];
+	int ret = 0;
+
+	PRINT_ATTRf(type, p_unsigned);
+	PRINT_ATTRf(size, p_unsigned);
+	PRINT_ATTRf(config, p_hex);
+	PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned);
+	PRINT_ATTRf(sample_type, p_sample_type);
+	PRINT_ATTRf(read_format, p_read_format);
+
+	/* Single flag fields. */
+	PRINT_ATTRf(disabled, p_unsigned);
+	PRINT_ATTRf(inherit, p_unsigned);
+	PRINT_ATTRf(pinned, p_unsigned);
+	PRINT_ATTRf(exclusive, p_unsigned);
+	PRINT_ATTRf(exclude_user, p_unsigned);
+	PRINT_ATTRf(exclude_kernel, p_unsigned);
+	PRINT_ATTRf(exclude_hv, p_unsigned);
+	PRINT_ATTRf(exclude_idle, p_unsigned);
+	PRINT_ATTRf(mmap, p_unsigned);
+	PRINT_ATTRf(comm, p_unsigned);
+	PRINT_ATTRf(freq, p_unsigned);
+	PRINT_ATTRf(inherit_stat, p_unsigned);
+	PRINT_ATTRf(enable_on_exec, p_unsigned);
+	PRINT_ATTRf(task, p_unsigned);
+	PRINT_ATTRf(watermark, p_unsigned);
+	PRINT_ATTRf(precise_ip, p_unsigned);
+	PRINT_ATTRf(mmap_data, p_unsigned);
+	PRINT_ATTRf(sample_id_all, p_unsigned);
+	PRINT_ATTRf(exclude_host, p_unsigned);
+	PRINT_ATTRf(exclude_guest, p_unsigned);
+	PRINT_ATTRf(exclude_callchain_kernel, p_unsigned);
+	PRINT_ATTRf(exclude_callchain_user, p_unsigned);
+	PRINT_ATTRf(mmap2, p_unsigned);
+	PRINT_ATTRf(comm_exec, p_unsigned);
+	PRINT_ATTRf(use_clockid, p_unsigned);
+	PRINT_ATTRf(context_switch, p_unsigned);
+	PRINT_ATTRf(write_backward, p_unsigned);
+	PRINT_ATTRf(namespaces, p_unsigned);
+
+	PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
+	PRINT_ATTRf(bp_type, p_unsigned);
+	PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
+	PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
+	PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
+	PRINT_ATTRf(sample_regs_user, p_hex);
+	PRINT_ATTRf(sample_stack_user, p_unsigned);
+	PRINT_ATTRf(clockid, p_signed);
+	PRINT_ATTRf(sample_regs_intr, p_hex);
+	PRINT_ATTRf(aux_watermark, p_unsigned);
+	PRINT_ATTRf(sample_max_stack, p_unsigned);
+
+	return ret;
+}
+
+/*
+ * Callback for perf_event_attr__fprintf(): print one "name value" line to
+ * @fp, with the name left aligned in a 32 character column.  @priv is
+ * unused.  Returns the fprintf() result.
+ */
+static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
+				void *priv __maybe_unused)
+{
+	return fprintf(fp, "  %-32s %s\n", name, val);
+}
+
+/*
+ * Drop the fd column @thread_idx of @pos for the first @nr_cpus CPUs by
+ * shifting every later thread's fd one slot down.  The last column keeps
+ * its old content.
+ */
+static void perf_evsel__remove_fd(struct perf_evsel *pos,
+				  int nr_cpus, int nr_threads,
+				  int thread_idx)
+{
+	const int last = nr_threads - 1;
+
+	for (int cpu = 0; cpu < nr_cpus; cpu++) {
+		for (int t = thread_idx; t < last; t++)
+			FD(pos, cpu, t) = FD(pos, cpu, t + 1);
+	}
+}
+
+/*
+ * Remove the fd column of thread @thread_idx from every evsel of
+ * evsel->evlist up to and including @evsel.
+ *
+ * Evsels that come before @evsel are handled for all @nr_cpus CPUs; for
+ * @evsel itself only the first @cpu_idx CPUs are handled.
+ *
+ * Returns 0 on success or -EINVAL when an index is out of range.
+ */
+static int update_fds(struct perf_evsel *evsel,
+		      int nr_cpus, int cpu_idx,
+		      int nr_threads, int thread_idx)
+{
+	struct perf_evsel *pos;
+
+	if (cpu_idx >= nr_cpus || thread_idx >= nr_threads)
+		return -EINVAL;
+
+	evlist__for_each_entry(evsel->evlist, pos) {
+		/*
+		 * nr_cpus is only overwritten on the final iteration (the
+		 * loop breaks right after pos == evsel), so earlier evsels
+		 * still see the full CPU count.
+		 */
+		nr_cpus = pos != evsel ? nr_cpus : cpu_idx;
+
+		perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
+
+		/*
+		 * Since fds for next evsel has not been created,
+		 * there is no need to iterate whole event list.
+		 */
+		if (pos == evsel)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * Decide whether the open failure @err for thread index @thread can be
+ * ignored, and if so drop that thread.
+ *
+ * The failure is ignored only when all of these hold: the evsel asks for
+ * missing threads to be ignored, it is not system wide, @err is -ESRCH and
+ * @threads holds more than one thread.  In that case the thread's fd
+ * column is removed and the thread is removed from @threads.
+ *
+ * Returns true when the thread was dropped and the caller may carry on,
+ * false when the error has to be handled normally.
+ */
+static bool ignore_missing_thread(struct perf_evsel *evsel,
+				  int nr_cpus, int cpu,
+				  struct thread_map *threads,
+				  int thread, int err)
+{
+	/* Looked up now: the entry is gone once the thread has been removed. */
+	pid_t ignore_pid = thread_map__pid(threads, thread);
+
+	if (!evsel->ignore_missing_thread)
+		return false;
+
+	/* The system wide setup does not work with threads. */
+	if (evsel->system_wide)
+		return false;
+
+	/* The -ESRCH is perf event syscall errno for pid's not found. */
+	if (err != -ESRCH)
+		return false;
+
+	/* If there's only one thread, let it fail. */
+	if (threads->nr == 1)
+		return false;
+
+	/*
+	 * We should remove fd for missing_thread first
+	 * because thread_map__remove() will decrease threads->nr.
+	 */
+	if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread))
+		return false;
+
+	if (thread_map__remove(threads, thread))
+		return false;
+
+	pr_warning("WARNING: Ignored open failure for pid %d\n",
+		   ignore_pid);
+	return true;
+}
+
+/*
+ * Open one perf event per (cpu, thread) pair for @evsel and store the
+ * descriptors in the evsel's fd table.
+ *
+ * @cpus:    CPUs to open on; NULL selects a shared dummy CPU map
+ * @threads: threads to open for; NULL selects a shared map created with
+ *           tid -1
+ *
+ * Failure handling, in order:
+ *   - a thread that vanished may be dropped (see ignore_missing_thread());
+ *   - on -EMFILE the RLIMIT_NOFILE limit is raised and the open retried;
+ *   - on -EINVAL for the very first open, attribute features are switched
+ *     off one at a time (recorded in perf_missing_features) and the whole
+ *     open sequence is restarted;
+ *   - otherwise every fd opened so far is closed.
+ *
+ * Returns 0 on success or a negative error code.  On error,
+ * threads->err_thread records the thread index that failed.
+ */
+int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+		     struct thread_map *threads)
+{
+	int cpu, thread, nthreads;
+	unsigned long flags = PERF_FLAG_FD_CLOEXEC;
+	int pid = -1, err;
+	enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;
+
+	if (perf_missing_features.write_backward && evsel->attr.write_backward)
+		return -EINVAL;
+
+	if (cpus == NULL) {
+		/* Created once and reused by every later call. */
+		static struct cpu_map *empty_cpu_map;
+
+		if (empty_cpu_map == NULL) {
+			empty_cpu_map = cpu_map__dummy_new();
+			if (empty_cpu_map == NULL)
+				return -ENOMEM;
+		}
+
+		cpus = empty_cpu_map;
+	}
+
+	if (threads == NULL) {
+		/* Created once and reused by every later call. */
+		static struct thread_map *empty_thread_map;
+
+		if (empty_thread_map == NULL) {
+			empty_thread_map = thread_map__new_by_tid(-1);
+			if (empty_thread_map == NULL)
+				return -ENOMEM;
+		}
+
+		threads = empty_thread_map;
+	}
+
+	if (evsel->system_wide)
+		nthreads = 1;
+	else
+		nthreads = threads->nr;
+
+	if (evsel->fd == NULL &&
+	    perf_evsel__alloc_fd(evsel, cpus->nr, nthreads) < 0)
+		return -ENOMEM;
+
+	if (evsel->cgrp) {
+		/* With PERF_FLAG_PID_CGROUP the pid argument carries the cgroup fd. */
+		flags |= PERF_FLAG_PID_CGROUP;
+		pid = evsel->cgrp->fd;
+	}
+
+fallback_missing_features:
+	/* Strip every feature already recorded as missing. */
+	if (perf_missing_features.clockid_wrong)
+		evsel->attr.clockid = CLOCK_MONOTONIC; /* should always work */
+	if (perf_missing_features.clockid) {
+		evsel->attr.use_clockid = 0;
+		evsel->attr.clockid = 0;
+	}
+	if (perf_missing_features.cloexec)
+		flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
+	if (perf_missing_features.mmap2)
+		evsel->attr.mmap2 = 0;
+	if (perf_missing_features.exclude_guest)
+		evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
+	if (perf_missing_features.lbr_flags)
+		evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
+				     PERF_SAMPLE_BRANCH_NO_CYCLES);
+	if (perf_missing_features.group_read && evsel->attr.inherit)
+		evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
+retry_sample_id:
+	if (perf_missing_features.sample_id_all)
+		evsel->attr.sample_id_all = 0;
+
+	if (verbose >= 2) {
+		fprintf(stderr, "%.60s\n", graph_dotted_line);
+		fprintf(stderr, "perf_event_attr:\n");
+		perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL);
+		fprintf(stderr, "%.60s\n", graph_dotted_line);
+	}
+
+	for (cpu = 0; cpu < cpus->nr; cpu++) {
+
+		for (thread = 0; thread < nthreads; thread++) {
+			int fd, group_fd;
+
+			if (!evsel->cgrp && !evsel->system_wide)
+				pid = thread_map__pid(threads, thread);
+
+			group_fd = get_group_fd(evsel, cpu, thread);
+retry_open:
+			pr_debug2("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",
+				  pid, cpus->map[cpu], group_fd, flags);
+
+			test_attr__ready();
+
+			fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu],
+						 group_fd, flags);
+
+			/* Stored even on failure (fd is then negative). */
+			FD(evsel, cpu, thread) = fd;
+
+			if (fd < 0) {
+				err = -errno;
+
+				if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
+					/*
+					 * We just removed 1 thread, so take a step
+					 * back on thread index and lower the upper
+					 * nthreads limit.
+					 */
+					nthreads--;
+					thread--;
+
+					/* ... and pretend like nothing have happened. */
+					err = 0;
+					continue;
+				}
+
+				pr_debug2("\nsys_perf_event_open failed, error %d\n",
+					  err);
+				goto try_fallback;
+			}
+
+			pr_debug2(" = %d\n", fd);
+
+			if (evsel->bpf_fd >= 0) {
+				int evt_fd = fd;
+				int bpf_fd = evsel->bpf_fd;
+
+				err = ioctl(evt_fd,
+					    PERF_EVENT_IOC_SET_BPF,
+					    bpf_fd);
+				/* EEXIST is tolerated here. */
+				if (err && errno != EEXIST) {
+					pr_err("failed to attach bpf fd %d: %s\n",
+					       bpf_fd, strerror(errno));
+					err = -EINVAL;
+					goto out_close;
+				}
+			}
+
+			/* A successful open re-arms the rlimit retry logic. */
+			set_rlimit = NO_CHANGE;
+
+			/*
+			 * If we succeeded but had to kill clockid, fail and
+			 * have perf_evsel__open_strerror() print us a nice
+			 * error.
+			 */
+			if (perf_missing_features.clockid ||
+			    perf_missing_features.clockid_wrong) {
+				err = -EINVAL;
+				goto out_close;
+			}
+		}
+	}
+
+	return 0;
+
+try_fallback:
+	/*
+	 * perf stat needs between 5 and 22 fds per CPU. When we run out
+	 * of them try to increase the limits.
+	 */
+	if (err == -EMFILE && set_rlimit < INCREASED_MAX) {
+		struct rlimit l;
+		int old_errno = errno;
+
+		if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
+			/* First attempt: soft limit up to the hard limit. */
+			if (set_rlimit == NO_CHANGE)
+				l.rlim_cur = l.rlim_max;
+			else {
+				/* Second attempt: try to raise the hard limit too. */
+				l.rlim_cur = l.rlim_max + 1000;
+				l.rlim_max = l.rlim_cur;
+			}
+			if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
+				set_rlimit++;
+				errno = old_errno;
+				goto retry_open;
+			}
+		}
+		errno = old_errno;
+	}
+
+	/* Feature probing only happens for -EINVAL on the very first open. */
+	if (err != -EINVAL || cpu > 0 || thread > 0)
+		goto out_close;
+
+	/*
+	 * Must probe features in the order they were added to the
+	 * perf_event_attr interface.
+	 */
+	if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
+		perf_missing_features.write_backward = true;
+		pr_debug2("switching off write_backward\n");
+		goto out_close;
+	} else if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) {
+		perf_missing_features.clockid_wrong = true;
+		pr_debug2("switching off clockid\n");
+		goto fallback_missing_features;
+	} else if (!perf_missing_features.clockid && evsel->attr.use_clockid) {
+		perf_missing_features.clockid = true;
+		pr_debug2("switching off use_clockid\n");
+		goto fallback_missing_features;
+	} else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
+		perf_missing_features.cloexec = true;
+		pr_debug2("switching off cloexec flag\n");
+		goto fallback_missing_features;
+	} else if (!perf_missing_features.mmap2 && evsel->attr.mmap2) {
+		perf_missing_features.mmap2 = true;
+		pr_debug2("switching off mmap2\n");
+		goto fallback_missing_features;
+	} else if (!perf_missing_features.exclude_guest &&
+		   (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
+		perf_missing_features.exclude_guest = true;
+		pr_debug2("switching off exclude_guest, exclude_host\n");
+		goto fallback_missing_features;
+	} else if (!perf_missing_features.sample_id_all) {
+		perf_missing_features.sample_id_all = true;
+		pr_debug2("switching off sample_id_all\n");
+		goto retry_sample_id;
+	} else if (!perf_missing_features.lbr_flags &&
+			(evsel->attr.branch_sample_type &
+			 (PERF_SAMPLE_BRANCH_NO_CYCLES |
+			  PERF_SAMPLE_BRANCH_NO_FLAGS))) {
+		perf_missing_features.lbr_flags = true;
+		pr_debug2("switching off branch sample type no (cycles/flags)\n");
+		goto fallback_missing_features;
+	} else if (!perf_missing_features.group_read &&
+		    evsel->attr.inherit &&
+		   (evsel->attr.read_format & PERF_FORMAT_GROUP) &&
+		   perf_evsel__is_group_leader(evsel)) {
+		perf_missing_features.group_read = true;
+		pr_debug2("switching off group read\n");
+		goto fallback_missing_features;
+	}
+out_close:
+	if (err)
+		threads->err_thread = thread;
+
+	/*
+	 * Close what was opened so far: the threads below the failing one on
+	 * the current cpu, then all threads of every earlier cpu.
+	 */
+	do {
+		while (--thread >= 0) {
+			close(FD(evsel, cpu, thread));
+			FD(evsel, cpu, thread) = -1;
+		}
+		thread = nthreads;
+	} while (--cpu >= 0);
+	return err;
+}
+
+/*
+ * Close all event descriptors of @evsel and release its fd table.
+ * Does nothing when no fd table is allocated.
+ */
+void perf_evsel__close(struct perf_evsel *evsel)
+{
+	if (evsel->fd != NULL) {
+		perf_evsel__close_fd(evsel);
+		perf_evsel__free_fd(evsel);
+	}
+}
+
+/*
+ * Open @evsel on @cpus without an explicit thread map; perf_evsel__open()
+ * then falls back to its default thread map.
+ */
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
+			     struct cpu_map *cpus)
+{
+	return perf_evsel__open(evsel, cpus, NULL);
+}
+
+/*
+ * Open @evsel for @threads without an explicit CPU map; perf_evsel__open()
+ * then falls back to its dummy CPU map.
+ */
+int perf_evsel__open_per_thread(struct perf_evsel *evsel,
+				struct thread_map *threads)
+{
+	return perf_evsel__open(evsel, NULL, threads);
+}
+
+/*
+ * Parse the id fields found at the end of @event into @sample.
+ *
+ * Parsing starts at the last u64 of the event and walks backwards, picking
+ * up IDENTIFIER, CPU, STREAM_ID, ID, TIME and TID in that order, each only
+ * if selected in the evsel's sample_type.  Fields that are not selected
+ * are left untouched in @sample.
+ *
+ * Always returns 0.
+ */
+static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
+				       const union perf_event *event,
+				       struct perf_sample *sample)
+{
+	u64 type = evsel->attr.sample_type;
+	const u64 *array = event->sample.array;
+	bool swapped = evsel->needs_swap;
+	union u64_swap u;
+
+	/* Point at the last u64 of the event. */
+	array += ((event->header.size -
+		   sizeof(event->header)) / sizeof(u64)) - 1;
+
+	if (type & PERF_SAMPLE_IDENTIFIER) {
+		sample->id = *array;
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_CPU) {
+		u.val64 = *array;
+		if (swapped) {
+			/* undo swap of u64, then swap on individual u32s */
+			u.val64 = bswap_64(u.val64);
+			u.val32[0] = bswap_32(u.val32[0]);
+		}
+
+		sample->cpu = u.val32[0];
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_STREAM_ID) {
+		sample->stream_id = *array;
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_ID) {
+		sample->id = *array;
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_TIME) {
+		sample->time = *array;
+		array--;
+	}
+
+	if (type & PERF_SAMPLE_TID) {
+		u.val64 = *array;
+		if (swapped) {
+			/* undo swap of u64, then swap on individual u32s */
+			u.val64 = bswap_64(u.val64);
+			u.val32[0] = bswap_32(u.val32[0]);
+			u.val32[1] = bswap_32(u.val32[1]);
+		}
+
+		sample->pid = u.val32[0];
+		sample->tid = u.val32[1];
+		array--;
+	}
+
+	return 0;
+}
+
+/*
+ * Return true when @size bytes starting at @offset do not fit: either
+ * @size exceeds @max_size or the range would end past @endp.
+ */
+static inline bool overflow(const void *endp, u16 max_size, const void *offset,
+			    u64 size)
+{
+	/* The size test comes first, so the pointer sum only sees sizes <= max_size. */
+	return size > max_size || offset + size > endp;
+}
+
+/*
+ * Make the enclosing function return -EFAULT when @size bytes at @offset
+ * do not fit (see overflow()).  Requires a variable named "endp" in scope.
+ */
+#define OVERFLOW_CHECK(offset, size, max_size)				\
+	do {								\
+		if (overflow(endp, (max_size), (offset), (size)))	\
+			return -EFAULT;					\
+	} while (0)
+
+/* OVERFLOW_CHECK() for a single u64 at @offset. */
+#define OVERFLOW_CHECK_u64(offset) \
+	OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
+
+/*
+ * Check that @event is large enough to hold its header plus @sample_size
+ * bytes of fixed sample fields.  Returns 0 if so, -EFAULT otherwise.
+ */
+static int
+perf_event__check_size(union perf_event *event, unsigned int sample_size)
+{
+	/*
+	 * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
+	 * up to PERF_SAMPLE_PERIOD.  After that overflow() must be used to
+	 * check the format does not go past the end of the event.
+	 */
+	return sample_size + sizeof(event->header) > event->header.size ?
+	       -EFAULT : 0;
+}
+
+/*
+ * Decode @event into @data according to the sample_type of @evsel.
+ *
+ * @data is reset first.  For records other than PERF_RECORD_SAMPLE only
+ * the trailing id fields are parsed, and only when sample_id_all is set.
+ * For sample records the fields are read front to back in sample_type
+ * order; pointers stored in @data (callchain, raw data, branch stack,
+ * registers, user stack, group values) point into @event.
+ *
+ * Fields up to PERF_SAMPLE_PERIOD are covered by one up-front size check;
+ * every later field is bounds checked individually.
+ *
+ * Returns 0 on success or -EFAULT when the event is too short for the
+ * fields it is supposed to carry.
+ */
+int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
+			     struct perf_sample *data)
+{
+	u64 type = evsel->attr.sample_type;
+	bool swapped = evsel->needs_swap;
+	const u64 *array;
+	u16 max_size = event->header.size;
+	const void *endp = (void *)event + max_size;
+	u64 sz;
+
+	/*
+	 * used for cross-endian analysis. See git commit 65014ab3
+	 * for why this goofiness is needed.
+	 */
+	union u64_swap u;
+
+	memset(data, 0, sizeof(*data));
+	data->cpu = data->pid = data->tid = -1;
+	data->stream_id = data->id = data->time = -1ULL;
+	data->period = evsel->attr.sample_period;
+	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	data->misc    = event->header.misc;
+	data->data_src = PERF_MEM_DATA_SRC_NONE;
+
+	if (event->header.type != PERF_RECORD_SAMPLE) {
+		if (!evsel->attr.sample_id_all)
+			return 0;
+		return perf_evsel__parse_id_sample(evsel, event, data);
+	}
+
+	array = event->sample.array;
+
+	if (perf_event__check_size(event, evsel->sample_size))
+		return -EFAULT;
+
+	if (type & PERF_SAMPLE_IDENTIFIER) {
+		data->id = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_IP) {
+		data->ip = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TID) {
+		u.val64 = *array;
+		if (swapped) {
+			/* undo swap of u64, then swap on individual u32s */
+			u.val64 = bswap_64(u.val64);
+			u.val32[0] = bswap_32(u.val32[0]);
+			u.val32[1] = bswap_32(u.val32[1]);
+		}
+
+		data->pid = u.val32[0];
+		data->tid = u.val32[1];
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TIME) {
+		data->time = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_ADDR) {
+		data->addr = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_ID) {
+		data->id = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_STREAM_ID) {
+		data->stream_id = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_CPU) {
+
+		u.val64 = *array;
+		if (swapped) {
+			/* undo swap of u64, then swap on individual u32s */
+			u.val64 = bswap_64(u.val64);
+			u.val32[0] = bswap_32(u.val32[0]);
+		}
+
+		data->cpu = u.val32[0];
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_PERIOD) {
+		data->period = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_READ) {
+		u64 read_format = evsel->attr.read_format;
+
+		OVERFLOW_CHECK_u64(array);
+		if (read_format & PERF_FORMAT_GROUP)
+			data->read.group.nr = *array;
+		else
+			data->read.one.value = *array;
+
+		array++;
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+			OVERFLOW_CHECK_u64(array);
+			data->read.time_enabled = *array;
+			array++;
+		}
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+			OVERFLOW_CHECK_u64(array);
+			data->read.time_running = *array;
+			array++;
+		}
+
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		if (read_format & PERF_FORMAT_GROUP) {
+			const u64 max_group_nr = UINT64_MAX /
+					sizeof(struct sample_read_value);
+
+			/* Reject counts whose byte size would wrap around. */
+			if (data->read.group.nr > max_group_nr)
+				return -EFAULT;
+			sz = data->read.group.nr *
+			     sizeof(struct sample_read_value);
+			OVERFLOW_CHECK(array, sz, max_size);
+			data->read.group.values =
+					(struct sample_read_value *)array;
+			array = (void *)array + sz;
+		} else {
+			OVERFLOW_CHECK_u64(array);
+			data->read.one.id = *array;
+			array++;
+		}
+	}
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
+
+		OVERFLOW_CHECK_u64(array);
+		data->callchain = (struct ip_callchain *)array++;
+		if (data->callchain->nr > max_callchain_nr)
+			return -EFAULT;
+		sz = data->callchain->nr * sizeof(u64);
+		OVERFLOW_CHECK(array, sz, max_size);
+		array = (void *)array + sz;
+	}
+
+	if (type & PERF_SAMPLE_RAW) {
+		OVERFLOW_CHECK_u64(array);
+		u.val64 = *array;
+
+		/*
+		 * Undo swap of u64, then swap on individual u32s,
+		 * get the size of the raw area and undo all of the
+		 * swap. The pevent interface handles endianity by
+		 * itself.
+		 */
+		if (swapped) {
+			u.val64 = bswap_64(u.val64);
+			u.val32[0] = bswap_32(u.val32[0]);
+			u.val32[1] = bswap_32(u.val32[1]);
+		}
+		data->raw_size = u.val32[0];
+
+		/*
+		 * Validate the advertised size before the payload is
+		 * touched: the byte swap below writes to the event, so a
+		 * bogus size must be rejected first.
+		 */
+		OVERFLOW_CHECK((void *)array + sizeof(u32), data->raw_size,
+			       max_size);
+
+		/*
+		 * The raw data is aligned on 64bits including the
+		 * u32 size, so it's safe to use mem_bswap_64.
+		 */
+		if (swapped)
+			mem_bswap_64((void *) array, data->raw_size);
+
+		array = (void *)array + sizeof(u32);
+
+		data->raw_data = (void *)array;
+		array = (void *)array + data->raw_size;
+	}
+
+	if (type & PERF_SAMPLE_BRANCH_STACK) {
+		const u64 max_branch_nr = UINT64_MAX /
+					  sizeof(struct branch_entry);
+
+		OVERFLOW_CHECK_u64(array);
+		data->branch_stack = (struct branch_stack *)array++;
+
+		if (data->branch_stack->nr > max_branch_nr)
+			return -EFAULT;
+		sz = data->branch_stack->nr * sizeof(struct branch_entry);
+		OVERFLOW_CHECK(array, sz, max_size);
+		array = (void *)array + sz;
+	}
+
+	if (type & PERF_SAMPLE_REGS_USER) {
+		OVERFLOW_CHECK_u64(array);
+		data->user_regs.abi = *array;
+		array++;
+
+		/* Register values are only present for a non-zero abi. */
+		if (data->user_regs.abi) {
+			u64 mask = evsel->attr.sample_regs_user;
+
+			sz = hweight_long(mask) * sizeof(u64);
+			OVERFLOW_CHECK(array, sz, max_size);
+			data->user_regs.mask = mask;
+			data->user_regs.regs = (u64 *)array;
+			array = (void *)array + sz;
+		}
+	}
+
+	if (type & PERF_SAMPLE_STACK_USER) {
+		OVERFLOW_CHECK_u64(array);
+		sz = *array++;
+
+		/* Offset of the size word, relative to the event start. */
+		data->user_stack.offset = ((char *)(array - 1)
+					  - (char *) event);
+
+		if (!sz) {
+			data->user_stack.size = 0;
+		} else {
+			OVERFLOW_CHECK(array, sz, max_size);
+			data->user_stack.data = (char *)array;
+			array = (void *)array + sz;
+			OVERFLOW_CHECK_u64(array);
+			data->user_stack.size = *array++;
+			if (WARN_ONCE(data->user_stack.size > sz,
+				      "user stack dump failure\n"))
+				return -EFAULT;
+		}
+	}
+
+	if (type & PERF_SAMPLE_WEIGHT) {
+		OVERFLOW_CHECK_u64(array);
+		data->weight = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_DATA_SRC) {
+		OVERFLOW_CHECK_u64(array);
+		data->data_src = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TRANSACTION) {
+		OVERFLOW_CHECK_u64(array);
+		data->transaction = *array;
+		array++;
+	}
+
+	data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
+	if (type & PERF_SAMPLE_REGS_INTR) {
+		OVERFLOW_CHECK_u64(array);
+		data->intr_regs.abi = *array;
+		array++;
+
+		if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
+			u64 mask = evsel->attr.sample_regs_intr;
+
+			sz = hweight_long(mask) * sizeof(u64);
+			OVERFLOW_CHECK(array, sz, max_size);
+			data->intr_regs.mask = mask;
+			data->intr_regs.regs = (u64 *)array;
+			array = (void *)array + sz;
+		}
+	}
+
+	data->phys_addr = 0;
+	if (type & PERF_SAMPLE_PHYS_ADDR) {
+		/* Lies past PERF_SAMPLE_PERIOD, so it needs its own check. */
+		OVERFLOW_CHECK_u64(array);
+		data->phys_addr = *array;
+		array++;
+	}
+
+	return 0;
+}
+
+/*
+ * Extract only the timestamp of @event into *@timestamp.
+ *
+ * For records other than PERF_RECORD_SAMPLE the time is taken from the
+ * trailing id fields, which requires sample_id_all.  For sample records
+ * the fields preceding TIME (IDENTIFIER, IP, TID) are skipped.
+ *
+ * Returns 0 on success, -1 when the evsel does not sample the time or the
+ * id fields are unavailable, -EFAULT when the event is too short.
+ */
+int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel,
+				       union perf_event *event,
+				       u64 *timestamp)
+{
+	u64 type = evsel->attr.sample_type;
+	const u64 *array;
+
+	if (!(type & PERF_SAMPLE_TIME))
+		return -1;
+
+	if (event->header.type != PERF_RECORD_SAMPLE) {
+		struct perf_sample data = {
+			.time = -1ULL,
+		};
+
+		if (!evsel->attr.sample_id_all)
+			return -1;
+		if (perf_evsel__parse_id_sample(evsel, event, &data))
+			return -1;
+
+		*timestamp = data.time;
+		return 0;
+	}
+
+	array = event->sample.array;
+
+	if (perf_event__check_size(event, evsel->sample_size))
+		return -EFAULT;
+
+	/* Step over each selected field that precedes the time. */
+	if (type & PERF_SAMPLE_IDENTIFIER)
+		array++;
+
+	if (type & PERF_SAMPLE_IP)
+		array++;
+
+	if (type & PERF_SAMPLE_TID)
+		array++;
+
+	if (type & PERF_SAMPLE_TIME)
+		*timestamp = *array;
+
+	return 0;
+}
+
+/*
+ * Compute the size in bytes of the sample event needed to carry @sample,
+ * given the sample_type bits in @type and the @read_format.
+ *
+ * The result starts at sizeof(struct sample_event) and grows by one u64
+ * per fixed field, plus the variable sized parts (group read values,
+ * callchain, raw data, branch stack, registers, user stack) sized from
+ * @sample.
+ */
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
+				     u64 read_format)
+{
+	const size_t word = sizeof(u64);
+	size_t total = sizeof(struct sample_event);
+
+	if (type & PERF_SAMPLE_IDENTIFIER)
+		total += word;
+
+	if (type & PERF_SAMPLE_IP)
+		total += word;
+
+	if (type & PERF_SAMPLE_TID)
+		total += word;
+
+	if (type & PERF_SAMPLE_TIME)
+		total += word;
+
+	if (type & PERF_SAMPLE_ADDR)
+		total += word;
+
+	if (type & PERF_SAMPLE_ID)
+		total += word;
+
+	if (type & PERF_SAMPLE_STREAM_ID)
+		total += word;
+
+	if (type & PERF_SAMPLE_CPU)
+		total += word;
+
+	if (type & PERF_SAMPLE_PERIOD)
+		total += word;
+
+	if (type & PERF_SAMPLE_READ) {
+		/* value (single read) or nr (group read) */
+		total += word;
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+			total += word;
+		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+			total += word;
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		if (read_format & PERF_FORMAT_GROUP)
+			total += sample->read.group.nr *
+				 sizeof(struct sample_read_value);
+		else
+			total += word;
+	}
+
+	/* nr word followed by nr entries */
+	if (type & PERF_SAMPLE_CALLCHAIN)
+		total += (sample->callchain->nr + 1) * word;
+
+	if (type & PERF_SAMPLE_RAW) {
+		total += sizeof(u32);
+		total += sample->raw_size;
+	}
+
+	/* nr word followed by nr branch entries */
+	if (type & PERF_SAMPLE_BRANCH_STACK)
+		total += sample->branch_stack->nr * sizeof(struct branch_entry) +
+			 word;
+
+	if (type & PERF_SAMPLE_REGS_USER) {
+		/* abi word, then one word per register in the mask */
+		total += word;
+		if (sample->user_regs.abi)
+			total += hweight_long(sample->user_regs.mask) * word;
+	}
+
+	if (type & PERF_SAMPLE_STACK_USER) {
+		size_t stack_sz = sample->user_stack.size;
+
+		/* size word, then data plus a trailing size word if non-empty */
+		total += word;
+		if (stack_sz) {
+			total += stack_sz;
+			total += word;
+		}
+	}
+
+	if (type & PERF_SAMPLE_WEIGHT)
+		total += word;
+
+	if (type & PERF_SAMPLE_DATA_SRC)
+		total += word;
+
+	if (type & PERF_SAMPLE_TRANSACTION)
+		total += word;
+
+	if (type & PERF_SAMPLE_REGS_INTR) {
+		/* abi word, then one word per register in the mask */
+		total += word;
+		if (sample->intr_regs.abi)
+			total += hweight_long(sample->intr_regs.mask) * word;
+	}
+
+	if (type & PERF_SAMPLE_PHYS_ADDR)
+		total += word;
+
+	return total;
+}
+
+int perf_event__synthesize_sample(union perf_event *event, u64 type,
+				  u64 read_format,
+				  const struct perf_sample *sample)
+{
+	u64 *array;	/* write cursor into event->sample.array */
+	size_t sz;
+	/*
+	 * Packs two 32-bit halves into one u64 slot (pid/tid, cpu, raw_size).
+	 * Used for cross-endian analysis; see git commit 65014ab3 for why.
+	 */
+	union u64_swap u;
+
+	array = event->sample.array;	/* fields are emitted in order, one per bit set in @type */
+
+	if (type & PERF_SAMPLE_IDENTIFIER) {
+		*array = sample->id;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_IP) {
+		*array = sample->ip;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TID) {
+		u.val32[0] = sample->pid;
+		u.val32[1] = sample->tid;
+		*array = u.val64;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TIME) {
+		*array = sample->time;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_ADDR) {
+		*array = sample->addr;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_ID) {
+		*array = sample->id;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_STREAM_ID) {
+		*array = sample->stream_id;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_CPU) {
+		u.val32[0] = sample->cpu;
+		u.val32[1] = 0;	/* upper half is written as zero */
+		*array = u.val64;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_PERIOD) {
+		*array = sample->period;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_READ) {
+		if (read_format & PERF_FORMAT_GROUP)
+			*array = sample->read.group.nr;
+		else
+			*array = sample->read.one.value;
+		array++;
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+			*array = sample->read.time_enabled;
+			array++;
+		}
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+			*array = sample->read.time_running;
+			array++;
+		}
+
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		if (read_format & PERF_FORMAT_GROUP) {
+			sz = sample->read.group.nr *
+			     sizeof(struct sample_read_value);
+			memcpy(array, sample->read.group.values, sz);
+			array = (void *)array + sz;	/* advance by bytes, not by u64 elements */
+		} else {
+			*array = sample->read.one.id;
+			array++;
+		}
+	}
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		sz = (sample->callchain->nr + 1) * sizeof(u64);	/* nr word plus nr entries */
+		memcpy(array, sample->callchain, sz);
+		array = (void *)array + sz;
+	}
+
+	if (type & PERF_SAMPLE_RAW) {
+		u.val32[0] = sample->raw_size;	/* NOTE(review): u.val32[1] is not assigned in this branch */
+		*array = u.val64;
+		array = (void *)array + sizeof(u32);	/* payload starts right after the 32-bit size */
+
+		memcpy(array, sample->raw_data, sample->raw_size);
+		array = (void *)array + sample->raw_size;
+	}
+
+	if (type & PERF_SAMPLE_BRANCH_STACK) {
+		sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+		sz += sizeof(u64);	/* the nr word preceding the entries */
+		memcpy(array, sample->branch_stack, sz);
+		array = (void *)array + sz;
+	}
+
+	if (type & PERF_SAMPLE_REGS_USER) {
+		if (sample->user_regs.abi) {
+			*array++ = sample->user_regs.abi;
+			sz = hweight_long(sample->user_regs.mask) * sizeof(u64);	/* one u64 per bit set in mask */
+			memcpy(array, sample->user_regs.regs, sz);
+			array = (void *)array + sz;
+		} else {
+			*array++ = 0;	/* abi 0: no register values follow */
+		}
+	}
+
+	if (type & PERF_SAMPLE_STACK_USER) {
+		sz = sample->user_stack.size;
+		*array++ = sz;
+		if (sz) {
+			memcpy(array, sample->user_stack.data, sz);
+			array = (void *)array + sz;
+			*array++ = sz;	/* the size is written again after the data */
+		}
+	}
+
+	if (type & PERF_SAMPLE_WEIGHT) {
+		*array = sample->weight;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_DATA_SRC) {
+		*array = sample->data_src;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TRANSACTION) {
+		*array = sample->transaction;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_REGS_INTR) {
+		if (sample->intr_regs.abi) {
+			*array++ = sample->intr_regs.abi;
+			sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
+			memcpy(array, sample->intr_regs.regs, sz);
+			array = (void *)array + sz;
+		} else {
+			*array++ = 0;
+		}
+	}
+
+	if (type & PERF_SAMPLE_PHYS_ADDR) {
+		*array = sample->phys_addr;
+		array++;
+	}
+
+	return 0;	/* no failure path: always returns 0 */
+}
+
+struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name)
+{
+	return pevent_find_field(evsel->tp_format, name);	/* look @name up in the evsel's tp_format */
+}
+
+/*
+ * Pointer to tracepoint field @name within @sample's raw data, NULL if the
+ * format lacks it.  Dynamic fields store the data offset in the low 16 bits.
+ */
+void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
+			 const char *name)
+{
+	struct format_field *fmt = perf_evsel__field(evsel, name);
+	int pos;
+
+	if (fmt == NULL)
+		return NULL;
+
+	pos = fmt->offset;
+	if (fmt->flags & FIELD_IS_DYNAMIC)
+		pos = *(int *)(sample->raw_data + fmt->offset) & 0xffff;
+	return sample->raw_data + pos;
+}
+
+/*
+ * format_field__intval - read an integer tracepoint field from a sample
+ *
+ * @field:      descriptor giving the field's offset and size in the raw data
+ * @sample:     sample whose raw_data holds the field
+ * @needs_swap: true if multi-byte values must be byte-swapped
+ *
+ * Sizes of 1, 2, 4 and 8 bytes are supported; any other size yields 0.  A
+ * single byte is returned as is, the wider sizes are swapped on request.
+ * Returns the field value zero-extended to 64 bits.
+ */
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
+			 bool needs_swap)
+{
+	void *ptr = sample->raw_data + field->offset;
+	u64 value;
+
+	switch (field->size) {
+	case 1:
+		/* A single byte has no byte order. */
+		return *(u8 *)ptr;
+	case 2:
+		value = *(u16 *)ptr;
+		return needs_swap ? bswap_16(value) : value;
+	case 4:
+		value = *(u32 *)ptr;
+		return needs_swap ? bswap_32(value) : value;
+	case 8:
+		/*
+		 * memcpy rather than a cast: presumably the payload is not
+		 * guaranteed to be 8-byte aligned (confirm).
+		 */
+		memcpy(&value, ptr, sizeof(u64));
+		return needs_swap ? bswap_64(value) : value;
+	default:
+		return 0;
+	}
+}
+
+/* Value of integer tracepoint field @name in @sample; 0 if no such field. */
+u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
+		       const char *name)
+{
+	struct format_field *fmt = perf_evsel__field(evsel, name);
+
+	if (fmt == NULL)
+		return 0;
+	return format_field__intval(fmt, sample, evsel->needs_swap);
+}
+
+/*
+ * Switch @evsel to a fallback configuration for error @err, if one is known.
+ * On success the fallback is described in @msg and true is returned.
+ */
+bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
+			  char *msg, size_t msgsize)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	const char *name, *sep;
+	char *new_name;
+	int paranoid;
+
+	if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
+	    attr->type == PERF_TYPE_HARDWARE &&
+	    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
+		/*
+		 * If it's cycles then fall back to hrtimer based
+		 * cpu-clock-tick sw counter, which is always available even if
+		 * no PMU support.
+		 *
+		 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
+		 * b0a873e).
+		 */
+		scnprintf(msg, msgsize, "%s",
+"The cycles event is not supported, trying to fall back to cpu-clock-ticks");
+
+		attr->type   = PERF_TYPE_SOFTWARE;
+		attr->config = PERF_COUNT_SW_CPU_CLOCK;
+		zfree(&evsel->name);
+		return true;
+	}
+
+	if (err != EACCES || attr->exclude_kernel)
+		return false;
+	paranoid = perf_event_paranoid();
+	if (paranoid <= 1)
+		return false;
+
+	/* Append "u", preceded by ':' unless the name already has a separator. */
+	name = perf_evsel__name(evsel);
+	sep = (strchr(name, '/') || strchr(name, ':')) ? "" : ":";
+	if (asprintf(&new_name, "%s%su", name, sep) < 0)
+		return false;
+	free(evsel->name);
+	evsel->name = new_name;
+	scnprintf(msg, msgsize,
+"kernel.perf_event_paranoid=%d, trying to fall back to excluding kernel samples", paranoid);
+	attr->exclude_kernel = 1;
+	return true;
+}
+
+/* Is there a procfs <dir>/comm file whose contents start with @name? */
+static bool find_process(const char *name)
+{
+	const size_t len = strlen(name);
+	bool found = false;
+	struct dirent *d;
+	DIR *dir;
+
+	dir = opendir(procfs__mountpoint());
+	if (!dir)
+		return false;
+
+	/* Walk through the directory. */
+	while (!found && (d = readdir(dir)) != NULL) {
+		char path[PATH_MAX];
+		char *data;
+		size_t size;
+
+		/* Only sub-directories other than "." and ".." are examined. */
+		if (d->d_type != DT_DIR)
+			continue;
+		if (!strcmp(".", d->d_name) || !strcmp("..", d->d_name))
+			continue;
+
+		scnprintf(path, sizeof(path), "%s/%s/comm",
+			  procfs__mountpoint(), d->d_name);
+		if (filename__read_str(path, &data, &size))
+			continue;
+		found = strncmp(name, data, len) == 0;
+		free(data);
+	}
+
+	closedir(dir);
+	return found;
+}
+
+int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
+			      int err, char *msg, size_t size)
+{
+	char sbuf[STRERR_BUFSIZE];	/* scratch buffer for str_error_r() in the generic message */
+	int printed = 0;		/* length of the EPERM-only prefix, 0 otherwise */
+
+	switch (err) {			/* pick a message specific to the errno value @err */
+	case EPERM:
+	case EACCES:
+		if (err == EPERM)
+			printed = scnprintf(msg, size,
+				"No permission to enable %s event.\n\n",
+				perf_evsel__name(evsel));
+
+		return scnprintf(msg + printed, size - printed,	/* appended after the EPERM prefix, if any */
+		 "You may not have permission to collect %sstats.\n\n"
+		 "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
+		 "which controls use of the performance events system by\n"
+		 "unprivileged users (without CAP_SYS_ADMIN).\n\n"
+		 "The current value is %d:\n\n"
+		 "  -1: Allow use of (almost) all events by all users\n"
+		 "      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
+		 ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n"
+		 "      Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n"
+		 ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
+		 ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n"
+		 "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
+		 "	kernel.perf_event_paranoid = -1\n" ,
+				 target->system_wide ? "system-wide " : "",
+				 perf_event_paranoid());
+	case ENOENT:
+		return scnprintf(msg, size, "The %s event is not supported.",
+				 perf_evsel__name(evsel));
+	case EMFILE:
+		return scnprintf(msg, size, "%s",
+			 "Too many events are opened.\n"
+			 "Probably the maximum number of open file descriptors has been reached.\n"
+			 "Hint: Try again after reducing the number of events.\n"
+			 "Hint: Try increasing the limit with 'ulimit -n <limit>'");
+	case ENOMEM:
+		if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 &&
+		    access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
+			return scnprintf(msg, size,
+					 "Not enough memory to setup event with callchain.\n"
+					 "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
+					 "Hint: Current value: %d", sysctl_perf_event_max_stack);
+		break;	/* otherwise use the generic message */
+	case ENODEV:
+		if (target->cpu_list)
+			return scnprintf(msg, size, "%s",
+	 "No such device - did you specify an out-of-range profile CPU?");
+		break;
+	case EOPNOTSUPP:
+		if (evsel->attr.sample_period != 0)
+			return scnprintf(msg, size,
+	"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
+					 perf_evsel__name(evsel));
+		if (evsel->attr.precise_ip)
+			return scnprintf(msg, size, "%s",
+	"\'precise\' request may not be supported. Try removing 'p' modifier.");
+#if defined(__i386__) || defined(__x86_64__)
+		if (evsel->attr.type == PERF_TYPE_HARDWARE)
+			return scnprintf(msg, size, "%s",
+	"No hardware sampling interrupt available.\n");
+#endif
+		break;
+	case EBUSY:
+		if (find_process("oprofiled"))	/* a comm starting with "oprofiled" was found under procfs */
+			return scnprintf(msg, size,
+	"The PMU counters are busy/taken by another profiler.\n"
+	"We found oprofile daemon running, please stop it and try again.");
+		break;
+	case EINVAL:
+		if (evsel->attr.write_backward && perf_missing_features.write_backward)
+			return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
+		if (perf_missing_features.clockid)
+			return scnprintf(msg, size, "clockid feature not supported.");
+		if (perf_missing_features.clockid_wrong)
+			return scnprintf(msg, size, "wrong clockid (%d).", clockid);
+		break;
+	default:
+		break;
+	}
+
+	return scnprintf(msg, size,	/* generic message, reached through the 'break's above */
+	"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
+	"/bin/dmesg | grep -i perf may provide additional information.\n",
+			 err, str_error_r(err, sbuf, sizeof(sbuf)),
+			 perf_evsel__name(evsel));
+}
+
+struct perf_env *perf_evsel__env(struct perf_evsel *evsel)
+{
+	if (!evsel || !evsel->evlist)	/* no evsel, or one that is not in an evlist */
+		return NULL;
+	return evsel->evlist->env;
+}
diff --git a/util/evsel.h b/util/evsel.h
new file mode 100644
index 0000000..92ec009
--- /dev/null
+++ b/util/evsel.h
@@ -0,0 +1,468 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVSEL_H
+#define __PERF_EVSEL_H 1
+
+#include <linux/list.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include "xyarray.h"
+#include "symbol.h"
+#include "cpumap.h"
+#include "counts.h"
+
+struct perf_evsel;
+
+/*
+ * Per fd, to map back from PERF_SAMPLE_ID to evsel; only used when there is
+ * more than one entry in the evlist.
+ */
+struct perf_sample_id {
+	struct hlist_node 	node;
+	u64		 	id;
+	struct perf_evsel	*evsel;
+	int			idx;
+	int			cpu;
+	pid_t			tid;
+
+	/* Holds total ID period value for PERF_SAMPLE_READ processing. */
+	u64			period;
+};
+
+struct cgroup;
+
+/*
+ * The 'struct perf_evsel_config_term' is used to pass event-specific
+ * configuration data to the perf_evsel__config routine.
+ * It is allocated within event parsing and attached to the
+ * perf_evsel::config_terms list head.
+ */
+enum term_type {
+	PERF_EVSEL__CONFIG_TERM_PERIOD,
+	PERF_EVSEL__CONFIG_TERM_FREQ,
+	PERF_EVSEL__CONFIG_TERM_TIME,
+	PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
+	PERF_EVSEL__CONFIG_TERM_STACK_USER,
+	PERF_EVSEL__CONFIG_TERM_INHERIT,
+	PERF_EVSEL__CONFIG_TERM_MAX_STACK,
+	PERF_EVSEL__CONFIG_TERM_OVERWRITE,
+	PERF_EVSEL__CONFIG_TERM_DRV_CFG,
+	PERF_EVSEL__CONFIG_TERM_BRANCH,
+};
+
+struct perf_evsel_config_term {
+	struct list_head	list;
+	enum term_type	type;
+	union {
+		u64	period;
+		u64	freq;
+		bool	time;
+		char	*callgraph;
+		char	*drv_cfg;
+		u64	stack_user;
+		int	max_stack;
+		bool	inherit;
+		bool	overwrite;
+		char	*branch;
+	} val;
+	bool weak;
+};
+
+struct perf_stat_evsel;
+
+/** struct perf_evsel - event selector
+ *
+ * @evlist - evlist this evsel is in, if it is in one.
+ * @node - To insert it into evlist->entries or in other list_heads, say in
+ *         the event parsing routines.
+ * @name - Can be set to retain the original event name passed by the user,
+ *         so that when showing results in tools such as 'perf stat', we
+ *         show the name used, not some alias.
+ * @id_pos: the position of the event id (PERF_SAMPLE_ID or
+ *          PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of
+ *          struct sample_event
+ * @is_pos: the position (counting backwards) of the event id (PERF_SAMPLE_ID or
+ *          PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if sample_id_all
+ *          is used there is an id sample appended to non-sample events
+ * @priv:   Tool specific private data
+ */
+struct perf_evsel {
+	struct list_head	node;
+	struct perf_evlist	*evlist;
+	struct perf_event_attr	attr;
+	char			*filter;
+	struct xyarray		*fd;
+	struct xyarray		*sample_id;
+	u64			*id;
+	struct perf_counts	*counts;
+	struct perf_counts	*prev_raw_counts;
+	int			idx;
+	u32			ids;
+	char			*name;
+	double			scale;
+	const char		*unit;
+	struct event_format	*tp_format;
+	off_t			id_offset;
+	struct perf_stat_evsel  *stats;
+	void			*priv;
+	u64			db_id;
+	struct cgroup		*cgrp;
+	void			*handler;
+	struct cpu_map		*cpus;
+	struct cpu_map		*own_cpus;
+	struct thread_map	*threads;
+	unsigned int		sample_size;
+	int			id_pos;
+	int			is_pos;
+	bool			uniquified_name;
+	bool			snapshot;
+	bool 			supported;
+	bool 			needs_swap;
+	bool			no_aux_samples;
+	bool			immediate;
+	bool			system_wide;
+	bool			tracking;
+	bool			per_pkg;
+	bool			precise_max;
+	bool			ignore_missing_thread;
+	bool			forced_leader;
+	/* parse modifier helper */
+	int			exclude_GH;
+	int			nr_members;
+	int			sample_read;
+	unsigned long		*per_pkg_mask;
+	struct perf_evsel	*leader;
+	char			*group_name;
+	bool			cmdline_group_boundary;
+	struct list_head	config_terms;
+	int			bpf_fd;
+	bool			auto_merge_stats;
+	bool			merged_stat;
+	const char *		metric_expr;
+	const char *		metric_name;
+	struct perf_evsel	**metric_events;
+	bool			collect_stat;
+	bool			weak_group;
+	const char		*pmu_name;
+};
+
+union u64_swap {
+	u64 val64;	/* the full 64-bit value */
+	u32 val32[2];	/* the same storage viewed as two 32-bit halves */
+};
+
+struct perf_missing_features {
+	bool sample_id_all;
+	bool exclude_guest;
+	bool mmap2;
+	bool cloexec;
+	bool clockid;
+	bool clockid_wrong;
+	bool lbr_flags;
+	bool write_backward;
+	bool group_read;
+};
+
+extern struct perf_missing_features perf_missing_features;
+
+struct cpu_map;
+struct target;
+struct thread_map;
+struct record_opts;
+
+static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
+{
+	return evsel->cpus;
+}
+
+static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
+{
+	return perf_evsel__cpus(evsel)->nr;
+}
+
+void perf_counts_values__scale(struct perf_counts_values *count,
+			       bool scale, s8 *pscaled);
+
+void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
+				struct perf_counts_values *count);
+
+int perf_evsel__object_config(size_t object_size,
+			      int (*init)(struct perf_evsel *evsel),
+			      void (*fini)(struct perf_evsel *evsel));
+
+struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx);
+
+static inline struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr)
+{
+	return perf_evsel__new_idx(attr, 0);
+}
+
+struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx);
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+static inline struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name)
+{
+	return perf_evsel__newtp_idx(sys, name, 0);
+}
+
+struct perf_evsel *perf_evsel__new_cycles(bool precise);
+
+struct event_format *event_format__new(const char *sys, const char *name);
+
+void perf_evsel__init(struct perf_evsel *evsel,
+		      struct perf_event_attr *attr, int idx);
+void perf_evsel__exit(struct perf_evsel *evsel);
+void perf_evsel__delete(struct perf_evsel *evsel);
+
+struct callchain_param;
+
+void perf_evsel__config(struct perf_evsel *evsel,
+			struct record_opts *opts,
+			struct callchain_param *callchain);
+void perf_evsel__config_callchain(struct perf_evsel *evsel,
+				  struct record_opts *opts,
+				  struct callchain_param *callchain);
+
+int __perf_evsel__sample_size(u64 sample_type);
+void perf_evsel__calc_id_pos(struct perf_evsel *evsel);
+
+bool perf_evsel__is_cache_op_valid(u8 type, u8 op);
+
+#define PERF_EVSEL__MAX_ALIASES 8
+
+extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
+				       [PERF_EVSEL__MAX_ALIASES];
+extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_EVSEL__MAX_ALIASES];
+extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
+					      [PERF_EVSEL__MAX_ALIASES];
+extern const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX];
+extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX];
+int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
+					    char *bf, size_t size);
+const char *perf_evsel__name(struct perf_evsel *evsel);
+
+const char *perf_evsel__group_name(struct perf_evsel *evsel);
+int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size);
+
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
+void perf_evsel__close_fd(struct perf_evsel *evsel);
+
+void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
+				  enum perf_event_sample_format bit);
+void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
+				    enum perf_event_sample_format bit);
+
+#define perf_evsel__set_sample_bit(evsel, bit) \
+	__perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+#define perf_evsel__reset_sample_bit(evsel, bit) \
+	__perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+void perf_evsel__set_sample_id(struct perf_evsel *evsel,
+			       bool use_sample_identifier);
+
+int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter);
+int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter);
+int perf_evsel__append_addr_filter(struct perf_evsel *evsel,
+				   const char *filter);
+int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter);
+int perf_evsel__enable(struct perf_evsel *evsel);
+int perf_evsel__disable(struct perf_evsel *evsel);
+
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
+			     struct cpu_map *cpus);
+int perf_evsel__open_per_thread(struct perf_evsel *evsel,
+				struct thread_map *threads);
+int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+		     struct thread_map *threads);
+void perf_evsel__close(struct perf_evsel *evsel);
+
+struct perf_sample;
+
+void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
+			 const char *name);
+u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
+		       const char *name);
+
+static inline char *perf_evsel__strval(struct perf_evsel *evsel,
+				       struct perf_sample *sample,
+				       const char *name)
+{
+	return perf_evsel__rawptr(evsel, sample, name);
+}
+
+struct format_field;
+
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap);
+
+struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name);
+
+#define perf_evsel__match(evsel, t, c)		\
+	((evsel)->attr.type == PERF_TYPE_##t &&	/* (evsel): argument may be an expression */ \
+	 (evsel)->attr.config == PERF_COUNT_##c)
+
+static inline bool perf_evsel__match2(struct perf_evsel *e1,
+				      struct perf_evsel *e2)
+{
+	return (e1->attr.type == e2->attr.type) &&
+	       (e1->attr.config == e2->attr.config);
+}
+
+#define perf_evsel__cmp(a, b)			\
+	((a) &&					\
+	 (b) &&					\
+	 (a)->attr.type == (b)->attr.type &&	\
+	 (a)->attr.config == (b)->attr.config)
+
+int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+		     struct perf_counts_values *count);
+
+int perf_evsel__read_counter(struct perf_evsel *evsel, int cpu, int thread);
+
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+			      int cpu, int thread, bool scale);
+
+/**
+ * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+					  int cpu, int thread)
+{
+	return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
+}
+
+/**
+ * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
+						 int cpu, int thread)
+{
+	return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
+}
+
+int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
+			     struct perf_sample *sample);
+
+int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel,
+				       union perf_event *event,
+				       u64 *timestamp);
+
+static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel)
+{
+	return list_entry(evsel->node.next, struct perf_evsel, node);
+}
+
+static inline struct perf_evsel *perf_evsel__prev(struct perf_evsel *evsel)
+{
+	return list_entry(evsel->node.prev, struct perf_evsel, node);
+}
+
+/**
+ * perf_evsel__is_group_leader - Return whether given evsel is a leader event
+ *
+ * @evsel - evsel selector to be tested
+ *
+ * Return %true if @evsel is a group leader or a stand-alone event
+ */
+static inline bool perf_evsel__is_group_leader(const struct perf_evsel *evsel)
+{
+	return evsel->leader == evsel;
+}
+
+/**
+ * perf_evsel__is_group_event - Return whether given evsel is a group event
+ *
+ * @evsel - evsel selector to be tested
+ *
+ * Return %true iff event group view is enabled and @evsel is an actual group
+ * leader which has other members in the group
+ */
+static inline bool perf_evsel__is_group_event(struct perf_evsel *evsel)
+{
+	if (!symbol_conf.event_group)
+		return false;
+
+	return perf_evsel__is_group_leader(evsel) && evsel->nr_members > 1;
+}
+
+bool perf_evsel__is_function_event(struct perf_evsel *evsel);
+
+static inline bool perf_evsel__is_bpf_output(struct perf_evsel *evsel)
+{
+	/* True for a software event whose config is PERF_COUNT_SW_BPF_OUTPUT. */
+	if (evsel->attr.config != PERF_COUNT_SW_BPF_OUTPUT)
+		return false;
+	return evsel->attr.type == PERF_TYPE_SOFTWARE;
+}
+
+struct perf_attr_details {
+	bool freq;
+	bool verbose;
+	bool event_group;
+	bool force;
+	bool trace_fields;
+};
+
+int perf_evsel__fprintf(struct perf_evsel *evsel,
+			struct perf_attr_details *details, FILE *fp);
+
+#define EVSEL__PRINT_IP			(1<<0)
+#define EVSEL__PRINT_SYM		(1<<1)
+#define EVSEL__PRINT_DSO		(1<<2)
+#define EVSEL__PRINT_SYMOFFSET		(1<<3)
+#define EVSEL__PRINT_ONELINE		(1<<4)
+#define EVSEL__PRINT_SRCLINE		(1<<5)
+#define EVSEL__PRINT_UNKNOWN_AS_ADDR	(1<<6)
+#define EVSEL__PRINT_CALLCHAIN_ARROW	(1<<7)
+#define EVSEL__PRINT_SKIP_IGNORED	(1<<8)
+
+struct callchain_cursor;
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+			      unsigned int print_opts,
+			      struct callchain_cursor *cursor, FILE *fp);
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+			int left_alignment, unsigned int print_opts,
+			struct callchain_cursor *cursor, FILE *fp);
+
+bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
+			  char *msg, size_t msgsize);
+int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
+			      int err, char *msg, size_t size);
+
+static inline int perf_evsel__group_idx(struct perf_evsel *evsel)
+{
+	return evsel->idx - evsel->leader->idx;
+}
+
+#define for_each_group_member(_evsel, _leader) 					\
+for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); 	\
+     (_evsel) && (_evsel)->leader == (_leader);					\
+     (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node))
+
+static inline bool perf_evsel__has_branch_callstack(const struct perf_evsel *evsel)
+{
+	return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
+}
+
+typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
+
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+			     attr__fprintf_f attr__fprintf, void *priv);
+
+struct perf_env *perf_evsel__env(struct perf_evsel *evsel);
+
+#endif /* __PERF_EVSEL_H */
diff --git a/util/evsel_fprintf.c b/util/evsel_fprintf.c
new file mode 100644
index 0000000..06dfb02
--- /dev/null
+++ b/util/evsel_fprintf.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <traceevent/event-parse.h>
+#include "evsel.h"
+#include "callchain.h"
+#include "map.h"
+#include "strlist.h"
+#include "symbol.h"
+#include "srcline.h"
+
+/* fprintf() prefixed by ':' the first time (clearing *first) and ',' after. */
+static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
+{
+	const char *sep = *first ? ":" : ",";
+	va_list ap;
+	int n;
+
+	*first = false;
+	n = fprintf(fp, "%s", sep);
+
+	va_start(ap, fmt);
+	n += vfprintf(fp, fmt, ap);
+	va_end(ap);
+
+	/* Sum of both fprintf results: the separator plus the formatted text. */
+	return n;
+}
+
+static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv)
+{
+	return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);	/* @priv is used as comma_fprintf()'s 'first' flag */
+}
+
+int perf_evsel__fprintf(struct perf_evsel *evsel,
+			struct perf_attr_details *details, FILE *fp)
+{
+	bool first = true;	/* comma_fprintf() state: ':' before the first extra item, ',' after */
+	int printed = 0;	/* running total of the fprintf return values */
+
+	if (details->event_group) {
+		struct perf_evsel *pos;
+
+		if (!perf_evsel__is_group_leader(evsel))
+			return 0;	/* non-leaders print nothing, not even a newline */
+
+		if (evsel->nr_members > 1)
+			printed += fprintf(fp, "%s{", evsel->group_name ?: "");
+
+		printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+		for_each_group_member(pos, evsel)
+			printed += fprintf(fp, ",%s", perf_evsel__name(pos));
+
+		if (evsel->nr_members > 1)
+			printed += fprintf(fp, "}");
+		goto out;	/* group view skips the attr/freq/trace_fields details */
+	}
+
+	printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+
+	if (details->verbose) {
+		printed += perf_event_attr__fprintf(fp, &evsel->attr,
+						    __print_attr__fprintf, &first);
+	} else if (details->freq) {
+		const char *term = "sample_freq";
+
+		if (!evsel->attr.freq)
+			term = "sample_period";
+
+		printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
+					 term, (u64)evsel->attr.sample_freq);	/* attr.sample_freq is printed under either label */
+	}
+
+	if (details->trace_fields) {
+		struct format_field *field;
+
+		if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+			printed += comma_fprintf(fp, &first, " (not a tracepoint)");
+			goto out;
+		}
+
+		field = evsel->tp_format->format.fields;	/* NOTE(review): tp_format is dereferenced without a NULL check */
+		if (field == NULL) {
+			printed += comma_fprintf(fp, &first, " (no trace field)");
+			goto out;
+		}
+
+		printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name);
+
+		field = field->next;
+		while (field) {
+			printed += comma_fprintf(fp, &first, "%s", field->name);
+			field = field->next;
+		}
+	}
+out:
+	fputc('\n', fp);
+	return ++printed;	/* +1 accounts for the newline written just above */
+}
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+			      unsigned int print_opts, struct callchain_cursor *cursor,
+			      FILE *fp)
+{
+	int printed = 0;	/* running total of the fprintf-style return values */
+	struct callchain_cursor_node *node;
+	int print_ip = print_opts & EVSEL__PRINT_IP;
+	int print_sym = print_opts & EVSEL__PRINT_SYM;
+	int print_dso = print_opts & EVSEL__PRINT_DSO;
+	int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
+	int print_oneline = print_opts & EVSEL__PRINT_ONELINE;
+	int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
+	int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+	int print_arrow = print_opts & EVSEL__PRINT_CALLCHAIN_ARROW;
+	int print_skip_ignored = print_opts & EVSEL__PRINT_SKIP_IGNORED;
+	char s = print_oneline ? ' ' : '\t';	/* separator printed in front of each ip */
+	bool first = true;	/* no " <-" arrow before the first printed entry */
+
+	if (sample->callchain) {	/* without a callchain nothing is printed and 0 is returned */
+		struct addr_location node_al;	/* NOTE(review): only .addr and .map are filled in below */
+
+		callchain_cursor_commit(cursor);
+
+		while (1) {
+			u64 addr = 0;	/* stays 0 when the node has no map */
+
+			node = callchain_cursor_current(cursor);
+			if (!node)
+				break;
+
+			if (node->sym && node->sym->ignore && print_skip_ignored)
+				goto next;	/* skipped entries leave 'first' untouched */
+
+			printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+			if (print_arrow && !first)
+				printed += fprintf(fp, " <-");
+
+			if (print_ip)
+				printed += fprintf(fp, "%c%16" PRIx64, s, node->ip);
+
+			if (node->map)
+				addr = node->map->map_ip(node->map, node->ip);
+
+			if (print_sym) {
+				printed += fprintf(fp, " ");
+				node_al.addr = addr;
+				node_al.map  = node->map;
+
+				if (print_symoffset) {
+					printed += __symbol__fprintf_symname_offs(node->sym, &node_al,
+										  print_unknown_as_addr,
+										  true, fp);
+				} else {
+					printed += __symbol__fprintf_symname(node->sym, &node_al,
+									     print_unknown_as_addr, fp);
+				}
+			}
+
+			if (print_dso && (!node->sym || !node->sym->inlined)) {	/* dso name is omitted for inlined symbols */
+				printed += fprintf(fp, " (");
+				printed += map__fprintf_dsoname(node->map, fp);
+				printed += fprintf(fp, ")");
+			}
+
+			if (print_srcline)
+				printed += map__fprintf_srcline(node->map, addr, "\n  ", fp);
+
+			if (node->sym && node->sym->inlined)
+				printed += fprintf(fp, " (inlined)");
+
+			if (!print_oneline)
+				printed += fprintf(fp, "\n");
+
+			if (symbol_conf.bt_stop_list &&
+			    node->sym &&
+			    strlist__has_entry(symbol_conf.bt_stop_list,
+					       node->sym->name)) {
+				break;	/* the stop symbol itself has been printed; end the walk here */
+			}
+
+			first = false;
+next:
+			callchain_cursor_advance(cursor);
+		}
+	}
+
+	return printed;
+}
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+			int left_alignment, unsigned int print_opts,
+			struct callchain_cursor *cursor, FILE *fp)
+{
+	int printed = 0;	/* running total of the fprintf-style return values */
+	int print_ip = print_opts & EVSEL__PRINT_IP;
+	int print_sym = print_opts & EVSEL__PRINT_SYM;
+	int print_dso = print_opts & EVSEL__PRINT_DSO;
+	int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
+	int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
+	int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+
+	if (cursor != NULL) {	/* with a cursor the callchain printer handles everything; @al is unused */
+		printed += sample__fprintf_callchain(sample, left_alignment,
+						     print_opts, cursor, fp);
+	} else {	/* otherwise print the single location given by sample->ip and @al */
+		printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+		if (print_ip)
+			printed += fprintf(fp, "%16" PRIx64, sample->ip);
+
+		if (print_sym) {
+			printed += fprintf(fp, " ");
+			if (print_symoffset) {
+				printed += __symbol__fprintf_symname_offs(al->sym, al,
+									  print_unknown_as_addr,
+									  true, fp);
+			} else {
+				printed += __symbol__fprintf_symname(al->sym, al,
+								     print_unknown_as_addr, fp);
+			}
+		}
+
+		if (print_dso) {
+			printed += fprintf(fp, " (");
+			printed += map__fprintf_dsoname(al->map, fp);
+			printed += fprintf(fp, ")");
+		}
+
+		if (print_srcline)
+			printed += map__fprintf_srcline(al->map, al->addr, "\n  ", fp);
+	}
+
+	return printed;	/* this branch emits no trailing newline of its own */
+}
diff --git a/util/expr.h b/util/expr.h
new file mode 100644
index 0000000..0461608
--- /dev/null
+++ b/util/expr.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PARSE_CTX_H
+#define PARSE_CTX_H 1
+
+#define EXPR_MAX_OTHER 15
+#define MAX_PARSE_ID EXPR_MAX_OTHER
+
+struct parse_id {
+	const char *name;
+	double val;
+};
+
+struct parse_ctx {
+	int num_ids;
+	struct parse_id ids[MAX_PARSE_ID];
+};
+
+void expr__ctx_init(struct parse_ctx *ctx);
+void expr__add_id(struct parse_ctx *ctx, const char *id, double val);
+#ifndef IN_EXPR_Y
+int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp);
+#endif
+int expr__find_other(const char *p, const char *one, const char ***other,
+		int *num_other);
+
+#endif
diff --git a/util/expr.y b/util/expr.y
new file mode 100644
index 0000000..432b856
--- /dev/null
+++ b/util/expr.y
@@ -0,0 +1,235 @@
+/* Simple expression parser */
+%{
+#include "util.h"
+#include "util/debug.h"
+#define IN_EXPR_Y 1
+#include "expr.h"
+#include "smt.h"
+#include <string.h>
+
+#define MAXIDLEN 256
+%}
+
+%pure-parser
+%parse-param { double *final_val }
+%parse-param { struct parse_ctx *ctx }
+%parse-param { const char **pp }
+%lex-param { const char **pp }
+
+%union {
+	double num;
+	char id[MAXIDLEN+1];
+}
+
+%token <num> NUMBER
+%token <id> ID
+%token MIN MAX IF ELSE SMT_ON
+%left MIN MAX IF
+%left '|'
+%left '^'
+%left '&'
+%left '-' '+'
+%left '*' '/' '%'
+%left NEG NOT
+%type <num> expr if_expr
+
+%{
+static int expr__lex(YYSTYPE *res, const char **pp);
+
+static void expr__error(double *final_val __maybe_unused,
+		       struct parse_ctx *ctx __maybe_unused,
+		       const char **pp __maybe_unused,
+		       const char *s)
+{
+	pr_debug("%s\n", s);
+}
+
+/* Case-insensitive lookup of @id in @ctx: returns 0 with *val set, or -1 if absent. */
+static int lookup_id(struct parse_ctx *ctx, char *id, double *val)
+{
+	int i = 0;
+
+	while (i < ctx->num_ids && strcasecmp(ctx->ids[i].name, id))
+		i++;
+	if (i >= ctx->num_ids)
+		return -1;
+	*val = ctx->ids[i].val;
+	return 0;
+}
+
+%}
+%%
+
+all_expr: if_expr			{ *final_val = $1; }
+	;
+
+if_expr:
+	expr IF expr ELSE expr { $$ = $3 ? $1 : $5; }
+	| expr
+	;
+
+expr:	  NUMBER
+	| ID			{ if (lookup_id(ctx, $1, &$$) < 0) {
+					pr_debug("%s not found\n", $1);
+					YYABORT;
+				  }
+				}
+	| expr '|' expr		{ $$ = (long)$1 | (long)$3; }
+	| expr '&' expr		{ $$ = (long)$1 & (long)$3; }
+	| expr '^' expr		{ $$ = (long)$1 ^ (long)$3; }
+	| expr '+' expr		{ $$ = $1 + $3; }
+	| expr '-' expr		{ $$ = $1 - $3; }
+	| expr '*' expr		{ $$ = $1 * $3; }
+	| expr '/' expr		{ if ($3 == 0) YYABORT; $$ = $1 / $3; }
+	| expr '%' expr		{ if ((long)$3 == 0) YYABORT; $$ = (long)$1 % (long)$3; }
+	| '-' expr %prec NEG	{ $$ = -$2; }
+	| '(' if_expr ')'	{ $$ = $2; }
+	| MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; }
+	| MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? $3 : $5; }
+	| SMT_ON		 { $$ = smt_on() > 0; }
+	;
+
+%%
+
+/*
+ * Lex the identifier or keyword starting at @p into res->id and set *pp just
+ * past it.  '@' is stored as '/', and '\' makes the following character a
+ * literal part of the name.
+ * Returns MIN/MAX/IF/ELSE/SMT_ON for keywords and ID otherwise, or -1 (with
+ * *pp left untouched) when the symbol is longer than MAXIDLEN characters.
+ */
+static int expr__symbol(YYSTYPE *res, const char *p, const char **pp)
+{
+	char *dst = res->id;
+	const char *s = p;
+
+	if (*p == '#')
+		*dst++ = *p++;
+
+	/* A '\' right before the terminator escapes nothing: stop in front of it. */
+	while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' ||
+	       *p == '@' || (*p == '\\' && p[1] != '\0')) {
+		if (p - s >= MAXIDLEN)
+			return -1;
+		/*
+		 * Allow @ instead of / to be able to specify pmu/event/ without
+		 * conflicts with normal division.
+		 */
+		if (*p == '@')
+			*dst++ = '/';
+		else if (*p == '\\')
+			*dst++ = *++p;
+		else
+			*dst++ = *p;
+		p++;
+	}
+	*dst = 0;
+	*pp = p;
+
+	/* Keywords are matched case-sensitively, except for #smt_on. */
+	if (!strcmp(res->id, "min"))
+		return MIN;
+	if (!strcmp(res->id, "max"))
+		return MAX;
+	if (!strcmp(res->id, "if"))
+		return IF;
+	if (!strcmp(res->id, "else"))
+		return ELSE;
+	if (!strcasecmp(res->id, "#smt_on"))
+		return SMT_ON;
+	return ID;
+}
+
+/* Return the next token of *pp and advance *pp past it: NUMBER (value in res->num),
+ * a token from expr__symbol(), the character itself otherwise, or 0 at the end. */
+static int expr__lex(YYSTYPE *res, const char **pp)
+{
+	const char *p = *pp;
+	char *end;
+
+	while (isspace(*p))
+		p++;
+
+	switch (*p) {
+	case '#':
+	case 'a' ... 'z':
+	case 'A' ... 'Z':
+		return expr__symbol(res, p, pp);
+	case '0' ... '9': case '.':
+		res->num = strtod(p, &end);
+		/* a lone '.' converts nothing: hand it back as a plain char so we advance */
+		*pp = end != p ? end : p + 1;
+		return end != p ? NUMBER : *p;
+	default:	/* on the terminator, stay put rather than step past it */
+		*pp = *p ? p + 1 : p;
+		return *p;
+	}
+}
+
+/* Only the @name pointer is stored: the caller must keep it allocated while @ctx is in use. */
+void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
+{
+	int idx;
+	assert(ctx->num_ids < MAX_PARSE_ID);	/* ids[] has MAX_PARSE_ID slots */
+	idx = ctx->num_ids++;
+	ctx->ids[idx].name = name;
+	ctx->ids[idx].val = val;
+}
+
+void expr__ctx_init(struct parse_ctx *ctx)
+{
+	ctx->num_ids = 0;
+}
+
+/* True if @val matches @one (when set) or any of other[0..num_other), ignoring case. */
+static bool already_seen(const char *val, const char *one, const char **other,
+			 int num_other)
+{
+	int i = 0;
+
+	if (one != NULL && strcasecmp(one, val) == 0)
+		return true;
+	while (i < num_other && strcasecmp(other[i], val) != 0)
+		i++;
+	return i < num_other;
+}
+
+/* Collect the distinct IDs in @p other than @one into a malloc'ed, NULL-terminated array
+ * of strdup'ed names (caller frees both).  Returns 0, or -1 with *other == NULL. */
+int expr__find_other(const char *p, const char *one, const char ***other,
+		     int *num_otherp)
+{
+	const char *orig = p;
+	int num_other = 0, tok;
+
+	*other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *));
+	if (!*other)
+		return -1;
+	for (;;) {
+		YYSTYPE val;
+		const char *start = p;
+
+		tok = expr__lex(&val, &p);
+		if (tok == 0 || p == start)	/* end of input, or the lexer failed to advance */
+			break;
+		if (tok != ID || already_seen(val.id, one, *other, num_other))
+			continue;
+		if (num_other >= EXPR_MAX_OTHER - 1) {
+			pr_debug("Too many extra events in %s\n", orig);
+			break;
+		}
+		(*other)[num_other] = strdup(val.id);
+		if (!(*other)[num_other])
+			break;
+		num_other++;
+	}
+	(*other)[num_other] = NULL;
+	while (tok && num_other > 0)	/* every exit but end-of-input (tok == 0) is a failure: */
+		free((void *)(*other)[--num_other]);	/* release the names, not just the array */
+	if (tok) {
+		free(*other);
+		*other = NULL;
+	}
+	*num_otherp = num_other;
+	return tok ? -1 : 0;
+}
diff --git a/util/find-vdso-map.c b/util/find-vdso-map.c
new file mode 100644
index 0000000..d7823e3
--- /dev/null
+++ b/util/find-vdso-map.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Scan /proc/self/maps for the VDSO__MAP_NAME mapping: 0 with its bounds in *start/*end, 1 if absent, -1 if maps cannot be opened. */
+static int find_vdso_map(void **start, void **end)
+{
+	char line[128];
+	int missing = 1;
+	FILE *maps = fopen("/proc/self/maps", "r");
+
+	if (maps == NULL) {
+		fprintf(stderr, "vdso: cannot open maps\n");
+		return -1;
+	}
+	while (missing && fgets(line, sizeof(line), maps) != NULL) {
+		int name_off = -1;
+		int nr;
+
+		/* Only private r-x mappings are of interest. */
+		nr = sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n",
+			    start, end, &name_off);
+		if (nr != 2 || name_off < 0)
+			continue;
+
+		/* %n recorded where the mapping's name begins. */
+		if (strncmp(line + name_off, VDSO__MAP_NAME,
+			    sizeof(VDSO__MAP_NAME) - 1) == 0)
+			missing = 0;
+	}
+
+	fclose(maps);
+	return missing;
+}
diff --git a/util/genelf.c b/util/genelf.c
new file mode 100644
index 0000000..c540d47
--- /dev/null
+++ b/util/genelf.c
@@ -0,0 +1,554 @@
+/*
+ * genelf.c
+ * Copyright (C) 2014, Google, Inc
+ *
+ * Contributed by:
+ * 	Stephane Eranian <eranian@gmail.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#ifdef HAVE_DWARF_SUPPORT
+#include <dwarf.h>
+#endif
+
+#include "perf.h"
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
+
+#define JVMTI
+
+#define BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef HAVE_LIBCRYPTO
+
+#define BUILD_ID_MD5
+#undef BUILD_ID_SHA	/* does not seem to work well when linked with Java */
+#undef BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef BUILD_ID_SHA
+#include <openssl/sha.h>
+#endif
+
+#ifdef BUILD_ID_MD5
+#include <openssl/md5.h>
+#endif
+#endif
+
+
+typedef struct {
+  unsigned int namesz;  /* Size of entry's owner string */
+  unsigned int descsz;  /* Size of the note descriptor */
+  unsigned int type;    /* Interpretation of the descriptor */
+  char         name[0]; /* Start of the name+desc data */
+} Elf_Note;
+
+struct options {
+	char *output;
+	int fd;
+};
+
+static char shd_string_table[] = {
+	0,
+	'.', 't', 'e', 'x', 't', 0,			/*  1 */
+	'.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', 0, /*  7 */
+	'.', 's', 'y', 'm', 't', 'a', 'b', 0,		/* 17 */
+	'.', 's', 't', 'r', 't', 'a', 'b', 0,		/* 25 */
+	'.', 'n', 'o', 't', 'e', '.', 'g', 'n', 'u', '.', 'b', 'u', 'i', 'l', 'd', '-', 'i', 'd', 0, /* 33 */
+	'.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */
+	'.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */
+	'.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */
+	'.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', '_', 'h', 'd', 'r', 0, /* 90 */
+	'.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', 0, /* 104 */
+};
+
+static struct buildid_note {
+	Elf_Note desc;		/* descsz: size of build-id, must be multiple of 4 */
+	char	 name[4];	/* GNU\0 */
+	char	 build_id[20];
+} bnote;
+
+static Elf_Sym symtab[]={
+	/* symbol 0 MUST be the undefined symbol */
+	{ .st_name  = 0, /* index in sym_string table */
+	  .st_info  = ELF_ST_TYPE(STT_NOTYPE),
+	  .st_shndx = 0, /* for now */
+	  .st_value = 0x0,
+	  .st_other = ELF_ST_VIS(STV_DEFAULT),
+	  .st_size  = 0,
+	},
+	{ .st_name  = 1, /* index in sym_string table */
+	  .st_info  = ELF_ST_BIND(STB_LOCAL) | ELF_ST_TYPE(STT_FUNC),
+	  .st_shndx = 1,
+	  .st_value = 0, /* for now */
+	  .st_other = ELF_ST_VIS(STV_DEFAULT),
+	  .st_size  = 0, /* for now */
+	}
+};
+
+#ifdef BUILD_ID_URANDOM
+/* Fill note->build_id with random bytes from /dev/urandom (all zeroes on a short read). */
+static void
+gen_build_id(struct buildid_note *note,
+	     unsigned long load_addr __maybe_unused,
+	     const void *code __maybe_unused,
+	     size_t csize __maybe_unused)
+{
+	int fd;
+	size_t sz = sizeof(note->build_id);
+	ssize_t sret;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd == -1)
+		err(1, "cannot access /dev/urandom for buildid");
+
+	sret = read(fd, note->build_id, sz);
+	close(fd);
+
+	if (sret != (ssize_t)sz)
+		memset(note->build_id, 0, sz);
+}
+#endif
+
+#ifdef BUILD_ID_SHA
+static void
+gen_build_id(struct buildid_note *note,
+	     unsigned long load_addr __maybe_unused,
+	     const void *code,
+	     size_t csize)
+{
+	if (sizeof(note->build_id) < SHA_DIGEST_LENGTH)
+		errx(1, "build_id too small for SHA1");
+
+	SHA1(code, csize, (unsigned char *)note->build_id);
+}
+#endif
+
+#ifdef BUILD_ID_MD5
+static void
+gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize)
+{
+	MD5_CTX context;
+
+	if (sizeof(note->build_id) < 16)
+		errx(1, "build_id too small for MD5");
+
+	MD5_Init(&context);
+	MD5_Update(&context, &load_addr, sizeof(load_addr));
+	MD5_Update(&context, code, csize);
+	MD5_Final((unsigned char *)note->build_id, &context);
+}
+#endif
+
+static int
+jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size,
+		      uint64_t unwinding_size, uint64_t base_offset)
+{
+	Elf_Data *d;
+	Elf_Scn *scn;
+	Elf_Shdr *shdr;
+	uint64_t unwinding_table_size = unwinding_size - unwinding_header_size;
+
+	/*
+	 * setup eh_frame section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 8;
+	d->d_off = 0LL;
+	d->d_buf = unwinding;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = unwinding_table_size;
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 104;
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = base_offset;
+	shdr->sh_flags = SHF_ALLOC;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup eh_frame_hdr section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 4;
+	d->d_off = 0LL;
+	d->d_buf = unwinding + unwinding_table_size;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = unwinding_header_size;
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 90;
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = base_offset + unwinding_table_size;
+	shdr->sh_flags = SHF_ALLOC;
+	shdr->sh_entsize = 0;
+
+	return 0;
+}
+
+/*
+ * fd: file descriptor open for writing for the output file
+ * load_addr: code load address (may be zero; feeds the build-id and debug info)
+ * sym: function name (for native code - used as the symbol)
+ * code, csize: the native code and its size in bytes
+ * debug, unwinding: optional line-number / unwind data, skipped when NULL
+ */
+int
+jit_write_elf(int fd, uint64_t load_addr, const char *sym,
+	      const void *code, int csize,
+	      void *debug __maybe_unused, int nr_debug_entries __maybe_unused,
+	      void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size)
+{
+	Elf *e;
+	Elf_Data *d;
+	Elf_Scn *scn;
+	Elf_Ehdr *ehdr;
+	Elf_Shdr *shdr;
+	uint64_t eh_frame_base_offset;
+	char *strsym = NULL;
+	int symlen;
+	int retval = -1;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		warnx("ELF initialization failed");
+		return -1;
+	}
+
+	e = elf_begin(fd, ELF_C_WRITE, NULL);
+	if (!e) {
+		warnx("elf_begin failed");
+		goto error;
+	}
+
+	/*
+	 * setup ELF header
+	 */
+	ehdr = elf_newehdr(e);
+	if (!ehdr) {
+		warnx("cannot get ehdr");
+		goto error;
+	}
+
+	ehdr->e_ident[EI_DATA] = GEN_ELF_ENDIAN;
+	ehdr->e_ident[EI_CLASS] = GEN_ELF_CLASS;
+	ehdr->e_machine = GEN_ELF_ARCH;
+	ehdr->e_type = ET_DYN;
+	ehdr->e_entry = GEN_ELF_TEXT_OFFSET;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_shstrndx= unwinding ? 4 : 2; /* shdr index for section name */
+
+	/*
+	 * setup text section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 16;
+	d->d_off = 0LL;
+	d->d_buf = (void *)code;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = csize;
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 1;
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = GEN_ELF_TEXT_OFFSET;
+	shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * Setup .eh_frame_hdr and .eh_frame
+	 */
+	if (unwinding) {
+		eh_frame_base_offset = ALIGN_8(GEN_ELF_TEXT_OFFSET + csize);
+		retval = jit_add_eh_frame_info(e, unwinding,
+					       unwinding_header_size, unwinding_size,
+					       eh_frame_base_offset);
+		if (retval)
+			goto error;
+	}
+
+	/*
+	 * setup section headers string table
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = shd_string_table;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = sizeof(shd_string_table);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 7; /* offset of '.shstrtab' in shd_string_table */
+	shdr->sh_type = SHT_STRTAB;
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup symtab section
+	 */
+	symtab[1].st_size  = csize;
+	symtab[1].st_value = GEN_ELF_TEXT_OFFSET;
+
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 8;
+	d->d_off = 0LL;
+	d->d_buf = symtab;
+	d->d_type = ELF_T_SYM;
+	d->d_size = sizeof(symtab);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 17; /* offset of '.symtab' in shd_string_table */
+	shdr->sh_type = SHT_SYMTAB;
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = sizeof(Elf_Sym);
+	shdr->sh_link = unwinding ? 6 : 4; /* index of .strtab section */
+
+	/*
+	 * setup symbols string table
+	 * 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry
+	 */
+	symlen = 2 + strlen(sym);
+	strsym = calloc(1, symlen);
+	if (!strsym) {
+		warnx("cannot allocate strsym");
+		goto error;
+	}
+	strcpy(strsym + 1, sym);
+
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = strsym;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = symlen;
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 25; /* offset in shd_string_table */
+	shdr->sh_type = SHT_STRTAB;
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup build-id section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	/*
+	 * build-id generation
+	 */
+	gen_build_id(&bnote, load_addr, code, csize);
+	bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */
+	bnote.desc.descsz = sizeof(bnote.build_id);
+	bnote.desc.type   = NT_GNU_BUILD_ID;
+	strcpy(bnote.name, "GNU");
+
+	d->d_align = 4;
+	d->d_off = 0LL;
+	d->d_buf = &bnote;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = sizeof(bnote);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 33; /* offset in shd_string_table */
+	shdr->sh_type = SHT_NOTE;
+	shdr->sh_addr = 0x0;
+	shdr->sh_flags = SHF_ALLOC;
+	shdr->sh_size = sizeof(bnote);
+	shdr->sh_entsize = 0;
+
+#ifdef HAVE_DWARF_SUPPORT
+	if (debug && nr_debug_entries) {
+		retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries);
+		if (retval)
+			goto error;
+	} else
+#endif
+	{
+		if (elf_update(e, ELF_C_WRITE) < 0) {
+			warnx("elf_update 4 failed");
+			goto error;
+		}
+	}
+
+	retval = 0;
+error:
+	(void)elf_end(e);
+
+	free(strsym);
+
+
+	return retval;
+}
+
+#ifndef JVMTI
+
+static unsigned char x86_code[] = {
+    0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */
+    0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */
+    0xCD, 0x80            /* int $0x80 */
+};
+
+static struct options options;
+
+/* Standalone test driver: write the x86_code stub as an ELF image to the -o file. */
+int main(int argc, char **argv)
+{
+	int c, fd, ret;
+
+	while ((c = getopt(argc, argv, "o:h")) != -1) {
+		switch (c) {
+		case 'o':
+			options.output = optarg;
+			break;
+		case 'h':
+			printf("Usage: genelf -o output_file [-h]\n");
+			return 0;
+		default:
+			errx(1, "unknown option");
+		}
+	}
+	if (!options.output)	/* -o is mandatory: there is no default path */
+		errx(1, "missing -o output_file");
+
+	fd = open(options.output, O_CREAT|O_TRUNC|O_RDWR, 0666);
+	if (fd == -1)
+		err(1, "cannot create file %s", options.output);
+	/* load address 0, and neither debug entries nor unwinding data for this stub */
+	ret = jit_write_elf(fd, 0, "main", x86_code, sizeof(x86_code), NULL, 0, NULL, 0, 0);
+	close(fd);
+	if (ret != 0)
+		unlink(options.output);
+	return ret;
+}
+#endif
diff --git a/util/genelf.h b/util/genelf.h
new file mode 100644
index 0000000..de322d5
--- /dev/null
+++ b/util/genelf.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __GENELF_H__
+#define __GENELF_H__
+
+/* genelf.c */
+int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
+		  const void *code, int csize, void *debug, int nr_debug_entries,
+		  void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size);
+#ifdef HAVE_DWARF_SUPPORT
+/* genelf_debug.c */
+int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries);
+#endif
+
+#if   defined(__arm__)
+#define GEN_ELF_ARCH	EM_ARM
+#define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__aarch64__)
+#define GEN_ELF_ARCH	EM_AARCH64
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__x86_64__)
+#define GEN_ELF_ARCH	EM_X86_64
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__i386__)
+#define GEN_ELF_ARCH	EM_386
+#define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__powerpc64__)
+#define GEN_ELF_ARCH	EM_PPC64
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__powerpc__)
+#define GEN_ELF_ARCH	EM_PPC
+#define GEN_ELF_CLASS	ELFCLASS32
+#else
+#error "unsupported architecture"
+#endif
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define GEN_ELF_ENDIAN	ELFDATA2MSB
+#else
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#endif
+
+#if GEN_ELF_CLASS == ELFCLASS64
+#define elf_newehdr	elf64_newehdr
+#define elf_getshdr	elf64_getshdr
+#define Elf_Ehdr	Elf64_Ehdr
+#define Elf_Shdr	Elf64_Shdr
+#define Elf_Sym		Elf64_Sym
+#define ELF_ST_TYPE(a)	ELF64_ST_TYPE(a)
+#define ELF_ST_BIND(a)	ELF64_ST_BIND(a)
+#define ELF_ST_VIS(a)	ELF64_ST_VISIBILITY(a)
+#else
+#define elf_newehdr	elf32_newehdr
+#define elf_getshdr	elf32_getshdr
+#define Elf_Ehdr	Elf32_Ehdr
+#define Elf_Shdr	Elf32_Shdr
+#define Elf_Sym		Elf32_Sym
+#define ELF_ST_TYPE(a)	ELF32_ST_TYPE(a)
+#define ELF_ST_BIND(a)	ELF32_ST_BIND(a)
+#define ELF_ST_VIS(a)	ELF32_ST_VISIBILITY(a)
+#endif
+
+/* The .text section is directly after the ELF header */
+#define GEN_ELF_TEXT_OFFSET sizeof(Elf_Ehdr)
+
+#endif
diff --git a/util/genelf_debug.c b/util/genelf_debug.c
new file mode 100644
index 0000000..40789d8
--- /dev/null
+++ b/util/genelf_debug.c
@@ -0,0 +1,611 @@
+/*
+ * genelf_debug.c
+ * Copyright (C) 2015, Google, Inc
+ *
+ * Contributed by:
+ * 	Stephane Eranian <eranian@google.com>
+ *
+ * Released under the GPL v2.
+ *
+ * based on GPLv2 source code from Oprofile
+ * @remark Copyright 2007 OProfile authors
+ * @author Philippe Elie
+ */
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#include <dwarf.h>
+
+#include "perf.h"
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#define BUFFER_EXT_DFL_SIZE	(4 * 1024)
+
+typedef uint32_t uword;
+typedef uint16_t uhalf;
+typedef int32_t  sword;
+typedef int16_t  shalf;
+typedef uint8_t  ubyte;
+typedef int8_t   sbyte;
+
+struct buffer_ext {
+	size_t cur_pos;
+	size_t max_sz;
+	void *data;
+};
+
+static void
+buffer_ext_dump(struct buffer_ext *be, const char *msg)
+{
+	size_t i;
+	warnx("DUMP for %s", msg);
+	for (i = 0 ; i < be->cur_pos; i++)
+		warnx("%4zu 0x%02x", i, (((char *)be->data)[i]) & 0xff);
+}
+
+static inline int
+buffer_ext_add(struct buffer_ext *be, void *addr, size_t sz)
+{
+	void *tmp;
+	size_t be_sz = be->max_sz;
+
+retry:
+	if ((be->cur_pos + sz) < be_sz) {
+		memcpy(be->data + be->cur_pos, addr, sz);
+		be->cur_pos += sz;
+		return 0;
+	}
+
+	if (!be_sz)
+		be_sz = BUFFER_EXT_DFL_SIZE;
+	else
+		be_sz <<= 1;
+
+	tmp = realloc(be->data, be_sz);
+	if (!tmp)
+		return -1;
+
+	be->data   = tmp;
+	be->max_sz = be_sz;
+
+	goto retry;
+}
+
+static void
+buffer_ext_init(struct buffer_ext *be)
+{
+	be->data = NULL;
+	be->cur_pos = 0;
+	be->max_sz = 0;
+}
+
+static inline size_t
+buffer_ext_size(struct buffer_ext *be)
+{
+	return be->cur_pos;
+}
+
+static inline void *
+buffer_ext_addr(struct buffer_ext *be)
+{
+	return be->data;
+}
+
+struct debug_line_header {
+	// length of what follows; this field itself is not counted
+	uword total_length;
+	// version number (2 currently)
+	uhalf version;
+	// relative offset from the next field to the
+	// first program statement
+	uword prolog_length;
+	ubyte minimum_instruction_length;
+	ubyte default_is_stmt;
+	// line_base - see DWARF 2 specs
+	sbyte line_base;
+	// line_range - see DWARF 2 specs
+	ubyte line_range;
+	// number of standard opcodes + 1
+	ubyte opcode_base;
+	/* followed by the array of opcode argument counts: ubytes [nr_opcode_base] */
+	/* followed by the search directories, zero-terminated strings,
+	 * the list being terminated by an empty string.
+	 */
+	/* followed by an array of { filename, LEB128, LEB128, LEB128 }: first the
+	 * directory index entry (0 means current directory), then mtime and
+	 * filesize; the last entry is followed by an empty string.
+	 */
+	/* followed by the first program statement */
+} __packed;
+
+/* The DWARF 2 spec talks about only one possible compilation unit header, while
+ * binutils can handle two flavours of DWARF 2, 32 and 64 bits. This is not
+ * related to the target arch: a 32-bit ELF can hold more than 4 GB of debug
+ * information. For now we handle only the DWARF 2 32-bit comp unit. It'll only
+ * become a problem if we generate more than 4 GB of debug information.
+ */
+struct compilation_unit_header {
+	uword total_length;
+	uhalf version;
+	uword debug_abbrev_offset;
+	ubyte pointer_size;
+} __packed;
+
+#define DW_LNS_num_opcode (DW_LNS_set_isa + 1)
+
+/* Fields filled in at run time are marked with -1. */
+static struct debug_line_header const default_debug_line_header = {
+	.total_length = -1,
+	.version = 2,
+	.prolog_length = -1,
+	.minimum_instruction_length = 1,	/* could be better when min instruction size != 1 */
+	.default_is_stmt = 1,	/* we don't take care about basic block */
+	.line_base = -5,	/* sensible value for line base ... */
+	.line_range = -14,     /* ... and line range are guessed statically; NOTE(review): -14 in a ubyte is 242, confirm 14 was not meant */
+	.opcode_base = DW_LNS_num_opcode
+};
+
+static ubyte standard_opcode_length[] =
+{
+	0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1
+};
+#if 0
+{
+	[DW_LNS_advance_pc]   = 1,
+	[DW_LNS_advance_line] = 1,
+	[DW_LNS_set_file] =  1,
+	[DW_LNS_set_column] = 1,
+	[DW_LNS_fixed_advance_pc] = 1,
+	[DW_LNS_set_isa] = 1,
+};
+#endif
+
+/* field filled at run time are marked with -1 */
+static struct compilation_unit_header default_comp_unit_header = {
+	.total_length = -1,
+	.version = 2,
+	.debug_abbrev_offset = 0,     /* we reuse the same abbrev entries for all comp unit */
+	.pointer_size = sizeof(void *)
+};
+
+static void emit_uword(struct buffer_ext *be, uword data)
+{
+	buffer_ext_add(be, &data, sizeof(uword));
+}
+
+static void emit_string(struct buffer_ext *be, const char *s)
+{
+	buffer_ext_add(be, (void *)s, strlen(s) + 1);
+}
+
+static void emit_unsigned_LEB128(struct buffer_ext *be,
+				 unsigned long data)
+{
+	do {
+		ubyte cur = data & 0x7F;
+		data >>= 7;
+		if (data)
+			cur |= 0x80;
+		buffer_ext_add(be, &cur, 1);
+	} while (data);
+}
+
+/* Append @data to @be as signed LEB128: 7 bits per byte, 0x80 flags "more follows". */
+static void emit_signed_LEB128(struct buffer_ext *be, long data)
+{
+	int more = 1, negative = data < 0;
+	int size = sizeof(long) * CHAR_BIT;
+	while (more) {
+		ubyte cur = data & 0x7F;
+		data >>= 7;
+		if (negative)	/* keep the sign bits; the mask needs 1L, an int 1 can't be shifted this far */
+			data |= -(1L << (size - 7));
+		if ((data == 0 && !(cur & 0x40)) ||
+		    (data == -1l && (cur & 0x40)))
+			more = 0;
+		else
+			cur |= 0x80;
+		buffer_ext_add(be, &cur, 1);
+	}
+}
+
+static void emit_extended_opcode(struct buffer_ext *be, ubyte opcode,
+				 void *data, size_t data_len)
+{
+	buffer_ext_add(be, (char *)"", 1);
+
+	emit_unsigned_LEB128(be, data_len + 1);
+
+	buffer_ext_add(be, &opcode, 1);
+	buffer_ext_add(be, data, data_len);
+}
+
+static void emit_opcode(struct buffer_ext *be, ubyte opcode)
+{
+	buffer_ext_add(be, &opcode, 1);
+}
+
+static void emit_opcode_signed(struct buffer_ext  *be,
+			       ubyte opcode, long data)
+{
+	buffer_ext_add(be, &opcode, 1);
+	emit_signed_LEB128(be, data);
+}
+
+static void emit_opcode_unsigned(struct buffer_ext *be, ubyte opcode,
+				 unsigned long data)
+{
+	buffer_ext_add(be, &opcode, 1);
+	emit_unsigned_LEB128(be, data);
+}
+
+static void emit_advance_pc(struct buffer_ext *be, unsigned long delta_pc)
+{
+	emit_opcode_unsigned(be, DW_LNS_advance_pc, delta_pc);
+}
+
+static void emit_advance_lineno(struct buffer_ext  *be, long delta_lineno)
+{
+	emit_opcode_signed(be, DW_LNS_advance_line, delta_lineno);
+}
+
+static void emit_lne_end_of_sequence(struct buffer_ext *be)
+{
+	emit_extended_opcode(be, DW_LNE_end_sequence, NULL, 0);
+}
+
+static void emit_set_file(struct buffer_ext *be, unsigned long idx)
+{
+	emit_opcode_unsigned(be, DW_LNS_set_file, idx);
+}
+
+static void emit_lne_define_filename(struct buffer_ext *be,
+				     const char *filename)
+{
+	buffer_ext_add(be, (void *)"", 1);
+
+	/* LNE length: 1 opcode byte + filename with its NUL + 3 one-byte LEB128 fields (dir entry, timestamp, filesize) = strlen + 5 */
+	emit_unsigned_LEB128(be, strlen(filename) + 5);
+	emit_opcode(be, DW_LNE_define_file);
+	emit_string(be, filename);
+	/* directory index 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+	/* last modification date on file 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+	/* filesize 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+}
+
+static void emit_lne_set_address(struct buffer_ext *be,
+				 void *address)
+{
+	emit_extended_opcode(be, DW_LNE_set_address, &address, sizeof(unsigned long));
+}
+
+static ubyte get_special_opcode(struct debug_entry *ent,
+				unsigned int last_line,
+				unsigned long last_vma)
+{
+	unsigned int temp;
+	unsigned long delta_addr;
+
+	/*
+	 * delta from line_base
+	 */
+	temp = (ent->lineno - last_line) - default_debug_line_header.line_base;
+
+	if (temp >= default_debug_line_header.line_range)
+		return 0;
+
+	/*
+	 * delta of addresses
+	 */
+	delta_addr = (ent->addr - last_vma) / default_debug_line_header.minimum_instruction_length;
+
+	/* This is not sufficient to ensure opcode will be in [0-256] but
+	 * sufficient to ensure when summing with the delta lineno we will
+	 * not overflow the unsigned long opcode */
+
+	if (delta_addr <= 256 / default_debug_line_header.line_range) {
+		unsigned long opcode = temp +
+			(delta_addr * default_debug_line_header.line_range) +
+			default_debug_line_header.opcode_base;
+
+		return opcode <= 255 ? opcode : 0;
+	}
+	return 0;
+}
+
+static void emit_lineno_info(struct buffer_ext *be,
+			     struct debug_entry *ent, size_t nr_entry,
+			     unsigned long code_addr)
+{
+	size_t i;
+
+	/*
+	 * Machine state at start of a statement program
+	 * address = 0
+	 * file    = 1
+	 * line    = 1
+	 * column  = 0
+	 * is_stmt = default_is_stmt as given in the debug_line_header
+	 * basic block = 0
+	 * end sequence = 0
+	 */
+
+	/* start state of the state machine we take care of */
+	unsigned long last_vma = code_addr;
+	char const  *cur_filename = NULL;
+	unsigned long cur_file_idx = 0;
+	int last_line = 1;
+
+	emit_lne_set_address(be, (void *)code_addr);
+
+	for (i = 0; i < nr_entry; i++, ent = debug_entry_next(ent)) {
+		int need_copy = 0;
+		ubyte special_opcode;
+
+		/*
+		 * check if filename changed, if so add it
+		 */
+		if (!cur_filename || strcmp(cur_filename, ent->name)) {
+			emit_lne_define_filename(be, ent->name);
+			cur_filename = ent->name;
+			emit_set_file(be, ++cur_file_idx);
+			need_copy = 1;
+		}
+
+		special_opcode = get_special_opcode(ent, last_line, last_vma);
+		if (special_opcode != 0) {
+			last_line = ent->lineno;
+			last_vma  = ent->addr;
+			emit_opcode(be, special_opcode);
+		} else {
+			/*
+			 * lines differ, emit line delta
+			 */
+			if (last_line != ent->lineno) {
+				emit_advance_lineno(be, ent->lineno - last_line);
+				last_line = ent->lineno;
+				need_copy = 1;
+			}
+			/*
+			 * addresses differ, emit address delta
+			 */
+			if (last_vma != ent->addr) {
+				emit_advance_pc(be, ent->addr - last_vma);
+				last_vma = ent->addr;
+				need_copy = 1;
+			}
+			/*
+			 * add new row to matrix
+			 */
+			if (need_copy)
+				emit_opcode(be, DW_LNS_copy);
+		}
+	}
+}
+
+static void add_debug_line(struct buffer_ext *be,
+	struct debug_entry *ent, size_t nr_entry,
+	unsigned long code_addr)
+{
+	struct debug_line_header * dbg_header;
+	size_t old_size;
+
+	old_size = buffer_ext_size(be);
+
+	buffer_ext_add(be, (void *)&default_debug_line_header,
+		 sizeof(default_debug_line_header));
+
+	buffer_ext_add(be, &standard_opcode_length,  sizeof(standard_opcode_length));
+
+	// empty directory entry
+	buffer_ext_add(be, (void *)"", 1);
+
+	// empty filename directory
+	buffer_ext_add(be, (void *)"", 1);
+
+	dbg_header = buffer_ext_addr(be) + old_size;
+	dbg_header->prolog_length = (buffer_ext_size(be) - old_size) -
+		offsetof(struct debug_line_header, minimum_instruction_length);
+
+	emit_lineno_info(be, ent, nr_entry, code_addr);
+
+	emit_lne_end_of_sequence(be);
+
+	dbg_header = buffer_ext_addr(be) + old_size;
+	dbg_header->total_length = (buffer_ext_size(be) - old_size) -
+		offsetof(struct debug_line_header, version);
+}
+
+/*
+ * Emit the abbreviation table into @be as a sequence of unsigned LEB128
+ * values: a single abbreviation (code 1) for a DW_TAG_compile_unit whose
+ * only attribute is DW_AT_stmt_list in DW_FORM_data4 form.
+ */
+static void
+add_debug_abbrev(struct buffer_ext *be)
+{
+        emit_unsigned_LEB128(be, 1);			/* abbreviation code */
+        emit_unsigned_LEB128(be, DW_TAG_compile_unit);	/* tag */
+        emit_unsigned_LEB128(be, DW_CHILDREN_yes);	/* children flag */
+        emit_unsigned_LEB128(be, DW_AT_stmt_list);	/* attribute name */
+        emit_unsigned_LEB128(be, DW_FORM_data4);	/* attribute form */
+        /* NOTE(review): presumably 0,0 ends the attribute list and the last 0 ends the table — confirm against the DWARF spec */
+        emit_unsigned_LEB128(be, 0);
+        emit_unsigned_LEB128(be, 0);
+        emit_unsigned_LEB128(be, 0);
+}
+
+/*
+ * Append a compilation unit to @be: the default unit header, abbreviation
+ * code 1 and @offset_debug_line as an unsigned word. total_length is
+ * patched afterwards to cover everything from the version field onwards.
+ */
+static void
+add_compilation_unit(struct buffer_ext *be,
+		     size_t offset_debug_line)
+{
+	struct compilation_unit_header *hdr;
+	size_t start;
+
+	start = buffer_ext_size(be);
+	buffer_ext_add(be, &default_comp_unit_header,
+		       sizeof(default_comp_unit_header));
+
+	emit_unsigned_LEB128(be, 1);
+	emit_uword(be, offset_debug_line);
+
+	/* look the header up after all appends are done */
+	hdr = buffer_ext_addr(be) + start;
+	hdr->total_length = buffer_ext_size(be) - start -
+		offsetof(struct compilation_unit_header, version);
+}
+
+/*
+ * Rebase the address of each of the @nr_debug_entries entries in @debug by
+ * subtracting @code_addr, then fill @di (compilation unit), @dl (line
+ * table) and @da (abbreviations). Always returns 0.
+ */
+static int
+jit_process_debug_info(uint64_t code_addr,
+		       void *debug, int nr_debug_entries,
+		       struct buffer_ext *dl,
+		       struct buffer_ext *da,
+		       struct buffer_ext *di)
+{
+	struct debug_entry *cur = debug;
+	int remaining = nr_debug_entries;
+
+	while (remaining-- > 0) {
+		cur->addr -= code_addr;
+		cur = debug_entry_next(cur);
+	}
+
+	/* the unit records the offset at which its line table will start */
+	add_compilation_unit(di, buffer_ext_size(dl));
+	add_debug_line(dl, debug, nr_debug_entries, 0);
+	add_debug_abbrev(da);
+	if (0) buffer_ext_dump(da, "abbrev");
+
+	return 0;
+}
+
+/*
+ * Append one SHT_PROGBITS section to @e whose data is the current content
+ * of @be; @name_off is stored in sh_name.
+ *
+ * Returns 0 on success, -1 (after a warning) when libelf refuses a step.
+ */
+static int
+jit_add_debug_section(Elf *e, struct buffer_ext *be, unsigned int name_off)
+{
+	Elf_Data *d;
+	Elf_Scn *scn;
+	Elf_Shdr *shdr;
+
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = buffer_ext_addr(be);
+	d->d_type = ELF_T_BYTE;
+	d->d_size = buffer_ext_size(be);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = name_off;
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	return 0;
+}
+
+/*
+ * Build the line, info and abbreviation data for the @nr_debug_entries
+ * entries in @debug and add them to @e as three sections, then write the
+ * ELF image out.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+int
+jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries)
+{
+	struct buffer_ext dl, di, da;
+	int ret;
+
+	buffer_ext_init(&dl);
+	buffer_ext_init(&di);
+	buffer_ext_init(&da);
+
+	ret = jit_process_debug_info(code_addr, debug, nr_debug_entries, &dl, &da, &di);
+	if (ret)
+		return -1;
+
+	/*
+	 * The sh_name values are hard-coded; the section names in the
+	 * comments are the ones each value is meant to select.
+	 */
+	if (jit_add_debug_section(e, &dl, 52) ||	/* .debug_line */
+	    jit_add_debug_section(e, &di, 64) ||	/* .debug_info */
+	    jit_add_debug_section(e, &da, 76))		/* .debug_abbrev */
+		return -1;
+
+	/*
+	 * now we update the ELF image with all the sections
+	 */
+	if (elf_update(e, ELF_C_WRITE) < 0) {
+		warnx("elf_update debug failed");
+		return -1;
+	}
+	return 0;
+}
diff --git a/util/generate-cmdlist.sh b/util/generate-cmdlist.sh
new file mode 100755
index 0000000..c3cef36
--- /dev/null
+++ b/util/generate-cmdlist.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+echo "/* Automatically generated by $0 */
+struct cmdname_help
+{
+    char name[16];
+    char help[80];
+};
+
+static struct cmdname_help common_cmds[] = {"
+
+sed -n -e 's/^perf-\([^ 	]*\)[ 	].* common.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+     sed -n '
+     /^NAME/,/perf-'"$cmd"'/H
+     ${
+            x
+            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
+	    p
+     }' "Documentation/perf-$cmd.txt"
+done
+
+echo "#ifdef HAVE_LIBELF_SUPPORT"
+sed -n -e 's/^perf-\([^ 	]*\)[ 	].* full.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+     sed -n '
+     /^NAME/,/perf-'"$cmd"'/H
+     ${
+            x
+            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
+	    p
+     }' "Documentation/perf-$cmd.txt"
+done
+echo "#endif /* HAVE_LIBELF_SUPPORT */"
+
+echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)"
+sed -n -e 's/^perf-\([^ 	]*\)[ 	].* audit*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+     sed -n '
+     /^NAME/,/perf-'"$cmd"'/H
+     ${
+            x
+            s/.*perf-'"$cmd"' - \(.*\)/  {"'"$cmd"'", "\1"},/
+	    p
+     }' "Documentation/perf-$cmd.txt"
+done
+echo "#endif /* HAVE_LIBELF_SUPPORT */"
+echo "};"
diff --git a/util/group.h b/util/group.h
new file mode 100644
index 0000000..f36c7e3
--- /dev/null
+++ b/util/group.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef GROUP_H
+#define GROUP_H 1
+
+/*
+ * Topdown grouping hooks. Only the prototypes are visible here; judging by
+ * the arch_ prefix the implementations are presumably per-architecture —
+ * TODO confirm.
+ */
+bool arch_topdown_check_group(bool *warn);
+void arch_topdown_group_warn(void);
+
+#endif
diff --git a/util/header.c b/util/header.c
new file mode 100644
index 0000000..a8bff21
--- /dev/null
+++ b/util/header.c
@@ -0,0 +1,3918 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include "util.h"
+#include "string2.h"
+#include <sys/param.h>
+#include <sys/types.h>
+#include <byteswap.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/stringify.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <linux/time64.h>
+#include <dirent.h>
+
+#include "evlist.h"
+#include "evsel.h"
+#include "header.h"
+#include "memswap.h"
+#include "../perf.h"
+#include "trace-event.h"
+#include "session.h"
+#include "symbol.h"
+#include "debug.h"
+#include "cpumap.h"
+#include "pmu.h"
+#include "vdso.h"
+#include "strbuf.h"
+#include "build-id.h"
+#include "data.h"
+#include <api/fs/fs.h>
+#include "asm/bug.h"
+#include "tool.h"
+#include "time-utils.h"
+#include "units.h"
+
+#include "sane_ctype.h"
+
+/*
+ * magic2 = "PERFILE2"
+ * must be a numerical value to let the endianness
+ * determine the memory layout. That way we are able
+ * to detect endianness when reading the perf.data file
+ * back.
+ *
+ * we check for legacy (PERFFILE) format.
+ */
+static const char *__perf_magic1 = "PERFFILE";
+static const u64 __perf_magic2    = 0x32454c4946524550ULL;
+static const u64 __perf_magic2_sw = 0x50455246494c4532ULL;
+
+#define PERF_MAGIC	__perf_magic2
+
+const char perf_version_string[] = PERF_VERSION;
+
+/* An event attribute paired with a file section describing its ids. */
+struct perf_file_attr {
+	struct perf_event_attr	attr;
+	struct perf_file_section	ids;
+};
+
+/*
+ * I/O target of the feature read/write helpers: a file descriptor when
+ * buf is NULL, otherwise an in-memory buffer.
+ */
+struct feat_fd {
+	struct perf_header	*ph;	/* consulted for needs_swap when reading */
+	int			fd;
+	void			*buf;	/* Either buf != NULL or fd >= 0 */
+	ssize_t			offset;	/* current read/write position inside buf */
+	size_t			size;	/* size of buf in bytes */
+	struct perf_evsel	*events;
+};
+
+/* Set bit @feat in header->adds_features. */
+void perf_header__set_feat(struct perf_header *header, int feat)
+{
+	set_bit(feat, header->adds_features);
+}
+
+/* Clear bit @feat in header->adds_features. */
+void perf_header__clear_feat(struct perf_header *header, int feat)
+{
+	clear_bit(feat, header->adds_features);
+}
+
+/* Return whether bit @feat is set in header->adds_features. */
+bool perf_header__has_feat(const struct perf_header *header, int feat)
+{
+	return test_bit(feat, header->adds_features);
+}
+
+/*
+ * Write @size bytes from @buf to ff->fd.
+ * Returns 0 when everything was written, the negative writen() result on
+ * error, or -1 on a short write.
+ */
+static int __do_write_fd(struct feat_fd *ff, const void *buf, size_t size)
+{
+	ssize_t written = writen(ff->fd, buf, size);
+
+	if (written == (ssize_t)size)
+		return 0;
+	if (written < 0)
+		return (int)written;
+	return -1;
+}
+
+/*
+ * Append @size bytes from @buf to ff->buf at ff->offset, growing the
+ * buffer by doubling (capped at max_size) when the data does not fit.
+ * Returns 0 on success, -E2BIG if the data would exceed max_size and
+ * -ENOMEM if realloc() fails.
+ */
+static int __do_write_buf(struct feat_fd *ff,  const void *buf, size_t size)
+{
+	/* struct perf_event_header::size is u16 */
+	const size_t max_size = 0xffff - sizeof(struct perf_event_header);
+	size_t new_size = ff->size;
+	void *addr;
+
+	if (size + ff->offset > max_size)
+		return -E2BIG;
+
+	/* NOTE(review): doubling makes no progress if ff->size is 0 — confirm callers always pre-size the buffer */
+	while (size > (new_size - ff->offset))
+		new_size <<= 1;
+	new_size = min(max_size, new_size);
+
+	if (ff->size < new_size) {
+		/* keep the old pointer until realloc() is known to have worked */
+		addr = realloc(ff->buf, new_size);
+		if (!addr)
+			return -ENOMEM;
+		ff->buf = addr;
+		ff->size = new_size;
+	}
+
+	memcpy(ff->buf + ff->offset, buf, size);
+	ff->offset += size;
+
+	return 0;
+}
+
+/*
+ * Write @size bytes from @buf to the buffer of @ff when it has one,
+ * otherwise to its file descriptor.
+ * Return: 0 if succeeded, -ERR if failed.
+ */
+int do_write(struct feat_fd *ff, const void *buf, size_t size)
+{
+	if (ff->buf)
+		return __do_write_buf(ff, buf, size);
+
+	return __do_write_fd(ff, buf, size);
+}
+
+/*
+ * Write @size followed by the BITS_TO_U64(@size) 64-bit words of @set.
+ * Return: 0 if succeeded, -ERR if failed.
+ */
+static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size)
+{
+	u64 *words = (u64 *) set;
+	int idx, err;
+
+	err = do_write(ff, &size, sizeof(size));
+	if (err < 0)
+		return err;
+
+	for (idx = 0; (u64) idx < BITS_TO_U64(size); idx++) {
+		err = do_write(ff, &words[idx], sizeof(*words));
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Write @count bytes from @bf, then (@count_aligned - @count) zero bytes.
+ * Return: 0 if succeeded, -ERR if failed.
+ */
+int write_padded(struct feat_fd *ff, const void *bf,
+		 size_t count, size_t count_aligned)
+{
+	static const char zero_buf[NAME_ALIGN];
+	int err;
+
+	err = do_write(ff, bf, count);
+	if (err)
+		return err;
+
+	return do_write(ff, zero_buf, count_aligned - count);
+}
+
+/* strlen(str) + 1 rounded up to NAME_ALIGN, plus a u32. */
+#define string_size(str)						\
+	(PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32))
+
+/*
+ * Write @str as a u32 padded length followed by the string itself,
+ * zero-padded up to that length.
+ * Return: 0 if succeeded, -ERR if failed.
+ */
+static int do_write_string(struct feat_fd *ff, const char *str)
+{
+	u32 raw_len = strlen(str) + 1;
+	u32 padded_len = PERF_ALIGN(raw_len, NAME_ALIGN);
+	int err;
+
+	/* write len, incl. \0 */
+	err = do_write(ff, &padded_len, sizeof(padded_len));
+	if (err < 0)
+		return err;
+
+	return write_padded(ff, str, raw_len, padded_len);
+}
+
+/*
+ * Read @size bytes from ff->fd into @addr.
+ * Returns 0 when everything was read, the negative readn() result on
+ * error, or -1 on a short read.
+ */
+static int __do_read_fd(struct feat_fd *ff, void *addr, ssize_t size)
+{
+	ssize_t got = readn(ff->fd, addr, size);
+
+	if (got == size)
+		return 0;
+	if (got < 0)
+		return (int)got;
+	return -1;
+}
+
+/*
+ * Copy @size bytes from ff->buf at ff->offset into @addr and advance the
+ * offset. Returns -1 when fewer than @size bytes remain, 0 otherwise.
+ */
+static int __do_read_buf(struct feat_fd *ff, void *addr, ssize_t size)
+{
+	ssize_t remaining = (ssize_t)ff->size - ff->offset;
+
+	if (size > remaining)
+		return -1;
+
+	memcpy(addr, ff->buf + ff->offset, size);
+	ff->offset += size;
+	return 0;
+}
+
+/* Read from the buffer of @ff when it has one, otherwise from its fd. */
+static int __do_read(struct feat_fd *ff, void *addr, ssize_t size)
+{
+	if (ff->buf)
+		return __do_read_buf(ff, addr, size);
+
+	return __do_read_fd(ff, addr, size);
+}
+
+/*
+ * Read one u32 into @addr, byte-swapping it when the header is flagged
+ * needs_swap. Returns 0 on success, the __do_read() error otherwise.
+ */
+static int do_read_u32(struct feat_fd *ff, u32 *addr)
+{
+	int err = __do_read(ff, addr, sizeof(*addr));
+
+	if (!err && ff->ph->needs_swap)
+		*addr = bswap_32(*addr);
+
+	return err;
+}
+
+/*
+ * Read one u64 into @addr, byte-swapping it when the header is flagged
+ * needs_swap. Returns 0 on success, the __do_read() error otherwise.
+ */
+static int do_read_u64(struct feat_fd *ff, u64 *addr)
+{
+	int err = __do_read(ff, addr, sizeof(*addr));
+
+	if (!err && ff->ph->needs_swap)
+		*addr = bswap_64(*addr);
+
+	return err;
+}
+
+/*
+ * Read a u32 length and then that many bytes into a freshly allocated
+ * buffer. Returns the buffer (the caller frees it) or NULL on failure.
+ */
+static char *do_read_string(struct feat_fd *ff)
+{
+	char *str;
+	u32 len;
+
+	if (do_read_u32(ff, &len))
+		return NULL;
+
+	str = malloc(len);
+	if (!str)
+		return NULL;
+
+	if (__do_read(ff, str, len)) {
+		free(str);
+		return NULL;
+	}
+
+	/*
+	 * strings are padded by zeroes
+	 * thus the actual strlen of str
+	 * may be less than len
+	 */
+	return str;
+}
+
+/*
+ * Read a u64 bit count and then BITS_TO_U64(count) words into a newly
+ * allocated bitmap. On success the bitmap and the count are stored in
+ * @pset and @psize.
+ * Return: 0 if succeeded, -ERR if failed.
+ */
+static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
+{
+	unsigned long *bits;
+	u64 nbits, *words;
+	int idx, err;
+
+	err = do_read_u64(ff, &nbits);
+	if (err)
+		return err;
+
+	bits = bitmap_alloc(nbits);
+	if (!bits)
+		return -ENOMEM;
+
+	bitmap_zero(bits, nbits);
+	words = (u64 *) bits;
+
+	for (idx = 0; (u64) idx < BITS_TO_U64(nbits); idx++) {
+		err = do_read_u64(ff, &words[idx]);
+		if (err < 0) {
+			free(bits);
+			return err;
+		}
+	}
+
+	*pset  = bits;
+	*psize = nbits;
+	return 0;
+}
+
+/*
+ * Hand ff->fd and the evlist entries to read_tracing_data().
+ * Warns and returns -1 when @ff is buffer backed (pipe mode).
+ */
+static int write_tracing_data(struct feat_fd *ff,
+			      struct perf_evlist *evlist)
+{
+	if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+		return -1;
+
+	return read_tracing_data(ff->fd, &evlist->entries);
+}
+
+/*
+ * Write the build-id table of the session that embeds ff->ph, then call
+ * perf_session__cache_build_ids() on it.
+ * Returns 0 on success, -1 when perf_session__read_build_ids() reports
+ * nothing or @ff is buffer backed (pipe mode), or the table writer's
+ * error.
+ */
+static int write_build_id(struct feat_fd *ff,
+			  struct perf_evlist *evlist __maybe_unused)
+{
+	struct perf_session *session;
+	int err;
+
+	/* ff->ph is the header member embedded in a struct perf_session */
+	session = container_of(ff->ph, struct perf_session, header);
+
+	if (!perf_session__read_build_ids(session, true))
+		return -1;
+
+	if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+		return -1;
+
+	err = perf_session__write_buildid_table(session, ff);
+	if (err < 0) {
+		pr_debug("failed to write buildid table\n");
+		return err;
+	}
+	perf_session__cache_build_ids(session);
+
+	return 0;
+}
+
+/* Write uname()'s nodename as a string; -1 if uname() fails. */
+static int write_hostname(struct feat_fd *ff,
+			  struct perf_evlist *evlist __maybe_unused)
+{
+	struct utsname uts;
+
+	if (uname(&uts) < 0)
+		return -1;
+
+	return do_write_string(ff, uts.nodename);
+}
+
+/* Write uname()'s release as a string; -1 if uname() fails. */
+static int write_osrelease(struct feat_fd *ff,
+			   struct perf_evlist *evlist __maybe_unused)
+{
+	struct utsname uts;
+
+	if (uname(&uts) < 0)
+		return -1;
+
+	return do_write_string(ff, uts.release);
+}
+
+/* Write uname()'s machine as a string; -1 if uname() fails. */
+static int write_arch(struct feat_fd *ff,
+		      struct perf_evlist *evlist __maybe_unused)
+{
+	struct utsname uts;
+
+	if (uname(&uts) < 0)
+		return -1;
+
+	return do_write_string(ff, uts.machine);
+}
+
+/* Write perf_version_string as a string. */
+static int write_version(struct feat_fd *ff,
+			 struct perf_evlist *evlist __maybe_unused)
+{
+	return do_write_string(ff, perf_version_string);
+}
+
+/*
+ * Find the first /proc/cpuinfo line that starts with @cpuinfo_proc and
+ * write its value (the text after ": ", with each run of whitespace
+ * reduced to one space) as a string.
+ * Returns the do_write_string() result, or -1 when @cpuinfo_proc is NULL,
+ * the file cannot be opened or no line matches.
+ */
+static int __write_cpudesc(struct feat_fd *ff, const char *cpuinfo_proc)
+{
+	FILE *file;
+	char *buf = NULL;
+	char *s, *p;
+	const char *search = cpuinfo_proc;
+	size_t len = 0;
+	int ret = -1;
+
+	if (!search)
+		return -1;
+
+	file = fopen("/proc/cpuinfo", "r");
+	if (!file)
+		return -1;
+
+	/* ret is 0 only if the loop stopped on a matching line */
+	while (getline(&buf, &len, file) > 0) {
+		ret = strncmp(buf, search, strlen(search));
+		if (!ret)
+			break;
+	}
+
+	if (ret) {
+		ret = -1;
+		goto done;
+	}
+
+	s = buf;
+
+	/* skip past "key: " when the line has that shape */
+	p = strchr(buf, ':');
+	if (p && *(p+1) == ' ' && *(p+2))
+		s = p + 2;
+	p = strchr(s, '\n');
+	if (p)
+		*p = '\0';
+
+	/* squash extra space characters (branding string) */
+	p = s;
+	while (*p) {
+		if (isspace(*p)) {
+			char *r = p + 1;
+			char *q = r;
+			*p = ' ';
+			/* q skips the rest of the whitespace run */
+			while (*q && isspace(*q))
+				q++;
+			/* shift the tail left over the skipped characters */
+			if (q != (p+1))
+				while ((*r++ = *q++));
+		}
+		p++;
+	}
+	ret = do_write_string(ff, s);
+done:
+	free(buf);
+	fclose(file);
+	return ret;
+}
+
+/*
+ * Try each key in CPUINFO_PROC in order and return the result of the
+ * first one that __write_cpudesc() handles without error; -1 if none.
+ */
+static int write_cpudesc(struct feat_fd *ff,
+		       struct perf_evlist *evlist __maybe_unused)
+{
+	const char *cpuinfo_procs[] = CPUINFO_PROC;
+	unsigned int idx = 0;
+
+	while (idx < ARRAY_SIZE(cpuinfo_procs)) {
+		int err = __write_cpudesc(ff, cpuinfo_procs[idx++]);
+
+		if (err >= 0)
+			return err;
+	}
+	return -1;
+}
+
+
+/*
+ * Write two u32 values: cpu__max_present_cpu() and the online processor
+ * count from sysconf(). Returns -1 if sysconf() fails.
+ */
+static int write_nrcpus(struct feat_fd *ff,
+			struct perf_evlist *evlist __maybe_unused)
+{
+	u32 nr_present, nr_online;
+	long online;
+	int err;
+
+	nr_present = cpu__max_present_cpu();
+
+	online = sysconf(_SC_NPROCESSORS_ONLN);
+	if (online < 0)
+		return -1;
+	nr_online = (u32)(online & UINT_MAX);
+
+	err = do_write(ff, &nr_present, sizeof(nr_present));
+	if (err < 0)
+		return err;
+
+	return do_write(ff, &nr_online, sizeof(nr_online));
+}
+
+/*
+ * Write a description of every event in @evlist: the event count, the
+ * size of the attr struct, then per event its attr, id count, name and
+ * ids. Returns 0 on success or the first write error.
+ */
+static int write_event_desc(struct feat_fd *ff,
+			    struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+	u32 nre, nri, sz;
+	int ret;
+
+	nre = evlist->nr_entries;
+
+	/*
+	 * write number of events
+	 */
+	ret = do_write(ff, &nre, sizeof(nre));
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * size of perf_event_attr struct
+	 */
+	sz = (u32)sizeof(evsel->attr);
+	ret = do_write(ff, &sz, sizeof(sz));
+	if (ret < 0)
+		return ret;
+
+	evlist__for_each_entry(evlist, evsel) {
+		ret = do_write(ff, &evsel->attr, sz);
+		if (ret < 0)
+			return ret;
+		/*
+		 * write number of unique id per event
+		 * there is one id per instance of an event
+		 *
+		 * copy into an nri to be independent of the
+		 * type of ids,
+		 */
+		nri = evsel->ids;
+		ret = do_write(ff, &nri, sizeof(nri));
+		if (ret < 0)
+			return ret;
+
+		/*
+		 * write event string as passed on cmdline
+		 */
+		ret = do_write_string(ff, perf_evsel__name(evsel));
+		if (ret < 0)
+			return ret;
+		/*
+		 * write unique ids for this event
+		 */
+		ret = do_write(ff, evsel->id, evsel->ids * sizeof(u64));
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Write the command line: a u32 string count (perf_env.nr_cmdline + 1),
+ * the path /proc/self/exe resolves to, then every perf_env.cmdline_argv
+ * entry. Returns 0 on success, -1 if readlink() fails, or the first
+ * write error.
+ */
+static int write_cmdline(struct feat_fd *ff,
+			 struct perf_evlist *evlist __maybe_unused)
+{
+	char buf[MAXPATHLEN];
+	u32 n;
+	int i, ret;
+
+	/* actual path to perf binary */
+	ret = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
+	if (ret <= 0)
+		return -1;
+
+	/* readlink() does not add null termination */
+	buf[ret] = '\0';
+
+	/* account for binary path */
+	n = perf_env.nr_cmdline + 1;
+
+	ret = do_write(ff, &n, sizeof(n));
+	if (ret < 0)
+		return ret;
+
+	ret = do_write_string(ff, buf);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0 ; i < perf_env.nr_cmdline; i++) {
+		ret = do_write_string(ff, perf_env.cmdline_argv[i]);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+#define CORE_SIB_FMT \
+	"/sys/devices/system/cpu/cpu%d/topology/core_siblings_list"
+#define THRD_SIB_FMT \
+	"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list"
+
+/*
+ * Distinct core and thread sibling list strings collected from sysfs.
+ * build_cpu_topology() allocates both pointer arrays in the same block
+ * as the struct, with cpu_nr slots each.
+ */
+struct cpu_topo {
+	u32 cpu_nr;		/* capacity of each array below */
+	u32 core_sib;		/* used entries in core_siblings */
+	u32 thread_sib;		/* used entries in thread_siblings */
+	char **core_siblings;
+	char **thread_siblings;
+};
+
+/*
+ * Read the core and thread sibling lists of @cpu from sysfs and add each
+ * one to @tp unless an identical string is already stored there.
+ * Returns 0 if at least one of the two files could be read, -1 otherwise.
+ */
+static int build_cpu_topo(struct cpu_topo *tp, int cpu)
+{
+	FILE *fp;
+	char filename[MAXPATHLEN];
+	char *buf = NULL, *p;
+	size_t len = 0;
+	ssize_t sret;
+	u32 i = 0;
+	int ret = -1;
+
+	sprintf(filename, CORE_SIB_FMT, cpu);
+	fp = fopen(filename, "r");
+	if (!fp)
+		goto try_threads;
+
+	sret = getline(&buf, &len, fp);
+	fclose(fp);
+	if (sret <= 0)
+		goto try_threads;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = '\0';
+
+	for (i = 0; i < tp->core_sib; i++) {
+		if (!strcmp(buf, tp->core_siblings[i]))
+			break;
+	}
+	if (i == tp->core_sib) {
+		/* new list: tp takes ownership of buf, so start a fresh one */
+		tp->core_siblings[i] = buf;
+		tp->core_sib++;
+		buf = NULL;
+		len = 0;
+	}
+	ret = 0;
+
+try_threads:
+	sprintf(filename, THRD_SIB_FMT, cpu);
+	fp = fopen(filename, "r");
+	if (!fp)
+		goto done;
+
+	if (getline(&buf, &len, fp) <= 0)
+		goto done;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = '\0';
+
+	for (i = 0; i < tp->thread_sib; i++) {
+		if (!strcmp(buf, tp->thread_siblings[i]))
+			break;
+	}
+	if (i == tp->thread_sib) {
+		/* new list: tp takes ownership of buf */
+		tp->thread_siblings[i] = buf;
+		tp->thread_sib++;
+		buf = NULL;
+	}
+	ret = 0;
+done:
+	/* fp is either NULL or the still-open thread siblings file here */
+	if(fp)
+		fclose(fp);
+	free(buf);
+	return ret;
+}
+
+/* Free every stored sibling string and then @tp itself; NULL is allowed. */
+static void free_cpu_topo(struct cpu_topo *tp)
+{
+	u32 n;
+
+	if (tp == NULL)
+		return;
+
+	for (n = 0; n < tp->core_sib; n++)
+		zfree(&tp->core_siblings[n]);
+
+	for (n = 0; n < tp->thread_sib; n++)
+		zfree(&tp->thread_siblings[n]);
+
+	free(tp);
+}
+
+/*
+ * Allocate a cpu_topo and fill it by calling build_cpu_topo() for every
+ * CPU below cpu__max_present_cpu() that is in the cpu map.
+ * Returns the topology (release it with free_cpu_topo()) or NULL on
+ * failure.
+ */
+static struct cpu_topo *build_cpu_topology(void)
+{
+	struct cpu_topo *tp = NULL;
+	void *addr;
+	u32 nr, i;
+	size_t sz;
+	long ncpus;
+	int ret = -1;
+	struct cpu_map *map;
+
+	ncpus = cpu__max_present_cpu();
+
+	/* build online CPU map */
+	map = cpu_map__new(NULL);
+	if (map == NULL) {
+		pr_debug("failed to get system cpumap\n");
+		return NULL;
+	}
+
+	nr = (u32)(ncpus & UINT_MAX);
+
+	/* one allocation: the struct followed by the two pointer arrays */
+	sz = nr * sizeof(char *);
+	addr = calloc(1, sizeof(*tp) + 2 * sz);
+	if (!addr)
+		goto out_free;
+
+	tp = addr;
+	tp->cpu_nr = nr;
+	addr += sizeof(*tp);
+	tp->core_siblings = addr;
+	addr += sz;
+	tp->thread_siblings = addr;
+
+	for (i = 0; i < nr; i++) {
+		if (!cpu_map__has(map, i))
+			continue;
+
+		ret = build_cpu_topo(tp, i);
+		if (ret < 0)
+			break;
+	}
+
+out_free:
+	cpu_map__put(map);
+	/* ret is still -1 here if no CPU was processed at all */
+	if (ret) {
+		free_cpu_topo(tp);
+		tp = NULL;
+	}
+	return tp;
+}
+
+/*
+ * Write the CPU topology: the count and strings of the core sibling
+ * lists, the count and strings of the thread sibling lists, then the
+ * core_id and socket_id of each of the perf_env.nr_cpus_avail CPUs.
+ *
+ * Returns 0 on success, -1 if the topology cannot be built, or the first
+ * error reported by a write or by perf_env__read_cpu_topology_map().
+ * The topology is released on every path.
+ */
+static int write_cpu_topology(struct feat_fd *ff,
+			      struct perf_evlist *evlist __maybe_unused)
+{
+	struct cpu_topo *tp;
+	u32 i;
+	int ret, j;
+
+	tp = build_cpu_topology();
+	if (!tp)
+		return -1;
+
+	ret = do_write(ff, &tp->core_sib, sizeof(tp->core_sib));
+	if (ret < 0)
+		goto done;
+
+	for (i = 0; i < tp->core_sib; i++) {
+		ret = do_write_string(ff, tp->core_siblings[i]);
+		if (ret < 0)
+			goto done;
+	}
+	ret = do_write(ff, &tp->thread_sib, sizeof(tp->thread_sib));
+	if (ret < 0)
+		goto done;
+
+	for (i = 0; i < tp->thread_sib; i++) {
+		ret = do_write_string(ff, tp->thread_siblings[i]);
+		/* bail out so the error is not overwritten further down */
+		if (ret < 0)
+			goto done;
+	}
+
+	ret = perf_env__read_cpu_topology_map(&perf_env);
+	if (ret < 0)
+		goto done;
+
+	for (j = 0; j < perf_env.nr_cpus_avail; j++) {
+		ret = do_write(ff, &perf_env.cpu[j].core_id,
+			       sizeof(perf_env.cpu[j].core_id));
+		if (ret < 0)
+			goto done;
+		ret = do_write(ff, &perf_env.cpu[j].socket_id,
+			       sizeof(perf_env.cpu[j].socket_id));
+		if (ret < 0)
+			goto done;
+	}
+done:
+	free_cpu_topo(tp);
+	return ret;
+}
+
+
+
+/*
+ * Write the number found on the "MemTotal:" line of /proc/meminfo as a
+ * 64-bit value.
+ *
+ * Returns the do_write() result, or -1 when the file cannot be opened,
+ * has no MemTotal line, or the line cannot be parsed.
+ */
+static int write_total_mem(struct feat_fd *ff,
+			   struct perf_evlist *evlist __maybe_unused)
+{
+	char *buf = NULL;
+	FILE *fp;
+	size_t len = 0;
+	int ret = -1, n;
+	uint64_t mem;
+
+	fp = fopen("/proc/meminfo", "r");
+	if (!fp)
+		return -1;
+
+	/* ret is 0 only if the loop stopped on the MemTotal line */
+	while (getline(&buf, &len, fp) > 0) {
+		ret = strncmp(buf, "MemTotal:", 9);
+		if (!ret)
+			break;
+	}
+	if (!ret) {
+		n = sscanf(buf, "%*s %"PRIu64, &mem);
+		if (n == 1)
+			ret = do_write(ff, &mem, sizeof(mem));
+		else
+			ret = -1; /* nothing was written: do not report success */
+	} else
+		ret = -1;
+	free(buf);
+	fclose(fp);
+	return ret;
+}
+
+/*
+ * Write the memory information of NUMA node @node: the MemTotal and
+ * MemFree values from the node's sysfs meminfo file (0 when a line is
+ * absent), followed by the node's cpulist as a string.
+ *
+ * Returns 0 on success; -1 when a sysfs file cannot be opened or read or
+ * a meminfo line cannot be parsed; otherwise the failing write's result.
+ */
+static int write_topo_node(struct feat_fd *ff, int node)
+{
+	char str[MAXPATHLEN];
+	char field[32];
+	char *buf = NULL, *p;
+	size_t len = 0;
+	FILE *fp;
+	/* start from 0 so a missing line never leaks stack garbage */
+	u64 mem_total = 0, mem_free = 0, mem;
+	int ret = -1;
+
+	sprintf(str, "/sys/devices/system/node/node%d/meminfo", node);
+	fp = fopen(str, "r");
+	if (!fp)
+		return -1;
+
+	while (getline(&buf, &len, fp) > 0) {
+		/* skip over invalid lines */
+		if (!strchr(buf, ':'))
+			continue;
+		if (sscanf(buf, "%*s %*d %31s %"PRIu64, field, &mem) != 2)
+			goto done;
+		if (!strcmp(field, "MemTotal:"))
+			mem_total = mem;
+		if (!strcmp(field, "MemFree:"))
+			mem_free = mem;
+	}
+
+	fclose(fp);
+	fp = NULL;
+
+	ret = do_write(ff, &mem_total, sizeof(u64));
+	if (ret)
+		goto done;
+
+	ret = do_write(ff, &mem_free, sizeof(u64));
+	if (ret)
+		goto done;
+
+	ret = -1;
+	sprintf(str, "/sys/devices/system/node/node%d/cpulist", node);
+
+	fp = fopen(str, "r");
+	if (!fp)
+		goto done;
+
+	if (getline(&buf, &len, fp) <= 0)
+		goto done;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = '\0';
+
+	ret = do_write_string(ff, buf);
+done:
+	free(buf);
+	if (fp)
+		fclose(fp);
+	return ret;
+}
+
+/*
+ * Write the NUMA topology: the number of entries parsed from
+ * /sys/devices/system/node/online, then for each one its node number
+ * followed by the data emitted by write_topo_node() for that node.
+ *
+ * Returns 0 on success, -1 if the online list cannot be read or parsed,
+ * or the first write error.
+ */
+static int write_numa_topology(struct feat_fd *ff,
+			       struct perf_evlist *evlist __maybe_unused)
+{
+	char *buf = NULL;
+	size_t len = 0;
+	FILE *fp;
+	struct cpu_map *node_map = NULL;
+	char *c;
+	u32 nr, i, j;
+	int ret = -1;
+
+	fp = fopen("/sys/devices/system/node/online", "r");
+	if (!fp)
+		return -1;
+
+	if (getline(&buf, &len, fp) <= 0)
+		goto done;
+
+	c = strchr(buf, '\n');
+	if (c)
+		*c = '\0';
+
+	/* the list is parsed with the cpu map parser */
+	node_map = cpu_map__new(buf);
+	if (!node_map)
+		goto done;
+
+	nr = (u32)node_map->nr;
+
+	ret = do_write(ff, &nr, sizeof(nr));
+	if (ret < 0)
+		goto done;
+
+	for (i = 0; i < nr; i++) {
+		j = (u32)node_map->map[i];
+		ret = do_write(ff, &j, sizeof(j));
+		if (ret < 0)
+			break;
+
+		/*
+		 * Use the node number, not the loop index: the two differ
+		 * as soon as the online list has a gap.
+		 */
+		ret = write_topo_node(ff, node_map->map[i]);
+		if (ret < 0)
+			break;
+	}
+done:
+	free(buf);
+	fclose(fp);
+	cpu_map__put(node_map);
+	return ret;
+}
+
+/*
+ * File format:
+ *
+ * struct pmu_mappings {
+ *	u32	pmu_num;
+ *	struct pmu_map {
+ *		u32	type;
+ *		char	name[];
+ *	}[pmu_num];
+ * };
+ */
+
+/*
+ * Write the number of named PMUs, then for each one its type followed by
+ * its name. Returns 0 on success or the first write error.
+ */
+static int write_pmu_mappings(struct feat_fd *ff,
+			      struct perf_evlist *evlist __maybe_unused)
+{
+	struct perf_pmu *pmu = NULL;
+	u32 pmu_num = 0;
+	int ret;
+
+	/*
+	 * Do a first pass to count number of pmu to avoid lseek so this
+	 * works in pipe mode as well.
+	 */
+	while ((pmu = perf_pmu__scan(pmu))) {
+		if (!pmu->name)
+			continue;
+		pmu_num++;
+	}
+
+	ret = do_write(ff, &pmu_num, sizeof(pmu_num));
+	if (ret < 0)
+		return ret;
+
+	/* pmu is NULL again here, so this scan restarts from the beginning */
+	while ((pmu = perf_pmu__scan(pmu))) {
+		if (!pmu->name)
+			continue;
+
+		ret = do_write(ff, &pmu->type, sizeof(pmu->type));
+		if (ret < 0)
+			return ret;
+
+		ret = do_write_string(ff, pmu->name);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * File format:
+ *
+ * struct group_descs {
+ *	u32	nr_groups;
+ *	struct group_desc {
+ *		char	name[];
+ *		u32	leader_idx;
+ *		u32	nr_members;
+ *	}[nr_groups];
+ * };
+ */
+/*
+ * Write evlist->nr_groups, then for every group leader with more than
+ * one member: the group name ("{anon_group}" when it has none), the
+ * leader index and the member count.
+ */
+static int write_group_desc(struct feat_fd *ff,
+			    struct perf_evlist *evlist)
+{
+	u32 nr_groups = evlist->nr_groups;
+	struct perf_evsel *evsel;
+	int err;
+
+	err = do_write(ff, &nr_groups, sizeof(nr_groups));
+	if (err < 0)
+		return err;
+
+	evlist__for_each_entry(evlist, evsel) {
+		const char *name;
+		u32 leader_idx, nr_members;
+
+		/* only leaders of real (multi-member) groups are recorded */
+		if (!perf_evsel__is_group_leader(evsel) ||
+		    !(evsel->nr_members > 1))
+			continue;
+
+		name = evsel->group_name ?: "{anon_group}";
+		leader_idx = evsel->idx;
+		nr_members = evsel->nr_members;
+
+		err = do_write_string(ff, name);
+		if (err < 0)
+			return err;
+
+		err = do_write(ff, &leader_idx, sizeof(leader_idx));
+		if (err < 0)
+			return err;
+
+		err = do_write(ff, &nr_members, sizeof(nr_members));
+		if (err < 0)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * default get_cpuid(): nothing gets recorded
+ * actual implementation must be in arch/$(SRCARCH)/util/header.c
+ *
+ * This weak default ignores both arguments and always returns -1.
+ */
+int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
+{
+	return -1;
+}
+
+/*
+ * Write the string produced by get_cpuid(); -1 when get_cpuid() returns
+ * non-zero.
+ */
+static int write_cpuid(struct feat_fd *ff,
+		       struct perf_evlist *evlist __maybe_unused)
+{
+	char buffer[64];
+
+	if (get_cpuid(buffer, sizeof(buffer)))
+		return -1;
+
+	return do_write_string(ff, buffer);
+}
+
+/* Writes nothing and always returns 0. */
+static int write_branch_stack(struct feat_fd *ff __maybe_unused,
+			      struct perf_evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Write the auxtrace index of the session that embeds ff->ph to ff->fd.
+ * Warns and returns -1 when @ff is buffer backed (pipe mode); otherwise
+ * returns the auxtrace_index__write() result.
+ */
+static int write_auxtrace(struct feat_fd *ff,
+			  struct perf_evlist *evlist __maybe_unused)
+{
+	struct perf_session *session;
+	int err;
+
+	if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+		return -1;
+
+	/* ff->ph is the header member embedded in a struct perf_session */
+	session = container_of(ff->ph, struct perf_session, header);
+
+	err = auxtrace_index__write(ff->fd, &session->auxtrace_index);
+	if (err < 0)
+		pr_err("Failed to write auxtrace index\n");
+	return err;
+}
+
+/* qsort() comparator: orders cpu_cache_level entries by ascending level. */
+static int cpu_cache_level__sort(const void *a, const void *b)
+{
+	struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
+	struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b;
+
+	return cache_a->level - cache_b->level;
+}
+
+/*
+ * Return true when @a and @b describe the same cache: equal level,
+ * line_size, sets and ways, and equal type, size and map strings.
+ */
+static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b)
+{
+	/* cheap integer fields first, strings only if those all match */
+	return a->level == b->level &&
+	       a->line_size == b->line_size &&
+	       a->sets == b->sets &&
+	       a->ways == b->ways &&
+	       !strcmp(a->type, b->type) &&
+	       !strcmp(a->size, b->size) &&
+	       !strcmp(a->map, b->map);
+}
+
+/*
+ * Fill @cache from the sysfs directory cpu<cpu>/cache/index<level>.
+ *
+ * Returns 0 on success, 1 when that directory does not exist, and -1 when
+ * one of its attributes cannot be read. On -1 every string read so far
+ * has been freed again.
+ */
+static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level)
+{
+	char path[PATH_MAX], file[PATH_MAX];
+	struct stat st;
+	size_t len;
+
+	scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level);
+	scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path);
+
+	if (stat(file, &st))
+		return 1;
+
+	scnprintf(file, PATH_MAX, "%s/level", path);
+	if (sysfs__read_int(file, (int *) &cache->level))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/coherency_line_size", path);
+	if (sysfs__read_int(file, (int *) &cache->line_size))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/number_of_sets", path);
+	if (sysfs__read_int(file, (int *) &cache->sets))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path);
+	if (sysfs__read_int(file, (int *) &cache->ways))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/type", path);
+	if (sysfs__read_str(file, &cache->type, &len))
+		return -1;
+
+	cache->type[len] = 0;
+	cache->type = rtrim(cache->type);
+
+	scnprintf(file, PATH_MAX, "%s/size", path);
+	if (sysfs__read_str(file, &cache->size, &len)) {
+		free(cache->type);
+		return -1;
+	}
+
+	cache->size[len] = 0;
+	cache->size = rtrim(cache->size);
+
+	scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
+	if (sysfs__read_str(file, &cache->map, &len)) {
+		/* map was never read: release the two strings that were */
+		free(cache->size);
+		free(cache->type);
+		return -1;
+	}
+
+	cache->map[len] = 0;
+	cache->map = rtrim(cache->map);
+	return 0;
+}
+
+/* Print one line to @out with the level, type, size and map of @c. */
+static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c)
+{
+	fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map);
+}
+
+/*
+ * Collect the distinct cache descriptions of all configured CPUs into
+ * @caches, which has room for @size entries, and store the number found
+ * in @cntp. Collection stops with a warning once @size entries are in
+ * use.
+ * Returns 0 on success, or a negative value if sysconf() or
+ * cpu_cache_level__read() fails; @cntp is not updated in that case.
+ */
+static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
+{
+	u32 i, cnt = 0;
+	long ncpus;
+	u32 nr, cpu;
+	u16 level;
+
+	ncpus = sysconf(_SC_NPROCESSORS_CONF);
+	if (ncpus < 0)
+		return -1;
+
+	nr = (u32)(ncpus & UINT_MAX);
+
+	for (cpu = 0; cpu < nr; cpu++) {
+		for (level = 0; level < 10; level++) {
+			struct cpu_cache_level c;
+			int err;
+
+			err = cpu_cache_level__read(&c, cpu, level);
+			if (err < 0)
+				return err;
+
+			/* 1 means this index does not exist: next CPU */
+			if (err == 1)
+				break;
+
+			for (i = 0; i < cnt; i++) {
+				if (cpu_cache_level__cmp(&c, &caches[i]))
+					break;
+			}
+
+			/* keep only descriptions not seen before */
+			if (i == cnt)
+				caches[cnt++] = c;
+			else
+				cpu_cache_level__free(&c);
+
+			if (WARN_ONCE(cnt == size, "way too many cpu caches.."))
+				goto out;
+		}
+	}
+ out:
+	*cntp = cnt;
+	return 0;
+}
+
+#define MAX_CACHES 2000
+
+/*
+ * Write the cache information: a u32 version (1), a u32 count, then for
+ * each cache, sorted by level, the four u32 fields level, line_size, sets
+ * and ways followed by the strings type, size and map.
+ * Returns 0 on success, or the build_caches() or first write error.
+ */
+static int write_cache(struct feat_fd *ff,
+		       struct perf_evlist *evlist __maybe_unused)
+{
+	struct cpu_cache_level caches[MAX_CACHES];
+	u32 cnt = 0, i, version = 1;
+	int ret;
+
+	ret = build_caches(caches, MAX_CACHES, &cnt);
+	if (ret)
+		goto out;
+
+	qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort);
+
+	ret = do_write(ff, &version, sizeof(u32));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(ff, &cnt, sizeof(u32));
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < cnt; i++) {
+		struct cpu_cache_level *c = &caches[i];
+
+		/* _W(field): write one u32 field, jump to out on error */
+		#define _W(v)					\
+			ret = do_write(ff, &c->v, sizeof(u32));	\
+			if (ret < 0)				\
+				goto out;
+
+		_W(level)
+		_W(line_size)
+		_W(sets)
+		_W(ways)
+		#undef _W
+
+		/* _W(field): write one string field, jump to out on error */
+		#define _W(v)						\
+			ret = do_write_string(ff, (const char *) c->v);	\
+			if (ret < 0)					\
+				goto out;
+
+		_W(type)
+		_W(size)
+		_W(map)
+		#undef _W
+	}
+
+out:
+	/* the collected entries are released on success and on failure */
+	for (i = 0; i < cnt; i++)
+		cpu_cache_level__free(&caches[i]);
+	return ret;
+}
+
+/* Writes nothing and always returns 0. */
+static int write_stat(struct feat_fd *ff __maybe_unused,
+		      struct perf_evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Write evlist->first_sample_time and then evlist->last_sample_time.
+ * Returns 0 on success or the first write error.
+ */
+static int write_sample_time(struct feat_fd *ff,
+			     struct perf_evlist *evlist)
+{
+	int err = do_write(ff, &evlist->first_sample_time,
+			   sizeof(evlist->first_sample_time));
+
+	if (err < 0)
+		return err;
+
+	return do_write(ff, &evlist->last_sample_time,
+			sizeof(evlist->last_sample_time));
+}
+
+
+/*
+ * Fill @n for node @idx: scan the node's sysfs directory for "memory<N>"
+ * entries and set bit N in a newly allocated bitmap for each one.
+ * Returns 0 on success, -1 if the directory cannot be opened, -ENOMEM if
+ * the bitmap cannot be allocated.
+ */
+static int memory_node__read(struct memory_node *n, unsigned long idx)
+{
+	unsigned int phys, size = 0;
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+
+/* iterate over the directory entries named memory<N>, storing N in mem */
+#define for_each_memory(mem, dir)					\
+	while ((ent = readdir(dir)))					\
+		if (strcmp(ent->d_name, ".") &&				\
+		    strcmp(ent->d_name, "..") &&			\
+		    sscanf(ent->d_name, "memory%u", &mem) == 1)
+
+	scnprintf(path, PATH_MAX,
+		  "%s/devices/system/node/node%lu",
+		  sysfs__mountpoint(), idx);
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_warning("failed: cant' open memory sysfs data\n");
+		return -1;
+	}
+
+	/* first pass: find the highest index to size the bitmap */
+	for_each_memory(phys, dir) {
+		size = max(phys, size);
+	}
+
+	size++;
+
+	n->set = bitmap_alloc(size);
+	if (!n->set) {
+		closedir(dir);
+		return -ENOMEM;
+	}
+
+	bitmap_zero(n->set, size);
+	n->node = idx;
+	n->size = size;
+
+	/* second pass: mark every index that is present */
+	rewinddir(dir);
+
+	for_each_memory(phys, dir) {
+		set_bit(phys, n->set);
+	}
+
+	closedir(dir);
+	return 0;
+}
+
+/* qsort() comparator: orders memory_node entries by ascending node. */
+static int memory_node__sort(const void *a, const void *b)
+{
+	const struct memory_node *na = a;
+	const struct memory_node *nb = b;
+
+	return na->node - nb->node;
+}
+
+/*
+ * Fill @nodes, which has room for @size entries, with one memory_node per
+ * "node<N>" directory found under the sysfs node directory, sorted by
+ * node number, and store the number of entries in @cntp.
+ *
+ * Returns 0 on success, -1 if the directory cannot be opened or holds
+ * more than @size nodes, or the memory_node__read() error. The directory
+ * handle is closed on every path.
+ */
+static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
+{
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dir;
+	u64 cnt = 0;
+	int ret = 0;
+
+	scnprintf(path, PATH_MAX, "%s/devices/system/node/",
+		  sysfs__mountpoint());
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_debug2("%s: could't read %s, does this arch have topology information?\n",
+			  __func__, path);
+		return -1;
+	}
+
+	while (!ret && (ent = readdir(dir))) {
+		unsigned int idx;
+		int r;
+
+		if (!strcmp(ent->d_name, ".") ||
+		    !strcmp(ent->d_name, ".."))
+			continue;
+
+		r = sscanf(ent->d_name, "node%u", &idx);
+		if (r != 1)
+			continue;
+
+		if (WARN_ONCE(cnt >= size,
+			      "failed to write MEM_TOPOLOGY, way too many nodes\n")) {
+			/* do not leak the directory handle on this early exit */
+			closedir(dir);
+			return -1;
+		}
+
+		ret = memory_node__read(&nodes[cnt++], idx);
+	}
+
+	*cntp = cnt;
+	closedir(dir);
+
+	if (!ret)
+		qsort(nodes, cnt, sizeof(nodes[0]), memory_node__sort);
+
+	return ret;
+}
+
+#define MAX_MEMORY_NODES 2000
+
+/*
+ * The MEM_TOPOLOGY holds physical memory map for every
+ * node in system. The format of data is as follows:
+ *
+ *  0 - version          | for future changes
+ *  8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
+ * 16 - count            | number of nodes
+ *
+ * For each node we store a map of the physical memory
+ * indexes that belong to it:
+ *
+ * 32 - node id          | node index
+ * 40 - size             | size of bitmap
+ * 48 - bitmap           | bitmap of memory indexes that belongs to node
+ */
+static int write_mem_topology(struct feat_fd *ff __maybe_unused,
+			      struct perf_evlist *evlist __maybe_unused)
+{
+	static struct memory_node nodes[MAX_MEMORY_NODES];
+	u64 bsize, version = 1, i, nr;
+	int ret;
+
+	/* gather everything first, so nothing is written if this fails */
+	ret = sysfs__read_xll("devices/system/memory/block_size_bytes",
+			      (unsigned long long *) &bsize);
+	if (ret)
+		return ret;
+
+	ret = build_mem_topology(nodes, MAX_MEMORY_NODES, &nr);
+	if (ret)
+		return ret;
+
+	/* fixed part: version, block size, node count */
+	ret = do_write(ff, &version, sizeof(version));
+	if (ret < 0)
+		return ret;
+
+	ret = do_write(ff, &bsize, sizeof(bsize));
+	if (ret < 0)
+		return ret;
+
+	ret = do_write(ff, &nr, sizeof(nr));
+	if (ret < 0)
+		return ret;
+
+	/* per node: id, bitmap size, then the bitmap itself */
+	for (i = 0; i < nr; i++) {
+		struct memory_node *n = &nodes[i];
+
+		ret = do_write(ff, &n->node, sizeof(n->node));
+		if (ret < 0)
+			break;
+
+		ret = do_write(ff, &n->size, sizeof(n->size));
+		if (ret < 0)
+			break;
+
+		ret = do_write_bitmap(ff, n->set, n->size);
+		if (ret < 0)
+			break;
+	}
+
+	return ret;
+}
+
+/* Print the recorded host name as a "# hostname" header line. */
+static void print_hostname(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+
+	fprintf(fp, "# hostname : %s\n", ph->env.hostname);
+}
+
+/* Print the recorded OS release as a "# os release" header line. */
+static void print_osrelease(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+
+	fprintf(fp, "# os release : %s\n", ph->env.os_release);
+}
+
+/* Print the recorded architecture as a "# arch" header line. */
+static void print_arch(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+
+	fprintf(fp, "# arch : %s\n", ph->env.arch);
+}
+
+/* Print the recorded CPU description as a "# cpudesc" header line. */
+static void print_cpudesc(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+
+	fprintf(fp, "# cpudesc : %s\n", ph->env.cpu_desc);
+}
+
+/* Print the recorded online and available CPU counts, one per line. */
+static void print_nrcpus(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+
+	fprintf(fp, "# nrcpus online : %u\n", ph->env.nr_cpus_online);
+	fprintf(fp, "# nrcpus avail : %u\n", ph->env.nr_cpus_avail);
+}
+
+/* Print the recorded perf version as a "# perf version" header line. */
+static void print_version(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+
+	fprintf(fp, "# perf version : %s\n", ph->env.version);
+}
+
+/*
+ * Print the recorded command line on a single "# cmdline" line, each
+ * argument followed by a space.
+ */
+static void print_cmdline(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+	int argc = ph->env.nr_cmdline;
+	int idx = 0;
+
+	fprintf(fp, "# cmdline : ");
+
+	while (idx < argc) {
+		fprintf(fp, "%s ", ph->env.cmdline_argv[idx]);
+		idx++;
+	}
+
+	fputc('\n', fp);
+}
+
+/*
+ * Print the CPU topology: the sibling core lists, the sibling thread
+ * lists and, when available, the core/socket id of every CPU.
+ *
+ * The sibling lists are stored as consecutive NUL-terminated strings,
+ * so the cursor steps over each string and its terminator.
+ */
+static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+	int nr_cpus = ph->env.nr_cpus_avail;
+	char *cursor;
+	int left, cpu;
+
+	cursor = ph->env.sibling_cores;
+	for (left = ph->env.nr_sibling_cores; left > 0; left--) {
+		fprintf(fp, "# sibling cores   : %s\n", cursor);
+		cursor += strlen(cursor) + 1;
+	}
+
+	cursor = ph->env.sibling_threads;
+	for (left = ph->env.nr_sibling_threads; left > 0; left--) {
+		fprintf(fp, "# sibling threads : %s\n", cursor);
+		cursor += strlen(cursor) + 1;
+	}
+
+	if (ph->env.cpu == NULL) {
+		fprintf(fp, "# Core ID and Socket ID information is not available\n");
+		return;
+	}
+
+	for (cpu = 0; cpu < nr_cpus; cpu++)
+		fprintf(fp, "# CPU %d: Core ID %d, Socket ID %d\n", cpu,
+			ph->env.cpu[cpu].core_id, ph->env.cpu[cpu].socket_id);
+}
+
+/*
+ * Release an event array as produced by read_event_desc(): the name
+ * and id list of every entry up to the attr.size == 0 terminator, then
+ * the array itself. A NULL @events is accepted.
+ */
+static void free_event_desc(struct perf_evsel *events)
+{
+	struct perf_evsel *cur = events;
+
+	if (events == NULL)
+		return;
+
+	while (cur->attr.size) {
+		zfree(&cur->name);
+		zfree(&cur->id);
+		cur++;
+	}
+
+	free(events);
+}
+
+/*
+ * Parse the event description payload from @ff.
+ *
+ * The payload starts with the number of events and the size of the
+ * on-file attr struct, followed, for each event, by the attr, the
+ * number of ids, the event name and the ids themselves.
+ *
+ * Returns a newly allocated array with one extra zeroed entry at the
+ * end (attr.size == 0 marks the terminator), or NULL on any read or
+ * allocation failure. The result is released with free_event_desc().
+ */
+static struct perf_evsel *read_event_desc(struct feat_fd *ff)
+{
+	struct perf_evsel *evsel, *events = NULL;
+	u64 *id;
+	void *buf = NULL;
+	u32 nre, sz, nr, i, j;
+	size_t msz;
+
+	/* number of events */
+	if (do_read_u32(ff, &nre))
+		goto error;
+
+	/* size of one on-file attr struct */
+	if (do_read_u32(ff, &sz))
+		goto error;
+
+	/* buffer to hold on file attr struct */
+	buf = malloc(sz);
+	if (!buf)
+		goto error;
+
+	/* the last event terminates with evsel->attr.size == 0: */
+	events = calloc(nre + 1, sizeof(*events));
+	if (!events)
+		goto error;
+
+	/* copy no more than what both the file and this build know about */
+	msz = sizeof(evsel->attr);
+	if (sz < msz)
+		msz = sz;
+
+	for (i = 0, evsel = events; i < nre; evsel++, i++) {
+		evsel->idx = i;
+
+		/*
+		 * must read entire on-file attr struct to
+		 * sync up with layout.
+		 */
+		if (__do_read(ff, buf, sz))
+			goto error;
+
+		if (ff->ph->needs_swap)
+			perf_event__attr_swap(buf);
+
+		memcpy(&evsel->attr, buf, msz);
+
+		/* number of ids attached to this event */
+		if (do_read_u32(ff, &nr))
+			goto error;
+
+		if (ff->ph->needs_swap)
+			evsel->needs_swap = true;
+
+		evsel->name = do_read_string(ff);
+		if (!evsel->name)
+			goto error;
+
+		if (!nr)
+			continue;
+
+		id = calloc(nr, sizeof(*id));
+		if (!id)
+			goto error;
+		/* hand the list to the evsel first, so the error path frees it */
+		evsel->ids = nr;
+		evsel->id = id;
+
+		for (j = 0 ; j < nr; j++) {
+			if (do_read_u64(ff, id))
+				goto error;
+			id++;
+		}
+	}
+out:
+	free(buf);
+	return events;
+error:
+	free_event_desc(events);
+	events = NULL;
+	goto out;
+}
+
+/*
+ * Callback handed to perf_event_attr__fprintf() by print_event_desc():
+ * prints one attribute as ", <name> = <val>" to @fp and returns the
+ * fprintf() result. @priv is unused.
+ */
+static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val,
+				void *priv __maybe_unused)
+{
+	return fprintf(fp, ", %s = %s", name, val);
+}
+
+/*
+ * Print one "# event" line per recorded event: its name, its ids (if
+ * any) and its attributes.
+ *
+ * Uses the events cached in ff->events when present, otherwise reads
+ * them from @ff. Either way the array is freed afterwards and
+ * ff->events is reset.
+ */
+static void print_event_desc(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_evsel *events = ff->events ? ff->events : read_event_desc(ff);
+	struct perf_evsel *cur;
+	u32 k;
+
+	if (events == NULL) {
+		fprintf(fp, "# event desc: not available or unable to read\n");
+		return;
+	}
+
+	for (cur = events; cur->attr.size; cur++) {
+		fprintf(fp, "# event : name = %s, ", cur->name);
+
+		if (cur->ids) {
+			fprintf(fp, ", id = {");
+			for (k = 0; k < cur->ids; k++) {
+				if (k)
+					fputc(',', fp);
+				fprintf(fp, " %"PRIu64, cur->id[k]);
+			}
+			fprintf(fp, " }");
+		}
+
+		perf_event_attr__fprintf(fp, &cur->attr, __desc_attr__fprintf, NULL);
+
+		fputc('\n', fp);
+	}
+
+	free_event_desc(events);
+	ff->events = NULL;
+}
+
+/* Print the recorded total memory, in kB, as a header line. */
+static void print_total_mem(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+
+	fprintf(fp, "# total memory : %llu kB\n", ph->env.total_mem);
+}
+
+/*
+ * Print, for every recorded NUMA node, its total/free memory and the
+ * list of CPUs that belong to it.
+ */
+static void print_numa_topology(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+	int idx;
+
+	for (idx = 0; idx < ph->env.nr_numa_nodes; idx++) {
+		struct numa_node *node = &ph->env.numa_nodes[idx];
+
+		fprintf(fp, "# node%u meminfo  : total = %"PRIu64" kB,"
+			    " free = %"PRIu64" kB\n",
+			node->node, node->mem_total, node->mem_free);
+
+		fprintf(fp, "# node%u cpu list : ", node->node);
+		cpu_map__fprintf(node->map, fp);
+	}
+}
+
+/* Print the recorded cpuid string as a "# cpuid" header line. */
+static void print_cpuid(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+
+	fprintf(fp, "# cpuid : %s\n", ph->env.cpuid);
+}
+
+/* Note in the header dump that the file has branch stack samples. */
+static void print_branch_stack(struct feat_fd *ff __maybe_unused, FILE *fp)
+{
+	fputs("# contains samples with branch stack\n", fp);
+}
+
+/* Note in the header dump that the file has AUX area data. */
+static void print_auxtrace(struct feat_fd *ff __maybe_unused, FILE *fp)
+{
+	fputs("# contains AUX area data (e.g. instruction trace)\n", fp);
+}
+
+/* Note in the header dump that the file has stat data. */
+static void print_stat(struct feat_fd *ff __maybe_unused, FILE *fp)
+{
+	fputs("# contains stat data\n", fp);
+}
+
+/*
+ * Print a "# CPU cache info:" heading followed by one line per
+ * recorded cache level.
+ */
+static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
+{
+	struct perf_header *ph = ff->ph;
+	int idx = 0;
+
+	fprintf(fp, "# CPU cache info:\n");
+
+	while (idx < ph->env.caches_cnt) {
+		fprintf(fp, "#  ");
+		cpu_cache_level__fprintf(fp, &ph->env.caches[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Print the PMU name -> type mappings on a single line.
+ *
+ * The mappings are stored as consecutive NUL-terminated
+ * "<type>:<name>" strings. An entry without the ':' separator aborts
+ * the listing with an "unable to read" message.
+ */
+static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
+{
+	const char *sep = "# pmu mappings: ";
+	char *cursor, *end;
+	u32 left = ff->ph->env.nr_pmu_mappings;
+	u32 type;
+
+	if (!left) {
+		fprintf(fp, "# pmu mappings: not available\n");
+		return;
+	}
+
+	cursor = ff->ph->env.pmu_mappings;
+
+	for (; left; left--) {
+		type = strtoul(cursor, &end, 0);
+		if (*end != ':') {
+			fprintf(fp, "# pmu mappings: unable to read\n");
+			return;
+		}
+
+		/* the name starts right after the ':' */
+		cursor = end + 1;
+		fprintf(fp, "%s%s = %" PRIu32, sep, cursor, type);
+
+		sep = ", ";
+		cursor += strlen(cursor) + 1;
+	}
+
+	fprintf(fp, "\n");
+}
+
+/*
+ * Print one "# group: name{leader,member,...}" line for every event
+ * group with more than one member found in the session's evlist.
+ */
+static void print_group_desc(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_session *session;
+	struct perf_evsel *evsel;
+	u32 pending = 0;	/* members still to print for the open group */
+
+	session = container_of(ff->ph, struct perf_session, header);
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (perf_evsel__is_group_leader(evsel) &&
+		    evsel->nr_members > 1) {
+			fprintf(fp, "# group: %s{%s", evsel->group_name ?: "",
+				perf_evsel__name(evsel));
+
+			pending = evsel->nr_members - 1;
+			continue;
+		}
+
+		if (!pending)
+			continue;
+
+		fprintf(fp, ",%s", perf_evsel__name(evsel));
+
+		pending--;
+		if (pending == 0)
+			fprintf(fp, "}\n");
+	}
+}
+
+/*
+ * Print the timestamps of the first and last sample and the time
+ * between them in milliseconds.
+ */
+static void print_sample_time(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_session *session;
+	struct perf_evlist *evlist;
+	char stamp[32];
+	double span_ms;
+
+	session = container_of(ff->ph, struct perf_session, header);
+	evlist = session->evlist;
+
+	timestamp__scnprintf_usec(evlist->first_sample_time,
+				  stamp, sizeof(stamp));
+	fprintf(fp, "# time of first sample : %s\n", stamp);
+
+	timestamp__scnprintf_usec(evlist->last_sample_time,
+				  stamp, sizeof(stamp));
+	fprintf(fp, "# time of last sample : %s\n", stamp);
+
+	span_ms = (double)(evlist->last_sample_time -
+		evlist->first_sample_time) / NSEC_PER_MSEC;
+
+	fprintf(fp, "# sample duration : %10.3f ms\n", span_ms);
+}
+
+/*
+ * Print one memory node line: node id, the amount of memory it holds
+ * (set bits times @bsize) and its block bitmap.
+ */
+static void memory_node__fprintf(struct memory_node *n,
+				 unsigned long long bsize, FILE *fp)
+{
+	char map_str[100], size_str[50];
+	unsigned long long bytes = bsize * bitmap_weight(n->set, n->size);
+
+	unit_number__scnprintf(size_str, sizeof(size_str), bytes);
+	bitmap_scnprintf(n->set, n->size, map_str, sizeof(map_str));
+
+	fprintf(fp, "#  %3" PRIu64 " [%s]: %s\n", n->node, size_str, map_str);
+}
+
+/*
+ * Print the memory topology: a summary line with the node count and
+ * block size, then one line per memory node.
+ */
+static void print_mem_topology(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_header *ph = ff->ph;
+	struct memory_node *nodes = ph->env.memory_nodes;
+	int nr = ph->env.nr_memory_nodes;
+	int idx = 0;
+
+	fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n",
+		nr, ph->env.memory_bsize);
+
+	while (idx < nr) {
+		memory_node__fprintf(&nodes[idx], ph->env.memory_bsize, fp);
+		idx++;
+	}
+}
+
+/*
+ * Attach the build id carried by @bev to the dso named @filename in
+ * the machine identified by bev->pid.
+ *
+ * Returns -1 when the machine cannot be found/created or the cpumode
+ * in the event header is not recognised, 0 otherwise (including when
+ * no dso could be obtained).
+ */
+static int __event_process_build_id(struct build_id_event *bev,
+				    char *filename,
+				    struct perf_session *session)
+{
+	char sbuild_id[SBUILD_ID_SIZE];
+	enum dso_kernel_type dso_type;
+	struct machine *machine;
+	struct dso *dso;
+
+	machine = perf_session__findnew_machine(session, bev->pid);
+	if (!machine)
+		return -1;
+
+	switch (bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) {
+	case PERF_RECORD_MISC_KERNEL:
+		dso_type = DSO_TYPE_KERNEL;
+		break;
+	case PERF_RECORD_MISC_GUEST_KERNEL:
+		dso_type = DSO_TYPE_GUEST_KERNEL;
+		break;
+	case PERF_RECORD_MISC_USER:
+	case PERF_RECORD_MISC_GUEST_USER:
+		dso_type = DSO_TYPE_USER;
+		break;
+	default:
+		return -1;
+	}
+
+	dso = machine__findnew_dso(machine, filename);
+	if (dso == NULL)
+		return 0;
+
+	dso__set_build_id(dso, &bev->build_id);
+
+	if (dso_type != DSO_TYPE_USER) {
+		struct kmod_path m = { .name = NULL, };
+
+		/* kernel modules get module info, anything else is the kernel */
+		if (!kmod_path__parse_name(&m, filename) && m.kmod)
+			dso__set_module_info(dso, &m, machine);
+		else
+			dso->kernel = dso_type;
+
+		free(m.name);
+	}
+
+	build_id__sprintf(dso->build_id, sizeof(dso->build_id),
+			  sbuild_id);
+	pr_debug("build id event received for %s: %s\n",
+		 dso->long_name, sbuild_id);
+	dso__put(dso);
+
+	return 0;
+}
+
+/*
+ * Read a build id table written in the old on-file format, i.e. with
+ * records that lack the pid field of struct build_id_event.
+ *
+ * @input must already be positioned at @offset; records are consumed
+ * until @offset + @size is reached.
+ *
+ * Returns 0 on success, -1 on a short read or a record whose filename
+ * length is negative or does not fit in the local buffer.
+ */
+static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
+						 int input, u64 offset, u64 size)
+{
+	struct perf_session *session = container_of(header, struct perf_session, header);
+	struct {
+		struct perf_event_header   header;
+		u8			   build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))];
+		char			   filename[0];
+	} old_bev;
+	struct build_id_event bev;
+	char filename[PATH_MAX];
+	u64 limit = offset + size;
+
+	while (offset < limit) {
+		ssize_t len;
+
+		if (readn(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
+			return -1;
+
+		if (header->needs_swap)
+			perf_event_header__bswap(&old_bev.header);
+
+		/*
+		 * header.size comes from the file: reject records smaller
+		 * than the fixed part or whose filename would overflow
+		 * the buffer.
+		 */
+		len = old_bev.header.size - sizeof(old_bev);
+		if (len < 0 || len >= PATH_MAX) {
+			pr_warning("invalid build_id filename length %zd\n", len);
+			return -1;
+		}
+
+		if (readn(input, filename, len) != len)
+			return -1;
+
+		bev.header = old_bev.header;
+
+		/*
+		 * As the pid is the missing value, we need to fill
+		 * it properly. The header.misc value give us nice hint.
+		 */
+		bev.pid	= HOST_KERNEL_ID;
+		if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
+		    bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
+			bev.pid	= DEFAULT_GUEST_KERNEL_ID;
+
+		memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
+		__event_process_build_id(&bev, filename, session);
+
+		offset += bev.header.size;
+	}
+
+	return 0;
+}
+
+/*
+ * Read the build id table located at [@offset, @offset + @size) of
+ * @input and register every entry with the session owning @header.
+ *
+ * Falls back to perf_header__read_build_ids_abi_quirk() when the table
+ * turns out to be in the old format.
+ *
+ * Returns 0 on success, -1 on a short read, a failed seek or a record
+ * whose filename length is negative or does not fit in the local
+ * buffer.
+ */
+static int perf_header__read_build_ids(struct perf_header *header,
+				       int input, u64 offset, u64 size)
+{
+	struct perf_session *session = container_of(header, struct perf_session, header);
+	struct build_id_event bev;
+	char filename[PATH_MAX];
+	u64 limit = offset + size, orig_offset = offset;
+	int err = -1;
+
+	while (offset < limit) {
+		ssize_t len;
+
+		if (readn(input, &bev, sizeof(bev)) != sizeof(bev))
+			goto out;
+
+		if (header->needs_swap)
+			perf_event_header__bswap(&bev.header);
+
+		/*
+		 * header.size comes from the file: reject records smaller
+		 * than the fixed part or whose filename would overflow
+		 * the buffer.
+		 */
+		len = bev.header.size - sizeof(bev);
+		if (len < 0 || len >= PATH_MAX) {
+			pr_warning("invalid build_id filename length %zd\n", len);
+			goto out;
+		}
+
+		if (readn(input, filename, len) != len)
+			goto out;
+		/*
+		 * The a1645ce1 changeset:
+		 *
+		 * "perf: 'perf kvm' tool for monitoring guest performance from host"
+		 *
+		 * Added a field to struct build_id_event that broke the file
+		 * format.
+		 *
+		 * Since the kernel build-id is the first entry, process the
+		 * table using the old format if the well known
+		 * '[kernel.kallsyms]' string for the kernel build-id has the
+		 * first 4 characters chopped off (where the pid_t sits).
+		 */
+		if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
+			if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
+				return -1;
+			return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
+		}
+
+		__event_process_build_id(&bev, filename, session);
+
+		offset += bev.header.size;
+	}
+	err = 0;
+out:
+	return err;
+}
+
+/*
+ * Macro for features that simply need to read and store a string.
+ *
+ * Defines process_<__feat>(), which reads one string from the feature
+ * section into ff->ph->env.<__feat_env> and returns 0, or -ENOMEM when
+ * do_read_string() yields NULL.
+ */
+#define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \
+static int process_##__feat(struct feat_fd *ff, void *data __maybe_unused) \
+{\
+	ff->ph->env.__feat_env = do_read_string(ff); \
+	return ff->ph->env.__feat_env ? 0 : -ENOMEM; \
+}
+
+/* feature name on the left, perf_header env field on the right */
+FEAT_PROCESS_STR_FUN(hostname, hostname);
+FEAT_PROCESS_STR_FUN(osrelease, os_release);
+FEAT_PROCESS_STR_FUN(version, version);
+FEAT_PROCESS_STR_FUN(arch, arch);
+FEAT_PROCESS_STR_FUN(cpudesc, cpu_desc);
+FEAT_PROCESS_STR_FUN(cpuid, cpuid);
+
+/*
+ * Hand the tracing data section to trace_report(); any negative
+ * result is reported as -1.
+ */
+static int process_tracing_data(struct feat_fd *ff, void *data)
+{
+	ssize_t nread = trace_report(ff->fd, data, false);
+
+	if (nread < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Read the build id table. A failure is only logged: this always
+ * returns 0.
+ */
+static int process_build_id(struct feat_fd *ff, void *data __maybe_unused)
+{
+	int err = perf_header__read_build_ids(ff->ph, ff->fd,
+					      ff->offset, ff->size);
+
+	if (err)
+		pr_debug("Failed to read buildids, continuing...\n");
+
+	return 0;
+}
+
+/*
+ * Read the available and online CPU counts (in that order) and store
+ * them in the header env. Nothing is stored unless both reads succeed.
+ */
+static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused)
+{
+	u32 avail, online;
+	int err;
+
+	err = do_read_u32(ff, &avail);
+	if (!err)
+		err = do_read_u32(ff, &online);
+	if (err)
+		return err;
+
+	ff->ph->env.nr_cpus_avail = (int)avail;
+	ff->ph->env.nr_cpus_online = (int)online;
+	return 0;
+}
+
+/* Read the total memory value and store it in the header env. */
+static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused)
+{
+	u64 total_mem;
+
+	if (do_read_u64(ff, &total_mem))
+		return -1;
+
+	ff->ph->env.total_mem = (unsigned long long)total_mem;
+	return 0;
+}
+
+/* Return the first evsel of @evlist whose idx equals @idx, or NULL. */
+static struct perf_evsel *
+perf_evlist__find_by_index(struct perf_evlist *evlist, int idx)
+{
+	struct perf_evsel *pos, *found = NULL;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (pos->idx == idx) {
+			found = pos;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/*
+ * Give the evsel of @evlist with the same idx as @event a copy of
+ * @event's name, unless @event is unnamed, no such evsel exists or it
+ * already has a name.
+ */
+static void
+perf_evlist__set_event_name(struct perf_evlist *evlist,
+			    struct perf_evsel *event)
+{
+	struct perf_evsel *target;
+
+	if (!event->name)
+		return;
+
+	target = perf_evlist__find_by_index(evlist, event->idx);
+	if (target && !target->name)
+		target->name = strdup(event->name);
+}
+
+/*
+ * Read the event descriptions and use them to name the still unnamed
+ * events of the session's evlist.
+ *
+ * In pipe mode the descriptions cannot be read a second time, so they
+ * are kept in ff->events for print_event_desc(); otherwise they are
+ * freed here. Always returns 0, even when nothing could be read.
+ */
+static int
+process_event_desc(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct perf_evsel *events = read_event_desc(ff);
+	struct perf_session *session;
+	struct perf_evsel *cur;
+	bool is_pipe;
+
+	if (!events)
+		return 0;
+
+	session = container_of(ff->ph, struct perf_session, header);
+	is_pipe = session->data->is_pipe;
+
+	if (is_pipe)
+		ff->events = events;
+
+	for (cur = events; cur->attr.size; cur++)
+		perf_evlist__set_event_name(session->evlist, cur);
+
+	if (!is_pipe)
+		free_event_desc(events);
+
+	return 0;
+}
+
+/*
+ * Read the recorded command line: an argument count followed by that
+ * many strings.
+ *
+ * The arguments are packed back to back, each with its NUL, into one
+ * buffer (env.cmdline); env.cmdline_argv points at the start of each
+ * argument inside that buffer.
+ *
+ * Returns 0 on success, -1 on read or allocation failure.
+ */
+static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused)
+{
+	char *str, *cmdline = NULL, **argv = NULL;
+	u32 nr, i, len = 0;
+
+	if (do_read_u32(ff, &nr))
+		return -1;
+
+	ff->ph->env.nr_cmdline = nr;
+
+	cmdline = zalloc(ff->size + nr + 1);
+	if (!cmdline)
+		return -1;
+
+	argv = zalloc(sizeof(char *) * (nr + 1));
+	if (!argv)
+		goto error;
+
+	for (i = 0; i < nr; i++) {
+		size_t bytes;
+
+		str = do_read_string(ff);
+		if (!str)
+			goto error;
+
+		/* copy the argument, terminator included, behind the last one */
+		bytes = strlen(str) + 1;
+		argv[i] = cmdline + len;
+		memcpy(argv[i], str, bytes);
+		len += bytes;
+		free(str);
+	}
+
+	ff->ph->env.cmdline = cmdline;
+	ff->ph->env.cmdline_argv = (const char **) argv;
+	return 0;
+
+error:
+	free(argv);
+	free(cmdline);
+	return -1;
+}
+
+/*
+ * Read the CPU topology: the sibling core strings, the sibling thread
+ * strings and, when the section is large enough to hold them, a
+ * core id / socket id pair for each available CPU.
+ *
+ * The sibling strings are stored back to back, each with its NUL, in
+ * env.sibling_cores and env.sibling_threads.
+ *
+ * Returns 0 on success and -1 on failure; every failure path releases
+ * env.cpu.
+ */
+static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
+{
+	u32 nr, i;
+	char *str;
+	struct strbuf sb;
+	int cpu_nr = ff->ph->env.nr_cpus_avail;
+	u64 size = 0;
+	struct perf_header *ph = ff->ph;
+
+	ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu));
+	if (!ph->env.cpu)
+		return -1;
+
+	if (do_read_u32(ff, &nr))
+		goto free_cpu;
+
+	ph->env.nr_sibling_cores = nr;
+	size += sizeof(u32);
+	if (strbuf_init(&sb, 128) < 0)
+		goto free_cpu;
+
+	for (i = 0; i < nr; i++) {
+		str = do_read_string(ff);
+		if (!str)
+			goto error;
+
+		/* include a NULL character at the end */
+		if (strbuf_add(&sb, str, strlen(str) + 1) < 0) {
+			free(str);
+			goto error;
+		}
+		size += string_size(str);
+		free(str);
+	}
+	ph->env.sibling_cores = strbuf_detach(&sb, NULL);
+
+	/* release env.cpu here too, like every other failure path */
+	if (do_read_u32(ff, &nr))
+		goto free_cpu;
+
+	ph->env.nr_sibling_threads = nr;
+	size += sizeof(u32);
+
+	for (i = 0; i < nr; i++) {
+		str = do_read_string(ff);
+		if (!str)
+			goto error;
+
+		/* include a NULL character at the end */
+		if (strbuf_add(&sb, str, strlen(str) + 1) < 0) {
+			free(str);
+			goto error;
+		}
+		size += string_size(str);
+		free(str);
+	}
+	ph->env.sibling_threads = strbuf_detach(&sb, NULL);
+
+	/*
+	 * The header may be from old perf,
+	 * which doesn't include core id and socket id information.
+	 */
+	if (ff->size <= size) {
+		zfree(&ph->env.cpu);
+		return 0;
+	}
+
+	for (i = 0; i < (u32)cpu_nr; i++) {
+		if (do_read_u32(ff, &nr))
+			goto free_cpu;
+
+		ph->env.cpu[i].core_id = nr;
+
+		if (do_read_u32(ff, &nr))
+			goto free_cpu;
+
+		/* (u32)-1 is let through by this range check */
+		if (nr != (u32)-1 && nr > (u32)cpu_nr) {
+			pr_debug("socket_id number is too big."
+				 "You may need to upgrade the perf tool.\n");
+			goto free_cpu;
+		}
+
+		ph->env.cpu[i].socket_id = nr;
+	}
+
+	return 0;
+
+error:
+	strbuf_release(&sb);
+free_cpu:
+	zfree(&ph->env.cpu);
+	return -1;
+}
+
+/*
+ * Read the NUMA topology: a node count followed, for each node, by its
+ * number, total and free memory and a CPU list string that is turned
+ * into a cpu map.
+ *
+ * Returns 0 on success, -ENOMEM if the node array cannot be allocated
+ * and -1 on any other failure.
+ */
+static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct numa_node *nodes, *n;
+	u32 nr, i;
+	char *str;
+
+	/* nr nodes */
+	if (do_read_u32(ff, &nr))
+		return -1;
+
+	nodes = zalloc(sizeof(*nodes) * nr);
+	if (!nodes)
+		return -ENOMEM;
+
+	for (i = 0; i < nr; i++) {
+		n = &nodes[i];
+
+		/* node number */
+		if (do_read_u32(ff, &n->node))
+			goto error;
+
+		if (do_read_u64(ff, &n->mem_total))
+			goto error;
+
+		if (do_read_u64(ff, &n->mem_free))
+			goto error;
+
+		str = do_read_string(ff);
+		if (!str)
+			goto error;
+
+		n->map = cpu_map__new(str);
+		/* the string is not needed any more, whether or not that worked */
+		free(str);
+		if (!n->map)
+			goto error;
+	}
+	ff->ph->env.nr_numa_nodes = nr;
+	ff->ph->env.numa_nodes = nodes;
+	return 0;
+
+error:
+	/* NOTE(review): maps built for earlier nodes are not released here */
+	free(nodes);
+	return -1;
+}
+
+/*
+ * Read the PMU mappings: a count followed by that many (type, name)
+ * pairs.
+ *
+ * The pairs are stored in env.pmu_mappings as consecutive
+ * NUL-terminated "<type>:<name>" strings. The type of the PMU named
+ * "msr" is additionally remembered in env.msr_pmu_type.
+ *
+ * Returns 0 on success (including an empty mapping list), -1 on
+ * failure.
+ */
+static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused)
+{
+	char *name;
+	u32 pmu_num;
+	u32 type;
+	struct strbuf sb;
+
+	if (do_read_u32(ff, &pmu_num))
+		return -1;
+
+	if (!pmu_num) {
+		pr_debug("pmu mappings not available\n");
+		return 0;
+	}
+
+	ff->ph->env.nr_pmu_mappings = pmu_num;
+	if (strbuf_init(&sb, 128) < 0)
+		return -1;
+
+	while (pmu_num) {
+		if (do_read_u32(ff, &type))
+			goto error;
+
+		name = do_read_string(ff);
+		if (!name)
+			goto error;
+
+		/* "<type>:<name>" plus an explicit NUL character at the end */
+		if (strbuf_addf(&sb, "%u:%s", type, name) < 0 ||
+		    strbuf_add(&sb, "", 1) < 0) {
+			free(name);
+			goto error;
+		}
+
+		if (!strcmp(name, "msr"))
+			ff->ph->env.msr_pmu_type = type;
+
+		free(name);
+		pmu_num--;
+	}
+	ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL);
+	return 0;
+
+error:
+	strbuf_release(&sb);
+	return -1;
+}
+
+/*
+ * Read the group descriptions (name, leader index and member count
+ * per group) and rebuild the leader/member links between the events
+ * of the session's evlist.
+ *
+ * Returns 0 on success (including "no groups"), -1 on a read or
+ * allocation failure or when the descriptions do not match the
+ * evlist.
+ */
+static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused)
+{
+	int ret = -1;
+	u32 i, nr, nr_groups;
+	struct perf_session *session;
+	struct perf_evsel *evsel, *leader = NULL;
+	struct group_desc {
+		char *name;
+		u32 leader_idx;
+		u32 nr_members;
+	} *desc;
+
+	if (do_read_u32(ff, &nr_groups))
+		return -1;
+
+	ff->ph->env.nr_groups = nr_groups;
+	if (!nr_groups) {
+		pr_debug("group desc not available\n");
+		return 0;
+	}
+
+	desc = calloc(nr_groups, sizeof(*desc));
+	if (!desc)
+		return -1;
+
+	for (i = 0; i < nr_groups; i++) {
+		desc[i].name = do_read_string(ff);
+		if (!desc[i].name)
+			goto out_free;
+
+		if (do_read_u32(ff, &desc[i].leader_idx))
+			goto out_free;
+
+		if (do_read_u32(ff, &desc[i].nr_members))
+			goto out_free;
+	}
+
+	/*
+	 * Rebuild group relationship based on the group_desc
+	 */
+	session = container_of(ff->ph, struct perf_session, header);
+	session->evlist->nr_groups = nr_groups;
+
+	i = nr = 0;
+	evlist__for_each_entry(session->evlist, evsel) {
+		/*
+		 * Once every description has been consumed (i == nr_groups)
+		 * desc[i] must not be looked at any more.
+		 */
+		if (i < nr_groups && evsel->idx == (int) desc[i].leader_idx) {
+			evsel->leader = evsel;
+			/* {anon_group} is a dummy name */
+			if (strcmp(desc[i].name, "{anon_group}")) {
+				/* the evsel takes over the string */
+				evsel->group_name = desc[i].name;
+				desc[i].name = NULL;
+			}
+			evsel->nr_members = desc[i].nr_members;
+
+			/* a new leader while the previous group is incomplete */
+			if (nr > 0) {
+				pr_debug("invalid group desc\n");
+				goto out_free;
+			}
+
+			leader = evsel;
+			nr = evsel->nr_members - 1;
+			i++;
+		} else if (nr) {
+			/* This is a group member */
+			evsel->leader = leader;
+
+			nr--;
+		}
+	}
+
+	if (i != nr_groups || nr != 0) {
+		pr_debug("invalid group desc\n");
+		goto out_free;
+	}
+
+	ret = 0;
+out_free:
+	for (i = 0; i < nr_groups; i++)
+		zfree(&desc[i].name);
+	free(desc);
+
+	return ret;
+}
+
+/*
+ * Process the auxtrace index of the feature section on behalf of the
+ * session owning the header. Returns the auxtrace_index__process()
+ * result, logging an error when it is negative.
+ */
+static int process_auxtrace(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct perf_session *session = container_of(ff->ph, struct perf_session,
+						    header);
+	int err = auxtrace_index__process(ff->fd, ff->size, session,
+					  ff->ph->needs_swap);
+
+	if (err < 0)
+		pr_err("Failed to process auxtrace index\n");
+
+	return err;
+}
+
+/*
+ * Read the CPU cache description: a version (only 1 is accepted), an
+ * entry count and, per entry, four numbers (level, line_size, sets,
+ * ways) followed by three strings (type, size, map).
+ *
+ * On success the entries are stored in env.caches / env.caches_cnt.
+ * Returns 0 on success, -1 on failure; nothing read so far is kept in
+ * that case.
+ */
+static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct cpu_cache_level *caches;
+	u32 cnt, i, version;
+
+	if (do_read_u32(ff, &version))
+		return -1;
+
+	if (version != 1)
+		return -1;
+
+	if (do_read_u32(ff, &cnt))
+		return -1;
+
+	caches = zalloc(sizeof(*caches) * cnt);
+	if (!caches)
+		return -1;
+
+	for (i = 0; i < cnt; i++) {
+		/*
+		 * Fill the zeroed array slot directly, so that the error
+		 * path can free whatever strings were read so far.
+		 */
+		struct cpu_cache_level *c = &caches[i];
+
+		#define _R(v)						\
+			if (do_read_u32(ff, &c->v))			\
+				goto out_free_caches;			\
+
+		_R(level)
+		_R(line_size)
+		_R(sets)
+		_R(ways)
+		#undef _R
+
+		#define _R(v)					\
+			c->v = do_read_string(ff);		\
+			if (!c->v)				\
+				goto out_free_caches;
+
+		_R(type)
+		_R(size)
+		_R(map)
+		#undef _R
+	}
+
+	ff->ph->env.caches = caches;
+	ff->ph->env.caches_cnt = cnt;
+	return 0;
+out_free_caches:
+	/* entries never reached still hold NULL pointers */
+	for (i = 0; i < cnt; i++) {
+		free(caches[i].type);
+		free(caches[i].size);
+		free(caches[i].map);
+	}
+	free(caches);
+	return -1;
+}
+
+/*
+ * Read the first and last sample timestamps and store them in the
+ * session's evlist. Nothing is stored unless both reads succeed.
+ */
+static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct perf_session *session = container_of(ff->ph, struct perf_session,
+						    header);
+	u64 first, last;
+
+	if (do_read_u64(ff, &first))
+		return -1;
+
+	if (do_read_u64(ff, &last))
+		return -1;
+
+	session->evlist->first_sample_time = first;
+	session->evlist->last_sample_time = last;
+	return 0;
+}
+
+/*
+ * Read the memory topology: version (only 1 is accepted), block size
+ * and node count, then for each node its id, its size and its bitmap.
+ *
+ * On success the result is stored in env.memory_bsize,
+ * env.memory_nodes and env.nr_memory_nodes. Returns 0 on success, -1
+ * on failure.
+ */
+static int process_mem_topology(struct feat_fd *ff,
+				void *data __maybe_unused)
+{
+	struct memory_node *nodes;
+	u64 version, i, nr, bsize;
+
+	if (do_read_u64(ff, &version))
+		return -1;
+
+	if (version != 1)
+		return -1;
+
+	if (do_read_u64(ff, &bsize))
+		return -1;
+
+	if (do_read_u64(ff, &nr))
+		return -1;
+
+	nodes = zalloc(sizeof(*nodes) * nr);
+	if (!nodes)
+		return -1;
+
+	for (i = 0; i < nr; i++) {
+		struct memory_node n;
+
+		if (do_read_u64(ff, &n.node))
+			goto out_free;
+
+		if (do_read_u64(ff, &n.size))
+			goto out_free;
+
+		if (do_read_bitmap(ff, &n.set, &n.size))
+			goto out_free;
+
+		nodes[i] = n;
+	}
+
+	ff->ph->env.memory_bsize    = bsize;
+	ff->ph->env.memory_nodes    = nodes;
+	ff->ph->env.nr_memory_nodes = nr;
+	return 0;
+
+out_free:
+	free(nodes);
+	return -1;
+}
+
+/*
+ * Per-feature callbacks and properties; feat_ops[] holds one of these
+ * for each header feature.
+ */
+struct feature_ops {
+	/* writes the feature payload; called by do_write_feat() */
+	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
+	/* prints the feature for the header dump; may be NULL */
+	void (*print)(struct feat_fd *ff, FILE *fp);
+	/* reads the feature payload; may be NULL */
+	int (*process)(struct feat_fd *ff, void *data);
+	/* feature name as shown in messages */
+	const char *name;
+	/* print only when the extended header list was requested */
+	bool full_only;
+	/* set by FEAT_OPR entries; not used in the code visible here */
+	bool synthesize;
+};
+
+/*
+ * FEAT_OPR() builds the feat_ops[] entry of feature HEADER_<n> from the
+ * write_<func>, print_<func> and process_<func> functions and marks it
+ * with .synthesize = true.
+ */
+#define FEAT_OPR(n, func, __full_only) \
+	[HEADER_##n] = {					\
+		.name	    = __stringify(n),			\
+		.write	    = write_##func,			\
+		.print	    = print_##func,			\
+		.full_only  = __full_only,			\
+		.process    = process_##func,			\
+		.synthesize = true				\
+	}
+
+/* Same as FEAT_OPR(), but .synthesize is left unset (false). */
+#define FEAT_OPN(n, func, __full_only) \
+	[HEADER_##n] = {					\
+		.name	    = __stringify(n),			\
+		.write	    = write_##func,			\
+		.print	    = print_##func,			\
+		.full_only  = __full_only,			\
+		.process    = process_##func			\
+	}
+
+/*
+ * feature_ops not implemented: the macros above paste these names, so
+ * they are mapped to NULL.
+ */
+#define print_tracing_data	NULL
+#define print_build_id		NULL
+
+#define process_branch_stack	NULL
+#define process_stat		NULL
+
+
+/*
+ * Operations of every known feature, indexed by HEADER_* value.
+ * The last column says whether the feature is only printed when the
+ * extended header list is requested.
+ */
+static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
+	FEAT_OPN(TRACING_DATA,	tracing_data,	false),
+	FEAT_OPN(BUILD_ID,	build_id,	false),
+	FEAT_OPR(HOSTNAME,	hostname,	false),
+	FEAT_OPR(OSRELEASE,	osrelease,	false),
+	FEAT_OPR(VERSION,	version,	false),
+	FEAT_OPR(ARCH,		arch,		false),
+	FEAT_OPR(NRCPUS,	nrcpus,		false),
+	FEAT_OPR(CPUDESC,	cpudesc,	false),
+	FEAT_OPR(CPUID,		cpuid,		false),
+	FEAT_OPR(TOTAL_MEM,	total_mem,	false),
+	FEAT_OPR(EVENT_DESC,	event_desc,	false),
+	FEAT_OPR(CMDLINE,	cmdline,	false),
+	FEAT_OPR(CPU_TOPOLOGY,	cpu_topology,	true),
+	FEAT_OPR(NUMA_TOPOLOGY,	numa_topology,	true),
+	FEAT_OPN(BRANCH_STACK,	branch_stack,	false),
+	FEAT_OPR(PMU_MAPPINGS,	pmu_mappings,	false),
+	FEAT_OPN(GROUP_DESC,	group_desc,	false),
+	FEAT_OPN(AUXTRACE,	auxtrace,	false),
+	FEAT_OPN(STAT,		stat,		false),
+	FEAT_OPN(CACHE,		cache,		true),
+	FEAT_OPR(SAMPLE_TIME,	sample_time,	false),
+	FEAT_OPR(MEM_TOPOLOGY,	mem_topology,	true),
+};
+
+/*
+ * Context handed to perf_file_section__fprintf_info() while the
+ * feature sections are being printed.
+ */
+struct header_print_data {
+	FILE *fp;  /* stream the header information is printed to */
+	bool full; /* extended list of headers */
+};
+
+/*
+ * Section callback used by perf_header__fprintf_info(): seek to
+ * @section and print feature @feat, or a hint to use -I when the
+ * feature is only shown in the extended listing.
+ *
+ * Seek failures, unknown features and features without a print
+ * callback are skipped; this always returns 0.
+ */
+static int perf_file_section__fprintf_info(struct perf_file_section *section,
+					   struct perf_header *ph,
+					   int feat, int fd, void *data)
+{
+	struct header_print_data *hd = data;
+	const struct feature_ops *ops;
+	struct feat_fd ff;
+
+	if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
+		pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
+				"%d, continuing...\n", section->offset, feat);
+		return 0;
+	}
+
+	if (feat >= HEADER_LAST_FEATURE) {
+		pr_warning("unknown feature %d\n", feat);
+		return 0;
+	}
+
+	ops = &feat_ops[feat];
+	if (!ops->print)
+		return 0;
+
+	ff = (struct  feat_fd) {
+		.fd = fd,
+		.ph = ph,
+	};
+
+	if (ops->full_only && !hd->full) {
+		fprintf(hd->fp, "# %s info available, use -I to display\n",
+			ops->name);
+		return 0;
+	}
+
+	ops->print(&ff, hd->fp);
+	return 0;
+}
+
+/*
+ * Print the header of @session's data file to @fp: capture time,
+ * header version, data/feature offsets, every feature section and,
+ * unless the data comes from a pipe, the names of the features that
+ * are missing. @full selects the extended feature listing.
+ *
+ * Returns 0 on success, -1 if the data file cannot be stat'ed.
+ */
+int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
+{
+	struct header_print_data hd = {
+		.fp   = fp,
+		.full = full,
+	};
+	struct perf_header *header = &session->header;
+	int fd = perf_data__fd(session->data);
+	struct stat st;
+	int bit;
+
+	if (fstat(fd, &st) == -1)
+		return -1;
+
+	fprintf(fp, "# captured on    : %s", ctime(&st.st_ctime));
+
+	fprintf(fp, "# header version : %u\n", header->version);
+	fprintf(fp, "# data offset    : %" PRIu64 "\n", header->data_offset);
+	fprintf(fp, "# data size      : %" PRIu64 "\n", header->data_size);
+	fprintf(fp, "# feat offset    : %" PRIu64 "\n", header->feat_offset);
+
+	perf_header__process_sections(header, fd, &hd,
+				      perf_file_section__fprintf_info);
+
+	if (session->data->is_pipe)
+		return 0;
+
+	fprintf(fp, "# missing features: ");
+	for_each_clear_bit(bit, header->adds_features, HEADER_LAST_FEATURE) {
+		/* bit 0 is never listed */
+		if (!bit)
+			continue;
+		fprintf(fp, "%s ", feat_ops[bit].name);
+	}
+
+	fprintf(fp, "\n");
+	return 0;
+}
+
+/*
+ * Write feature @type, if the header has it, at the current position
+ * of ff->fd and record its offset and size in the section entry *@p,
+ * which is then advanced.
+ *
+ * Returns 0 when the feature is absent or was written, -1 when it has
+ * no write callback, when called in pipe mode (ff->buf set) or when
+ * the write fails; in the last case the file position is moved back
+ * to where the feature started.
+ */
+static int do_write_feat(struct feat_fd *ff, int type,
+			 struct perf_file_section **p,
+			 struct perf_evlist *evlist)
+{
+	struct perf_file_section *sec;
+
+	if (!perf_header__has_feat(ff->ph, type))
+		return 0;
+
+	if (!feat_ops[type].write)
+		return -1;
+
+	if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+		return -1;
+
+	sec = *p;
+	sec->offset = lseek(ff->fd, 0, SEEK_CUR);
+
+	if (feat_ops[type].write(ff, evlist) < 0) {
+		pr_debug("failed to write feature %s\n", feat_ops[type].name);
+
+		/* undo anything written */
+		lseek(ff->fd, sec->offset, SEEK_SET);
+
+		return -1;
+	}
+
+	sec->size = lseek(ff->fd, 0, SEEK_CUR) - sec->offset;
+	*p = sec + 1;
+	return 0;
+}
+
+/*
+ * Write all features set in @header to @fd: the feature payloads go
+ * after the space reserved for the section table at
+ * header->feat_offset, then the table itself is written.
+ *
+ * A feature that fails to write is cleared from the header.
+ *
+ * Returns 0 when there is nothing to write, -ENOMEM if the section
+ * table cannot be allocated, otherwise the result of writing the
+ * table.
+ */
+static int perf_header__adds_write(struct perf_header *header,
+				   struct perf_evlist *evlist, int fd)
+{
+	struct feat_fd ff = {
+		.fd  = fd,
+		.ph = header,
+	};
+	struct perf_file_section *table, *next;
+	int nr_sections, table_size, feat, err;
+	u64 table_start;
+
+	nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
+	if (!nr_sections)
+		return 0;
+
+	table = calloc(nr_sections, sizeof(*table));
+	if (table == NULL)
+		return -ENOMEM;
+
+	next = table;
+	table_size = sizeof(*table) * nr_sections;
+	table_start = header->feat_offset;
+
+	/* skip the table for now, the payloads come right after it */
+	lseek(fd, table_start + table_size, SEEK_SET);
+
+	for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
+		if (do_write_feat(&ff, feat, &next, evlist))
+			perf_header__clear_feat(header, feat);
+	}
+
+	lseek(fd, table_start, SEEK_SET);
+	/*
+	 * may write more than needed due to dropped feature, but
+	 * this is okay, reader will skip the missing entries
+	 */
+	err = do_write(&ff, table, table_size);
+	if (err < 0)
+		pr_debug("failed to write feature section\n");
+
+	free(table);
+	return err;
+}
+
+/*
+ * Write the pipe-mode file header (magic and header size) to @fd.
+ * Returns 0 on success or the negative do_write() result.
+ */
+int perf_header__write_pipe(int fd)
+{
+	struct perf_pipe_file_header f_header = {
+		.magic	   = PERF_MAGIC,
+		.size	   = sizeof(f_header),
+	};
+	struct feat_fd ff = { .fd = fd };
+	int err = do_write(&ff, &f_header, sizeof(f_header));
+
+	if (err >= 0)
+		return 0;
+
+	pr_debug("failed to write perf pipe header\n");
+	return err;
+}
+
+/*
+ * Write the file-mode perf.data header to @fd.
+ *
+ * Order of output: the sample-id arrays (placed right after the space
+ * reserved for struct perf_file_header), the perf_file_attr table, the
+ * feature sections (only when @at_exit is set) and finally the
+ * perf_file_header itself at offset 0.  On success the file position is
+ * left at data_offset + data_size and 0 is returned; a failing do_write()
+ * result is returned as is.
+ */
+int perf_session__write_header(struct perf_session *session,
+			       struct perf_evlist *evlist,
+			       int fd, bool at_exit)
+{
+	struct perf_file_header f_header;
+	struct perf_file_attr   f_attr;
+	struct perf_header *header = &session->header;
+	struct perf_evsel *evsel;
+	struct feat_fd ff;
+	u64 attr_offset;
+	int err;
+
+	ff = (struct feat_fd){ .fd = fd};
+	/* leave room for the file header, which is written last */
+	lseek(fd, sizeof(f_header), SEEK_SET);
+
+	/*
+	 * NOTE(review): ids are taken from session->evlist while the attr
+	 * table below walks @evlist -- presumably both name the same list;
+	 * confirm against callers.
+	 */
+	evlist__for_each_entry(session->evlist, evsel) {
+		evsel->id_offset = lseek(fd, 0, SEEK_CUR);
+		err = do_write(&ff, evsel->id, evsel->ids * sizeof(u64));
+		if (err < 0) {
+			pr_debug("failed to write perf header\n");
+			return err;
+		}
+	}
+
+	attr_offset = lseek(ff.fd, 0, SEEK_CUR);
+
+	/* one perf_file_attr per event: the attr plus where its ids live */
+	evlist__for_each_entry(evlist, evsel) {
+		f_attr = (struct perf_file_attr){
+			.attr = evsel->attr,
+			.ids  = {
+				.offset = evsel->id_offset,
+				.size   = evsel->ids * sizeof(u64),
+			}
+		};
+		err = do_write(&ff, &f_attr, sizeof(f_attr));
+		if (err < 0) {
+			pr_debug("failed to write perf header attribute\n");
+			return err;
+		}
+	}
+
+	/* the data area starts here unless an offset was already recorded */
+	if (!header->data_offset)
+		header->data_offset = lseek(fd, 0, SEEK_CUR);
+	header->feat_offset = header->data_offset + header->data_size;
+
+	if (at_exit) {
+		err = perf_header__adds_write(header, evlist, fd);
+		if (err < 0)
+			return err;
+	}
+
+	f_header = (struct perf_file_header){
+		.magic	   = PERF_MAGIC,
+		.size	   = sizeof(f_header),
+		.attr_size = sizeof(f_attr),
+		.attrs = {
+			.offset = attr_offset,
+			.size   = evlist->nr_entries * sizeof(f_attr),
+		},
+		.data = {
+			.offset = header->data_offset,
+			.size	= header->data_size,
+		},
+		/* event_types is ignored, store zeros */
+	};
+
+	memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features));
+
+	lseek(fd, 0, SEEK_SET);
+	err = do_write(&ff, &f_header, sizeof(f_header));
+	if (err < 0) {
+		pr_debug("failed to write perf header\n");
+		return err;
+	}
+	/* reposition at the end of the data area */
+	lseek(fd, header->data_offset + header->data_size, SEEK_SET);
+
+	return 0;
+}
+
+/*
+ * Read @size bytes from @fd into @buf; when header->needs_swap is set the
+ * buffer is then passed through mem_bswap_64().
+ * Returns 0 on success, -1 when readn() returns a non-positive value.
+ */
+static int perf_header__getbuffer64(struct perf_header *header,
+				    int fd, void *buf, size_t size)
+{
+	if (readn(fd, buf, size) > 0) {
+		if (header->needs_swap)
+			mem_bswap_64(buf, size);
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Load the feature section table found at header->feat_offset and invoke
+ * @process once per feature bit set in header->adds_features (bits below
+ * HEADER_LAST_FEATURE only), handing it consecutive table entries.
+ *
+ * Returns 0 when no feature is set or all callbacks succeed, -1 on
+ * allocation or read failure, otherwise the first negative callback result.
+ */
+int perf_header__process_sections(struct perf_header *header, int fd,
+				  void *data,
+				  int (*process)(struct perf_file_section *section,
+						 struct perf_header *ph,
+						 int feat, int fd, void *data))
+{
+	struct perf_file_section *table, *cur;
+	int table_bytes;
+	int nr, bit, rc;
+
+	nr = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
+	if (nr == 0)
+		return 0;
+
+	table = calloc(nr, sizeof(*table));
+	if (table == NULL)
+		return -1;
+	cur = table;
+
+	table_bytes = sizeof(*table) * nr;
+
+	lseek(fd, header->feat_offset, SEEK_SET);
+
+	rc = perf_header__getbuffer64(header, fd, table, table_bytes);
+	if (rc < 0)
+		goto out_free;
+
+	for_each_set_bit(bit, header->adds_features, HEADER_LAST_FEATURE) {
+		rc = process(cur, header, bit, fd, data);
+		cur++;
+		if (rc < 0)
+			goto out_free;
+	}
+	/* positive callback results are not propagated */
+	rc = 0;
+out_free:
+	free(table);
+	return rc;
+}
+
+/*
+ * perf_event_attr sizes of the known ABI revisions, indexed by revision.
+ * The trailing 0 terminates the table for try_all_file_abis().
+ */
+static const int attr_file_abi_sizes[] = {
+	[0] = PERF_ATTR_SIZE_VER0,
+	[1] = PERF_ATTR_SIZE_VER1,
+	[2] = PERF_ATTR_SIZE_VER2,
+	[3] = PERF_ATTR_SIZE_VER3,
+	[4] = PERF_ATTR_SIZE_VER4,
+	0,
+};
+
+/*
+ * The legacy file format does not use the magic number to encode
+ * endianness; hdr_sz is used for that instead.  Because hdr_sz depends on
+ * the ABI revision, every known revision is tried in both byte orders.
+ *
+ * Returns 0 on a match (setting ph->needs_swap for a byte-swapped match),
+ * -1 when no combination fits.
+ */
+static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph)
+{
+	uint64_t expected;
+	int abi;
+
+	for (abi = 0; attr_file_abi_sizes[abi] != 0; abi++) {
+		expected = attr_file_abi_sizes[abi]
+			 + sizeof(struct perf_file_section);
+
+		if (hdr_sz == expected) {
+			/* native byte order, nothing to record */
+		} else if (bswap_64(hdr_sz) == expected) {
+			ph->needs_swap = true;
+		} else {
+			continue;
+		}
+
+		pr_debug("ABI%d perf.data file detected, need_swap=%d\n",
+			 abi,
+			 ph->needs_swap);
+		return 0;
+	}
+
+	/* could not determine endianness */
+	return -1;
+}
+
+/* Size in bytes accepted for a revision 0 pipe-mode file header. */
+#define PERF_PIPE_HDR_VER0	16
+
+/*
+ * Known pipe header sizes, indexed by revision; the trailing 0 terminates
+ * the table for try_all_pipe_abis().
+ */
+static const size_t attr_pipe_abi_sizes[] = {
+	[0] = PERF_PIPE_HDR_VER0,
+	0,
+};
+
+/*
+ * In the legacy pipe format, there is an implicit assumption that the
+ * endianness of the host recording the samples and of the host parsing them
+ * is the same. This is not always the case given that the pipe output may
+ * always be redirected into a file and analyzed on a different machine with
+ * possibly a different endianness and perf_event ABI revisions in the perf
+ * tool itself.
+ *
+ * @hdr_sz is the size field as read from the pipe header.  Returns 0 when
+ * it matches a known pipe header size, either directly or after a 64-bit
+ * byte swap (in which case ph->needs_swap is set); -1 otherwise.
+ */
+static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
+{
+	int i;
+
+	for (i = 0 ; attr_pipe_abi_sizes[i]; i++) {
+		if (hdr_sz != attr_pipe_abi_sizes[i]) {
+			/*
+			 * Compare the swapped value with the table entry, not
+			 * with hdr_sz itself: the latter rejects every real
+			 * opposite-endian header and accepts byte-palindromic
+			 * garbage such as 0.
+			 */
+			if (bswap_64(hdr_sz) != attr_pipe_abi_sizes[i])
+				continue;
+
+			ph->needs_swap = true;
+		}
+		pr_debug("Pipe ABI%d perf.data file detected\n", i);
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * Report whether @magic is one of the recognised perf.data magic values:
+ * the legacy magic (compared byte-wise) or the current magic in either of
+ * its two stored forms.
+ */
+bool is_perf_magic(u64 magic)
+{
+	if (memcmp(&magic, __perf_magic1, sizeof(magic)) == 0)
+		return true;
+
+	return magic == __perf_magic2 || magic == __perf_magic2_sw;
+}
+
+/*
+ * Identify the header version from @magic and work out whether the data
+ * needs byte swapping.
+ *
+ * Legacy magic: version 1, endianness is derived from @hdr_sz by the pipe
+ * or file ABI probe depending on @is_pipe.  Otherwise version 2 is assumed,
+ * where the magic itself encodes endianness.
+ *
+ * Returns 0 on success (ph->version set, ph->needs_swap set when swapping
+ * is required), -1 when the magic or size is not recognised.
+ */
+static int check_magic_endian(u64 magic, uint64_t hdr_sz,
+			      bool is_pipe, struct perf_header *ph)
+{
+	/* check for legacy format */
+	if (memcmp(&magic, __perf_magic1, sizeof(magic)) == 0) {
+		ph->version = PERF_HEADER_VERSION_1;
+		pr_debug("legacy perf.data format\n");
+
+		return is_pipe ? try_all_pipe_abis(hdr_sz, ph)
+			       : try_all_file_abis(hdr_sz, ph);
+	}
+
+	/*
+	 * The new magic number both identifies real perf.data files and
+	 * encodes the endianness of the file.
+	 */
+	ph->version = PERF_HEADER_VERSION_2;
+
+	/* same endianness as this host */
+	if (magic == __perf_magic2)
+		return 0;
+
+	/* opposite endianness */
+	if (magic == __perf_magic2_sw) {
+		ph->needs_swap = true;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Read and validate the file-mode header at offset 0 of @fd.
+ *
+ * Fills @header with the (byte-swapped if necessary) on-disk header and
+ * copies the feature bitmap plus data/feature offsets into @ph.
+ * Returns 0 on success, -1 on read failure, bad magic or unexpected
+ * header size.
+ */
+int perf_file_header__read(struct perf_file_header *header,
+			   struct perf_header *ph, int fd)
+{
+	ssize_t ret;
+
+	lseek(fd, 0, SEEK_SET);
+
+	ret = readn(fd, header, sizeof(*header));
+	if (ret <= 0)
+		return -1;
+
+	if (check_magic_endian(header->magic,
+			       header->attr_size, false, ph) < 0) {
+		pr_debug("magic/endian check failed\n");
+		return -1;
+	}
+
+	/* swap the fixed u64 fields; the feature bitmap is handled below */
+	if (ph->needs_swap) {
+		mem_bswap_64(header, offsetof(struct perf_file_header,
+			     adds_features));
+	}
+
+	if (header->size != sizeof(*header)) {
+		/* Support the previous format */
+		if (header->size == offsetof(typeof(*header), adds_features))
+			bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
+		else
+			return -1;
+	} else if (ph->needs_swap) {
+		/*
+		 * feature bitmap is declared as an array of unsigned longs --
+		 * not good since its size can differ between the host that
+		 * generated the data file and the host analyzing the file.
+		 *
+		 * We need to handle endianness, but we don't know the size of
+		 * the unsigned long where the file was generated. Take a best
+		 * guess at determining it: try 64-bit swap first (ie., file
+		 * created on a 64-bit host), and check if the hostname feature
+		 * bit is set (this feature bit is forced on as of fbe96f2).
+		 * If the bit is not, undo the 64-bit swap and try a 32-bit
+		 * swap. If the hostname bit is still not set (e.g., older data
+		 * file), punt and fallback to the original behavior --
+		 * clearing all feature bits and setting buildid.
+		 */
+		/*
+		 * NOTE(review): the length passed here is a word count
+		 * (BITS_TO_U64/BITS_TO_U32) while the call above passes a
+		 * byte count -- confirm which unit mem_bswap_*() expects.
+		 */
+		mem_bswap_64(&header->adds_features,
+			    BITS_TO_U64(HEADER_FEAT_BITS));
+
+		if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
+			/* unswap as u64 */
+			mem_bswap_64(&header->adds_features,
+				    BITS_TO_U64(HEADER_FEAT_BITS));
+
+			/* unswap as u32 */
+			mem_bswap_32(&header->adds_features,
+				    BITS_TO_U32(HEADER_FEAT_BITS));
+		}
+
+		if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
+			bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
+			set_bit(HEADER_BUILD_ID, header->adds_features);
+		}
+	}
+
+	memcpy(&ph->adds_features, &header->adds_features,
+	       sizeof(ph->adds_features));
+
+	/* the feature section table directly follows the data area */
+	ph->data_offset  = header->data.offset;
+	ph->data_size	 = header->data.size;
+	ph->feat_offset  = header->data.offset + header->data.size;
+	return 0;
+}
+
+/*
+ * perf_header__process_sections() callback: seek to @section and run the
+ * process handler registered for @feat, if any.
+ *
+ * A failed seek, a feature number at or above HEADER_LAST_FEATURE and a
+ * feature without a handler are all skipped with a 0 return; otherwise the
+ * handler's result is returned.
+ */
+static int perf_file_section__process(struct perf_file_section *section,
+				      struct perf_header *ph,
+				      int feat, int fd, void *data)
+{
+	struct feat_fd fdd;
+	off_t pos;
+
+	pos = lseek(fd, section->offset, SEEK_SET);
+	if (pos == (off_t)-1) {
+		pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
+			  "%d, continuing...\n", section->offset, feat);
+		return 0;
+	}
+
+	if (feat >= HEADER_LAST_FEATURE) {
+		pr_debug("unknown feature %d, continuing...\n", feat);
+		return 0;
+	}
+
+	if (feat_ops[feat].process == NULL)
+		return 0;
+
+	fdd = (struct feat_fd){
+		.fd	= fd,
+		.ph	= ph,
+		.size	= section->size,
+		.offset	= section->offset,
+	};
+
+	return feat_ops[feat].process(&fdd, data);
+}
+
+/*
+ * Read the pipe-mode header from @fd, validate its magic/size and fix up
+ * the size field's endianness.  When @repipe is set the header is also
+ * copied to standard output.  Returns 0 on success, -1 on any failure.
+ */
+static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
+				       struct perf_header *ph, int fd,
+				       bool repipe)
+{
+	struct feat_fd out = {
+		.fd = STDOUT_FILENO,
+		.ph = ph,
+	};
+
+	if (readn(fd, header, sizeof(*header)) <= 0)
+		return -1;
+
+	if (check_magic_endian(header->magic, header->size, true, ph) < 0) {
+		pr_debug("endian/magic failed\n");
+		return -1;
+	}
+
+	if (ph->needs_swap)
+		header->size = bswap_64(header->size);
+
+	if (!repipe)
+		return 0;
+
+	return do_write(&out, header, sizeof(*header)) < 0 ? -1 : 0;
+}
+
+/*
+ * Read the pipe-mode header of @session's input.
+ * Returns 0 on success, -EINVAL when the header cannot be read or is not
+ * in a recognised format.
+ */
+static int perf_header__read_pipe(struct perf_session *session)
+{
+	struct perf_pipe_file_header pipe_hdr;
+	int rc;
+
+	rc = perf_file_header__read_pipe(&pipe_hdr, &session->header,
+					 perf_data__fd(session->data),
+					 session->repipe);
+	if (rc < 0) {
+		pr_debug("incompatible file format\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Read one perf_file_attr record (perf_event_attr followed by the ids
+ * section descriptor) from @fd into @f_attr.
+ *
+ * The on-file perf_event_attr may be smaller than ours (older ABI): only
+ * the bytes present in the file are read and the remainder of @f_attr stays
+ * zeroed.  A file attr larger than ours, or non-zero but smaller than the
+ * ABI0 minimum, is rejected.  No byte swapping is done here apart from
+ * decoding the size field; the caller swaps the rest.
+ *
+ * Returns 0 on success, -1 on a read failure or an unsupported size.
+ */
+static int read_attr(int fd, struct perf_header *ph,
+		     struct perf_file_attr *f_attr)
+{
+	struct perf_event_attr *attr = &f_attr->attr;
+	size_t sz, left;
+	size_t our_sz = sizeof(f_attr->attr);
+	ssize_t ret;
+
+	memset(f_attr, 0, sizeof(*f_attr));
+
+	/* read minimal guaranteed structure */
+	ret = readn(fd, attr, PERF_ATTR_SIZE_VER0);
+	if (ret <= 0) {
+		pr_debug("cannot read %d bytes of header attr\n",
+			 PERF_ATTR_SIZE_VER0);
+		return -1;
+	}
+
+	/* on file perf_event_attr size */
+	sz = attr->size;
+
+	if (ph->needs_swap)
+		sz = bswap_32(sz);
+
+	if (sz == 0) {
+		/* assume ABI0 */
+		sz =  PERF_ATTR_SIZE_VER0;
+	} else if (sz < PERF_ATTR_SIZE_VER0) {
+		/*
+		 * Would make 'left' below wrap around and turn the second
+		 * read into a huge write past the end of f_attr.
+		 */
+		pr_debug("bad attr size %zu, expected at least %zu\n",
+			 sz, (size_t)PERF_ATTR_SIZE_VER0);
+		return -1;
+	} else if (sz > our_sz) {
+		pr_debug("file uses a more recent and unsupported ABI"
+			 " (%zu bytes extra)\n", sz - our_sz);
+		return -1;
+	}
+	/* what we have not yet read and that we know about */
+	left = sz - PERF_ATTR_SIZE_VER0;
+	if (left) {
+		char *ptr = (char *)attr + PERF_ATTR_SIZE_VER0;
+
+		ret = readn(fd, ptr, left);
+		if (ret <= 0) {
+			pr_debug("cannot read %zu remaining bytes of header attr\n",
+				 left);
+			return -1;
+		}
+	}
+	/* read perf_file_section, ids are read in caller */
+	ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids));
+
+	return ret <= 0 ? -1 : 0;
+}
+
+/*
+ * Attach the event format matching evsel->attr.config to @evsel and, when
+ * the evsel has no name yet, name it "<system>:<event>".
+ *
+ * Returns 0 on success or when the format is already attached, -1 when
+ * @pevent is missing, the format cannot be found or the name cannot be
+ * allocated.
+ */
+static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel,
+						struct pevent *pevent)
+{
+	struct event_format *fmt;
+	char name[128];
+
+	/* already prepared */
+	if (evsel->tp_format != NULL)
+		return 0;
+
+	if (!pevent) {
+		pr_debug("broken or missing trace data\n");
+		return -1;
+	}
+
+	fmt = pevent_find_event(pevent, evsel->attr.config);
+	if (!fmt) {
+		pr_debug("cannot find event format for %d\n", (int)evsel->attr.config);
+		return -1;
+	}
+
+	if (evsel->name == NULL) {
+		snprintf(name, sizeof(name), "%s:%s", fmt->system, fmt->name);
+		evsel->name = strdup(name);
+		if (!evsel->name)
+			return -1;
+	}
+
+	evsel->tp_format = fmt;
+	return 0;
+}
+
+/*
+ * Prepare every tracepoint event in @evlist; stops and returns -1 at the
+ * first one that cannot be prepared, 0 otherwise.
+ */
+static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist,
+						  struct pevent *pevent)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+			continue;
+
+		if (perf_evsel__prepare_tracepoint_event(evsel, pevent) != 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Read the perf.data header for @session and build session->evlist.
+ *
+ * Pipe input only has its small pipe header read here.  For file input the
+ * file header is validated, one evsel is created per perf_file_attr record
+ * together with its sample ids, the feature sections are processed and
+ * tracepoint events get their formats attached.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure (or when tracepoint
+ * preparation fails), -EINVAL for an unusable header, or -errno when a
+ * read fails.
+ */
+int perf_session__read_header(struct perf_session *session)
+{
+	struct perf_data *data = session->data;
+	struct perf_header *header = &session->header;
+	struct perf_file_header	f_header;
+	struct perf_file_attr	f_attr;
+	u64			f_id;
+	int nr_attrs, nr_ids, i, j;
+	int fd = perf_data__fd(data);
+
+	session->evlist = perf_evlist__new();
+	if (session->evlist == NULL)
+		return -ENOMEM;
+
+	session->evlist->env = &header->env;
+	session->machines.host.env = &header->env;
+	if (perf_data__is_pipe(data))
+		return perf_header__read_pipe(session);
+
+	if (perf_file_header__read(&f_header, header, fd) < 0)
+		return -EINVAL;
+
+	/*
+	 * Sanity check that perf.data was written cleanly; data size is
+	 * initialized to 0 and updated only if the on_exit function is run.
+	 * If data size is still 0 then the file contains only partial
+	 * information.  Just warn user and process it as much as it can.
+	 */
+	if (f_header.data.size == 0) {
+		pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n"
+			   "Was the 'perf record' command properly terminated?\n",
+			   data->file.path);
+	}
+
+	/*
+	 * attr_size comes straight from the file and is used as a divisor
+	 * below; a zero value cannot describe any attr record.
+	 */
+	if (f_header.attr_size == 0) {
+		pr_err("ERROR: The %s file's attr size field is 0 which is unexpected.\n"
+		       "Was the 'perf record' command properly terminated?\n",
+		       data->file.path);
+		return -EINVAL;
+	}
+
+	nr_attrs = f_header.attrs.size / f_header.attr_size;
+	lseek(fd, f_header.attrs.offset, SEEK_SET);
+
+	for (i = 0; i < nr_attrs; i++) {
+		struct perf_evsel *evsel;
+		off_t tmp;
+
+		if (read_attr(fd, header, &f_attr) < 0)
+			goto out_errno;
+
+		if (header->needs_swap) {
+			f_attr.ids.size   = bswap_64(f_attr.ids.size);
+			f_attr.ids.offset = bswap_64(f_attr.ids.offset);
+			perf_event__attr_swap(&f_attr.attr);
+		}
+
+		/* remember where the next attr record starts */
+		tmp = lseek(fd, 0, SEEK_CUR);
+		evsel = perf_evsel__new(&f_attr.attr);
+
+		if (evsel == NULL)
+			goto out_delete_evlist;
+
+		evsel->needs_swap = header->needs_swap;
+		/*
+		 * Do it before so that if perf_evsel__alloc_id fails, this
+		 * entry gets purged too at perf_evlist__delete().
+		 */
+		perf_evlist__add(session->evlist, evsel);
+
+		nr_ids = f_attr.ids.size / sizeof(u64);
+		/*
+		 * We don't have the cpu and thread maps on the header, so
+		 * for allocating the perf_sample_id table we fake 1 cpu and
+		 * hattr->ids threads.
+		 */
+		if (perf_evsel__alloc_id(evsel, 1, nr_ids))
+			goto out_delete_evlist;
+
+		lseek(fd, f_attr.ids.offset, SEEK_SET);
+
+		for (j = 0; j < nr_ids; j++) {
+			if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id)))
+				goto out_errno;
+
+			perf_evlist__id_add(session->evlist, evsel, 0, j, f_id);
+		}
+
+		/* back to the attr table for the next record */
+		lseek(fd, tmp, SEEK_SET);
+	}
+
+	symbol_conf.nr_events = nr_attrs;
+
+	/* the result of feature processing is not checked here */
+	perf_header__process_sections(header, fd, &session->tevent,
+				      perf_file_section__process);
+
+	if (perf_evlist__prepare_tracepoint_events(session->evlist,
+						   session->tevent.pevent))
+		goto out_delete_evlist;
+
+	return 0;
+out_errno:
+	/*
+	 * NOTE(review): the failing helpers also treat a 0-byte read as an
+	 * error; errno may then be 0 and this would return success -- confirm.
+	 */
+	return -errno;
+
+out_delete_evlist:
+	perf_evlist__delete(session->evlist);
+	session->evlist = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * Build a PERF_RECORD_HEADER_ATTR event carrying @attr and the @ids sample
+ * ids in @id, and hand it to @process.
+ *
+ * Returns -ENOMEM when the event cannot be allocated, -E2BIG when the event
+ * size does not fit the u16 header size field, otherwise the result of
+ * @process.
+ */
+int perf_event__synthesize_attr(struct perf_tool *tool,
+				struct perf_event_attr *attr, u32 ids, u64 *id,
+				perf_event__handler_t process)
+{
+	union perf_event *ev;
+	size_t ids_bytes = ids * sizeof(u64);
+	size_t size;
+	int err;
+
+	size = sizeof(struct perf_event_attr);
+	size = PERF_ALIGN(size, sizeof(u64));
+	size += sizeof(struct perf_event_header) + ids_bytes;
+
+	ev = malloc(size);
+	if (!ev)
+		return -ENOMEM;
+
+	ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
+	ev->attr.header.size = (u16)size;
+	ev->attr.attr = *attr;
+	memcpy(ev->attr.id, id, ids_bytes);
+
+	/* the cast above truncates: only emit events whose size survived it */
+	if (ev->attr.header.size != size)
+		err = -E2BIG;
+	else
+		err = process(tool, ev, NULL, NULL);
+
+	free(ev);
+
+	return err;
+}
+
+/*
+ * Emit one PERF_RECORD_HEADER_FEATURE event per feature set in the session
+ * header, followed by a terminating event whose feat_id is
+ * HEADER_LAST_FEATURE.
+ *
+ * Each feature is serialised by its feat_ops write handler into a memory
+ * buffer (ff.buf) instead of a file descriptor, then passed to @process.
+ * Features without the synthesize flag, or whose writer fails or produces
+ * no payload, are skipped.
+ *
+ * Returns -ENOMEM when the buffer cannot be allocated, the first non-zero
+ * result of @process for a feature, otherwise the result of @process for
+ * the terminating event.
+ */
+int perf_event__synthesize_features(struct perf_tool *tool,
+				    struct perf_session *session,
+				    struct perf_evlist *evlist,
+				    perf_event__handler_t process)
+{
+	struct perf_header *header = &session->header;
+	struct feat_fd ff;
+	struct feature_event *fe;
+	size_t sz, sz_hdr;
+	int feat, ret;
+
+	sz_hdr = sizeof(fe->header);
+	sz = sizeof(union perf_event);
+	/* get a nice alignment */
+	sz = PERF_ALIGN(sz, page_size);
+
+	memset(&ff, 0, sizeof(ff));
+
+	ff.buf = malloc(sz);
+	if (!ff.buf)
+		return -ENOMEM;
+
+	ff.size = sz - sz_hdr;
+
+	for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
+		/*
+		 * NOTE(review): the guard tests .synthesize but the call
+		 * below goes through .write -- presumably .synthesize is a
+		 * flag rather than a handler; confirm against feat_ops.
+		 */
+		if (!feat_ops[feat].synthesize) {
+			pr_debug("No record header feature for header :%d\n", feat);
+			continue;
+		}
+
+		/* payload starts right after the feature_event header */
+		ff.offset = sizeof(*fe);
+
+		ret = feat_ops[feat].write(&ff, evlist);
+		if (ret || ff.offset <= (ssize_t)sizeof(*fe)) {
+			pr_debug("Error writing feature\n");
+			continue;
+		}
+		/* ff.buf may have changed due to realloc in do_write() */
+		fe = ff.buf;
+		memset(fe, 0, sizeof(*fe));
+
+		fe->feat_id = feat;
+		fe->header.type = PERF_RECORD_HEADER_FEATURE;
+		fe->header.size = ff.offset;
+
+		ret = process(tool, ff.buf, NULL, NULL);
+		if (ret) {
+			free(ff.buf);
+			return ret;
+		}
+	}
+
+	/* Send HEADER_LAST_FEATURE mark. */
+	fe = ff.buf;
+	fe->feat_id     = HEADER_LAST_FEATURE;
+	fe->header.type = PERF_RECORD_HEADER_FEATURE;
+	fe->header.size = sizeof(*fe);
+
+	ret = process(tool, ff.buf, NULL, NULL);
+
+	free(ff.buf);
+	return ret;
+}
+
+/*
+ * Handle a PERF_RECORD_HEADER_FEATURE event received in pipe mode: run the
+ * feature's process handler on the event payload and, when the tool asks
+ * for feature headers, print it (or a hint that -I shows it).
+ *
+ * Returns 0 when the event was handled or ignored, -1 for an out-of-range
+ * feature id or when the process handler fails.
+ */
+int perf_event__process_feature(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_session *session __maybe_unused)
+{
+	struct feat_fd ff = { .fd = 0 };
+	struct feature_event *fe = (struct feature_event *)event;
+	int type = fe->header.type;
+	u64 feat = fe->feat_id;
+
+	if (type < 0 || type >= PERF_RECORD_HEADER_MAX) {
+		pr_warning("invalid record type %d in pipe-mode\n", type);
+		return 0;
+	}
+	/*
+	 * HEADER_LAST_FEATURE is an end marker, not a feature: reject it too
+	 * before indexing feat_ops[], the same bound that
+	 * perf_file_section__process() applies.
+	 */
+	if (feat == HEADER_RESERVED || feat >= HEADER_LAST_FEATURE) {
+		pr_warning("invalid record type %d in pipe-mode\n", type);
+		return -1;
+	}
+
+	if (!feat_ops[feat].process)
+		return 0;
+
+	/* the payload is parsed from memory, not from a file descriptor */
+	ff.buf  = (void *)fe->data;
+	ff.size = event->header.size - sizeof(event->header);
+	ff.ph = &session->header;
+
+	if (feat_ops[feat].process(&ff, NULL))
+		return -1;
+
+	if (!feat_ops[feat].print || !tool->show_feat_hdr)
+		return 0;
+
+	if (!feat_ops[feat].full_only ||
+	    tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) {
+		feat_ops[feat].print(&ff, stdout);
+	} else {
+		fprintf(stdout, "# %s info available, use -I to display\n",
+			feat_ops[feat].name);
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate a zeroed PERF_RECORD_EVENT_UPDATE event with room for @size
+ * payload bytes (total rounded up to a multiple of 8) and fill in its
+ * header, update @type and event @id.  Returns NULL on allocation failure;
+ * the caller frees the result.
+ */
+static struct event_update_event *
+event_update_event__new(size_t size, u64 type, u64 id)
+{
+	struct event_update_event *ev;
+	size_t total;
+
+	total = size + sizeof(*ev);
+	total = PERF_ALIGN(total, sizeof(u64));
+
+	ev = zalloc(total);
+	if (ev == NULL)
+		return NULL;
+
+	ev->header.type = PERF_RECORD_EVENT_UPDATE;
+	ev->header.size = (u16)total;
+	ev->type = type;
+	ev->id = id;
+
+	return ev;
+}
+
+/*
+ * Send @evsel's unit string to @process as an event-update event tagged
+ * with the evsel's first id.  Returns -ENOMEM on allocation failure,
+ * otherwise the result of @process.
+ */
+int
+perf_event__synthesize_event_update_unit(struct perf_tool *tool,
+					 struct perf_evsel *evsel,
+					 perf_event__handler_t process)
+{
+	size_t unit_len = strlen(evsel->unit);
+	struct event_update_event *update;
+	int rc;
+
+	/* one extra byte: the zeroed allocation supplies the terminator */
+	update = event_update_event__new(unit_len + 1, PERF_EVENT_UPDATE__UNIT,
+					 evsel->id[0]);
+	if (!update)
+		return -ENOMEM;
+
+	strncpy(update->data, evsel->unit, unit_len);
+
+	rc = process(tool, (union perf_event *)update, NULL, NULL);
+	free(update);
+	return rc;
+}
+
+/*
+ * Send @evsel's scale factor to @process as an event-update event tagged
+ * with the evsel's first id.  Returns -ENOMEM on allocation failure,
+ * otherwise the result of @process.
+ */
+int
+perf_event__synthesize_event_update_scale(struct perf_tool *tool,
+					  struct perf_evsel *evsel,
+					  perf_event__handler_t process)
+{
+	struct event_update_event_scale *payload;
+	struct event_update_event *update;
+	int rc;
+
+	update = event_update_event__new(sizeof(*payload),
+					 PERF_EVENT_UPDATE__SCALE,
+					 evsel->id[0]);
+	if (!update)
+		return -ENOMEM;
+
+	payload = (struct event_update_event_scale *) update->data;
+	payload->scale = evsel->scale;
+
+	rc = process(tool, (union perf_event*) update, NULL, NULL);
+	free(update);
+	return rc;
+}
+
+/*
+ * Send @evsel's name to @process as an event-update event tagged with the
+ * evsel's first id.  Returns -ENOMEM on allocation failure, otherwise the
+ * result of @process.
+ */
+int
+perf_event__synthesize_event_update_name(struct perf_tool *tool,
+					 struct perf_evsel *evsel,
+					 perf_event__handler_t process)
+{
+	size_t name_len = strlen(evsel->name);
+	struct event_update_event *update;
+	int rc;
+
+	/* one extra byte: the zeroed allocation supplies the terminator */
+	update = event_update_event__new(name_len + 1, PERF_EVENT_UPDATE__NAME,
+					 evsel->id[0]);
+	if (!update)
+		return -ENOMEM;
+
+	strncpy(update->data, evsel->name, name_len);
+
+	rc = process(tool, (union perf_event*) update, NULL, NULL);
+	free(update);
+	return rc;
+}
+
+/*
+ * Send @evsel's own cpu map to @process as an event-update event tagged
+ * with the evsel's first id.  Nothing is sent (0 returned) when the evsel
+ * has no own_cpus.  Returns -ENOMEM on allocation failure, otherwise the
+ * result of @process.
+ */
+int
+perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
+					struct perf_evsel *evsel,
+					perf_event__handler_t process)
+{
+	struct event_update_event *update;
+	size_t size = sizeof(struct event_update_event);
+	u16 map_type;
+	int max_cpu;
+	int rc;
+
+	if (evsel->own_cpus == NULL)
+		return 0;
+
+	/* size goes in as the header size and comes back as the event size */
+	update = cpu_map_data__alloc(evsel->own_cpus, &size, &map_type, &max_cpu);
+	if (update == NULL)
+		return -ENOMEM;
+
+	update->header.type = PERF_RECORD_EVENT_UPDATE;
+	update->header.size = (u16)size;
+	update->type = PERF_EVENT_UPDATE__CPUS;
+	update->id   = evsel->id[0];
+
+	cpu_map_data__synthesize((struct cpu_map_data *) update->data,
+				 evsel->own_cpus,
+				 map_type, max_cpu);
+
+	rc = process(tool, (union perf_event*) update, NULL, NULL);
+	free(update);
+	return rc;
+}
+
+/*
+ * Print a human-readable dump of an event-update @event to @fp: the event
+ * id followed by the scale, unit, name or cpu list it carries.
+ * Returns the accumulated fprintf()/cpu_map__fprintf() results.
+ */
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
+{
+	struct event_update_event *ev = &event->event_update;
+	struct event_update_event_scale *ev_scale;
+	struct event_update_event_cpus *ev_cpus;
+	struct cpu_map *map;
+	size_t ret;
+
+	ret = fprintf(fp, "\n... id:    %" PRIu64 "\n", ev->id);
+
+	switch (ev->type) {
+	case PERF_EVENT_UPDATE__SCALE:
+		ev_scale = (struct event_update_event_scale *) ev->data;
+		ret += fprintf(fp, "... scale: %f\n", ev_scale->scale);
+		break;
+	case PERF_EVENT_UPDATE__UNIT:
+		ret += fprintf(fp, "... unit:  %s\n", ev->data);
+		break;
+	case PERF_EVENT_UPDATE__NAME:
+		ret += fprintf(fp, "... name:  %s\n", ev->data);
+		break;
+	case PERF_EVENT_UPDATE__CPUS:
+		ev_cpus = (struct event_update_event_cpus *) ev->data;
+		ret += fprintf(fp, "... ");
+
+		map = cpu_map__new_data(&ev_cpus->cpus);
+		if (map) {
+			ret += cpu_map__fprintf(map, fp);
+			/* the map was built only for printing: drop it again */
+			cpu_map__put(map);
+		} else {
+			ret += fprintf(fp, "failed to get cpus\n");
+		}
+		break;
+	default:
+		ret += fprintf(fp, "... unknown type\n");
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Synthesize one attr event for every evsel of @session's evlist.
+ * Stops at, and returns, the first non-zero result; 0 when all succeed.
+ */
+int perf_event__synthesize_attrs(struct perf_tool *tool,
+				   struct perf_session *session,
+				   perf_event__handler_t process)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		int err = perf_event__synthesize_attr(tool, &evsel->attr,
+						      evsel->ids, evsel->id,
+						      process);
+		if (err == 0)
+			continue;
+
+		pr_debug("failed to create perf header attribute\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/* True when @counter has a non-empty unit string. */
+static bool has_unit(struct perf_evsel *counter)
+{
+	if (counter->unit == NULL)
+		return false;
+
+	return counter->unit[0] != '\0';
+}
+
+/* True when @counter's scale differs from 1, the value treated as "no scale". */
+static bool has_scale(struct perf_evsel *counter)
+{
+	return !(counter->scale == 1);
+}
+
+/*
+ * Synthesize the per-event information that is not carried by the attr
+ * event itself -- unit, scale, own cpus and (pipe output only) name -- for
+ * every supported evsel in @evsel_list.
+ *
+ * Returns 0 on success or the first negative result reported by one of the
+ * synthesize helpers.
+ */
+int perf_event__synthesize_extra_attr(struct perf_tool *tool,
+				      struct perf_evlist *evsel_list,
+				      perf_event__handler_t process,
+				      bool is_pipe)
+{
+	struct perf_evsel *counter;
+	int err;
+
+	/*
+	 * Synthesize other events stuff not carried within
+	 * attr event - unit, scale, name
+	 */
+	evlist__for_each_entry(evsel_list, counter) {
+		if (!counter->supported)
+			continue;
+
+		/*
+		 * Synthesize unit and scale only if it's defined.
+		 */
+		if (has_unit(counter)) {
+			err = perf_event__synthesize_event_update_unit(tool, counter, process);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel unit.\n");
+				return err;
+			}
+		}
+
+		if (has_scale(counter)) {
+			err = perf_event__synthesize_event_update_scale(tool, counter, process);
+			if (err < 0) {
+				/* name the item that actually failed */
+				pr_err("Couldn't synthesize evsel scale.\n");
+				return err;
+			}
+		}
+
+		if (counter->own_cpus) {
+			err = perf_event__synthesize_event_update_cpus(tool, counter, process);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel cpus.\n");
+				return err;
+			}
+		}
+
+		/*
+		 * Name is needed only for pipe output,
+		 * perf.data carries event names.
+		 */
+		if (is_pipe) {
+			err = perf_event__synthesize_event_update_name(tool, counter, process);
+			if (err < 0) {
+				pr_err("Couldn't synthesize evsel name.\n");
+				return err;
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Handle an attr event: create an evsel from the embedded perf_event_attr,
+ * add it to *pevlist (allocating the evlist on first use) and register the
+ * sample ids that follow the attr in the event.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL when the
+ * event is too short to hold the attr.
+ */
+int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
+			     struct perf_evlist **pevlist)
+{
+	u32 i, ids, n_ids;
+	struct perf_evsel *evsel;
+	struct perf_evlist *evlist = *pevlist;
+	size_t id_offset = (char *)&event->attr.id - (char *)event;
+
+	if (evlist == NULL) {
+		*pevlist = evlist = perf_evlist__new();
+		if (evlist == NULL)
+			return -ENOMEM;
+	}
+
+	/*
+	 * header.size comes from the input stream; if it is smaller than the
+	 * offset of the id array the unsigned subtraction below would wrap
+	 * and yield a huge id count.
+	 */
+	if (event->header.size < id_offset) {
+		pr_debug("attr event too small: %u bytes\n",
+			 (unsigned int)event->header.size);
+		return -EINVAL;
+	}
+
+	evsel = perf_evsel__new(&event->attr.attr);
+	if (evsel == NULL)
+		return -ENOMEM;
+
+	perf_evlist__add(evlist, evsel);
+
+	/* bytes occupied by the id array, then the number of ids */
+	ids = event->header.size;
+	ids -= id_offset;
+	n_ids = ids / sizeof(u64);
+	/*
+	 * We don't have the cpu and thread maps on the header, so
+	 * for allocating the perf_sample_id table we fake 1 cpu and
+	 * hattr->ids threads.
+	 */
+	if (perf_evsel__alloc_id(evsel, 1, n_ids))
+		return -ENOMEM;
+
+	for (i = 0; i < n_ids; i++) {
+		perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]);
+	}
+
+	symbol_conf.nr_events = evlist->nr_entries;
+
+	return 0;
+}
+
+/*
+ * Apply an event-update event to the evsel it refers to (looked up by id in
+ * *pevlist): replaces the unit, name, scale or own cpu map.  Unknown update
+ * types are ignored.
+ *
+ * Returns 0 on success, -EINVAL when there is no evlist or no evsel with
+ * the given id.
+ */
+int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_evlist **pevlist)
+{
+	struct event_update_event *update = &event->event_update;
+	struct event_update_event_scale *scale_data;
+	struct event_update_event_cpus *cpus_data;
+	struct perf_evsel *evsel;
+	struct cpu_map *map;
+
+	if (pevlist == NULL || !*pevlist)
+		return -EINVAL;
+
+	evsel = perf_evlist__id2evsel(*pevlist, update->id);
+	if (!evsel)
+		return -EINVAL;
+
+	switch (update->type) {
+	case PERF_EVENT_UPDATE__SCALE:
+		scale_data = (struct event_update_event_scale *) update->data;
+		evsel->scale = scale_data->scale;
+		break;
+	case PERF_EVENT_UPDATE__CPUS:
+		cpus_data = (struct event_update_event_cpus *) update->data;
+
+		map = cpu_map__new_data(&cpus_data->cpus);
+		if (!map)
+			pr_err("failed to get event_update cpus\n");
+		else
+			evsel->own_cpus = map;
+		break;
+	case PERF_EVENT_UPDATE__NAME:
+		evsel->name = strdup(update->data);
+		break;
+	case PERF_EVENT_UPDATE__UNIT:
+		evsel->unit = strdup(update->data);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Emit a PERF_RECORD_HEADER_TRACING_DATA event through @process, followed
+ * by the tracing data itself and zero padding up to a multiple of 8 bytes
+ * on @fd.
+ *
+ * Returns the padded tracing data size on success, -1 when the tracing data
+ * cannot be collected or the padding cannot be written.
+ */
+int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
+					struct perf_evlist *evlist,
+					perf_event__handler_t process)
+{
+	union perf_event ev;
+	struct tracing_data *tdata;
+	ssize_t size = 0, aligned_size = 0, padding;
+	struct feat_fd ff;
+	int err __maybe_unused = 0;
+
+	/*
+	 * We are going to store the size of the data followed
+	 * by the data contents. Since the fd descriptor is a pipe,
+	 * we cannot seek back to store the size of the data once
+	 * we know it. Instead we:
+	 *
+	 * - write the tracing data to the temp file
+	 * - get/write the data size to pipe
+	 * - write the tracing data from the temp file
+	 *   to the pipe
+	 */
+	tdata = tracing_data_get(&evlist->entries, fd, true);
+	if (!tdata)
+		return -1;
+
+	memset(&ev, 0, sizeof(ev));
+
+	ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
+	size = tdata->size;
+	aligned_size = PERF_ALIGN(size, sizeof(u64));
+	padding = aligned_size - size;
+	ev.tracing_data.header.size = sizeof(ev.tracing_data);
+	ev.tracing_data.size = aligned_size;
+
+	/* NOTE(review): the result of process() is not checked here */
+	process(tool, &ev, NULL, NULL);
+
+	/*
+	 * The put function will copy all the tracing data
+	 * stored in temp file to the pipe.
+	 */
+	tracing_data_put(tdata);
+
+	/* pad the stream so the next event starts 8-byte aligned */
+	ff = (struct feat_fd){ .fd = fd };
+	if (write_padded(&ff, NULL, 0, padding))
+		return -1;
+
+	return aligned_size;
+}
+
+/*
+ * Handle a PERF_RECORD_HEADER_TRACING_DATA event: parse the tracing data
+ * that follows the event in the input, consume the alignment padding
+ * (re-emitting it on stdout when repiping) and attach tracepoint formats to
+ * the session's events.
+ *
+ * Returns the number of bytes consumed after the event (data plus padding),
+ * or -1 on I/O failure or when that total does not match the size announced
+ * in the event.
+ */
+int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_session *session)
+{
+	ssize_t size_read, padding, size = event->tracing_data.size;
+	int fd = perf_data__fd(session->data);
+	off_t offset = lseek(fd, 0, SEEK_CUR);
+	char buf[BUFSIZ];
+
+	/* setup for reading amidst mmap */
+	lseek(fd, offset + sizeof(struct tracing_data_event),
+	      SEEK_SET);
+
+	size_read = trace_report(fd, &session->tevent,
+				 session->repipe);
+	padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
+
+	if (readn(fd, buf, padding) < 0) {
+		pr_err("%s: reading input file", __func__);
+		return -1;
+	}
+	if (session->repipe) {
+		int retw = write(STDOUT_FILENO, buf, padding);
+
+		/*
+		 * A zero-length write legitimately returns 0 when the data
+		 * is already aligned, so only a short or failed write is an
+		 * error.
+		 */
+		if (retw < 0 || retw != padding) {
+			pr_err("%s: repiping tracing data padding", __func__);
+			return -1;
+		}
+	}
+
+	if (size_read + padding != size) {
+		pr_err("%s: tracing data size mismatch", __func__);
+		return -1;
+	}
+
+	perf_evlist__prepare_tracepoint_events(session->evlist,
+					       session->tevent.pevent);
+
+	return size_read + padding;
+}
+
+/*
+ * Emit a PERF_RECORD_HEADER_BUILD_ID event for the dso @pos through
+ * @process.  DSOs that were not hit are skipped with a 0 return; otherwise
+ * the result of @process is returned.
+ */
+int perf_event__synthesize_build_id(struct perf_tool *tool,
+				    struct dso *pos, u16 misc,
+				    perf_event__handler_t process,
+				    struct machine *machine)
+{
+	union perf_event ev;
+	size_t len;
+
+	if (!pos->hit)
+		return 0;
+
+	memset(&ev, 0, sizeof(ev));
+
+	/* room for the name plus terminator, rounded up to NAME_ALIGN */
+	len = pos->long_name_len + 1;
+	len = PERF_ALIGN(len, NAME_ALIGN);
+
+	ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
+	ev.build_id.header.misc = misc;
+	ev.build_id.header.size = sizeof(ev.build_id) + len;
+	ev.build_id.pid = machine->pid;
+	memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id));
+	memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
+
+	return process(tool, &ev, NULL, machine);
+}
+
+/*
+ * Handle a build-id event by passing it, with the file name it carries, to
+ * __event_process_build_id().  That call's result is not inspected; this
+ * function always returns 0.
+ */
+int perf_event__process_build_id(struct perf_tool *tool __maybe_unused,
+				 union perf_event *event,
+				 struct perf_session *session)
+{
+	__event_process_build_id(&event->build_id, event->build_id.filename,
+				 session);
+
+	return 0;
+}
diff --git a/util/header.h b/util/header.h
new file mode 100644
index 0000000..90d4577
--- /dev/null
+++ b/util/header.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_HEADER_H
+#define __PERF_HEADER_H
+
+#include <linux/perf_event.h>
+#include <sys/types.h>
+#include <stdbool.h>
+#include <linux/bitmap.h>
+#include <linux/types.h>
+#include "event.h"
+#include "env.h"
+#include "pmu.h"
+
+/*
+ * Header feature identifiers.  HEADER_FEAT_BITS is the size, in bits, of the
+ * adds_features bitmaps declared in struct perf_file_header and
+ * struct perf_header in this file.
+ */
+enum {
+	HEADER_RESERVED		= 0,	/* always cleared */
+	HEADER_FIRST_FEATURE	= 1,	/* same value as HEADER_TRACING_DATA */
+	HEADER_TRACING_DATA	= 1,
+	HEADER_BUILD_ID,
+
+	HEADER_HOSTNAME,
+	HEADER_OSRELEASE,
+	HEADER_VERSION,
+	HEADER_ARCH,
+	HEADER_NRCPUS,
+	HEADER_CPUDESC,
+	HEADER_CPUID,
+	HEADER_TOTAL_MEM,
+	HEADER_CMDLINE,
+	HEADER_EVENT_DESC,
+	HEADER_CPU_TOPOLOGY,
+	HEADER_NUMA_TOPOLOGY,
+	HEADER_BRANCH_STACK,
+	HEADER_PMU_MAPPINGS,
+	HEADER_GROUP_DESC,
+	HEADER_AUXTRACE,
+	HEADER_STAT,
+	HEADER_CACHE,
+	HEADER_SAMPLE_TIME,
+	HEADER_MEM_TOPOLOGY,
+	HEADER_LAST_FEATURE,		/* one past the last feature defined above */
+	HEADER_FEAT_BITS	= 256,
+};
+
+/* Header format version; this is the type of perf_header.version. */
+enum perf_header_version {
+	PERF_HEADER_VERSION_1,
+	PERF_HEADER_VERSION_2,
+};
+
+/*
+ * An (offset, size) pair describing one region; used for the attrs, data
+ * and event_types members of struct perf_file_header.
+ * NOTE(review): presumably both values are in bytes and the offset is
+ * relative to the start of the file - confirm against the reader.
+ */
+struct perf_file_section {
+	u64 offset;
+	u64 size;
+};
+
+/*
+ * File header layout: magic and sizes, followed by the section descriptors
+ * and a HEADER_FEAT_BITS-wide bitmap of features.
+ */
+struct perf_file_header {
+	u64				magic;
+	u64				size;
+	u64				attr_size;
+	struct perf_file_section	attrs;
+	struct perf_file_section	data;
+	/* event_types is ignored */
+	struct perf_file_section	event_types;
+	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
+};
+
+/*
+ * Reduced header carrying only magic and size.
+ * NOTE(review): presumably the header emitted by perf_header__write_pipe()
+ * for pipe mode - confirm against the implementation.
+ */
+struct perf_pipe_file_header {
+	u64				magic;
+	u64				size;
+};
+
+struct perf_header;
+
+int perf_file_header__read(struct perf_file_header *header,
+			   struct perf_header *ph, int fd);
+
+/*
+ * In-memory header state: format version, a needs_swap flag, the data and
+ * feature offsets/sizes, the feature bitmap and the recorded environment.
+ */
+struct perf_header {
+	enum perf_header_version	version;
+	bool				needs_swap;
+	u64				data_offset;
+	u64				data_size;
+	u64				feat_offset;
+	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
+	struct perf_env 	env;
+};
+
+struct perf_evlist;
+struct perf_session;
+
+int perf_session__read_header(struct perf_session *session);
+int perf_session__write_header(struct perf_session *session,
+			       struct perf_evlist *evlist,
+			       int fd, bool at_exit);
+int perf_header__write_pipe(int fd);
+
+void perf_header__set_feat(struct perf_header *header, int feat);
+void perf_header__clear_feat(struct perf_header *header, int feat);
+bool perf_header__has_feat(const struct perf_header *header, int feat);
+
+int perf_header__set_cmdline(int argc, const char **argv);
+
+int perf_header__process_sections(struct perf_header *header, int fd,
+				  void *data,
+				  int (*process)(struct perf_file_section *section,
+				  struct perf_header *ph,
+				  int feat, int fd, void *data));
+
+int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full);
+
+int perf_event__synthesize_features(struct perf_tool *tool,
+				    struct perf_session *session,
+				    struct perf_evlist *evlist,
+				    perf_event__handler_t process);
+
+int perf_event__synthesize_extra_attr(struct perf_tool *tool,
+				      struct perf_evlist *evsel_list,
+				      perf_event__handler_t process,
+				      bool is_pipe);
+
+int perf_event__process_feature(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_session *session);
+
+int perf_event__synthesize_attr(struct perf_tool *tool,
+				struct perf_event_attr *attr, u32 ids, u64 *id,
+				perf_event__handler_t process);
+int perf_event__synthesize_attrs(struct perf_tool *tool,
+				 struct perf_session *session,
+				 perf_event__handler_t process);
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool,
+					     struct perf_evsel *evsel,
+					     perf_event__handler_t process);
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool,
+					      struct perf_evsel *evsel,
+					      perf_event__handler_t process);
+int perf_event__synthesize_event_update_name(struct perf_tool *tool,
+					     struct perf_evsel *evsel,
+					     perf_event__handler_t process);
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
+					     struct perf_evsel *evsel,
+					     perf_event__handler_t process);
+int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
+			     struct perf_evlist **pevlist);
+int perf_event__process_event_update(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_evlist **pevlist);
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
+
+int perf_event__synthesize_tracing_data(struct perf_tool *tool,
+					int fd, struct perf_evlist *evlist,
+					perf_event__handler_t process);
+int perf_event__process_tracing_data(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_session *session);
+
+int perf_event__synthesize_build_id(struct perf_tool *tool,
+				    struct dso *pos, u16 misc,
+				    perf_event__handler_t process,
+				    struct machine *machine);
+int perf_event__process_build_id(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_session *session);
+bool is_perf_magic(u64 magic);
+
+#define NAME_ALIGN 64
+
+struct feat_fd;
+
+int do_write(struct feat_fd *fd, const void *buf, size_t size);
+
+int write_padded(struct feat_fd *fd, const void *bf,
+		 size_t count, size_t count_aligned);
+
+/*
+ * arch specific callback
+ */
+int get_cpuid(char *buffer, size_t sz);
+
+char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused);
+int strcmp_cpuid_str(const char *s1, const char *s2);
+#endif /* __PERF_HEADER_H */
diff --git a/util/help-unknown-cmd.c b/util/help-unknown-cmd.c
new file mode 100644
index 0000000..4f07a5b
--- /dev/null
+++ b/util/help-unknown-cmd.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cache.h"
+#include "config.h"
+#include <poll.h>
+#include <stdio.h>
+#include <subcmd/help.h>
+#include "../builtin.h"
+#include "levenshtein.h"
+
+static int autocorrect;
+
+/*
+ * perf_config() callback: stores the integer value of "help.autocorrect"
+ * in the file-scope autocorrect variable.  Any other key is accepted and
+ * ignored (returns 0).
+ */
+static int perf_unknown_cmd_config(const char *var, const char *value,
+				   void *cb __maybe_unused)
+{
+	if (strcmp(var, "help.autocorrect") != 0)
+		return 0;
+
+	return perf_config_int(&autocorrect, var, value);
+}
+
+/*
+ * qsort() comparator over an array of struct cmdname pointers: orders by
+ * ->len first (help_unknown_cmd() stores the similarity index there) and
+ * breaks ties by name.
+ */
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+	const struct cmdname *a = *(const struct cmdname *const *)p1;
+	const struct cmdname *b = *(const struct cmdname *const *)p2;
+	int len_a = a->len;
+	int len_b = b->len;
+
+	if (len_a != len_b)
+		return len_a - len_b;
+
+	return strcmp(a->name, b->name);
+}
+
+/*
+ * Move every entry of @old onto the end of @cmds, growing @cmds->names
+ * first when it is too small.  On success @old is left empty and its
+ * names array is freed.
+ *
+ * Returns 0 on success, or -1 if the array could not be grown; in that
+ * case neither list has been modified.
+ */
+static int add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
+{
+	unsigned int i, nr = cmds->cnt + old->cnt;
+	size_t new_alloc;
+	void *tmp;
+
+	if (nr > cmds->alloc) {
+		/* Choose bigger one to alloc */
+		new_alloc = alloc_nr(cmds->alloc);
+		if (new_alloc < nr)
+			new_alloc = nr;
+		tmp = realloc(cmds->names, new_alloc * sizeof(*cmds->names));
+		if (!tmp)
+			return -1;
+		/*
+		 * Record the new capacity only once the allocation succeeded,
+		 * so ->alloc never claims more room than ->names really has.
+		 */
+		cmds->names = tmp;
+		cmds->alloc = new_alloc;
+	}
+	for (i = 0; i < old->cnt; i++)
+		cmds->names[cmds->cnt++] = old->names[i];
+	zfree(&old->names);
+	old->cnt = 0;
+	return 0;
+}
+
+/*
+ * Report that @cmd is not a known perf command and suggest the closest
+ * command names, ranked by levenshtein() distance.
+ *
+ * If help.autocorrect is non-zero and there is exactly one best candidate,
+ * that candidate's name is returned, after a delay of autocorrect tenths of
+ * a second when the setting is positive.  In every other case, including
+ * failure to build the command list, the process exits with status 1.
+ */
+const char *help_unknown_cmd(const char *cmd)
+{
+	unsigned int i, n = 0, best_similarity = 0;
+	struct cmdnames main_cmds, other_cmds;
+
+	memset(&main_cmds, 0, sizeof(main_cmds));
+	memset(&other_cmds, 0, sizeof(other_cmds));
+
+	perf_config(perf_unknown_cmd_config, NULL);
+
+	load_command_list("perf-", &main_cmds, &other_cmds);
+
+	if (add_cmd_list(&main_cmds, &other_cmds) < 0) {
+		fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n");
+		goto end;
+	}
+	/* The element size is that of one array slot, not of the array pointer. */
+	qsort(main_cmds.names, main_cmds.cnt,
+	      sizeof(*main_cmds.names), cmdname_compare);
+	uniq(&main_cmds);
+
+	if (main_cmds.cnt) {
+		/* This reuses cmdname->len for similarity index */
+		for (i = 0; i < main_cmds.cnt; ++i)
+			main_cmds.names[i]->len =
+				levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+		qsort(main_cmds.names, main_cmds.cnt,
+		      sizeof(*main_cmds.names), levenshtein_compare);
+
+		/* Count how many entries tie for the best (lowest) score. */
+		best_similarity = main_cmds.names[0]->len;
+		n = 1;
+		while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+			++n;
+	}
+
+	if (autocorrect && n == 1) {
+		const char *assumed = main_cmds.names[0]->name;
+
+		/*
+		 * Detach the chosen entry before cleaning up so that the name
+		 * handed back to the caller is not released with the rest.
+		 */
+		main_cmds.names[0] = NULL;
+		clean_cmdnames(&main_cmds);
+		fprintf(stderr, "WARNING: You called a perf program named '%s', "
+			"which does not exist.\n"
+			"Continuing under the assumption that you meant '%s'\n",
+			cmd, assumed);
+		if (autocorrect > 0) {
+			fprintf(stderr, "in %0.1f seconds automatically...\n",
+				(float)autocorrect/10.0);
+			/* poll() with no fds is used as a millisecond sleep. */
+			poll(NULL, 0, autocorrect * 100);
+		}
+		return assumed;
+	}
+
+	fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
+
+	if (main_cmds.cnt && best_similarity < 6) {
+		fprintf(stderr, "\nDid you mean %s?\n",
+			n < 2 ? "this": "one of these");
+
+		for (i = 0; i < n; i++)
+			fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+	}
+end:
+	exit(1);
+}
diff --git a/util/help-unknown-cmd.h b/util/help-unknown-cmd.h
new file mode 100644
index 0000000..e69de29
diff --git a/util/hist.c b/util/hist.c
new file mode 100644
index 0000000..4d602fb
--- /dev/null
+++ b/util/hist.c
@@ -0,0 +1,2643 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include "build-id.h"
+#include "hist.h"
+#include "map.h"
+#include "session.h"
+#include "namespaces.h"
+#include "sort.h"
+#include "units.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "annotate.h"
+#include "srcline.h"
+#include "thread.h"
+#include "ui/progress.h"
+#include <errno.h>
+#include <math.h>
+#include <inttypes.h>
+#include <sys/param.h>
+
+static bool hists__filter_entry_by_dso(struct hists *hists,
+				       struct hist_entry *he);
+static bool hists__filter_entry_by_thread(struct hists *hists,
+					  struct hist_entry *he);
+static bool hists__filter_entry_by_symbol(struct hists *hists,
+					  struct hist_entry *he);
+static bool hists__filter_entry_by_socket(struct hists *hists,
+					  struct hist_entry *he);
+
+/* Return the width currently recorded for column @col of @hists. */
+u16 hists__col_len(struct hists *hists, enum hist_column col)
+{
+	return hists->col_len[col];
+}
+
+/* Unconditionally set the recorded width of column @col to @len. */
+void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len)
+{
+	hists->col_len[col] = len;
+}
+
+/*
+ * Widen column @col to @len if @len exceeds the recorded width.
+ * Returns true when the width was updated, false when it was left alone.
+ */
+bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len)
+{
+	if (len <= hists__col_len(hists, col))
+		return false;
+
+	hists__set_col_len(hists, col, len);
+	return true;
+}
+
+/* Set the recorded width of every column below HISTC_NR_COLS to zero. */
+void hists__reset_col_len(struct hists *hists)
+{
+	enum hist_column col = 0;
+
+	while (col < HISTC_NR_COLS) {
+		hists__set_col_len(hists, col, 0);
+		++col;
+	}
+}
+
+/*
+ * Raise the DSO-type column @dso to the width of an unresolved address
+ * (BITS_PER_LONG / 4 hex digits), but only when it is currently narrower
+ * and none of symbol_conf.col_width_list_str, .field_sep or .dso_list is
+ * set.
+ */
+static void hists__set_unres_dso_col_len(struct hists *hists, int dso)
+{
+	const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
+
+	if (hists__col_len(hists, dso) >= unresolved_col_width)
+		return;
+
+	if (symbol_conf.col_width_list_str || symbol_conf.field_sep ||
+	    symbol_conf.dso_list)
+		return;
+
+	hists__set_col_len(hists, dso, unresolved_col_width);
+}
+
+/*
+ * Grow the column widths recorded in @hists so that entry @h fits.
+ *
+ * Each section below derives a candidate width for one group of columns
+ * (symbol, comm/thread, dso, parent, branch info, memory info, fixed-width
+ * fields, srcline/srcfile, transaction, trace output) and feeds it to
+ * hists__new_col_len(), which only ever widens a column.
+ */
+void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
+{
+	const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
+	int symlen;
+	u16 len;
+
+	/*
+	 * +4 accounts for '[x] ' priv level info
+	 * +2 accounts for 0x prefix on raw addresses
+	 * +3 accounts for ' y ' symtab origin info
+	 */
+	if (h->ms.sym) {
+		symlen = h->ms.sym->namelen + 4;
+		if (verbose > 0)
+			symlen += BITS_PER_LONG / 4 + 2 + 3;
+		hists__new_col_len(hists, HISTC_SYMBOL, symlen);
+	} else {
+		symlen = unresolved_col_width + 4 + 2;
+		hists__new_col_len(hists, HISTC_SYMBOL, symlen);
+		hists__set_unres_dso_col_len(hists, HISTC_DSO);
+	}
+
+	/* The thread column is forced to comm width + 8 whenever comm grows. */
+	len = thread__comm_len(h->thread);
+	if (hists__new_col_len(hists, HISTC_COMM, len))
+		hists__set_col_len(hists, HISTC_THREAD, len + 8);
+
+	if (h->ms.map) {
+		len = dso__name_len(h->ms.map->dso);
+		hists__new_col_len(hists, HISTC_DSO, len);
+	}
+
+	if (h->parent)
+		hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen);
+
+	/* Branch source/target columns, sized like the plain symbol/dso ones. */
+	if (h->branch_info) {
+		if (h->branch_info->from.sym) {
+			symlen = (int)h->branch_info->from.sym->namelen + 4;
+			if (verbose > 0)
+				symlen += BITS_PER_LONG / 4 + 2 + 3;
+			hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+
+			/* from.map is dereferenced unchecked once from.sym is set */
+			symlen = dso__name_len(h->branch_info->from.map->dso);
+			hists__new_col_len(hists, HISTC_DSO_FROM, symlen);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+			hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM);
+		}
+
+		if (h->branch_info->to.sym) {
+			symlen = (int)h->branch_info->to.sym->namelen + 4;
+			if (verbose > 0)
+				symlen += BITS_PER_LONG / 4 + 2 + 3;
+			hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+
+			/* to.map is dereferenced unchecked once to.sym is set */
+			symlen = dso__name_len(h->branch_info->to.map->dso);
+			hists__new_col_len(hists, HISTC_DSO_TO, symlen);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+			hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
+		}
+
+		if (h->branch_info->srcline_from)
+			hists__new_col_len(hists, HISTC_SRCLINE_FROM,
+					strlen(h->branch_info->srcline_from));
+		if (h->branch_info->srcline_to)
+			hists__new_col_len(hists, HISTC_SRCLINE_TO,
+					strlen(h->branch_info->srcline_to));
+	}
+
+	/* Memory access columns (data/instruction address, dso, phys addr). */
+	if (h->mem_info) {
+		if (h->mem_info->daddr.sym) {
+			symlen = (int)h->mem_info->daddr.sym->namelen + 4
+			       + unresolved_col_width + 2;
+			hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
+					   symlen);
+			hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+					   symlen + 1);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
+					   symlen);
+			hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+					   symlen);
+		}
+
+		if (h->mem_info->iaddr.sym) {
+			symlen = (int)h->mem_info->iaddr.sym->namelen + 4
+			       + unresolved_col_width + 2;
+			hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL,
+					   symlen);
+		} else {
+			symlen = unresolved_col_width + 4 + 2;
+			hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL,
+					   symlen);
+		}
+
+		if (h->mem_info->daddr.map) {
+			symlen = dso__name_len(h->mem_info->daddr.map->dso);
+			hists__new_col_len(hists, HISTC_MEM_DADDR_DSO,
+					   symlen);
+		} else {
+			/* NOTE(review): this symlen value is never read afterwards */
+			symlen = unresolved_col_width + 4 + 2;
+			hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
+		}
+
+		hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR,
+				   unresolved_col_width + 4 + 2);
+
+	} else {
+		symlen = unresolved_col_width + 4 + 2;
+		hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
+		hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, symlen);
+		hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
+	}
+
+	/* Columns with a fixed minimum width, independent of @h. */
+	hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
+	hists__new_col_len(hists, HISTC_CPU, 3);
+	hists__new_col_len(hists, HISTC_SOCKET, 6);
+	hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
+	hists__new_col_len(hists, HISTC_MEM_TLB, 22);
+	hists__new_col_len(hists, HISTC_MEM_SNOOP, 12);
+	hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3);
+	hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
+	hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
+
+	if (h->srcline) {
+		/* never narrower than the srcline column header itself */
+		len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header));
+		hists__new_col_len(hists, HISTC_SRCLINE, len);
+	}
+
+	if (h->srcfile)
+		hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile));
+
+	if (h->transaction)
+		hists__new_col_len(hists, HISTC_TRANSACTION,
+				   hist_entry__transaction_len());
+
+	if (h->trace_output)
+		hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output));
+}
+
+/*
+ * Reset all column widths, then recompute them from the first @max_rows
+ * entries of the output tree.  Filtered entries still count towards
+ * @max_rows but do not contribute to the widths.
+ */
+void hists__output_recalc_col_len(struct hists *hists, int max_rows)
+{
+	struct rb_node *nd = rb_first(&hists->entries);
+	int row = 0;
+
+	hists__reset_col_len(hists);
+
+	for (; nd && row++ < max_rows; nd = rb_next(nd)) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+
+		if (he->filtered)
+			continue;
+
+		hists__calc_col_len(hists, he);
+	}
+}
+
+/*
+ * Add @period to the per-cpumode counter of @he_stat selected by @cpumode
+ * (kernel, user, guest kernel or guest user).  Any other cpumode value is
+ * ignored.
+ */
+static void he_stat__add_cpumode_period(struct he_stat *he_stat,
+					unsigned int cpumode, u64 period)
+{
+	if (cpumode == PERF_RECORD_MISC_KERNEL)
+		he_stat->period_sys += period;
+	else if (cpumode == PERF_RECORD_MISC_USER)
+		he_stat->period_us += period;
+	else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL)
+		he_stat->period_guest_sys += period;
+	else if (cpumode == PERF_RECORD_MISC_GUEST_USER)
+		he_stat->period_guest_us += period;
+}
+
+/* Account one more event carrying @period and @weight in @he_stat. */
+static void he_stat__add_period(struct he_stat *he_stat, u64 period,
+				u64 weight)
+{
+	he_stat->nr_events++;
+	he_stat->weight += weight;
+	he_stat->period += period;
+}
+
+/*
+ * Accumulate every counter of @src (total and per-cpumode periods, event
+ * count and weight) into @dest.  @src is not modified.
+ */
+static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
+{
+	dest->period		+= src->period;
+	dest->period_sys	+= src->period_sys;
+	dest->period_us		+= src->period_us;
+	dest->period_guest_sys	+= src->period_guest_sys;
+	dest->period_guest_us	+= src->period_guest_us;
+	dest->nr_events		+= src->nr_events;
+	dest->weight		+= src->weight;
+}
+
+/*
+ * Scale the period and event count of @he_stat down to 7/8 of their value
+ * using integer arithmetic.  The per-cpumode periods and the weight are
+ * left untouched.
+ */
+static void he_stat__decay(struct he_stat *he_stat)
+{
+	he_stat->period = (he_stat->period * 7) / 8;
+	he_stat->nr_events = (he_stat->nr_events * 7) / 8;
+	/* XXX need decay for weight too? */
+}
+
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
+
+/*
+ * Decay the statistics of @he and, for non-leaf entries, of all its
+ * children in hroot_out.
+ *
+ * Returns true when the entry's period is zero, either already on entry or
+ * after decaying; the callers in this file delete such entries.  An entry
+ * whose period is already zero is reported without being touched.
+ */
+static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
+{
+	u64 prev_period = he->stat.period;
+	u64 diff;
+
+	if (prev_period == 0)
+		return true;
+
+	he_stat__decay(&he->stat);
+	if (symbol_conf.cumulate_callchain)
+		he_stat__decay(he->stat_acc);
+	decay_callchain(he->callchain);
+
+	/* period lost by this entry during this decay step */
+	diff = prev_period - he->stat.period;
+
+	/* only depth-0 entries are reflected in the hists-wide totals */
+	if (!he->depth) {
+		hists->stats.total_period -= diff;
+		if (!he->filtered)
+			hists->stats.total_non_filtered_period -= diff;
+	}
+
+	if (!he->leaf) {
+		struct hist_entry *child;
+		struct rb_node *node = rb_first(&he->hroot_out);
+		while (node) {
+			child = rb_entry(node, struct hist_entry, rb_node);
+			/* advance before a possible delete of @child */
+			node = rb_next(node);
+
+			if (hists__decay_entry(hists, child))
+				hists__delete_entry(hists, child);
+		}
+	}
+
+	return he->stat.period == 0;
+}
+
+/*
+ * Unlink @he from both the input tree and the output tree it lives in,
+ * update the entry counters of @hists and free the entry.
+ *
+ * A child entry is removed from its parent's hroot_in/hroot_out; a
+ * top-level entry is removed from the collapsed or current input tree
+ * (depending on need_collapse) and from hists->entries.
+ */
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
+{
+	struct rb_root *in_tree;
+	struct rb_root *out_tree;
+
+	if (!he->parent_he) {
+		in_tree = hists__has(hists, need_collapse) ?
+				&hists->entries_collapsed : hists->entries_in;
+		out_tree = &hists->entries;
+	} else {
+		in_tree  = &he->parent_he->hroot_in;
+		out_tree = &he->parent_he->hroot_out;
+	}
+
+	rb_erase(&he->rb_node_in, in_tree);
+	rb_erase(&he->rb_node, out_tree);
+
+	hists->nr_entries--;
+	if (!he->filtered)
+		hists->nr_non_filtered_entries--;
+
+	hist_entry__delete(he);
+}
+
+/*
+ * Walk the output tree and delete every entry that is zapped or has
+ * decayed to nothing:
+ *  - @zap_user drops entries whose level is '.',
+ *  - @zap_kernel drops entries whose level is anything else,
+ *  - the remaining entries are decayed and dropped if
+ *    hists__decay_entry() reports them as empty.
+ */
+void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
+{
+	struct rb_node *nd, *following;
+
+	for (nd = rb_first(&hists->entries); nd; nd = following) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+		bool drop;
+
+		/* Fetch the successor first: @he may be erased below. */
+		following = rb_next(nd);
+
+		if (zap_user && he->level == '.')
+			drop = true;
+		else if (zap_kernel && he->level != '.')
+			drop = true;
+		else
+			drop = hists__decay_entry(hists, he);
+
+		if (drop)
+			hists__delete_entry(hists, he);
+	}
+}
+
+/* Delete every entry reachable from the output tree of @hists. */
+void hists__delete_entries(struct hists *hists)
+{
+	struct rb_node *nd, *following;
+
+	for (nd = rb_first(&hists->entries); nd; nd = following) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+
+		/* Grab the successor before @he is unlinked and freed. */
+		following = rb_next(nd);
+
+		hists__delete_entry(hists, he);
+	}
+}
+
+/*
+ * histogram, sorted on item, collects periods
+ */
+
+/*
+ * Initialise @he as a copy of @template and acquire everything the new
+ * entry needs of its own: an accumulated-stat block (when callchains are
+ * cumulated), references on the maps and the thread, a private copy of the
+ * branch info and of the raw sample data.
+ *
+ * With cumulate_callchain set and @sample_self false, the entry's own stat
+ * is zeroed after it has been copied into stat_acc.
+ *
+ * Returns 0 on success or -ENOMEM, in which case the references and
+ * allocations made so far have been released again.
+ */
+static int hist_entry__init(struct hist_entry *he,
+			    struct hist_entry *template,
+			    bool sample_self)
+{
+	/* shallow copy; pointer members are fixed up one by one below */
+	*he = *template;
+
+	if (symbol_conf.cumulate_callchain) {
+		he->stat_acc = malloc(sizeof(he->stat));
+		if (he->stat_acc == NULL)
+			return -ENOMEM;
+		memcpy(he->stat_acc, &he->stat, sizeof(he->stat));
+		if (!sample_self)
+			memset(&he->stat, 0, sizeof(he->stat));
+	}
+
+	map__get(he->ms.map);
+
+	if (he->branch_info) {
+		/*
+		 * This branch info is (a part of) allocated from
+		 * sample__resolve_bstack() and will be freed after
+		 * adding new entries.  So we need to save a copy.
+		 */
+		he->branch_info = malloc(sizeof(*he->branch_info));
+		if (he->branch_info == NULL) {
+			map__zput(he->ms.map);
+			free(he->stat_acc);
+			return -ENOMEM;
+		}
+
+		memcpy(he->branch_info, template->branch_info,
+		       sizeof(*he->branch_info));
+
+		map__get(he->branch_info->from.map);
+		map__get(he->branch_info->to.map);
+	}
+
+	/* mem_info itself is shared with the template, not copied */
+	if (he->mem_info) {
+		map__get(he->mem_info->iaddr.map);
+		map__get(he->mem_info->daddr.map);
+	}
+
+	if (symbol_conf.use_callchain)
+		callchain_init(he->callchain);
+
+	if (he->raw_data) {
+		he->raw_data = memdup(he->raw_data, he->raw_size);
+
+		if (he->raw_data == NULL) {
+			/* undo every reference/allocation taken above */
+			map__put(he->ms.map);
+			if (he->branch_info) {
+				map__put(he->branch_info->from.map);
+				map__put(he->branch_info->to.map);
+				free(he->branch_info);
+			}
+			if (he->mem_info) {
+				map__put(he->mem_info->iaddr.map);
+				map__put(he->mem_info->daddr.map);
+			}
+			free(he->stat_acc);
+			return -ENOMEM;
+		}
+	}
+	INIT_LIST_HEAD(&he->pairs.node);
+	thread__get(he->thread);
+	he->hroot_in  = RB_ROOT;
+	he->hroot_out = RB_ROOT;
+
+	/* without hierarchy mode every entry is a leaf */
+	if (!symbol_conf.report_hierarchy)
+		he->leaf = true;
+
+	return 0;
+}
+
+/*
+ * Default ->new operation: zero-allocate a struct hist_entry followed by
+ * @size extra bytes (hist_entry__new() passes the callchain root size).
+ */
+static void *hist_entry__zalloc(size_t size)
+{
+	return zalloc(size + sizeof(struct hist_entry));
+}
+
+/* Default ->free operation: release memory obtained by hist_entry__zalloc(). */
+static void hist_entry__free(void *ptr)
+{
+	free(ptr);
+}
+
+/* Allocation ops used by hist_entry__new() when the template has none. */
+static struct hist_entry_ops default_ops = {
+	.new	= hist_entry__zalloc,
+	.free	= hist_entry__free,
+};
+
+/*
+ * Allocate and initialise a new entry from @template using the template's
+ * ops.  A template without ops gets default_ops assigned as a side effect.
+ * Room for a callchain root is added when callchains are in use.
+ *
+ * Returns the new entry, or NULL if allocation or initialisation failed.
+ */
+static struct hist_entry *hist_entry__new(struct hist_entry *template,
+					  bool sample_self)
+{
+	struct hist_entry_ops *ops = template->ops;
+	size_t extra = 0;
+	struct hist_entry *he;
+
+	if (!ops)
+		ops = template->ops = &default_ops;
+
+	if (symbol_conf.use_callchain)
+		extra = sizeof(struct callchain_root);
+
+	he = ops->new(extra);
+	if (!he)
+		return NULL;
+
+	if (hist_entry__init(he, template, sample_self)) {
+		ops->free(he);
+		return NULL;
+	}
+
+	return he;
+}
+
+/*
+ * Filter bits for an entry with parent symbol @parent: the
+ * HIST_FILTER__PARENT bit when exclude_other is set and there is no
+ * parent, otherwise 0.
+ */
+static u8 symbol__parent_filter(const struct symbol *parent)
+{
+	if (parent != NULL || !symbol_conf.exclude_other)
+		return 0;
+
+	return 1 << HIST_FILTER__PARENT;
+}
+
+/*
+ * When callchains are in use, add @period to the callchain period totals
+ * of the hists @he belongs to; the non-filtered total only grows for
+ * unfiltered entries.
+ */
+static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
+{
+	struct hists *hists;
+
+	if (!symbol_conf.use_callchain)
+		return;
+
+	hists = he->hists;
+	hists->callchain_period += period;
+
+	if (he->filtered)
+		return;
+
+	hists->callchain_non_filtered_period += period;
+}
+
+/*
+ * Look @entry up in the input tree of @hists, merging it into a matching
+ * entry or inserting a newly allocated copy.
+ *
+ * On a match the period/weight of @entry are added to the existing entry
+ * (to ->stat only when @sample_self, to ->stat_acc when callchains are
+ * cumulated), @entry's mem_info reference is dropped and the existing
+ * entry's map is refreshed.  Otherwise a new entry is created with
+ * hist_entry__new() and linked into the tree.
+ *
+ * In both cases the per-cpumode period for @al->cpumode is accounted.
+ * Returns the matching or new entry, or NULL if allocation failed.
+ */
+static struct hist_entry *hists__findnew_entry(struct hists *hists,
+					       struct hist_entry *entry,
+					       struct addr_location *al,
+					       bool sample_self)
+{
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	int64_t cmp;
+	u64 period = entry->stat.period;
+	u64 weight = entry->stat.weight;
+
+	p = &hists->entries_in->rb_node;
+
+	while (*p != NULL) {
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		/*
+		 * Make sure that it receives arguments in a same order as
+		 * hist_entry__collapse() so that we can use an appropriate
+		 * function when searching an entry regardless which sort
+		 * keys were used.
+		 */
+		cmp = hist_entry__cmp(he, entry);
+
+		if (!cmp) {
+			if (sample_self) {
+				he_stat__add_period(&he->stat, period, weight);
+				hist_entry__add_callchain_period(he, period);
+			}
+			if (symbol_conf.cumulate_callchain)
+				he_stat__add_period(he->stat_acc, period, weight);
+
+			/*
+			 * This mem info was allocated from sample__resolve_mem
+			 * and will not be used anymore.
+			 */
+			mem_info__zput(entry->mem_info);
+
+			/* If the map of an existing hist_entry has
+			 * become out-of-date due to an exec() or
+			 * similar, update it.  Otherwise we will
+			 * mis-adjust symbol addresses when computing
+			 * the history counter to increment.
+			 */
+			if (he->ms.map != entry->ms.map) {
+				map__put(he->ms.map);
+				he->ms.map = map__get(entry->ms.map);
+			}
+			goto out;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	/* no match: @p/@parent now describe the insertion point */
+	he = hist_entry__new(entry, sample_self);
+	if (!he)
+		return NULL;
+
+	if (sample_self)
+		hist_entry__add_callchain_period(he, period);
+	hists->nr_entries++;
+
+	rb_link_node(&he->rb_node_in, parent, p);
+	rb_insert_color(&he->rb_node_in, hists->entries_in);
+out:
+	if (sample_self)
+		he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+	if (symbol_conf.cumulate_callchain)
+		he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
+	return he;
+}
+
+/*
+ * Build a template hist_entry on the stack from @al, @sample and the
+ * optional @bi/@mi, then merge it into @hists (or insert a copy) through
+ * hists__findnew_entry().
+ *
+ * @ops may be NULL, in which case hist_entry__new() falls back to the
+ * default allocation ops.  Returns the resulting entry, or NULL on
+ * allocation failure.
+ */
+static struct hist_entry*
+__hists__add_entry(struct hists *hists,
+		   struct addr_location *al,
+		   struct symbol *sym_parent,
+		   struct branch_info *bi,
+		   struct mem_info *mi,
+		   struct perf_sample *sample,
+		   bool sample_self,
+		   struct hist_entry_ops *ops)
+{
+	struct namespaces *ns = thread__namespaces(al->thread);
+	struct hist_entry entry = {
+		.thread	= al->thread,
+		.comm = thread__comm(al->thread),
+		.cgroup_id = {
+			.dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0,
+			.ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0,
+		},
+		.ms = {
+			.map	= al->map,
+			.sym	= al->sym,
+		},
+		.srcline = al->srcline ? strdup(al->srcline) : NULL,
+		.socket	 = al->socket,
+		.cpu	 = al->cpu,
+		.cpumode = al->cpumode,
+		.ip	 = al->addr,
+		.level	 = al->level,
+		.stat = {
+			.nr_events = 1,
+			.period	= sample->period,
+			.weight = sample->weight,
+		},
+		.parent = sym_parent,
+		.filtered = symbol__parent_filter(sym_parent) | al->filtered,
+		.hists	= hists,
+		.branch_info = bi,
+		.mem_info = mi,
+		.transaction = sample->transaction,
+		.raw_data = sample->raw_data,
+		.raw_size = sample->raw_size,
+		.ops = ops,
+	};
+	struct hist_entry *he;
+
+	he = hists__findnew_entry(hists, &entry, al, sample_self);
+
+	/*
+	 * Only a newly created entry takes over the template's srcline
+	 * pointer (hist_entry__init() copies the template wholesale).  When
+	 * an existing entry was matched, or nothing could be allocated, the
+	 * string duplicated above has no owner and must be released here.
+	 *
+	 * free_srcline() rather than free() is used.
+	 * NOTE(review): this assumes a sort comparator may have filled
+	 * entry.srcline with a non-heap "unknown" marker that free_srcline()
+	 * knows to skip - confirm against sort.c and srcline.c.
+	 */
+	if (he == NULL || he->srcline != entry.srcline)
+		free_srcline(entry.srcline);
+
+	return he;
+}
+
+/*
+ * Add a sample to @hists using the default entry allocation ops; thin
+ * wrapper around __hists__add_entry() with ops == NULL.
+ * Returns the matching or new entry, or NULL on failure.
+ */
+struct hist_entry *hists__add_entry(struct hists *hists,
+				    struct addr_location *al,
+				    struct symbol *sym_parent,
+				    struct branch_info *bi,
+				    struct mem_info *mi,
+				    struct perf_sample *sample,
+				    bool sample_self)
+{
+	return __hists__add_entry(hists, al, sym_parent, bi, mi,
+				  sample, sample_self, NULL);
+}
+
+/*
+ * Same as hists__add_entry(), but new entries are allocated and freed
+ * through the caller-supplied @ops.
+ */
+struct hist_entry *hists__add_entry_ops(struct hists *hists,
+					struct hist_entry_ops *ops,
+					struct addr_location *al,
+					struct symbol *sym_parent,
+					struct branch_info *bi,
+					struct mem_info *mi,
+					struct perf_sample *sample,
+					bool sample_self)
+{
+	return __hists__add_entry(hists, al, sym_parent, bi, mi,
+				  sample, sample_self, ops);
+}
+
+/* No-op iterator step: does nothing with @iter or @al and returns 0. */
+static int
+iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+		    struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+/* No-op iterator step: adds nothing and returns 0. */
+static int
+iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+			struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Resolve the memory info of the iterator's sample and stash it in
+ * iter->priv.  Returns 0 on success, -ENOMEM if it could not be resolved.
+ */
+static int
+iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct mem_info *mi = sample__resolve_mem(iter->sample, al);
+
+	if (!mi)
+		return -ENOMEM;
+
+	iter->priv = mi;
+	return 0;
+}
+
+/*
+ * Add the hist entry for a memory sample, using the mem_info prepared in
+ * iter->priv.  Returns 0 on success, -EINVAL when no mem_info is present
+ * and -ENOMEM when the entry could not be added.
+ */
+static int
+iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct mem_info *mi = iter->priv;
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry *he;
+
+	if (!mi)
+		return -EINVAL;
+
+	/*
+	 * The period is overwritten with the sample weight (at least 1):
+	 * hists__collapse_resort() sorts purely on periods, and we want the
+	 * ordering to reflect nr_events * weight, which falls out of
+	 * he_stat__add_period() once period == weight.
+	 */
+	sample->period = sample->weight ? sample->weight : 1;
+
+	he = hists__add_entry(evsel__hists(iter->evsel), al, iter->parent,
+			      NULL, mi, sample, true);
+	if (!he)
+		return -ENOMEM;
+
+	iter->he = he;
+	return 0;
+}
+
+/*
+ * Finish a memory sample: bump the sample count and append the callchain
+ * to the entry added earlier.  Returns -EINVAL when no entry was added,
+ * otherwise the result of hist_entry__append_callchain().  iter->priv and
+ * iter->he are cleared in every case.
+ */
+static int
+iter_finish_mem_entry(struct hist_entry_iter *iter,
+		      struct addr_location *al __maybe_unused)
+{
+	struct hists *hists = evsel__hists(iter->evsel);
+	struct hist_entry *he = iter->he;
+	int err = -EINVAL;
+
+	if (he != NULL) {
+		hists__inc_nr_samples(hists, he->filtered);
+		err = hist_entry__append_callchain(he, iter->sample);
+	}
+
+	/*
+	 * iter->priv (the mem_info) is not freed here: it has either been
+	 * released already in hists__findnew_entry() or been handed over to
+	 * a new hist entry by hist_entry__new().
+	 */
+	iter->priv = NULL;
+	iter->he = NULL;
+
+	return err;
+}
+
+/*
+ * Resolve the branch stack of the iterator's sample into iter->priv and
+ * set up curr/total so the stack can be walked one branch at a time.
+ * Returns 0 on success, -ENOMEM if the stack could not be resolved.
+ */
+static int
+iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct perf_sample *sample = iter->sample;
+	struct branch_info *bi = sample__resolve_bstack(sample, al);
+
+	if (bi == NULL)
+		return -ENOMEM;
+
+	iter->priv = bi;
+	iter->total = sample->branch_stack->nr;
+	iter->curr = 0;
+
+	return 0;
+}
+
+/*
+ * Branch iteration adds nothing in the "single" step; entries are added
+ * per branch by iter_add_next_branch_entry().  Always returns 0.
+ */
+static int
+iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused,
+			     struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Load the target (map, sym, addr) of the current branch into @al.
+ * Returns 1 when a branch was loaded, 0 when there is no branch info or
+ * all branches have been consumed.
+ */
+static int
+iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct branch_info *bi = iter->priv;
+	struct branch_info *cur;
+
+	if (bi == NULL || iter->curr >= iter->total)
+		return 0;
+
+	cur = &bi[iter->curr];
+
+	al->map = cur->to.map;
+	al->sym = cur->to.sym;
+	al->addr = cur->to.addr;
+
+	return 1;
+}
+
+/*
+ * Add a hist entry for the current branch (bi[iter->curr]) and advance to
+ * the next one.
+ *
+ * When hide_unresolved is set, branches lacking a source or target symbol
+ * are skipped: iter->he is set to NULL and the cursor still advances.
+ * Returns 0 on success or skip, -ENOMEM if the entry could not be added
+ * (the cursor is not advanced in that case).
+ */
+static int
+iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct branch_info *bi;
+	struct perf_evsel *evsel = iter->evsel;
+	struct hists *hists = evsel__hists(evsel);
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry *he = NULL;
+	int i = iter->curr;
+	int err = 0;
+
+	bi = iter->priv;
+
+	if (iter->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
+		goto out;
+
+	/*
+	 * The report shows the percentage of total branches captured
+	 * and not events sampled. Thus we use a pseudo period of 1.
+	 *
+	 * The weight is the cycle count of the branch being added, i.e.
+	 * bi[i]; reading bi->flags would always pick the first branch of
+	 * the stack regardless of which one is processed.
+	 */
+	sample->period = 1;
+	sample->weight = bi[i].flags.cycles ? bi[i].flags.cycles : 1;
+
+	he = hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
+			      sample, true);
+	if (he == NULL)
+		return -ENOMEM;
+
+	hists__inc_nr_samples(hists, he->filtered);
+
+out:
+	iter->he = he;
+	iter->curr++;
+	return err;
+}
+
+/*
+ * Release the resolved branch info held in iter->priv and clear iter->he.
+ * Returns 0 if every branch was consumed, -1 otherwise.
+ */
+static int
+iter_finish_branch_entry(struct hist_entry_iter *iter,
+			 struct addr_location *al __maybe_unused)
+{
+	zfree(&iter->priv);
+	iter->he = NULL;
+
+	if (iter->curr < iter->total)
+		return -1;
+
+	return 0;
+}
+
+/* Normal samples need no preparation; always returns 0. */
+static int
+iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused,
+			  struct addr_location *al __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Add the hist entry for a plain sample and remember it in iter->he.
+ * Returns 0 on success, -ENOMEM if the entry could not be added.
+ */
+static int
+iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+	struct hists *hists = evsel__hists(iter->evsel);
+	struct hist_entry *he;
+
+	he = hists__add_entry(hists, al, iter->parent, NULL, NULL,
+			      iter->sample, true);
+	if (!he)
+		return -ENOMEM;
+
+	iter->he = he;
+	return 0;
+}
+
+/*
+ * Finish a plain sample: clear iter->he, bump the sample count and append
+ * the callchain.  Returns 0 when no entry had been added, otherwise the
+ * result of hist_entry__append_callchain().
+ */
+static int
+iter_finish_normal_entry(struct hist_entry_iter *iter,
+			 struct addr_location *al __maybe_unused)
+{
+	struct hist_entry *he = iter->he;
+
+	if (!he)
+		return 0;
+
+	iter->he = NULL;
+
+	hists__inc_nr_samples(evsel__hists(iter->evsel), he->filtered);
+
+	return hist_entry__append_callchain(he, iter->sample);
+}
+
+/*
+ * Prepare cumulative (children) accounting: commit the global callchain
+ * cursor and allocate a cache with one slot per callchain node plus one,
+ * stored in iter->priv.  Returns 0 on success, -ENOMEM on allocation
+ * failure.
+ */
+static int
+iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
+			      struct addr_location *al __maybe_unused)
+{
+	struct hist_entry **cache;
+
+	callchain_cursor_commit(&callchain_cursor);
+
+	/*
+	 * The cache remembers the entries already touched for this sample,
+	 * so cycles or recursion in the chain are cumulated only once and
+	 * no entry ends up with more than 100% overhead.
+	 */
+	cache = malloc((callchain_cursor.nr + 1) * sizeof(*cache));
+	if (!cache)
+		return -ENOMEM;
+
+	iter->curr = 0;
+	iter->priv = cache;
+
+	return 0;
+}
+
+/*
+ * Add the entry for the sample itself in cumulative mode, record it as the
+ * next slot of the cache in iter->priv and append its callchain.
+ * The result of hist_entry__append_callchain() is not checked.
+ * Returns 0 on success, -ENOMEM if the entry could not be added.
+ */
+static int
+iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
+				 struct addr_location *al)
+{
+	struct hists *hists = evsel__hists(iter->evsel);
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry **he_cache = iter->priv;
+	struct hist_entry *he;
+
+	he = hists__add_entry(hists, al, iter->parent, NULL, NULL,
+			      sample, true);
+	if (!he)
+		return -ENOMEM;
+
+	he_cache[iter->curr++] = he;
+	iter->he = he;
+
+	hist_entry__append_callchain(he, sample);
+
+	/*
+	 * Appending the callchain advanced the cursor to the end, so it has
+	 * to be rewound before the chain is walked again.
+	 */
+	callchain_cursor_commit(&callchain_cursor);
+
+	hists__inc_nr_samples(hists, he->filtered);
+
+	return 0;
+}
+
+/*
+ * next_entry hook of hist_iter_cumulative: load @al from the callchain
+ * node the global cursor currently points at.
+ *
+ * Returns 0 when the cursor has no current node, otherwise whatever
+ * fill_callchain_info() returns.
+ */
+static int
+iter_next_cumulative_entry(struct hist_entry_iter *iter,
+			   struct addr_location *al)
+{
+	struct callchain_cursor_node *cur;
+
+	cur = callchain_cursor_current(&callchain_cursor);
+	if (!cur)
+		return 0;
+
+	return fill_callchain_info(al, cur, iter->hide_unresolved);
+}
+
+/*
+ * add_next_entry hook of hist_iter_cumulative: add a hist entry for the
+ * callchain node described by @al, unless an equal entry was already
+ * added for this sample.
+ *
+ * Returns 0 on success (iter->he is NULL when the node was a duplicate)
+ * or -ENOMEM when hists__add_entry() returns NULL.
+ */
+static int
+iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
+			       struct addr_location *al)
+{
+	struct perf_evsel *evsel = iter->evsel;
+	struct perf_sample *sample = iter->sample;
+	struct hist_entry **he_cache = iter->priv;
+	struct hist_entry *he;
+	/*
+	 * Stack-only template used for the duplicate lookup below.  It
+	 * borrows al->srcline instead of duplicating it: nothing in this
+	 * function ever frees the template's fields, so a strdup()ed copy
+	 * would leak on every call.
+	 */
+	struct hist_entry he_tmp = {
+		.hists = evsel__hists(evsel),
+		.cpu = al->cpu,
+		.thread = al->thread,
+		.comm = thread__comm(al->thread),
+		.ip = al->addr,
+		.ms = {
+			.map = al->map,
+			.sym = al->sym,
+		},
+		.srcline = (char *)al->srcline,
+		.parent = iter->parent,
+		.raw_data = sample->raw_data,
+		.raw_size = sample->raw_size,
+	};
+	int i;
+	struct callchain_cursor cursor;
+
+	/* keep the cursor position of this node for callchain_append() below */
+	callchain_cursor_snapshot(&cursor, &callchain_cursor);
+
+	callchain_cursor_advance(&callchain_cursor);
+
+	/*
+	 * Check if there's duplicate entries in the callchain.
+	 * It's possible that it has cycles or recursive calls.
+	 */
+	for (i = 0; i < iter->curr; i++) {
+		if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
+			/* to avoid calling callback function */
+			iter->he = NULL;
+			return 0;
+		}
+	}
+
+	he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
+			      sample, false);
+	if (he == NULL)
+		return -ENOMEM;
+
+	iter->he = he;
+	he_cache[iter->curr++] = he;
+
+	if (symbol_conf.use_callchain)
+		callchain_append(he->callchain, &cursor, sample->period);
+	return 0;
+}
+
+/*
+ * finish_entry hook of hist_iter_cumulative: release the per-sample
+ * entry cache and forget the current entry.  Always returns 0.
+ */
+static int
+iter_finish_cumulative_entry(struct hist_entry_iter *iter,
+			     struct addr_location *al __maybe_unused)
+{
+	iter->he = NULL;
+	zfree(&iter->priv);
+
+	return 0;
+}
+
+/*
+ * Iterator ops wiring the iter_*_mem_entry callbacks; the next/add_next
+ * steps use the no-op variants.
+ */
+const struct hist_iter_ops hist_iter_mem = {
+	.prepare_entry    = iter_prepare_mem_entry,
+	.add_single_entry = iter_add_single_mem_entry,
+	.next_entry       = iter_next_nop_entry,
+	.add_next_entry   = iter_add_next_nop_entry,
+	.finish_entry     = iter_finish_mem_entry,
+};
+
+/* Iterator ops wiring the iter_*_branch_entry callbacks for every step. */
+const struct hist_iter_ops hist_iter_branch = {
+	.prepare_entry    = iter_prepare_branch_entry,
+	.add_single_entry = iter_add_single_branch_entry,
+	.next_entry       = iter_next_branch_entry,
+	.add_next_entry   = iter_add_next_branch_entry,
+	.finish_entry     = iter_finish_branch_entry,
+};
+
+/*
+ * Iterator ops wiring the iter_*_normal_entry callbacks; the next/add_next
+ * steps use the no-op variants.
+ */
+const struct hist_iter_ops hist_iter_normal = {
+	.prepare_entry    = iter_prepare_normal_entry,
+	.add_single_entry = iter_add_single_normal_entry,
+	.next_entry       = iter_next_nop_entry,
+	.add_next_entry   = iter_add_next_nop_entry,
+	.finish_entry     = iter_finish_normal_entry,
+};
+
+/* Iterator ops wiring the iter_*_cumulative_entry callbacks for every step. */
+const struct hist_iter_ops hist_iter_cumulative = {
+	.prepare_entry    = iter_prepare_cumulative_entry,
+	.add_single_entry = iter_add_single_cumulative_entry,
+	.next_entry       = iter_next_cumulative_entry,
+	.add_next_entry   = iter_add_next_cumulative_entry,
+	.finish_entry     = iter_finish_cumulative_entry,
+};
+
+/*
+ * Drive one sample through the iterator callbacks in iter->ops.
+ *
+ * The callchain of iter->sample is resolved into the global
+ * callchain_cursor, then prepare_entry and add_single_entry run once,
+ * followed by add_next_entry for as long as next_entry returns non-zero.
+ * iter->add_entry_cb, when set, is invoked after every step that left an
+ * entry in iter->he.  finish_entry always runs once prepare_entry has
+ * been attempted.
+ *
+ * @iter:            iterator state (sample, evsel, ops, callback)
+ * @al:              resolved location of the sample
+ * @max_stack_depth: passed through to sample__resolve_callchain()
+ * @arg:             opaque argument handed to iter->add_entry_cb
+ *
+ * Returns 0 on success, otherwise the first error reported by callchain
+ * resolution, an ops hook or the callback; a finish_entry error is only
+ * reported when nothing failed before it.
+ */
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+			 int max_stack_depth, void *arg)
+{
+	int err, err2;
+	struct map *alm = NULL;
+
+	/* hold our own reference on the map for the duration of the call */
+	if (al && al->map)
+		alm = map__get(al->map);
+
+	err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent,
+					iter->evsel, al, max_stack_depth);
+	if (err) {
+		/* drop the reference taken above, it would leak otherwise */
+		map__put(alm);
+		return err;
+	}
+
+	err = iter->ops->prepare_entry(iter, al);
+	if (err)
+		goto out;
+
+	err = iter->ops->add_single_entry(iter, al);
+	if (err)
+		goto out;
+
+	if (iter->he && iter->add_entry_cb) {
+		err = iter->add_entry_cb(iter, al, true, arg);
+		if (err)
+			goto out;
+	}
+
+	while (iter->ops->next_entry(iter, al)) {
+		err = iter->ops->add_next_entry(iter, al);
+		if (err)
+			break;
+
+		if (iter->he && iter->add_entry_cb) {
+			err = iter->add_entry_cb(iter, al, false, arg);
+			if (err)
+				goto out;
+		}
+	}
+
+out:
+	/* always let the ops clean up; keep the first error seen */
+	err2 = iter->ops->finish_entry(iter, al);
+	if (!err)
+		err = err2;
+
+	map__put(alm);
+
+	return err;
+}
+
+/*
+ * Compare two entries with the ->cmp callback of every format on the
+ * sort list of left->hists, skipping dynamic entries that are not
+ * defined for these hists.
+ *
+ * Returns the first non-zero comparison result, or 0 if all formats
+ * consider the entries equal.
+ */
+int64_t
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct hists *hists = left->hists;
+	struct perf_hpp_fmt *fmt;
+
+	hists__for_each_sort_list(hists, fmt) {
+		int64_t res;
+
+		if (perf_hpp__is_dynamic_entry(fmt) &&
+		    !perf_hpp__defined_dynamic_entry(fmt, hists))
+			continue;
+
+		res = fmt->cmp(fmt, left, right);
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+/*
+ * Compare two entries with the ->collapse callback of every format on
+ * the sort list of left->hists, skipping dynamic entries that are not
+ * defined for these hists.
+ *
+ * Returns the first non-zero comparison result, or 0 if all formats
+ * consider the entries equal.
+ */
+int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	struct hists *hists = left->hists;
+	struct perf_hpp_fmt *fmt;
+
+	hists__for_each_sort_list(hists, fmt) {
+		int64_t res;
+
+		if (perf_hpp__is_dynamic_entry(fmt) &&
+		    !perf_hpp__defined_dynamic_entry(fmt, hists))
+			continue;
+
+		res = fmt->collapse(fmt, left, right);
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+/*
+ * Release everything owned by @he and finally the entry itself through
+ * its ops->free() hook.  @he must not be used afterwards.
+ */
+void hist_entry__delete(struct hist_entry *he)
+{
+	/* fetch the ops first, 'he' is handed to ops->free() at the end */
+	struct hist_entry_ops *ops = he->ops;
+
+	thread__zput(he->thread);
+	map__zput(he->ms.map);
+
+	/* optional branch data: both maps, both srclines, then the struct */
+	if (he->branch_info) {
+		map__zput(he->branch_info->from.map);
+		map__zput(he->branch_info->to.map);
+		free_srcline(he->branch_info->srcline_from);
+		free_srcline(he->branch_info->srcline_to);
+		zfree(&he->branch_info);
+	}
+
+	/* optional memory-access data */
+	if (he->mem_info) {
+		map__zput(he->mem_info->iaddr.map);
+		map__zput(he->mem_info->daddr.map);
+		mem_info__zput(he->mem_info);
+	}
+
+	zfree(&he->stat_acc);
+	free_srcline(he->srcline);
+	/* an empty-string srcfile is not freed */
+	if (he->srcfile && he->srcfile[0])
+		free(he->srcfile);
+	free_callchain(he->callchain);
+	free(he->trace_output);
+	free(he->raw_data);
+	ops->free(he);
+}
+
+/*
+ * If this is not the last column, then we need to pad it according to the
+ * pre-calculated max length for this column, otherwise don't bother adding
+ * spaces because that would break viewing this with, for instance, 'less',
+ * that would show tons of trailing spaces when a long C++ demangled method
+ * name is sampled.
+ */
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+				   struct perf_hpp_fmt *fmt, int printed)
+{
+	int width;
+
+	/* the last column is never padded */
+	if (list_is_last(&fmt->list, &he->hists->hpp_list->fields))
+		return printed;
+
+	width = fmt->width(fmt, hpp, he->hists);
+	if (printed >= width)
+		return printed;
+
+	/* skip what was already printed, then emit the padding */
+	advance_hpp(hpp, printed);
+	return scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " ");
+}
+
+/*
+ * collapse the histogram
+ */
+
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he);
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he,
+				       enum hist_filter type);
+
+typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt);
+
+/* fmt_chk_fn: true if @fmt is a thread entry or a comm entry. */
+static bool check_thread_entry(struct perf_hpp_fmt *fmt)
+{
+	if (perf_hpp__is_thread_entry(fmt))
+		return true;
+
+	return perf_hpp__is_comm_entry(fmt);
+}
+
+/*
+ * Adjust the @type filter bit of hierarchy entry @he and of its ancestors.
+ *
+ * @check tells whether a format handles the filter type.  Nothing is done
+ * unless the matching symbol_conf list is set, and only the THREAD, DSO
+ * and SYMBOL filter types are handled.
+ */
+static void hist_entry__check_and_remove_filter(struct hist_entry *he,
+						enum hist_filter type,
+						fmt_chk_fn check)
+{
+	struct perf_hpp_fmt *fmt;
+	bool type_match = false;
+	struct hist_entry *parent = he->parent_he;
+
+	/* bail out when no list was configured for this filter type */
+	switch (type) {
+	case HIST_FILTER__THREAD:
+		if (symbol_conf.comm_list == NULL &&
+		    symbol_conf.pid_list == NULL &&
+		    symbol_conf.tid_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__DSO:
+		if (symbol_conf.dso_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__SYMBOL:
+		if (symbol_conf.sym_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__PARENT:
+	case HIST_FILTER__GUEST:
+	case HIST_FILTER__HOST:
+	case HIST_FILTER__SOCKET:
+	case HIST_FILTER__C2C:
+	default:
+		return;
+	}
+
+	/* if it's filtered by own fmt, it has to have filter bits */
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		if (check(fmt)) {
+			type_match = true;
+			break;
+		}
+	}
+
+	if (type_match) {
+		/*
+		 * If the filter is for current level entry, propagate
+		 * filter marker to parents.  The marker bit was
+		 * already set by default so it only needs to clear
+		 * non-filtered entries.
+		 */
+		if (!(he->filtered & (1 << type))) {
+			while (parent) {
+				parent->filtered &= ~(1 << type);
+				parent = parent->parent_he;
+			}
+		}
+	} else {
+		/*
+		 * If current entry doesn't have matching formats, set
+		 * filter marker for upper level entries.  it will be
+		 * cleared if its lower level entries is not filtered.
+		 *
+		 * For lower-level entries, it inherits parent's
+		 * filter bit so that lower level entries of a
+		 * non-filtered entry won't set the filter marker.
+		 */
+		if (parent == NULL)
+			he->filtered |= (1 << type);
+		else
+			he->filtered |= (parent->filtered & (1 << type));
+	}
+}
+
+/*
+ * Run the thread, dso and symbol hierarchy filter checks on @he, in that
+ * order, then apply the filters currently set on its hists.
+ */
+static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
+{
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD, check_thread_entry);
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO, perf_hpp__is_dso_entry);
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL, perf_hpp__is_sym_entry);
+
+	hists__apply_filters(he->hists, he);
+}
+
+/*
+ * Insert a copy of @he into one level of the hierarchy.
+ *
+ * @root is searched using the ->collapse callbacks of @hpp_list.  If an
+ * equal entry exists, the stats of @he are added to it and it is
+ * returned.  Otherwise a new entry is created from @he, linked below
+ * @parent_he and inserted into @root.
+ *
+ * Returns the existing or new entry, or NULL if hist_entry__new() fails.
+ */
+static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
+						 struct rb_root *root,
+						 struct hist_entry *he,
+						 struct hist_entry *parent_he,
+						 struct perf_hpp_list *hpp_list)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter, *new;
+	struct perf_hpp_fmt *fmt;
+	int64_t cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		/* compare using only the sort keys of this level */
+		cmp = 0;
+		perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+			cmp = fmt->collapse(fmt, iter, he);
+			if (cmp)
+				break;
+		}
+
+		if (!cmp) {
+			he_stat__add_stat(&iter->stat, &he->stat);
+			return iter;
+		}
+
+		if (cmp < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+
+	new = hist_entry__new(he, true);
+	if (new == NULL)
+		return NULL;
+
+	hists->nr_entries++;
+
+	/* save related format list for output */
+	new->hpp_list = hpp_list;
+	new->parent_he = parent_he;
+
+	hist_entry__apply_hierarchy_filters(new);
+
+	/*
+	 * some fields are now passed to 'new': for each of trace_output,
+	 * srcline and srcfile the pointer is cleared either in 'he' or in
+	 * 'new', so only one of the two entries keeps it.
+	 */
+	perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+		if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+			he->trace_output = NULL;
+		else
+			new->trace_output = NULL;
+
+		if (perf_hpp__is_srcline_entry(fmt))
+			he->srcline = NULL;
+		else
+			new->srcline = NULL;
+
+		if (perf_hpp__is_srcfile_entry(fmt))
+			he->srcfile = NULL;
+		else
+			new->srcfile = NULL;
+	}
+
+	rb_link_node(&new->rb_node_in, parent, p);
+	rb_insert_color(&new->rb_node_in, root);
+	return new;
+}
+
+/*
+ * Insert @he into the hierarchy rooted at @root, one level per node of
+ * hists->hpp_formats, and merge its callchain into the deepest entry.
+ *
+ * @he is always deleted before returning.
+ *
+ * Returns 0 on success, -1 if an entry could not be created or the
+ * callchain merge failed.
+ */
+static int hists__hierarchy_insert_entry(struct hists *hists,
+					 struct rb_root *root,
+					 struct hist_entry *he)
+{
+	struct perf_hpp_list_node *node;
+	struct hist_entry *new_he = NULL;
+	struct hist_entry *parent = NULL;
+	int depth = 0;
+	int ret = 0;
+
+	list_for_each_entry(node, &hists->hpp_formats, list) {
+		/* skip period (overhead) and elided columns */
+		if (node->level == 0 || node->skip)
+			continue;
+
+		/* insert copy of 'he' for each fmt into the hierarchy */
+		new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp);
+		if (new_he == NULL) {
+			ret = -1;
+			break;
+		}
+
+		/* the next level goes into the child tree of this entry */
+		root = &new_he->hroot_in;
+		new_he->depth = depth++;
+		parent = new_he;
+	}
+
+	/* new_he is the entry of the last level inserted, if any */
+	if (new_he) {
+		new_he->leaf = true;
+
+		if (symbol_conf.use_callchain) {
+			callchain_cursor_reset(&callchain_cursor);
+			if (callchain_merge(&callchain_cursor,
+					    new_he->callchain,
+					    he->callchain) < 0)
+				ret = -1;
+		}
+	}
+
+	/* 'he' is no longer used */
+	hist_entry__delete(he);
+
+	/* return 0 (or -1) since it already applied filters */
+	return ret;
+}
+
+/*
+ * Insert @he into the collapsed tree @root, merging it into an existing
+ * entry when hist_entry__collapse() considers the two equal.
+ *
+ * In hierarchy mode the work is delegated to
+ * hists__hierarchy_insert_entry().
+ *
+ * Returns 1 if @he was linked into @root as a new node, 0 if it was
+ * merged into an existing entry and deleted, -1 if merging the
+ * callchains failed (@he is deleted in that case as well).
+ */
+static int hists__collapse_insert_entry(struct hists *hists,
+					struct rb_root *root,
+					struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	int64_t cmp;
+
+	if (symbol_conf.report_hierarchy)
+		return hists__hierarchy_insert_entry(hists, root, he);
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		cmp = hist_entry__collapse(iter, he);
+
+		if (!cmp) {
+			int ret = 0;
+
+			/* fold the stats and the callchain of 'he' into 'iter' */
+			he_stat__add_stat(&iter->stat, &he->stat);
+			if (symbol_conf.cumulate_callchain)
+				he_stat__add_stat(iter->stat_acc, he->stat_acc);
+
+			if (symbol_conf.use_callchain) {
+				callchain_cursor_reset(&callchain_cursor);
+				if (callchain_merge(&callchain_cursor,
+						    iter->callchain,
+						    he->callchain) < 0)
+					ret = -1;
+			}
+			hist_entry__delete(he);
+			return ret;
+		}
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	hists->nr_entries++;
+
+	rb_link_node(&he->rb_node_in, parent, p);
+	rb_insert_color(&he->rb_node_in, root);
+	return 1;
+}
+
+/*
+ * Return the tree hists->entries_in currently points at and, under
+ * hists->lock, switch entries_in to the other slot of the two-element
+ * entries_in_array.
+ */
+struct rb_root *hists__get_rotate_entries_in(struct hists *hists)
+{
+	struct rb_root *prev;
+
+	pthread_mutex_lock(&hists->lock);
+
+	prev = hists->entries_in;
+
+	/* step to the next slot, wrapping back to the first one */
+	hists->entries_in++;
+	if (hists->entries_in > &hists->entries_in_array[1])
+		hists->entries_in = &hists->entries_in_array[0];
+
+	pthread_mutex_unlock(&hists->lock);
+
+	return prev;
+}
+
+/*
+ * Run the dso, thread, symbol and socket filters of @hists on @he.  Each
+ * helper sets its filter bit in he->filtered when the entry does not
+ * pass; their return values are ignored here.
+ */
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
+{
+	hists__filter_entry_by_dso(hists, he);
+	hists__filter_entry_by_thread(hists, he);
+	hists__filter_entry_by_symbol(hists, he);
+	hists__filter_entry_by_socket(hists, he);
+}
+
+/*
+ * Move every entry of the current input tree into
+ * hists->entries_collapsed, merging entries that collapse to the same
+ * key.  Does nothing unless the hists need collapsing.  The walk stops
+ * early once session_done() reports true.
+ *
+ * @prog: optional progress indicator, advanced by one per entry
+ *
+ * Returns 0 on success, -1 if inserting an entry failed.
+ */
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
+{
+	struct rb_root *root;
+	struct rb_node *nd;
+
+	if (!hists__has(hists, need_collapse))
+		return 0;
+
+	hists->nr_entries = 0;
+
+	root = hists__get_rotate_entries_in(hists);
+
+	for (nd = rb_first(root); nd && !session_done(); ) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node_in);
+		int ret;
+
+		/* pick the successor before 'he' is unlinked from the tree */
+		nd = rb_next(&he->rb_node_in);
+
+		rb_erase(&he->rb_node_in, root);
+		ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, he);
+		if (ret < 0)
+			return -1;
+
+		/*
+		 * A positive result means 'he' became a new node instead of
+		 * being merged, so the filters that may have been set by,
+		 * say, the hist_browser still have to be applied to it.
+		 */
+		if (ret > 0)
+			hists__apply_filters(hists, he);
+
+		if (prog)
+			ui_progress__update(prog, 1);
+	}
+
+	return 0;
+}
+
+/*
+ * Compare two entries for output ordering with the ->sort callback of
+ * every format on the sort list of a->hists, skipping formats that
+ * perf_hpp__should_skip() rejects.
+ *
+ * Returns 1, -1 or 0 according to the sign of the first non-zero
+ * callback result.  The callbacks return int64_t while this function
+ * returns int, so the value is reduced to its sign instead of being
+ * narrowed implicitly: narrowing could turn a large difference into 0
+ * or flip its sign.
+ */
+static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
+{
+	struct hists *hists = a->hists;
+	struct perf_hpp_fmt *fmt;
+	int64_t cmp = 0;
+
+	hists__for_each_sort_list(hists, fmt) {
+		if (perf_hpp__should_skip(fmt, a->hists))
+			continue;
+
+		cmp = fmt->sort(fmt, a, b);
+		if (cmp)
+			break;
+	}
+
+	if (cmp > 0)
+		return 1;
+	if (cmp < 0)
+		return -1;
+	return 0;
+}
+
+/* Zero the counters that only cover non-filtered entries. */
+static void hists__reset_filter_stats(struct hists *hists)
+{
+	hists->stats.total_non_filtered_period = 0;
+	hists->nr_non_filtered_entries = 0;
+}
+
+/* Zero the entry count and total period, plus the non-filtered counters. */
+void hists__reset_stats(struct hists *hists)
+{
+	hists->stats.total_period = 0;
+	hists->nr_entries = 0;
+
+	hists__reset_filter_stats(hists);
+}
+
+/* Account @h in the non-filtered entry count and non-filtered period. */
+static void hists__inc_filter_stats(struct hists *hists, struct hist_entry *h)
+{
+	hists->stats.total_non_filtered_period += h->stat.period;
+	hists->nr_non_filtered_entries++;
+}
+
+/*
+ * Account @h in the entry count and total period of @hists, and in the
+ * non-filtered counters as well when no filter bit is set on it.
+ */
+void hists__inc_stats(struct hists *hists, struct hist_entry *h)
+{
+	hists->nr_entries++;
+	hists->stats.total_period += h->stat.period;
+
+	if (!h->filtered)
+		hists__inc_filter_stats(hists, h);
+}
+
+/*
+ * Recompute total_period and total_non_filtered_period of @hists from
+ * the entries directly in hists->entries.
+ */
+static void hierarchy_recalc_total_periods(struct hists *hists)
+{
+	struct rb_node *nd;
+
+	hists->stats.total_period = 0;
+	hists->stats.total_non_filtered_period = 0;
+
+	/*
+	 * Only top-level entries are summed: lower level entries only see
+	 * non-filtered entries while upper level entries hold the sum of
+	 * both.
+	 */
+	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+
+		hists->stats.total_period += he->stat.period;
+		if (!he->filtered)
+			hists->stats.total_non_filtered_period += he->stat.period;
+	}
+}
+
+/*
+ * Link @he into the output tree @root at the position given by
+ * hist_entry__sort(), then call ->sort(fmt, he, NULL) on every dynamic
+ * format of he->hpp_list to update its column width.
+ */
+static void hierarchy_insert_output_entry(struct rb_root *root,
+					  struct hist_entry *he)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct perf_hpp_fmt *fmt;
+
+	while (*link) {
+		struct hist_entry *cur;
+
+		parent = *link;
+		cur = rb_entry(parent, struct hist_entry, rb_node);
+
+		/* entries that sort higher than 'cur' go to the left */
+		if (hist_entry__sort(he, cur) > 0)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, link);
+	rb_insert_color(&he->rb_node, root);
+
+	perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+		if (!perf_hpp__is_dynamic_entry(fmt))
+			continue;
+
+		fmt->sort(fmt, he, NULL);
+	}
+}
+
+/*
+ * Build the sorted output tree @root_out from the input tree @root_in
+ * and recurse into the child trees of non-leaf entries.
+ *
+ * Each entry is counted in hists->nr_entries; non-filtered ones also
+ * update nr_non_filtered_entries and the column lengths.  For leaf
+ * entries the callchain is sorted when @use_callchain is set.
+ *
+ * @prog: optional progress indicator, advanced by one per entry
+ */
+static void hists__hierarchy_output_resort(struct hists *hists,
+					   struct ui_progress *prog,
+					   struct rb_root *root_in,
+					   struct rb_root *root_out,
+					   u64 min_callchain_hits,
+					   bool use_callchain)
+{
+	struct rb_node *node;
+	struct hist_entry *he;
+
+	*root_out = RB_ROOT;
+	node = rb_first(root_in);
+
+	while (node) {
+		he = rb_entry(node, struct hist_entry, rb_node_in);
+		node = rb_next(node);
+
+		hierarchy_insert_output_entry(root_out, he);
+
+		if (prog)
+			ui_progress__update(prog, 1);
+
+		hists->nr_entries++;
+		if (!he->filtered) {
+			hists->nr_non_filtered_entries++;
+			hists__calc_col_len(hists, he);
+		}
+
+		/* callchains are only sorted on leaf entries */
+		if (!he->leaf) {
+			hists__hierarchy_output_resort(hists, prog,
+						       &he->hroot_in,
+						       &he->hroot_out,
+						       min_callchain_hits,
+						       use_callchain);
+			continue;
+		}
+
+		if (!use_callchain)
+			continue;
+
+		/*
+		 * In CHAIN_GRAPH_REL mode the threshold is derived from this
+		 * entry's own period.  Note that this overwrites the
+		 * parameter, so the new value is also what later iterations
+		 * pass down when they recurse.
+		 */
+		if (callchain_param.mode == CHAIN_GRAPH_REL) {
+			u64 total = he->stat.period;
+
+			if (symbol_conf.cumulate_callchain)
+				total = he->stat_acc->period;
+
+			min_callchain_hits = total * (callchain_param.min_percent / 100);
+		}
+
+		callchain_param.sort(&he->sorted_chain, he->callchain,
+				     min_callchain_hits, &callchain_param);
+	}
+}
+
+/*
+ * Sort the callchain of @he (when @use_callchain is set) and link the
+ * entry into the output tree @entries at the position given by
+ * hist_entry__sort().
+ *
+ * @min_callchain_hits: threshold handed to callchain_param.sort(); it is
+ *                      recomputed from the entry's own period in
+ *                      CHAIN_GRAPH_REL mode
+ */
+static void __hists__insert_output_entry(struct rb_root *entries,
+					 struct hist_entry *he,
+					 u64 min_callchain_hits,
+					 bool use_callchain)
+{
+	struct rb_node **p = &entries->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	struct perf_hpp_fmt *fmt;
+
+	if (use_callchain) {
+		if (callchain_param.mode == CHAIN_GRAPH_REL) {
+			u64 total = he->stat.period;
+
+			if (symbol_conf.cumulate_callchain)
+				total = he->stat_acc->period;
+
+			min_callchain_hits = total * (callchain_param.min_percent / 100);
+		}
+		callchain_param.sort(&he->sorted_chain, he->callchain,
+				      min_callchain_hits, &callchain_param);
+	}
+
+	/* entries that sort higher than 'iter' go to the left */
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (hist_entry__sort(he, iter) > 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, entries);
+
+	perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt) &&
+		    perf_hpp__defined_dynamic_entry(fmt, he->hists))
+			fmt->sort(fmt, he, NULL);  /* update column width */
+	}
+}
+
+/*
+ * Rebuild the sorted output tree hists->entries and recompute the stats
+ * and column lengths of @hists.
+ *
+ * @prog:          optional progress indicator, advanced by one per entry
+ *                 that is inserted
+ * @use_callchain: whether callchains are sorted as part of the resort
+ * @cb:            optional callback; an entry is left out of the output
+ *                 when it returns non-zero (not used in hierarchy mode)
+ */
+static void output_resort(struct hists *hists, struct ui_progress *prog,
+			  bool use_callchain, hists__resort_cb_t cb)
+{
+	struct rb_root *root;
+	struct rb_node *next;
+	struct hist_entry *n;
+	u64 callchain_total;
+	u64 min_callchain_hits;
+
+	callchain_total = hists->callchain_period;
+	if (symbol_conf.filter_relative)
+		callchain_total = hists->callchain_non_filtered_period;
+
+	min_callchain_hits = callchain_total * (callchain_param.min_percent / 100);
+
+	hists__reset_stats(hists);
+	hists__reset_col_len(hists);
+
+	/* hierarchy mode has its own recursive resort */
+	if (symbol_conf.report_hierarchy) {
+		hists__hierarchy_output_resort(hists, prog,
+					       &hists->entries_collapsed,
+					       &hists->entries,
+					       min_callchain_hits,
+					       use_callchain);
+		hierarchy_recalc_total_periods(hists);
+		return;
+	}
+
+	if (hists__has(hists, need_collapse))
+		root = &hists->entries_collapsed;
+	else
+		root = hists->entries_in;
+
+	next = rb_first(root);
+	hists->entries = RB_ROOT;
+
+	while (next) {
+		n = rb_entry(next, struct hist_entry, rb_node_in);
+		next = rb_next(&n->rb_node_in);
+
+		/* entries rejected by the callback are neither inserted nor counted */
+		if (cb && cb(n))
+			continue;
+
+		__hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
+		hists__inc_stats(hists, n);
+
+		if (!n->filtered)
+			hists__calc_col_len(hists, n);
+
+		if (prog)
+			ui_progress__update(prog, 1);
+	}
+}
+
+/*
+ * Resort the output tree of the hists belonging to @evsel.
+ *
+ * Callchains are used if the event was sampled with
+ * PERF_SAMPLE_CALLCHAIN (when callchains are enabled and
+ * show_ref_callgraph is off), otherwise according to
+ * symbol_conf.use_callchain; show_branchflag_count forces them on.
+ *
+ * NOTE(review): @evsel is checked for NULL below but then passed to
+ * evsel__hists() unconditionally - confirm whether callers can really
+ * pass NULL.
+ */
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog)
+{
+	bool use_callchain;
+
+	if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
+		use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
+	else
+		use_callchain = symbol_conf.use_callchain;
+
+	use_callchain |= symbol_conf.show_branchflag_count;
+
+	output_resort(evsel__hists(evsel), prog, use_callchain, NULL);
+}
+
+/*
+ * Resort the output tree of @hists without a per-entry callback; the
+ * callchain setting is taken from symbol_conf.use_callchain.
+ */
+void hists__output_resort(struct hists *hists, struct ui_progress *prog)
+{
+	bool use_callchain = symbol_conf.use_callchain;
+
+	output_resort(hists, prog, use_callchain, NULL);
+}
+
+/*
+ * Resort the output tree of @hists, passing @cb on to output_resort();
+ * the callchain setting is taken from symbol_conf.use_callchain.
+ */
+void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
+			     hists__resort_cb_t cb)
+{
+	bool use_callchain = symbol_conf.use_callchain;
+
+	output_resort(hists, prog, use_callchain, cb);
+}
+
+/*
+ * Tell whether a hierarchy walk may descend from @he into its children.
+ * Leaf entries and HMD_FORCE_SIBLING never descend; otherwise the entry
+ * has to be unfolded or the move forced with HMD_FORCE_CHILD.
+ */
+static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
+{
+	if (he->leaf || hmd == HMD_FORCE_SIBLING)
+		return false;
+
+	return he->unfolded || hmd == HMD_FORCE_CHILD;
+}
+
+/*
+ * Starting at @node, keep descending to the last child for as long as
+ * can_goto_child() allows it with HMD_NORMAL, and return the node
+ * reached.
+ */
+struct rb_node *rb_hierarchy_last(struct rb_node *node)
+{
+	struct hist_entry *he;
+
+	for (he = rb_entry(node, struct hist_entry, rb_node);
+	     can_goto_child(he, HMD_NORMAL);
+	     he = rb_entry(node, struct hist_entry, rb_node))
+		node = rb_last(&he->hroot_out);
+
+	return node;
+}
+
+/*
+ * Return the node following @node in a walk over the hierarchy: its
+ * first child when can_goto_child() allows descending for @hmd,
+ * otherwise its next sibling, otherwise the next sibling of the nearest
+ * ancestor that has one.  Returns NULL at the end of the hierarchy.
+ */
+struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_dir hmd)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	node = can_goto_child(he, hmd) ? rb_first(&he->hroot_out) : rb_next(node);
+
+	/* this level is exhausted: continue after the parent */
+	while (!node) {
+		he = he->parent_he;
+		if (!he)
+			return NULL;
+
+		node = rb_next(&he->rb_node);
+	}
+
+	return node;
+}
+
+/*
+ * Return the node preceding @node in a walk over the hierarchy: the
+ * rb_hierarchy_last() node below the previous sibling if there is one,
+ * otherwise the parent entry's node, or NULL when @node has neither.
+ */
+struct rb_node *rb_hierarchy_prev(struct rb_node *node)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+	struct rb_node *prev = rb_prev(node);
+
+	if (prev)
+		return rb_hierarchy_last(prev);
+
+	if (!he->parent_he)
+		return NULL;
+
+	return &he->parent_he->rb_node;
+}
+
+/*
+ * Tell whether @he has a child worth showing: the first non-filtered
+ * entry in its output child tree must reach the percent @limit.
+ *
+ * Returns false for leaf entries and when every child is filtered.
+ *
+ * rb_entry() is only applied to non-NULL nodes here; converting a NULL
+ * node would mean doing pointer arithmetic on a null pointer.
+ */
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit)
+{
+	struct rb_node *node;
+
+	if (he->leaf)
+		return false;
+
+	for (node = rb_first(&he->hroot_out); node; node = rb_next(node)) {
+		struct hist_entry *child = rb_entry(node, struct hist_entry, rb_node);
+		float percent;
+
+		if (child->filtered)
+			continue;
+
+		/* only the first non-filtered child decides */
+		percent = hist_entry__get_percent_limit(child);
+		return percent >= limit;
+	}
+
+	return false;
+}
+
+/*
+ * Clear the @filter bit on @h.  In hierarchy mode the stats of @h are
+ * also added to every ancestor and the bit is cleared there too.  Each
+ * entry that ends up with no filter bits is folded, and @h itself is
+ * then accounted in the non-filtered stats and column lengths.
+ */
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h,
+				       enum hist_filter filter)
+{
+	h->filtered &= ~(1 << filter);
+
+	if (symbol_conf.report_hierarchy) {
+		struct hist_entry *up;
+
+		for (up = h->parent_he; up; up = up->parent_he) {
+			he_stat__add_stat(&up->stat, &h->stat);
+
+			up->filtered &= ~(1 << filter);
+
+			/* still hidden by another filter */
+			if (up->filtered)
+				continue;
+
+			/* force fold unfiltered entry for simplicity */
+			up->unfolded = false;
+			up->has_no_entry = false;
+			up->row_offset = 0;
+			up->nr_rows = 0;
+		}
+	}
+
+	/* still hidden by another filter */
+	if (h->filtered)
+		return;
+
+	/* force fold unfiltered entry for simplicity */
+	h->unfolded = false;
+	h->has_no_entry = false;
+	h->row_offset = 0;
+	h->nr_rows = 0;
+
+	hists->stats.nr_non_filtered_samples += h->stat.nr_events;
+
+	hists__inc_filter_stats(hists, h);
+	hists__calc_col_len(hists, h);
+}
+
+
+/*
+ * Apply the dso filter of @hists to @he: when a filter is set and the
+ * entry has no map or a map of a different dso, set HIST_FILTER__DSO on
+ * it.
+ *
+ * Returns true if the entry was filtered out.
+ */
+static bool hists__filter_entry_by_dso(struct hists *hists,
+				       struct hist_entry *he)
+{
+	if (hists->dso_filter == NULL)
+		return false;
+
+	if (he->ms.map != NULL && he->ms.map->dso == hists->dso_filter)
+		return false;
+
+	he->filtered |= (1 << HIST_FILTER__DSO);
+	return true;
+}
+
+/*
+ * Apply the thread filter of @hists to @he: when a filter is set and
+ * the entry belongs to another thread, set HIST_FILTER__THREAD on it.
+ *
+ * Returns true if the entry was filtered out.
+ */
+static bool hists__filter_entry_by_thread(struct hists *hists,
+					  struct hist_entry *he)
+{
+	if (hists->thread_filter == NULL)
+		return false;
+
+	if (he->thread == hists->thread_filter)
+		return false;
+
+	he->filtered |= (1 << HIST_FILTER__THREAD);
+	return true;
+}
+
+/*
+ * Apply the symbol filter of @hists to @he: when a filter string is set
+ * and the entry has no symbol, or its symbol name does not contain the
+ * string, set HIST_FILTER__SYMBOL on it.
+ *
+ * Returns true if the entry was filtered out.
+ */
+static bool hists__filter_entry_by_symbol(struct hists *hists,
+					  struct hist_entry *he)
+{
+	if (hists->symbol_filter_str == NULL)
+		return false;
+
+	if (he->ms.sym &&
+	    strstr(he->ms.sym->name, hists->symbol_filter_str) != NULL)
+		return false;
+
+	he->filtered |= (1 << HIST_FILTER__SYMBOL);
+	return true;
+}
+
+/*
+ * Apply the socket filter of @hists to @he: when socket_filter is
+ * greater than -1 and differs from the entry's socket, set
+ * HIST_FILTER__SOCKET on it.
+ *
+ * Returns true if the entry was filtered out.
+ */
+static bool hists__filter_entry_by_socket(struct hists *hists,
+					  struct hist_entry *he)
+{
+	if (!(hists->socket_filter > -1))
+		return false;
+
+	if (he->socket == hists->socket_filter)
+		return false;
+
+	he->filtered |= (1 << HIST_FILTER__SOCKET);
+	return true;
+}
+
+typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
+
+/*
+ * Re-evaluate one filter over every entry of the output tree.
+ *
+ * The non-filtered stats and column lengths are reset first.  @filter is
+ * run on each entry; entries it does not filter out get the @type bit
+ * removed through hists__remove_entry_filter().
+ */
+static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
+{
+	struct rb_node *nd;
+
+	hists->stats.nr_non_filtered_samples = 0;
+
+	hists__reset_filter_stats(hists);
+	hists__reset_col_len(hists);
+
+	nd = rb_first(&hists->entries);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		if (!filter(hists, h))
+			hists__remove_entry_filter(hists, h, type);
+
+		nd = rb_next(nd);
+	}
+}
+
+/*
+ * Insert @he into the output tree @root at the position given by
+ * hist_entry__sort() and, unless it is a leaf or filtered, rebuild its
+ * child output tree the same way, recursively.
+ */
+static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	struct rb_root new_root = RB_ROOT;
+	struct rb_node *nd;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (hist_entry__sort(he, iter) > 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, root);
+
+	/* children of leaf or filtered entries are left as they are */
+	if (he->leaf || he->filtered)
+		return;
+
+	/* move every child from the old child tree into a freshly sorted one */
+	nd = rb_first(&he->hroot_out);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		/* advance before 'h' is erased from the old tree */
+		nd = rb_next(nd);
+		rb_erase(&h->rb_node, &he->hroot_out);
+
+		resort_filtered_entry(&new_root, h);
+	}
+
+	he->hroot_out = new_root;
+}
+
+/*
+ * Apply the filter @type with argument @arg to the whole hierarchy.
+ *
+ * The non-filtered stats and column lengths are reset, every entry is
+ * visited and classified by hist_entry__filter(), the total periods are
+ * recalculated, and finally the output tree is rebuilt since the
+ * periods used for sorting may have changed.
+ */
+static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg)
+{
+	struct rb_node *nd;
+	struct rb_root new_root = RB_ROOT;
+
+	hists->stats.nr_non_filtered_samples = 0;
+
+	hists__reset_filter_stats(hists);
+	hists__reset_col_len(hists);
+
+	nd = rb_first(&hists->entries);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+		int ret;
+
+		ret = hist_entry__filter(h, type, arg);
+
+		/*
+		 * case 1. non-matching type
+		 * zero out the period, set filter marker and move to child
+		 */
+		if (ret < 0) {
+			memset(&h->stat, 0, sizeof(h->stat));
+			h->filtered |= (1 << type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_CHILD);
+		}
+		/*
+		 * case 2. matched type (filter out)
+		 * set filter marker and move to next
+		 */
+		else if (ret == 1) {
+			h->filtered |= (1 << type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+		}
+		/*
+		 * case 3. ok (not filtered)
+		 * add period to hists and parents, erase the filter marker
+		 * and move to next sibling
+		 */
+		else {
+			hists__remove_entry_filter(hists, h, type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+		}
+	}
+
+	hierarchy_recalc_total_periods(hists);
+
+	/*
+	 * resort output after applying a new filter since filter in a lower
+	 * hierarchy can change periods in a upper hierarchy.
+	 */
+	nd = rb_first(&hists->entries);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		/* advance before 'h' is erased from the old tree */
+		nd = rb_next(nd);
+		rb_erase(&h->rb_node, &hists->entries);
+
+		resort_filtered_entry(&new_root, h);
+	}
+
+	hists->entries = new_root;
+}
+
+/*
+ * Re-apply the thread filter of @hists to its entries, using the
+ * hierarchy walk when symbol_conf.report_hierarchy is set.
+ */
+void hists__filter_by_thread(struct hists *hists)
+{
+	if (!symbol_conf.report_hierarchy) {
+		hists__filter_by_type(hists, HIST_FILTER__THREAD,
+				      hists__filter_entry_by_thread);
+		return;
+	}
+
+	hists__filter_hierarchy(hists, HIST_FILTER__THREAD, hists->thread_filter);
+}
+
+/*
+ * Re-apply the dso filter of @hists to its entries, using the hierarchy
+ * walk when symbol_conf.report_hierarchy is set.
+ */
+void hists__filter_by_dso(struct hists *hists)
+{
+	if (!symbol_conf.report_hierarchy) {
+		hists__filter_by_type(hists, HIST_FILTER__DSO,
+				      hists__filter_entry_by_dso);
+		return;
+	}
+
+	hists__filter_hierarchy(hists, HIST_FILTER__DSO, hists->dso_filter);
+}
+
+/*
+ * Re-apply the symbol filter of @hists to its entries, using the
+ * hierarchy walk when symbol_conf.report_hierarchy is set.
+ */
+void hists__filter_by_symbol(struct hists *hists)
+{
+	if (!symbol_conf.report_hierarchy) {
+		hists__filter_by_type(hists, HIST_FILTER__SYMBOL,
+				      hists__filter_entry_by_symbol);
+		return;
+	}
+
+	hists__filter_hierarchy(hists, HIST_FILTER__SYMBOL, hists->symbol_filter_str);
+}
+
+/*
+ * Re-apply the socket filter of @hists to its entries, using the
+ * hierarchy walk when symbol_conf.report_hierarchy is set.  The
+ * hierarchy variant receives a pointer to the socket number.
+ */
+void hists__filter_by_socket(struct hists *hists)
+{
+	if (!symbol_conf.report_hierarchy) {
+		hists__filter_by_type(hists, HIST_FILTER__SOCKET,
+				      hists__filter_entry_by_socket);
+		return;
+	}
+
+	hists__filter_hierarchy(hists, HIST_FILTER__SOCKET, &hists->socket_filter);
+}
+
+/*
+ * Count one event of @type: slot 0 of nr_events is incremented for
+ * every event, in addition to the slot indexed by @type.  @type is not
+ * range-checked here.
+ */
+void events_stats__inc(struct events_stats *stats, u32 type)
+{
+	stats->nr_events[0]++;
+	stats->nr_events[type]++;
+}
+
+/* Count one event of @type in the stats of @hists. */
+void hists__inc_nr_events(struct hists *hists, u32 type)
+{
+	struct events_stats *stats = &hists->stats;
+
+	events_stats__inc(stats, type);
+}
+
+/*
+ * Count one PERF_RECORD_SAMPLE event in the stats of @hists; samples
+ * that are not @filtered also bump nr_non_filtered_samples.
+ */
+void hists__inc_nr_samples(struct hists *hists, bool filtered)
+{
+	events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE);
+
+	if (filtered)
+		return;
+
+	hists->stats.nr_non_filtered_samples++;
+}
+
+/*
+ * Find the entry of @hists that collapses to the same key as @pair, or
+ * add a zero-stat dummy copy of @pair if there is none.
+ *
+ * The collapsed tree is used when the hists need collapsing, the current
+ * input tree otherwise.
+ *
+ * Returns the existing or new entry, or NULL if hist_entry__new() fails.
+ */
+static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
+						 struct hist_entry *pair)
+{
+	struct rb_root *root;
+	struct rb_node **link;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+
+	if (hists__has(hists, need_collapse))
+		root = &hists->entries_collapsed;
+	else
+		root = hists->entries_in;
+
+	link = &root->rb_node;
+
+	while (*link != NULL) {
+		int64_t cmp;
+
+		parent = *link;
+		he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		cmp = hist_entry__collapse(he, pair);
+		if (!cmp)
+			return he;
+
+		link = cmp < 0 ? &(*link)->rb_left : &(*link)->rb_right;
+	}
+
+	he = hist_entry__new(pair, true);
+	if (!he)
+		return NULL;
+
+	/* the dummy starts with empty stats and belongs to 'hists' */
+	memset(&he->stat, 0, sizeof(he->stat));
+	he->hists = hists;
+	if (symbol_conf.cumulate_callchain)
+		memset(he->stat_acc, 0, sizeof(he->stat));
+
+	rb_link_node(&he->rb_node_in, parent, link);
+	rb_insert_color(&he->rb_node_in, root);
+	hists__inc_stats(hists, he);
+	he->dummy = true;
+
+	return he;
+}
+
+/*
+ * Find the entry in hierarchy level @root that matches @pair, or add a
+ * zero-stat dummy copy of @pair if there is none.  Candidates are
+ * compared with the ->collapse callbacks of their own hpp_list.
+ *
+ * Returns the existing or new entry, or NULL if hist_entry__new() fails.
+ */
+static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
+						    struct rb_root *root,
+						    struct hist_entry *pair)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	struct perf_hpp_fmt *fmt;
+
+	while (*link != NULL) {
+		int64_t cmp = 0;
+
+		parent = *link;
+		he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+			cmp = fmt->collapse(fmt, he, pair);
+			if (cmp)
+				break;
+		}
+		if (!cmp)
+			return he;
+
+		link = cmp < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	he = hist_entry__new(pair, true);
+	if (!he)
+		return NULL;
+
+	rb_link_node(&he->rb_node_in, parent, link);
+	rb_insert_color(&he->rb_node_in, root);
+
+	/* the dummy starts with empty stats and belongs to 'hists' */
+	he->dummy = true;
+	he->hists = hists;
+	memset(&he->stat, 0, sizeof(he->stat));
+	hists__inc_stats(hists, he);
+
+	return he;
+}
+
+/*
+ * Look up the entry of @hists that collapses to the same key as @he.
+ * The collapsed tree is searched when the hists need collapsing, the
+ * current input tree otherwise.
+ *
+ * Returns the matching entry or NULL.
+ */
+static struct hist_entry *hists__find_entry(struct hists *hists,
+					    struct hist_entry *he)
+{
+	struct rb_root *root;
+	struct rb_node *n;
+
+	if (hists__has(hists, need_collapse))
+		root = &hists->entries_collapsed;
+	else
+		root = hists->entries_in;
+
+	for (n = root->rb_node; n != NULL; ) {
+		struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in);
+		int64_t cmp = hist_entry__collapse(iter, he);
+
+		if (cmp == 0)
+			return iter;
+
+		n = cmp < 0 ? n->rb_left : n->rb_right;
+	}
+
+	return NULL;
+}
+
+/*
+ * Search one hierarchy level (@root) for the entry that compares equal to
+ * @he under every sort key of @he's hpp_list; NULL if there is none.
+ */
+static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root,
+						      struct hist_entry *he)
+{
+	struct rb_node *cur = root->rb_node;
+
+	while (cur != NULL) {
+		struct hist_entry *cand;
+		struct perf_hpp_fmt *key;
+		int64_t order = 0;
+
+		cand = rb_entry(cur, struct hist_entry, rb_node_in);
+		perf_hpp_list__for_each_sort_list(he->hpp_list, key) {
+			order = key->collapse(key, cand, he);
+			if (order != 0)
+				break;
+		}
+		if (order == 0)
+			return cand;
+		cur = order < 0 ? cur->rb_left : cur->rb_right;
+	}
+
+	return NULL;
+}
+
+/* Pair up equal entries of two hierarchy levels, then recurse into them. */
+static void hists__match_hierarchy(struct rb_root *leader_root,
+				   struct rb_root *other_root)
+{
+	struct rb_node *it;
+	struct hist_entry *mine, *theirs;
+
+	for (it = rb_first(leader_root); it != NULL; it = rb_next(it)) {
+		mine = rb_entry(it, struct hist_entry, rb_node_in);
+		theirs = hists__find_hierarchy_entry(other_root, mine);
+		if (theirs == NULL)
+			continue;
+		hist_entry__add_pair(theirs, mine);
+		hists__match_hierarchy(&mine->hroot_in, &theirs->hroot_in);
+	}
+}
+
+/*
+ * Look for pairs to link to the leader buckets (hist_entries): each entry of
+ * @leader that has an equal in @other gets it via hist_entry__add_pair().
+ */
+void hists__match(struct hists *leader, struct hists *other)
+{
+	struct rb_root *root;
+	struct rb_node *nd;
+	struct hist_entry *pos, *pair;
+
+	if (symbol_conf.report_hierarchy) {
+		/* hierarchy report always collapses entries */
+		hists__match_hierarchy(&leader->entries_collapsed,
+				       &other->entries_collapsed);
+		return;
+	}
+
+	root = hists__has(leader, need_collapse) ? &leader->entries_collapsed :
+						    leader->entries_in;
+
+	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+		pos  = rb_entry(nd, struct hist_entry, rb_node_in);
+		pair = hists__find_entry(other, pos);
+
+		if (pair)
+			hist_entry__add_pair(pair, pos);
+	}
+}
+
+/*
+ * Pair every entry under @other_root with an entry owned by @leader_hists,
+ * adding a dummy below @leader_root where needed.  Returns 0, or -1 on error.
+ */
+static int hists__link_hierarchy(struct hists *leader_hists,
+				 struct hist_entry *parent,
+				 struct rb_root *leader_root,
+				 struct rb_root *other_root)
+{
+	struct hist_entry *pos, *leader, *cand;
+	struct rb_node *nd;
+
+	for (nd = rb_first(other_root); nd != NULL; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct hist_entry, rb_node_in);
+		leader = NULL;
+
+		if (!hist_entry__has_pairs(pos)) {
+			leader = add_dummy_hierarchy_entry(leader_hists,
+							   leader_root, pos);
+			if (leader == NULL)
+				return -1;
+			/* do not point parent in the pos */
+			leader->parent_he = parent;
+			hist_entry__add_pair(pos, leader);
+		} else {
+			list_for_each_entry(cand, &pos->pairs.head, pairs.node) {
+				if (cand->hists == leader_hists) {
+					leader = cand;
+					break;
+				}
+			}
+			if (leader == NULL)
+				return -1;
+		}
+
+		if (!pos->leaf &&
+		    hists__link_hierarchy(leader_hists, leader,
+					  &leader->hroot_in,
+					  &pos->hroot_in) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Look for entries of @other that have no pair yet and give each of them a
+ * counterpart in @leader: a dummy entry with period=0, nr_events=0 that only
+ * serves as the list header.
+ * Returns 0 on success, -1 when such an entry could not be obtained.
+ */
+int hists__link(struct hists *leader, struct hists *other)
+{
+	struct rb_root *src;
+	struct rb_node *it;
+
+	/* hierarchy report always collapses entries */
+	if (symbol_conf.report_hierarchy)
+		return hists__link_hierarchy(leader, NULL,
+					     &leader->entries_collapsed,
+					     &other->entries_collapsed);
+
+	src = hists__has(other, need_collapse) ? &other->entries_collapsed :
+						  other->entries_in;
+
+	for (it = rb_first(src); it != NULL; it = rb_next(it)) {
+		struct hist_entry *theirs, *head;
+
+		theirs = rb_entry(it, struct hist_entry, rb_node_in);
+		if (hist_entry__has_pairs(theirs))
+			continue;
+
+		head = hists__add_dummy_entry(leader, theirs);
+		if (head == NULL)
+			return -1;
+
+		hist_entry__add_pair(theirs, head);
+	}
+
+	return 0;
+}
+
+/*
+ * Account the cycle counts found in a sample's branch stack.  Nothing is done
+ * unless @bs is non-empty and its first entry carries a cycle count.
+ */
+void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
+			  struct perf_sample *sample, bool nonany_branch_mode)
+{
+	struct addr_map_symbol *last_to = NULL;
+	struct branch_info *resolved;
+	int idx;
+
+	/* If we have branch cycles always annotate them. */
+	if (!bs || !bs->nr || !bs->entries[0].flags.cycles)
+		return;
+
+	resolved = sample__resolve_bstack(sample, al);
+	if (!resolved)
+		return;
+
+	/*
+	 * Errors are ignored so that the remaining entries still get
+	 * processed.  perf stores branches reversed from program order,
+	 * hence the backwards walk.  Non standard branch modes always
+	 * force no IPC, i.e. no previous branch target is passed.
+	 */
+	for (idx = bs->nr - 1; idx >= 0; idx--) {
+		addr_map_symbol__account_cycles(&resolved[idx].from,
+			nonany_branch_mode ? NULL : last_to,
+			resolved[idx].flags.cycles);
+		last_to = &resolved[idx].to;
+	}
+	free(resolved);
+}
+
+/* Print each evsel's stats to @fp; returns the summed print results. */
+size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp)
+{
+	struct perf_evsel *evsel;
+	size_t printed = 0;
+
+	evlist__for_each_entry(evlist, evsel) {
+		printed += fprintf(fp, "%s stats:\n", perf_evsel__name(evsel));
+		printed += events_stats__fprintf(&evsel__hists(evsel)->stats, fp);
+	}
+	return printed;
+}
+
+u64 hists__total_period(struct hists *hists)
+{
+	/* With relative filtering only the non-filtered period counts. */
+	return !symbol_conf.filter_relative ? hists->stats.total_period :
+	       hists->stats.total_non_filtered_period;
+}
+
+/*
+ * Format the title line of @hists, including the active UID/thread/DSO/socket
+ * filters, into @bf (at most @size bytes); returns the characters written.
+ */
+int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq)
+{
+	char unit, ref[30] = " show reference callgraph, ";
+	int printed, socket_id = hists->socket_filter;
+	const struct dso *dso = hists->dso_filter;
+	const struct thread *thread = hists->thread_filter;
+	unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+	u64 nr_events = hists->stats.total_period;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
+	const char *ev_name = perf_evsel__name(evsel);
+	char buf[512], sample_freq_str[64] = "";
+	bool enable_ref = false;
+
+	if (symbol_conf.filter_relative) {
+		nr_samples = hists->stats.nr_non_filtered_samples;
+		nr_events = hists->stats.total_non_filtered_period;
+	}
+
+	if (perf_evsel__is_group_event(evsel)) {
+		struct perf_evsel *pos;
+
+		perf_evsel__group_desc(evsel, buf, sizeof(buf));
+		ev_name = buf;
+		for_each_group_member(pos, evsel) {
+			struct hists *pos_hists = evsel__hists(pos);
+
+			if (symbol_conf.filter_relative) {
+				nr_samples += pos_hists->stats.nr_non_filtered_samples;
+				nr_events += pos_hists->stats.total_non_filtered_period;
+			} else {
+				nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE];
+				nr_events += pos_hists->stats.total_period;
+			}
+		}
+	}
+
+	if (symbol_conf.show_ref_callgraph &&
+	    strstr(ev_name, "call-graph=no"))
+		enable_ref = true;
+
+	if (show_freq)
+		scnprintf(sample_freq_str, sizeof(sample_freq_str),
+			  " %" PRIu64 " Hz,", (u64)evsel->attr.sample_freq);	/* 64-bit field */
+
+	nr_samples = convert_unit(nr_samples, &unit);
+	printed = scnprintf(bf, size,
+			   "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
+			   nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
+			   ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+
+	/* scnprintf() returns what was stored, so 'size - printed' cannot wrap */
+	if (hists->uid_filter_str)
+		printed += scnprintf(bf + printed, size - printed,
+				    ", UID: %s", hists->uid_filter_str);
+	if (thread) {
+		if (hists__has(hists, thread))
+			printed += scnprintf(bf + printed, size - printed,
+				    ", Thread: %s(%d)",
+				     (thread->comm_set ? thread__comm_str(thread) : ""),
+				    thread->tid);
+		else
+			printed += scnprintf(bf + printed, size - printed,
+				    ", Thread: %s",
+				     (thread->comm_set ? thread__comm_str(thread) : ""));
+	}
+	if (dso)
+		printed += scnprintf(bf + printed, size - printed,
+				    ", DSO: %s", dso->short_name);
+	if (socket_id > -1)
+		printed += scnprintf(bf + printed, size - printed,
+				    ", Processor Socket: %d", socket_id);
+
+	return printed;
+}
+
+/* Set symbol_conf.filter_relative from "relative"/"absolute"; -1 otherwise. */
+int parse_filter_percentage(const struct option *opt __maybe_unused,
+			    const char *arg, int unset __maybe_unused)
+{
+	const bool relative = strcmp(arg, "relative") == 0;
+
+	if (!relative && strcmp(arg, "absolute") != 0) {
+		pr_debug("Invalid percentage: %s\n", arg);
+		return -1;
+	}
+
+	symbol_conf.filter_relative = relative;
+	return 0;
+}
+
+/* Handle the "hist.percentage" variable; any other @var is ignored (0). */
+int perf_hist_config(const char *var, const char *value)
+{
+	if (strcmp(var, "hist.percentage") != 0)
+		return 0;
+	return parse_filter_percentage(NULL, value, 0);
+}
+
+/* Put @hists into its empty initial state, using @hpp_list; returns 0. */
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
+{
+	memset(hists, 0, sizeof(*hists));
+	hists->hpp_list = hpp_list;
+	hists->socket_filter = -1;	/* no socket filter */
+	hists->entries = hists->entries_collapsed = RB_ROOT;
+	hists->entries_in_array[1] = hists->entries_in_array[0] = RB_ROOT;
+	hists->entries_in = &hists->entries_in_array[0];
+	INIT_LIST_HEAD(&hists->hpp_formats);
+	pthread_mutex_init(&hists->lock, NULL);
+	return 0;
+}
+
+/* Erase every node of @root and delete the hist_entry embedding it. */
+static void hists__delete_remaining_entries(struct rb_root *root)
+{
+	struct rb_node *first;
+
+	for (first = rb_first(root); first != NULL; first = rb_first(root)) {
+		struct hist_entry *victim;
+
+		victim = rb_entry(first, struct hist_entry, rb_node_in);
+		rb_erase(first, root);
+		hist_entry__delete(victim);
+	}
+}
+
+static void hists__delete_all_entries(struct hists *hists)
+{
+	hists__delete_entries(hists);	/* first, then sweep what is left: */
+	hists__delete_remaining_entries(hists->entries_in_array);	/* [0] */
+	hists__delete_remaining_entries(hists->entries_in_array + 1);	/* [1] */
+	hists__delete_remaining_entries(&hists->entries_collapsed);
+}
+
+/* Exit callback (see hists__init()): free the entries, then hpp_formats. */
+static void hists_evsel__exit(struct perf_evsel *evsel)
+{
+	struct hists *hists = evsel__hists(evsel);
+	struct perf_hpp_list_node *level, *next_level;
+	struct perf_hpp_fmt *fmt, *next_fmt;
+
+	hists__delete_all_entries(hists);
+	list_for_each_entry_safe(level, next_level, &hists->hpp_formats, list) {
+		perf_hpp_list__for_each_format_safe(&level->hpp, fmt, next_fmt) {
+			list_del(&fmt->list);
+			free(fmt);
+		}
+		list_del(&level->list);
+		free(level);
+	}
+}
+
+/* Initialise the hists belonging to @evsel with the global perf_hpp_list. */
+static int hists_evsel__init(struct perf_evsel *evsel)
+{
+	/* __hists__init() always returns 0, so there is nothing to check. */
+	__hists__init(evsel__hists(evsel), &perf_hpp_list);
+	return 0;
+}
+
+/*
+ * The hist_entries stored in the rbtrees are freed by hists_evsel__exit(),
+ * which hists__init() below registers together with hists_evsel__init().
+ */
+
+/* Register the hists_evsel size and its init/exit callbacks; 0 if that worked. */
+int hists__init(void)
+{
+	int ret = perf_evsel__object_config(sizeof(struct hists_evsel),
+			hists_evsel__init, hists_evsel__exit);
+
+	if (ret != 0)
+		fputs("FATAL ERROR: Couldn't setup hists class\n", stderr);
+	return ret;
+}
+
+void perf_hpp_list__init(struct perf_hpp_list *list)
+{
+	INIT_LIST_HEAD(&list->sorts);	/* formats linked through ->sort_list */
+	INIT_LIST_HEAD(&list->fields);	/* formats linked through ->list */
+}
diff --git a/util/hist.h b/util/hist.h
new file mode 100644
index 0000000..fbabfd8
--- /dev/null
+++ b/util/hist.h
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_HIST_H
+#define __PERF_HIST_H
+
+#include <linux/types.h>
+#include <pthread.h>
+#include "callchain.h"
+#include "evsel.h"
+#include "header.h"
+#include "color.h"
+#include "ui/progress.h"
+
+struct hist_entry;
+struct hist_entry_ops;
+struct addr_location;
+struct symbol;
+
+enum hist_filter {	/* NOTE(review): presumably the 'type' given to hist_entry__filter() - confirm */
+	HIST_FILTER__DSO,
+	HIST_FILTER__THREAD,
+	HIST_FILTER__PARENT,
+	HIST_FILTER__SYMBOL,
+	HIST_FILTER__GUEST,
+	HIST_FILTER__HOST,
+	HIST_FILTER__SOCKET,
+	HIST_FILTER__C2C,
+};
+
+enum hist_column {	/* column ids; struct hists keeps a col_len[] slot for each */
+	HISTC_SYMBOL,
+	HISTC_DSO,
+	HISTC_THREAD,
+	HISTC_COMM,
+	HISTC_CGROUP_ID,
+	HISTC_PARENT,
+	HISTC_CPU,
+	HISTC_SOCKET,
+	HISTC_SRCLINE,
+	HISTC_SRCFILE,
+	HISTC_MISPREDICT,
+	HISTC_IN_TX,
+	HISTC_ABORT,
+	HISTC_SYMBOL_FROM,
+	HISTC_SYMBOL_TO,
+	HISTC_DSO_FROM,
+	HISTC_DSO_TO,
+	HISTC_LOCAL_WEIGHT,
+	HISTC_GLOBAL_WEIGHT,
+	HISTC_MEM_DADDR_SYMBOL,
+	HISTC_MEM_DADDR_DSO,
+	HISTC_MEM_PHYS_DADDR,
+	HISTC_MEM_LOCKED,
+	HISTC_MEM_TLB,
+	HISTC_MEM_LVL,
+	HISTC_MEM_SNOOP,
+	HISTC_MEM_DCACHELINE,
+	HISTC_MEM_IADDR_SYMBOL,
+	HISTC_TRANSACTION,
+	HISTC_CYCLES,
+	HISTC_SRCLINE_FROM,
+	HISTC_SRCLINE_TO,
+	HISTC_TRACE,
+	HISTC_SYM_SIZE,
+	HISTC_DSO_SIZE,
+	HISTC_NR_COLS, /* Last entry */
+};
+
+struct thread;
+struct dso;
+
+struct hists {
+	struct rb_root		entries_in_array[2];
+	struct rb_root		*entries_in;	/* __hists__init() points it at entries_in_array[0] */
+	struct rb_root		entries;
+	struct rb_root		entries_collapsed;	/* searched instead of *entries_in when need_collapse is set */
+	u64			nr_entries;
+	u64			nr_non_filtered_entries;
+	u64			callchain_period;
+	u64			callchain_non_filtered_period;
+	struct thread		*thread_filter;	/* non-NULL counts as an active filter, see hists__has_filter() */
+	const struct dso	*dso_filter;	/* likewise */
+	const char		*uid_filter_str;
+	const char		*symbol_filter_str;	/* likewise */
+	pthread_mutex_t		lock;	/* initialised in __hists__init() */
+	struct events_stats	stats;
+	u64			event_stream;
+	u16			col_len[HISTC_NR_COLS];	/* one slot per enum hist_column */
+	int			socket_filter;	/* -1 means no socket filter */
+	struct perf_hpp_list	*hpp_list;	/* what hists__has() reads its flags from */
+	struct list_head	hpp_formats;	/* struct perf_hpp_list_node entries, linked by ->list */
+	int			nr_hpp_node;
+};
+
+#define hists__has(__h, __f) ((__h)->hpp_list->__f)	/* member __f of __h's perf_hpp_list */
+
+struct hist_entry_iter;
+
+struct hist_iter_ops {	/* callback table of a hist_entry_iter (its ->ops); see the hist_iter_* externs */
+	int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*next_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *);
+	int (*finish_entry)(struct hist_entry_iter *, struct addr_location *);
+};
+
+struct hist_entry_iter {
+	int total;
+	int curr;
+
+	bool hide_unresolved;
+
+	struct perf_evsel *evsel;
+	struct perf_sample *sample;
+	struct hist_entry *he;
+	struct symbol *parent;
+	void *priv;
+
+	const struct hist_iter_ops *ops;	/* NOTE(review): presumably driven by hist_entry_iter__add() - confirm */
+	/* user-defined callback function (optional) */
+	int (*add_entry_cb)(struct hist_entry_iter *iter,
+			    struct addr_location *al, bool single, void *arg);
+};
+
+extern const struct hist_iter_ops hist_iter_normal;
+extern const struct hist_iter_ops hist_iter_branch;
+extern const struct hist_iter_ops hist_iter_mem;
+extern const struct hist_iter_ops hist_iter_cumulative;
+
+struct hist_entry *hists__add_entry(struct hists *hists,
+				    struct addr_location *al,
+				    struct symbol *parent,
+				    struct branch_info *bi,
+				    struct mem_info *mi,
+				    struct perf_sample *sample,
+				    bool sample_self);
+
+struct hist_entry *hists__add_entry_ops(struct hists *hists,
+					struct hist_entry_ops *ops,
+					struct addr_location *al,
+					struct symbol *sym_parent,
+					struct branch_info *bi,
+					struct mem_info *mi,
+					struct perf_sample *sample,
+					bool sample_self);
+
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+			 int max_stack_depth, void *arg);
+
+struct perf_hpp;
+struct perf_hpp_fmt;
+
+int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
+int hist_entry__transaction_len(void);
+int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
+			      struct hists *hists);
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+				   struct perf_hpp_fmt *fmt, int printed);
+void hist_entry__delete(struct hist_entry *he);
+
+typedef int (*hists__resort_cb_t)(struct hist_entry *he);
+
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog);
+void hists__output_resort(struct hists *hists, struct ui_progress *prog);
+void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
+			     hists__resort_cb_t cb);
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
+
+void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
+void hists__delete_entries(struct hists *hists);
+void hists__output_recalc_col_len(struct hists *hists, int max_rows);
+
+u64 hists__total_period(struct hists *hists);
+void hists__reset_stats(struct hists *hists);
+void hists__inc_stats(struct hists *hists, struct hist_entry *h);
+void hists__inc_nr_events(struct hists *hists, u32 type);
+void hists__inc_nr_samples(struct hists *hists, bool filtered);
+void events_stats__inc(struct events_stats *stats, u32 type);
+size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
+
+size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
+		      int max_cols, float min_pcnt, FILE *fp,
+		      bool use_callchain);
+size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp);
+
+void hists__filter_by_dso(struct hists *hists);
+void hists__filter_by_thread(struct hists *hists);
+void hists__filter_by_symbol(struct hists *hists);
+void hists__filter_by_socket(struct hists *hists);
+
+static inline bool hists__has_filter(struct hists *hists)
+{
+	/* thread, DSO and symbol filters are pointers; socket uses -1 for "off" */
+	return hists->socket_filter >= 0 || hists->symbol_filter_str != NULL ||
+	       hists->dso_filter != NULL || hists->thread_filter != NULL;
+}
+
+u16 hists__col_len(struct hists *hists, enum hist_column col);
+void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len);
+bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len);
+void hists__reset_col_len(struct hists *hists);
+void hists__calc_col_len(struct hists *hists, struct hist_entry *he);
+
+void hists__match(struct hists *leader, struct hists *other);
+int hists__link(struct hists *leader, struct hists *other);
+
+struct hists_evsel {
+	struct perf_evsel evsel;	/* must stay first: evsel__hists() casts a perf_evsel * to this struct */
+	struct hists	  hists;	/* hists_to_evsel() maps this back with container_of() */
+};
+
+static inline struct perf_evsel *hists_to_evsel(struct hists *hists)
+{
+	/* @hists must be the member embedded in a struct hists_evsel */
+	return &container_of(hists, struct hists_evsel, hists)->evsel;
+}
+
+static inline struct hists *evsel__hists(struct perf_evsel *evsel)
+{
+	/* relies on 'evsel' being the first member of struct hists_evsel */
+	return &((struct hists_evsel *)evsel)->hists;
+}
+
+int hists__init(void);
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
+
+struct rb_root *hists__get_rotate_entries_in(struct hists *hists);
+
+struct perf_hpp {
+	char *buf;	/* moved forward by advance_hpp() */
+	size_t size;	/* reduced by the same amount in advance_hpp() */
+	const char *sep;
+	void *ptr;
+};
+
+struct perf_hpp_fmt {
+	const char *name;
+	int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		      struct hists *hists, int line, int *span);
+	int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		     struct hists *hists);
+	int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		     struct hist_entry *he);
+	int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		     struct hist_entry *he);
+	int64_t (*cmp)(struct perf_hpp_fmt *fmt,
+		       struct hist_entry *a, struct hist_entry *b);
+	int64_t (*collapse)(struct perf_hpp_fmt *fmt,
+			    struct hist_entry *a, struct hist_entry *b);	/* <0, 0, >0 ordering of a vs b */
+	int64_t (*sort)(struct perf_hpp_fmt *fmt,
+			struct hist_entry *a, struct hist_entry *b);
+	bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
+	void (*free)(struct perf_hpp_fmt *fmt);
+
+	struct list_head list;	/* link in perf_hpp_list->fields */
+	struct list_head sort_list;	/* link in perf_hpp_list->sorts */
+	bool elide;	/* when set, perf_hpp__should_skip() returns true */
+	int len;
+	int user_len;
+	int idx;
+	int level;
+};
+
+struct perf_hpp_list {
+	struct list_head fields;	/* perf_hpp_fmt entries linked by ->list */
+	struct list_head sorts;	/* perf_hpp_fmt entries linked by ->sort_list */
+
+	int nr_header_lines;
+	int need_collapse;	/* this and the flags below are read with hists__has() */
+	int parent;
+	int sym;
+	int dso;
+	int socket;
+	int thread;
+	int comm;
+};
+
+extern struct perf_hpp_list perf_hpp_list;
+
+struct perf_hpp_list_node {
+	struct list_head	list;	/* link in hists->hpp_formats */
+	struct perf_hpp_list	hpp;	/* its ->fields formats are walked through this member */
+	int			level;
+	bool			skip;
+};
+
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+				    struct perf_hpp_fmt *format);
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+					struct perf_hpp_fmt *format);
+void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list,
+				       struct perf_hpp_fmt *format);
+
+static inline void perf_hpp__column_register(struct perf_hpp_fmt *format)
+{
+	perf_hpp_list__column_register(&perf_hpp_list, format);	/* on the global perf_hpp_list */
+}
+
+static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
+{
+	perf_hpp_list__register_sort_field(&perf_hpp_list, format);	/* on the global perf_hpp_list */
+}
+
+static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format)
+{
+	perf_hpp_list__prepend_sort_field(&perf_hpp_list, format);	/* on the global perf_hpp_list */
+}
+
+#define perf_hpp_list__for_each_format(_list, format) \
+	list_for_each_entry(format, &(_list)->fields, list)	/* 'format' walks ->fields */
+
+#define perf_hpp_list__for_each_format_safe(_list, format, tmp)	\
+	list_for_each_entry_safe(format, tmp, &(_list)->fields, list)	/* 'tmp' holds the next entry */
+
+#define perf_hpp_list__for_each_sort_list(_list, format) \
+	list_for_each_entry(format, &(_list)->sorts, sort_list)	/* 'format' walks ->sorts */
+
+#define perf_hpp_list__for_each_sort_list_safe(_list, format, tmp)	\
+	list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list)	/* 'tmp' holds the next entry */
+
+#define hists__for_each_format(hists, format) \
+	perf_hpp_list__for_each_format((hists)->hpp_list, format)	/* 'format' is the caller's cursor */
+
+#define hists__for_each_sort_list(hists, format) \
+	perf_hpp_list__for_each_sort_list((hists)->hpp_list, format)	/* 'format' is the caller's cursor */
+
+extern struct perf_hpp_fmt perf_hpp__format[];
+
+enum {
+	/* Matches perf_hpp__format array. */
+	PERF_HPP__OVERHEAD,
+	PERF_HPP__OVERHEAD_SYS,
+	PERF_HPP__OVERHEAD_US,
+	PERF_HPP__OVERHEAD_GUEST_SYS,
+	PERF_HPP__OVERHEAD_GUEST_US,
+	PERF_HPP__OVERHEAD_ACC,
+	PERF_HPP__SAMPLES,
+	PERF_HPP__PERIOD,
+
+	PERF_HPP__MAX_INDEX	/* count of the above; multiplier in perf_hpp__color_overhead() */
+};
+
+void perf_hpp__init(void);
+void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
+void perf_hpp__cancel_cumulate(void);
+void perf_hpp__setup_output_field(struct perf_hpp_list *list);
+void perf_hpp__reset_output_field(struct perf_hpp_list *list);
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+				  struct perf_evlist *evlist);
+
+
+bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format);
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists);
+bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg);
+
+/*
+ * A format is skipped when it is elided, or when it is a dynamic entry that
+ * is not defined for @hists.
+ */
+static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format,
+					 struct hists *hists)
+{
+	if (format->elide || !perf_hpp__is_dynamic_entry(format))
+		return format->elide;
+
+	return !perf_hpp__defined_dynamic_entry(format, hists);
+}
+
+void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
+void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists);
+void perf_hpp__set_user_width(const char *width_list_str);
+void hists__reset_column_width(struct hists *hists);
+
+typedef u64 (*hpp_field_fn)(struct hist_entry *he);
+typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front);
+typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...);
+
+int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+	     struct hist_entry *he, hpp_field_fn get_field,
+	     const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent);
+int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		 struct hist_entry *he, hpp_field_fn get_field,
+		 const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent);
+
+static inline void advance_hpp(struct perf_hpp *hpp, int inc)
+{
+	hpp->size = hpp->size - inc;	/* @inc bytes fewer remain ... */
+	hpp->buf = hpp->buf + inc;	/* ... after the new start of the buffer */
+}
+
+static inline size_t perf_hpp__use_color(void)
+{
+	return symbol_conf.field_sep ? 0 : 1;	/* 1 only while no field separator is set */
+}
+
+static inline size_t perf_hpp__color_overhead(void)
+{
+	/* COLOR_MAXLEN plus the reset string, once per PERF_HPP__* index */
+	if (!perf_hpp__use_color())
+		return 0;
+	return (COLOR_MAXLEN + sizeof(PERF_COLOR_RESET)) * PERF_HPP__MAX_INDEX;
+}
+
+struct perf_evlist;
+
+struct hist_browser_timer {
+	void (*timer)(void *arg);
+	void *arg;	/* NOTE(review): presumably the argument handed to ->timer() - confirm */
+	int refresh;
+};
+
+#ifdef HAVE_SLANG_SUPPORT
+#include "../ui/keysyms.h"
+int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
+			     struct hist_browser_timer *hbt);
+
+int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
+			     struct hist_browser_timer *hbt);
+
+int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
+				  struct hist_browser_timer *hbt,
+				  float min_pcnt,
+				  struct perf_env *env,
+				  bool warn_lost_event);
+int script_browse(const char *script_opt);
+#else
+static inline
+int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
+				  const char *help __maybe_unused,
+				  struct hist_browser_timer *hbt __maybe_unused,
+				  float min_pcnt __maybe_unused,
+				  struct perf_env *env __maybe_unused,
+				  bool warn_lost_event __maybe_unused)
+{
+	return 0;	/* stub used when HAVE_SLANG_SUPPORT is not defined */
+}
+static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
+					   struct perf_evsel *evsel __maybe_unused,
+					   struct hist_browser_timer *hbt __maybe_unused)
+{
+	return 0;	/* stub used when HAVE_SLANG_SUPPORT is not defined */
+}
+
+static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused,
+					   struct perf_evsel *evsel __maybe_unused,
+					   struct hist_browser_timer *hbt __maybe_unused)
+{
+	return 0;	/* stub used when HAVE_SLANG_SUPPORT is not defined */
+}
+
+static inline int script_browse(const char *script_opt __maybe_unused)
+{
+	return 0;	/* stub used when HAVE_SLANG_SUPPORT is not defined */
+}
+
+#define K_LEFT  -1000
+#define K_RIGHT -2000
+#define K_SWITCH_INPUT_DATA -3000
+#endif
+
+unsigned int hists__sort_list_width(struct hists *hists);
+unsigned int hists__overhead_width(struct hists *hists);
+
+void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
+			  struct perf_sample *sample, bool nonany_branch_mode);
+
+struct option;
+int parse_filter_percentage(const struct option *opt, const char *arg, int unset);
+int perf_hist_config(const char *var, const char *value);
+
+void perf_hpp_list__init(struct perf_hpp_list *list);
+
+enum hierarchy_move_dir {	/* second argument of __rb_hierarchy_next() */
+	HMD_NORMAL,	/* what rb_hierarchy_next() passes */
+	HMD_FORCE_SIBLING,
+	HMD_FORCE_CHILD,
+};
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node);
+struct rb_node *__rb_hierarchy_next(struct rb_node *node,
+				    enum hierarchy_move_dir hmd);
+struct rb_node *rb_hierarchy_prev(struct rb_node *node);
+
+static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
+{
+	return __rb_hierarchy_next(node, HMD_NORMAL);	/* shorthand for the HMD_NORMAL direction */
+}
+
+#define HIERARCHY_INDENT  3
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
+int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+			   struct perf_hpp_list *hpp_list);
+int hists__fprintf_headers(struct hists *hists, FILE *fp);
+int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq);
+
+static inline int hists__scnprintf_title(struct hists *hists, char *bf, size_t size)
+{
+	return __hists__scnprintf_title(hists, bf, size, true);	/* true: show_freq */
+}
+
+#endif	/* __PERF_HIST_H */
diff --git a/util/include/asm/asm-offsets.h b/util/include/asm/asm-offsets.h
new file mode 100644
index 0000000..3aff4cf
--- /dev/null
+++ b/util/include/asm/asm-offsets.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* stub */
diff --git a/util/include/asm/cpufeature.h b/util/include/asm/cpufeature.h
new file mode 100644
index 0000000..2270481
--- /dev/null
+++ b/util/include/asm/cpufeature.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PERF_CPUFEATURE_H
+#define PERF_CPUFEATURE_H
+
+/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define X86_FEATURE_REP_GOOD 0
+
+#endif	/* PERF_CPUFEATURE_H */
diff --git a/util/include/asm/dwarf2.h b/util/include/asm/dwarf2.h
new file mode 100644
index 0000000..e9876be
--- /dev/null
+++ b/util/include/asm/dwarf2.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PERF_DWARF2_H
+#define PERF_DWARF2_H
+
+/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */
+
+#define CFI_STARTPROC
+#define CFI_ENDPROC
+#define CFI_REMEMBER_STATE
+#define CFI_RESTORE_STATE
+
+#endif	/* PERF_DWARF2_H */
+
diff --git a/util/include/asm/swab.h b/util/include/asm/swab.h
new file mode 100644
index 0000000..ed53894
--- /dev/null
+++ b/util/include/asm/swab.h
@@ -0,0 +1 @@
+/* stub */
diff --git a/util/include/asm/system.h b/util/include/asm/system.h
new file mode 100644
index 0000000..710cecc
--- /dev/null
+++ b/util/include/asm/system.h
@@ -0,0 +1 @@
+/* Empty */
diff --git a/util/include/asm/uaccess.h b/util/include/asm/uaccess.h
new file mode 100644
index 0000000..6a6f4b9
--- /dev/null
+++ b/util/include/asm/uaccess.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_ASM_UACCESS_H_
+#define _PERF_ASM_UACCESS_H_
+
+#define __get_user(src, dest)						\
+({	/* userspace stand-in: plain load from (dest) into (src) */	\
+	(src) = *(dest);						\
+	0;	/* always evaluates to 0 */				\
+})
+
+#define get_user	__get_user
+
+#define access_ok(type, addr, size)	1
+
+#endif
diff --git a/util/include/dwarf-regs.h b/util/include/dwarf-regs.h
new file mode 100644
index 0000000..7d99a08
--- /dev/null
+++ b/util/include/dwarf-regs.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_DWARF_REGS_H_
+#define _PERF_DWARF_REGS_H_
+
+#ifdef HAVE_DWARF_SUPPORT
+const char *get_arch_regstr(unsigned int n);
+/*
+ * get_dwarf_regstr - Returns ftrace register string from DWARF regnum
+ * n: DWARF register number
+ * machine: ELF machine signature (EM_*)
+ */
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine);
+#endif
+
+#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+/*
+ * Arch should support fetching the offset of a register in pt_regs
+ * by its name. See kernel's regs_query_register_offset in
+ * arch/xxx/kernel/ptrace.c.
+ */
+int regs_query_register_offset(const char *name);
+#endif
+#endif
diff --git a/util/include/linux/ctype.h b/util/include/linux/ctype.h
new file mode 100644
index 0000000..a53d4ee
--- /dev/null
+++ b/util/include/linux/ctype.h
@@ -0,0 +1 @@
+#include "../util.h"
diff --git a/util/include/linux/linkage.h b/util/include/linux/linkage.h
new file mode 100644
index 0000000..f01d48a
--- /dev/null
+++ b/util/include/linux/linkage.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PERF_LINUX_LINKAGE_H_
+#define PERF_LINUX_LINKAGE_H_
+
+/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
+
+#define ENTRY(name)				\
+	.globl name;				\
+	name:
+
+#define ENDPROC(name)
+
+#endif	/* PERF_LINUX_LINKAGE_H_ */
diff --git a/util/intel-bts.c b/util/intel-bts.c
new file mode 100644
index 0000000..72db274
--- /dev/null
+++ b/util/intel-bts.c
@@ -0,0 +1,946 @@
+/*
+ * intel-bts.c: Intel Branch Trace Store (BTS) support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <endian.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "cpumap.h"
+#include "color.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "session.h"
+#include "util.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "debug.h"
+#include "tsc.h"
+#include "auxtrace.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+#include "intel-bts.h"
+
+/* Largest u64 value; used as the cut-off when flushing all queued data */
+#define MAX_TIMESTAMP (~0ULL)
+
+/* Error codes passed to auxtrace_synth_error() by this file */
+#define INTEL_BTS_ERR_NOINSN  5
+#define INTEL_BTS_ERR_LOST    9
+
+/*
+ * Trace records are read as little-endian values: byte-swap on big-endian
+ * hosts.  Otherwise the macro expands to nothing, so le64_to_cpu(x) is just
+ * the parenthesized expression (x).
+ */
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define le64_to_cpu bswap_64
+#else
+#define le64_to_cpu
+#endif
+
+/*
+ * Per-session Intel BTS decoding state.
+ *
+ * auxtrace:		embedded callbacks object; the session points at it and
+ *			the handlers recover this struct via container_of()
+ * queues/heap:		trace buffers per queue and the heap used to pick the
+ *			next queue to process (keyed by buffer reference)
+ * sampling_mode:	always set to false by the auxtrace info handler
+ * snapshot_mode:	taken from the auxtrace info event
+ * data_queued:		set when the queues were populated from the index
+ * pmu_type, tc, cap_user_time_zero: copied from the auxtrace info event
+ * synth_opts:		itrace options in effect for synthesized events
+ * sample_branches, branches_*: set up when the 'branches' event is synthesized
+ * num_events:		running count compared against synth_opts.initial_skip
+ */
+struct intel_bts {
+	struct auxtrace			auxtrace;
+	struct auxtrace_queues		queues;
+	struct auxtrace_heap		heap;
+	u32				auxtrace_type;
+	struct perf_session		*session;
+	struct machine			*machine;
+	bool				sampling_mode;
+	bool				snapshot_mode;
+	bool				data_queued;
+	u32				pmu_type;
+	struct perf_tsc_conversion	tc;
+	bool				cap_user_time_zero;
+	struct itrace_synth_opts	synth_opts;
+	bool				sample_branches;
+	u32				branches_filter;
+	u64				branches_sample_type;
+	u64				branches_id;
+	size_t				branches_event_size;
+	unsigned long			num_events;
+};
+
+/*
+ * Per-queue decoding state, stored in auxtrace_queue->priv.
+ *
+ * buffer:	next buffer to process (NULL if none has been fetched yet)
+ * on_heap:	true while the queue has an entry in bts->heap
+ * done:	set once the queue has run out of buffers (non-sampling mode)
+ * pid/tid/cpu:	initialized to -1; tid and cpu are copied from the queue
+ * intel_pt_insn, sample_flags: result of classifying the current branch
+ */
+struct intel_bts_queue {
+	struct intel_bts	*bts;
+	unsigned int		queue_nr;
+	struct auxtrace_buffer	*buffer;
+	bool			on_heap;
+	bool			done;
+	pid_t			pid;
+	pid_t			tid;
+	int			cpu;
+	u64			time;
+	struct intel_pt_insn	intel_pt_insn;
+	u32			sample_flags;
+};
+
+/*
+ * One record as laid out in the trace buffer.  Fields are converted with
+ * le64_to_cpu() when read; bit 0x10 of 'misc' is what intel_bts_dump()
+ * reports as "pred" (set) or "miss" (clear).
+ */
+struct branch {
+	u64 from;
+	u64 to;
+	u64 misc;
+};
+
+/*
+ * intel_bts_dump - hex-dump raw BTS data to stdout, one record per line
+ * @bts: unused
+ * @buf: raw trace bytes
+ * @len: number of bytes in @buf
+ *
+ * Each complete record is followed by its decoded from/to addresses and
+ * "pred"/"miss" depending on bit 0x10 of the misc field.  A trailing
+ * fragment shorter than one record is padded and flagged "Bad record!".
+ */
+static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
+			   unsigned char *buf, size_t len)
+{
+	struct branch *branch;
+	size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
+	const char *color = PERF_COLOR_BLUE;
+
+	color_fprintf(stdout, color,
+		      ". ... Intel BTS data: size %zu bytes\n",
+		      len);
+
+	while (len) {
+		if (len >= br_sz)
+			sz = br_sz;
+		else
+			sz = len;
+		printf(".");
+		/* pos is a size_t, so it needs the 'z' length modifier */
+		color_fprintf(stdout, color, "  %08zx: ", pos);
+		for (i = 0; i < sz; i++)
+			color_fprintf(stdout, color, " %02x", buf[i]);
+		/* Pad short records so the decoded column stays aligned */
+		for (; i < br_sz; i++)
+			color_fprintf(stdout, color, "   ");
+		if (len >= br_sz) {
+			branch = (struct branch *)buf;
+			color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
+				      le64_to_cpu(branch->from),
+				      le64_to_cpu(branch->to),
+				      le64_to_cpu(branch->misc) & 0x10 ?
+							"pred" : "miss");
+		} else {
+			color_fprintf(stdout, color, " Bad record!\n");
+		}
+		pos += sz;
+		buf += sz;
+		len -= sz;
+	}
+}
+
+/* Print a separator line, then dump the raw trace data of one event. */
+static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
+				 size_t len)
+{
+	fputs(".\n", stdout);
+	intel_bts_dump(bts, buf, len);
+}
+
+/*
+ * Synthesize an INTEL_BTS_ERR_LOST auxtrace error for the cpu/pid/tid of
+ * @sample and deliver it to the session.  Returns the delivery result.
+ */
+static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
+{
+	union perf_event ev;
+	int ret;
+
+	auxtrace_synth_error(&ev.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+			     INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
+			     sample->tid, 0, "Lost trace data");
+
+	ret = perf_session__deliver_synth_event(bts->session, &ev, NULL);
+	if (!ret)
+		return 0;
+
+	pr_err("Intel BTS: failed to deliver error event, error %d\n", ret);
+	return ret;
+}
+
+/*
+ * Allocate a zeroed per-queue state object bound to @bts and @queue_nr,
+ * with pid, tid and cpu set to -1.  Returns NULL on allocation failure.
+ */
+static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
+						     unsigned int queue_nr)
+{
+	struct intel_bts_queue *q = zalloc(sizeof(struct intel_bts_queue));
+
+	if (q) {
+		q->bts = bts;
+		q->queue_nr = queue_nr;
+		q->cpu = -1;
+		q->tid = -1;
+		q->pid = -1;
+	}
+
+	return q;
+}
+
+/*
+ * Prepare one auxtrace queue for decoding: create its private state on
+ * first use and, outside sampling mode, fetch its first buffer and put the
+ * queue on the heap keyed by that buffer's reference.
+ * Returns 0 on success (including "nothing to do") or a negative error.
+ */
+static int intel_bts_setup_queue(struct intel_bts *bts,
+				 struct auxtrace_queue *queue,
+				 unsigned int queue_nr)
+{
+	struct intel_bts_queue *q = queue->priv;
+	int err;
+
+	/* Queues without any buffers are left untouched */
+	if (list_empty(&queue->head))
+		return 0;
+
+	if (!q) {
+		q = intel_bts_alloc_queue(bts, queue_nr);
+		if (!q)
+			return -ENOMEM;
+		queue->priv = q;
+
+		if (queue->cpu != -1)
+			q->cpu = queue->cpu;
+		q->tid = queue->tid;
+	}
+
+	/* Nothing more to do if sampling, already queued or already primed */
+	if (bts->sampling_mode || q->on_heap || q->buffer)
+		return 0;
+
+	q->buffer = auxtrace_buffer__next(queue, NULL);
+	if (!q->buffer)
+		return 0;
+
+	err = auxtrace_heap__add(&bts->heap, queue_nr, q->buffer->reference);
+	if (err)
+		return err;
+
+	q->on_heap = true;
+	return 0;
+}
+
+static int intel_bts_setup_queues(struct intel_bts *bts)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < bts->queues.nr_queues; i++) {
+		ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
+					    i);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/* Set the queues up again if new data has arrived since the last call. */
+static inline int intel_bts_update_queues(struct intel_bts *bts)
+{
+	if (!bts->queues.new_data)
+		return 0;
+
+	bts->queues.new_data = false;
+	return intel_bts_setup_queues(bts);
+}
+
+/*
+ * Find where buffer B stops repeating the tail of buffer A.
+ *
+ * Tails of A (stepping one record at a time, starting with the longest tail
+ * that can fit in B) are compared with the start of B.  On the first match
+ * the returned pointer is just past the matching bytes in B; if no tail
+ * matches, B's start is returned.
+ */
+static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
+					     unsigned char *buf_b, size_t len_b)
+{
+	size_t offs = len_a > len_b ? len_a - len_b : 0;
+
+	while (offs < len_a) {
+		size_t tail = len_a - offs;
+
+		if (memcmp(buf_a + offs, buf_b, tail) == 0)
+			return buf_b + tail;
+
+		offs += sizeof(struct branch);
+	}
+
+	return buf_b;
+}
+
+/*
+ * Trim from buffer @b the leading data that repeats the end of the buffer
+ * queued before it, by setting use_data/use_size.  The first buffer of a
+ * queue is left alone.  Returns 0, or -EINVAL if no start could be found.
+ */
+static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
+				    struct auxtrace_buffer *b)
+{
+	struct auxtrace_buffer *prev;
+	void *new_start;
+
+	/* No predecessor: nothing to overlap with */
+	if (b->list.prev == &queue->head)
+		return 0;
+
+	prev = list_entry(b->list.prev, struct auxtrace_buffer, list);
+	new_start = intel_bts_find_overlap(prev->data, prev->size,
+					   b->data, b->size);
+	if (!new_start)
+		return -EINVAL;
+
+	b->use_size = b->data + b->size - new_start;
+	b->use_data = new_start;
+	return 0;
+}
+
+/*
+ * Build a PERF_RECORD_SAMPLE for one branch record and deliver it to the
+ * session.  While an initial skip count is configured, samples are counted
+ * and dropped until the count is exceeded.  With the inject option the
+ * sample is also serialized into the event.
+ * Returns 0 on success or the error from synthesis/delivery.
+ */
+static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
+					 struct branch *branch)
+{
+	struct intel_bts *bts = btsq->bts;
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event event;
+	int err;
+
+	if (bts->synth_opts.initial_skip &&
+	    bts->num_events++ <= bts->synth_opts.initial_skip)
+		return 0;
+
+	event.sample.header.type = PERF_RECORD_SAMPLE;
+	event.sample.header.misc = PERF_RECORD_MISC_USER;
+	event.sample.header.size = sizeof(struct perf_event_header);
+
+	/* Branch source and target */
+	sample.ip = le64_to_cpu(branch->from);
+	sample.addr = le64_to_cpu(branch->to);
+
+	/* Context taken from the queue */
+	sample.cpumode = PERF_RECORD_MISC_USER;
+	sample.pid = btsq->pid;
+	sample.tid = btsq->tid;
+	sample.cpu = btsq->cpu;
+	sample.id = bts->branches_id;
+	sample.stream_id = bts->branches_id;
+	sample.period = 1;
+
+	/* Classification of the branching instruction */
+	sample.flags = btsq->sample_flags;
+	sample.insn_len = btsq->intel_pt_insn.length;
+	memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
+
+	if (bts->synth_opts.inject) {
+		event.sample.header.size = bts->branches_event_size;
+		err = perf_event__synthesize_sample(&event,
+						    bts->branches_sample_type,
+						    0, &sample);
+		if (err)
+			return err;
+	}
+
+	err = perf_session__deliver_synth_event(bts->session, &event, &sample);
+	if (err)
+		pr_err("Intel BTS: failed to deliver branch event, error %d\n",
+		       err);
+
+	return err;
+}
+
+/*
+ * Read and decode the instruction at @ip in the address space of the
+ * queue's thread, storing the result in btsq->intel_pt_insn.
+ * Returns 0 on success, -1 if the thread, map, object code or decode
+ * is unavailable.
+ */
+static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
+{
+	struct machine *machine = btsq->bts->machine;
+	unsigned char insn_bytes[INTEL_PT_INSN_BUF_SZ];
+	struct addr_location al;
+	struct thread *thread;
+	uint8_t cpumode;
+	ssize_t nread;
+	int is_64bit;
+	int ret = -1;
+
+	cpumode = machine__kernel_ip(machine, ip) ? PERF_RECORD_MISC_KERNEL :
+						    PERF_RECORD_MISC_USER;
+
+	thread = machine__find_thread(machine, -1, btsq->tid);
+	if (!thread)
+		return -1;
+
+	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
+	if (!al.map || !al.map->dso)
+		goto out_put;
+
+	nread = dso__data_read_addr(al.map->dso, al.map, machine, ip,
+				    insn_bytes, INTEL_PT_INSN_BUF_SZ);
+	if (nread <= 0)
+		goto out_put;
+
+	/* Load maps to ensure dso->is_64_bit has been updated */
+	map__load(al.map);
+
+	is_64bit = al.map->dso->is_64_bit;
+
+	if (!intel_pt_get_insn(insn_bytes, nread, is_64bit,
+			       &btsq->intel_pt_insn))
+		ret = 0;
+
+out_put:
+	thread__put(thread);
+	return ret;
+}
+
+/*
+ * Synthesize an INTEL_BTS_ERR_NOINSN auxtrace error for the given
+ * cpu/pid/tid/ip and deliver it to the session.
+ * Returns the delivery result.
+ */
+static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
+				 pid_t tid, u64 ip)
+{
+	union perf_event ev;
+	int ret;
+
+	auxtrace_synth_error(&ev.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+			     INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
+			     "Failed to get instruction");
+
+	ret = perf_session__deliver_synth_event(bts->session, &ev, NULL);
+	if (!ret)
+		return 0;
+
+	pr_err("Intel BTS: failed to deliver error event, error %d\n", ret);
+	return ret;
+}
+
+/*
+ * Classify one branch record, setting btsq->sample_flags and
+ * btsq->intel_pt_insn.
+ *
+ * - from == 0: trace begin if 'to' is set, otherwise no flags
+ * - to == 0:   trace end
+ * - otherwise: decode the instruction at 'from'; a user-to-kernel branch
+ *              that did not decode to the expected flags is reported as an
+ *              asynchronous interrupt
+ *
+ * If the instruction cannot be read, flags are cleared and, when error
+ * synthesis is enabled, an error event is delivered.
+ * Returns 0, or the error from delivering that error event.
+ */
+static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
+				     struct branch *branch)
+{
+	/*
+	 * Convert the addresses once, as every other user of the record
+	 * does, so lookups use host byte order on big-endian hosts too.
+	 */
+	u64 from = le64_to_cpu(branch->from);
+	u64 to = le64_to_cpu(branch->to);
+	int err;
+
+	if (!from) {
+		if (to)
+			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+					     PERF_IP_FLAG_TRACE_BEGIN;
+		else
+			btsq->sample_flags = 0;
+		btsq->intel_pt_insn.length = 0;
+	} else if (!to) {
+		btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+				     PERF_IP_FLAG_TRACE_END;
+		btsq->intel_pt_insn.length = 0;
+	} else {
+		err = intel_bts_get_next_insn(btsq, from);
+		if (err) {
+			btsq->sample_flags = 0;
+			btsq->intel_pt_insn.length = 0;
+			if (!btsq->bts->synth_opts.errors)
+				return 0;
+			err = intel_bts_synth_error(btsq->bts, btsq->cpu,
+						    btsq->pid, btsq->tid,
+						    from);
+			return err;
+		}
+		btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
+		/* Check for an async branch into the kernel */
+		if (!machine__kernel_ip(btsq->bts->machine, from) &&
+		    machine__kernel_ip(btsq->bts->machine, to) &&
+		    btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
+					   PERF_IP_FLAG_CALL |
+					   PERF_IP_FLAG_SYSCALLRET))
+			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+					     PERF_IP_FLAG_CALL |
+					     PERF_IP_FLAG_ASYNC |
+					     PERF_IP_FLAG_INTERRUPT;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk every complete branch record in @buffer (or in its trimmed
+ * use_data/use_size window when set), classify it, feed it to the thread
+ * stack if requested and synthesize a branch sample unless it is filtered.
+ *
+ * Does nothing when branch samples are not being synthesized.
+ * Returns 0 on success or the first error encountered.
+ */
+static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
+				    struct auxtrace_buffer *buffer,
+				    struct thread *thread)
+{
+	struct branch *branch;
+	size_t sz, bsz = sizeof(struct branch);
+	u32 filter = btsq->bts->branches_filter;
+	int err = 0;
+
+	if (buffer->use_data) {
+		sz = buffer->use_size;
+		branch = buffer->use_data;
+	} else {
+		sz = buffer->size;
+		branch = buffer->data;
+	}
+
+	if (!btsq->bts->sample_branches)
+		return 0;
+
+	/* '>=' so that the final complete record is processed as well */
+	for (; sz >= bsz; branch += 1, sz -= bsz) {
+		/* All-zero records carry no branch */
+		if (!branch->from && !branch->to)
+			continue;
+		/* Only fails if an error event could not be delivered */
+		err = intel_bts_get_branch_type(btsq, branch);
+		if (err)
+			break;
+		if (btsq->bts->synth_opts.thread_stack)
+			thread_stack__event(thread, btsq->sample_flags,
+					    le64_to_cpu(branch->from),
+					    le64_to_cpu(branch->to),
+					    btsq->intel_pt_insn.length,
+					    buffer->buffer_nr + 1);
+		if (filter && !(filter & btsq->sample_flags))
+			continue;
+		err = intel_bts_synth_branch_sample(btsq, branch);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+/*
+ * Process the next buffer of one queue.
+ *
+ * On success, *timestamp (if non-NULL) is set to the reference of the
+ * following buffer when there is one.
+ *
+ * Returns 0 if a buffer was processed, 1 if the queue has nothing (more)
+ * to process, or a negative error code.
+ */
+static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
+{
+	struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
+	struct auxtrace_queue *queue;
+	struct thread *thread;
+	int err;
+
+	if (btsq->done)
+		return 1;
+
+	/* Look the thread up by tid alone until its pid is known */
+	if (btsq->pid == -1) {
+		thread = machine__find_thread(btsq->bts->machine, -1,
+					      btsq->tid);
+		if (thread)
+			btsq->pid = thread->pid_;
+	} else {
+		thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
+						 btsq->tid);
+	}
+
+	queue = &btsq->bts->queues.queue_array[btsq->queue_nr];
+
+	if (!buffer)
+		buffer = auxtrace_buffer__next(queue, NULL);
+
+	if (!buffer) {
+		/* Out of data; outside sampling mode the queue is finished */
+		if (!btsq->bts->sampling_mode)
+			btsq->done = 1;
+		err = 1;
+		goto out_put;
+	}
+
+	/* Currently there is no support for split buffers */
+	if (buffer->consecutive) {
+		err = -EINVAL;
+		goto out_put;
+	}
+
+	if (!buffer->data) {
+		int fd = perf_data__fd(btsq->bts->session->data);
+
+		buffer->data = auxtrace_buffer__get_data(buffer, fd);
+		if (!buffer->data) {
+			err = -ENOMEM;
+			goto out_put;
+		}
+	}
+
+	/* In snapshot mode, trim data repeated from the previous buffer */
+	if (btsq->bts->snapshot_mode && !buffer->consecutive &&
+	    intel_bts_do_fix_overlap(queue, buffer)) {
+		err = -ENOMEM;
+		goto out_put;
+	}
+
+	if (!btsq->bts->synth_opts.callchain &&
+	    !btsq->bts->synth_opts.thread_stack && thread &&
+	    (!old_buffer || btsq->bts->sampling_mode ||
+	     (btsq->bts->snapshot_mode && !buffer->consecutive)))
+		thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
+
+	err = intel_bts_process_buffer(btsq, buffer, thread);
+
+	auxtrace_buffer__drop_data(buffer);
+
+	/* Advance to the following buffer, if any */
+	btsq->buffer = auxtrace_buffer__next(queue, buffer);
+	if (btsq->buffer) {
+		if (timestamp)
+			*timestamp = btsq->buffer->reference;
+	} else {
+		if (!btsq->bts->sampling_mode)
+			btsq->done = 1;
+	}
+out_put:
+	/* thread may be NULL here if the lookup above found nothing */
+	thread__put(thread);
+	return err;
+}
+
+/*
+ * Process buffers of one queue until it reports that nothing is left.
+ * Returns 0 once the queue is drained, or a negative error code.
+ */
+static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
+{
+	u64 ts = 0;
+	int ret;
+
+	do {
+		ret = intel_bts_process_queue(btsq, &ts);
+	} while (!ret);
+
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Flush the first queue that belongs to the exiting thread @tid, if any.
+ * Returns the flush result, or 0 when no queue matches.
+ */
+static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
+{
+	struct auxtrace_queues *queues = &bts->queues;
+	unsigned int nr;
+
+	for (nr = 0; nr < queues->nr_queues; nr++) {
+		struct intel_bts_queue *btsq = queues->queue_array[nr].priv;
+
+		if (!btsq || btsq->tid != tid)
+			continue;
+
+		return intel_bts_flush_queue(btsq);
+	}
+
+	return 0;
+}
+
+/*
+ * Process queued buffers in heap order while the entry at the top of the
+ * heap has an ordinal not greater than @timestamp.  A queue that still has
+ * data is put back on the heap with its next buffer's reference.
+ * Returns 0 or a negative error code.
+ */
+static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
+{
+	while (bts->heap.heap_cnt &&
+	       bts->heap.heap_array[0].ordinal <= timestamp) {
+		unsigned int queue_nr = bts->heap.heap_array[0].queue_nr;
+		struct intel_bts_queue *btsq;
+		u64 next_ts = 0;
+		int ret;
+
+		btsq = bts->queues.queue_array[queue_nr].priv;
+
+		auxtrace_heap__pop(&bts->heap);
+
+		ret = intel_bts_process_queue(btsq, &next_ts);
+		if (ret < 0) {
+			/* Keep the queue on the heap even though it failed */
+			auxtrace_heap__add(&bts->heap, queue_nr, next_ts);
+			return ret;
+		}
+
+		if (ret) {
+			/* Queue has nothing more to offer for now */
+			btsq->on_heap = false;
+			continue;
+		}
+
+		ret = auxtrace_heap__add(&bts->heap, queue_nr, next_ts);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Per-event hook: decode queued trace data up to the event's time, flush
+ * the queue of an exiting thread and, when errors are being synthesized,
+ * report truncated AUX data as lost.  Does nothing when dumping the trace.
+ * Returns 0 or a negative error code.
+ */
+static int intel_bts_process_event(struct perf_session *session,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct perf_tool *tool)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+	u64 timestamp = 0;
+	int ret;
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events) {
+		pr_err("Intel BTS requires ordered events\n");
+		return -EINVAL;
+	}
+
+	/* Time values of 0 and (u64)-1 are treated as "no timestamp" */
+	if (sample->time && sample->time != (u64)-1)
+		timestamp = perf_time_to_tsc(sample->time, &bts->tc);
+
+	ret = intel_bts_update_queues(bts);
+	if (ret)
+		return ret;
+
+	ret = intel_bts_process_queues(bts, timestamp);
+	if (ret)
+		return ret;
+
+	if (event->header.type == PERF_RECORD_EXIT) {
+		ret = intel_bts_process_tid_exit(bts, event->fork.tid);
+		if (ret)
+			return ret;
+	}
+
+	if (event->header.type == PERF_RECORD_AUX &&
+	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
+	    bts->synth_opts.errors)
+		ret = intel_bts_lost(bts, sample);
+
+	return ret;
+}
+
+/*
+ * Queue the trace data of an auxtrace event, unless in sampling mode or
+ * the data was already queued.  When dumping the trace, the newly queued
+ * buffer is also printed.
+ * Returns 0 or a negative error code.
+ */
+static int intel_bts_process_auxtrace_event(struct perf_session *session,
+					    union perf_event *event,
+					    struct perf_tool *tool __maybe_unused)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+	struct auxtrace_buffer *buffer;
+	off_t data_offset;
+	int fd, ret;
+
+	if (bts->sampling_mode || bts->data_queued)
+		return 0;
+
+	fd = perf_data__fd(session->data);
+
+	if (perf_data__is_pipe(session->data)) {
+		data_offset = 0;
+	} else {
+		data_offset = lseek(fd, 0, SEEK_CUR);
+		if (data_offset == -1)
+			return -errno;
+	}
+
+	ret = auxtrace_queues__add_event(&bts->queues, session, event,
+					 data_offset, &buffer);
+	if (ret)
+		return ret;
+
+	/* Dump here now we have copied a piped trace out of the pipe */
+	if (dump_trace && auxtrace_buffer__get_data(buffer, fd)) {
+		intel_bts_dump_event(bts, buffer->data, buffer->size);
+		auxtrace_buffer__put_data(buffer);
+	}
+
+	return 0;
+}
+
+/*
+ * Flush hook: process everything still queued, regardless of timestamp.
+ * Does nothing when dumping the trace or in sampling mode.
+ * Returns 0, -EINVAL if the tool does not use ordered events, or another
+ * negative error code.
+ *
+ * @tool is dereferenced below, so it is not marked __maybe_unused.
+ */
+static int intel_bts_flush(struct perf_session *session,
+			   struct perf_tool *tool)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+	int ret;
+
+	if (dump_trace || bts->sampling_mode)
+		return 0;
+
+	if (!tool->ordered_events)
+		return -EINVAL;
+
+	ret = intel_bts_update_queues(bts);
+	if (ret < 0)
+		return ret;
+
+	return intel_bts_process_queues(bts, MAX_TIMESTAMP);
+}
+
+/*
+ * Release the per-queue state stored in auxtrace_queue->priv.
+ * @priv may be NULL; free(NULL) is a no-op, so no guard is needed.
+ */
+static void intel_bts_free_queue(void *priv)
+{
+	free(priv);
+}
+
+/* Free the private state of every queue, then the queues themselves. */
+static void intel_bts_free_events(struct perf_session *session)
+{
+	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+					     auxtrace);
+	struct auxtrace_queues *queues = &bts->queues;
+	unsigned int nr = 0;
+
+	while (nr < queues->nr_queues) {
+		struct auxtrace_queue *queue = &queues->queue_array[nr++];
+
+		intel_bts_free_queue(queue->priv);
+		queue->priv = NULL;
+	}
+
+	auxtrace_queues__free(queues);
+}
+
+/*
+ * Tear down all BTS state attached to the session: the heap, the queued
+ * events and finally the intel_bts object itself.
+ */
+static void intel_bts_free(struct perf_session *session)
+{
+	struct intel_bts *self = container_of(session->auxtrace,
+					      struct intel_bts, auxtrace);
+
+	auxtrace_heap__free(&self->heap);
+	/* Must run while session->auxtrace still points at this object */
+	intel_bts_free_events(session);
+	session->auxtrace = NULL;
+	free(self);
+}
+
+/*
+ * Glue for attribute synthesis: a perf_tool handed to the synthesizer and
+ * the session the resulting events are delivered to.  The callback gets
+ * back from the tool pointer to this struct via container_of().
+ */
+struct intel_bts_synth {
+	struct perf_tool dummy_tool;
+	struct perf_session *session;
+};
+
+/*
+ * Callback for perf_event__synthesize_attr(): forward the synthesized
+ * event to the session recorded alongside the dummy tool.
+ */
+static int intel_bts_event_synth(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_sample *sample __maybe_unused,
+				 struct machine *machine __maybe_unused)
+{
+	struct intel_bts_synth *synth;
+
+	synth = container_of(tool, struct intel_bts_synth, dummy_tool);
+
+	return perf_session__deliver_synth_event(synth->session, event, NULL);
+}
+
+/*
+ * Synthesize an attribute event for @attr with the single id @id and
+ * deliver it to @session.  Returns the synthesis result.
+ */
+static int intel_bts_synth_event(struct perf_session *session,
+				 struct perf_event_attr *attr, u64 id)
+{
+	struct intel_bts_synth synth;
+
+	memset(&synth, 0, sizeof(synth));
+	synth.session = session;
+
+	return perf_event__synthesize_attr(&synth.dummy_tool, attr, 1, &id,
+					   intel_bts_event_synth);
+}
+
+/*
+ * Set up the synthesized 'branches' event.
+ *
+ * The attribute is derived from the first selected event of the BTS PMU
+ * type that has ids.  If there is no such event, or branch synthesis is not
+ * requested, nothing is synthesized and 0 is returned.
+ * Returns 0 on success or the error from synthesizing the attribute.
+ */
+static int intel_bts_synth_events(struct intel_bts *bts,
+				  struct perf_session *session)
+{
+	struct perf_evlist *evlist = session->evlist;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
+	bool found = false;
+	u64 id;
+	int err;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == bts->pmu_type && evsel->ids) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		pr_debug("There are no selected events with Intel BTS data\n");
+		return 0;
+	}
+
+	if (!bts->synth_opts.branches)
+		return 0;
+
+	memset(&attr, 0, sizeof(struct perf_event_attr));
+	attr.size = sizeof(struct perf_event_attr);
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+	attr.sample_period = 1;
+
+	/* Start from the BTS event's sample type, minus time and cpu */
+	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
+	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+			    PERF_SAMPLE_PERIOD;
+	attr.sample_type &= ~(u64)(PERF_SAMPLE_TIME | PERF_SAMPLE_CPU);
+	attr.sample_type |= PERF_SAMPLE_ADDR;
+
+	/* Everything else is inherited from the BTS event */
+	attr.read_format = evsel->attr.read_format;
+	attr.sample_id_all = evsel->attr.sample_id_all;
+	attr.exclude_user = evsel->attr.exclude_user;
+	attr.exclude_kernel = evsel->attr.exclude_kernel;
+	attr.exclude_hv = evsel->attr.exclude_hv;
+	attr.exclude_host = evsel->attr.exclude_host;
+	attr.exclude_guest = evsel->attr.exclude_guest;
+
+	/* Derive an id from the BTS event's first id; never use 0 */
+	id = evsel->id[0] + 1000000000;
+	if (!id)
+		id = 1;
+
+	pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+		 id, (u64)attr.sample_type);
+	err = intel_bts_synth_event(session, &attr, id);
+	if (err) {
+		pr_err("%s: failed to synthesize 'branches' event type\n",
+		       __func__);
+		return err;
+	}
+
+	bts->sample_branches = true;
+	bts->branches_sample_type = attr.sample_type;
+	bts->branches_id = id;
+	/*
+	 * We only use sample types from PERF_SAMPLE_MASK so we can use
+	 * __perf_evsel__sample_size() here.
+	 */
+	bts->branches_event_size = sizeof(struct sample_event) +
+			__perf_evsel__sample_size(attr.sample_type);
+
+	return 0;
+}
+
+/*
+ * printf formats for dumping the auxtrace info priv[] values, indexed by
+ * the INTEL_BTS_* enum.  Each format consumes exactly one 64-bit argument.
+ */
+static const char * const intel_bts_info_fmts[] = {
+	[INTEL_BTS_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
+	[INTEL_BTS_TIME_SHIFT]		= "  Time Shift         %"PRIu64"\n",
+	[INTEL_BTS_TIME_MULT]		= "  Time Multiplier    %"PRIu64"\n",
+	[INTEL_BTS_TIME_ZERO]		= "  Time Zero          %"PRIu64"\n",
+	[INTEL_BTS_CAP_USER_TIME_ZERO]	= "  Cap Time Zero      %"PRId64"\n",
+	[INTEL_BTS_SNAPSHOT_MODE]	= "  Snapshot mode      %"PRId64"\n",
+};
+
+/*
+ * When dumping the trace, print arr[start..finish] (inclusive) using the
+ * matching entries of intel_bts_info_fmts.
+ */
+static void intel_bts_print_info(u64 *arr, int start, int finish)
+{
+	int idx = start;
+
+	if (!dump_trace)
+		return;
+
+	while (idx <= finish) {
+		fprintf(stdout, intel_bts_info_fmts[idx], arr[idx]);
+		idx++;
+	}
+}
+
+/*
+ * intel_bts_process_auxtrace_info - set up BTS decoding for a session
+ * @event: the auxtrace info event carrying the BTS parameters in priv[]
+ * @session: session to attach the decoder to
+ *
+ * Allocates the decoder state, copies the parameters out of the event,
+ * installs the auxtrace callbacks on the session and, unless only dumping
+ * the trace, sets up event synthesis and processes the auxtrace index.
+ *
+ * Returns 0 on success, -EINVAL if the event is too small to hold all the
+ * priv[] entries that are read, -ENOMEM on allocation failure, or another
+ * negative error code.
+ */
+int intel_bts_process_auxtrace_info(union perf_event *event,
+				    struct perf_session *session)
+{
+	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+	/*
+	 * priv[] is read up to and including index INTEL_BTS_SNAPSHOT_MODE,
+	 * so that many entries plus one must be present.
+	 */
+	size_t min_sz = sizeof(u64) * (INTEL_BTS_SNAPSHOT_MODE + 1);
+	struct intel_bts *bts;
+	int err;
+
+	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
+					min_sz)
+		return -EINVAL;
+
+	bts = zalloc(sizeof(struct intel_bts));
+	if (!bts)
+		return -ENOMEM;
+
+	err = auxtrace_queues__init(&bts->queues);
+	if (err)
+		goto err_free;
+
+	bts->session = session;
+	bts->machine = &session->machines.host; /* No kvm support */
+	bts->auxtrace_type = auxtrace_info->type;
+	bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
+	bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
+	bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
+	bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
+	bts->cap_user_time_zero =
+			auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
+	bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];
+
+	bts->sampling_mode = false;
+
+	bts->auxtrace.process_event = intel_bts_process_event;
+	bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
+	bts->auxtrace.flush_events = intel_bts_flush;
+	bts->auxtrace.free_events = intel_bts_free_events;
+	bts->auxtrace.free = intel_bts_free;
+	session->auxtrace = &bts->auxtrace;
+
+	intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
+			     INTEL_BTS_SNAPSHOT_MODE);
+
+	/* Dumping needs neither synthesis options nor the index */
+	if (dump_trace)
+		return 0;
+
+	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
+		bts->synth_opts = *session->itrace_synth_opts;
+	} else {
+		itrace_synth_opts__set_default(&bts->synth_opts);
+		if (session->itrace_synth_opts)
+			bts->synth_opts.thread_stack =
+				session->itrace_synth_opts->thread_stack;
+	}
+
+	if (bts->synth_opts.calls)
+		bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+					PERF_IP_FLAG_TRACE_END;
+	if (bts->synth_opts.returns)
+		bts->branches_filter |= PERF_IP_FLAG_RETURN |
+					PERF_IP_FLAG_TRACE_BEGIN;
+
+	err = intel_bts_synth_events(bts, session);
+	if (err)
+		goto err_free_queues;
+
+	err = auxtrace_queues__process_index(&bts->queues, session);
+	if (err)
+		goto err_free_queues;
+
+	if (bts->queues.populated)
+		bts->data_queued = true;
+
+	return 0;
+
+err_free_queues:
+	auxtrace_queues__free(&bts->queues);
+	session->auxtrace = NULL;
+err_free:
+	free(bts);
+	return err;
+}
diff --git a/util/intel-bts.h b/util/intel-bts.h
new file mode 100644
index 0000000..ca65e21
--- /dev/null
+++ b/util/intel-bts.h
@@ -0,0 +1,43 @@
+/*
+ * intel-bts.h: Intel Branch Trace Store (BTS) support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__PERF_INTEL_BTS_H__
+#define INCLUDE__PERF_INTEL_BTS_H__
+
+#define INTEL_BTS_PMU_NAME "intel_bts"
+
+/*
+ * Indices of the 64-bit values carried in the auxtrace info event's priv[]
+ * array.  INTEL_BTS_AUXTRACE_PRIV_MAX is the number of entries.
+ */
+enum {
+	INTEL_BTS_PMU_TYPE,
+	INTEL_BTS_TIME_SHIFT,
+	INTEL_BTS_TIME_MULT,
+	INTEL_BTS_TIME_ZERO,
+	INTEL_BTS_CAP_USER_TIME_ZERO,
+	INTEL_BTS_SNAPSHOT_MODE,
+	INTEL_BTS_AUXTRACE_PRIV_MAX,
+};
+
+/* Size in bytes of a complete priv[] array */
+#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64))
+
+struct auxtrace_record;
+struct perf_tool;
+union perf_event;
+struct perf_session;
+
+/* Recording side; the result of a failure is reported through *err. */
+struct auxtrace_record *intel_bts_recording_init(int *err);
+
+/* Decoding side: attach a BTS decoder to the session from its info event. */
+int intel_bts_process_auxtrace_info(union perf_event *event,
+				    struct perf_session *session);
+
+#endif
diff --git a/util/intel-pt-decoder/Build b/util/intel-pt-decoder/Build
new file mode 100644
index 0000000..1b704fb
--- /dev/null
+++ b/util/intel-pt-decoder/Build
@@ -0,0 +1,36 @@
+# Objects built into libperf when CONFIG_AUXTRACE is enabled
+libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
+
+inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk
+inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt
+
+# Generate the instruction attribute tables from the opcode map; a partial
+# output file is removed if the awk script fails.
+$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+	$(call rule_mkdir)
+	@$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
+
+# Busybox's diff doesn't have -I, so the comparisons below are only run when
+# diff accepts that option, to avoid spurious warnings in that case.
+#
+# When the surrounding kernel tree is present, warn (but never fail the
+# build) if the local copies of the x86 instruction decoder files differ
+# from the kernel's versions.
+
+$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
+	@(diff -I 2>&1 | grep -q 'option requires an argument' && \
+	test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+	((diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null) || \
+	(echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/insn.c' differs from latest version at 'arch/x86/lib/insn.c'" >&2)) && \
+	((diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null) || \
+	(echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/inat.c' differs from latest version at 'arch/x86/lib/inat.c'" >&2)) && \
+	((diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null) || \
+	(echo "Warning: Intel PT: x86 instruction decoder map file at 'tools/perf/util/intel-pt-decoder/x86-opcode-map.txt' differs from latest version at 'arch/x86/lib/x86-opcode-map.txt'" >&2)) && \
+	((diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null) || \
+	(echo "Warning: Intel PT: x86 instruction decoder script at 'tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk' differs from latest version at 'arch/x86/tools/gen-insn-attr-x86.awk'" >&2)) && \
+	((diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null) || \
+	(echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/insn.h' differs from latest version at 'arch/x86/include/asm/insn.h'" >&2)) && \
+	((diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null) || \
+	(echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h'" >&2)) && \
+	((diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) || \
+	(echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat_types.h' differs from latest version at 'arch/x86/include/asm/inat_types.h'" >&2)))) || true
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
+
+# The generated inat-tables.c lives in the output directory
+CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder
+
+# Only added when CC_NO_CLANG is 1
+ifeq ($(CC_NO_CLANG), 1)
+  CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init
+endif
diff --git a/util/intel-pt-decoder/gen-insn-attr-x86.awk b/util/intel-pt-decoder/gen-insn-attr-x86.awk
new file mode 100644
index 0000000..ddd5c4c
--- /dev/null
+++ b/util/intel-pt-decoder/gen-insn-attr-x86.awk
@@ -0,0 +1,392 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+# gen-insn-attr-x86.awk: Instruction attribute table generator
+# Written by Masami Hiramatsu <mhiramat@redhat.com>
+#
+# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
+
+# Sanity-check the awk implementation.
+# Returns "" when sprintf("%x") behaves as expected, otherwise a
+# human-readable description of the problem.
+function check_awk_implement() {
+	if (sprintf("%x", 0) == "0")
+		return ""
+	return "Your awk has a printf-format problem."
+}
+
+# Reset the per-table working state so the next table starts from scratch:
+# no table name, no escape/group/AVX id, and empty attribute arrays.
+function clear_vars() {
+	tname = ""
+	aid = -1 # AVX id
+	gid = -1 # group id
+	eid = -1 # escape id
+	delete lptable3
+	delete lptable2
+	delete lptable1
+	delete table
+}
+
+# Start-up: verify the awk implementation, print the banner of the generated
+# file, and initialise the global id counters, the summary arrays and the
+# regular expressions / lookup tables used by the rules below.
+BEGIN {
+	# Implementation error checking
+	awkchecked = check_awk_implement()
+	if (awkchecked != "") {
+		print "Error: " awkchecked > "/dev/stderr"
+		print "Please try to use gawk." > "/dev/stderr"
+		exit 1
+	}
+
+	# Setup generating tables
+	print "/* x86 opcode map generated from x86-opcode-map.txt */"
+	print "/* Do not change this code. */\n"
+	ggid = 1	# next group id to hand out
+	geid = 1	# next escape id to hand out
+	gaid = 0	# one more than the highest AVX table id seen
+	delete etable
+	delete gtable
+	delete atable
+
+	# Token classification while parsing an opcode line
+	opnd_expr = "^[A-Za-z/]"
+	ext_expr = "^\\("
+	sep_expr = "^\\|$"
+	group_expr = "^Grp[0-9A-Za-z]+"
+
+	# Operand codes that carry an immediate, and the flag each maps to
+	imm_expr = "^[IJAOL][a-z]"
+	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
+	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
+	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
+	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
+	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
+	imm_flag["Ob"] = "INAT_MOFFSET"
+	imm_flag["Ov"] = "INAT_MOFFSET"
+	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+
+	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
+	force64_expr = "\\([df]64\\)"
+	rex_expr = "^REX(\\.[XRWB]+)*"
+	fpu_expr = "^ESC" # TODO
+
+	# Last-prefix superscripts; each selects one of lptable1..3.
+	# '&' is an ordinary character in a regexp, so it is written without
+	# a backslash: "\&" is not a regexp operator and makes gawk warn.
+	lprefix1_expr = "\\((66|!F3)\\)"
+	lprefix2_expr = "\\(F3\\)"
+	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
+	lprefix_expr = "\\((66|F2|F3)\\)"
+	max_lprefix = 4
+
+	# All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
+	# accept the VEX prefix
+	vexok_opcode_expr = "^[vk].*"
+	vexok_expr = "\\(v1\\)"
+	# All opcodes with (v) superscript support *only* the VEX prefix
+	vexonly_expr = "\\(v\\)"
+	# All opcodes with (ev) superscript support *only* the EVEX prefix
+	evexonly_expr = "\\(ev\\)"
+
+	# Prefix names as written in the map, and the constant each maps to
+	prefix_expr = "\\(Prefix\\)"
+	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
+	prefix_num["REPNE"] = "INAT_PFX_REPNE"
+	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
+	prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
+	prefix_num["XRELEASE"] = "INAT_PFX_REPE"
+	prefix_num["LOCK"] = "INAT_PFX_LOCK"
+	prefix_num["SEG=CS"] = "INAT_PFX_CS"
+	prefix_num["SEG=DS"] = "INAT_PFX_DS"
+	prefix_num["SEG=ES"] = "INAT_PFX_ES"
+	prefix_num["SEG=FS"] = "INAT_PFX_FS"
+	prefix_num["SEG=GS"] = "INAT_PFX_GS"
+	prefix_num["SEG=SS"] = "INAT_PFX_SS"
+	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
+	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
+	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
+	prefix_num["EVEX"] = "INAT_PFX_EVEX"
+
+	clear_vars()
+}
+
+# Report a fatal problem in the opcode map on stderr, tagged with the current
+# input record number, and terminate with exit status 1.
+function semantic_error(msg) {
+	printf "Semantic error at %d: %s\n", NR, msg > "/dev/stderr"
+	exit 1
+}
+
+# Print a debugging message, prefixed with "DEBUG: ", on standard output.
+function debug(msg) {
+	printf "DEBUG: %s\n", msg
+}
+
+# Return the number of elements stored in arr (0 for an empty array).
+function array_size(arr,   key,total) {
+	total = 0
+	for (key in arr)
+		total += 1
+	return total
+}
+
+# "Table:" line: echo it into the output as a C comment.  A table name still
+# being set means the previous table was never closed by EndTable.
+/^Table:/ {
+	printf "/* %s */\n", $0
+	if (tname != "") {
+		semantic_error("Hit Table: before EndTable:.")
+	}
+}
+
+# "Referrer:" line with arguments: the fields after the keyword are joined
+# (without separators) into the key used to look up the escape id, and the
+# table is named after that id.
+/^Referrer:/ {
+	if (NF != 1) {
+		# escape opcode table
+		ref = ""
+		i = 2
+		while (i <= NF) {
+			ref = ref $i
+			i++
+		}
+		eid = escape[ref]
+		tname = "inat_escape_table_" sprintf("%d", eid)
+	}
+}
+
+# "AVXcode:" line.  With an argument it records the AVX table id, keeps gaid
+# one above the highest id seen and, if no name was chosen yet, names the
+# table after the id.  If neither an AVX id nor an escape id is set, the
+# table being read is the primary opcode table.
+/^AVXcode:/ {
+	if (NF != 1) {
+		# AVX/escape opcode table
+		aid = $2
+		if (aid >= gaid)
+			gaid = aid + 1
+		# AVX only opcode table
+		if (tname == "")
+			tname = sprintf("inat_avx_table_%d", aid)
+	}
+	# primary opcode table
+	if (eid == -1 && aid == -1)
+		tname = "inat_primary_table"
+}
+
+# "GrpTable:" line: echo it as a C comment, then select the id of the named
+# group.  The group must already have been referenced by an opcode line.
+/^GrpTable:/ {
+	printf "/* %s */\n", $0
+	if ($2 in group) {
+		gid = group[$2]
+		tname = "inat_group_table_" gid
+	} else {
+		semantic_error("No group: " $2 )
+	}
+}
+
+# Emit one attribute table as a C array definition.
+#   tbl:  array of flag expressions, keyed by the formatted index
+#   name: declarator including the array size, e.g. "foo[SIZE]"
+#   fmt:  sprintf() format turning a numeric index into a key of tbl
+#   n:    number of indexes (0 .. n-1) to scan
+# Only non-empty entries are printed, as designated initializers.
+# i and id are listed as extra parameters so that they are local to this
+# function and do not clobber the global i used by the pattern rules.
+function print_table(tbl,name,fmt,n,   i,id)
+{
+	print "const insn_attr_t " name " = {"
+	for (i = 0; i < n; i++) {
+		id = sprintf(fmt, i)
+		if (tbl[id])
+			print "	[" id "] = " tbl[id] ","
+	}
+	print "};"
+}
+
+# "EndTable" line: emit the table(s) collected since the last header and
+# remember their names for the summary arrays printed in END.
+# The base table and each of the three last-prefix variants (lptable1..3) is
+# printed only when it has at least one entry; variant tables get a "_1",
+# "_2" or "_3" suffix.  All per-table state is cleared afterwards.
+/^EndTable/ {
+	if (gid != -1) {
+		# print group tables
+		# (8 entries each, keyed "0x0" .. "0x7")
+		if (array_size(table) != 0) {
+			print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
+				    "0x%x", 8)
+			gtable[gid,0] = tname
+		}
+		if (array_size(lptable1) != 0) {
+			print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
+				    "0x%x", 8)
+			gtable[gid,1] = tname "_1"
+		}
+		if (array_size(lptable2) != 0) {
+			print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
+				    "0x%x", 8)
+			gtable[gid,2] = tname "_2"
+		}
+		if (array_size(lptable3) != 0) {
+			print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
+				    "0x%x", 8)
+			gtable[gid,3] = tname "_3"
+		}
+	} else {
+		# print primary/escaped tables
+		# (256 entries each, keyed "0x00" .. "0xff").  eid is still -1
+		# for a table that had no Referrer: arguments; the table is
+		# additionally recorded in atable[] when an AVX id was given.
+		if (array_size(table) != 0) {
+			print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
+				    "0x%02x", 256)
+			etable[eid,0] = tname
+			if (aid >= 0)
+				atable[aid,0] = tname
+		}
+		if (array_size(lptable1) != 0) {
+			print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
+				    "0x%02x", 256)
+			etable[eid,1] = tname "_1"
+			if (aid >= 0)
+				atable[aid,1] = tname "_1"
+		}
+		if (array_size(lptable2) != 0) {
+			print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
+				    "0x%02x", 256)
+			etable[eid,2] = tname "_2"
+			if (aid >= 0)
+				atable[aid,2] = tname "_2"
+		}
+		if (array_size(lptable3) != 0) {
+			print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
+				    "0x%02x", 256)
+			etable[eid,3] = tname "_3"
+			if (aid >= 0)
+				atable[aid,3] = tname "_3"
+		}
+	}
+	# blank line after the table(s), then reset for the next table
+	print ""
+	clear_vars()
+}
+
+# Join two flag expressions with " | ".  If either one is empty, the other
+# one is returned unchanged.
+function add_flags(old,new) {
+	if (!old)
+		return new
+	if (!new)
+		return old
+	return old " | " new
+}
+
+# Convert the operand list of one opcode into attribute flags.
+#   count: number of operands in opnd
+#   opnd:  operands, indexed 1..count
+# An immediate operand contributes its imm_flag[] entry; a second immediate
+# is only accepted when it is "Ib" and adds INAT_SCNDIMM.  Any other operand
+# matching modrm_expr adds INAT_MODRM.
+function convert_operands(count,opnd,       op,n,immflags,modflag)
+{
+	immflags = null
+	modflag = null
+	n = 1
+	while (n <= count) {
+		op = opnd[n++]
+		if (match(op, imm_expr) == 1) {
+			if (!imm_flag[op])
+				semantic_error("Unknown imm opnd: " op)
+			if (!immflags)
+				immflags = imm_flag[op]
+			else {
+				if (op != "Ib")
+					semantic_error("Second IMM error")
+				immflags = add_flags(immflags, "INAT_SCNDIMM")
+			}
+		} else if (match(op, modrm_expr))
+			modflag = "INAT_MODRM"
+	}
+	return add_flags(immflags, modflag)
+}
+
+# Opcode line: "<hex index>: <opcode> [operands] [(ext)] [| <opcode> ...]".
+# Each '|'-separated alternative is converted into a flag expression and
+# stored under the index in the base table and/or in the last-prefix variant
+# tables, depending on the alternative's superscripts.
+# ':' is an ordinary character in a regexp, so it is written without a
+# backslash: "\:" is not a regexp operator and makes gawk warn.
+/^[0-9a-f]+:/ {
+	# a match on the very first input line is ignored
+	if (NR == 1)
+		next
+	# get index
+	idx = "0x" substr($1, 1, index($1,":") - 1)
+	if (idx in table)
+		semantic_error("Redefine " idx " in " tname)
+
+	# check if escaped opcode
+	if ("escape" == $2) {
+		if ($3 != "#")
+			semantic_error("No escaped name")
+		# the name is every field after the '#', joined without spaces
+		ref = ""
+		for (i = 4; i <= NF; i++)
+			ref = ref $i
+		if (ref in escape)
+			semantic_error("Redefine escape (" ref ")")
+		escape[ref] = geid
+		geid++
+		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
+		next
+	}
+
+	variant = null
+	# converts
+	i = 2
+	while (i <= NF) {
+		opcode = $(i++)
+		delete opnds
+		ext = null
+		flags = null
+		opnd = null
+		# parse one opcode
+		if (match($i, opnd_expr)) {
+			opnd = $i
+			count = split($(i++), opnds, ",")
+			flags = convert_operands(count, opnds)
+		}
+		if (match($i, ext_expr))
+			ext = $(i++)
+		if (match($i, sep_expr))
+			i++
+		else if (i < NF)
+			semantic_error($i " is not a separator")
+
+		# check if group opcode
+		if (match(opcode, group_expr)) {
+			# a group gets its id the first time it is referenced
+			if (!(opcode in group)) {
+				group[opcode] = ggid
+				ggid++
+			}
+			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
+		}
+		# check force(or default) 64bit
+		if (match(ext, force64_expr))
+			flags = add_flags(flags, "INAT_FORCE64")
+
+		# check REX prefix
+		if (match(opcode, rex_expr))
+			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")
+
+		# check coprocessor escape : TODO
+		if (match(opcode, fpu_expr))
+			flags = add_flags(flags, "INAT_MODRM")
+
+		# check VEX codes
+		if (match(ext, evexonly_expr))
+			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
+		else if (match(ext, vexonly_expr))
+			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
+		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
+			flags = add_flags(flags, "INAT_VEXOK")
+
+		# check prefixes
+		if (match(ext, prefix_expr)) {
+			if (!prefix_num[opcode])
+				semantic_error("Unknown prefix: " opcode)
+			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
+		}
+		# an alternative that produced no flags leaves no table entry
+		if (length(flags) == 0)
+			continue
+		# check if last prefix
+		if (match(ext, lprefix1_expr)) {
+			lptable1[idx] = add_flags(lptable1[idx],flags)
+			variant = "INAT_VARIANT"
+		}
+		if (match(ext, lprefix2_expr)) {
+			lptable2[idx] = add_flags(lptable2[idx],flags)
+			variant = "INAT_VARIANT"
+		}
+		if (match(ext, lprefix3_expr)) {
+			lptable3[idx] = add_flags(lptable3[idx],flags)
+			variant = "INAT_VARIANT"
+		}
+		# no last-prefix superscript: the entry goes to the base table
+		if (!match(ext, lprefix_expr)){
+			table[idx] = add_flags(table[idx],flags)
+		}
+	}
+	# mark the base entry when any alternative went to a variant table
+	if (variant)
+		table[idx] = add_flags(table[idx],variant)
+}
+
+# Print the non-empty entries of a two-dimensional name map as designated
+# initializers: rows 0..rows-1, columns 0..max_lprefix-1.
+function print_map_rows(map,rows,   r,c) {
+	for (r = 0; r < rows; r++)
+		for (c = 0; c < max_lprefix; c++)
+			if (map[r,c])
+				print "	["r"]["c"] = "map[r,c]","
+}
+
+# Finally emit the three arrays that index the generated tables by
+# escape id, group id and AVX id (second dimension: last-prefix id).
+END {
+	if (awkchecked != "")
+		exit 1
+
+	# print escape opcode map's array
+	print "/* Escape opcode map array */"
+	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
+	      "[INAT_LSTPFX_MAX + 1] = {"
+	print_map_rows(etable, geid)
+	print "};\n"
+
+	# print group opcode map's array
+	print "/* Group opcode map array */"
+	print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
+	      "[INAT_LSTPFX_MAX + 1] = {"
+	print_map_rows(gtable, ggid)
+	print "};\n"
+
+	# print AVX opcode map's array
+	print "/* AVX opcode map array */"
+	print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
+	      "[INAT_LSTPFX_MAX + 1] = {"
+	print_map_rows(atable, gaid)
+	print "};"
+}
diff --git a/util/intel-pt-decoder/inat.c b/util/intel-pt-decoder/inat.c
new file mode 100644
index 0000000..906d94a
--- /dev/null
+++ b/util/intel-pt-decoder/inat.c
@@ -0,0 +1,96 @@
+/*
+ * x86 instruction attribute tables
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include "insn.h"
+
+/* Attribute tables are generated from opcode map */
+#include "inat-tables.c"
+
+/* Attribute search APIs */
+/* Look up the attribute of a one-byte opcode in the primary table. */
+insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
+{
+	const insn_attr_t attr = inat_primary_table[opcode];
+
+	return attr;
+}
+
+/*
+ * Translate a prefix byte into its last-prefix id, using the attribute the
+ * primary table holds for that byte.
+ */
+int inat_get_last_prefix_id(insn_byte_t last_pfx)
+{
+	return inat_last_prefix_id(inat_get_opcode_attribute(last_pfx));
+}
+
+/*
+ * Look up @opcode in the escape table selected by @esc_attr.
+ *
+ * The base table (last-prefix id 0) is consulted first; if its entry is
+ * marked as having variants and @lpfx_id is non-zero, the table for that
+ * last prefix is used instead.  Returns 0 when the required table is absent.
+ */
+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
+				      insn_attr_t esc_attr)
+{
+	const int esc = inat_escape_id(esc_attr);
+	const insn_attr_t *tbl = inat_escape_tables[esc][0];
+
+	if (!tbl)
+		return 0;
+
+	if (lpfx_id && inat_has_variant(tbl[opcode])) {
+		tbl = inat_escape_tables[esc][lpfx_id];
+		if (!tbl)
+			return 0;
+	}
+
+	return tbl[opcode];
+}
+
+/*
+ * Look up the reg field of @modrm in the group table selected by @grp_attr.
+ *
+ * The result is the table entry OR-ed with the non-group bits of @grp_attr.
+ * If the base entry has variants and @lpfx_id is non-zero, the table for
+ * that last prefix is used.  When the required table is absent, only the
+ * non-group bits of @grp_attr are returned.
+ */
+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
+				     insn_attr_t grp_attr)
+{
+	const int grp = inat_group_id(grp_attr);
+	const int reg = X86_MODRM_REG(modrm);
+	const insn_attr_t common = inat_group_common_attribute(grp_attr);
+	const insn_attr_t *tbl = inat_group_tables[grp][0];
+
+	if (!tbl)
+		return common;
+
+	if (lpfx_id && inat_has_variant(tbl[reg])) {
+		tbl = inat_group_tables[grp][lpfx_id];
+		if (!tbl)
+			return common;
+	}
+
+	return tbl[reg] | common;
+}
+
+/*
+ * Look up @opcode in the AVX table selected by @vex_m and @vex_p.
+ *
+ * Out-of-range @vex_m / @vex_p and absent tables yield 0.  The master table
+ * (index 0) is checked first; unless its entry is a group, a non-zero @vex_p
+ * selects the matching per-prefix table.
+ */
+insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
+				   insn_byte_t vex_p)
+{
+	const insn_attr_t *tbl;
+
+	if (!(vex_m <= X86_VEX_M_MAX && vex_p <= INAT_LSTPFX_MAX))
+		return 0;
+
+	/* start with the master table */
+	tbl = inat_avx_tables[vex_m][0];
+	if (!tbl)
+		return 0;
+
+	/* not a group: take the attribute from the per-prefix table */
+	if (vex_p && !inat_is_group(tbl[opcode])) {
+		tbl = inat_avx_tables[vex_m][vex_p];
+		if (!tbl)
+			return 0;
+	}
+
+	return tbl[opcode];
+}
diff --git a/util/intel-pt-decoder/inat.h b/util/intel-pt-decoder/inat.h
new file mode 100644
index 0000000..52dc8d9
--- /dev/null
+++ b/util/intel-pt-decoder/inat.h
@@ -0,0 +1,244 @@
+#ifndef _ASM_X86_INAT_H
+#define _ASM_X86_INAT_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include "inat_types.h"
+
+/*
+ * Internal bits. Don't use bitmasks directly, because these bits are
+ * unstable. You should use checking functions.
+ */
+
+#define INAT_OPCODE_TABLE_SIZE 256
+#define INAT_GROUP_TABLE_SIZE 8
+
+/* Legacy last prefixes */
+#define INAT_PFX_OPNDSZ	1	/* 0x66 */ /* LPFX1 */
+#define INAT_PFX_REPE	2	/* 0xF3 */ /* LPFX2 */
+#define INAT_PFX_REPNE	3	/* 0xF2 */ /* LPFX3 */
+/* Other Legacy prefixes */
+#define INAT_PFX_LOCK	4	/* 0xF0 */
+#define INAT_PFX_CS	5	/* 0x2E */
+#define INAT_PFX_DS	6	/* 0x3E */
+#define INAT_PFX_ES	7	/* 0x26 */
+#define INAT_PFX_FS	8	/* 0x64 */
+#define INAT_PFX_GS	9	/* 0x65 */
+#define INAT_PFX_SS	10	/* 0x36 */
+#define INAT_PFX_ADDRSZ	11	/* 0x67 */
+/* x86-64 REX prefix */
+#define INAT_PFX_REX	12	/* 0x4X */
+/* AVX VEX prefixes */
+#define INAT_PFX_VEX2	13	/* 2-bytes VEX prefix */
+#define INAT_PFX_VEX3	14	/* 3-bytes VEX prefix */
+#define INAT_PFX_EVEX	15	/* EVEX prefix */
+
+#define INAT_LSTPFX_MAX	3
+#define INAT_LGCPFX_MAX	11
+
+/* Immediate size */
+#define INAT_IMM_BYTE		1
+#define INAT_IMM_WORD		2
+#define INAT_IMM_DWORD		3
+#define INAT_IMM_QWORD		4
+#define INAT_IMM_PTR		5
+#define INAT_IMM_VWORD32	6
+#define INAT_IMM_VWORD		7
+
+/* Legacy prefix */
+#define INAT_PFX_OFFS	0
+#define INAT_PFX_BITS	4
+#define INAT_PFX_MAX    ((1 << INAT_PFX_BITS) - 1)
+#define INAT_PFX_MASK	(INAT_PFX_MAX << INAT_PFX_OFFS)
+/* Escape opcodes */
+#define INAT_ESC_OFFS	(INAT_PFX_OFFS + INAT_PFX_BITS)
+#define INAT_ESC_BITS	2
+#define INAT_ESC_MAX	((1 << INAT_ESC_BITS) - 1)
+#define INAT_ESC_MASK	(INAT_ESC_MAX << INAT_ESC_OFFS)
+/* Group opcodes (1-16) */
+#define INAT_GRP_OFFS	(INAT_ESC_OFFS + INAT_ESC_BITS)
+#define INAT_GRP_BITS	5
+#define INAT_GRP_MAX	((1 << INAT_GRP_BITS) - 1)
+#define INAT_GRP_MASK	(INAT_GRP_MAX << INAT_GRP_OFFS)
+/* Immediates */
+#define INAT_IMM_OFFS	(INAT_GRP_OFFS + INAT_GRP_BITS)
+#define INAT_IMM_BITS	3
+#define INAT_IMM_MASK	(((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
+/* Flags */
+#define INAT_FLAG_OFFS	(INAT_IMM_OFFS + INAT_IMM_BITS)
+#define INAT_MODRM	(1 << (INAT_FLAG_OFFS))
+#define INAT_FORCE64	(1 << (INAT_FLAG_OFFS + 1))
+#define INAT_SCNDIMM	(1 << (INAT_FLAG_OFFS + 2))
+#define INAT_MOFFSET	(1 << (INAT_FLAG_OFFS + 3))
+#define INAT_VARIANT	(1 << (INAT_FLAG_OFFS + 4))
+#define INAT_VEXOK	(1 << (INAT_FLAG_OFFS + 5))
+#define INAT_VEXONLY	(1 << (INAT_FLAG_OFFS + 6))
+#define INAT_EVEXONLY	(1 << (INAT_FLAG_OFFS + 7))
+/*
+ * Attribute making macros for attribute tables.
+ * Each one shifts its argument into the matching bit field; the group
+ * variant also sets INAT_MODRM.  Arguments are parenthesized so that an
+ * expression passed as argument is shifted as a whole.
+ */
+#define INAT_MAKE_PREFIX(pfx)	((pfx) << INAT_PFX_OFFS)
+#define INAT_MAKE_ESCAPE(esc)	((esc) << INAT_ESC_OFFS)
+#define INAT_MAKE_GROUP(grp)	(((grp) << INAT_GRP_OFFS) | INAT_MODRM)
+#define INAT_MAKE_IMM(imm)	((imm) << INAT_IMM_OFFS)
+
+/* Identifiers for segment registers */
+#define INAT_SEG_REG_IGNORE	0
+#define INAT_SEG_REG_DEFAULT	1
+#define INAT_SEG_REG_CS		2
+#define INAT_SEG_REG_SS		3
+#define INAT_SEG_REG_DS		4
+#define INAT_SEG_REG_ES		5
+#define INAT_SEG_REG_FS		6
+#define INAT_SEG_REG_GS		7
+
+/*
+ * Attribute search APIs.
+ * Parameter names match the definitions in inat.c.
+ */
+extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
+extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
+					     int lpfx_id,
+					     insn_attr_t esc_attr);
+extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
+					    int lpfx_id,
+					    insn_attr_t grp_attr);
+extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
+					  insn_byte_t vex_m,
+					  insn_byte_t vex_p);
+
+/* Attribute checking functions */
+/* Non-zero if the prefix field of @attr is in 1..INAT_LGCPFX_MAX. */
+static inline int inat_is_legacy_prefix(insn_attr_t attr)
+{
+	const insn_attr_t pfx = attr & INAT_PFX_MASK;
+
+	return pfx != 0 && pfx <= INAT_LGCPFX_MAX;
+}
+
+/* Non-zero if the prefix field of @attr is INAT_PFX_ADDRSZ. */
+static inline int inat_is_address_size_prefix(insn_attr_t attr)
+{
+	const insn_attr_t pfx = attr & INAT_PFX_MASK;
+
+	return pfx == INAT_PFX_ADDRSZ;
+}
+
+/* Non-zero if the prefix field of @attr is INAT_PFX_OPNDSZ. */
+static inline int inat_is_operand_size_prefix(insn_attr_t attr)
+{
+	const insn_attr_t pfx = attr & INAT_PFX_MASK;
+
+	return pfx == INAT_PFX_OPNDSZ;
+}
+
+/* Non-zero if the prefix field of @attr is INAT_PFX_REX. */
+static inline int inat_is_rex_prefix(insn_attr_t attr)
+{
+	const insn_attr_t pfx = attr & INAT_PFX_MASK;
+
+	return pfx == INAT_PFX_REX;
+}
+
+/*
+ * Return the prefix field of @attr if it does not exceed INAT_LSTPFX_MAX,
+ * otherwise 0.
+ */
+static inline int inat_last_prefix_id(insn_attr_t attr)
+{
+	const insn_attr_t pfx = attr & INAT_PFX_MASK;
+
+	return pfx > INAT_LSTPFX_MAX ? 0 : pfx;
+}
+
+/* Non-zero if the prefix field of @attr is one of VEX2, VEX3 or EVEX. */
+static inline int inat_is_vex_prefix(insn_attr_t attr)
+{
+	switch (attr & INAT_PFX_MASK) {
+	case INAT_PFX_VEX2:
+	case INAT_PFX_VEX3:
+	case INAT_PFX_EVEX:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Non-zero if the prefix field of @attr is INAT_PFX_EVEX. */
+static inline int inat_is_evex_prefix(insn_attr_t attr)
+{
+	const insn_attr_t pfx = attr & INAT_PFX_MASK;
+
+	return pfx == INAT_PFX_EVEX;
+}
+
+/* Non-zero if the prefix field of @attr is INAT_PFX_VEX3. */
+static inline int inat_is_vex3_prefix(insn_attr_t attr)
+{
+	const insn_attr_t pfx = attr & INAT_PFX_MASK;
+
+	return pfx == INAT_PFX_VEX3;
+}
+
+/* Non-zero if any bit of the escape field of @attr is set. */
+static inline int inat_is_escape(insn_attr_t attr)
+{
+	const insn_attr_t esc_bits = attr & INAT_ESC_MASK;
+
+	return esc_bits;
+}
+
+/* Extract the escape id stored in the escape field of @attr. */
+static inline int inat_escape_id(insn_attr_t attr)
+{
+	const insn_attr_t esc_bits = attr & INAT_ESC_MASK;
+
+	return esc_bits >> INAT_ESC_OFFS;
+}
+
+/* Non-zero if any bit of the group field of @attr is set. */
+static inline int inat_is_group(insn_attr_t attr)
+{
+	const insn_attr_t grp_bits = attr & INAT_GRP_MASK;
+
+	return grp_bits;
+}
+
+/* Extract the group id stored in the group field of @attr. */
+static inline int inat_group_id(insn_attr_t attr)
+{
+	const insn_attr_t grp_bits = attr & INAT_GRP_MASK;
+
+	return grp_bits >> INAT_GRP_OFFS;
+}
+
+/* Return @attr with the group field cleared. */
+static inline int inat_group_common_attribute(insn_attr_t attr)
+{
+	const insn_attr_t without_group = attr & ~INAT_GRP_MASK;
+
+	return without_group;
+}
+
+/* Non-zero if any bit of the immediate field of @attr is set. */
+static inline int inat_has_immediate(insn_attr_t attr)
+{
+	const insn_attr_t imm_bits = attr & INAT_IMM_MASK;
+
+	return imm_bits;
+}
+
+/* Extract the immediate-size code (INAT_IMM_*) stored in @attr. */
+static inline int inat_immediate_size(insn_attr_t attr)
+{
+	const insn_attr_t imm_bits = attr & INAT_IMM_MASK;
+
+	return imm_bits >> INAT_IMM_OFFS;
+}
+
+/* Non-zero if the INAT_MODRM flag is set in @attr. */
+static inline int inat_has_modrm(insn_attr_t attr)
+{
+	const insn_attr_t flag = attr & INAT_MODRM;
+
+	return flag;
+}
+
+/* Non-zero if the INAT_FORCE64 flag is set in @attr. */
+static inline int inat_is_force64(insn_attr_t attr)
+{
+	const insn_attr_t flag = attr & INAT_FORCE64;
+
+	return flag;
+}
+
+/* Non-zero if the INAT_SCNDIMM flag is set in @attr. */
+static inline int inat_has_second_immediate(insn_attr_t attr)
+{
+	const insn_attr_t flag = attr & INAT_SCNDIMM;
+
+	return flag;
+}
+
+/* Non-zero if the INAT_MOFFSET flag is set in @attr. */
+static inline int inat_has_moffset(insn_attr_t attr)
+{
+	const insn_attr_t flag = attr & INAT_MOFFSET;
+
+	return flag;
+}
+
+/* Non-zero if the INAT_VARIANT flag is set in @attr. */
+static inline int inat_has_variant(insn_attr_t attr)
+{
+	const insn_attr_t flag = attr & INAT_VARIANT;
+
+	return flag;
+}
+
+/* Non-zero if the INAT_VEXOK flag is set in @attr. */
+static inline int inat_accept_vex(insn_attr_t attr)
+{
+	const insn_attr_t flag = attr & INAT_VEXOK;
+
+	return flag;
+}
+
+/* Non-zero if INAT_VEXONLY or INAT_EVEXONLY is set in @attr. */
+static inline int inat_must_vex(insn_attr_t attr)
+{
+	const insn_attr_t only_mask = INAT_VEXONLY | INAT_EVEXONLY;
+
+	return attr & only_mask;
+}
+
+/* Non-zero if the INAT_EVEXONLY flag is set in @attr. */
+static inline int inat_must_evex(insn_attr_t attr)
+{
+	const insn_attr_t flag = attr & INAT_EVEXONLY;
+
+	return flag;
+}
+#endif
diff --git a/util/intel-pt-decoder/inat_types.h b/util/intel-pt-decoder/inat_types.h
new file mode 100644
index 0000000..cb3c20c
--- /dev/null
+++ b/util/intel-pt-decoder/inat_types.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_X86_INAT_TYPES_H
+#define _ASM_X86_INAT_TYPES_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* Instruction attributes */
+/* Attribute word: the element type of the inat_* tables and lookup APIs. */
+typedef unsigned int insn_attr_t;
+/* One raw byte of an instruction (prefix, opcode, ModRM, ...). */
+typedef unsigned char insn_byte_t;
+/* Signed value type; presumably used for decoded field values -- confirm against insn.h. */
+typedef signed int insn_value_t;
+
+#endif
diff --git a/util/intel-pt-decoder/insn.c b/util/intel-pt-decoder/insn.c
new file mode 100644
index 0000000..ca983e2
--- /dev/null
+++ b/util/intel-pt-decoder/insn.c
@@ -0,0 +1,606 @@
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004, 2009
+ */
+
+#ifdef __KERNEL__
+#include <linux/string.h>
+#else
+#include <string.h>
+#endif
+#include "inat.h"
+#include "insn.h"
+
+/*
+ * Byte-fetch helpers for the decoder.
+ *
+ * get_next() and the peek_*() macros bounds-check against insn->end_kaddr
+ * and jump to a label named err_out in the calling function when the
+ * requested bytes are not available, so every user must provide that label.
+ * The __-prefixed variants perform no check.  All macro arguments are
+ * parenthesized so that expression arguments expand safely.
+ */
+
+/* Verify next sizeof(t) bytes can be on the same instruction */
+#define validate_next(t, insn, n)	\
+	((insn)->next_byte + sizeof(t) + (n) <= (insn)->end_kaddr)
+
+/* Read a value of type t at next_byte and advance next_byte past it */
+#define __get_next(t, insn)	\
+	({ t r = *(t*)(insn)->next_byte; (insn)->next_byte += sizeof(t); r; })
+
+/* Read a value of type t located n bytes after next_byte, without advancing */
+#define __peek_nbyte_next(t, insn, n)	\
+	({ t r = *(t*)((insn)->next_byte + (n)); r; })
+
+#define get_next(t, insn)	\
+	({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
+
+#define peek_nbyte_next(t, insn, n)	\
+	({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })
+
+#define peek_next(t, insn)	peek_nbyte_next(t, insn, 0)
+
+/**
+ * insn_init() - initialize struct insn
+ * @insn:	&struct insn to be initialized
+ * @kaddr:	address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len:	number of bytes available at @kaddr; values above
+ *		MAX_INSN_SIZE are clamped to MAX_INSN_SIZE
+ * @x86_64:	!0 for 64-bit kernel or 64-bit app
+ *
+ * Zeroes @insn, records the start and end of the buffer, and sets the
+ * default operand size (4) and address size (8 if @x86_64, otherwise 4).
+ */
+void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
+{
+	/*
+	 * No valid instruction is longer than MAX_INSN_SIZE (15 bytes),
+	 * however much room the input buffer offers.
+	 */
+	const int len = buf_len > MAX_INSN_SIZE ? MAX_INSN_SIZE : buf_len;
+
+	memset(insn, 0, sizeof(*insn));
+
+	insn->x86_64 = !!x86_64;
+	insn->opnd_bytes = 4;
+	insn->addr_bytes = x86_64 ? 8 : 4;
+
+	insn->kaddr = kaddr;
+	insn->next_byte = kaddr;
+	insn->end_kaddr = kaddr + len;
+}
+
+/**
+ * insn_get_prefixes - scan x86 instruction prefix bytes
+ * @insn:	&struct insn containing instruction
+ *
+ * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
+ * to point to the (first) opcode.  No effect if @insn->prefixes.got
+ * is already set.
+ *
+ * Also decodes a REX prefix (64-bit mode only) and a VEX/EVEX prefix, and
+ * adjusts @insn->opnd_bytes / @insn->addr_bytes for size-override prefixes.
+ * If the buffer ends while a byte is being peeked, the function returns
+ * early through err_out and @insn->prefixes.got stays 0.
+ */
+void insn_get_prefixes(struct insn *insn)
+{
+	struct insn_field *prefixes = &insn->prefixes;
+	insn_attr_t attr;
+	insn_byte_t b, lb;
+	int i, nb;
+
+	if (prefixes->got)
+		return;
+
+	nb = 0;		/* number of distinct prefix bytes stored so far */
+	lb = 0;		/* last prefix byte consumed */
+	b = peek_next(insn_byte_t, insn);
+	attr = inat_get_opcode_attribute(b);
+	while (inat_is_legacy_prefix(attr)) {
+		/* Skip if same prefix */
+		for (i = 0; i < nb; i++)
+			if (prefixes->bytes[i] == b)
+				goto found;
+		if (nb == 4)
+			/* Invalid instruction */
+			break;
+		prefixes->bytes[nb++] = b;
+		if (inat_is_address_size_prefix(attr)) {
+			/* address size switches 2/4 or 4/8 */
+			if (insn->x86_64)
+				insn->addr_bytes ^= 12;
+			else
+				insn->addr_bytes ^= 6;
+		} else if (inat_is_operand_size_prefix(attr)) {
+			/* operand size switches 2/4 */
+			insn->opnd_bytes ^= 6;
+		}
+found:
+		/* repeated prefixes are counted and consumed but stored once */
+		prefixes->nbytes++;
+		insn->next_byte++;
+		lb = b;
+		b = peek_next(insn_byte_t, insn);
+		attr = inat_get_opcode_attribute(b);
+	}
+	/* Set the last prefix */
+	if (lb && lb != insn->prefixes.bytes[3]) {
+		if (unlikely(insn->prefixes.bytes[3])) {
+			/* Swap the last prefix */
+			b = insn->prefixes.bytes[3];
+			for (i = 0; i < nb; i++)
+				if (prefixes->bytes[i] == lb)
+					prefixes->bytes[i] = b;
+		}
+		insn->prefixes.bytes[3] = lb;
+	}
+
+	/* Decode REX prefix */
+	if (insn->x86_64) {
+		b = peek_next(insn_byte_t, insn);
+		attr = inat_get_opcode_attribute(b);
+		if (inat_is_rex_prefix(attr)) {
+			insn->rex_prefix.value = b;
+			insn->rex_prefix.nbytes = 1;
+			insn->next_byte++;
+			if (X86_REX_W(b))
+				/* REX.W overrides opnd_size */
+				insn->opnd_bytes = 8;
+		}
+	}
+	insn->rex_prefix.got = 1;
+
+	/* Decode VEX prefix */
+	b = peek_next(insn_byte_t, insn);
+	attr = inat_get_opcode_attribute(b);
+	if (inat_is_vex_prefix(attr)) {
+		insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
+		if (!insn->x86_64) {
+			/*
+			 * In 32-bits mode, if the [7:6] bits (mod bits of
+			 * ModRM) on the second byte are not 11b, it is
+			 * LDS or LES or BOUND.
+			 */
+			if (X86_MODRM_MOD(b2) != 3)
+				goto vex_end;
+		}
+		insn->vex_prefix.bytes[0] = b;
+		insn->vex_prefix.bytes[1] = b2;
+		if (inat_is_evex_prefix(attr)) {
+			/* EVEX: four prefix bytes in total */
+			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+			insn->vex_prefix.bytes[2] = b2;
+			b2 = peek_nbyte_next(insn_byte_t, insn, 3);
+			insn->vex_prefix.bytes[3] = b2;
+			insn->vex_prefix.nbytes = 4;
+			insn->next_byte += 4;
+			if (insn->x86_64 && X86_VEX_W(b2))
+				/* VEX.W overrides opnd_size */
+				insn->opnd_bytes = 8;
+		} else if (inat_is_vex3_prefix(attr)) {
+			/* VEX3: three prefix bytes in total */
+			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+			insn->vex_prefix.bytes[2] = b2;
+			insn->vex_prefix.nbytes = 3;
+			insn->next_byte += 3;
+			if (insn->x86_64 && X86_VEX_W(b2))
+				/* VEX.W overrides opnd_size */
+				insn->opnd_bytes = 8;
+		} else {
+			/*
+			 * For VEX2, fake VEX3-like byte#2.
+			 * Makes it easier to decode vex.W, vex.vvvv,
+			 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
+			 */
+			insn->vex_prefix.bytes[2] = b2 & 0x7f;
+			insn->vex_prefix.nbytes = 2;
+			insn->next_byte += 2;
+		}
+	}
+vex_end:
+	insn->vex_prefix.got = 1;
+
+	prefixes->got = 1;
+
+	/* reached directly (skipping the lines above) when a peek runs out of buffer */
+err_out:
+	return;
+}
+
+/**
+ * insn_get_opcode - collect opcode(s)
+ * @insn:	&struct insn containing instruction
+ *
+ * Populates @insn->opcode, updates @insn->next_byte to point past the
+ * opcode byte(s), and set @insn->attr (except for groups).
+ * If necessary, first collects any preceding (prefix) bytes.
+ * Sets @insn->opcode.value = opcode1.  No effect if @insn->opcode.got
+ * is already 1.
+ *
+ * If the buffer ends before all opcode bytes are read, the function returns
+ * through err_out and @insn->opcode.got stays 0.
+ */
+void insn_get_opcode(struct insn *insn)
+{
+	struct insn_field *opcode = &insn->opcode;
+	insn_byte_t op;
+	int pfx_id;
+	if (opcode->got)
+		return;
+	if (!insn->prefixes.got)
+		insn_get_prefixes(insn);
+
+	/* Get first opcode */
+	op = get_next(insn_byte_t, insn);
+	opcode->bytes[0] = op;
+	opcode->nbytes = 1;
+
+	/* Check if there is VEX prefix or not */
+	if (insn_is_avx(insn)) {
+		insn_byte_t m, p;
+		m = insn_vex_m_bits(insn);
+		p = insn_vex_p_bits(insn);
+		insn->attr = inat_get_avx_attribute(op, m, p);
+		/*
+		 * Reject an EVEX-only opcode without an EVEX prefix, and any
+		 * non-group opcode that does not accept VEX.
+		 */
+		if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
+		    (!inat_accept_vex(insn->attr) &&
+		     !inat_is_group(insn->attr)))
+			insn->attr = 0;	/* This instruction is bad */
+		goto end;	/* VEX has only 1 byte for opcode */
+	}
+
+	insn->attr = inat_get_opcode_attribute(op);
+	/* follow escape bytes until a non-escape attribute is found */
+	while (inat_is_escape(insn->attr)) {
+		/* Get escaped opcode */
+		op = get_next(insn_byte_t, insn);
+		opcode->bytes[opcode->nbytes++] = op;
+		pfx_id = insn_last_prefix_id(insn);
+		insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
+	}
+	/* a VEX/EVEX-only opcode reached without such a prefix is invalid */
+	if (inat_must_vex(insn->attr))
+		insn->attr = 0;	/* This instruction is bad */
+end:
+	opcode->got = 1;
+
+	/* reached directly when get_next() runs out of buffer */
+err_out:
+	return;
+}
+
+/**
+ * insn_get_modrm - collect ModRM byte, if any
+ * @insn:	&struct insn containing instruction
+ *
+ * Fills in @insn->modrm and moves @insn->next_byte past the ModRM byte when
+ * the opcode attribute says there is one.  Prefixes and opcode(s) are
+ * collected first if that has not happened yet.  For group opcodes the
+ * attribute is refined using the ModRM byte.  Does nothing when
+ * @insn->modrm.got is already 1.
+ */
+void insn_get_modrm(struct insn *insn)
+{
+	struct insn_field *field = &insn->modrm;
+	insn_byte_t lpfx, byte;
+
+	if (field->got)
+		return;
+	if (!insn->opcode.got)
+		insn_get_opcode(insn);
+
+	if (!inat_has_modrm(insn->attr))
+		goto check_force64;
+
+	byte = get_next(insn_byte_t, insn);
+	field->value = byte;
+	field->nbytes = 1;
+	if (inat_is_group(insn->attr)) {
+		lpfx = insn_last_prefix_id(insn);
+		insn->attr = inat_get_group_attribute(byte, lpfx,
+						      insn->attr);
+		if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
+			insn->attr = 0;	/* This is bad */
+	}
+
+check_force64:
+	if (insn->x86_64 && inat_is_force64(insn->attr))
+		insn->opnd_bytes = 8;
+	field->got = 1;
+
+err_out:
+	return;
+}
+
+
+/**
+ * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
+ * @insn:	&struct insn containing instruction
+ *
+ * Decodes the instruction up to and including the ModRM byte if that has
+ * not been done yet.  Always returns 0 when @insn->x86_64 is 0.
+ */
+int insn_rip_relative(struct insn *insn)
+{
+	struct insn_field *field;
+
+	if (!insn->x86_64)
+		return 0;
+
+	field = &insn->modrm;
+	if (!field->got)
+		insn_get_modrm(insn);
+	if (!field->nbytes)
+		return 0;
+
+	/*
+	 * RIP-relative means: mod field (top 2 bits) is zero and
+	 * r/m field (bottom 3 bits) is 0x5.
+	 */
+	return (field->value & 0xc7) == 0x5;
+}
+
+/**
+ * insn_get_sib() - Get the SIB byte of instruction
+ * @insn:	&struct insn containing instruction
+ *
+ * Decodes the instruction up to and including the ModRM byte first if
+ * needed.  A SIB byte is read only when a ModRM byte exists, the address
+ * size is not 2, mod != 3 and r/m == 4.
+ */
+void insn_get_sib(struct insn *insn)
+{
+	insn_byte_t modrm_byte;
+
+	if (insn->sib.got)
+		return;
+	if (!insn->modrm.got)
+		insn_get_modrm(insn);
+
+	if (!insn->modrm.nbytes)
+		goto done;
+
+	modrm_byte = (insn_byte_t)insn->modrm.value;
+	if (insn->addr_bytes == 2 ||
+	    X86_MODRM_MOD(modrm_byte) == 3 || X86_MODRM_RM(modrm_byte) != 4)
+		goto done;
+
+	insn->sib.value = get_next(insn_byte_t, insn);
+	insn->sib.nbytes = 1;
+
+done:
+	insn->sib.got = 1;
+
+err_out:
+	return;
+}
+
+
+/**
+ * insn_get_displacement() - Get the displacement of instruction
+ * @insn:	&struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * SIB byte.
+ * Displacement value is sign-expanded.
+ *
+ * If the buffer ends before the displacement is complete, the function
+ * returns through err_out and @insn->displacement.got stays 0.
+ */
+void insn_get_displacement(struct insn *insn)
+{
+	insn_byte_t mod, rm, base;
+
+	if (insn->displacement.got)
+		return;
+	if (!insn->sib.got)
+		insn_get_sib(insn);
+	/* without a ModRM byte there is no displacement to read */
+	if (insn->modrm.nbytes) {
+		/*
+		 * Interpreting the modrm byte:
+		 * mod = 00 - no displacement fields (exceptions below)
+		 * mod = 01 - 1-byte displacement field
+		 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
+		 * 	address size = 2 (0x67 prefix in 32-bit mode)
+		 * mod = 11 - no memory operand
+		 *
+		 * If address size = 2...
+		 * mod = 00, r/m = 110 - displacement field is 2 bytes
+		 *
+		 * If address size != 2...
+		 * mod != 11, r/m = 100 - SIB byte exists
+		 * mod = 00, SIB base = 101 - displacement field is 4 bytes
+		 * mod = 00, r/m = 101 - rip-relative addressing, displacement
+		 * 	field is 4 bytes
+		 */
+		mod = X86_MODRM_MOD(insn->modrm.value);
+		rm = X86_MODRM_RM(insn->modrm.value);
+		base = X86_SIB_BASE(insn->sib.value);
+		if (mod == 3)
+			goto out;
+		if (mod == 1) {
+			/* read as signed char so the value is sign-extended */
+			insn->displacement.value = get_next(signed char, insn);
+			insn->displacement.nbytes = 1;
+		} else if (insn->addr_bytes == 2) {
+			if ((mod == 0 && rm == 6) || mod == 2) {
+				insn->displacement.value =
+					 get_next(short, insn);
+				insn->displacement.nbytes = 2;
+			}
+		} else {
+			if ((mod == 0 && rm == 5) || mod == 2 ||
+			    (mod == 0 && base == 5)) {
+				insn->displacement.value = get_next(int, insn);
+				insn->displacement.nbytes = 4;
+			}
+		}
+	}
+out:
+	insn->displacement.got = 1;
+
+	/* reached directly when get_next() runs out of buffer */
+err_out:
+	return;
+}
+
+/* Decode moffset16/32/64 by addr_bytes. Return 1 on success, 0 if failed */
+static int __get_moffset(struct insn *insn)
+{
+	switch (insn->addr_bytes) {
+	case 2:
+		insn->moffset1.value = get_next(short, insn);
+		insn->moffset1.nbytes = 2;
+		break;
+	case 4:
+		insn->moffset1.value = get_next(int, insn);
+		insn->moffset1.nbytes = 4;
+		break;
+	case 8:	/* 64-bit offset is stored as two 4-byte halves */
+		insn->moffset1.value = get_next(int, insn);
+		insn->moffset1.nbytes = 4;
+		insn->moffset2.value = get_next(int, insn);
+		insn->moffset2.nbytes = 4;
+		break;
+	default:	/* addr_bytes is not 2, 4 or 8 */
+		goto err_out;
+	}
+	insn->moffset1.got = insn->moffset2.got = 1;
+
+	return 1;
+
+err_out:
+	return 0;
+}
+
+/* Decode imm v32(Iz) by opnd_bytes. Return 1 on success, 0 if failed */
+static int __get_immv32(struct insn *insn)
+{
+	switch (insn->opnd_bytes) {
+	case 2:
+		insn->immediate.value = get_next(short, insn);
+		insn->immediate.nbytes = 2;
+		break;
+	case 4:
+	case 8:	/* an 8-byte operand size still reads only a 4-byte immediate */
+		insn->immediate.value = get_next(int, insn);
+		insn->immediate.nbytes = 4;
+		break;
+	default:	/* opnd_bytes is not 2, 4 or 8 */
+		goto err_out;
+	}
+
+	return 1;
+
+err_out:
+	return 0;
+}
+
+/* Decode imm v64(Iv/Ov) by opnd_bytes. Return 1 on success, 0 if failed */
+static int __get_immv(struct insn *insn)
+{
+	switch (insn->opnd_bytes) {
+	case 2:
+		insn->immediate1.value = get_next(short, insn);
+		insn->immediate1.nbytes = 2;
+		break;
+	case 4:
+		insn->immediate1.value = get_next(int, insn);
+		insn->immediate1.nbytes = 4;
+		break;
+	case 8:	/* 64-bit immediate is stored as two 4-byte halves */
+		insn->immediate1.value = get_next(int, insn);
+		insn->immediate1.nbytes = 4;
+		insn->immediate2.value = get_next(int, insn);
+		insn->immediate2.nbytes = 4;
+		break;
+	default:	/* opnd_bytes is not 2, 4 or 8 */
+		goto err_out;
+	}
+	insn->immediate1.got = insn->immediate2.got = 1;
+
+	return 1;
+err_out:
+	return 0;
+}
+
+/* Decode ptr16:16/32(Ap). Return 1 on success, 0 if failed */
+static int __get_immptr(struct insn *insn)
+{
+	switch (insn->opnd_bytes) {
+	case 2:
+		insn->immediate1.value = get_next(short, insn);
+		insn->immediate1.nbytes = 2;
+		break;
+	case 4:
+		insn->immediate1.value = get_next(int, insn);
+		insn->immediate1.nbytes = 4;
+		break;
+	case 8:
+		/* ptr16:64 does not exist (no segment) */
+		return 0;
+	default:	/* opnd_bytes is not 2, 4 or 8 */
+		goto err_out;
+	}
+	insn->immediate2.value = get_next(unsigned short, insn);	/* 16-bit part, read unsigned */
+	insn->immediate2.nbytes = 2;
+	insn->immediate1.got = insn->immediate2.got = 1;
+
+	return 1;
+err_out:
+	return 0;
+}
+
+/**
+ * insn_get_immediate() - Get the immediates of instruction
+ * @insn:	&struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * displacement bytes.  On failure immediate.got is left clear.
+ * Most immediates are sign-extended; the unsigned value can be obtained
+ * by masking with ((1 << (nbytes * 8)) - 1).
+ */
+void insn_get_immediate(struct insn *insn)
+{
+	if (insn->immediate.got)
+		return;
+	if (!insn->displacement.got)
+		insn_get_displacement(insn);
+
+	if (inat_has_moffset(insn->attr)) {
+		if (!__get_moffset(insn))
+			goto err_out;
+		goto done;
+	}
+
+	if (!inat_has_immediate(insn->attr))
+		/* no immediates */
+		goto done;
+
+	switch (inat_immediate_size(insn->attr)) {
+	case INAT_IMM_BYTE:
+		insn->immediate.value = get_next(signed char, insn);
+		insn->immediate.nbytes = 1;
+		break;
+	case INAT_IMM_WORD:
+		insn->immediate.value = get_next(short, insn);
+		insn->immediate.nbytes = 2;
+		break;
+	case INAT_IMM_DWORD:
+		insn->immediate.value = get_next(int, insn);
+		insn->immediate.nbytes = 4;
+		break;
+	case INAT_IMM_QWORD:	/* stored as two 4-byte halves */
+		insn->immediate1.value = get_next(int, insn);
+		insn->immediate1.nbytes = 4;
+		insn->immediate2.value = get_next(int, insn);
+		insn->immediate2.nbytes = 4;
+		break;
+	case INAT_IMM_PTR:
+		if (!__get_immptr(insn))
+			goto err_out;
+		break;
+	case INAT_IMM_VWORD32:
+		if (!__get_immv32(insn))
+			goto err_out;
+		break;
+	case INAT_IMM_VWORD:
+		if (!__get_immv(insn))
+			goto err_out;
+		break;
+	default:
+		/* Here, insn must have an immediate, but failed */
+		goto err_out;
+	}
+	if (inat_has_second_immediate(insn->attr)) {
+		insn->immediate2.value = get_next(signed char, insn);
+		insn->immediate2.nbytes = 1;
+	}
+done:
+	insn->immediate.got = 1;
+
+err_out:
+	return;
+}
+
+/**
+ * insn_get_length() - Get the length of instruction
+ * @insn:	&struct insn containing instruction
+ *
+ * No-op if @insn->length is already non-zero.  Otherwise decodes through the
+ * immediate bytes if needed, then stores next_byte - kaddr as the length.
+ */
+void insn_get_length(struct insn *insn)
+{
+	if (insn->length)
+		return;
+	if (!insn->immediate.got)
+		insn_get_immediate(insn);
+	insn->length = (unsigned char)((unsigned long)insn->next_byte
+				     - (unsigned long)insn->kaddr);
+}
diff --git a/util/intel-pt-decoder/insn.h b/util/intel-pt-decoder/insn.h
new file mode 100644
index 0000000..e23578c
--- /dev/null
+++ b/util/intel-pt-decoder/insn.h
@@ -0,0 +1,211 @@
+#ifndef _ASM_X86_INSN_H
+#define _ASM_X86_INSN_H
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+/* insn_attr_t is defined in inat.h */
+#include "inat.h"
+
+struct insn_field {
+	union {
+		insn_value_t value;
+		insn_byte_t bytes[4];	/* byte-wise view of value */
+	};
+	/* !0 if we've run insn_get_xxx() for this field */
+	unsigned char got;
+	unsigned char nbytes;	/* number of bytes read for this field */
+};
+
+struct insn {
+	struct insn_field prefixes;	/*
+					 * Prefixes
+					 * prefixes.bytes[3]: last prefix
+					 */
+	struct insn_field rex_prefix;	/* REX prefix */
+	struct insn_field vex_prefix;	/* VEX prefix */
+	struct insn_field opcode;	/*
+					 * opcode.bytes[0]: opcode1
+					 * opcode.bytes[1]: opcode2
+					 * opcode.bytes[2]: opcode3
+					 */
+	struct insn_field modrm;	/* ModRM byte */
+	struct insn_field sib;	/* SIB byte */
+	struct insn_field displacement;	/* sign-extended displacement */
+	union {
+		struct insn_field immediate;
+		struct insn_field moffset1;	/* for 64bit MOV */
+		struct insn_field immediate1;	/* for 64bit imm or off16/32 */
+	};
+	union {
+		struct insn_field moffset2;	/* for 64bit MOV */
+		struct insn_field immediate2;	/* for 64bit imm or seg16 */
+	};
+
+	insn_attr_t attr;	/* attributes queried via the inat_*() helpers */
+	unsigned char opnd_bytes;	/* operand size in bytes */
+	unsigned char addr_bytes;	/* address size in bytes */
+	unsigned char length;	/* 0 until insn_get_length() has run */
+	unsigned char x86_64;	/* !0 when decoding as 64-bit code */
+
+	const insn_byte_t *kaddr;	/* kernel address of insn to analyze */
+	const insn_byte_t *end_kaddr;	/* kernel address of last insn in buffer */
+	const insn_byte_t *next_byte;	/* decode cursor; length is next_byte - kaddr */
+};
+
+#define MAX_INSN_SIZE	15
+
+#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)	/* bits 7:6 */
+#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)	/* bits 5:3 */
+#define X86_MODRM_RM(modrm) ((modrm) & 0x07)	/* bits 2:0 */
+
+#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)	/* bits 7:6 */
+#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)	/* bits 5:3 */
+#define X86_SIB_BASE(sib) ((sib) & 0x07)	/* bits 2:0 */
+
+#define X86_REX_W(rex) ((rex) & 8)	/* REX flags: non-zero when set, not 0/1 */
+#define X86_REX_R(rex) ((rex) & 4)
+#define X86_REX_X(rex) ((rex) & 2)
+#define X86_REX_B(rex) ((rex) & 1)
+
+/* VEX bit flags (non-zero when set, not normalized to 0/1) */
+#define X86_VEX_W(vex)	((vex) & 0x80)	/* VEX3 Byte2 */
+#define X86_VEX_R(vex)	((vex) & 0x80)	/* VEX2/3 Byte1 */
+#define X86_VEX_X(vex)	((vex) & 0x40)	/* VEX3 Byte1 */
+#define X86_VEX_B(vex)	((vex) & 0x20)	/* VEX3 Byte1 */
+#define X86_VEX_L(vex)	((vex) & 0x04)	/* VEX3 Byte2, VEX2 Byte1 */
+/* VEX/EVEX bit fields (each macro takes the prefix byte named on its line) */
+#define X86_EVEX_M(vex)	((vex) & 0x03)		/* EVEX Byte1 */
+#define X86_VEX3_M(vex)	((vex) & 0x1f)		/* VEX3 Byte1 */
+#define X86_VEX2_M	1			/* VEX2.M always 1 */
+#define X86_VEX_V(vex)	(((vex) & 0x78) >> 3)	/* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_P(vex)	((vex) & 0x03)		/* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_M_MAX	0x1f			/* VEX3.M Maximum value */
+
+extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
+extern void insn_get_prefixes(struct insn *insn);
+extern void insn_get_opcode(struct insn *insn);
+extern void insn_get_modrm(struct insn *insn);
+extern void insn_get_sib(struct insn *insn);
+extern void insn_get_displacement(struct insn *insn);
+extern void insn_get_immediate(struct insn *insn);
+extern void insn_get_length(struct insn *insn);
+
+/* Attribute is final only after ModRM is decoded (for opcode groups) */
+static inline void insn_get_attribute(struct insn *insn)
+{
+	insn_get_modrm(insn);
+}
+
+/* Non-zero if the instruction uses RIP-relative addressing */
+extern int insn_rip_relative(struct insn *insn);
+
+/* Init insn for kernel text: x86_64 argument is 1 iff CONFIG_X86_64 is defined */
+static inline void kernel_insn_init(struct insn *insn,
+				    const void *kaddr, int buf_len)
+{
+#ifdef CONFIG_X86_64
+	insn_init(insn, kaddr, buf_len, 1);
+#else /* CONFIG_X86_32 */
+	insn_init(insn, kaddr, buf_len, 0);
+#endif
+}
+
+static inline int insn_is_avx(struct insn *insn)
+{
+	if (!insn->prefixes.got)	/* decode prefixes on demand */
+		insn_get_prefixes(insn);
+	return (insn->vex_prefix.value != 0);	/* 1 iff a VEX/EVEX prefix value was stored */
+}
+
+static inline int insn_is_evex(struct insn *insn)
+{
+	if (!insn->prefixes.got)	/* decode prefixes on demand */
+		insn_get_prefixes(insn);
+	return (insn->vex_prefix.nbytes == 4);	/* 1 iff the stored prefix is 4 bytes long */
+}
+
+/* Non-zero iff opcode, ModRM, SIB, displacement and immediate are all "got" */
+static inline int insn_complete(struct insn *insn)
+{
+	return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+		insn->displacement.got && insn->immediate.got;
+}
+
+static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
+{
+	if (insn->vex_prefix.nbytes == 2)	/* 2 bytes VEX */
+		return X86_VEX2_M;
+	else if (insn->vex_prefix.nbytes == 3)	/* 3 bytes VEX */
+		return X86_VEX3_M(insn->vex_prefix.bytes[1]);
+	else					/* EVEX (any other length) */
+		return X86_EVEX_M(insn->vex_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
+{
+	if (insn->vex_prefix.nbytes == 2)	/* 2 bytes VEX */
+		return X86_VEX_P(insn->vex_prefix.bytes[1]);
+	else	/* any other length: the P bits are read from byte 2 */
+		return X86_VEX_P(insn->vex_prefix.bytes[2]);
+}
+
+/* Get the last prefix id from last prefix or VEX prefix; 0 if there is none */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+	if (insn_is_avx(insn))
+		return insn_vex_p_bits(insn);	/* VEX_p is a SIMD prefix id */
+
+	if (insn->prefixes.bytes[3])
+		return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
+
+	return 0;
+}
+
+/* Offset of each field from kaddr: the sum of nbytes of all preceding fields */
+static inline int insn_offset_rex_prefix(struct insn *insn)
+{
+	return insn->prefixes.nbytes;
+}
+static inline int insn_offset_vex_prefix(struct insn *insn)
+{
+	return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
+}
+static inline int insn_offset_opcode(struct insn *insn)
+{
+	return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
+}
+static inline int insn_offset_modrm(struct insn *insn)
+{
+	return insn_offset_opcode(insn) + insn->opcode.nbytes;
+}
+static inline int insn_offset_sib(struct insn *insn)
+{
+	return insn_offset_modrm(insn) + insn->modrm.nbytes;
+}
+static inline int insn_offset_displacement(struct insn *insn)
+{
+	return insn_offset_sib(insn) + insn->sib.nbytes;
+}
+static inline int insn_offset_immediate(struct insn *insn)
+{
+	return insn_offset_displacement(insn) + insn->displacement.nbytes;
+}
+
+#endif /* _ASM_X86_INSN_H */
diff --git a/util/intel-pt-decoder/intel-pt-decoder.c b/util/intel-pt-decoder/intel-pt-decoder.c
new file mode 100644
index 0000000..f9157ae
--- /dev/null
+++ b/util/intel-pt-decoder/intel-pt-decoder.c
@@ -0,0 +1,2668 @@
+/*
+ * intel-pt-decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <linux/compiler.h>
+
+#include "../cache.h"
+#include "../util.h"
+
+#include "intel-pt-insn-decoder.h"
+#include "intel-pt-pkt-decoder.h"
+#include "intel-pt-decoder.h"
+#include "intel-pt-log.h"
+
+#define INTEL_PT_BLK_SIZE 1024	/* ip slots per struct intel_pt_blk */
+
+#define BIT63 (((uint64_t)1 << 63))	/* most significant bit of a uint64_t */
+
+#define INTEL_PT_RETURN 1
+
+/* Maximum number of loops with no packets consumed i.e. stuck in a loop */
+#define INTEL_PT_MAX_LOOPS 10000
+
+struct intel_pt_blk {
+	struct intel_pt_blk *prev;	/* block that was on top before this one */
+	uint64_t ip[INTEL_PT_BLK_SIZE];	/* pushed ip values, oldest first */
+};
+
+struct intel_pt_stack {
+	struct intel_pt_blk *blk;	/* top block, NULL when no block is allocated */
+	struct intel_pt_blk *spare;	/* one popped block kept for reuse, or NULL */
+	int pos;	/* index of the next free slot in blk->ip[] */
+};
+
+enum intel_pt_pkt_state {	/* values of decoder->pkt_state */
+	INTEL_PT_STATE_NO_PSB,	/* set by intel_pt_bug() and on a non-consecutive buffer */
+	INTEL_PT_STATE_NO_IP,
+	INTEL_PT_STATE_ERR_RESYNC,
+	INTEL_PT_STATE_IN_SYNC,
+	INTEL_PT_STATE_TNT,
+	INTEL_PT_STATE_TIP,
+	INTEL_PT_STATE_TIP_PGD,
+	INTEL_PT_STATE_FUP,
+	INTEL_PT_STATE_FUP_NO_TIP,
+};
+
+static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
+{
+	switch (pkt_state) {	/* false only for the TIP, TIP_PGD, FUP and FUP_NO_TIP states */
+	case INTEL_PT_STATE_NO_PSB:
+	case INTEL_PT_STATE_NO_IP:
+	case INTEL_PT_STATE_ERR_RESYNC:
+	case INTEL_PT_STATE_IN_SYNC:
+	case INTEL_PT_STATE_TNT:
+		return true;
+	case INTEL_PT_STATE_TIP:
+	case INTEL_PT_STATE_TIP_PGD:
+	case INTEL_PT_STATE_FUP:
+	case INTEL_PT_STATE_FUP_NO_TIP:
+		return false;
+	default:	/* values outside the enum are treated like the "true" states */
+		return true;
+	};
+}
+
+#ifdef INTEL_PT_STRICT	/* strict build: every error state is NO_PSB */
+#define INTEL_PT_STATE_ERR1	INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR2	INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR3	INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR4	INTEL_PT_STATE_NO_PSB
+#else
+#define INTEL_PT_STATE_ERR1	(decoder->pkt_state)	/* keeps the current state; needs a local "decoder" */
+#define INTEL_PT_STATE_ERR2	INTEL_PT_STATE_NO_IP
+#define INTEL_PT_STATE_ERR3	INTEL_PT_STATE_ERR_RESYNC
+#define INTEL_PT_STATE_ERR4	INTEL_PT_STATE_IN_SYNC
+#endif
+
+struct intel_pt_decoder {
+	int (*get_trace)(struct intel_pt_buffer *buffer, void *data);	/* required callback */
+	int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
+			 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
+			 uint64_t max_insn_cnt, void *data);	/* required callback */
+	bool (*pgd_ip)(uint64_t ip, void *data);
+	void *data;	/* passed as the "data" argument of the callbacks */
+	struct intel_pt_state state;
+	const unsigned char *buf;	/* current trace data */
+	size_t len;	/* bytes available at buf */
+	bool return_compression;
+	bool branch_enable;
+	bool mtc_insn;
+	bool pge;
+	bool have_tma;
+	bool have_cyc;
+	bool fixup_last_mtc;
+	bool have_last_ip;	/* set once last_ip has been computed */
+	uint64_t pos;
+	uint64_t last_ip;
+	uint64_t ip;
+	uint64_t cr3;
+	uint64_t timestamp;
+	uint64_t tsc_timestamp;
+	uint64_t ref_timestamp;	/* taken from the buffer when it is not consecutive */
+	uint64_t sample_timestamp;
+	uint64_t ret_addr;
+	uint64_t ctc_timestamp;
+	uint64_t ctc_delta;
+	uint64_t cycle_cnt;
+	uint64_t cyc_ref_timestamp;
+	uint32_t last_mtc;
+	uint32_t tsc_ctc_ratio_n;
+	uint32_t tsc_ctc_ratio_d;	/* forced to 0 when tsc_ctc_ratio_n is 0 */
+	uint32_t tsc_ctc_mult;	/* n / d when d divides n exactly, else 0 */
+	uint32_t tsc_slip;	/* never less than 0x100 */
+	uint32_t ctc_rem_mask;	/* (1 << mtc_shift) - 1 */
+	int mtc_shift;
+	struct intel_pt_stack stack;
+	enum intel_pt_pkt_state pkt_state;
+	struct intel_pt_pkt packet;
+	struct intel_pt_pkt tnt;
+	int pkt_step;
+	int pkt_len;
+	int last_packet_type;
+	unsigned int cbr;
+	unsigned int cbr_seen;
+	unsigned int max_non_turbo_ratio;
+	double max_non_turbo_ratio_fp;	/* same value as max_non_turbo_ratio, as a double */
+	double cbr_cyc_to_tsc;
+	double calc_cyc_to_tsc;
+	bool have_calc_cyc_to_tsc;	/* set when calc_cyc_to_tsc has been computed */
+	int exec_mode;
+	unsigned int insn_bytes;
+	uint64_t period;
+	enum intel_pt_period_type period_type;
+	uint64_t tot_insn_cnt;
+	uint64_t period_insn_cnt;
+	uint64_t period_mask;	/* ~(period_ticks - 1), see intel_pt_setup_period() */
+	uint64_t period_ticks;	/* period rounded down to a power of 2 */
+	uint64_t last_masked_timestamp;
+	bool continuous_period;
+	bool overflow;
+	bool set_fup_tx_flags;
+	bool set_fup_ptw;
+	bool set_fup_mwait;
+	bool set_fup_pwre;
+	bool set_fup_exstop;
+	unsigned int fup_tx_flags;
+	unsigned int tx_flags;
+	uint64_t fup_ptw_payload;
+	uint64_t fup_mwait_payload;
+	uint64_t fup_pwre_payload;
+	uint64_t cbr_payload;
+	uint64_t timestamp_insn_cnt;
+	uint64_t sample_insn_cnt;
+	uint64_t stuck_ip;
+	int no_progress;
+	int stuck_ip_prd;
+	int stuck_ip_cnt;
+	const unsigned char *next_buf;	/* data to resume from after a split packet */
+	size_t next_len;	/* bytes available at next_buf */
+	unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];	/* holds a packet split across two buffers */
+};
+
+static uint64_t intel_pt_lower_power_of_2(uint64_t x)
+{
+	int i;
+
+	/* Find the top set bit; "x > 1" (not "x != 1") so x == 0 terminates */
+	for (i = 0; x > 1; i++)
+		x >>= 1;
+	return x << i;	/* largest power of 2 <= x, or 0 if x was 0 */
+}
+
+static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
+{
+	if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {	/* other period types: nothing to set up */
+		uint64_t period;
+
+		period = intel_pt_lower_power_of_2(decoder->period);
+		decoder->period_mask  = ~(period - 1);	/* clears the bits below the period */
+		decoder->period_ticks = period;
+	}
+}
+
+static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d)
+{
+	if (!d)	/* avoid dividing by zero */
+		return 0;
+	return (t / d) * n + ((t % d) * n) / d;	/* t * n / d without forming the product t * n */
+}
+
+struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
+{
+	struct intel_pt_decoder *decoder;
+
+	if (!params->get_trace || !params->walk_insn)	/* both callbacks are mandatory */
+		return NULL;
+
+	decoder = zalloc(sizeof(struct intel_pt_decoder));
+	if (!decoder)
+		return NULL;
+
+	decoder->get_trace          = params->get_trace;
+	decoder->walk_insn          = params->walk_insn;
+	decoder->pgd_ip             = params->pgd_ip;
+	decoder->data               = params->data;
+	decoder->return_compression = params->return_compression;
+	decoder->branch_enable      = params->branch_enable;
+
+	decoder->period             = params->period;
+	decoder->period_type        = params->period_type;
+
+	decoder->max_non_turbo_ratio    = params->max_non_turbo_ratio;
+	decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
+
+	intel_pt_setup_period(decoder);
+
+	decoder->mtc_shift = params->mtc_period;
+	decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1;
+
+	decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n;
+	decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d;
+
+	if (!decoder->tsc_ctc_ratio_n)	/* a zero numerator disables the ratio entirely */
+		decoder->tsc_ctc_ratio_d = 0;
+
+	if (decoder->tsc_ctc_ratio_d) {
+		if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
+			decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
+						decoder->tsc_ctc_ratio_d;
+
+		/*
+		 * Allow for timestamps appearing to go backwards because a TSC
+		 * packet has slipped past a MTC packet, so allow 2 MTC ticks
+		 * or ...
+		 */
+		decoder->tsc_slip = multdiv(2 << decoder->mtc_shift,
+					decoder->tsc_ctc_ratio_n,
+					decoder->tsc_ctc_ratio_d);
+	}
+	/* ... or 0x100 paranoia */
+	if (decoder->tsc_slip < 0x100)
+		decoder->tsc_slip = 0x100;
+
+	intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
+	intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
+	intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d);
+	intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
+	intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
+
+	return decoder;
+}
+
+static void intel_pt_pop_blk(struct intel_pt_stack *stack)
+{
+	struct intel_pt_blk *blk = stack->blk;	/* callers ensure stack->blk is not NULL */
+
+	stack->blk = blk->prev;	/* unlink the top block */
+	if (!stack->spare)
+		stack->spare = blk;	/* keep one block for intel_pt_alloc_blk() to reuse */
+	else
+		free(blk);
+}
+
+static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
+{
+	if (!stack->pos) {	/* top block is empty (or there is no block) */
+		if (!stack->blk)
+			return 0;	/* empty stack yields 0 */
+		intel_pt_pop_blk(stack);
+		if (!stack->blk)
+			return 0;
+		stack->pos = INTEL_PT_BLK_SIZE;	/* a block below the top is always full */
+	}
+	return stack->blk->ip[--stack->pos];
+}
+
+static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
+{
+	struct intel_pt_blk *blk;
+
+	if (stack->spare) {	/* reuse the cached block before calling malloc() */
+		blk = stack->spare;
+		stack->spare = NULL;
+	} else {
+		blk = malloc(sizeof(struct intel_pt_blk));
+		if (!blk)
+			return -ENOMEM;
+	}
+
+	blk->prev = stack->blk;	/* new block becomes the top of the stack */
+	stack->blk = blk;
+	stack->pos = 0;
+	return 0;	/* 0 on success, -ENOMEM if allocation failed */
+}
+
+static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
+{
+	int err;
+
+	if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {	/* no block yet, or top block full */
+		err = intel_pt_alloc_blk(stack);
+		if (err)
+			return err;	/* nothing is pushed on failure */
+	}
+
+	stack->blk->ip[stack->pos++] = ip;
+	return 0;
+}
+
+static void intel_pt_clear_stack(struct intel_pt_stack *stack)
+{
+	while (stack->blk)	/* drop every block; one may stay cached in stack->spare */
+		intel_pt_pop_blk(stack);
+	stack->pos = 0;
+}
+
+static void intel_pt_free_stack(struct intel_pt_stack *stack)
+{
+	intel_pt_clear_stack(stack);	/* leaves stack->blk NULL */
+	zfree(&stack->blk);	/* NOTE(review): zfree() presumably frees and NULLs the pointer */
+	zfree(&stack->spare);	/* releases the block cached by intel_pt_pop_blk(), if any */
+}
+
+void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
+{
+	intel_pt_free_stack(&decoder->stack);	/* decoder is dereferenced, so it must not be NULL */
+	free(decoder);
+}
+
+static int intel_pt_ext_err(int code)
+{
+	switch (code) {	/* negative errno value -> INTEL_PT_ERR_* code */
+	case -ENOMEM:
+		return INTEL_PT_ERR_NOMEM;
+	case -ENOSYS:
+		return INTEL_PT_ERR_INTERN;
+	case -EBADMSG:
+		return INTEL_PT_ERR_BADPKT;
+	case -ENODATA:
+		return INTEL_PT_ERR_NODATA;
+	case -EILSEQ:
+		return INTEL_PT_ERR_NOINSN;
+	case -ENOENT:
+		return INTEL_PT_ERR_MISMAT;
+	case -EOVERFLOW:
+		return INTEL_PT_ERR_OVR;
+	case -ENOSPC:
+		return INTEL_PT_ERR_LOST;
+	case -ELOOP:
+		return INTEL_PT_ERR_NELOOP;
+	default:	/* anything else is reported as unknown */
+		return INTEL_PT_ERR_UNK;
+	}
+}
+
+static const char *intel_pt_err_msgs[] = {	/* indexed by INTEL_PT_ERR_* code */
+	[INTEL_PT_ERR_NOMEM]  = "Memory allocation failed",
+	[INTEL_PT_ERR_INTERN] = "Internal error",
+	[INTEL_PT_ERR_BADPKT] = "Bad packet",
+	[INTEL_PT_ERR_NODATA] = "No more data",
+	[INTEL_PT_ERR_NOINSN] = "Failed to get instruction",
+	[INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction",
+	[INTEL_PT_ERR_OVR]    = "Overflow packet",
+	[INTEL_PT_ERR_LOST]   = "Lost trace data",
+	[INTEL_PT_ERR_UNK]    = "Unknown error!",
+	[INTEL_PT_ERR_NELOOP] = "Never-ending loop",
+};
+
+int intel_pt__strerror(int code, char *buf, size_t buflen)
+{
+	if (code < 1 || code >= INTEL_PT_ERR_MAX)	/* out-of-range codes get the "unknown" text */
+		code = INTEL_PT_ERR_UNK;
+	strlcpy(buf, intel_pt_err_msgs[code], buflen);
+	return 0;	/* always 0 */
+}
+
+static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
+				 uint64_t last_ip)
+{
+	uint64_t ip;
+
+	switch (packet->count) {	/* count selects how payload combines with last_ip */
+	case 1:	/* upper 48 bits come from last_ip */
+		ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
+		     packet->payload;
+		break;
+	case 2:	/* upper 32 bits come from last_ip */
+		ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
+		     packet->payload;
+		break;
+	case 3:
+		ip = packet->payload;
+		/* Sign-extend 6-byte ip */
+		if (ip & (uint64_t)0x800000000000ULL)
+			ip |= (uint64_t)0xffff000000000000ULL;
+		break;
+	case 4:	/* upper 16 bits come from last_ip */
+		ip = (last_ip & (uint64_t)0xffff000000000000ULL) |
+		     packet->payload;
+		break;
+	case 6:	/* payload is used as the whole ip */
+		ip = packet->payload;
+		break;
+	default:	/* any other count yields ip 0 */
+		return 0;
+	}
+
+	return ip;
+}
+
+static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
+{
+	decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);	/* from the current packet */
+	decoder->have_last_ip = true;
+}
+
+static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
+{
+	intel_pt_set_last_ip(decoder);	/* update last_ip from the current packet ... */
+	decoder->ip = decoder->last_ip;	/* ... and make it the current ip as well */
+}
+
+static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
+{
+	intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
+			    decoder->buf);	/* logs the decoder's current packet */
+}
+
+static int intel_pt_bug(struct intel_pt_decoder *decoder)
+{
+	intel_pt_log("ERROR: Internal error\n");
+	decoder->pkt_state = INTEL_PT_STATE_NO_PSB;	/* fall back to the NO_PSB state */
+	return -ENOSYS;	/* intel_pt_ext_err() maps this to INTEL_PT_ERR_INTERN */
+}
+
+static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
+{
+	decoder->tx_flags = 0;	/* drop all transaction flags */
+}
+
+static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
+{
+	decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;	/* keep only the IN_TX bit */
+}
+
+static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
+{
+	intel_pt_clear_tx_flags(decoder);
+	decoder->have_tma = false;
+	decoder->pkt_len = 1;	/* treat the bad packet as a single byte */
+	decoder->pkt_step = 1;
+	intel_pt_decoder_log_packet(decoder);
+	if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {	/* stay quiet while still in NO_PSB */
+		intel_pt_log("ERROR: Bad packet\n");
+		decoder->pkt_state = INTEL_PT_STATE_ERR1;	/* a no-op unless INTEL_PT_STRICT is defined */
+	}
+	return -EBADMSG;	/* always reports a bad packet */
+}
+
+static int intel_pt_get_data(struct intel_pt_decoder *decoder)
+{
+	struct intel_pt_buffer buffer = { .buf = 0, };
+	int ret;
+
+	decoder->pkt_step = 0;
+
+	intel_pt_log("Getting more data\n");
+	ret = decoder->get_trace(&buffer, decoder->data);
+	if (ret)
+		return ret;	/* callback result is passed through unchanged */
+	decoder->buf = buffer.buf;
+	decoder->len = buffer.len;
+	if (!decoder->len) {
+		intel_pt_log("No more data\n");
+		return -ENODATA;
+	}
+	if (!buffer.consecutive) {	/* discontinuity: reset ip/timestamp state, go to NO_PSB */
+		decoder->ip = 0;
+		decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+		decoder->ref_timestamp = buffer.ref_timestamp;
+		decoder->timestamp = 0;
+		decoder->have_tma = false;
+		decoder->state.trace_nr = buffer.trace_nr;
+		intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
+			     decoder->ref_timestamp);
+		return -ENOLINK;	/* buf/len are already set to the new buffer */
+	}
+
+	return 0;
+}
+
+static int intel_pt_get_next_data(struct intel_pt_decoder *decoder)
+{
+	if (!decoder->next_buf)	/* nothing saved by intel_pt_get_split_packet() */
+		return intel_pt_get_data(decoder);
+
+	decoder->buf = decoder->next_buf;	/* resume the buffer saved earlier */
+	decoder->len = decoder->next_len;
+	decoder->next_buf = 0;
+	decoder->next_len = 0;
+	return 0;
+}
+
+static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
+{
+	unsigned char *buf = decoder->temp_buf;
+	size_t old_len, len, n;
+	int ret;
+
+	old_len = decoder->len;
+	len = decoder->len;
+	memcpy(buf, decoder->buf, len);	/* save the leftover bytes of the old buffer */
+
+	ret = intel_pt_get_data(decoder);
+	if (ret) {
+		decoder->pos += old_len;
+		return ret < 0 ? ret : -EINVAL;	/* positive results become -EINVAL */
+	}
+
+	n = INTEL_PT_PKT_MAX_SZ - len;	/* NOTE(review): assumes len <= INTEL_PT_PKT_MAX_SZ */
+	if (n > decoder->len)
+		n = decoder->len;
+	memcpy(buf + len, decoder->buf, n);	/* append the start of the new buffer */
+	len += n;
+
+	ret = intel_pt_get_packet(buf, len, &decoder->packet);
+	if (ret < (int)old_len) {	/* error, or packet ended within the old bytes */
+		decoder->next_buf = decoder->buf;
+		decoder->next_len = decoder->len;
+		decoder->buf = buf;
+		decoder->len = old_len;
+		return intel_pt_bad_packet(decoder);
+	}
+
+	decoder->next_buf = decoder->buf + (ret - old_len);	/* skip the bytes this packet used */
+	decoder->next_len = decoder->len - (ret - old_len);
+
+	decoder->buf = buf;	/* decode continues from temp_buf */
+	decoder->len = ret;
+
+	return ret;	/* length of the reassembled packet */
+}
+
+struct intel_pt_pkt_info {
+	struct intel_pt_decoder	  *decoder;
+	struct intel_pt_pkt       packet;	/* packet being offered to the callback */
+	uint64_t                  pos;	/* starts at decoder->pos, advanced by pkt_len */
+	int                       pkt_len;	/* length of "packet" in bytes */
+	int                       last_packet_type;	/* type of the previous non-PAD packet */
+	void                      *data;	/* caller's pointer, passed through untouched */
+};
+
+typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info);	/* non-zero return stops the lookahead */
+
+/* Lookahead packets in current buffer; returns 0 once cb returns non-zero */
+static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
+				  intel_pt_pkt_cb_t cb, void *data)
+{
+	struct intel_pt_pkt_info pkt_info;
+	const unsigned char *buf = decoder->buf;
+	size_t len = decoder->len;
+	int ret;
+
+	pkt_info.decoder          = decoder;
+	pkt_info.pos              = decoder->pos;
+	pkt_info.pkt_len          = decoder->pkt_step;	/* first advance skips the current packet */
+	pkt_info.last_packet_type = decoder->last_packet_type;
+	pkt_info.data             = data;
+
+	while (1) {
+		do {
+			pkt_info.pos += pkt_info.pkt_len;
+			buf          += pkt_info.pkt_len;
+			len          -= pkt_info.pkt_len;
+
+			if (!len)	/* ran off the end of the buffer */
+				return INTEL_PT_NEED_MORE_BYTES;
+
+			ret = intel_pt_get_packet(buf, len, &pkt_info.packet);
+			if (!ret)
+				return INTEL_PT_NEED_MORE_BYTES;
+			if (ret < 0)
+				return ret;	/* packet decode error */
+
+			pkt_info.pkt_len = ret;
+		} while (pkt_info.packet.type == INTEL_PT_PAD);	/* PAD packets never reach cb */
+
+		ret = cb(&pkt_info);
+		if (ret)
+			return 0;
+
+		pkt_info.last_packet_type = pkt_info.packet.type;
+	}
+}
+
+struct intel_pt_calc_cyc_to_tsc_info {	/* state carried by intel_pt_calc_cyc_cb() */
+	uint64_t        cycle_cnt;	/* sum of CYC packet payloads seen so far */
+	unsigned int    cbr;	/* 0 until a CBR value is known */
+	uint32_t        last_mtc;
+	uint64_t        ctc_timestamp;
+	uint64_t        ctc_delta;
+	uint64_t        tsc_timestamp;
+	uint64_t        timestamp;
+	bool            have_tma;	/* MTC packets are ignored until this is set */
+	bool            fixup_last_mtc;
+	bool            from_mtc;	/* when set, TSC and TMA packets stop the lookahead */
+	double          cbr_cyc_to_tsc;
+};
+
+/*
+ * MTC provides an 8-bit slice of CTC but the TMA packet only provides the lower
+ * 16 bits of CTC. If mtc_shift > 8 then some of the MTC bits are not in the CTC
+ * provided by the TMA packet. Fix-up the last_mtc calculated from the TMA
+ * packet by copying the missing bits from the current MTC assuming the least
+ * difference between the two, and that the current MTC comes after last_mtc.
+ */
+static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift,
+				    uint32_t *last_mtc)
+{
+	uint32_t first_missing_bit = 1U << (16 - mtc_shift);
+	uint32_t mask = ~(first_missing_bit - 1);	/* first missing bit and everything above it */
+
+	*last_mtc |= mtc & mask;
+	if (*last_mtc >= mtc) {	/* last_mtc must come before mtc: step back one unit */
+		*last_mtc -= first_missing_bit;
+		*last_mtc &= 0xff;
+	}
+}
+
+static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)	/* lookahead cb: 0 = continue, 1 = stop */
+{
+	struct intel_pt_decoder *decoder = pkt_info->decoder;
+	struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data;
+	uint64_t timestamp;
+	double cyc_to_tsc;
+	unsigned int cbr;
+	uint32_t mtc, mtc_delta, ctc, fc, ctc_rem;
+
+	switch (pkt_info->packet.type) {
+	case INTEL_PT_TNT:
+	case INTEL_PT_TIP_PGE:
+	case INTEL_PT_TIP:
+	case INTEL_PT_FUP:
+	case INTEL_PT_PSB:
+	case INTEL_PT_PIP:
+	case INTEL_PT_MODE_EXEC:
+	case INTEL_PT_MODE_TSX:
+	case INTEL_PT_PSBEND:
+	case INTEL_PT_PAD:
+	case INTEL_PT_VMCS:
+	case INTEL_PT_MNT:
+	case INTEL_PT_PTWRITE:
+	case INTEL_PT_PTWRITE_IP:
+		return 0;	/* these packet types are simply skipped */
+
+	case INTEL_PT_MTC:
+		if (!data->have_tma)
+			return 0;
+
+		mtc = pkt_info->packet.payload;
+		if (decoder->mtc_shift > 8 && data->fixup_last_mtc) {
+			data->fixup_last_mtc = false;
+			intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+						&data->last_mtc);
+		}
+		if (mtc > data->last_mtc)
+			mtc_delta = mtc - data->last_mtc;
+		else	/* 8-bit MTC value wrapped (or did not advance) */
+			mtc_delta = mtc + 256 - data->last_mtc;
+		data->ctc_delta += mtc_delta << decoder->mtc_shift;
+		data->last_mtc = mtc;
+
+		if (decoder->tsc_ctc_mult) {
+			timestamp = data->ctc_timestamp +
+				data->ctc_delta * decoder->tsc_ctc_mult;
+		} else {
+			timestamp = data->ctc_timestamp +
+				multdiv(data->ctc_delta,
+					decoder->tsc_ctc_ratio_n,
+					decoder->tsc_ctc_ratio_d);
+		}
+
+		if (timestamp < data->timestamp)	/* time went backwards: give up */
+			return 1;
+
+		if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+			data->timestamp = timestamp;
+			return 0;
+		}
+
+		break;	/* MTC right after CYC: go on to the calculation below */
+
+	case INTEL_PT_TSC:
+		/*
+		 * For now, do not support using TSC packets - refer
+		 * intel_pt_calc_cyc_to_tsc().
+		 */
+		if (data->from_mtc)
+			return 1;
+		timestamp = pkt_info->packet.payload |
+			    (data->timestamp & (0xffULL << 56));
+		if (data->from_mtc && timestamp < data->timestamp &&
+		    data->timestamp - timestamp < decoder->tsc_slip)
+			return 1;	/* unreachable while from_mtc returns early above */
+		if (timestamp < data->timestamp)
+			timestamp += (1ULL << 56);
+		if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+			if (data->from_mtc)
+				return 1;
+			data->tsc_timestamp = timestamp;
+			data->timestamp = timestamp;
+			return 0;
+		}
+		break;	/* TSC right after CYC: go on to the calculation below */
+
+	case INTEL_PT_TMA:
+		if (data->from_mtc)
+			return 1;
+
+		if (!decoder->tsc_ctc_ratio_d)
+			return 0;
+
+		ctc = pkt_info->packet.payload;
+		fc = pkt_info->packet.count;
+		ctc_rem = ctc & decoder->ctc_rem_mask;
+
+		data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+
+		data->ctc_timestamp = data->tsc_timestamp - fc;
+		if (decoder->tsc_ctc_mult) {
+			data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+		} else {
+			data->ctc_timestamp -=
+				multdiv(ctc_rem, decoder->tsc_ctc_ratio_n,
+					decoder->tsc_ctc_ratio_d);
+		}
+
+		data->ctc_delta = 0;
+		data->have_tma = true;
+		data->fixup_last_mtc = true;
+
+		return 0;
+
+	case INTEL_PT_CYC:
+		data->cycle_cnt += pkt_info->packet.payload;	/* accumulate cycles */
+		return 0;
+
+	case INTEL_PT_CBR:
+		cbr = pkt_info->packet.payload;
+		if (data->cbr && data->cbr != cbr)	/* CBR value changed: stop */
+			return 1;
+		data->cbr = cbr;
+		data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+		return 0;
+
+	case INTEL_PT_TIP_PGD:
+	case INTEL_PT_TRACESTOP:
+	case INTEL_PT_EXSTOP:
+	case INTEL_PT_EXSTOP_IP:
+	case INTEL_PT_MWAIT:
+	case INTEL_PT_PWRE:
+	case INTEL_PT_PWRX:
+	case INTEL_PT_OVF:
+	case INTEL_PT_BAD: /* Does not happen */
+	default:
+		return 1;	/* these packet types end the lookahead */
+	}
+
+	if (!data->cbr && decoder->cbr) {	/* no CBR seen in lookahead: use the decoder's */
+		data->cbr = decoder->cbr;
+		data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc;
+	}
+
+	if (!data->cycle_cnt)	/* also avoids dividing by zero below */
+		return 1;
+
+	cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt;
+
+	if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc &&
+	    cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) {	/* over 25% above the CBR-based value */
+		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n",
+			     cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+		return 1;
+	}
+
+	decoder->calc_cyc_to_tsc = cyc_to_tsc;
+	decoder->have_calc_cyc_to_tsc = true;
+
+	if (data->cbr) {
+		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n",
+			     cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+	} else {
+		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n",
+			     cyc_to_tsc, pkt_info->pos);
+	}
+
+	return 1;	/* result stored: stop the lookahead */
+}
+
+/*
+ * Run intel_pt_calc_cyc_cb() over upcoming packets via
+ * intel_pt_pkt_lookahead(), starting from a snapshot of the decoder's
+ * current timing state.
+ *
+ * Only MTC-triggered calculation (@from_mtc true) is performed.  For now,
+ * using TSC packets is not supported for at least the reasons:
+ * 1) timing might have stopped
+ * 2) TSC packets within PSB+ can slip against CYC packets
+ */
+static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
+				     bool from_mtc)
+{
+	struct intel_pt_calc_cyc_to_tsc_info data = {
+		/* Values accumulated by the callback start from zero */
+		.cycle_cnt      = 0,
+		.cbr            = 0,
+		.cbr_cyc_to_tsc = 0,
+		/* Snapshot of the decoder's timing state */
+		.timestamp      = decoder->timestamp,
+		.tsc_timestamp  = decoder->tsc_timestamp,
+		.ctc_timestamp  = decoder->ctc_timestamp,
+		.ctc_delta      = decoder->ctc_delta,
+		.last_mtc       = decoder->last_mtc,
+		.have_tma       = decoder->have_tma,
+		.fixup_last_mtc = decoder->fixup_last_mtc,
+		.from_mtc       = from_mtc,
+	};
+
+	if (from_mtc)
+		intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
+}
+
+/*
+ * Advance to the next packet that is not a PAD packet, leaving it decoded in
+ * decoder->packet.  The type of the packet being left behind is saved in
+ * decoder->last_packet_type first.
+ *
+ * Returns 0 on success, otherwise the value returned by
+ * intel_pt_get_next_data(), intel_pt_get_split_packet() or
+ * intel_pt_bad_packet().
+ */
+static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
+{
+	int rc;
+
+	decoder->last_packet_type = decoder->packet.type;
+
+	for (;;) {
+		/* Step past the previously decoded packet */
+		decoder->len -= decoder->pkt_step;
+		decoder->buf += decoder->pkt_step;
+		decoder->pos += decoder->pkt_step;
+
+		/* Nothing left in the current buffer: ask for more data */
+		if (decoder->len == 0) {
+			rc = intel_pt_get_next_data(decoder);
+			if (rc != 0)
+				return rc;
+		}
+
+		rc = intel_pt_get_packet(decoder->buf, decoder->len,
+					 &decoder->packet);
+		if (rc == INTEL_PT_NEED_MORE_BYTES && !decoder->next_buf &&
+		    decoder->len < INTEL_PT_PKT_MAX_SZ) {
+			rc = intel_pt_get_split_packet(decoder);
+			if (rc < 0)
+				return rc;
+		}
+		if (rc <= 0)
+			return intel_pt_bad_packet(decoder);
+
+		/* rc is the packet length, which is also the next step */
+		decoder->pkt_step = rc;
+		decoder->pkt_len = rc;
+		intel_pt_decoder_log_packet(decoder);
+
+		if (decoder->packet.type != INTEL_PT_PAD)
+			return 0;
+	}
+}
+
+/*
+ * Work out how much further to go before the next sample when sampling by
+ * ticks.  The current time is taken as the decoder timestamp plus the number
+ * of instructions counted since that timestamp was set.
+ *
+ * Returns 1 when, in a continuous period, the masked time has already moved
+ * on from the last recorded masked timestamp; otherwise returns
+ * period_ticks less the offset of the current time into its period.
+ */
+static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
+{
+	uint64_t now = decoder->timestamp + decoder->timestamp_insn_cnt;
+	uint64_t period_start;
+
+	if (decoder->continuous_period) {
+		period_start = now & decoder->period_mask;
+		if (period_start != decoder->last_masked_timestamp)
+			return 1;
+	} else {
+		/* Not continuous: look one tick ahead */
+		now++;
+		period_start = now & decoder->period_mask;
+		if (period_start != decoder->last_masked_timestamp) {
+			decoder->continuous_period = true;
+			decoder->last_masked_timestamp = period_start;
+		}
+	}
+
+	return decoder->period_ticks - (now - period_start);
+}
+
+/*
+ * Return the instruction-count limit to apply to the next instruction walk,
+ * according to the sampling period type.  Zero is returned for period types
+ * other than instructions or ticks.
+ */
+static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
+{
+	if (decoder->period_type == INTEL_PT_PERIOD_INSTRUCTIONS)
+		return decoder->period - decoder->period_insn_cnt;
+
+	if (decoder->period_type == INTEL_PT_PERIOD_TICKS)
+		return intel_pt_next_period(decoder);
+
+	/* INTEL_PT_PERIOD_NONE, INTEL_PT_PERIOD_MTC and anything else */
+	return 0;
+}
+
+/*
+ * Flag the current state as an instruction sample and restart the sampling
+ * period: the instruction count is zeroed when sampling by instructions, and
+ * the masked timestamp is recorded when sampling by ticks.
+ */
+static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
+{
+	uint64_t now;
+
+	if (decoder->period_type == INTEL_PT_PERIOD_INSTRUCTIONS) {
+		decoder->period_insn_cnt = 0;
+	} else if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
+		now = decoder->timestamp + decoder->timestamp_insn_cnt;
+		decoder->last_masked_timestamp = now & decoder->period_mask;
+	}
+	/* Nothing to reset for INTEL_PT_PERIOD_NONE or INTEL_PT_PERIOD_MTC */
+
+	decoder->state.type |= INTEL_PT_INSTRUCTION;
+}
+
+/*
+ * Walk instructions using the decoder->walk_insn() callback, starting at
+ * decoder->ip, then update instruction counts and decoder state according to
+ * the instruction the walk stopped at.
+ *
+ * @intel_pt_insn: filled in by the callback with the instruction stopped at
+ * @ip: if non-zero, an address at which the walk is to stop
+ *
+ * Returns:
+ *   INTEL_PT_RETURN  state has been set up for a non-branch instruction or an
+ *                    unconditional branch
+ *   0                stopped at another kind of branch, which the caller
+ *                    must resolve
+ *   -EAGAIN          @ip was reached
+ *   -ELOOP           a never-ending loop was detected
+ *   -ENOLINK         the callback failed with -ENOENT
+ *   -EILSEQ          the callback failed with any other error
+ *   otherwise        an error from intel_pt_push()
+ */
+static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
+			      struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+	uint64_t walked = 0, limit;
+	int ret;
+
+	/* Note that an instruction has been walked since the last MTC */
+	if (!decoder->mtc_insn)
+		decoder->mtc_insn = true;
+
+	limit = intel_pt_next_sample(decoder);
+
+	ret = decoder->walk_insn(intel_pt_insn, &walked, &decoder->ip, ip,
+				 limit, decoder->data);
+
+	decoder->period_insn_cnt += walked;
+	decoder->sample_insn_cnt += walked;
+	decoder->timestamp_insn_cnt += walked;
+	decoder->tot_insn_cnt += walked;
+
+	if (ret) {
+		decoder->no_progress = 0;
+		decoder->pkt_state = INTEL_PT_STATE_ERR2;
+		intel_pt_log_at("ERROR: Failed to get instruction",
+				decoder->ip);
+		return ret == -ENOENT ? -ENOLINK : -EILSEQ;
+	}
+
+	if (ip && decoder->ip == ip) {
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	/* The sample limit was reached, so this instruction is a sample */
+	if (limit && walked >= limit)
+		intel_pt_sample_insn(decoder);
+
+	if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
+		decoder->state.type = INTEL_PT_INSTRUCTION;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		decoder->ip += intel_pt_insn->length;
+		ret = INTEL_PT_RETURN;
+		goto out;
+	}
+
+	/* Keep the return address stack in step with calls and returns */
+	if (intel_pt_insn->op == INTEL_PT_OP_RET) {
+		decoder->ret_addr = intel_pt_pop(&decoder->stack);
+	} else if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
+		/* Zero-length calls are excluded */
+		if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
+		    intel_pt_insn->rel) {
+			ret = intel_pt_push(&decoder->stack, decoder->ip +
+					    intel_pt_insn->length);
+			if (ret)
+				goto out;
+		}
+	}
+
+	if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
+		int loops = decoder->no_progress++;
+
+		decoder->state.from_ip = decoder->ip;
+		decoder->ip += intel_pt_insn->length +
+				intel_pt_insn->rel;
+		decoder->state.to_ip = decoder->ip;
+		ret = INTEL_PT_RETURN;
+
+		/*
+		 * Check for being stuck in a loop.  This can happen if a
+		 * decoder error results in the decoder erroneously setting the
+		 * ip to an address that is itself in an infinite loop that
+		 * consumes no packets.  When that happens, there must be an
+		 * unconditional branch.
+		 */
+		if (loops == 1) {
+			decoder->stuck_ip = decoder->state.to_ip;
+			decoder->stuck_ip_prd = 1;
+			decoder->stuck_ip_cnt = 1;
+		} else if (loops != 0) {
+			if (loops > INTEL_PT_MAX_LOOPS ||
+			    decoder->state.to_ip == decoder->stuck_ip) {
+				intel_pt_log_at("ERROR: Never-ending loop",
+						decoder->state.to_ip);
+				decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+				ret = -ELOOP;
+				goto out;
+			}
+			if (!--decoder->stuck_ip_cnt) {
+				/* Lengthen the period and pick a new ip */
+				decoder->stuck_ip_prd += 1;
+				decoder->stuck_ip_cnt = decoder->stuck_ip_prd;
+				decoder->stuck_ip = decoder->state.to_ip;
+			}
+		}
+		goto out_no_progress;
+	}
+out:
+	decoder->no_progress = 0;
+out_no_progress:
+	/* Record details of the instruction the walk stopped at */
+	decoder->state.insn_op = intel_pt_insn->op;
+	decoder->state.insn_len = intel_pt_insn->length;
+	memcpy(decoder->state.insn, intel_pt_insn->buf,
+	       INTEL_PT_INSN_BUF_SZ);
+
+	if (decoder->tx_flags & INTEL_PT_IN_TX)
+		decoder->state.flags |= INTEL_PT_IN_TX;
+
+	return ret;
+}
+
+/*
+ * Turn pending FUP-related flags into decoder state at the current ip.
+ *
+ * Pending TX flags or a pending PTWRITE each produce a state on their own and
+ * return straight away.  Pending MWAIT, PWRE and EXSTOP are all processed in
+ * the same call, so their state types can be combined.
+ *
+ * Returns true if any pending flag was consumed, false otherwise.
+ */
+static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
+{
+	bool handled = false;
+
+	if (decoder->set_fup_tx_flags) {
+		decoder->set_fup_tx_flags = false;
+		decoder->tx_flags = decoder->fup_tx_flags;
+		decoder->state.type = INTEL_PT_TRANSACTION;
+		decoder->state.flags = decoder->fup_tx_flags;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		return true;
+	}
+
+	if (decoder->set_fup_ptw) {
+		decoder->set_fup_ptw = false;
+		decoder->state.type = INTEL_PT_PTW;
+		decoder->state.flags |= INTEL_PT_FUP_IP;
+		decoder->state.ptw_payload = decoder->fup_ptw_payload;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		return true;
+	}
+
+	if (decoder->set_fup_mwait) {
+		decoder->set_fup_mwait = false;
+		decoder->state.type = INTEL_PT_MWAIT_OP;
+		decoder->state.mwait_payload = decoder->fup_mwait_payload;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		handled = true;
+	}
+
+	if (decoder->set_fup_pwre) {
+		decoder->set_fup_pwre = false;
+		decoder->state.type |= INTEL_PT_PWR_ENTRY;
+		decoder->state.type &= ~INTEL_PT_BRANCH;
+		decoder->state.pwre_payload = decoder->fup_pwre_payload;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		handled = true;
+	}
+
+	if (decoder->set_fup_exstop) {
+		decoder->set_fup_exstop = false;
+		decoder->state.type |= INTEL_PT_EX_STOP;
+		decoder->state.type &= ~INTEL_PT_BRANCH;
+		decoder->state.flags |= INTEL_PT_FUP_IP;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		handled = true;
+	}
+
+	return handled;
+}
+
+/*
+ * Walk instructions up to the ip given by a FUP packet (decoder->last_ip).
+ *
+ * Returns:
+ *   0        decoder state is ready to be reported, either from the
+ *            instruction walk or from a pending FUP event
+ *   -EAGAIN  the FUP ip was reached and there was no pending FUP event
+ *   -ENOENT  an indirect or conditional branch was met before the FUP ip
+ *   other    an error from intel_pt_walk_insn() or intel_pt_bug()
+ */
+static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
+{
+	struct intel_pt_insn intel_pt_insn;
+	uint64_t ip;
+	int err;
+
+	ip = decoder->last_ip;
+
+	while (1) {
+		err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
+		if (err == INTEL_PT_RETURN)
+			return 0;
+		if (err == -EAGAIN) {
+			/* Reached the FUP ip: report any pending event */
+			if (intel_pt_fup_event(decoder))
+				return 0;
+			return err;
+		}
+		decoder->set_fup_tx_flags = false;
+		if (err)
+			return err;
+
+		if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+			intel_pt_log_at("ERROR: Unexpected indirect branch",
+					decoder->ip);
+			decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+			return -ENOENT;
+		}
+
+		if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+			intel_pt_log_at("ERROR: Unexpected conditional branch",
+					decoder->ip);
+			decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+			return -ENOENT;
+		}
+
+		/*
+		 * Propagate the error instead of carrying on walking, as the
+		 * other instruction walkers do.
+		 */
+		return intel_pt_bug(decoder);
+	}
+}
+
+/*
+ * Walk instructions to the branch that a TIP or TIP.PGD packet belongs to
+ * (decoder->pkt_state tells which) and set up the resulting decoder state.
+ *
+ * Returns 0 when state is ready to be reported, -ENOENT if a conditional
+ * branch is met that cannot be explained by leaving a filter region,
+ * otherwise an error from intel_pt_walk_insn() or intel_pt_bug().
+ */
+static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
+{
+	struct intel_pt_insn insn;
+	uint64_t to_ip;
+	bool tip_pgd;
+	int ret;
+
+	ret = intel_pt_walk_insn(decoder, &insn, 0);
+	if (ret == INTEL_PT_RETURN) {
+		if (decoder->pgd_ip &&
+		    decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+		    (decoder->state.type & INTEL_PT_BRANCH) &&
+		    decoder->pgd_ip(decoder->state.to_ip, decoder->data)) {
+			/* Unconditional branch leaving filter region */
+			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+			decoder->continuous_period = false;
+			decoder->pge = false;
+			decoder->no_progress = 0;
+			decoder->state.to_ip = 0;
+		}
+		return 0;
+	}
+	if (ret)
+		return ret;
+
+	tip_pgd = decoder->pkt_state == INTEL_PT_STATE_TIP_PGD;
+
+	if (insn.branch == INTEL_PT_BR_INDIRECT) {
+		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+		decoder->state.from_ip = decoder->ip;
+		if (tip_pgd) {
+			/* Packet generation stops here, so no destination */
+			decoder->pge = false;
+			decoder->continuous_period = false;
+			decoder->state.to_ip = 0;
+			if (decoder->packet.count)
+				decoder->ip = decoder->last_ip;
+		} else if (decoder->packet.count) {
+			decoder->state.to_ip = decoder->last_ip;
+			decoder->ip = decoder->last_ip;
+		} else {
+			/* The packet carries no ip */
+			decoder->state.to_ip = 0;
+		}
+		return 0;
+	}
+
+	if (insn.branch != INTEL_PT_BR_CONDITIONAL)
+		return intel_pt_bug(decoder);
+
+	to_ip = decoder->ip + insn.length + insn.rel;
+
+	if (tip_pgd && decoder->pgd_ip &&
+	    decoder->pgd_ip(to_ip, decoder->data)) {
+		/* Conditional branch leaving filter region */
+		decoder->pge = false;
+		decoder->continuous_period = false;
+		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+		/*
+		 * NOTE(review): from_ip ends up as the branch target, not the
+		 * address of the branch - confirm that is intended.
+		 */
+		decoder->ip = to_ip;
+		decoder->state.from_ip = to_ip;
+		decoder->state.to_ip = 0;
+		return 0;
+	}
+
+	intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
+			decoder->ip);
+	decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+	return -ENOENT;
+}
+
+/*
+ * Walk instructions, using up bits from the current TNT packet
+ * (decoder->tnt) at conditional branches and compressed returns.  The bit in
+ * use is always the most significant bit of the payload (BIT63); the payload
+ * is shifted left as bits are used up.
+ *
+ * Returns:
+ *   0        decoder state is ready to be reported
+ *   -EAGAIN  the last TNT bit was used by a not-taken branch that is not
+ *            being reported, so more packets are needed
+ *   -ENOENT  the instruction met is not consistent with the packets
+ *   other    an error from intel_pt_walk_insn(), intel_pt_get_next_packet()
+ *            or intel_pt_bug()
+ */
+static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
+{
+	struct intel_pt_insn insn;
+	int ret;
+
+	for (;;) {
+		ret = intel_pt_walk_insn(decoder, &insn, 0);
+		if (ret == INTEL_PT_RETURN)
+			return 0;
+		if (ret)
+			return ret;
+
+		if (insn.op == INTEL_PT_OP_RET) {
+			/* Only valid as a compressed return with a taken bit */
+			if (!decoder->return_compression) {
+				intel_pt_log_at("ERROR: RET when expecting conditional branch",
+						decoder->ip);
+				decoder->pkt_state = INTEL_PT_STATE_ERR3;
+				return -ENOENT;
+			}
+			if (!decoder->ret_addr) {
+				intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
+						decoder->ip);
+				decoder->pkt_state = INTEL_PT_STATE_ERR3;
+				return -ENOENT;
+			}
+			if (!(decoder->tnt.payload & BIT63)) {
+				intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
+						decoder->ip);
+				decoder->pkt_state = INTEL_PT_STATE_ERR3;
+				return -ENOENT;
+			}
+
+			decoder->tnt.count -= 1;
+			if (!decoder->tnt.count)
+				decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+			decoder->tnt.payload <<= 1;
+
+			decoder->state.from_ip = decoder->ip;
+			decoder->ip = decoder->ret_addr;
+			decoder->state.to_ip = decoder->ip;
+			return 0;
+		}
+
+		if (insn.branch == INTEL_PT_BR_INDIRECT) {
+			/* Handle deferred TIPs */
+			ret = intel_pt_get_next_packet(decoder);
+			if (ret)
+				return ret;
+			if (decoder->packet.type != INTEL_PT_TIP ||
+			    decoder->packet.count == 0) {
+				intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
+						decoder->ip);
+				decoder->pkt_state = INTEL_PT_STATE_ERR3;
+				decoder->pkt_step = 0;
+				return -ENOENT;
+			}
+			intel_pt_set_last_ip(decoder);
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = decoder->last_ip;
+			decoder->ip = decoder->last_ip;
+			return 0;
+		}
+
+		if (insn.branch == INTEL_PT_BR_CONDITIONAL) {
+			bool taken = !!(decoder->tnt.payload & BIT63);
+			bool sampled = !!(decoder->state.type &
+					  INTEL_PT_INSTRUCTION);
+
+			decoder->tnt.count -= 1;
+			if (!decoder->tnt.count)
+				decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+
+			if (!taken && !sampled) {
+				/* Nothing to report: keep walking if possible */
+				decoder->ip += insn.length;
+				if (!decoder->tnt.count)
+					return -EAGAIN;
+				decoder->tnt.payload <<= 1;
+				continue;
+			}
+
+			decoder->tnt.payload <<= 1;
+			decoder->state.from_ip = decoder->ip;
+			if (taken) {
+				decoder->ip += insn.length +
+					       insn.rel;
+				decoder->state.to_ip = decoder->ip;
+			} else {
+				/* Instruction sample for a non-taken branch */
+				decoder->state.type = INTEL_PT_INSTRUCTION;
+				decoder->state.to_ip = 0;
+				decoder->ip += insn.length;
+			}
+			return 0;
+		}
+
+		return intel_pt_bug(decoder);
+	}
+}
+
+/*
+ * Process a MODE.TSX packet that is expected to be followed by a FUP.
+ *
+ * The TX flags from the MODE.TSX payload are held back until the FUP is
+ * processed.  @no_tip is set to true when the flags do not include
+ * INTEL_PT_ABORT_TX.  If the next packet is not a FUP, that is logged and
+ * intel_pt_update_in_tx() is called instead.
+ *
+ * On return decoder->packet is the packet after MODE.TSX.  Returns 0, or an
+ * error from intel_pt_get_next_packet().
+ */
+static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
+{
+	unsigned int tx_flags = decoder->packet.payload &
+				(INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
+	int ret = intel_pt_get_next_packet(decoder);
+
+	if (ret)
+		return ret;
+
+	if (decoder->packet.type != INTEL_PT_FUP) {
+		intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
+				decoder->pos);
+		intel_pt_update_in_tx(decoder);
+		return 0;
+	}
+
+	decoder->fup_tx_flags = tx_flags;
+	decoder->set_fup_tx_flags = true;
+	if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
+		*no_tip = true;
+
+	return 0;
+}
+
+/*
+ * Update the decoder timestamp from a TSC packet.
+ *
+ * The payload supplies the low 56 bits; the top 8 bits are taken from the
+ * reference timestamp if there is one, otherwise from the current timestamp.
+ * With neither, the timestamp is left unchanged.  Any previous TMA is
+ * invalidated.
+ */
+static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
+{
+	uint64_t tsc;
+
+	decoder->have_tma = false;
+
+	if (decoder->ref_timestamp) {
+		tsc = decoder->packet.payload |
+		      (decoder->ref_timestamp & (0xffULL << 56));
+		/* Choose the value nearest to the reference timestamp */
+		if (tsc < decoder->ref_timestamp) {
+			if (decoder->ref_timestamp - tsc > (1ULL << 55))
+				tsc += (1ULL << 56);
+		} else if (tsc - decoder->ref_timestamp > (1ULL << 55)) {
+			tsc -= (1ULL << 56);
+		}
+		decoder->timestamp = tsc;
+		decoder->tsc_timestamp = tsc;
+		decoder->timestamp_insn_cnt = 0;
+		/* The reference timestamp is used only once */
+		decoder->ref_timestamp = 0;
+	} else if (decoder->timestamp) {
+		tsc = decoder->packet.payload |
+		      (decoder->timestamp & (0xffULL << 56));
+		decoder->tsc_timestamp = tsc;
+		if (tsc < decoder->timestamp &&
+		    decoder->timestamp - tsc < decoder->tsc_slip) {
+			/* Slightly backwards: hold the timestamp where it is */
+			intel_pt_log_to("Suppressing backwards timestamp",
+					tsc);
+			tsc = decoder->timestamp;
+		}
+		if (tsc < decoder->timestamp) {
+			/* Otherwise going backwards means the 56 bits wrapped */
+			intel_pt_log_to("Wraparound timestamp", tsc);
+			tsc += (1ULL << 56);
+			decoder->tsc_timestamp = tsc;
+		}
+		decoder->timestamp = tsc;
+		decoder->timestamp_insn_cnt = 0;
+	}
+
+	/* A timestamp straight after a CYC packet restarts cycle counting */
+	if (decoder->last_packet_type == INTEL_PT_CYC) {
+		decoder->have_calc_cyc_to_tsc = false;
+		decoder->cycle_cnt = 0;
+		decoder->cyc_ref_timestamp = decoder->timestamp;
+		intel_pt_calc_cyc_to_tsc(decoder, false);
+	}
+
+	intel_pt_log_to("Setting timestamp", decoder->timestamp);
+}
+
+/*
+ * Handle an OVF packet: log it, clear the TX flags, discard timing state
+ * and put the decoder into the resync error state.
+ *
+ * Always returns -EOVERFLOW.
+ */
+static int intel_pt_overflow(struct intel_pt_decoder *decoder)
+{
+	intel_pt_log("ERROR: Buffer overflow\n");
+	intel_pt_clear_tx_flags(decoder);
+
+	/* Discard timing state */
+	decoder->timestamp_insn_cnt = 0;
+	decoder->cbr = 0;
+	decoder->have_tma = false;
+
+	decoder->overflow = true;
+	decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+
+	return -EOVERFLOW;
+}
+
+/*
+ * Process a TMA packet: record the last MTC value implied by the CTC payload
+ * and calculate the timestamp (decoder->ctc_timestamp) that later MTC deltas
+ * are added to.
+ *
+ * Does nothing if the TSC to CTC ratio denominator is zero.
+ */
+static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
+{
+	uint32_t ctc, fc, ctc_rem;
+
+	if (!decoder->tsc_ctc_ratio_d)
+		return;
+
+	ctc = decoder->packet.payload;
+	fc = decoder->packet.count;
+	ctc_rem = ctc & decoder->ctc_rem_mask;
+
+	decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+
+	/* Step back from the TSC by fc and by the CTC remainder in TSC ticks */
+	decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
+	if (decoder->tsc_ctc_mult)
+		decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+	else
+		decoder->ctc_timestamp -= multdiv(ctc_rem,
+						  decoder->tsc_ctc_ratio_n,
+						  decoder->tsc_ctc_ratio_d);
+
+	decoder->fixup_last_mtc = true;
+	decoder->have_tma = true;
+	decoder->ctc_delta = 0;
+
+	intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x  CTC rem %#x\n",
+		     decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
+}
+
+/*
+ * Update the decoder timestamp from an MTC packet.
+ *
+ * The MTC payload is compared with the last MTC value to get a delta
+ * (wrapping at 256), which is accumulated in decoder->ctc_delta and converted
+ * to a timestamp relative to decoder->ctc_timestamp.  A timestamp that would
+ * go backwards is not applied.  Does nothing without a preceding TMA.
+ */
+static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
+{
+	uint32_t mtc, mtc_delta;
+	uint64_t mtc_time;
+
+	if (!decoder->have_tma)
+		return;
+
+	mtc = decoder->packet.payload;
+
+	/* One-off correction of last_mtc, done on the first MTC after a TMA */
+	if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) {
+		decoder->fixup_last_mtc = false;
+		intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+					&decoder->last_mtc);
+	}
+
+	mtc_delta = mtc > decoder->last_mtc ?
+		    mtc - decoder->last_mtc :
+		    mtc + 256 - decoder->last_mtc;
+
+	decoder->ctc_delta += mtc_delta << decoder->mtc_shift;
+
+	mtc_time = decoder->ctc_timestamp;
+	if (decoder->tsc_ctc_mult)
+		mtc_time += decoder->ctc_delta * decoder->tsc_ctc_mult;
+	else
+		mtc_time += multdiv(decoder->ctc_delta,
+				    decoder->tsc_ctc_ratio_n,
+				    decoder->tsc_ctc_ratio_d);
+
+	if (mtc_time >= decoder->timestamp)
+		decoder->timestamp = mtc_time;
+	else
+		intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+			     mtc_time, decoder->timestamp);
+
+	decoder->last_mtc = mtc;
+	decoder->timestamp_insn_cnt = 0;
+
+	/* An MTC straight after a CYC packet restarts cycle counting */
+	if (decoder->last_packet_type == INTEL_PT_CYC) {
+		decoder->have_calc_cyc_to_tsc = false;
+		decoder->cycle_cnt = 0;
+		decoder->cyc_ref_timestamp = decoder->timestamp;
+		intel_pt_calc_cyc_to_tsc(decoder, true);
+	}
+}
+
+/*
+ * Process a CBR packet: always remember the whole payload and, when the CBR
+ * value (the low byte of the payload) has changed, update decoder->cbr and
+ * the CBR-based cycles to TSC conversion factor.
+ */
+static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
+{
+	unsigned int new_cbr = decoder->packet.payload & 0xff;
+
+	decoder->cbr_payload = decoder->packet.payload;
+
+	if (decoder->cbr != new_cbr) {
+		decoder->cbr = new_cbr;
+		decoder->cbr_cyc_to_tsc =
+			decoder->max_non_turbo_ratio_fp / new_cbr;
+	}
+}
+
+/*
+ * Process a CYC packet: add the payload to the cycle count and, if possible,
+ * estimate a new timestamp as the cycle reference timestamp plus the cycle
+ * count scaled to TSC ticks.
+ *
+ * The calculated conversion factor is preferred over the CBR-based one.  No
+ * timestamp is estimated if there is no reference timestamp or no conversion
+ * factor.  A timestamp that would go backwards is not applied.
+ */
+static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
+{
+	uint64_t cyc_time;
+
+	decoder->have_cyc = true;
+	decoder->cycle_cnt += decoder->packet.payload;
+
+	cyc_time = decoder->cyc_ref_timestamp;
+	if (!cyc_time)
+		return;
+
+	if (decoder->have_calc_cyc_to_tsc)
+		cyc_time += decoder->cycle_cnt * decoder->calc_cyc_to_tsc;
+	else if (decoder->cbr)
+		cyc_time += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc;
+	else
+		return;
+
+	if (cyc_time >= decoder->timestamp)
+		decoder->timestamp = cyc_time;
+	else
+		intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+			     cyc_time, decoder->timestamp);
+
+	decoder->timestamp_insn_cnt = 0;
+}
+
+/*
+ * Walk PSB+ packets when already in sync.
+ *
+ * Packets are consumed up to PSBEND, updating timing and other decoder state
+ * on the way.
+ *
+ * Returns 0 at PSBEND, -EAGAIN if a packet that does not belong in PSB+ is
+ * found (it is left as the current packet), otherwise an error from
+ * intel_pt_get_next_packet() or intel_pt_overflow().
+ */
+static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
+{
+	int ret;
+
+	for (;;) {
+		ret = intel_pt_get_next_packet(decoder);
+		if (ret)
+			return ret;
+
+		switch (decoder->packet.type) {
+		case INTEL_PT_PSBEND:
+			return 0;
+
+		case INTEL_PT_OVF:
+			return intel_pt_overflow(decoder);
+
+		/* Timing packets */
+		case INTEL_PT_TSC:
+			intel_pt_calc_tsc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_TMA:
+			intel_pt_calc_tma(decoder);
+			break;
+
+		case INTEL_PT_MTC:
+			intel_pt_calc_mtc_timestamp(decoder);
+			if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+				decoder->state.type |= INTEL_PT_INSTRUCTION;
+			break;
+
+		case INTEL_PT_CBR:
+			intel_pt_calc_cbr(decoder);
+			break;
+
+		/* Execution state packets */
+		case INTEL_PT_FUP:
+			decoder->pge = true;
+			if (decoder->packet.count)
+				intel_pt_set_last_ip(decoder);
+			break;
+
+		case INTEL_PT_PIP:
+			decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+			break;
+
+		case INTEL_PT_MODE_EXEC:
+			decoder->exec_mode = decoder->packet.payload;
+			break;
+
+		case INTEL_PT_MODE_TSX:
+			intel_pt_update_in_tx(decoder);
+			break;
+
+		/* Packets that are not expected before PSBEND */
+		case INTEL_PT_TIP_PGD:
+		case INTEL_PT_TIP_PGE:
+		case INTEL_PT_TIP:
+		case INTEL_PT_TNT:
+		case INTEL_PT_TRACESTOP:
+		case INTEL_PT_BAD:
+		case INTEL_PT_PSB:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
+			decoder->have_tma = false;
+			intel_pt_log("ERROR: Unexpected packet\n");
+			return -EAGAIN;
+
+		/* Nothing to do */
+		case INTEL_PT_CYC:
+		case INTEL_PT_VMCS:
+		case INTEL_PT_MNT:
+		case INTEL_PT_PAD:
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * After a FUP, consume packets up to the TIP, TIP.PGE or TIP.PGD packet that
+ * is expected to follow it, and set the from/to ips of the decoder state.
+ *
+ * The state is flagged as a TX abort if INTEL_PT_ABORT_TX is pending in
+ * decoder->tx_flags, otherwise as asynchronous.
+ *
+ * Returns 0 when the TIP-type packet has been processed, -ENOENT if some
+ * other unexpected packet comes first, otherwise an error from
+ * intel_pt_get_next_packet(), intel_pt_overflow() or intel_pt_bug().
+ */
+static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
+{
+	int ret;
+
+	if (!(decoder->tx_flags & INTEL_PT_ABORT_TX)) {
+		decoder->state.flags |= INTEL_PT_ASYNC;
+	} else {
+		decoder->tx_flags = 0;
+		decoder->state.flags &= ~INTEL_PT_IN_TX;
+		decoder->state.flags |= INTEL_PT_ABORT_TX;
+	}
+
+	for (;;) {
+		ret = intel_pt_get_next_packet(decoder);
+		if (ret)
+			return ret;
+
+		switch (decoder->packet.type) {
+		/* The packets being looked for */
+		case INTEL_PT_TIP:
+			decoder->state.from_ip = decoder->ip;
+			if (decoder->packet.count) {
+				intel_pt_set_ip(decoder);
+				decoder->state.to_ip = decoder->ip;
+			} else {
+				decoder->state.to_ip = 0;
+			}
+			return 0;
+
+		case INTEL_PT_TIP_PGE:
+			decoder->pge = true;
+			intel_pt_log("Omitting PGE ip " x64_fmt "\n",
+				     decoder->ip);
+			decoder->state.from_ip = 0;
+			if (decoder->packet.count) {
+				intel_pt_set_ip(decoder);
+				decoder->state.to_ip = decoder->ip;
+			} else {
+				decoder->state.to_ip = 0;
+			}
+			return 0;
+
+		case INTEL_PT_TIP_PGD:
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			if (decoder->packet.count) {
+				intel_pt_set_ip(decoder);
+				intel_pt_log("Omitting PGD ip " x64_fmt "\n",
+					     decoder->ip);
+			}
+			decoder->pge = false;
+			decoder->continuous_period = false;
+			return 0;
+
+		case INTEL_PT_OVF:
+			return intel_pt_overflow(decoder);
+
+		/* Packets that may come between the FUP and the TIP */
+		case INTEL_PT_MTC:
+			intel_pt_calc_mtc_timestamp(decoder);
+			if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+				decoder->state.type |= INTEL_PT_INSTRUCTION;
+			break;
+
+		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_PIP:
+			decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+			break;
+
+		case INTEL_PT_MODE_EXEC:
+			decoder->exec_mode = decoder->packet.payload;
+			break;
+
+		case INTEL_PT_VMCS:
+		case INTEL_PT_MNT:
+		case INTEL_PT_PAD:
+			break;
+
+		/* Anything else means that the TIP is missing */
+		case INTEL_PT_TNT:
+		case INTEL_PT_FUP:
+		case INTEL_PT_TRACESTOP:
+		case INTEL_PT_PSB:
+		case INTEL_PT_TSC:
+		case INTEL_PT_TMA:
+		case INTEL_PT_CBR:
+		case INTEL_PT_MODE_TSX:
+		case INTEL_PT_BAD:
+		case INTEL_PT_PSBEND:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
+			intel_pt_log("ERROR: Missing TIP after FUP\n");
+			decoder->pkt_state = INTEL_PT_STATE_ERR3;
+			decoder->pkt_step = 0;
+			return -ENOENT;
+
+		default:
+			return intel_pt_bug(decoder);
+		}
+	}
+}
+
+/*
+ * Main packet loop when in sync: consume packets until there is decoder
+ * state to report or an error occurs.
+ *
+ * Packets that only update timing or context are processed and the loop
+ * continues.  Packets that describe control flow (TNT, TIP, FUP) hand over
+ * to the matching instruction walker.  Packets that are expected to be
+ * followed by a FUP (MODE.TSX, PTWRITE with ip, EXSTOP with ip) read that
+ * next packet themselves and jump to "next" to process it without fetching
+ * another one.
+ *
+ * no_tip records that the coming FUP is not expected to be followed by a
+ * TIP.
+ *
+ * Returns 0 when decoder->state is ready to be reported, otherwise a
+ * negative error code.
+ */
+static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
+{
+	bool no_tip = false;
+	int err;
+
+	while (1) {
+		err = intel_pt_get_next_packet(decoder);
+		if (err)
+			return err;
+next:
+		switch (decoder->packet.type) {
+		case INTEL_PT_TNT:
+			if (!decoder->packet.count)
+				break;
+			decoder->tnt = decoder->packet;
+			decoder->pkt_state = INTEL_PT_STATE_TNT;
+			err = intel_pt_walk_tnt(decoder);
+			/* TNT bits used up with nothing to report */
+			if (err == -EAGAIN)
+				break;
+			return err;
+
+		case INTEL_PT_TIP_PGD:
+			if (decoder->packet.count != 0)
+				intel_pt_set_last_ip(decoder);
+			decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
+			return intel_pt_walk_tip(decoder);
+
+		case INTEL_PT_TIP_PGE: {
+			decoder->pge = true;
+			if (decoder->packet.count == 0) {
+				intel_pt_log_at("Skipping zero TIP.PGE",
+						decoder->pos);
+				break;
+			}
+			intel_pt_set_ip(decoder);
+			decoder->state.from_ip = 0;
+			decoder->state.to_ip = decoder->ip;
+			return 0;
+		}
+
+		case INTEL_PT_OVF:
+			return intel_pt_overflow(decoder);
+
+		case INTEL_PT_TIP:
+			if (decoder->packet.count != 0)
+				intel_pt_set_last_ip(decoder);
+			decoder->pkt_state = INTEL_PT_STATE_TIP;
+			return intel_pt_walk_tip(decoder);
+
+		case INTEL_PT_FUP:
+			if (decoder->packet.count == 0) {
+				intel_pt_log_at("Skipping zero FUP",
+						decoder->pos);
+				no_tip = false;
+				break;
+			}
+			intel_pt_set_last_ip(decoder);
+			if (!decoder->branch_enable) {
+				/* No instruction walk: use the FUP ip as is */
+				decoder->ip = decoder->last_ip;
+				if (intel_pt_fup_event(decoder))
+					return 0;
+				no_tip = false;
+				break;
+			}
+			if (decoder->set_fup_mwait)
+				no_tip = true;
+			err = intel_pt_walk_fup(decoder);
+			if (err != -EAGAIN) {
+				if (err)
+					return err;
+				if (no_tip)
+					decoder->pkt_state =
+						INTEL_PT_STATE_FUP_NO_TIP;
+				else
+					decoder->pkt_state = INTEL_PT_STATE_FUP;
+				return 0;
+			}
+			/* Reached the FUP ip with no event to report */
+			if (no_tip) {
+				no_tip = false;
+				break;
+			}
+			return intel_pt_walk_fup_tip(decoder);
+
+		case INTEL_PT_TRACESTOP:
+			decoder->pge = false;
+			decoder->continuous_period = false;
+			intel_pt_clear_tx_flags(decoder);
+			decoder->have_tma = false;
+			break;
+
+		case INTEL_PT_PSB:
+			decoder->last_ip = 0;
+			decoder->have_last_ip = true;
+			intel_pt_clear_stack(&decoder->stack);
+			err = intel_pt_walk_psbend(decoder);
+			/* PSB+ ended early: process the packet that ended it */
+			if (err == -EAGAIN)
+				goto next;
+			if (err)
+				return err;
+			break;
+
+		case INTEL_PT_PIP:
+			decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+			break;
+
+		case INTEL_PT_MTC:
+			intel_pt_calc_mtc_timestamp(decoder);
+			if (decoder->period_type != INTEL_PT_PERIOD_MTC)
+				break;
+			/*
+			 * Ensure that there has been an instruction since the
+			 * last MTC.
+			 */
+			if (!decoder->mtc_insn)
+				break;
+			decoder->mtc_insn = false;
+			/* Ensure that there is a timestamp */
+			if (!decoder->timestamp)
+				break;
+			decoder->state.type = INTEL_PT_INSTRUCTION;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			return 0;
+
+		case INTEL_PT_TSC:
+			intel_pt_calc_tsc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_TMA:
+			intel_pt_calc_tma(decoder);
+			break;
+
+		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_CBR:
+			intel_pt_calc_cbr(decoder);
+			/* Without branch tracing, report each change of CBR */
+			if (!decoder->branch_enable &&
+			    decoder->cbr != decoder->cbr_seen) {
+				decoder->cbr_seen = decoder->cbr;
+				decoder->state.type = INTEL_PT_CBR_CHG;
+				decoder->state.from_ip = decoder->ip;
+				decoder->state.to_ip = 0;
+				decoder->state.cbr_payload =
+							decoder->packet.payload;
+				return 0;
+			}
+			break;
+
+		case INTEL_PT_MODE_EXEC:
+			decoder->exec_mode = decoder->packet.payload;
+			break;
+
+		case INTEL_PT_MODE_TSX:
+			/* MODE_TSX need not be followed by FUP */
+			if (!decoder->pge) {
+				intel_pt_update_in_tx(decoder);
+				break;
+			}
+			err = intel_pt_mode_tsx(decoder, &no_tip);
+			if (err)
+				return err;
+			goto next;
+
+		case INTEL_PT_BAD: /* Does not happen */
+			return intel_pt_bug(decoder);
+
+		case INTEL_PT_PSBEND:
+		case INTEL_PT_VMCS:
+		case INTEL_PT_MNT:
+		case INTEL_PT_PAD:
+			break;
+
+		case INTEL_PT_PTWRITE_IP:
+			/* Hold the payload until the following FUP gives the ip */
+			decoder->fup_ptw_payload = decoder->packet.payload;
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type == INTEL_PT_FUP) {
+				decoder->set_fup_ptw = true;
+				no_tip = true;
+			} else {
+				intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
+						decoder->pos);
+			}
+			goto next;
+
+		case INTEL_PT_PTWRITE:
+			decoder->state.type = INTEL_PT_PTW;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->state.ptw_payload = decoder->packet.payload;
+			return 0;
+
+		case INTEL_PT_MWAIT:
+			decoder->fup_mwait_payload = decoder->packet.payload;
+			decoder->set_fup_mwait = true;
+			break;
+
+		case INTEL_PT_PWRE:
+			/* After MWAIT, report PWRE together with it at the FUP */
+			if (decoder->set_fup_mwait) {
+				decoder->fup_pwre_payload =
+							decoder->packet.payload;
+				decoder->set_fup_pwre = true;
+				break;
+			}
+			decoder->state.type = INTEL_PT_PWR_ENTRY;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			/*
+			 * A power entry state carries its payload in
+			 * pwre_payload, as in intel_pt_fup_event().
+			 */
+			decoder->state.pwre_payload = decoder->packet.payload;
+			return 0;
+
+		case INTEL_PT_EXSTOP_IP:
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type == INTEL_PT_FUP) {
+				decoder->set_fup_exstop = true;
+				no_tip = true;
+			} else {
+				intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
+						decoder->pos);
+			}
+			goto next;
+
+		case INTEL_PT_EXSTOP:
+			decoder->state.type = INTEL_PT_EX_STOP;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			return 0;
+
+		case INTEL_PT_PWRX:
+			decoder->state.type = INTEL_PT_PWR_EXIT;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->state.pwrx_payload = decoder->packet.payload;
+			return 0;
+
+		default:
+			return intel_pt_bug(decoder);
+		}
+	}
+}
+
+/*
+ * Tell whether the current packet provides a usable ip: it must carry ip
+ * bytes (non-zero count) and either there is a last ip to combine them with,
+ * or the count is 3 or 6 (presumably the encodings that hold a complete ip -
+ * TODO confirm against the packet decoder).
+ */
+static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
+{
+	if (!decoder->packet.count)
+		return false;
+
+	if (decoder->have_last_ip)
+		return true;
+
+	return decoder->packet.count == 3 || decoder->packet.count == 6;
+}
+
+/*
+ * Walk PSB+ packets to get in sync.
+ *
+ * Packets are consumed up to PSBEND, picking up the ip from a FUP and
+ * updating timing and other decoder state on the way.
+ *
+ * Returns 0 at PSBEND, -ENOENT if a packet that does not belong in PSB+ is
+ * found, otherwise an error from intel_pt_get_next_packet(),
+ * intel_pt_overflow() or intel_pt_bug().
+ */
+static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
+{
+	int ret;
+
+	for (;;) {
+		ret = intel_pt_get_next_packet(decoder);
+		if (ret)
+			return ret;
+
+		switch (decoder->packet.type) {
+		case INTEL_PT_PSBEND:
+			return 0;
+
+		case INTEL_PT_OVF:
+			return intel_pt_overflow(decoder);
+
+		case INTEL_PT_BAD: /* Does not happen */
+			return intel_pt_bug(decoder);
+
+		case INTEL_PT_FUP:
+			decoder->pge = true;
+			if (intel_pt_have_ip(decoder)) {
+				uint64_t prev_ip = decoder->ip;
+
+				intel_pt_set_ip(decoder);
+				/* Log only when an existing ip is replaced */
+				if (prev_ip)
+					intel_pt_log_to("Setting IP",
+							decoder->ip);
+			}
+			break;
+
+		/* Timing packets */
+		case INTEL_PT_TSC:
+			intel_pt_calc_tsc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_TMA:
+			intel_pt_calc_tma(decoder);
+			break;
+
+		case INTEL_PT_MTC:
+			intel_pt_calc_mtc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_CBR:
+			intel_pt_calc_cbr(decoder);
+			break;
+
+		/* Execution state packets */
+		case INTEL_PT_PIP:
+			decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+			break;
+
+		case INTEL_PT_MODE_EXEC:
+			decoder->exec_mode = decoder->packet.payload;
+			break;
+
+		case INTEL_PT_MODE_TSX:
+			intel_pt_update_in_tx(decoder);
+			break;
+
+		/* Unexpected packets that leave the packet state alone */
+		case INTEL_PT_TIP_PGD:
+			decoder->continuous_period = false;
+			__fallthrough;
+		case INTEL_PT_TIP_PGE:
+		case INTEL_PT_TIP:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
+			intel_pt_log("ERROR: Unexpected packet\n");
+			return -ENOENT;
+
+		/* Unexpected packets that also set an error packet state */
+		case INTEL_PT_TRACESTOP:
+			decoder->pge = false;
+			decoder->continuous_period = false;
+			intel_pt_clear_tx_flags(decoder);
+			__fallthrough;
+
+		case INTEL_PT_TNT:
+			decoder->have_tma = false;
+			intel_pt_log("ERROR: Unexpected packet\n");
+			if (decoder->ip)
+				decoder->pkt_state = INTEL_PT_STATE_ERR4;
+			else
+				decoder->pkt_state = INTEL_PT_STATE_ERR3;
+			return -ENOENT;
+
+		/* Nothing to do */
+		case INTEL_PT_PSB:
+		case INTEL_PT_VMCS:
+		case INTEL_PT_MNT:
+		case INTEL_PT_PAD:
+		default:
+			break;
+		}
+	}
+}
+
+static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
+{
+	int err;
+
+	while (1) { /* until decoder->ip is non-zero, or an error occurs */
+		err = intel_pt_get_next_packet(decoder);
+		if (err)
+			return err;
+
+		switch (decoder->packet.type) {
+		case INTEL_PT_TIP_PGD:
+			decoder->continuous_period = false;
+			__fallthrough;
+		case INTEL_PT_TIP_PGE:
+		case INTEL_PT_TIP:
+			decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
+			if (intel_pt_have_ip(decoder))
+				intel_pt_set_ip(decoder);
+			if (decoder->ip)
+				return 0; /* found an IP */
+			break;
+
+		case INTEL_PT_FUP:
+			if (intel_pt_have_ip(decoder))
+				intel_pt_set_ip(decoder);
+			if (decoder->ip)
+				return 0; /* found an IP */
+			break;
+
+		case INTEL_PT_MTC:
+			intel_pt_calc_mtc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_TSC:
+			intel_pt_calc_tsc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_TMA:
+			intel_pt_calc_tma(decoder);
+			break;
+
+		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
+			break;
+
+		case INTEL_PT_CBR:
+			intel_pt_calc_cbr(decoder);
+			break;
+
+		case INTEL_PT_PIP:
+			decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+			break;
+
+		case INTEL_PT_MODE_EXEC:
+			decoder->exec_mode = decoder->packet.payload;
+			break;
+
+		case INTEL_PT_MODE_TSX:
+			intel_pt_update_in_tx(decoder);
+			break;
+
+		case INTEL_PT_OVF:
+			return intel_pt_overflow(decoder);
+
+		case INTEL_PT_BAD: /* Does not happen */
+			return intel_pt_bug(decoder);
+
+		case INTEL_PT_TRACESTOP:
+			decoder->pge = false;
+			decoder->continuous_period = false;
+			intel_pt_clear_tx_flags(decoder);
+			decoder->have_tma = false;
+			break; /* unlike in intel_pt_walk_psb(), not an error */
+
+		case INTEL_PT_PSB:
+			decoder->last_ip = 0;
+			decoder->have_last_ip = true;
+			intel_pt_clear_stack(&decoder->stack);
+			err = intel_pt_walk_psb(decoder);
+			if (err)
+				return err;
+			if (decoder->ip) { /* the PSB+ supplied an IP */
+				/* Do not have a sample */
+				decoder->state.type = 0;
+				return 0;
+			}
+			break;
+
+		case INTEL_PT_TNT:
+		case INTEL_PT_PSBEND:
+		case INTEL_PT_VMCS:
+		case INTEL_PT_MNT:
+		case INTEL_PT_PAD:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
+		default:
+			break; /* ignored while there is still no IP */
+		}
+	}
+}
+
+static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
+{
+	const bool want_ip = decoder->branch_enable;
+	int ret;
+
+	/* Drop every pending set_fup_* flag */
+	decoder->set_fup_exstop = false;
+	decoder->set_fup_pwre = false;
+	decoder->set_fup_mwait = false;
+	decoder->set_fup_ptw = false;
+	decoder->set_fup_tx_flags = false;
+
+	if (want_ip) {
+		intel_pt_log("Scanning for full IP\n");
+		ret = intel_pt_walk_to_ip(decoder);
+		if (ret)
+			return ret;
+	}
+
+	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+	decoder->overflow = false;
+	if (!want_ip) {
+		decoder->state.type = 0; /* Do not have a sample */
+		return 0;
+	}
+
+	decoder->state.from_ip = 0;
+	decoder->state.to_ip = decoder->ip;
+	intel_pt_log_to("Setting IP", decoder->ip);
+	return 0;
+}
+
+/* Length of the longest proper PSB prefix found at the buffer's end, or 0. */
+static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
+{
+	const unsigned char *tail = decoder->buf + decoder->len;
+	size_t n = INTEL_PT_PSB_LEN - 1;
+
+	if (n > decoder->len) /* cannot match more bytes than are buffered */
+		n = decoder->len;
+	/* Try the longest candidate first, shrinking until one matches */
+	while (n && memcmp(tail - n, INTEL_PT_PSB_STR, n))
+		n--;
+	return n;
+}
+
+/* Return how many bytes complete a PSB split after @part_psb bytes, or 0. */
+static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
+{
+	const char *missing = INTEL_PT_PSB_STR + part_psb;
+	size_t need = INTEL_PT_PSB_LEN - part_psb;
+
+	if (need <= decoder->len && !memcmp(decoder->buf, missing, need))
+		return need;
+
+	return 0;
+}
+
+static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
+				  int part_psb)
+{ /* Handle a PSB split over two buffers by rebuilding it in temp_buf */
+	int rest_psb, ret;
+
+	decoder->pos += decoder->len; /* consume all of the current buffer */
+	decoder->len = 0;
+
+	ret = intel_pt_get_next_data(decoder);
+	if (ret)
+		return ret;
+
+	rest_psb = intel_pt_rest_psb(decoder, part_psb);
+	if (!rest_psb)
+		return 0; /* buffer does not start with the rest of the PSB */
+
+	decoder->pos -= part_psb; /* back to where the split PSB began */
+	decoder->next_buf = decoder->buf + rest_psb;
+	decoder->next_len = decoder->len - rest_psb;
+	memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+	decoder->buf = decoder->temp_buf;
+	decoder->len = INTEL_PT_PSB_LEN;
+
+	return 0;
+}
+
+static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
+{
+	unsigned char *next;
+	int ret;
+
+	intel_pt_log("Scanning for PSB\n");
+	while (1) {
+		if (!decoder->len) { /* nothing buffered: fetch more data */
+			ret = intel_pt_get_next_data(decoder);
+			if (ret)
+				return ret;
+		}
+
+		next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
+			      INTEL_PT_PSB_LEN);
+		if (!next) {
+			int part_psb;
+
+			part_psb = intel_pt_part_psb(decoder);
+			if (part_psb) { /* buffer ends mid-PSB */
+				ret = intel_pt_get_split_psb(decoder, part_psb);
+				if (ret)
+					return ret;
+			} else { /* no trace of a PSB: discard the buffer */
+				decoder->pos += decoder->len;
+				decoder->len = 0;
+			}
+			continue;
+		}
+
+		decoder->pkt_step = next - decoder->buf; /* offset of the PSB */
+		return intel_pt_get_next_packet(decoder);
+	}
+}
+
+/* Resynchronize from scratch: find a PSB, walk its PSB+, then get an IP. */
+static int intel_pt_sync(struct intel_pt_decoder *decoder)
+{
+	int ret;
+
+	/* Start with no knowledge of the trace position */
+	decoder->ip = 0;
+	decoder->last_ip = 0;
+	decoder->have_last_ip = false;
+	decoder->continuous_period = false;
+	decoder->pge = false;
+	intel_pt_clear_stack(&decoder->stack);
+
+	ret = intel_pt_scan_for_psb(decoder);
+	if (ret)
+		return ret;
+
+	decoder->pkt_state = INTEL_PT_STATE_NO_IP;
+	decoder->have_last_ip = true;
+
+	ret = intel_pt_walk_psb(decoder);
+	if (ret)
+		return ret;
+
+	if (!decoder->ip) /* PSB+ gave no IP, so keep looking for one */
+		return intel_pt_sync_ip(decoder);
+
+	decoder->state.type = 0; /* Do not have a sample */
+	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+	return 0;
+}
+
+/* Estimate the current time: sample_timestamp plus scaled instruction count */
+static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
+{
+	uint64_t delta = decoder->sample_insn_cnt << 1;
+
+	/* Scale by max_non_turbo_ratio / cbr only when both are non-zero */
+	if (decoder->cbr && decoder->max_non_turbo_ratio) {
+		delta *= decoder->max_non_turbo_ratio;
+		delta /= decoder->cbr;
+	}
+	return decoder->sample_timestamp + delta;
+}
+
+const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
+{
+	int err;
+
+	do { /* a step returning -ENOLINK means: run the state machine again */
+		decoder->state.type = INTEL_PT_BRANCH; /* default for each pass */
+		decoder->state.flags = 0;
+
+		switch (decoder->pkt_state) {
+		case INTEL_PT_STATE_NO_PSB:
+			err = intel_pt_sync(decoder);
+			break;
+		case INTEL_PT_STATE_NO_IP:
+			decoder->have_last_ip = false;
+			decoder->last_ip = 0;
+			decoder->ip = 0;
+			__fallthrough;
+		case INTEL_PT_STATE_ERR_RESYNC:
+			err = intel_pt_sync_ip(decoder);
+			break;
+		case INTEL_PT_STATE_IN_SYNC:
+			err = intel_pt_walk_trace(decoder);
+			break;
+		case INTEL_PT_STATE_TNT:
+			err = intel_pt_walk_tnt(decoder);
+			if (err == -EAGAIN)
+				err = intel_pt_walk_trace(decoder);
+			break;
+		case INTEL_PT_STATE_TIP:
+		case INTEL_PT_STATE_TIP_PGD:
+			err = intel_pt_walk_tip(decoder);
+			break;
+		case INTEL_PT_STATE_FUP:
+			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+			err = intel_pt_walk_fup(decoder);
+			if (err == -EAGAIN)
+				err = intel_pt_walk_fup_tip(decoder);
+			else if (!err) /* success: go back to the FUP state */
+				decoder->pkt_state = INTEL_PT_STATE_FUP;
+			break;
+		case INTEL_PT_STATE_FUP_NO_TIP:
+			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+			err = intel_pt_walk_fup(decoder);
+			if (err == -EAGAIN)
+				err = intel_pt_walk_trace(decoder);
+			break;
+		default:
+			err = intel_pt_bug(decoder);
+			break;
+		}
+	} while (err == -ENOLINK);
+
+	if (err) { /* errors are reported through state.err, not a NULL return */
+		decoder->state.err = intel_pt_ext_err(err);
+		decoder->state.from_ip = decoder->ip;
+		decoder->sample_timestamp = decoder->timestamp;
+		decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+	} else {
+		decoder->state.err = 0;
+		/* Flag a CBR change once, and only when there is a sample */
+		if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
+			decoder->cbr_seen = decoder->cbr;
+			decoder->state.type |= INTEL_PT_CBR_CHG;
+			decoder->state.cbr_payload = decoder->cbr_payload;
+		}
+		if (intel_pt_sample_time(decoder->pkt_state)) {
+			decoder->sample_timestamp = decoder->timestamp;
+			decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+		}
+	}
+
+	decoder->state.timestamp = decoder->sample_timestamp;
+	decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
+	decoder->state.cr3 = decoder->cr3;
+	decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
+
+	return &decoder->state; /* points into @decoder, never NULL */
+}
+
+/**
+ * intel_pt_next_psb - advance a buffer to the first PSB packet it contains.
+ * @buf: in/out pointer to the start of the buffer
+ * @len: in/out number of bytes available at *@buf
+ *
+ * Searches from *@buf (inclusive) for a PSB.  On success *@buf is moved to
+ * the PSB and *@len is reduced by the number of bytes skipped.  On failure
+ * neither *@buf nor *@len is modified.
+ *
+ * Return: %true if a PSB packet is found, %false otherwise.
+ */
+static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
+{
+	unsigned char *psb = memmem(*buf, *len, INTEL_PT_PSB_STR,
+				    INTEL_PT_PSB_LEN);
+
+	if (!psb)
+		return false;
+
+	*len -= psb - *buf;
+	*buf = psb;
+	return true;
+}
+
+/**
+ * intel_pt_step_psb - advance a buffer to the PSB packet after the current
+ *                     position.
+ * @buf: in/out pointer to the start of the buffer
+ * @len: in/out number of bytes available at *@buf
+ *
+ * Like intel_pt_next_psb() except that the search begins one byte past *@buf,
+ * so a PSB located exactly at *@buf is stepped over.  On failure, including
+ * an empty buffer, neither *@buf nor *@len is modified.
+ *
+ * Return: %true if a PSB packet is found, %false otherwise.
+ */
+static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
+{
+	unsigned char *psb = NULL;
+
+	/* Start one byte in so that a PSB at *buf is not found again */
+	if (*len)
+		psb = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR,
+			     INTEL_PT_PSB_LEN);
+	if (!psb)
+		return false;
+
+	*len -= psb - *buf;
+	*buf = psb;
+	return true;
+}
+
+/**
+ * intel_pt_last_psb - locate the final PSB packet in a buffer.
+ * @buf: buffer
+ * @len: size of buffer
+ *
+ * Scans backwards, so the PSB nearest the end of @buf is the one returned.
+ *
+ * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
+ */
+static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
+{
+	const char *psb = INTEL_PT_PSB_STR;
+	unsigned char *hit;
+	size_t span;
+
+	if (len < INTEL_PT_PSB_LEN)
+		return NULL;
+
+	/* @span covers only offsets at which a whole PSB could still fit */
+	for (span = len - INTEL_PT_PSB_LEN + 1; span; span = hit - buf) {
+		/* Find the last candidate first byte, then check the rest */
+		hit = memrchr(buf, psb[0], span);
+		if (!hit)
+			break;
+		if (!memcmp(hit + 1, psb + 1, INTEL_PT_PSB_LEN - 1))
+			return hit;
+	}
+
+	return NULL;
+}
+
+/**
+ * intel_pt_next_tsc - find the next TSC packet and return its value.
+ * @buf: buffer
+ * @len: size of buffer
+ * @tsc: receives the TSC payload when a TSC packet is found
+ * @rem: receives the number of bytes from the TSC packet to the buffer end
+ *
+ * Decodes packets from the start of @buf.  @buf is assumed to start at a PSB
+ * whose PSB+ contains the TSC, so the search gives up at the first PSBEND.
+ * @tsc and @rem are written only when the function returns true.
+ *
+ * Return: %true if TSC is found, false otherwise.
+ */
+static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
+			      size_t *rem)
+{
+	struct intel_pt_pkt pkt;
+	int used;
+
+	for (; len; buf += used, len -= used) {
+		used = intel_pt_get_packet(buf, len, &pkt);
+		if (used <= 0)
+			break; /* no packet could be decoded */
+		if (pkt.type == INTEL_PT_PSBEND)
+			break; /* PSB+ ended without a TSC */
+		if (pkt.type != INTEL_PT_TSC)
+			continue;
+
+		*tsc = pkt.payload;
+		*rem = len;
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * intel_pt_tsc_cmp - order two 7-byte TSC values.
+ * @tsc1: first TSC to compare
+ * @tsc2: second TSC to compare
+ *
+ * A 7-byte TSC can wrap around and in general there is no way to tell whether
+ * it has.  This function therefore assumes that the two values are really
+ * less than half the counter range apart: a larger numeric difference is
+ * interpreted as a wrap and the plain numeric ordering is reversed.
+ *
+ * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
+ * after @tsc2.
+ */
+static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
+{
+	/* Half the range of a 7-byte (56-bit) counter */
+	const uint64_t halfway = (1ULL << 55);
+	uint64_t gap;
+	int order;
+
+	if (tsc1 == tsc2)
+		return 0;
+
+	/* @order is the answer when no wrap-around is assumed */
+	order = tsc1 < tsc2 ? -1 : 1;
+	gap = tsc1 < tsc2 ? tsc2 - tsc1 : tsc1 - tsc2;
+
+	/* A gap of half the range or more is taken to mean a wrap */
+	if (gap >= halfway)
+		order = -order;
+	return order;
+}
+
+/**
+ * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
+ *                             using TSC.
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ * @buf_b: second buffer
+ * @len_b: size of second buffer
+ * @consecutive: set to true if there is data in buf_b that is consecutive
+ *               to buf_a; otherwise left untouched
+ *
+ * If the trace contains TSC we can look at the last TSC of @buf_a and the
+ * first TSC of @buf_b in order to determine if the buffers overlap, and then
+ * walk forward in @buf_b until a later TSC is found.  A precondition is that
+ * @buf_a and @buf_b are positioned at a PSB.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts, or
+ * @buf_b + @len_b if there is no non-overlapped data.
+ */
+static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
+						size_t len_a,
+						unsigned char *buf_b,
+						size_t len_b, bool *consecutive)
+{
+	uint64_t tsc_a, tsc_b;
+	unsigned char *p;
+	size_t len, rem_a, rem_b;
+
+	p = intel_pt_last_psb(buf_a, len_a);
+	if (!p)
+		return buf_b; /* No PSB in buf_a => no overlap */
+
+	len = len_a - (p - buf_a); /* bytes from the last PSB to the end */
+	if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) {
+		/* The last PSB+ in buf_a is incomplete, so go back one more */
+		len_a -= len;
+		p = intel_pt_last_psb(buf_a, len_a);
+		if (!p)
+			return buf_b; /* No full PSB+ => assume no overlap */
+		len = len_a - (p - buf_a);
+		if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a))
+			return buf_b; /* No TSC in buf_a => assume no overlap */
+	}
+
+	while (1) { /* one PSB of buf_b per iteration */
+		/* Ignore PSB+ with no TSC */
+		if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) {
+			int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b);
+
+			/* Same TSC, so buffers are consecutive */
+			if (!cmp && rem_b >= rem_a) {
+				*consecutive = true;
+				return buf_b + len_b - (rem_b - rem_a);
+			}
+			if (cmp < 0)
+				return buf_b; /* tsc_a < tsc_b => no overlap */
+		}
+
+		if (!intel_pt_step_psb(&buf_b, &len_b))
+			return buf_b + len_b; /* No PSB in buf_b => no data */
+	}
+}
+
+/**
+ * intel_pt_find_overlap - determine start of non-overlapped trace data.
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ * @buf_b: second buffer
+ * @len_b: size of second buffer
+ * @have_tsc: can use TSC packets to detect overlap
+ * @consecutive: set to true if there is data in buf_b that is consecutive
+ *               to buf_a; otherwise left untouched
+ *
+ * When trace samples or snapshots are recorded there is the possibility that
+ * the data overlaps.  Note that, for the purposes of decoding, data is only
+ * useful if it begins with a PSB packet.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts, or
+ * @buf_b + @len_b if there is no non-overlapped data.
+ */
+unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
+				     unsigned char *buf_b, size_t len_b,
+				     bool have_tsc, bool *consecutive)
+{
+	unsigned char *found;
+
+	/* Buffer 'b' must start at PSB so throw away everything before that */
+	if (!intel_pt_next_psb(&buf_b, &len_b))
+		return buf_b + len_b; /* No PSB */
+
+	if (!intel_pt_next_psb(&buf_a, &len_a))
+		return buf_b; /* No overlap */
+
+	if (have_tsc) {
+		found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b,
+						  consecutive);
+		if (found) /* falls through to the byte compare only if NULL */
+			return found;
+	}
+
+	/*
+	 * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
+	 * we can ignore the first part of buffer 'a'.
+	 */
+	while (len_b < len_a) {
+		if (!intel_pt_step_psb(&buf_a, &len_a))
+			return buf_b; /* No overlap */
+	}
+
+	/* Now len_b >= len_a */
+	while (1) {
+		/* Lengths are equal: true only if buf_a is a prefix of buf_b */
+		found = memmem(buf_a, len_a, buf_b, len_a);
+		if (found) {
+			*consecutive = true;
+			return buf_b + len_a; /* skip what buf_a already has */
+		}
+
+		/* Try again at next PSB in buffer 'a' */
+		if (!intel_pt_step_psb(&buf_a, &len_a))
+			return buf_b; /* No overlap */
+	}
+}
diff --git a/util/intel-pt-decoder/intel-pt-decoder.h b/util/intel-pt-decoder/intel-pt-decoder.h
new file mode 100644
index 0000000..fc1752d
--- /dev/null
+++ b/util/intel-pt-decoder/intel-pt-decoder.h
@@ -0,0 +1,124 @@
+/*
+ * intel_pt_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_DECODER_H__
+#define INCLUDE__INTEL_PT_DECODER_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include "intel-pt-insn-decoder.h"
+
+#define INTEL_PT_IN_TX		(1 << 0)
+#define INTEL_PT_ABORT_TX	(1 << 1)
+#define INTEL_PT_ASYNC		(1 << 2)
+#define INTEL_PT_FUP_IP		(1 << 3)
+
+enum intel_pt_sample_type { /* bit flags: values may be OR'ed together */
+	INTEL_PT_BRANCH		= 1 << 0,
+	INTEL_PT_INSTRUCTION	= 1 << 1,
+	INTEL_PT_TRANSACTION	= 1 << 2,
+	INTEL_PT_PTW		= 1 << 3,
+	INTEL_PT_MWAIT_OP	= 1 << 4,
+	INTEL_PT_PWR_ENTRY	= 1 << 5,
+	INTEL_PT_EX_STOP	= 1 << 6,
+	INTEL_PT_PWR_EXIT	= 1 << 7,
+	INTEL_PT_CBR_CHG	= 1 << 8, /* OR'ed in by intel_pt_decode() */
+};
+
+enum intel_pt_period_type { /* type of intel_pt_params.period_type */
+	INTEL_PT_PERIOD_NONE,
+	INTEL_PT_PERIOD_INSTRUCTIONS,
+	INTEL_PT_PERIOD_TICKS,
+	INTEL_PT_PERIOD_MTC,
+};
+
+enum { /* decoder error codes */
+	INTEL_PT_ERR_NOMEM = 1, /* numbering starts at 1 */
+	INTEL_PT_ERR_INTERN,
+	INTEL_PT_ERR_BADPKT,
+	INTEL_PT_ERR_NODATA,
+	INTEL_PT_ERR_NOINSN,
+	INTEL_PT_ERR_MISMAT,
+	INTEL_PT_ERR_OVR,
+	INTEL_PT_ERR_LOST,
+	INTEL_PT_ERR_UNK,
+	INTEL_PT_ERR_NELOOP,
+	INTEL_PT_ERR_MAX, /* one more than the last code above */
+};
+
+struct intel_pt_state { /* what intel_pt_decode() returns a pointer to */
+	enum intel_pt_sample_type type; /* 0 means there is no sample */
+	int err; /* 0 on success, else the result of intel_pt_ext_err() */
+	uint64_t from_ip;
+	uint64_t to_ip;
+	uint64_t cr3; /* copied from the decoder on every decode call */
+	uint64_t tot_insn_cnt; /* copied from the decoder likewise */
+	uint64_t timestamp; /* the decoder's sample_timestamp */
+	uint64_t est_timestamp; /* timestamp plus an instruction-count term */
+	uint64_t trace_nr;
+	uint64_t ptw_payload;
+	uint64_t mwait_payload;
+	uint64_t pwre_payload;
+	uint64_t pwrx_payload;
+	uint64_t cbr_payload; /* set when INTEL_PT_CBR_CHG is flagged in type */
+	uint32_t flags; /* reset to 0 at the start of each decode pass */
+	enum intel_pt_insn_op insn_op;
+	int insn_len;
+	char insn[INTEL_PT_INSN_BUF_SZ];
+};
+
+struct intel_pt_insn;
+
+struct intel_pt_buffer { /* argument of intel_pt_params.get_trace() */
+	const unsigned char *buf;
+	size_t len; /* presumably the byte count at @buf -- TODO confirm */
+	bool consecutive;
+	uint64_t ref_timestamp;
+	uint64_t trace_nr;
+};
+
+struct intel_pt_params { /* configuration for intel_pt_decoder_new() */
+	int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
+	int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
+			 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
+			 uint64_t max_insn_cnt, void *data);
+	bool (*pgd_ip)(uint64_t ip, void *data);
+	void *data; /* presumably handed back to the callbacks above */
+	bool return_compression;
+	bool branch_enable;
+	uint64_t period;
+	enum intel_pt_period_type period_type;
+	unsigned max_non_turbo_ratio;
+	unsigned int mtc_period;
+	uint32_t tsc_ctc_ratio_n;
+	uint32_t tsc_ctc_ratio_d;
+};
+
+struct intel_pt_decoder;
+
+struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
+void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
+
+const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
+
+unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
+				     unsigned char *buf_b, size_t len_b,
+				     bool have_tsc, bool *consecutive);
+
+int intel_pt__strerror(int code, char *buf, size_t buflen);
+
+#endif
diff --git a/util/intel-pt-decoder/intel-pt-insn-decoder.c b/util/intel-pt-decoder/intel-pt-insn-decoder.c
new file mode 100644
index 0000000..5481882
--- /dev/null
+++ b/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -0,0 +1,274 @@
+/*
+ * intel_pt_insn_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+
+#include "event.h"
+
+#include "insn.h"
+
+#include "inat.c"
+#include "insn.c"
+
+#include "intel-pt-insn-decoder.h"
+#include "dump-insn.h"
+
+#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN
+#error Instruction buffer size too small
+#endif
+
+/* Based on branch_type() from arch/x86/events/intel/lbr.c */
+static void intel_pt_insn_decoder(struct insn *insn,
+				  struct intel_pt_insn *intel_pt_insn)
+{
+	enum intel_pt_insn_op op = INTEL_PT_OP_OTHER;
+	enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH;
+	int ext;
+
+	intel_pt_insn->rel = 0; /* stays 0 unless a direct branch is found */
+
+	if (insn_is_avx(insn)) { /* AVX insns are reported as non-branches */
+		intel_pt_insn->op = INTEL_PT_OP_OTHER;
+		intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH;
+		intel_pt_insn->length = insn->length;
+		return;
+	}
+
+	switch (insn->opcode.bytes[0]) {
+	case 0xf:
+		switch (insn->opcode.bytes[1]) {
+		case 0x05: /* syscall */
+		case 0x34: /* sysenter */
+			op = INTEL_PT_OP_SYSCALL;
+			branch = INTEL_PT_BR_INDIRECT;
+			break;
+		case 0x07: /* sysret */
+		case 0x35: /* sysexit */
+			op = INTEL_PT_OP_SYSRET;
+			branch = INTEL_PT_BR_INDIRECT;
+			break;
+		case 0x80 ... 0x8f: /* jcc */
+			op = INTEL_PT_OP_JCC;
+			branch = INTEL_PT_BR_CONDITIONAL;
+			break;
+		default:
+			break;
+		}
+		break;
+	case 0x70 ... 0x7f: /* jcc */
+		op = INTEL_PT_OP_JCC;
+		branch = INTEL_PT_BR_CONDITIONAL;
+		break;
+	case 0xc2: /* near ret */
+	case 0xc3: /* near ret */
+	case 0xca: /* far ret */
+	case 0xcb: /* far ret */
+		op = INTEL_PT_OP_RET;
+		branch = INTEL_PT_BR_INDIRECT;
+		break;
+	case 0xcf: /* iret */
+		op = INTEL_PT_OP_IRET;
+		branch = INTEL_PT_BR_INDIRECT;
+		break;
+	case 0xcc ... 0xce: /* int */
+		op = INTEL_PT_OP_INT;
+		branch = INTEL_PT_BR_INDIRECT;
+		break;
+	case 0xe8: /* call near rel */
+		op = INTEL_PT_OP_CALL;
+		branch = INTEL_PT_BR_UNCONDITIONAL;
+		break;
+	case 0x9a: /* call far absolute */
+		op = INTEL_PT_OP_CALL;
+		branch = INTEL_PT_BR_INDIRECT;
+		break;
+	case 0xe0 ... 0xe2: /* loop */
+		op = INTEL_PT_OP_LOOP;
+		branch = INTEL_PT_BR_CONDITIONAL;
+		break;
+	case 0xe3: /* jcc */
+		op = INTEL_PT_OP_JCC;
+		branch = INTEL_PT_BR_CONDITIONAL;
+		break;
+	case 0xe9: /* jmp */
+	case 0xeb: /* jmp */
+		op = INTEL_PT_OP_JMP;
+		branch = INTEL_PT_BR_UNCONDITIONAL;
+		break;
+	case 0xea: /* far jmp */
+		op = INTEL_PT_OP_JMP;
+		branch = INTEL_PT_BR_INDIRECT;
+		break;
+	case 0xff: /* call near absolute, call far absolute ind */
+		ext = (insn->modrm.bytes[0] >> 3) & 0x7; /* ModRM bits 5:3 */
+		switch (ext) {
+		case 2: /* near ind call */
+		case 3: /* far ind call */
+			op = INTEL_PT_OP_CALL;
+			branch = INTEL_PT_BR_INDIRECT;
+			break;
+		case 4: /* near ind jmp */
+		case 5: /* far ind jmp */
+			op = INTEL_PT_OP_JMP;
+			branch = INTEL_PT_BR_INDIRECT;
+			break;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	intel_pt_insn->op = op;
+	intel_pt_insn->branch = branch;
+	intel_pt_insn->length = insn->length;
+
+	/* Only direct branches get a relative displacement from the immediate */
+	if (branch == INTEL_PT_BR_CONDITIONAL ||
+	    branch == INTEL_PT_BR_UNCONDITIONAL) {
+#if __BYTE_ORDER == __BIG_ENDIAN
+		switch (insn->immediate.nbytes) {
+		case 1:
+			intel_pt_insn->rel = insn->immediate.value;
+			break;
+		case 2:
+			intel_pt_insn->rel =
+					bswap_16((short)insn->immediate.value);
+			break;
+		case 4:
+			intel_pt_insn->rel = bswap_32(insn->immediate.value);
+			break;
+		default:
+			intel_pt_insn->rel = 0;
+			break;
+		}
+#else
+		intel_pt_insn->rel = insn->immediate.value;
+#endif
+	}
+}
+
+int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
+		      struct intel_pt_insn *intel_pt_insn)
+{
+	struct insn insn;
+	size_t ncopy = INTEL_PT_INSN_BUF_SZ;
+
+	insn_init(&insn, buf, len, x86_64);
+	insn_get_length(&insn);
+	if (!insn_complete(&insn) || insn.length > len)
+		return -1; /* could not decode within @len bytes */
+	intel_pt_insn_decoder(&insn, intel_pt_insn);
+	if (insn.length < INTEL_PT_INSN_BUF_SZ)
+		ncopy = insn.length; /* copy only the instruction itself */
+	memcpy(intel_pt_insn->buf, buf, ncopy);
+	return 0;
+}
+
+/* Format the instruction at @inbuf into x->out as "insn: xx xx ... ". */
+const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
+		      u8 *inbuf, int inlen, int *lenp)
+{
+	struct insn insn;
+	int n, i, left = sizeof(x->out);
+
+	insn_init(&insn, inbuf, inlen, x->is64bit);
+	insn_get_length(&insn);
+	if (!insn_complete(&insn) || insn.length > inlen)
+		return "<bad>";
+	if (lenp)
+		*lenp = insn.length;
+	n = snprintf(x->out, left, "insn: ");
+	/*
+	 * @n is the running offset into x->out, so the room left is always
+	 * @left - @n; stop once the buffer is full rather than overrunning.
+	 */
+	for (i = 0; i < insn.length && n < left; i++)
+		n += snprintf(x->out + n, left - n, "%02x ", inbuf[i]);
+	return x->out;
+}
+
+const char *branch_name[] = { /* indexed by enum intel_pt_insn_op */
+	[INTEL_PT_OP_OTHER]	= "Other",
+	[INTEL_PT_OP_CALL]	= "Call",
+	[INTEL_PT_OP_RET]	= "Ret",
+	[INTEL_PT_OP_JCC]	= "Jcc",
+	[INTEL_PT_OP_JMP]	= "Jmp",
+	[INTEL_PT_OP_LOOP]	= "Loop",
+	[INTEL_PT_OP_IRET]	= "IRet",
+	[INTEL_PT_OP_INT]	= "Int",
+	[INTEL_PT_OP_SYSCALL]	= "Syscall",
+	[INTEL_PT_OP_SYSRET]	= "Sysret",
+};
+
+const char *intel_pt_insn_name(enum intel_pt_insn_op op)
+{
+	return branch_name[op]; /* @op is used as an index, unchecked */
+}
+
+int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
+		       size_t buf_len)
+{
+	const enum intel_pt_insn_branch br = intel_pt_insn->branch;
+	const char *sign;
+
+	/* Branch types without a relative displacement: name only */
+	if (br == INTEL_PT_BR_NO_BRANCH || br == INTEL_PT_BR_INDIRECT)
+		return snprintf(buf, buf_len, "%s",
+				intel_pt_insn_name(intel_pt_insn->op));
+
+	if (br != INTEL_PT_BR_CONDITIONAL && br != INTEL_PT_BR_UNCONDITIONAL)
+		return 0; /* unknown branch type: @buf is left untouched */
+	/* Negative values already print with '-', so only add an explicit '+' */
+	sign = intel_pt_insn->rel > 0 ? "+" : "";
+	return snprintf(buf, buf_len, "%s %s%d",
+			intel_pt_insn_name(intel_pt_insn->op), sign,
+			intel_pt_insn->rel);
+}
+
+/* Map an instruction op to the PERF_IP_FLAG_* bits that describe it. */
+int intel_pt_insn_type(enum intel_pt_insn_op op)
+{
+	switch (op) {
+	case INTEL_PT_OP_JMP:
+		return PERF_IP_FLAG_BRANCH;
+	case INTEL_PT_OP_JCC:
+	case INTEL_PT_OP_LOOP:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
+	case INTEL_PT_OP_CALL:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL;
+	case INTEL_PT_OP_INT:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+		       PERF_IP_FLAG_INTERRUPT;
+	case INTEL_PT_OP_SYSCALL:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+		       PERF_IP_FLAG_SYSCALLRET;
+	case INTEL_PT_OP_RET:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN;
+	case INTEL_PT_OP_IRET:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
+		       PERF_IP_FLAG_INTERRUPT;
+	case INTEL_PT_OP_SYSRET:
+		return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
+		       PERF_IP_FLAG_SYSCALLRET;
+	case INTEL_PT_OP_OTHER:
+	default:
+		/* Not a branch, or not an op this function knows about */
+		return 0;
+	}
+}
diff --git a/util/intel-pt-decoder/intel-pt-insn-decoder.h b/util/intel-pt-decoder/intel-pt-insn-decoder.h
new file mode 100644
index 0000000..37ec562
--- /dev/null
+++ b/util/intel-pt-decoder/intel-pt-insn-decoder.h
@@ -0,0 +1,63 @@
+/*
+ * intel_pt_insn_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__
+#define INCLUDE__INTEL_PT_INSN_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define INTEL_PT_INSN_DESC_MAX		32
+#define INTEL_PT_INSN_BUF_SZ		16
+
+enum intel_pt_insn_op { /* instruction class; INTEL_PT_OP_OTHER is 0 */
+	INTEL_PT_OP_OTHER,
+	INTEL_PT_OP_CALL,
+	INTEL_PT_OP_RET,
+	INTEL_PT_OP_JCC,
+	INTEL_PT_OP_JMP,
+	INTEL_PT_OP_LOOP,
+	INTEL_PT_OP_IRET,
+	INTEL_PT_OP_INT,
+	INTEL_PT_OP_SYSCALL,
+	INTEL_PT_OP_SYSRET,
+};
+
+enum intel_pt_insn_branch { /* type of intel_pt_insn.branch */
+	INTEL_PT_BR_NO_BRANCH,
+	INTEL_PT_BR_INDIRECT,
+	INTEL_PT_BR_CONDITIONAL,
+	INTEL_PT_BR_UNCONDITIONAL,
+};
+
+struct intel_pt_insn { /* filled in by intel_pt_get_insn() */
+	enum intel_pt_insn_op		op;
+	enum intel_pt_insn_branch	branch;
+	int				length;
+	int32_t				rel;
+	unsigned char			buf[INTEL_PT_INSN_BUF_SZ];
+};
+
+int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
+		      struct intel_pt_insn *intel_pt_insn);
+
+const char *intel_pt_insn_name(enum intel_pt_insn_op op);
+
+int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
+		       size_t buf_len);
+
+int intel_pt_insn_type(enum intel_pt_insn_op op);
+
+#endif
diff --git a/util/intel-pt-decoder/intel-pt-log.c b/util/intel-pt-decoder/intel-pt-log.c
new file mode 100644
index 0000000..e02bc7b
--- /dev/null
+++ b/util/intel-pt-decoder/intel-pt-log.c
@@ -0,0 +1,156 @@
+/*
+ * intel_pt_log.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "intel-pt-log.h"
+#include "intel-pt-insn-decoder.h"
+
+#include "intel-pt-pkt-decoder.h"
+
+#define MAX_LOG_NAME 256
+
+static FILE *f;
+static char log_name[MAX_LOG_NAME];
+bool intel_pt_enable_logging;
+
+void intel_pt_log_enable(void)	/* switch logging on; the file itself is opened later by intel_pt_log_open() */
+{
+	intel_pt_enable_logging = true;
+}
+
+/*
+ * Switch logging off.  Output already written to an open log file is
+ * flushed first; the file is left open.
+ */
+void intel_pt_log_disable(void)
+{
+	if (f != NULL)
+		fflush(f);
+
+	intel_pt_enable_logging = false;
+}
+
+/*
+ * Set the log file name to @name with ".log" appended.
+ *
+ * At most MAX_LOG_NAME - 5 characters of @name are used so that the suffix
+ * and the terminating NUL always fit in log_name[].  The name is built in a
+ * single bounded step: the earlier strncpy() + strcat() pair did not
+ * terminate the copy when @name was truncated, so a repeated call with a
+ * long name made strcat() append ".log" beyond the end of the buffer.
+ */
+void intel_pt_log_set_name(const char *name)
+{
+	snprintf(log_name, sizeof(log_name), "%.*s.log", MAX_LOG_NAME - 5,
+		 name);
+}
+
+/*
+ * Write a hex-dump prefix to the log file: @indent spaces, the position
+ * @pos, then @len bytes of @buf as two-digit hex values.  Dumps shorter
+ * than 16 bytes are padded to 16 columns so the following text lines up.
+ */
+static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos,
+				int indent)
+{
+	int col;
+
+	for (col = indent; col > 0; col--)
+		fprintf(f, " ");
+
+	fprintf(f, "  %08" PRIx64 ": ", pos);
+
+	col = 0;
+	while (col < len) {
+		fprintf(f, " %02x", buf[col]);
+		col++;
+	}
+	while (col < 16) {
+		fprintf(f, "   ");
+		col++;
+	}
+
+	fprintf(f, " ");
+}
+
+/*
+ * Same layout as intel_pt_print_data() but with all 16 byte columns left
+ * blank: @indent spaces, the position @pos, then padding only.
+ */
+static void intel_pt_print_no_data(uint64_t pos, int indent)
+{
+	int n;
+
+	for (n = indent; n > 0; n--)
+		fprintf(f, " ");
+
+	fprintf(f, "  %08" PRIx64 ": ", pos);
+
+	for (n = 16; n > 0; n--)
+		fprintf(f, "   ");
+
+	fprintf(f, " ");
+}
+
+/*
+ * Make sure the log file is open.
+ *
+ * Returns 0 when 'f' is ready for writing.  Returns -1 when logging is
+ * disabled, when no log name has been set, or when the file cannot be
+ * opened (in which case logging is switched off as well).
+ */
+static int intel_pt_log_open(void)
+{
+	if (!intel_pt_enable_logging)
+		return -1;
+
+	if (!f) {
+		if (log_name[0] == '\0')
+			return -1;
+
+		f = fopen(log_name, "w+");
+		if (!f) {
+			intel_pt_enable_logging = false;
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Log one packet: a hex dump of its @pkt_len raw bytes at @buf, labelled
+ * with position @pos, followed by the text from intel_pt_pkt_desc().
+ * Does nothing if the log file cannot be opened.
+ */
+void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+			   uint64_t pos, const unsigned char *buf)
+{
+	char text[INTEL_PT_PKT_DESC_MAX];
+
+	if (intel_pt_log_open() != 0)
+		return;
+
+	intel_pt_print_data(buf, pkt_len, pos, 0);
+	intel_pt_pkt_desc(packet, text, sizeof(text));
+	fprintf(f, "%s\n", text);
+}
+
+/*
+ * Log one instruction at address @ip: a hex dump of its bytes (never more
+ * than INTEL_PT_INSN_BUF_SZ of them) indented by 8, followed by the text
+ * from intel_pt_insn_desc(), or "Bad instruction!" when no description is
+ * produced.  Does nothing if the log file cannot be opened.
+ */
+void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+	char text[INTEL_PT_INSN_DESC_MAX];
+	size_t nbytes = intel_pt_insn->length;
+
+	if (intel_pt_log_open() != 0)
+		return;
+
+	nbytes = nbytes > INTEL_PT_INSN_BUF_SZ ? INTEL_PT_INSN_BUF_SZ : nbytes;
+	intel_pt_print_data(intel_pt_insn->buf, nbytes, ip, 8);
+
+	if (intel_pt_insn_desc(intel_pt_insn, text, sizeof(text)) <= 0) {
+		fprintf(f, "Bad instruction!\n");
+		return;
+	}
+	fprintf(f, "%s\n", text);
+}
+
+/*
+ * Log one instruction at address @ip without dumping its bytes: blank
+ * byte columns indented by 8, then the text from intel_pt_insn_desc(), or
+ * "Bad instruction!" when no description is produced.  Does nothing if the
+ * log file cannot be opened.
+ */
+void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+				 uint64_t ip)
+{
+	char text[INTEL_PT_INSN_DESC_MAX];
+
+	if (intel_pt_log_open() != 0)
+		return;
+
+	intel_pt_print_no_data(ip, 8);
+
+	if (intel_pt_insn_desc(intel_pt_insn, text, sizeof(text)) <= 0) {
+		fprintf(f, "Bad instruction!\n");
+		return;
+	}
+	fprintf(f, "%s\n", text);
+}
+
+/*
+ * printf-style write to the log file.  Nothing is written when the log
+ * file cannot be opened.
+ */
+void __intel_pt_log(const char *fmt, ...)
+{
+	va_list ap;
+
+	if (intel_pt_log_open() == 0) {
+		va_start(ap, fmt);
+		vfprintf(f, fmt, ap);
+		va_end(ap);
+	}
+}
diff --git a/util/intel-pt-decoder/intel-pt-log.h b/util/intel-pt-decoder/intel-pt-log.h
new file mode 100644
index 0000000..45b64f9
--- /dev/null
+++ b/util/intel-pt-decoder/intel-pt-log.h
@@ -0,0 +1,78 @@
+/*
+ * intel_pt_log.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_LOG_H__
+#define INCLUDE__INTEL_PT_LOG_H__
+
+#include <linux/compiler.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+struct intel_pt_pkt;
+
+void intel_pt_log_enable(void);
+void intel_pt_log_disable(void);
+void intel_pt_log_set_name(const char *name);
+
+void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+			   uint64_t pos, const unsigned char *buf);
+
+struct intel_pt_insn;
+
+void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
+void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+				 uint64_t ip);
+
+void __intel_pt_log(const char *fmt, ...) __printf(1, 2);
+
+#define intel_pt_log(fmt, ...) /* printf-style log; the call is skipped while intel_pt_enable_logging is false */ \
+	do { \
+		if (intel_pt_enable_logging) \
+			__intel_pt_log(fmt, ##__VA_ARGS__); \
+	} while (0)
+
+#define intel_pt_log_packet(arg, ...) /* guarded call of __intel_pt_log_packet() */ \
+	do { \
+		if (intel_pt_enable_logging) \
+			__intel_pt_log_packet(arg, ##__VA_ARGS__); \
+	} while (0)
+
+#define intel_pt_log_insn(arg, ...) /* guarded call of __intel_pt_log_insn() */ \
+	do { \
+		if (intel_pt_enable_logging) \
+			__intel_pt_log_insn(arg, ##__VA_ARGS__); \
+	} while (0)
+
+#define intel_pt_log_insn_no_data(arg, ...) /* guarded call of __intel_pt_log_insn_no_data() */ \
+	do { \
+		if (intel_pt_enable_logging) \
+			__intel_pt_log_insn_no_data(arg, ##__VA_ARGS__); \
+	} while (0)
+
+#define x64_fmt "0x%" PRIx64
+
+extern bool intel_pt_enable_logging;
+
+/* Log "<msg> at 0x<u>" (u in hex) and a newline, if logging is enabled. */
+static inline void intel_pt_log_at(const char *msg, uint64_t u)
+{
+	if (!intel_pt_enable_logging)
+		return;
+
+	__intel_pt_log("%s at " x64_fmt "\n", msg, u);
+}
+
+/* Log "<msg> to 0x<u>" (u in hex) and a newline, if logging is enabled. */
+static inline void intel_pt_log_to(const char *msg, uint64_t u)
+{
+	if (!intel_pt_enable_logging)
+		return;
+
+	__intel_pt_log("%s to " x64_fmt "\n", msg, u);
+}
+
+#endif
diff --git a/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/util/intel-pt-decoder/intel-pt-pkt-decoder.c
new file mode 100644
index 0000000..ba4c9dd
--- /dev/null
+++ b/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -0,0 +1,638 @@
+/*
+ * intel_pt_pkt_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/compiler.h>
+
+#include "intel-pt-pkt-decoder.h"
+
+#define BIT(n)		(1 << (n))
+
+#define BIT63		((uint64_t)1 << 63)
+
+#define NR_FLAG		BIT63
+
+#if __BYTE_ORDER == __BIG_ENDIAN	/* big-endian host: the le*_to_cpu helpers must swap bytes */
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define memcpy_le64(d, s, n) do { /* copy n bytes to the start of *d, then byte-swap the whole 64-bit value */ \
+	memcpy((d), (s), (n));    \
+	*(d) = le64_to_cpu(*(d)); \
+} while (0)
+#else	/* otherwise the conversions expand to nothing and memcpy_le64 is plain memcpy */
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define memcpy_le64 memcpy
+#endif
+
+static const char * const packet_name[] = {	/* display name per packet type; the _IP variants share the base name */
+	[INTEL_PT_BAD]		= "Bad Packet!",
+	[INTEL_PT_PAD]		= "PAD",
+	[INTEL_PT_TNT]		= "TNT",
+	[INTEL_PT_TIP_PGD]	= "TIP.PGD",
+	[INTEL_PT_TIP_PGE]	= "TIP.PGE",
+	[INTEL_PT_TSC]		= "TSC",
+	[INTEL_PT_TMA]		= "TMA",
+	[INTEL_PT_MODE_EXEC]	= "MODE.Exec",
+	[INTEL_PT_MODE_TSX]	= "MODE.TSX",
+	[INTEL_PT_MTC]		= "MTC",
+	[INTEL_PT_TIP]		= "TIP",
+	[INTEL_PT_FUP]		= "FUP",
+	[INTEL_PT_CYC]		= "CYC",
+	[INTEL_PT_VMCS]		= "VMCS",
+	[INTEL_PT_PSB]		= "PSB",
+	[INTEL_PT_PSBEND]	= "PSBEND",
+	[INTEL_PT_CBR]		= "CBR",
+	[INTEL_PT_TRACESTOP]	= "TraceSTOP",
+	[INTEL_PT_PIP]		= "PIP",
+	[INTEL_PT_OVF]		= "OVF",
+	[INTEL_PT_MNT]		= "MNT",
+	[INTEL_PT_PTWRITE]	= "PTWRITE",
+	[INTEL_PT_PTWRITE_IP]	= "PTWRITE",
+	[INTEL_PT_EXSTOP]	= "EXSTOP",
+	[INTEL_PT_EXSTOP_IP]	= "EXSTOP",
+	[INTEL_PT_MWAIT]	= "MWAIT",
+	[INTEL_PT_PWRE]		= "PWRE",
+	[INTEL_PT_PWRX]		= "PWRX",
+};
+
+/*
+ * Return the display name for packet type @type.
+ *
+ * A value outside packet_name[] would be an out-of-bounds read, and a gap
+ * in the table would hand NULL to printf-style callers, so both cases are
+ * reported with the INTEL_PT_BAD name instead.
+ */
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
+{
+	const size_t nr_names = sizeof(packet_name) / sizeof(packet_name[0]);
+
+	/* the cast also turns a negative value into an out-of-range index */
+	if ((size_t)type >= nr_names || !packet_name[type])
+		return packet_name[INTEL_PT_BAD];
+
+	return packet_name[type];
+}
+
+/*
+ * Decode a long TNT packet.
+ *
+ * All 8 bytes at @buf (header bytes included) are loaded as one
+ * little-endian value.  The most significant set bit marks the end of the
+ * TNT bits: the value is shifted left until that bit reaches bit 63 and is
+ * then dropped, leaving packet->count TNT bits at the top of the payload.
+ *
+ * Returns 8 on success or INTEL_PT_NEED_MORE_BYTES if @len < 8.
+ */
+static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len,
+				 struct intel_pt_pkt *packet)
+{
+	uint64_t payload = 0;
+	int count;
+
+	if (len < 8)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	/* buf has no alignment guarantee, so copy instead of cast-and-load */
+	memcpy_le64(&payload, buf, 8);
+
+	for (count = 47; count; count--) {
+		if (payload & BIT63)
+			break;
+		payload <<= 1;
+	}
+
+	packet->type = INTEL_PT_TNT;
+	packet->count = count;
+	packet->payload = payload << 1;	/* drop the marker bit itself */
+	return 8;
+}
+
+/*
+ * Decode a PIP packet: 2 header bytes followed by a 6-byte little-endian
+ * value.  Bit 0 of that value is moved to NR_FLAG (bit 63) of the payload
+ * and the remaining bits are shifted down by one.
+ *
+ * Returns 8 on success or INTEL_PT_NEED_MORE_BYTES if @len < 8.
+ */
+static int intel_pt_get_pip(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	uint64_t raw = 0;
+	uint64_t value;
+
+	if (len < 8)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	memcpy_le64(&raw, buf + 2, 6);
+
+	value = raw >> 1;
+	if (raw & 1)
+		value |= NR_FLAG;
+
+	packet->type = INTEL_PT_PIP;
+	packet->payload = value;
+	return 8;
+}
+
+static int intel_pt_get_tracestop(struct intel_pt_pkt *packet)	/* header-only packet: consumes 2 bytes, no payload */
+{
+	packet->type = INTEL_PT_TRACESTOP;
+	return 2;
+}
+
+/*
+ * Decode a CBR packet: 2 header bytes followed by a 16-bit little-endian
+ * payload.
+ *
+ * Returns 4 on success or INTEL_PT_NEED_MORE_BYTES if @len < 4.
+ */
+static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	uint64_t payload = 0;
+
+	if (len < 4)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	/* buf + 2 may be misaligned for uint16_t, so copy the bytes */
+	memcpy_le64(&payload, buf + 2, 2);
+
+	packet->type = INTEL_PT_CBR;
+	packet->payload = payload;
+	return 4;
+}
+
+/*
+ * Decode a VMCS packet: 2 header bytes followed by a little-endian payload
+ * whose size, (52 - 5) >> 3, evaluates to 5 bytes.
+ *
+ * Returns the packet length (payload size + 2), INTEL_PT_NEED_MORE_BYTES
+ * if @len is too short, or INTEL_PT_BAD_PACKET if the payload size were
+ * outside 1..7.
+ */
+static int intel_pt_get_vmcs(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	unsigned int nbytes = (52 - 5) >> 3;
+
+	if (nbytes < 1 || nbytes > 7)
+		return INTEL_PT_BAD_PACKET;
+	if (len < nbytes + 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	memcpy_le64(&packet->payload, buf + 2, nbytes);
+	packet->count = nbytes;
+	packet->type = INTEL_PT_VMCS;
+
+	return nbytes + 2;
+}
+
+static int intel_pt_get_ovf(struct intel_pt_pkt *packet)	/* header-only packet: consumes 2 bytes, no payload */
+{
+	packet->type = INTEL_PT_OVF;
+	return 2;
+}
+
+/*
+ * Decode a PSB packet: 16 bytes made of the pair 0x02 0x82 repeated.  The
+ * first pair has already been matched by the callers, so only bytes 2..15
+ * are checked here.
+ *
+ * Returns 16 on success, INTEL_PT_NEED_MORE_BYTES if @len < 16, or
+ * INTEL_PT_BAD_PACKET if the pattern is broken.
+ */
+static int intel_pt_get_psb(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	int pos;
+
+	if (len < 16)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	for (pos = 2; pos < 16; pos += 2) {
+		if (buf[pos] == 2 && buf[pos + 1] == 0x82)
+			continue;
+		return INTEL_PT_BAD_PACKET;
+	}
+
+	packet->type = INTEL_PT_PSB;
+	return 16;
+}
+
+static int intel_pt_get_psbend(struct intel_pt_pkt *packet)	/* header-only packet: consumes 2 bytes, no payload */
+{
+	packet->type = INTEL_PT_PSBEND;
+	return 2;
+}
+
+/*
+ * Decode a TMA packet (7 bytes).  Bytes 2-3 form a 16-bit little-endian
+ * value stored as the payload (printed as "CTC" by intel_pt_pkt_desc());
+ * byte 5 plus bit 0 of byte 6 form a 9-bit value stored in count (printed
+ * as "FC").
+ *
+ * Returns 7 on success or INTEL_PT_NEED_MORE_BYTES if @len < 7.
+ */
+static int intel_pt_get_tma(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	int ctc, fc;
+
+	if (len < 7)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	ctc = buf[2] | (buf[3] << 8);
+	fc = buf[5] | ((buf[6] & BIT(0)) << 8);
+
+	packet->type = INTEL_PT_TMA;
+	packet->payload = ctc;
+	packet->count = fc;
+	return 7;
+}
+
+static int intel_pt_get_pad(struct intel_pt_pkt *packet)	/* a PAD is the single byte 0x00 */
+{
+	packet->type = INTEL_PT_PAD;
+	return 1;
+}
+
+/*
+ * Decode an MNT packet: 3 header bytes followed by an 8-byte little-endian
+ * payload.
+ *
+ * Returns 11 on success or INTEL_PT_NEED_MORE_BYTES if @len < 11.
+ */
+static int intel_pt_get_mnt(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len >= 11) {
+		packet->type = INTEL_PT_MNT;
+		memcpy_le64(&packet->payload, buf + 3, 8);
+		return 11;
+	}
+
+	return INTEL_PT_NEED_MORE_BYTES;
+}
+
+/*
+ * Dispatch on the third header byte of a packet with a 3-byte header.
+ * Only 0x88 (MNT) is recognised; anything else is INTEL_PT_BAD_PACKET.
+ * Returns INTEL_PT_NEED_MORE_BYTES if @len < 3.
+ */
+static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
+			      struct intel_pt_pkt *packet)
+{
+	if (len < 3)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	if (buf[2] == 0x88) /* MNT */
+		return intel_pt_get_mnt(buf, len, packet);
+
+	return INTEL_PT_BAD_PACKET;
+}
+
+/*
+ * Decode a PTWRITE packet.  Bits 6:5 of the second header byte select the
+ * payload size (0: 4 bytes, 1: 8 bytes) and are stored in packet->count;
+ * bit 7 selects INTEL_PT_PTWRITE_IP over INTEL_PT_PTWRITE.  The caller has
+ * already checked that buf[1] exists.
+ *
+ * Returns the packet length (6 or 10), INTEL_PT_NEED_MORE_BYTES if @len is
+ * too short, or INTEL_PT_BAD_PACKET for any other size code.
+ */
+static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len,
+				struct intel_pt_pkt *packet)
+{
+	uint64_t payload = 0;
+
+	packet->count = (buf[1] >> 5) & 0x3;
+	packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP :
+					 INTEL_PT_PTWRITE;
+
+	/* buf + 2 may be misaligned, so the payload bytes are copied */
+	switch (packet->count) {
+	case 0:
+		if (len < 6)
+			return INTEL_PT_NEED_MORE_BYTES;
+		memcpy_le64(&payload, buf + 2, 4);
+		packet->payload = payload;
+		return 6;
+	case 1:
+		if (len < 10)
+			return INTEL_PT_NEED_MORE_BYTES;
+		memcpy_le64(&payload, buf + 2, 8);
+		packet->payload = payload;
+		return 10;
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+}
+
+static int intel_pt_get_exstop(struct intel_pt_pkt *packet)	/* header-only packet: consumes 2 bytes, no payload */
+{
+	packet->type = INTEL_PT_EXSTOP;
+	return 2;
+}
+
+static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet)	/* header-only packet: consumes 2 bytes, no payload */
+{
+	packet->type = INTEL_PT_EXSTOP_IP;
+	return 2;
+}
+
+/*
+ * Decode an MWAIT packet: 2 header bytes followed by an 8-byte
+ * little-endian payload.
+ *
+ * Returns 10 on success or INTEL_PT_NEED_MORE_BYTES if @len < 10.
+ */
+static int intel_pt_get_mwait(const unsigned char *buf, size_t len,
+			      struct intel_pt_pkt *packet)
+{
+	uint64_t payload = 0;
+
+	if (len < 10)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	/* buf + 2 may be misaligned for uint64_t, so copy the bytes */
+	memcpy_le64(&payload, buf + 2, 8);
+
+	packet->type = INTEL_PT_MWAIT;
+	packet->payload = payload;
+	return 10;
+}
+
+/*
+ * Decode a PWRE packet: 2 header bytes followed by a 2-byte little-endian
+ * payload.
+ *
+ * Returns 4 on success or INTEL_PT_NEED_MORE_BYTES if @len < 4.
+ */
+static int intel_pt_get_pwre(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	if (len >= 4) {
+		packet->type = INTEL_PT_PWRE;
+		memcpy_le64(&packet->payload, buf + 2, 2);
+		return 4;
+	}
+
+	return INTEL_PT_NEED_MORE_BYTES;
+}
+
+/*
+ * Decode a PWRX packet: 2 header bytes followed by a 5-byte little-endian
+ * payload.
+ *
+ * Returns 7 on success or INTEL_PT_NEED_MORE_BYTES if @len < 7.
+ */
+static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	if (len >= 7) {
+		packet->type = INTEL_PT_PWRX;
+		memcpy_le64(&packet->payload, buf + 2, 5);
+		return 7;
+	}
+
+	return INTEL_PT_NEED_MORE_BYTES;
+}
+
+/*
+ * Decode a packet whose first byte is 0x02: the second byte selects the
+ * packet type.  PTWRITE is matched on the low 5 bits only, because its
+ * upper bits carry flags; every other type is matched on the whole byte.
+ *
+ * Returns the length reported by the selected decoder,
+ * INTEL_PT_NEED_MORE_BYTES if @len < 2, or INTEL_PT_BAD_PACKET for an
+ * unknown second byte.
+ */
+static int intel_pt_get_ext(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	unsigned char opcode;
+
+	if (len < 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	opcode = buf[1];
+
+	if ((opcode & 0x1f) == 0x12)
+		return intel_pt_get_ptwrite(buf, len, packet);
+
+	switch (opcode) {
+	case 0xa3: /* Long TNT */
+		return intel_pt_get_long_tnt(buf, len, packet);
+	case 0x43: /* PIP */
+		return intel_pt_get_pip(buf, len, packet);
+	case 0x83: /* TraceStop */
+		return intel_pt_get_tracestop(packet);
+	case 0x03: /* CBR */
+		return intel_pt_get_cbr(buf, len, packet);
+	case 0xc8: /* VMCS */
+		return intel_pt_get_vmcs(buf, len, packet);
+	case 0xf3: /* OVF */
+		return intel_pt_get_ovf(packet);
+	case 0x82: /* PSB */
+		return intel_pt_get_psb(buf, len, packet);
+	case 0x23: /* PSBEND */
+		return intel_pt_get_psbend(packet);
+	case 0x73: /* TMA */
+		return intel_pt_get_tma(buf, len, packet);
+	case 0xC3: /* 3-byte header */
+		return intel_pt_get_3byte(buf, len, packet);
+	case 0x62: /* EXSTOP no IP */
+		return intel_pt_get_exstop(packet);
+	case 0xE2: /* EXSTOP with IP */
+		return intel_pt_get_exstop_ip(packet);
+	case 0xC2: /* MWAIT */
+		return intel_pt_get_mwait(buf, len, packet);
+	case 0x22: /* PWRE */
+		return intel_pt_get_pwre(buf, len, packet);
+	case 0xA2: /* PWRX */
+		return intel_pt_get_pwrx(buf, len, packet);
+	default:
+		break;
+	}
+
+	return INTEL_PT_BAD_PACKET;
+}
+
+/*
+ * Decode a one-byte (short) TNT packet.  @byte is shifted left until its
+ * highest set bit reaches bit 7 (at most 6 shifts); the number of bits
+ * left below that marker is stored in packet->count, and the byte is then
+ * placed so that those bits start at bit 63 of the payload.
+ *
+ * Always returns 1.
+ */
+static int intel_pt_get_short_tnt(unsigned int byte,
+				  struct intel_pt_pkt *packet)
+{
+	int count = 6;
+
+	while (count && !(byte & BIT(7))) {
+		byte <<= 1;
+		count--;
+	}
+
+	packet->type = INTEL_PT_TNT;
+	packet->count = count;
+	packet->payload = (uint64_t)byte << 57;
+
+	return 1;
+}
+
+/*
+ * Decode a CYC packet, a variable-length value.
+ *
+ * The first byte (@byte) carries 5 value bits in bits 7:3 and a
+ * continuation flag in bit 2.  Every following byte carries 7 value bits
+ * in bits 7:1 and a continuation flag in bit 0.  At most 9 continuation
+ * bytes are accepted.
+ *
+ * Returns the number of bytes consumed, INTEL_PT_NEED_MORE_BYTES if the
+ * buffer ends before the packet does, or INTEL_PT_BAD_PACKET if the packet
+ * is longer than allowed.
+ */
+static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf,
+			    size_t len, struct intel_pt_pkt *packet)
+{
+	unsigned int offs = 1, shift;
+	uint64_t payload = byte >> 3;
+
+	byte >>= 2;	/* move the first continuation flag to bit 0 */
+	len -= 1;	/* bytes available after the first one */
+	for (shift = 5; byte & 1; shift += 7) {
+		if (offs > 9)
+			return INTEL_PT_BAD_PACKET;
+		if (len < offs)
+			return INTEL_PT_NEED_MORE_BYTES;
+		byte = buf[offs++];
+		/*
+		 * shift grows to 61, so the value must be widened before
+		 * shifting: a 32-bit shift loses the upper payload bits and
+		 * is undefined once shift reaches 32.
+		 */
+		payload |= ((uint64_t)byte >> 1) << shift;
+	}
+
+	packet->type = INTEL_PT_CYC;
+	packet->payload = payload;
+	return offs;
+}
+
+/*
+ * Decode a packet that carries an IP (TIP, TIP.PGE, TIP.PGD or FUP, as
+ * given by @type).  Bits 7:5 of the header @byte select how many
+ * little-endian IP bytes follow and are stored in packet->count:
+ *
+ *   0 -> none, 1 -> 2 bytes, 2 -> 4 bytes, 3 and 4 -> 6 bytes, 6 -> 8 bytes
+ *
+ * Returns the packet length (IP bytes + 1), INTEL_PT_NEED_MORE_BYTES if
+ * @len is too short, or INTEL_PT_BAD_PACKET for any other selector.
+ */
+static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
+			   const unsigned char *buf, size_t len,
+			   struct intel_pt_pkt *packet)
+{
+	uint64_t payload = 0;
+	int ip_len;
+
+	packet->count = byte >> 5;
+
+	switch (packet->count) {
+	case 0:
+		ip_len = 0;
+		break;
+	case 1:
+		ip_len = 2;
+		break;
+	case 2:
+		ip_len = 4;
+		break;
+	case 3:
+	case 4:
+		ip_len = 6;
+		break;
+	case 6:
+		ip_len = 8;
+		break;
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+
+	if (len < (size_t)ip_len + 1)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	if (ip_len) {
+		/*
+		 * buf + 1 is never suitably aligned for a wider load, so the
+		 * IP bytes are copied rather than read through a cast.
+		 */
+		memcpy_le64(&payload, buf + 1, ip_len);
+		packet->payload = payload;
+	}
+
+	packet->type = type;
+
+	return ip_len + 1;
+}
+
+/*
+ * Decode a MODE packet (2 bytes).  Bits 7:5 of the second byte select the
+ * leaf and bits 1:0 carry its value:
+ *
+ *   leaf 0 (MODE.Exec): 0 -> 16, 1 -> 64, 2 -> 32 is stored as the payload
+ *   leaf 1 (MODE.TSX):  the two bits are stored as the payload; 3 is bad
+ *
+ * Returns 2 on success, INTEL_PT_NEED_MORE_BYTES if @len < 2, or
+ * INTEL_PT_BAD_PACKET for an unknown leaf or value.
+ */
+static int intel_pt_get_mode(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	unsigned int leaf, bits;
+
+	if (len < 2)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	leaf = buf[1] >> 5;
+	bits = buf[1] & 3;
+
+	if (leaf == 0) {
+		packet->type = INTEL_PT_MODE_EXEC;
+		if (bits == 0)
+			packet->payload = 16;
+		else if (bits == 1)
+			packet->payload = 64;
+		else if (bits == 2)
+			packet->payload = 32;
+		else
+			return INTEL_PT_BAD_PACKET;
+		return 2;
+	}
+
+	if (leaf == 1) {
+		packet->type = INTEL_PT_MODE_TSX;
+		if (bits == 3)
+			return INTEL_PT_BAD_PACKET;
+		packet->payload = bits;
+		return 2;
+	}
+
+	return INTEL_PT_BAD_PACKET;
+}
+
+/*
+ * Decode a TSC packet: 1 header byte followed by a 7-byte little-endian
+ * payload.
+ *
+ * Returns 8 on success or INTEL_PT_NEED_MORE_BYTES if @len < 8.
+ */
+static int intel_pt_get_tsc(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len >= 8) {
+		packet->type = INTEL_PT_TSC;
+		memcpy_le64(&packet->payload, buf + 1, 7);
+		return 8;
+	}
+
+	return INTEL_PT_NEED_MORE_BYTES;
+}
+
+/*
+ * Decode an MTC packet: 1 header byte followed by a 1-byte payload.
+ *
+ * Returns 2 on success or INTEL_PT_NEED_MORE_BYTES if @len < 2.
+ */
+static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
+			    struct intel_pt_pkt *packet)
+{
+	if (len >= 2) {
+		packet->type = INTEL_PT_MTC;
+		packet->payload = buf[1];
+		return 2;
+	}
+
+	return INTEL_PT_NEED_MORE_BYTES;
+}
+
+/*
+ * Decode the packet at the start of @buf into @packet, which is zeroed
+ * first.  The first byte selects the decoder:
+ *
+ *   bit 0 clear: 0x00 is PAD, 0x02 starts an extended packet, anything
+ *                else is a short TNT
+ *   bit 1 set:   CYC
+ *   otherwise:   the low 5 bits select TIP, TIP.PGE, TIP.PGD or FUP, or
+ *                (0x19) the whole byte selects MODE, TSC or MTC
+ *
+ * Returns the number of bytes consumed, INTEL_PT_NEED_MORE_BYTES or
+ * INTEL_PT_BAD_PACKET.
+ */
+static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
+				  struct intel_pt_pkt *packet)
+{
+	unsigned int hdr;
+
+	memset(packet, 0, sizeof(*packet));
+
+	if (len == 0)
+		return INTEL_PT_NEED_MORE_BYTES;
+
+	hdr = buf[0];
+
+	if ((hdr & BIT(0)) == 0) {
+		switch (hdr) {
+		case 0:
+			return intel_pt_get_pad(packet);
+		case 2:
+			return intel_pt_get_ext(buf, len, packet);
+		default:
+			return intel_pt_get_short_tnt(hdr, packet);
+		}
+	}
+
+	if (hdr & 2)
+		return intel_pt_get_cyc(hdr, buf, len, packet);
+
+	switch (hdr & 0x1f) {
+	case 0x0D:
+		return intel_pt_get_ip(INTEL_PT_TIP, hdr, buf, len, packet);
+	case 0x11:
+		return intel_pt_get_ip(INTEL_PT_TIP_PGE, hdr, buf, len,
+				       packet);
+	case 0x01:
+		return intel_pt_get_ip(INTEL_PT_TIP_PGD, hdr, buf, len,
+				       packet);
+	case 0x1D:
+		return intel_pt_get_ip(INTEL_PT_FUP, hdr, buf, len, packet);
+	case 0x19:
+		if (hdr == 0x99)
+			return intel_pt_get_mode(buf, len, packet);
+		if (hdr == 0x19)
+			return intel_pt_get_tsc(buf, len, packet);
+		if (hdr == 0x59)
+			return intel_pt_get_mtc(buf, len, packet);
+		return INTEL_PT_BAD_PACKET;
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+}
+
+/*
+ * Decode the packet at the start of @buf into @packet.
+ *
+ * On success, zero bytes (PAD packets) that directly follow the packet are
+ * folded into the returned length, as long as the total stays below 8 and
+ * within @len.
+ *
+ * Returns the number of bytes consumed (> 0), INTEL_PT_NEED_MORE_BYTES or
+ * INTEL_PT_BAD_PACKET.
+ */
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+			struct intel_pt_pkt *packet)
+{
+	int used = intel_pt_do_get_packet(buf, len, packet);
+
+	if (used <= 0)
+		return used;
+
+	for (; used < 8; used++) {
+		if (len <= (size_t)used || buf[used])
+			break;
+	}
+
+	return used;
+}
+
+/*
+ * Format a human-readable description of @packet into @buf, which holds
+ * @buf_len bytes.  The text starts with the packet name and continues with
+ * type-specific fields.
+ *
+ * Returns the length of the full description as snprintf() would report it
+ * (so a result >= @buf_len means the text was truncated), or a negative
+ * value if snprintf() fails.
+ *
+ * The TNT description is assembled from several snprintf() calls.  Each
+ * step advances by no more than the space that is left: advancing by the
+ * raw return value made the remaining size wrap around and the write
+ * pointer run past the buffer once the output was truncated.
+ */
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
+		      size_t buf_len)
+{
+	int ret, i, nr;
+	unsigned long long payload = packet->payload;
+	const char *name = intel_pt_pkt_name(packet->type);
+
+	switch (packet->type) {
+	case INTEL_PT_BAD:
+	case INTEL_PT_PAD:
+	case INTEL_PT_PSB:
+	case INTEL_PT_PSBEND:
+	case INTEL_PT_TRACESTOP:
+	case INTEL_PT_OVF:
+		return snprintf(buf, buf_len, "%s", name);
+	case INTEL_PT_TNT: {
+		size_t blen = buf_len;
+		size_t step;
+		int total = 0;
+
+		ret = snprintf(buf, blen, "%s ", name);
+		if (ret < 0)
+			return ret;
+		total += ret;
+		/* once blen reaches 0, later calls only count characters */
+		step = (size_t)ret < blen ? (size_t)ret : blen;
+		buf += step;
+		blen -= step;
+		for (i = 0; i < packet->count; i++) {
+			if (payload & BIT63)
+				ret = snprintf(buf, blen, "T");
+			else
+				ret = snprintf(buf, blen, "N");
+			if (ret < 0)
+				return ret;
+			total += ret;
+			step = (size_t)ret < blen ? (size_t)ret : blen;
+			buf += step;
+			blen -= step;
+			payload <<= 1;
+		}
+		ret = snprintf(buf, blen, " (%d)", packet->count);
+		if (ret < 0)
+			return ret;
+		total += ret;
+		return total;
+	}
+	case INTEL_PT_TIP_PGD:
+	case INTEL_PT_TIP_PGE:
+	case INTEL_PT_TIP:
+	case INTEL_PT_FUP:
+		if (!(packet->count))
+			return snprintf(buf, buf_len, "%s no ip", name);
+		__fallthrough;
+	case INTEL_PT_CYC:
+	case INTEL_PT_VMCS:
+	case INTEL_PT_MTC:
+	case INTEL_PT_MNT:
+	case INTEL_PT_CBR:
+	case INTEL_PT_TSC:
+		return snprintf(buf, buf_len, "%s 0x%llx", name, payload);
+	case INTEL_PT_TMA:
+		return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name,
+				(unsigned)payload, packet->count);
+	case INTEL_PT_MODE_EXEC:
+		return snprintf(buf, buf_len, "%s %lld", name, payload);
+	case INTEL_PT_MODE_TSX:
+		return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
+				name, (unsigned)(payload >> 1) & 1,
+				(unsigned)payload & 1);
+	case INTEL_PT_PIP:
+		/* NR_FLAG is reported separately and masked out of the value */
+		nr = packet->payload & NR_FLAG ? 1 : 0;
+		payload &= ~NR_FLAG;
+		ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
+			       name, payload, nr);
+		return ret;
+	case INTEL_PT_PTWRITE:
+		return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
+	case INTEL_PT_PTWRITE_IP:
+		return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
+	case INTEL_PT_EXSTOP:
+		return snprintf(buf, buf_len, "%s IP:0", name);
+	case INTEL_PT_EXSTOP_IP:
+		return snprintf(buf, buf_len, "%s IP:1", name);
+	case INTEL_PT_MWAIT:
+		return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x",
+				name, payload, (unsigned int)(payload & 0xff),
+				(unsigned int)((payload >> 32) & 0x3));
+	case INTEL_PT_PWRE:
+		return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u",
+				name, payload, !!(payload & 0x80),
+				(unsigned int)((payload >> 12) & 0xf),
+				(unsigned int)((payload >> 8) & 0xf));
+	case INTEL_PT_PWRX:
+		return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x",
+				name, payload,
+				(unsigned int)((payload >> 4) & 0xf),
+				(unsigned int)(payload & 0xf),
+				(unsigned int)((payload >> 8) & 0xf));
+	default:
+		break;
+	}
+	return snprintf(buf, buf_len, "%s 0x%llx (%d)",
+			name, payload, packet->count);
+}
diff --git a/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/util/intel-pt-decoder/intel-pt-pkt-decoder.h
new file mode 100644
index 0000000..73ddc3a
--- /dev/null
+++ b/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -0,0 +1,77 @@
+/*
+ * intel_pt_pkt_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__
+#define INCLUDE__INTEL_PT_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define INTEL_PT_PKT_DESC_MAX	256	/* size of the description buffer the logger passes to intel_pt_pkt_desc() */
+
+#define INTEL_PT_NEED_MORE_BYTES	-1	/* decoder result: the buffer ends before the packet does */
+#define INTEL_PT_BAD_PACKET		-2	/* decoder result: the bytes do not form a recognised packet */
+
+#define INTEL_PT_PSB_STR		"\002\202\002\202\002\202\002\202" \
+					"\002\202\002\202\002\202\002\202"	/* 0x02 0x82 repeated 8 times */
+#define INTEL_PT_PSB_LEN		16
+
+#define INTEL_PT_PKT_MAX_SZ		16
+
+enum intel_pt_pkt_type {	/* packet kinds; the values index packet_name[] in intel-pt-pkt-decoder.c */
+	INTEL_PT_BAD,
+	INTEL_PT_PAD,
+	INTEL_PT_TNT,
+	INTEL_PT_TIP_PGD,
+	INTEL_PT_TIP_PGE,
+	INTEL_PT_TSC,
+	INTEL_PT_TMA,
+	INTEL_PT_MODE_EXEC,
+	INTEL_PT_MODE_TSX,
+	INTEL_PT_MTC,
+	INTEL_PT_TIP,
+	INTEL_PT_FUP,
+	INTEL_PT_CYC,
+	INTEL_PT_VMCS,
+	INTEL_PT_PSB,
+	INTEL_PT_PSBEND,
+	INTEL_PT_CBR,
+	INTEL_PT_TRACESTOP,
+	INTEL_PT_PIP,
+	INTEL_PT_OVF,
+	INTEL_PT_MNT,
+	INTEL_PT_PTWRITE,
+	INTEL_PT_PTWRITE_IP,
+	INTEL_PT_EXSTOP,
+	INTEL_PT_EXSTOP_IP,
+	INTEL_PT_MWAIT,
+	INTEL_PT_PWRE,
+	INTEL_PT_PWRX,
+};
+
+struct intel_pt_pkt {
+	enum intel_pt_pkt_type	type;
+	int			count;	/* meaning depends on type, e.g. number of TNT bits or the IP size selector */
+	uint64_t		payload;	/* decoded value; for TNT the bits start at bit 63 */
+};
+
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
+
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+			struct intel_pt_pkt *packet);	/* returns bytes consumed (> 0), INTEL_PT_NEED_MORE_BYTES or INTEL_PT_BAD_PACKET */
+
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
+
+#endif
diff --git a/util/intel-pt-decoder/x86-opcode-map.txt b/util/intel-pt-decoder/x86-opcode-map.txt
new file mode 100644
index 0000000..e0b8593
--- /dev/null
+++ b/util/intel-pt-decoder/x86-opcode-map.txt
@@ -0,0 +1,1072 @@
+# x86 Opcode Maps
+#
+# This is (mostly) based on the following documentation.
+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C
+#   (#326018-047US, June 2013)
+#
+#<Opcode maps>
+# Table: table-name
+# Referrer: escaped-name
+# AVXcode: avx-code
+# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# (or)
+# opcode: escape # escaped-name
+# EndTable
+#
+# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix
+# mnemonics that begin with lowercase 'k' accept a VEX prefix
+#
+#<group maps>
+# GrpTable: GrpXXX
+# reg:  mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# EndTable
+#
+# AVX Superscripts
+#  (ev): this opcode requires EVEX prefix.
+#  (evo): this opcode is changed by EVEX prefix (EVEX opcode)
+#  (v): this opcode requires VEX prefix.
+#  (v1): this opcode only supports 128bit VEX.
+#
+# Last Prefix Superscripts
+#  - (66): the last prefix is 0x66
+#  - (F3): the last prefix is 0xF3
+#  - (F2): the last prefix is 0xF2
+#  - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
+#  - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
+
+Table: one byte opcode
+Referrer:
+AVXcode:
+# 0x00 - 0x0f
+00: ADD Eb,Gb
+01: ADD Ev,Gv
+02: ADD Gb,Eb
+03: ADD Gv,Ev
+04: ADD AL,Ib
+05: ADD rAX,Iz
+06: PUSH ES (i64)
+07: POP ES (i64)
+08: OR Eb,Gb
+09: OR Ev,Gv
+0a: OR Gb,Eb
+0b: OR Gv,Ev
+0c: OR AL,Ib
+0d: OR rAX,Iz
+0e: PUSH CS (i64)
+0f: escape # 2-byte escape
+# 0x10 - 0x1f
+10: ADC Eb,Gb
+11: ADC Ev,Gv
+12: ADC Gb,Eb
+13: ADC Gv,Ev
+14: ADC AL,Ib
+15: ADC rAX,Iz
+16: PUSH SS (i64)
+17: POP SS (i64)
+18: SBB Eb,Gb
+19: SBB Ev,Gv
+1a: SBB Gb,Eb
+1b: SBB Gv,Ev
+1c: SBB AL,Ib
+1d: SBB rAX,Iz
+1e: PUSH DS (i64)
+1f: POP DS (i64)
+# 0x20 - 0x2f
+20: AND Eb,Gb
+21: AND Ev,Gv
+22: AND Gb,Eb
+23: AND Gv,Ev
+24: AND AL,Ib
+25: AND rAx,Iz
+26: SEG=ES (Prefix)
+27: DAA (i64)
+28: SUB Eb,Gb
+29: SUB Ev,Gv
+2a: SUB Gb,Eb
+2b: SUB Gv,Ev
+2c: SUB AL,Ib
+2d: SUB rAX,Iz
+2e: SEG=CS (Prefix)
+2f: DAS (i64)
+# 0x30 - 0x3f
+30: XOR Eb,Gb
+31: XOR Ev,Gv
+32: XOR Gb,Eb
+33: XOR Gv,Ev
+34: XOR AL,Ib
+35: XOR rAX,Iz
+36: SEG=SS (Prefix)
+37: AAA (i64)
+38: CMP Eb,Gb
+39: CMP Ev,Gv
+3a: CMP Gb,Eb
+3b: CMP Gv,Ev
+3c: CMP AL,Ib
+3d: CMP rAX,Iz
+3e: SEG=DS (Prefix)
+3f: AAS (i64)
+# 0x40 - 0x4f
+40: INC eAX (i64) | REX (o64)
+41: INC eCX (i64) | REX.B (o64)
+42: INC eDX (i64) | REX.X (o64)
+43: INC eBX (i64) | REX.XB (o64)
+44: INC eSP (i64) | REX.R (o64)
+45: INC eBP (i64) | REX.RB (o64)
+46: INC eSI (i64) | REX.RX (o64)
+47: INC eDI (i64) | REX.RXB (o64)
+48: DEC eAX (i64) | REX.W (o64)
+49: DEC eCX (i64) | REX.WB (o64)
+4a: DEC eDX (i64) | REX.WX (o64)
+4b: DEC eBX (i64) | REX.WXB (o64)
+4c: DEC eSP (i64) | REX.WR (o64)
+4d: DEC eBP (i64) | REX.WRB (o64)
+4e: DEC eSI (i64) | REX.WRX (o64)
+4f: DEC eDI (i64) | REX.WRXB (o64)
+# 0x50 - 0x5f
+50: PUSH rAX/r8 (d64)
+51: PUSH rCX/r9 (d64)
+52: PUSH rDX/r10 (d64)
+53: PUSH rBX/r11 (d64)
+54: PUSH rSP/r12 (d64)
+55: PUSH rBP/r13 (d64)
+56: PUSH rSI/r14 (d64)
+57: PUSH rDI/r15 (d64)
+58: POP rAX/r8 (d64)
+59: POP rCX/r9 (d64)
+5a: POP rDX/r10 (d64)
+5b: POP rBX/r11 (d64)
+5c: POP rSP/r12 (d64)
+5d: POP rBP/r13 (d64)
+5e: POP rSI/r14 (d64)
+5f: POP rDI/r15 (d64)
+# 0x60 - 0x6f
+60: PUSHA/PUSHAD (i64)
+61: POPA/POPAD (i64)
+62: BOUND Gv,Ma (i64) | EVEX (Prefix)
+63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
+64: SEG=FS (Prefix)
+65: SEG=GS (Prefix)
+66: Operand-Size (Prefix)
+67: Address-Size (Prefix)
+68: PUSH Iz (d64)
+69: IMUL Gv,Ev,Iz
+6a: PUSH Ib (d64)
+6b: IMUL Gv,Ev,Ib
+6c: INS/INSB Yb,DX
+6d: INS/INSW/INSD Yz,DX
+6e: OUTS/OUTSB DX,Xb
+6f: OUTS/OUTSW/OUTSD DX,Xz
+# 0x70 - 0x7f
+70: JO Jb
+71: JNO Jb
+72: JB/JNAE/JC Jb
+73: JNB/JAE/JNC Jb
+74: JZ/JE Jb
+75: JNZ/JNE Jb
+76: JBE/JNA Jb
+77: JNBE/JA Jb
+78: JS Jb
+79: JNS Jb
+7a: JP/JPE Jb
+7b: JNP/JPO Jb
+7c: JL/JNGE Jb
+7d: JNL/JGE Jb
+7e: JLE/JNG Jb
+7f: JNLE/JG Jb
+# 0x80 - 0x8f
+80: Grp1 Eb,Ib (1A)
+81: Grp1 Ev,Iz (1A)
+82: Grp1 Eb,Ib (1A),(i64)
+83: Grp1 Ev,Ib (1A)
+84: TEST Eb,Gb
+85: TEST Ev,Gv
+86: XCHG Eb,Gb
+87: XCHG Ev,Gv
+88: MOV Eb,Gb
+89: MOV Ev,Gv
+8a: MOV Gb,Eb
+8b: MOV Gv,Ev
+8c: MOV Ev,Sw
+8d: LEA Gv,M
+8e: MOV Sw,Ew
+8f: Grp1A (1A) | POP Ev (d64)
+# 0x90 - 0x9f
+90: NOP | PAUSE (F3) | XCHG r8,rAX
+91: XCHG rCX/r9,rAX
+92: XCHG rDX/r10,rAX
+93: XCHG rBX/r11,rAX
+94: XCHG rSP/r12,rAX
+95: XCHG rBP/r13,rAX
+96: XCHG rSI/r14,rAX
+97: XCHG rDI/r15,rAX
+98: CBW/CWDE/CDQE
+99: CWD/CDQ/CQO
+9a: CALLF Ap (i64)
+9b: FWAIT/WAIT
+9c: PUSHF/D/Q Fv (d64)
+9d: POPF/D/Q Fv (d64)
+9e: SAHF
+9f: LAHF
+# 0xa0 - 0xaf
+a0: MOV AL,Ob
+a1: MOV rAX,Ov
+a2: MOV Ob,AL
+a3: MOV Ov,rAX
+a4: MOVS/B Yb,Xb
+a5: MOVS/W/D/Q Yv,Xv
+a6: CMPS/B Xb,Yb
+a7: CMPS/W/D Xv,Yv
+a8: TEST AL,Ib
+a9: TEST rAX,Iz
+aa: STOS/B Yb,AL
+ab: STOS/W/D/Q Yv,rAX
+ac: LODS/B AL,Xb
+ad: LODS/W/D/Q rAX,Xv
+ae: SCAS/B AL,Yb
+# Note: The May 2011 Intel manual shows Xv for the second parameter of the
+# next instruction but Yv is correct
+af: SCAS/W/D/Q rAX,Yv
+# 0xb0 - 0xbf
+b0: MOV AL/R8L,Ib
+b1: MOV CL/R9L,Ib
+b2: MOV DL/R10L,Ib
+b3: MOV BL/R11L,Ib
+b4: MOV AH/R12L,Ib
+b5: MOV CH/R13L,Ib
+b6: MOV DH/R14L,Ib
+b7: MOV BH/R15L,Ib
+b8: MOV rAX/r8,Iv
+b9: MOV rCX/r9,Iv
+ba: MOV rDX/r10,Iv
+bb: MOV rBX/r11,Iv
+bc: MOV rSP/r12,Iv
+bd: MOV rBP/r13,Iv
+be: MOV rSI/r14,Iv
+bf: MOV rDI/r15,Iv
+# 0xc0 - 0xcf
+c0: Grp2 Eb,Ib (1A)
+c1: Grp2 Ev,Ib (1A)
+c2: RETN Iw (f64)
+c3: RETN
+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
+c6: Grp11A Eb,Ib (1A)
+c7: Grp11B Ev,Iz (1A)
+c8: ENTER Iw,Ib
+c9: LEAVE (d64)
+ca: RETF Iw
+cb: RETF
+cc: INT3
+cd: INT Ib
+ce: INTO (i64)
+cf: IRET/D/Q
+# 0xd0 - 0xdf
+d0: Grp2 Eb,1 (1A)
+d1: Grp2 Ev,1 (1A)
+d2: Grp2 Eb,CL (1A)
+d3: Grp2 Ev,CL (1A)
+d4: AAM Ib (i64)
+d5: AAD Ib (i64)
+d6:
+d7: XLAT/XLATB
+d8: ESC
+d9: ESC
+da: ESC
+db: ESC
+dc: ESC
+dd: ESC
+de: ESC
+df: ESC
+# 0xe0 - 0xef
+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
+# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
+e0: LOOPNE/LOOPNZ Jb (f64)
+e1: LOOPE/LOOPZ Jb (f64)
+e2: LOOP Jb (f64)
+e3: JrCXZ Jb (f64)
+e4: IN AL,Ib
+e5: IN eAX,Ib
+e6: OUT Ib,AL
+e7: OUT Ib,eAX
+# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
+# in "near" jumps and calls is 16-bit. For CALL,
+# push of return address is 16-bit wide, RSP is decremented by 2
+# but is not truncated to 16 bits, unlike RIP.
+e8: CALL Jz (f64)
+e9: JMP-near Jz (f64)
+ea: JMP-far Ap (i64)
+eb: JMP-short Jb (f64)
+ec: IN AL,DX
+ed: IN eAX,DX
+ee: OUT DX,AL
+ef: OUT DX,eAX
+# 0xf0 - 0xff
+f0: LOCK (Prefix)
+f1:
+f2: REPNE (Prefix) | XACQUIRE (Prefix)
+f3: REP/REPE (Prefix) | XRELEASE (Prefix)
+f4: HLT
+f5: CMC
+f6: Grp3_1 Eb (1A)
+f7: Grp3_2 Ev (1A)
+f8: CLC
+f9: STC
+fa: CLI
+fb: STI
+fc: CLD
+fd: STD
+fe: Grp4 (1A)
+ff: Grp5 (1A)
+EndTable
+
+Table: 2-byte opcode (0x0f)
+Referrer: 2-byte escape
+AVXcode: 1
+# 0x0f 0x00-0x0f
+00: Grp6 (1A)
+01: Grp7 (1A)
+02: LAR Gv,Ew
+03: LSL Gv,Ew
+04:
+05: SYSCALL (o64)
+06: CLTS
+07: SYSRET (o64)
+08: INVD
+09: WBINVD
+0a:
+0b: UD2 (1B)
+0c:
+# AMD's prefetch group. Intel supports prefetchw(/1) only.
+0d: GrpP
+0e: FEMMS
+# 3DNow! uses the last imm byte as opcode extension.
+0f: 3DNow! Pq,Qq,Ib
+# 0x0f 0x10-0x1f
+# NOTE: According to the Intel SDM opcode map, vmovups and vmovupd have no
+# operands, but they actually do. Also, vmovss and vmovsd only accept 128 bits.
+# MOVSS/MOVSD have too many forms (3) in the SDM; this map shows a typical form.
+# Many AVX instructions lack the v1 superscript, according to the Intel AVX
+# Programming Reference, A.1.
+10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
+11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
+12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
+13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
+14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
+15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
+16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
+17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
+18: Grp16 (1A)
+19:
+# Intel SDM opcode map does not list MPX instructions. For now using Gv for
+# bnd registers and Ev for everything else is OK because the instruction
+# decoder does not use the information except as an indication that there is
+# a ModR/M byte.
+1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
+1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
+1c:
+1d:
+1e:
+1f: NOP Ev
+# 0x0f 0x20-0x2f
+20: MOV Rd,Cd
+21: MOV Rd,Dd
+22: MOV Cd,Rd
+23: MOV Dd,Rd
+24:
+25:
+26:
+27:
+28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
+29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
+2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
+2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
+2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
+2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
+2e: vucomiss Vss,Wss (v1) | vucomisd  Vsd,Wsd (66),(v1)
+2f: vcomiss Vss,Wss (v1) | vcomisd  Vsd,Wsd (66),(v1)
+# 0x0f 0x30-0x3f
+30: WRMSR
+31: RDTSC
+32: RDMSR
+33: RDPMC
+34: SYSENTER
+35: SYSEXIT
+36:
+37: GETSEC
+38: escape # 3-byte escape 1
+39:
+3a: escape # 3-byte escape 2
+3b:
+3c:
+3d:
+3e:
+3f:
+# 0x0f 0x40-0x4f
+40: CMOVO Gv,Ev
+41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66)
+42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66)
+43: CMOVAE/NB/NC Gv,Ev
+44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66)
+45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66)
+46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66)
+47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66)
+48: CMOVS Gv,Ev
+49: CMOVNS Gv,Ev
+4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66)
+4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk
+4c: CMOVL/NGE Gv,Ev
+4d: CMOVNL/GE Gv,Ev
+4e: CMOVLE/NG Gv,Ev
+4f: CMOVNLE/G Gv,Ev
+# 0x0f 0x50-0x5f
+50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
+51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
+52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
+53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
+54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
+55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
+56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
+57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
+58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
+59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
+5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
+5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
+5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
+5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
+5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
+5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
+# 0x0f 0x60-0x6f
+60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
+61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
+62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
+63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
+64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
+65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
+66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
+67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
+68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
+69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
+6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
+6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
+6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
+6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
+6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
+6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev)
+# 0x0f 0x70-0x7f
+70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
+71: Grp12 (1A)
+72: Grp13 (1A)
+73: Grp14 (1A)
+74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
+75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
+76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
+# Note: (v) is removed because vzeroall and vzeroupper become emms without VEX.
+77: emms | vzeroupper | vzeroall
+78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev)
+79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev)
+7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev)
+7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev)
+7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
+7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
+7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
+7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
+# 0x0f 0x80-0x8f
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
+80: JO Jz (f64)
+81: JNO Jz (f64)
+82: JB/JC/JNAE Jz (f64)
+83: JAE/JNB/JNC Jz (f64)
+84: JE/JZ Jz (f64)
+85: JNE/JNZ Jz (f64)
+86: JBE/JNA Jz (f64)
+87: JA/JNBE Jz (f64)
+88: JS Jz (f64)
+89: JNS Jz (f64)
+8a: JP/JPE Jz (f64)
+8b: JNP/JPO Jz (f64)
+8c: JL/JNGE Jz (f64)
+8d: JNL/JGE Jz (f64)
+8e: JLE/JNG Jz (f64)
+8f: JNLE/JG Jz (f64)
+# 0x0f 0x90-0x9f
+90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
+91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
+92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2)
+93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2)
+94: SETE/Z Eb
+95: SETNE/NZ Eb
+96: SETBE/NA Eb
+97: SETA/NBE Eb
+98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66)
+99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66)
+9a: SETP/PE Eb
+9b: SETNP/PO Eb
+9c: SETL/NGE Eb
+9d: SETNL/GE Eb
+9e: SETLE/NG Eb
+9f: SETNLE/G Eb
+# 0x0f 0xa0-0xaf
+a0: PUSH FS (d64)
+a1: POP FS (d64)
+a2: CPUID
+a3: BT Ev,Gv
+a4: SHLD Ev,Gv,Ib
+a5: SHLD Ev,Gv,CL
+a6: GrpPDLK
+a7: GrpRNG
+a8: PUSH GS (d64)
+a9: POP GS (d64)
+aa: RSM
+ab: BTS Ev,Gv
+ac: SHRD Ev,Gv,Ib
+ad: SHRD Ev,Gv,CL
+ae: Grp15 (1A),(1C)
+af: IMUL Gv,Ev
+# 0x0f 0xb0-0xbf
+b0: CMPXCHG Eb,Gb
+b1: CMPXCHG Ev,Gv
+b2: LSS Gv,Mp
+b3: BTR Ev,Gv
+b4: LFS Gv,Mp
+b5: LGS Gv,Mp
+b6: MOVZX Gv,Eb
+b7: MOVZX Gv,Ew
+b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
+b9: Grp10 (1A)
+ba: Grp8 Ev,Ib (1A)
+bb: BTC Ev,Gv
+bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
+bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
+be: MOVSX Gv,Eb
+bf: MOVSX Gv,Ew
+# 0x0f 0xc0-0xcf
+c0: XADD Eb,Gb
+c1: XADD Ev,Gv
+c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
+c3: movnti My,Gy
+c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
+c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
+c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
+c7: Grp9 (1A)
+c8: BSWAP RAX/EAX/R8/R8D
+c9: BSWAP RCX/ECX/R9/R9D
+ca: BSWAP RDX/EDX/R10/R10D
+cb: BSWAP RBX/EBX/R11/R11D
+cc: BSWAP RSP/ESP/R12/R12D
+cd: BSWAP RBP/EBP/R13/R13D
+ce: BSWAP RSI/ESI/R14/R14D
+cf: BSWAP RDI/EDI/R15/R15D
+# 0x0f 0xd0-0xdf
+d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
+d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
+d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
+d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
+d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
+d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
+d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
+d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
+d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
+d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
+da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
+db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo)
+dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
+dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
+de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
+df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo)
+# 0x0f 0xe0-0xef
+e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
+e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
+e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
+e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
+e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
+e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
+e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2)
+e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
+e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
+e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
+ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
+eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo)
+ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
+ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
+ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
+ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo)
+# 0x0f 0xf0-0xff
+f0: vlddqu Vx,Mx (F2)
+f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
+f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
+f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
+f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
+f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
+f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
+f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
+f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
+f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
+fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
+fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
+fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
+fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
+fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
+ff: UD0
+EndTable
+
+Table: 3-byte opcode 1 (0x0f 0x38)
+Referrer: 3-byte escape 1
+AVXcode: 2
+# 0x0f 0x38 0x00-0x0f
+00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
+01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
+02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
+03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
+04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
+05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
+06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
+07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
+08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
+09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
+0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
+0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
+0c: vpermilps Vx,Hx,Wx (66),(v)
+0d: vpermilpd Vx,Hx,Wx (66),(v)
+0e: vtestps Vx,Wx (66),(v)
+0f: vtestpd Vx,Wx (66),(v)
+# 0x0f 0x38 0x10-0x1f
+10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev)
+11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev)
+12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev)
+13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev)
+14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo)
+15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo)
+16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo)
+17: vptest Vx,Wx (66)
+18: vbroadcastss Vx,Wd (66),(v)
+19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo)
+1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo)
+1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev)
+1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
+1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
+1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
+1f: vpabsq Vx,Wx (66),(ev)
+# 0x0f 0x38 0x20-0x2f
+20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev)
+21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev)
+22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev)
+23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev)
+24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev)
+25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev)
+26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev)
+27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev)
+28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev)
+29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev)
+2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev)
+2b: vpackusdw Vx,Hx,Wx (66),(v1)
+2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo)
+2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo)
+2e: vmaskmovps Mx,Hx,Vx (66),(v)
+2f: vmaskmovpd Mx,Hx,Vx (66),(v)
+# 0x0f 0x38 0x30-0x3f
+30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev)
+31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev)
+32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev)
+33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev)
+34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev)
+35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev)
+36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo)
+37: vpcmpgtq Vx,Hx,Wx (66),(v1)
+38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev)
+39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev)
+3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev)
+3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo)
+3c: vpmaxsb Vx,Hx,Wx (66),(v1)
+3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo)
+3e: vpmaxuw Vx,Hx,Wx (66),(v1)
+3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo)
+# 0x0f 0x38 0x40-0x8f
+40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo)
+41: vphminposuw Vdq,Wdq (66),(v1)
+42: vgetexpps/d Vx,Wx (66),(ev)
+43: vgetexpss/d Vx,Hx,Wx (66),(ev)
+44: vplzcntd/q Vx,Wx (66),(ev)
+45: vpsrlvd/q Vx,Hx,Wx (66),(v)
+46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
+47: vpsllvd/q Vx,Hx,Wx (66),(v)
+# Skip 0x48-0x4b
+4c: vrcp14ps/d Vpd,Wpd (66),(ev)
+4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
+4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
+4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev)
+# Skip 0x50-0x57
+58: vpbroadcastd Vx,Wx (66),(v)
+59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
+5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
+5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
+# Skip 0x5c-0x63
+64: vpblendmd/q Vx,Hx,Wx (66),(ev)
+65: vblendmps/d Vx,Hx,Wx (66),(ev)
+66: vpblendmb/w Vx,Hx,Wx (66),(ev)
+# Skip 0x67-0x74
+75: vpermi2b/w Vx,Hx,Wx (66),(ev)
+76: vpermi2d/q Vx,Hx,Wx (66),(ev)
+77: vpermi2ps/d Vx,Hx,Wx (66),(ev)
+78: vpbroadcastb Vx,Wx (66),(v)
+79: vpbroadcastw Vx,Wx (66),(v)
+7a: vpbroadcastb Vx,Rv (66),(ev)
+7b: vpbroadcastw Vx,Rv (66),(ev)
+7c: vpbroadcastd/q Vx,Rv (66),(ev)
+7d: vpermt2b/w Vx,Hx,Wx (66),(ev)
+7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
+7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
+80: INVEPT Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
+82: INVPCID Gy,Mdq (66)
+83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
+88: vexpandps/d Vpd,Wpd (66),(ev)
+89: vpexpandd/q Vx,Wx (66),(ev)
+8a: vcompressps/d Wx,Vx (66),(ev)
+8b: vpcompressd/q Wx,Vx (66),(ev)
+8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
+8d: vpermb/w Vx,Hx,Wx (66),(ev)
+8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
+# 0x0f 0x38 0x90-0xbf (FMA)
+90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo)
+91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo)
+92: vgatherdps/d Vx,Hx,Wx (66),(v)
+93: vgatherqps/d Vx,Hx,Wx (66),(v)
+94:
+95:
+96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
+97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
+98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
+99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
+9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
+9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
+9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+a0: vpscatterdd/q Wx,Vx (66),(ev)
+a1: vpscatterqd/q Wx,Vx (66),(ev)
+a2: vscatterdps/d Wx,Vx (66),(ev)
+a3: vscatterqps/d Wx,Vx (66),(ev)
+a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
+a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
+a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
+a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
+ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
+ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
+af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+b4: vpmadd52luq Vx,Hx,Wx (66),(ev)
+b5: vpmadd52huq Vx,Hx,Wx (66),(ev)
+b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
+b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
+b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
+b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
+bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
+bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
+bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+# 0x0f 0x38 0xc0-0xff
+c4: vpconflictd/q Vx,Wx (66),(ev)
+c6: Grp18 (1A)
+c7: Grp19 (1A)
+c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev)
+c9: sha1msg1 Vdq,Wdq
+ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev)
+cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev)
+cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev)
+cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev)
+db: VAESIMC Vdq,Wdq (66),(v1)
+dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
+dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
+de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
+df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
+f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
+f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
+f2: ANDN Gy,By,Ey (v)
+f3: Grp17 (1A)
+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
+f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
+f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
+EndTable
+
+Table: 3-byte opcode 2 (0x0f 0x3a)
+Referrer: 3-byte escape 2
+AVXcode: 3
+# 0x0f 0x3a 0x00-0xff
+00: vpermq Vqq,Wqq,Ib (66),(v)
+01: vpermpd Vqq,Wqq,Ib (66),(v)
+02: vpblendd Vx,Hx,Wx,Ib (66),(v)
+03: valignd/q Vx,Hx,Wx,Ib (66),(ev)
+04: vpermilps Vx,Wx,Ib (66),(v)
+05: vpermilpd Vx,Wx,Ib (66),(v)
+06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
+07:
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
+0c: vblendps Vx,Hx,Wx,Ib (66)
+0d: vblendpd Vx,Hx,Wx,Ib (66)
+0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
+0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
+14: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
+15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
+16: vpextrd/q Ey,Vdq,Ib (66),(v1)
+17: vextractps Ed,Vdq,Ib (66),(v1)
+18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev)
+1d: vcvtps2ph Wx,Vx,Ib (66),(v)
+1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev)
+1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev)
+20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
+21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
+22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
+23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
+25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+30: kshiftrb/w Vk,Uk,Ib (66),(v)
+31: kshiftrd/q Vk,Uk,Ib (66),(v)
+32: kshiftlb/w Vk,Uk,Ib (66),(v)
+33: kshiftld/q Vk,Uk,Ib (66),(v)
+38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev)
+3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev)
+3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev)
+40: vdpps Vx,Hx,Wx,Ib (66)
+41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
+42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo)
+43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
+44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
+46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
+4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
+4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
+4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
+50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev)
+51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
+54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
+55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
+60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
+61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
+62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
+63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+cc: sha1rnds4 Vdq,Wdq,Ib
+df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
+f0: RORX Gy,Ey,Ib (F2),(v)
+EndTable
+
+GrpTable: Grp1
+0: ADD
+1: OR
+2: ADC
+3: SBB
+4: AND
+5: SUB
+6: XOR
+7: CMP
+EndTable
+
+GrpTable: Grp1A
+0: POP
+EndTable
+
+GrpTable: Grp2
+0: ROL
+1: ROR
+2: RCL
+3: RCR
+4: SHL/SAL
+5: SHR
+6:
+7: SAR
+EndTable
+
+GrpTable: Grp3_1
+0: TEST Eb,Ib
+1: TEST Eb,Ib
+2: NOT Eb
+3: NEG Eb
+4: MUL AL,Eb
+5: IMUL AL,Eb
+6: DIV AL,Eb
+7: IDIV AL,Eb
+EndTable
+
+GrpTable: Grp3_2
+0: TEST Ev,Iz
+1:
+2: NOT Ev
+3: NEG Ev
+4: MUL rAX,Ev
+5: IMUL rAX,Ev
+6: DIV rAX,Ev
+7: IDIV rAX,Ev
+EndTable
+
+GrpTable: Grp4
+0: INC Eb
+1: DEC Eb
+EndTable
+
+GrpTable: Grp5
+0: INC Ev
+1: DEC Ev
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
+2: CALLN Ev (f64)
+3: CALLF Ep
+4: JMPN Ev (f64)
+5: JMPF Mp
+6: PUSH Ev (d64)
+7:
+EndTable
+
+GrpTable: Grp6
+0: SLDT Rv/Mw
+1: STR Rv/Mw
+2: LLDT Ew
+3: LTR Ew
+4: VERR Ew
+5: VERW Ew
+EndTable
+
+GrpTable: Grp7
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
+3: LIDT Ms
+4: SMSW Mw/Rv
+5: rdpkru (110),(11B) | wrpkru (111),(11B)
+6: LMSW Ew
+7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
+EndTable
+
+GrpTable: Grp8
+4: BT
+5: BTS
+6: BTR
+7: BTC
+EndTable
+
+GrpTable: Grp9
+1: CMPXCHG8B/16B Mq/Mdq
+3: xrstors
+4: xsavec
+5: xsaves
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
+EndTable
+
+GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
+EndTable
+
+# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
+GrpTable: Grp11A
+0: MOV Eb,Ib
+7: XABORT Ib (000),(11B)
+EndTable
+
+GrpTable: Grp11B
+0: MOV Eb,Iz
+7: XBEGIN Jz (000),(11B)
+EndTable
+
+GrpTable: Grp12
+2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
+4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
+6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp13
+0: vprord/q Hx,Wx,Ib (66),(ev)
+1: vprold/q Hx,Wx,Ib (66),(ev)
+2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
+4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo)
+6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp14
+2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
+3: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
+6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
+7: vpslldq Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp15
+0: fxsave | RDFSBASE Ry (F3),(11B)
+1: fxstor | RDGSBASE Ry (F3),(11B)
+2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
+3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
+4: XSAVE | ptwrite Ey (F3),(11B)
+5: XRSTOR | lfence (11B)
+6: XSAVEOPT | clwb (66) | mfence (11B)
+7: clflush | clflushopt (66) | sfence (11B)
+EndTable
+
+GrpTable: Grp16
+0: prefetch NTA
+1: prefetch T0
+2: prefetch T1
+3: prefetch T2
+EndTable
+
+GrpTable: Grp17
+1: BLSR By,Ey (v)
+2: BLSMSK By,Ey (v)
+3: BLSI By,Ey (v)
+EndTable
+
+GrpTable: Grp18
+1: vgatherpf0dps/d Wx (66),(ev)
+2: vgatherpf1dps/d Wx (66),(ev)
+5: vscatterpf0dps/d Wx (66),(ev)
+6: vscatterpf1dps/d Wx (66),(ev)
+EndTable
+
+GrpTable: Grp19
+1: vgatherpf0qps/d Wx (66),(ev)
+2: vgatherpf1qps/d Wx (66),(ev)
+5: vscatterpf0qps/d Wx (66),(ev)
+6: vscatterpf1qps/d Wx (66),(ev)
+EndTable
+
+# AMD's Prefetch Group
+GrpTable: GrpP
+0: PREFETCH
+1: PREFETCHW
+EndTable
+
+GrpTable: GrpPDLK
+0: MONTMUL
+1: XSHA1
+2: XSHA2
+EndTable
+
+GrpTable: GrpRNG
+0: xstore-rng
+1: xcrypt-ecb
+2: xcrypt-cbc
+4: xcrypt-cfb
+5: xcrypt-ofb
+EndTable
diff --git a/util/intel-pt.c b/util/intel-pt.c
new file mode 100644
index 0000000..0effaff
--- /dev/null
+++ b/util/intel-pt.c
@@ -0,0 +1,2622 @@
+/*
+ * intel-pt.c: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "../perf.h"
+#include "session.h"
+#include "machine.h"
+#include "memswap.h"
+#include "sort.h"
+#include "tool.h"
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "map.h"
+#include "color.h"
+#include "util.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "symbol.h"
+#include "callchain.h"
+#include "dso.h"
+#include "debug.h"
+#include "auxtrace.h"
+#include "tsc.h"
+#include "intel-pt.h"
+#include "config.h"
+
+#include "intel-pt-decoder/intel-pt-log.h"
+#include "intel-pt-decoder/intel-pt-decoder.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
+
+#define MAX_TIMESTAMP (~0ULL)
+
+struct intel_pt {	/* top-level Intel PT state; intel_pt_queue.pt points to one of these */
+	struct auxtrace auxtrace;
+	struct auxtrace_queues queues;	/* queue_array[] is indexed by intel_pt_queue.queue_nr */
+	struct auxtrace_heap heap;
+	u32 auxtrace_type;
+	struct perf_session *session;	/* session->data supplies the fd used to load buffer data */
+	struct machine *machine;
+	struct perf_evsel *switch_evsel;
+	struct thread *unknown_thread;
+	bool timeless_decoding;
+	bool sampling_mode;	/* either of these two modes means a buffer might overlap */
+	bool snapshot_mode;	/* the previous one (see intel_pt_get_trace()) */
+	bool per_cpu_mmaps;
+	bool have_tsc;	/* forwarded to intel_pt_find_overlap() */
+	bool data_queued;
+	bool est_tsc;
+	bool sync_switch;
+	bool mispred_all;
+	int have_sched_switch;
+	u32 pmu_type;
+	u64 kernel_start;
+	u64 switch_ip;
+	u64 ptss_ip;
+
+	struct perf_tsc_conversion tc;
+	bool cap_user_time_zero;
+
+	struct itrace_synth_opts synth_opts;
+
+	bool sample_instructions;
+	u64 instructions_sample_type;
+	u64 instructions_id;
+
+	bool sample_branches;
+	u32 branches_filter;
+	u64 branches_sample_type;
+	u64 branches_id;
+
+	bool sample_transactions;
+	u64 transactions_sample_type;
+	u64 transactions_id;
+
+	bool sample_ptwrites;
+	u64 ptwrites_sample_type;
+	u64 ptwrites_id;
+
+	bool sample_pwr_events;
+	u64 pwr_events_sample_type;
+	u64 mwait_id;
+	u64 pwre_id;
+	u64 exstop_id;
+	u64 pwrx_id;
+	u64 cbr_id;
+
+	u64 tsc_bit;
+	u64 mtc_bit;
+	u64 mtc_freq_bits;
+	u32 tsc_ctc_ratio_n;
+	u32 tsc_ctc_ratio_d;
+	u64 cyc_bit;
+	u64 noretcomp_bit;
+	unsigned max_non_turbo_ratio;
+	unsigned cbr2khz;
+
+	unsigned long num_events;
+
+	char *filter;
+	struct addr_filters filts;
+};
+
+enum switch_state {	/* NOTE(review): presumably the values kept in intel_pt_queue.switch_state - confirm */
+	INTEL_PT_SS_NOT_TRACING,
+	INTEL_PT_SS_UNKNOWN,
+	INTEL_PT_SS_TRACING,
+	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
+	INTEL_PT_SS_EXPECTING_SWITCH_IP,
+};
+
+struct intel_pt_queue {	/* per-queue state; passed to intel_pt_get_trace() as its data argument */
+	struct intel_pt *pt;	/* back-pointer to the top-level Intel PT state */
+	unsigned int queue_nr;	/* index of this queue in pt->queues.queue_array[] */
+	struct auxtrace_buffer *buffer;	/* buffer most recently fetched by intel_pt_get_trace() */
+	struct auxtrace_buffer *old_buffer;	/* last non-empty buffer; its data is dropped once superseded */
+	void *decoder;
+	const struct intel_pt_state *state;
+	struct ip_callchain *chain;
+	struct branch_stack *last_branch;
+	struct branch_stack *last_branch_rb;
+	size_t last_branch_pos;
+	union perf_event *event_buf;
+	bool on_heap;
+	bool stop;	/* while set, intel_pt_get_trace() reports no data */
+	bool step_through_buffers;	/* makes intel_pt_get_trace() set ->stop after each buffer */
+	bool use_buffer_pid_tid;
+	bool sync_switch;
+	pid_t pid, tid;
+	int cpu;
+	int switch_state;
+	pid_t next_tid;
+	struct thread *thread;
+	bool exclude_kernel;
+	bool have_sample;
+	u64 time;
+	u64 timestamp;
+	u32 flags;
+	u16 insn_len;
+	u64 last_insn_cnt;
+	char insn[INTEL_PT_INSN_BUF_SZ];
+};
+
+/* Print buf[0..len) as a hex dump, one Intel PT packet (or bad byte) per line. */
+static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
+			  unsigned char *buf, size_t len)
+{
+	struct intel_pt_pkt packet;
+	size_t pos = 0;
+	int ret, pkt_len, i;
+	char desc[INTEL_PT_PKT_DESC_MAX];
+	const char *color = PERF_COLOR_BLUE;
+
+	color_fprintf(stdout, color,
+		      ". ... Intel Processor Trace data: size %zu bytes\n", len);
+
+	while (len) {
+		ret = intel_pt_get_packet(buf, len, &packet);
+		if (ret > 0)
+			pkt_len = ret;
+		else
+			pkt_len = 1;	/* undecodable: step over a single byte */
+		printf(".");
+		/* pos is a size_t, so it must be printed with %zx, not %x */
+		color_fprintf(stdout, color, "  %08zx: ", pos);
+		for (i = 0; i < pkt_len; i++)
+			color_fprintf(stdout, color, " %02x", buf[i]);
+		for (; i < 16; i++)	/* pad the hex column out to 16 byte slots */
+			color_fprintf(stdout, color, "   ");
+		if (ret > 0) {
+			ret = intel_pt_pkt_desc(&packet, desc, sizeof(desc));
+			if (ret > 0)
+				color_fprintf(stdout, color, " %s\n", desc);
+		} else {
+			color_fprintf(stdout, color, " Bad packet!\n");
+		}
+		pos += pkt_len;
+		buf += pkt_len;
+		len -= pkt_len;
+	}
+}
+
+static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
+				size_t len)
+{
+	printf(".\n");	/* a lone "." line ahead of the dump */
+	intel_pt_dump(pt, buf, len);	/* then the packet-by-packet dump of buf[0..len) */
+}
+
+static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
+				   struct auxtrace_buffer *b)
+{
+	/* Make b's use_data/use_size start where intel_pt_find_overlap() says. */
+	bool contiguous = false;
+	void *fresh = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
+					    pt->have_tsc, &contiguous);
+
+	if (fresh == NULL)
+		return -EINVAL;
+	b->use_data = fresh;
+	b->use_size = b->data + b->size - fresh;	/* bytes from fresh to the end of b */
+	if (contiguous && b->use_size)
+		b->consecutive = true;
+	return 0;
+}
+
+/* Get the queue's next non-empty buffer into *b. Assumes data is processed sequentially only. */
+static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
+{
+	struct intel_pt_queue *ptq = data;
+	struct auxtrace_buffer *buffer = ptq->buffer;
+	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
+	struct auxtrace_queue *queue;
+	bool might_overlap;
+
+	if (ptq->stop) {	/* stopped: report no data (len 0) */
+		b->len = 0;
+		return 0;
+	}
+
+	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+	buffer = auxtrace_buffer__next(queue, buffer);
+	if (!buffer) {	/* no further buffer: drop the previous buffer's data, report no data */
+		if (old_buffer)
+			auxtrace_buffer__drop_data(old_buffer);
+		b->len = 0;
+		return 0;
+	}
+
+	ptq->buffer = buffer;
+
+	if (!buffer->data) {	/* data not present yet: get it using the session's data fd */
+		int fd = perf_data__fd(ptq->pt->session->data);
+
+		buffer->data = auxtrace_buffer__get_data(buffer, fd);
+		if (!buffer->data)
+			return -ENOMEM;
+	}
+	/* Overlap with the previous buffer is only considered in snapshot or sampling mode */
+	might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
+	if (might_overlap && !buffer->consecutive && old_buffer &&
+	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
+		return -ENOMEM;	/* NOTE(review): the helper fails with -EINVAL; confirm -ENOMEM is intended */
+	/* Use the use_data/use_size view when one has been set, else the whole buffer */
+	if (buffer->use_data) {
+		b->len = buffer->use_size;
+		b->buf = buffer->use_data;
+	} else {
+		b->len = buffer->size;
+		b->buf = buffer->data;
+	}
+	b->ref_timestamp = buffer->reference;
+	/* First buffer, or a possibly-overlapping one not marked consecutive, starts a new trace_nr */
+	if (!old_buffer || (might_overlap && !buffer->consecutive)) {
+		b->consecutive = false;
+		b->trace_nr = buffer->buffer_nr + 1;
+	} else {
+		b->consecutive = true;
+	}
+
+	if (ptq->step_through_buffers)
+		ptq->stop = true;	/* the next call (including the recursive one below) reports no data */
+
+	if (b->len) {
+		if (old_buffer)
+			auxtrace_buffer__drop_data(old_buffer);
+		ptq->old_buffer = buffer;
+	} else {	/* nothing to decode in this buffer: drop its data and try the next one */
+		auxtrace_buffer__drop_data(buffer);
+		return intel_pt_get_trace(b, data);
+	}
+
+	return 0;
+}
+
+struct intel_pt_cache_entry { /* cached result of one intel_pt_walk_next_insn() walk */
+	struct auxtrace_cache_entry	entry; /* handle given to auxtrace_cache__add() */
+	u64				insn_cnt; /* instructions walked */
+	u64				byte_cnt; /* distance from the walk start to the final ip */
+	enum intel_pt_insn_op		op; /* op, branch, length, rel: copied from the last decoded insn */
+	enum intel_pt_insn_branch	branch;
+	int				length;
+	int32_t				rel;
+	char				insn[INTEL_PT_INSN_BUF_SZ]; /* copy of the last insn's buf */
+};
+
+/*
+ * perf_config() callback that picks up "intel-pt.cache-divisor".
+ *
+ * @var:   name of the config variable being visited
+ * @value: its value as a string; NULL when the key was given without a value
+ * @data:  pointer to the int that receives the divisor
+ *
+ * The divisor is stored only if it parses to a value in 1..INT_MAX; anything
+ * else (including a missing value) leaves *data untouched.  Always returns 0
+ * so that config parsing carries on.
+ */
+static int intel_pt_config_div(const char *var, const char *value, void *data)
+{
+	int *d = data;
+	long val;
+
+	if (strcmp(var, "intel-pt.cache-divisor"))
+		return 0;
+
+	/* A key with no "= value" part is passed with a NULL value */
+	if (!value)
+		return 0;
+
+	val = strtol(value, NULL, 0);
+	if (val > 0 && val <= INT_MAX)
+		*d = val;
+
+	return 0;
+}
+
+/*
+ * Return the cache divisor, reading it from the perf config the first time
+ * (and every time until a non-zero value is obtained).  Falls back to 64 when
+ * the config does not supply one.
+ */
+static int intel_pt_cache_divisor(void)
+{
+	static int divisor;
+
+	if (!divisor) {
+		perf_config(intel_pt_config_div, &divisor);
+		if (!divisor)
+			divisor = 64;
+	}
+
+	return divisor;
+}
+
+/*
+ * Choose the number of hash bits for a dso's instruction cache from the dso
+ * data size divided by the configured cache divisor.
+ *
+ * The result is clamped: anything below 1000 (including a negative size)
+ * gets 10 bits and anything of 2^21 or more gets 21 bits.  The upper test is
+ * inclusive so that exactly 2^21 is capped too instead of yielding 22 bits.
+ */
+static unsigned int intel_pt_cache_size(struct dso *dso,
+					struct machine *machine)
+{
+	off_t size;
+
+	size = dso__data_size(dso, machine);
+	size /= intel_pt_cache_divisor();
+	if (size < 1000)
+		return 10;
+	if (size >= (1 << 21))
+		return 21;
+	/* 1000 <= size < 2^21 here, so it fits __builtin_clz()'s unsigned int */
+	return 32 - __builtin_clz(size);
+}
+
+/*
+ * Return the dso's instruction cache, creating it on first use.  Creation
+ * failure is not treated as an error: NULL is stored and returned, and
+ * creation is attempted again on the next call.
+ */
+static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
+					     struct machine *machine)
+{
+	if (!dso->auxtrace_cache) {
+		unsigned int bits = intel_pt_cache_size(dso, machine);
+
+		dso->auxtrace_cache =
+			auxtrace_cache__new(bits,
+					    sizeof(struct intel_pt_cache_entry),
+					    200);
+	}
+
+	return dso->auxtrace_cache;
+}
+
+/*
+ * Record the outcome of an instruction walk in the dso's cache, keyed by
+ * 'offset'.  Returns 0 on success, -ENOMEM if there is no cache or no entry
+ * could be allocated, or the error from auxtrace_cache__add() (in which case
+ * the new entry is freed again).
+ */
+static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
+			      u64 offset, u64 insn_cnt, u64 byte_cnt,
+			      struct intel_pt_insn *intel_pt_insn)
+{
+	struct auxtrace_cache *cache = intel_pt_cache(dso, machine);
+	struct intel_pt_cache_entry *ce;
+	int ret;
+
+	if (!cache)
+		return -ENOMEM;
+
+	ce = auxtrace_cache__alloc_entry(cache);
+	if (!ce)
+		return -ENOMEM;
+
+	/* Snapshot of the last decoded instruction */
+	memcpy(ce->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
+	ce->rel = intel_pt_insn->rel;
+	ce->length = intel_pt_insn->length;
+	ce->branch = intel_pt_insn->branch;
+	ce->op = intel_pt_insn->op;
+
+	/* Extent of the walk */
+	ce->byte_cnt = byte_cnt;
+	ce->insn_cnt = insn_cnt;
+
+	ret = auxtrace_cache__add(cache, offset, &ce->entry);
+	if (ret)
+		auxtrace_cache__free_entry(cache, ce);
+
+	return ret;
+}
+
+/*
+ * Look up the cache entry stored for 'offset' in the dso's instruction cache.
+ * Returns NULL if the dso has no cache (or, presumably, no matching entry).
+ */
+static struct intel_pt_cache_entry *
+intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
+{
+	struct auxtrace_cache *cache = intel_pt_cache(dso, machine);
+
+	if (cache)
+		return auxtrace_cache__lookup(cache, offset);
+
+	return NULL;
+}
+
+static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
+				   uint64_t *insn_cnt_ptr, uint64_t *ip,
+				   uint64_t to_ip, uint64_t max_insn_cnt,
+				   void *data)
+{	/*
+	 * Decoder 'walk_insn' callback.  Starting at *ip, decode instructions
+	 * from the dso mapped there until a branch instruction is decoded,
+	 * 'to_ip' is reached (when non-zero) or 'max_insn_cnt' instructions
+	 * have been decoded (when non-zero).
+	 *
+	 * On return 0, *insn_cnt_ptr holds the number of instructions walked,
+	 * *ip the address reached and *intel_pt_insn the last instruction
+	 * decoded (length 0 if none was).  When the walk stops at a branch,
+	 * *ip is the address of that branch, not the address after it.
+	 *
+	 * Returns -EINVAL if there is no thread for a user-space address, no
+	 * map/dso, or reading/decoding fails, and -ENOENT if the dso data is
+	 * in an error state that has already been seen.
+	 */
+	struct intel_pt_queue *ptq = data;
+	struct machine *machine = ptq->pt->machine;
+	struct thread *thread;
+	struct addr_location al;
+	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
+	ssize_t len;
+	int x86_64;
+	u8 cpumode;
+	u64 offset, start_offset, start_ip;
+	u64 insn_cnt = 0;
+	bool one_map = true; /* cleared once the walk runs off the end of a map */
+
+	intel_pt_insn->length = 0;
+
+	if (to_ip && *ip == to_ip) /* already at the target: nothing to walk */
+		goto out_no_cache;
+
+	if (*ip >= ptq->pt->kernel_start)
+		cpumode = PERF_RECORD_MISC_KERNEL;
+	else
+		cpumode = PERF_RECORD_MISC_USER;
+
+	thread = ptq->thread;
+	if (!thread) { /* only kernel addresses can be resolved without a thread */
+		if (cpumode != PERF_RECORD_MISC_KERNEL)
+			return -EINVAL;
+		thread = ptq->pt->unknown_thread;
+	}
+
+	while (1) { /* one iteration per map the walk passes through */
+		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
+		if (!al.map || !al.map->dso)
+			return -EINVAL;
+
+		if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
+		    dso__data_status_seen(al.map->dso,
+					  DSO_DATA_STATUS_SEEN_ITRACE))
+			return -ENOENT;
+
+		offset = al.map->map_ip(al.map, *ip);
+
+		if (!to_ip && one_map) { /* cache is consulted only for untargeted, single-map walks */
+			struct intel_pt_cache_entry *e;
+
+			e = intel_pt_cache_lookup(al.map->dso, machine, offset);
+			if (e &&
+			    (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
+				*insn_cnt_ptr = e->insn_cnt;
+				*ip += e->byte_cnt;
+				intel_pt_insn->op = e->op;
+				intel_pt_insn->branch = e->branch;
+				intel_pt_insn->length = e->length;
+				intel_pt_insn->rel = e->rel;
+				memcpy(intel_pt_insn->buf, e->insn,
+				       INTEL_PT_INSN_BUF_SZ);
+				intel_pt_log_insn_no_data(intel_pt_insn, *ip);
+				return 0;
+			}
+		}
+
+		start_offset = offset; /* cache key and base for byte_cnt if this walk is cached */
+		start_ip = *ip;
+
+		/* Load maps to ensure dso->is_64_bit has been updated */
+		map__load(al.map);
+
+		x86_64 = al.map->dso->is_64_bit;
+
+		while (1) { /* one iteration per instruction */
+			len = dso__data_read_offset(al.map->dso, machine,
+						    offset, buf,
+						    INTEL_PT_INSN_BUF_SZ);
+			if (len <= 0)
+				return -EINVAL;
+
+			if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
+				return -EINVAL;
+
+			intel_pt_log_insn(intel_pt_insn, *ip);
+
+			insn_cnt += 1;
+
+			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
+				goto out; /* *ip is left pointing at the branch */
+
+			if (max_insn_cnt && insn_cnt >= max_insn_cnt)
+				goto out_no_cache;
+
+			*ip += intel_pt_insn->length;
+
+			if (to_ip && *ip == to_ip)
+				goto out_no_cache;
+
+			if (*ip >= al.map->end) /* ran off this map: look up the next one */
+				break;
+
+			offset += intel_pt_insn->length;
+		}
+		one_map = false;
+	}
+out:
+	*insn_cnt_ptr = insn_cnt;
+
+	if (!one_map) /* walks that crossed maps are not cached */
+		goto out_no_cache;
+
+	/*
+	 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
+	 * entries.
+	 */
+	if (to_ip) {
+		struct intel_pt_cache_entry *e;
+
+		e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
+		if (e)
+			return 0;
+	}
+
+	/* Ignore cache errors */
+	intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
+			   *ip - start_ip, intel_pt_insn);
+
+	return 0;
+
+out_no_cache:
+	*insn_cnt_ptr = insn_cnt;
+	return 0;
+}
+
+/*
+ * Check a TIP.PGD address against the configured address filters.
+ *
+ * A filter entry applies when its filename matches 'filename' (both NULL, or
+ * both set and equal) and 'offset' lies within [addr, addr + size).
+ *
+ * Returns true if a trace-stop entry was hit, or if at least one start
+ * filter exists and none of them was hit.
+ */
+static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
+				  uint64_t offset, const char *filename)
+{
+	struct addr_filter *filt;
+	bool any_filter = false;
+	bool in_filter = false;
+	bool in_tracestop = false;
+
+	list_for_each_entry(filt, &pt->filts.head, list) {
+		if (filt->start)
+			any_filter = true;
+
+		/* Names must both be absent, or both present and identical */
+		if (filename) {
+			if (!filt->filename ||
+			    strcmp(filename, filt->filename))
+				continue;
+		} else if (filt->filename) {
+			continue;
+		}
+
+		if (offset < filt->addr || offset >= filt->addr + filt->size)
+			continue;
+
+		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
+			     ip, offset, filename ? filename : "[kernel]",
+			     filt->start ? "filter" : "stop",
+			     filt->addr, filt->size);
+
+		if (filt->start)
+			in_filter = true;
+		else
+			in_tracestop = true;
+	}
+
+	if (!(in_tracestop || in_filter))
+		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
+			     ip, offset, filename ? filename : "[kernel]");
+
+	if (in_tracestop)
+		return true;
+
+	return any_filter && !in_filter;
+}
+
+/*
+ * Resolve 'ip' and test it against the address filters.  Kernel addresses
+ * are matched as-is with no filename; user addresses are converted to an
+ * offset within the dso they map to and matched against the dso long name.
+ *
+ * Returns the filter match result, or -EINVAL if a user address cannot be
+ * resolved (no thread, map or dso).
+ */
+static int __intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+	struct intel_pt_queue *ptq = data;
+	struct intel_pt *pt = ptq->pt;
+	struct addr_location al;
+	struct map *map;
+
+	if (ip >= pt->kernel_start)
+		return intel_pt_match_pgd_ip(pt, ip, ip, NULL);
+
+	if (!ptq->thread)
+		return -EINVAL;
+
+	thread__find_addr_map(ptq->thread, PERF_RECORD_MISC_USER,
+			      MAP__FUNCTION, ip, &al);
+	map = al.map;
+	if (!map || !map->dso)
+		return -EINVAL;
+
+	return intel_pt_match_pgd_ip(pt, ip, map->map_ip(map, ip),
+				     map->dso->long_name);
+}
+
+/* Decoder 'pgd_ip' callback: true only on a positive filter match */
+static bool intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+	int match = __intel_pt_pgd_ip(ip, data);
+
+	/* Resolution errors are negative and count as "no match" */
+	return match > 0;
+}
+
+/*
+ * Tell whether 'attr' belongs to the Intel PT PMU.  If it does and 'config'
+ * is non-NULL, the attr's config value is copied to *config.
+ */
+static bool intel_pt_get_config(struct intel_pt *pt,
+				struct perf_event_attr *attr, u64 *config)
+{
+	if (attr->type != pt->pmu_type)
+		return false;
+
+	if (config)
+		*config = attr->config;
+
+	return true;
+}
+
+/* True unless some Intel PT event in the session does not exclude the kernel */
+static bool intel_pt_exclude_kernel(struct intel_pt *pt)
+{
+	struct perf_evsel *evsel;
+	bool excluded = true;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (!intel_pt_get_config(pt, &evsel->attr, NULL))
+			continue;
+		if (!evsel->attr.exclude_kernel) {
+			excluded = false;
+			break;
+		}
+	}
+
+	return excluded;
+}
+
+/*
+ * True unless an Intel PT event has the 'noretcomp' config bit set.  Also
+ * true when the position of that bit is not known (noretcomp_bit is 0).
+ */
+static bool intel_pt_return_compression(struct intel_pt *pt)
+{
+	struct perf_evsel *evsel;
+	u64 config;
+
+	if (pt->noretcomp_bit) {
+		evlist__for_each_entry(pt->session->evlist, evsel) {
+			if (!intel_pt_get_config(pt, &evsel->attr, &config))
+				continue;
+			if (config & pt->noretcomp_bit)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * False if some Intel PT event has config bit 0 set while bit 13 (0x2000) is
+ * clear; true otherwise.
+ */
+static bool intel_pt_branch_enable(struct intel_pt *pt)
+{
+	struct perf_evsel *evsel;
+	u64 config;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (!intel_pt_get_config(pt, &evsel->attr, &config))
+			continue;
+		/* bit 0 set and bit 13 clear */
+		if ((config & 0x2001) == 1)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Extract the MTC period field from the config of the first Intel PT event.
+ * pt->mtc_freq_bits is the mask of that field; the value is returned shifted
+ * down to bit 0.  Returns 0 if the mask is unknown or there is no PT event.
+ */
+static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
+{
+	struct perf_evsel *evsel;
+	unsigned int shift = 0;
+	u64 mask = pt->mtc_freq_bits;
+	u64 config;
+
+	if (!pt->mtc_freq_bits)
+		return 0;
+
+	/* Position of the lowest set bit of the mask */
+	while (!(mask & 1)) {
+		mask >>= 1;
+		shift++;
+	}
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (!intel_pt_get_config(pt, &evsel->attr, &config))
+			continue;
+		return (config & pt->mtc_freq_bits) >> shift;
+	}
+
+	return 0;
+}
+
+/*
+ * Decide whether decoding must be done without timestamps.  That is the case
+ * when the TSC bit or cap_user_time_zero is missing, when any event lacks
+ * PERF_SAMPLE_TIME, when any Intel PT event has the TSC bit clear, or when no
+ * Intel PT event with the TSC bit set is found.
+ */
+static bool intel_pt_timeless_decoding(struct intel_pt *pt)
+{
+	struct perf_evsel *evsel;
+	bool tsc_seen = false;
+	u64 config;
+
+	if (!pt->tsc_bit || !pt->cap_user_time_zero)
+		return true;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
+			return true;
+		if (!intel_pt_get_config(pt, &evsel->attr, &config))
+			continue;
+		if (!(config & pt->tsc_bit))
+			return true;
+		tsc_seen = true;
+	}
+
+	return !tsc_seen;
+}
+
+/* True if some Intel PT event in the session does not exclude the kernel */
+static bool intel_pt_tracing_kernel(struct intel_pt *pt)
+{
+	struct perf_evsel *evsel;
+	bool tracing = false;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (!intel_pt_get_config(pt, &evsel->attr, NULL))
+			continue;
+		if (!evsel->attr.exclude_kernel) {
+			tracing = true;
+			break;
+		}
+	}
+
+	return tracing;
+}
+
+/*
+ * True only if the TSC bit is known, at least one Intel PT event exists and
+ * every Intel PT event has the TSC bit set in its config.
+ */
+static bool intel_pt_have_tsc(struct intel_pt *pt)
+{
+	struct perf_evsel *evsel;
+	bool found = false;
+	u64 config;
+
+	if (!pt->tsc_bit)
+		return false;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (!intel_pt_get_config(pt, &evsel->attr, &config))
+			continue;
+		if (!(config & pt->tsc_bit))
+			return false;
+		found = true;
+	}
+
+	return found;
+}
+
+/*
+ * Convert nanoseconds to ticks using the session's time conversion
+ * parameters: ticks = (ns << time_shift) / time_mult, computed separately for
+ * the quotient and remainder of ns / time_mult.
+ */
+static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
+{
+	u64 whole = ns / pt->tc.time_mult;
+	u64 part = ns % pt->tc.time_mult;
+
+	whole <<= pt->tc.time_shift;
+	part = (part << pt->tc.time_shift) / pt->tc.time_mult;
+
+	return whole + part;
+}
+
+static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
+						   unsigned int queue_nr)
+{	/*
+	 * Allocate and initialise the per-queue decoding state for queue
+	 * 'queue_nr', including the buffers required by the synthesis options
+	 * and a decoder instance.  Returns NULL if any allocation or the
+	 * decoder creation fails; everything allocated so far is then freed.
+	 */
+	struct intel_pt_params params = { .get_trace = 0, };
+	struct intel_pt_queue *ptq;
+
+	ptq = zalloc(sizeof(struct intel_pt_queue));
+	if (!ptq)
+		return NULL;
+
+	if (pt->synth_opts.callchain) { /* room for callchain_sz addresses */
+		size_t sz = sizeof(struct ip_callchain);
+
+		sz += pt->synth_opts.callchain_sz * sizeof(u64);
+		ptq->chain = zalloc(sz);
+		if (!ptq->chain)
+			goto out_free;
+	}
+
+	if (pt->synth_opts.last_branch) { /* two stacks: sample copy and ring buffer */
+		size_t sz = sizeof(struct branch_stack);
+
+		sz += pt->synth_opts.last_branch_sz *
+		      sizeof(struct branch_entry);
+		ptq->last_branch = zalloc(sz);
+		if (!ptq->last_branch)
+			goto out_free;
+		ptq->last_branch_rb = zalloc(sz);
+		if (!ptq->last_branch_rb)
+			goto out_free;
+	}
+
+	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+	if (!ptq->event_buf)
+		goto out_free;
+
+	ptq->pt = pt;
+	ptq->queue_nr = queue_nr;
+	ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
+	ptq->pid = -1; /* -1: not known yet */
+	ptq->tid = -1;
+	ptq->cpu = -1;
+	ptq->next_tid = -1;
+
+	params.get_trace = intel_pt_get_trace;
+	params.walk_insn = intel_pt_walk_next_insn;
+	params.data = ptq; /* handed back to the callbacks as 'data' */
+	params.return_compression = intel_pt_return_compression(pt);
+	params.branch_enable = intel_pt_branch_enable(pt);
+	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
+	params.mtc_period = intel_pt_mtc_period(pt);
+	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
+	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
+
+	if (pt->filts.cnt > 0) /* only needed when address filters are in use */
+		params.pgd_ip = intel_pt_pgd_ip;
+
+	if (pt->synth_opts.instructions) {
+		if (pt->synth_opts.period) {
+			switch (pt->synth_opts.period_type) {
+			case PERF_ITRACE_PERIOD_INSTRUCTIONS:
+				params.period_type =
+						INTEL_PT_PERIOD_INSTRUCTIONS;
+				params.period = pt->synth_opts.period;
+				break;
+			case PERF_ITRACE_PERIOD_TICKS:
+				params.period_type = INTEL_PT_PERIOD_TICKS;
+				params.period = pt->synth_opts.period;
+				break;
+			case PERF_ITRACE_PERIOD_NANOSECS: /* converted to a tick period */
+				params.period_type = INTEL_PT_PERIOD_TICKS;
+				params.period = intel_pt_ns_to_ticks(pt,
+							pt->synth_opts.period);
+				break;
+			default:
+				break;
+			}
+		}
+
+		if (!params.period) { /* fall back to sampling every instruction */
+			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
+			params.period = 1;
+		}
+	}
+
+	ptq->decoder = intel_pt_decoder_new(&params);
+	if (!ptq->decoder)
+		goto out_free;
+
+	return ptq;
+
+out_free:
+	zfree(&ptq->event_buf);
+	zfree(&ptq->last_branch);
+	zfree(&ptq->last_branch_rb);
+	zfree(&ptq->chain);
+	free(ptq);
+	return NULL;
+}
+
+/*
+ * Release everything owned by a queue's private data: the thread reference,
+ * the decoder, the sample buffers and the structure itself.  A NULL 'priv'
+ * is ignored.
+ */
+static void intel_pt_free_queue(void *priv)
+{
+	struct intel_pt_queue *ptq = priv;
+
+	if (ptq) {
+		thread__zput(ptq->thread);
+		intel_pt_decoder_free(ptq->decoder);
+		zfree(&ptq->chain);
+		zfree(&ptq->last_branch_rb);
+		zfree(&ptq->last_branch);
+		zfree(&ptq->event_buf);
+		free(ptq);
+	}
+}
+
+/*
+ * Refresh the queue's tid, thread, pid and cpu.  For queues not bound to a
+ * tid, or when sched switch events are in use, the tid is re-read from the
+ * machine's current tid for the cpu and the cached thread is dropped.  The
+ * thread is then looked up again if needed, and pid (plus cpu, for queues not
+ * bound to a cpu) is taken from it.
+ */
+static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
+				     struct auxtrace_queue *queue)
+{
+	struct intel_pt_queue *ptq = queue->priv;
+	bool refresh_tid = queue->tid == -1 || pt->have_sched_switch;
+
+	if (refresh_tid) {
+		ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
+		thread__zput(ptq->thread);
+	}
+
+	if (ptq->tid != -1 && !ptq->thread)
+		ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
+
+	if (!ptq->thread)
+		return;
+
+	ptq->pid = ptq->thread->pid_;
+	if (queue->cpu == -1)
+		ptq->cpu = ptq->thread->cpu;
+}
+
+/*
+ * Derive the sample flags (and instruction bytes/length) for the queue from
+ * its current decoder state.  Transaction aborts and asynchronous branches
+ * get fixed flag sets; otherwise the flags come from the instruction type,
+ * or mark a trace begin when there is no 'from' address.
+ */
+static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
+{
+	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
+		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
+		return;
+	}
+
+	if (ptq->state->flags & INTEL_PT_ASYNC) {
+		/* No 'to' address means tracing stopped here */
+		if (!ptq->state->to_ip)
+			ptq->flags = PERF_IP_FLAG_BRANCH |
+				     PERF_IP_FLAG_TRACE_END;
+		else
+			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+				     PERF_IP_FLAG_ASYNC |
+				     PERF_IP_FLAG_INTERRUPT;
+		ptq->insn_len = 0;
+		return;
+	}
+
+	/* No 'from' address means tracing started here */
+	if (!ptq->state->from_ip)
+		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN;
+	else
+		ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
+
+	if (ptq->state->flags & INTEL_PT_IN_TX)
+		ptq->flags |= PERF_IP_FLAG_IN_TX;
+
+	memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
+	ptq->insn_len = ptq->state->insn_len;
+}
+
+/*
+ * Prepare one auxtrace queue for decoding.  Empty queues are skipped.  The
+ * per-queue state is allocated on first use.  Unless decoding is timeless,
+ * a queue that is not yet on the heap (and is not waiting for a switch
+ * event) is decoded until the first timestamp turns up and is then added to
+ * the heap with that timestamp.
+ *
+ * Returns 0 on success (including "queue has no timestamp"), -ENOMEM if the
+ * queue state cannot be allocated, or the error from auxtrace_heap__add().
+ */
+static int intel_pt_setup_queue(struct intel_pt *pt,
+				struct auxtrace_queue *queue,
+				unsigned int queue_nr)
+{
+	struct intel_pt_queue *ptq = queue->priv;
+	const struct intel_pt_state *state;
+	int err;
+
+	if (list_empty(&queue->head))
+		return 0;
+
+	if (!ptq) {
+		ptq = intel_pt_alloc_queue(pt, queue_nr);
+		if (!ptq)
+			return -ENOMEM;
+		queue->priv = ptq;
+
+		ptq->tid = queue->tid;
+		if (queue->cpu != -1)
+			ptq->cpu = queue->cpu;
+
+		ptq->sync_switch = pt->sync_switch;
+
+		if (pt->sampling_mode && !pt->snapshot_mode &&
+		    pt->timeless_decoding)
+			ptq->step_through_buffers = true;
+	}
+
+	if (ptq->on_heap)
+		return 0;
+
+	if (ptq->sync_switch &&
+	    ptq->switch_state == INTEL_PT_SS_EXPECTING_SWITCH_EVENT)
+		return 0;
+
+	if (pt->timeless_decoding)
+		return 0;
+
+	intel_pt_log("queue %u getting timestamp\n", queue_nr);
+	intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
+		     queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+
+	/* Decode, ignoring errors, until a timestamp appears or data runs out */
+	for (;;) {
+		state = intel_pt_decode(ptq->decoder);
+		if (!state->err) {
+			if (state->timestamp)
+				break;
+			continue;
+		}
+		if (state->err != INTEL_PT_ERR_NODATA)
+			continue;
+		intel_pt_log("queue %u has no timestamp\n",
+			     queue_nr);
+		return 0;
+	}
+
+	ptq->timestamp = state->timestamp;
+	intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
+		     queue_nr, ptq->timestamp);
+	ptq->state = state;
+	ptq->have_sample = true;
+	intel_pt_sample_flags(ptq);
+
+	err = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
+	if (!err)
+		ptq->on_heap = true;
+
+	return err;
+}
+
+static int intel_pt_setup_queues(struct intel_pt *pt)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < pt->queues.nr_queues; i++) {
+		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Linearise the last-branch ring buffer into ptq->last_branch.  Entries from
+ * the current position to the end of the ring are copied first; the entries
+ * before the current position follow only once the ring has filled up.
+ */
+static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	struct branch_stack *src = ptq->last_branch_rb;
+	struct branch_stack *dst = ptq->last_branch;
+	size_t tail;
+
+	dst->nr = src->nr;
+	if (src->nr == 0)
+		return;
+
+	tail = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
+	memcpy(dst->entries, src->entries + ptq->last_branch_pos,
+	       tail * sizeof(struct branch_entry));
+
+	if (src->nr < ptq->pt->synth_opts.last_branch_sz)
+		return;
+
+	/* Ring is full: the wrapped-around part sits at the start */
+	memcpy(dst->entries + tail, src->entries,
+	       ptq->last_branch_pos * sizeof(struct branch_entry));
+}
+
+/* Empty the last-branch ring buffer and rewind its position */
+static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	ptq->last_branch_rb->nr = 0;
+	ptq->last_branch_pos = 0;
+}
+
+/*
+ * Push the branch described by the current decoder state onto the
+ * last-branch ring buffer.  The ring is filled backwards: the position is
+ * decremented (wrapping from 0 to the ring size) before the entry is written,
+ * and the entry count grows until the ring is full.
+ */
+static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	const struct intel_pt_state *st = ptq->state;
+	struct branch_stack *ring = ptq->last_branch_rb;
+	struct branch_entry *slot;
+
+	if (ptq->last_branch_pos == 0)
+		ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
+	ptq->last_branch_pos--;
+
+	slot = ring->entries + ptq->last_branch_pos;
+	slot->from = st->from_ip;
+	slot->to = st->to_ip;
+	slot->flags.abort = (st->flags & INTEL_PT_ABORT_TX) ? 1 : 0;
+	slot->flags.in_tx = (st->flags & INTEL_PT_IN_TX) ? 1 : 0;
+	/* Mispredicts are not supported: use the session-wide setting */
+	slot->flags.mispred = ptq->pt->mispred_all;
+
+	if (ring->nr < ptq->pt->synth_opts.last_branch_sz)
+		ring->nr++;
+}
+
+/*
+ * Count an event and tell whether it falls within the initial events to be
+ * skipped.  Events are only counted while an initial skip is configured.
+ */
+static inline bool intel_pt_skip_event(struct intel_pt *pt)
+{
+	if (!pt->synth_opts.initial_skip)
+		return false;
+
+	return pt->num_events++ < pt->synth_opts.initial_skip;
+}
+
+/*
+ * Fill in the fields common to all synthesized samples from the queue's
+ * current decoder state.
+ *
+ * @pt:     Intel PT session state
+ * @ptq:    queue the sample is generated for
+ * @event:  event buffer whose sample header is initialised
+ * @sample: sample to fill in
+ *
+ * The cpumode is derived from the sample ip: addresses at or above
+ * pt->kernel_start are kernel addresses, the same test that
+ * intel_pt_walk_next_insn() applies.  The event header 'misc' field is kept
+ * in step with the sample cpumode so kernel samples are not labelled as
+ * user-space ones.
+ */
+static void intel_pt_prep_b_sample(struct intel_pt *pt,
+				   struct intel_pt_queue *ptq,
+				   union perf_event *event,
+				   struct perf_sample *sample)
+{
+	if (!pt->timeless_decoding)
+		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+
+	sample->ip = ptq->state->from_ip;
+	if (sample->ip >= pt->kernel_start)
+		sample->cpumode = PERF_RECORD_MISC_KERNEL;
+	else
+		sample->cpumode = PERF_RECORD_MISC_USER;
+	sample->pid = ptq->pid;
+	sample->tid = ptq->tid;
+	sample->addr = ptq->state->to_ip;
+	sample->period = 1;
+	sample->cpu = ptq->cpu;
+	sample->flags = ptq->flags;
+	sample->insn_len = ptq->insn_len;
+	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = sample->cpumode;
+	event->sample.header.size = sizeof(struct perf_event_header);
+}
+
+/*
+ * Turn 'sample' into a complete sample event in 'event': set the header size
+ * for sample type 'type' (read format 0) and synthesize the sample payload.
+ */
+static int intel_pt_inject_event(union perf_event *event,
+				 struct perf_sample *sample, u64 type)
+{
+	const u64 read_format = 0;
+
+	event->header.size = perf_event__sample_event_size(sample, type,
+							   read_format);
+
+	return perf_event__synthesize_sample(event, type, read_format, sample);
+}
+
+/* Synthesize the full event only when the 'inject' option is set */
+static inline int intel_pt_opt_inject(struct intel_pt *pt,
+				      union perf_event *event,
+				      struct perf_sample *sample, u64 type)
+{
+	if (pt->synth_opts.inject)
+		return intel_pt_inject_event(event, sample, type);
+
+	return 0;
+}
+
+/*
+ * Optionally synthesize the event payload, then deliver the event to the
+ * session.  A delivery failure is reported with pr_err().  Returns 0 or the
+ * first error encountered.
+ */
+static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
+					  union perf_event *event,
+					  struct perf_sample *sample, u64 type)
+{
+	int err = intel_pt_opt_inject(pt, event, sample, type);
+
+	if (err)
+		return err;
+
+	err = perf_session__deliver_synth_event(pt->session, event, sample);
+	if (!err)
+		return 0;
+
+	pr_err("Intel PT: failed to deliver event, error %d\n", err);
+	return err;
+}
+
+/*
+ * Synthesize and deliver a 'branches' sample for the queue's current state.
+ * Nothing is delivered if the branch is filtered out by pt->branches_filter
+ * or falls within the initial events to skip.
+ */
+static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+{
+	struct dummy_branch_stack {
+		u64			nr;
+		struct branch_entry	entries;
+	} dummy_bs;
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event *event = ptq->event_buf;
+	struct intel_pt *pt = ptq->pt;
+
+	if (pt->branches_filter && (pt->branches_filter & ptq->flags) == 0)
+		return 0;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_b_sample(pt, ptq, event, &sample);
+
+	sample.stream_id = pt->branches_id;
+	sample.id = pt->branches_id;
+
+	/*
+	 * In SORT_MODE__BRANCH perf report needs every event to carry a
+	 * branch stack, so supply a single-entry one for this branch.
+	 */
+	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
+		dummy_bs = (struct dummy_branch_stack){
+			.nr = 1,
+			.entries = {
+				.from = sample.ip,
+				.to = sample.addr,
+			},
+		};
+		sample.branch_stack = (struct branch_stack *)&dummy_bs;
+	}
+
+	return intel_pt_deliver_synth_b_event(pt, event, &sample,
+					      pt->branches_sample_type);
+}
+
+/*
+ * Prepare a sample like intel_pt_prep_b_sample() does and additionally
+ * attach the last-branch stack and the callchain when those synthesis
+ * options are enabled.
+ */
+static void intel_pt_prep_sample(struct intel_pt *pt,
+				 struct intel_pt_queue *ptq,
+				 union perf_event *event,
+				 struct perf_sample *sample)
+{
+	intel_pt_prep_b_sample(pt, ptq, event, sample);
+
+	if (pt->synth_opts.last_branch) {
+		intel_pt_copy_last_branch_rb(ptq);
+		sample->branch_stack = ptq->last_branch;
+	}
+
+	if (pt->synth_opts.callchain) {
+		thread_stack__sample(ptq->thread, ptq->chain,
+				     pt->synth_opts.callchain_sz, sample->ip);
+		sample->callchain = ptq->chain;
+	}
+}
+
+/*
+ * Deliver a synthesized sample and, when last-branch synthesis is enabled,
+ * start a fresh last-branch ring buffer afterwards (whether or not delivery
+ * succeeded).  Returns the delivery result.
+ */
+static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
+					       struct intel_pt_queue *ptq,
+					       union perf_event *event,
+					       struct perf_sample *sample,
+					       u64 type)
+{
+	int err = intel_pt_deliver_synth_b_event(pt, event, sample, type);
+
+	if (pt->synth_opts.last_branch) {
+		ptq->last_branch_pos = 0;
+		ptq->last_branch_rb->nr = 0;
+	}
+
+	return err;
+}
+
+/*
+ * Synthesize and deliver an 'instructions' sample.  The sample period is the
+ * number of instructions counted since the previous instructions sample on
+ * this queue.
+ */
+static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
+{
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event *event = ptq->event_buf;
+	struct intel_pt *pt = ptq->pt;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_sample(pt, ptq, event, &sample);
+
+	sample.stream_id = pt->instructions_id;
+	sample.id = pt->instructions_id;
+	sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
+
+	/* Remember where this sample ended for the next period */
+	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->instructions_sample_type);
+}
+
+/* Synthesize and deliver a 'transactions' sample for the current state */
+static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
+{
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event *event = ptq->event_buf;
+	struct intel_pt *pt = ptq->pt;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_sample(pt, ptq, event, &sample);
+
+	sample.stream_id = pt->transactions_id;
+	sample.id = pt->transactions_id;
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->transactions_sample_type);
+}
+
+/*
+ * Prepare a sample for a power or PTWRITE event.  Such events may have no
+ * IP; since a zero IP otherwise means "trace start", the flags are cleared
+ * in that case so the sample is not mistaken for one.
+ */
+static void intel_pt_prep_p_sample(struct intel_pt *pt,
+				   struct intel_pt_queue *ptq,
+				   union perf_event *event,
+				   struct perf_sample *sample)
+{
+	intel_pt_prep_sample(pt, ptq, event, sample);
+
+	if (sample->ip == 0)
+		sample->flags = 0;
+}
+
+/*
+ * Synthesize and deliver a PTWRITE sample whose raw data carries the PTWRITE
+ * payload and a flag telling whether an IP accompanied it.
+ */
+static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
+{
+	struct perf_synth_intel_ptwrite raw;
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event *event = ptq->event_buf;
+	struct intel_pt *pt = ptq->pt;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.stream_id = pt->ptwrites_id;
+	sample.id = pt->ptwrites_id;
+
+	/* Clear all flag bits before setting the individual 'ip' bit */
+	raw.flags = 0;
+	raw.ip = (ptq->state->flags & INTEL_PT_FUP_IP) ? 1 : 0;
+	raw.payload = cpu_to_le64(ptq->state->ptw_payload);
+
+	sample.raw_data = perf_synth__raw_data(&raw);
+	sample.raw_size = perf_synth__raw_size(raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->ptwrites_sample_type);
+}
+
+/*
+ * Synthesize and deliver a CBR (core-to-bus ratio) sample.  The raw flags
+ * word combines the low 16 bits of the CBR payload with the maximum
+ * non-turbo ratio in bits 16 and up; the frequency is the CBR value times
+ * pt->cbr2khz.
+ */
+static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
+{
+	struct perf_synth_intel_cbr raw;
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event *event = ptq->event_buf;
+	struct intel_pt *pt = ptq->pt;
+	u32 flags;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.stream_id = pt->cbr_id;
+	sample.id = pt->cbr_id;
+
+	flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
+	/* raw.cbr is read back below, so raw.flags has to be stored first */
+	raw.flags = cpu_to_le32(flags);
+	raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
+	raw.reserved3 = 0;
+
+	sample.raw_data = perf_synth__raw_data(&raw);
+	sample.raw_size = perf_synth__raw_size(raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+/* Synthesize and deliver an MWAIT power event sample carrying its payload */
+static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
+{
+	struct perf_synth_intel_mwait raw;
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event *event = ptq->event_buf;
+	struct intel_pt *pt = ptq->pt;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.stream_id = pt->mwait_id;
+	sample.id = pt->mwait_id;
+
+	raw.payload = cpu_to_le64(ptq->state->mwait_payload);
+	raw.reserved = 0;
+
+	sample.raw_data = perf_synth__raw_data(&raw);
+	sample.raw_size = perf_synth__raw_size(raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+/* Synthesize and deliver a power entry (PWRE) sample carrying its payload */
+static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
+{
+	struct perf_synth_intel_pwre raw;
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event *event = ptq->event_buf;
+	struct intel_pt *pt = ptq->pt;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.stream_id = pt->pwre_id;
+	sample.id = pt->pwre_id;
+
+	raw.payload = cpu_to_le64(ptq->state->pwre_payload);
+	raw.reserved = 0;
+
+	sample.raw_data = perf_synth__raw_data(&raw);
+	sample.raw_size = perf_synth__raw_size(raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+/*
+ * Synthesize and deliver an execution stop (EXSTOP) sample.  The raw data
+ * only records whether an IP accompanied the event.
+ */
+static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
+{
+	struct perf_synth_intel_exstop raw;
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event *event = ptq->event_buf;
+	struct intel_pt *pt = ptq->pt;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.stream_id = pt->exstop_id;
+	sample.id = pt->exstop_id;
+
+	/* Clear all flag bits before setting the individual 'ip' bit */
+	raw.flags = 0;
+	raw.ip = (ptq->state->flags & INTEL_PT_FUP_IP) ? 1 : 0;
+
+	sample.raw_data = perf_synth__raw_data(&raw);
+	sample.raw_size = perf_synth__raw_size(raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+/* Synthesize and deliver a power exit (PWRX) sample carrying its payload */
+static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
+{
+	struct perf_synth_intel_pwrx raw;
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event *event = ptq->event_buf;
+	struct intel_pt *pt = ptq->pt;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.stream_id = pt->pwrx_id;
+	sample.id = pt->pwrx_id;
+
+	raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
+	raw.reserved = 0;
+
+	sample.raw_data = perf_synth__raw_data(&raw);
+	sample.raw_size = perf_synth__raw_size(raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+/*
+ * Build an auxtrace error event for decoder error 'code' at 'ip' and deliver
+ * it to the session.  A delivery failure is reported with pr_err() and
+ * returned.
+ */
+static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
+				pid_t pid, pid_t tid, u64 ip)
+{
+	char msg[MAX_AUXTRACE_ERROR_MSG];
+	union perf_event event;
+	int ret;
+
+	intel_pt__strerror(code, msg, sizeof(msg));
+
+	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+			     code, cpu, pid, tid, ip, msg);
+
+	ret = perf_session__deliver_synth_event(pt->session, &event, NULL);
+	if (!ret)
+		return 0;
+
+	pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
+	       ret);
+	return ret;
+}
+
+/*
+ * Apply a pending thread switch: make ptq->next_tid the machine's current
+ * tid for the queue's cpu, refresh the queue's pid/tid/cpu and clear the
+ * pending tid.  Does nothing if no switch is pending.  Returns the result of
+ * machine__set_current_tid().
+ */
+static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
+{
+	pid_t tid = ptq->next_tid;
+	int ret;
+
+	if (tid == -1)
+		return 0;
+
+	intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
+
+	ret = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
+
+	intel_pt_set_pid_tid_cpu(pt, &pt->queues.queue_array[ptq->queue_nr]);
+
+	ptq->next_tid = -1;
+
+	return ret;
+}
+
+/*
+ * True if 'ip' is the context switch address and the queue's current flags
+ * describe a plain branch: not conditional, asynchronous, an interrupt or a
+ * transaction abort.
+ */
+static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
+{
+	if (ip != ptq->pt->switch_ip)
+		return false;
+
+	if (!(ptq->flags & PERF_IP_FLAG_BRANCH))
+		return false;
+
+	return !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
+			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
+}
+
+/* State type bits that intel_pt_sample() handles as power events */
+#define INTEL_PT_PWR_EVT (INTEL_PT_CBR_CHG | INTEL_PT_MWAIT_OP | \
+			  INTEL_PT_PWR_ENTRY | INTEL_PT_EX_STOP | \
+			  INTEL_PT_PWR_EXIT)
+
+static int intel_pt_sample(struct intel_pt_queue *ptq)
+{	/*
+	 * Synthesize the samples implied by the queue's current decoder
+	 * state, update the thread stack and last-branch buffer for branches
+	 * and, when synchronizing with context switches, track the switch
+	 * state.
+	 *
+	 * Returns 0 normally, a negative error from sample delivery or from
+	 * applying a thread switch, and 1 when the switch address is reached
+	 * and the queue now expects a switch event.
+	 */
+	const struct intel_pt_state *state = ptq->state;
+	struct intel_pt *pt = ptq->pt;
+	int err;
+
+	if (!ptq->have_sample)
+		return 0;
+
+	ptq->have_sample = false; /* the pending state is consumed here */
+
+	if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
+		if (state->type & INTEL_PT_CBR_CHG) {
+			err = intel_pt_synth_cbr_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_MWAIT_OP) {
+			err = intel_pt_synth_mwait_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_PWR_ENTRY) {
+			err = intel_pt_synth_pwre_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_EX_STOP) {
+			err = intel_pt_synth_exstop_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_PWR_EXIT) {
+			err = intel_pt_synth_pwrx_sample(ptq);
+			if (err)
+				return err;
+		}
+	}
+
+	if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
+		err = intel_pt_synth_instruction_sample(ptq);
+		if (err)
+			return err;
+	}
+
+	if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
+		err = intel_pt_synth_transaction_sample(ptq);
+		if (err)
+			return err;
+	}
+
+	if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
+		err = intel_pt_synth_ptwrite_sample(ptq);
+		if (err)
+			return err;
+	}
+
+	if (!(state->type & INTEL_PT_BRANCH)) /* everything below is branch handling */
+		return 0;
+
+	if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
+		thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
+				    state->to_ip, ptq->insn_len,
+				    state->trace_nr);
+	else
+		thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
+
+	if (pt->sample_branches) {
+		err = intel_pt_synth_branch_sample(ptq);
+		if (err)
+			return err;
+	}
+
+	if (pt->synth_opts.last_branch)
+		intel_pt_update_last_branch_rb(ptq);
+
+	if (!ptq->sync_switch)
+		return 0;
+
+	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
+		switch (ptq->switch_state) {
+		case INTEL_PT_SS_UNKNOWN:
+		case INTEL_PT_SS_EXPECTING_SWITCH_IP: /* apply any pending tid now */
+			err = intel_pt_next_tid(pt, ptq);
+			if (err)
+				return err;
+			ptq->switch_state = INTEL_PT_SS_TRACING;
+			break;
+		default: /* switch address seen first: wait for the switch event */
+			ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
+			return 1;
+		}
+	} else if (!state->to_ip) { /* branch with no destination */
+		ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
+	} else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
+		ptq->switch_state = INTEL_PT_SS_UNKNOWN;
+	} else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
+		   state->to_ip == pt->ptss_ip &&
+		   (ptq->flags & PERF_IP_FLAG_CALL)) {
+		ptq->switch_state = INTEL_PT_SS_TRACING;
+	}
+
+	return 0;
+}
+
+/*
+ * Find the address of the global kernel symbol "__switch_to" and, when
+ * 'ptss_ip' is given, the address of the function that emits the sched
+ * switch event ("perf_trace_sched_switch" when pt->have_sched_switch is 1,
+ * otherwise "__perf_event_task_sched_out").
+ *
+ * Returns the "__switch_to" address, or 0 if the kernel map cannot be
+ * loaded, the symbol is not found within the map, or 'ptss_ip' is NULL.
+ * *ptss_ip is 0 unless its symbol is found within the map.
+ */
+static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
+{
+	struct machine *machine = pt->machine;
+	struct symbol *first, *sym;
+	const char *ptss_name;
+	struct map *kmap;
+	u64 switch_ip = 0;
+	u64 addr;
+
+	if (ptss_ip)
+		*ptss_ip = 0;
+
+	kmap = machine__kernel_map(machine);
+	if (!kmap || map__load(kmap))
+		return 0;
+
+	first = dso__first_symbol(kmap->dso, MAP__FUNCTION);
+
+	for (sym = first; sym; sym = dso__next_symbol(sym)) {
+		if (sym->binding != STB_GLOBAL ||
+		    strcmp(sym->name, "__switch_to"))
+			continue;
+		addr = kmap->unmap_ip(kmap, sym->start);
+		if (addr < kmap->start || addr >= kmap->end)
+			continue;
+		switch_ip = addr;
+		break;
+	}
+
+	if (!switch_ip || !ptss_ip)
+		return 0;
+
+	ptss_name = pt->have_sched_switch == 1 ?
+		    "perf_trace_sched_switch" :
+		    "__perf_event_task_sched_out";
+
+	for (sym = first; sym; sym = dso__next_symbol(sym)) {
+		if (strcmp(sym->name, ptss_name))
+			continue;
+		addr = kmap->unmap_ip(kmap, sym->start);
+		if (addr < kmap->start || addr >= kmap->end)
+			continue;
+		*ptss_ip = addr;
+		break;
+	}
+
+	return switch_ip;
+}
+
+/*
+ * Set sync_switch on the session and on every queue that already has
+ * private data (an intel_pt_queue) attached.
+ */
+static void intel_pt_enable_sync_switch(struct intel_pt *pt)
+{
+	unsigned int idx = 0;
+
+	pt->sync_switch = true;
+
+	while (idx < pt->queues.nr_queues) {
+		struct intel_pt_queue *ptq = pt->queues.queue_array[idx++].priv;
+
+		if (!ptq)
+			continue;
+
+		ptq->sync_switch = true;
+	}
+}
+
+/*
+ * Run the decoder for one queue, synthesizing samples as states are decoded.
+ *
+ * On entry, if pt->kernel_start is still zero, it is looked up and, when the
+ * recorded configuration permits, the switch addresses are resolved and
+ * sync_switch is enabled for all queues.
+ *
+ * Returns:
+ *   - any non-zero value returned by intel_pt_sample() or
+ *     intel_pt_synth_error(),
+ *   - 1 when the decoder reports INTEL_PT_ERR_NODATA,
+ *   - 0 once the queue timestamp has reached *timestamp, in which case
+ *     *timestamp is updated to the queue timestamp.
+ * With timeless decoding the loop only ends through one of the first two.
+ */
+static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
+{
+	const struct intel_pt_state *state = ptq->state;
+	struct intel_pt *pt = ptq->pt;
+	int err;
+
+	/* kernel_start not cached yet (still zero): do the one-off setup */
+	if (!pt->kernel_start) {
+		pt->kernel_start = machine__kernel_start(pt->machine);
+		if (pt->per_cpu_mmaps &&
+		    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
+		    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
+		    !pt->sampling_mode) {
+			pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
+			/* Only sync with switches if "__switch_to" was found */
+			if (pt->switch_ip) {
+				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
+					     pt->switch_ip, pt->ptss_ip);
+				intel_pt_enable_sync_switch(pt);
+			}
+		}
+	}
+
+	intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
+		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+	while (1) {
+		/*
+		 * intel_pt_sample() runs before the next decode step, i.e.
+		 * presumably on the state stored in ptq->state by the previous
+		 * iteration or call.
+		 */
+		err = intel_pt_sample(ptq);
+		if (err)
+			return err;
+
+		state = intel_pt_decode(ptq->decoder);
+		if (state->err) {
+			/* Out of data: report it to the caller with 1 */
+			if (state->err == INTEL_PT_ERR_NODATA)
+				return 1;
+			/*
+			 * Decode error at a kernel address while syncing with
+			 * switches: stop syncing on this queue and move to the
+			 * next tid (return value deliberately not checked here).
+			 */
+			if (ptq->sync_switch &&
+			    state->from_ip >= pt->kernel_start) {
+				ptq->sync_switch = false;
+				intel_pt_next_tid(pt, ptq);
+			}
+			if (pt->synth_opts.errors) {
+				err = intel_pt_synth_error(pt, state->err,
+							   ptq->cpu, ptq->pid,
+							   ptq->tid,
+							   state->from_ip);
+				if (err)
+					return err;
+			}
+			/* Keep decoding after a (reported) decode error */
+			continue;
+		}
+
+		/* Remember the new state; it is sampled on the next pass */
+		ptq->state = state;
+		ptq->have_sample = true;
+		intel_pt_sample_flags(ptq);
+
+		/* Use estimated TSC upon return to user space */
+		if (pt->est_tsc &&
+		    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
+		    state->to_ip && state->to_ip < pt->kernel_start) {
+			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
+				     state->timestamp, state->est_timestamp);
+			ptq->timestamp = state->est_timestamp;
+		/* Use estimated TSC in unknown switch state */
+		} else if (ptq->sync_switch &&
+			   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
+			   intel_pt_is_switch_ip(ptq, state->to_ip) &&
+			   ptq->next_tid == -1) {
+			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
+				     state->timestamp, state->est_timestamp);
+			ptq->timestamp = state->est_timestamp;
+		/* Otherwise the queue timestamp only ever moves forward */
+		} else if (state->timestamp > ptq->timestamp) {
+			ptq->timestamp = state->timestamp;
+		}
+
+		/* Stop once this queue has caught up with the requested time */
+		if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
+			*timestamp = ptq->timestamp;
+			return 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * If the queues' new_data flag is set, clear it and return the result of
+ * intel_pt_setup_queues(); otherwise return 0 without doing anything.
+ */
+static inline int intel_pt_update_queues(struct intel_pt *pt)
+{
+	if (!pt->queues.new_data)
+		return 0;
+
+	pt->queues.new_data = false;
+
+	return intel_pt_setup_queues(pt);
+}
+
+/*
+ * Process the queues on pt->heap until the entry at the top of the heap is at
+ * or beyond @timestamp (or the heap is empty).
+ *
+ * Each pass pops the top entry, decodes that queue up to a limit derived from
+ * the new top of the heap (capped at @timestamp) and, if the decoder returned
+ * 0, puts the queue back on the heap at the timestamp it reached.  A queue
+ * whose decoder returned a positive value is left off the heap and marked
+ * !on_heap.
+ *
+ * Returns 0 when there is nothing more to do, or a negative error from the
+ * decoder or from auxtrace_heap__add().
+ */
+static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
+{
+	unsigned int queue_nr;
+	u64 ts;
+	int ret;
+
+	while (1) {
+		struct auxtrace_queue *queue;
+		struct intel_pt_queue *ptq;
+
+		if (!pt->heap.heap_cnt)
+			return 0;
+
+		/* Top of the heap already at/after the target: done */
+		if (pt->heap.heap_array[0].ordinal >= timestamp)
+			return 0;
+
+		queue_nr = pt->heap.heap_array[0].queue_nr;
+		queue = &pt->queues.queue_array[queue_nr];
+		ptq = queue->priv;
+
+		intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
+			     queue_nr, pt->heap.heap_array[0].ordinal,
+			     timestamp);
+
+		auxtrace_heap__pop(&pt->heap);
+
+		/*
+		 * Decode this queue to just past the new top of the heap, but
+		 * never beyond @timestamp.
+		 */
+		if (pt->heap.heap_cnt) {
+			ts = pt->heap.heap_array[0].ordinal + 1;
+			if (ts > timestamp)
+				ts = timestamp;
+		} else {
+			ts = timestamp;
+		}
+
+		intel_pt_set_pid_tid_cpu(pt, queue);
+
+		ret = intel_pt_run_decoder(ptq, &ts);
+
+		/* On error, put the queue back before bailing out */
+		if (ret < 0) {
+			auxtrace_heap__add(&pt->heap, queue_nr, ts);
+			return ret;
+		}
+
+		if (!ret) {
+			/* ts now holds the timestamp the decoder reached */
+			ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
+			if (ret < 0)
+				return ret;
+		} else {
+			/* Positive return: the queue stays off the heap */
+			ptq->on_heap = false;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Timeless decoding: run the decoder on every queue that has private data and
+ * whose tid equals @tid, or on all such queues when @tid is -1.  @time_ is
+ * stored in each selected queue's ->time before decoding.
+ *
+ * The decoder's return value is not checked; this always returns 0.
+ */
+static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
+					    u64 time_)
+{
+	struct auxtrace_queue *queue;
+	struct intel_pt_queue *ptq;
+	unsigned int n;
+	u64 ts = 0;
+
+	for (n = 0; n < pt->queues.nr_queues; n++) {
+		queue = &pt->queues.queue_array[n];
+		ptq = queue->priv;
+
+		if (!ptq)
+			continue;
+		if (tid != -1 && ptq->tid != tid)
+			continue;
+
+		ptq->time = time_;
+		intel_pt_set_pid_tid_cpu(pt, queue);
+		intel_pt_run_decoder(ptq, &ts);
+	}
+
+	return 0;
+}
+
+/*
+ * Synthesize an INTEL_PT_ERR_LOST error for the cpu/pid/tid of @sample, with
+ * an ip of 0.  Returns the result of intel_pt_synth_error().
+ */
+static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
+{
+	int rc;
+
+	rc = intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
+				  sample->pid, sample->tid, 0);
+
+	return rc;
+}
+
+/*
+ * Find the private queue data for @cpu.
+ *
+ * The search starts at index @cpu (clamped to the last queue), then walks
+ * down to index 0, and finally walks up from the starting index to the end of
+ * the array.  Presumably queue index and cpu number usually coincide, which
+ * makes the first probe the common hit.
+ *
+ * Returns the matching queue's ->priv (which the code does not check for
+ * NULL), or NULL if @cpu is negative, there are no queues, or no queue is
+ * bound to @cpu.
+ */
+static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
+{
+	unsigned i, j;
+
+	if (cpu < 0 || !pt->queues.nr_queues)
+		return NULL;
+
+	/* Start at index cpu, or at the last queue if cpu is out of range */
+	if ((unsigned)cpu >= pt->queues.nr_queues)
+		i = pt->queues.nr_queues - 1;
+	else
+		i = cpu;
+
+	if (pt->queues.queue_array[i].cpu == cpu)
+		return pt->queues.queue_array[i].priv;
+
+	/* Walk down to index 0; j ends up equal to the starting index */
+	for (j = 0; i > 0; j++) {
+		if (pt->queues.queue_array[--i].cpu == cpu)
+			return pt->queues.queue_array[i].priv;
+	}
+
+	/* Walk up from the starting index to the end of the array */
+	for (; j < pt->queues.nr_queues; j++) {
+		if (pt->queues.queue_array[j].cpu == cpu)
+			return pt->queues.queue_array[j].priv;
+	}
+
+	return NULL;
+}
+
+/*
+ * Feed a context-switch notification for @cpu into that cpu's queue
+ * switch-state machine.
+ *
+ * Returns 1 when the callers in this file should go on to set the current tid
+ * themselves (sync_switch disabled, no queue for @cpu, or any state other
+ * than the one below), 0 when @tid has been stored in ptq->next_tid and the
+ * queue now expects to see the switch ip in the trace, or the non-zero error
+ * from auxtrace_heap__add().
+ */
+static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
+				u64 timestamp)
+{
+	struct intel_pt_queue *ptq;
+	int err;
+
+	if (!pt->sync_switch)
+		return 1;
+
+	ptq = intel_pt_cpu_to_ptq(pt, cpu);
+	if (!ptq || !ptq->sync_switch)
+		return 1;
+
+	switch (ptq->switch_state) {
+	case INTEL_PT_SS_NOT_TRACING:
+		ptq->next_tid = -1;
+		break;
+	case INTEL_PT_SS_UNKNOWN:
+	case INTEL_PT_SS_TRACING:
+		/* Defer the tid change until the switch ip shows up */
+		ptq->next_tid = tid;
+		ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
+		return 0;
+	case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
+		/*
+		 * The queue is waiting for this event; if it is off the heap,
+		 * put it back at the event's time converted to TSC.
+		 */
+		if (!ptq->on_heap) {
+			ptq->timestamp = perf_time_to_tsc(timestamp,
+							  &pt->tc);
+			err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
+						 ptq->timestamp);
+			if (err)
+				return err;
+			ptq->on_heap = true;
+		}
+		ptq->switch_state = INTEL_PT_SS_TRACING;
+		break;
+	case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+		/* A second event arrived before the switch ip was seen */
+		ptq->next_tid = tid;
+		intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
+		break;
+	default:
+		break;
+	}
+
+	return 1;
+}
+
+/*
+ * Handle a sample that may belong to the sched_switch event.
+ *
+ * Samples from any other event are ignored (returns 0).  Otherwise the
+ * "next_pid" field is passed to intel_pt_sync_switch() as the new tid; when
+ * that returns a positive value the machine's current tid for the cpu is set
+ * right away (with pid -1).  A zero or negative result from
+ * intel_pt_sync_switch() is returned unchanged.
+ */
+static int intel_pt_process_switch(struct intel_pt *pt,
+				   struct perf_sample *sample)
+{
+	struct perf_evsel *sw_evsel;
+	pid_t next_tid;
+	int cpu_nr, rc;
+
+	sw_evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
+	if (sw_evsel != pt->switch_evsel)
+		return 0;
+
+	cpu_nr = sample->cpu;
+	next_tid = perf_evsel__intval(sw_evsel, sample, "next_pid");
+
+	intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+		     cpu_nr, next_tid, sample->time,
+		     perf_time_to_tsc(sample->time, &pt->tc));
+
+	rc = intel_pt_sync_switch(pt, cpu_nr, next_tid, sample->time);
+	if (rc > 0)
+		rc = machine__set_current_tid(pt->machine, cpu_nr, -1,
+					      next_tid);
+
+	return rc;
+}
+
+/*
+ * Handle a PERF_RECORD_SWITCH / PERF_RECORD_SWITCH_CPU_WIDE event.
+ *
+ * With have_sched_switch == 3 only switch-out events are used and the new
+ * pid/tid come from the event's next_prev_pid/next_prev_tid fields, so the
+ * event must be the CPU-wide variant.  Otherwise only switch-in events are
+ * used and the pid/tid come from @sample.
+ *
+ * Returns 0 for ignored events, -EINVAL for an unexpected event type or a tid
+ * of -1, the zero/negative result of intel_pt_sync_switch(), or the result of
+ * machine__set_current_tid().
+ */
+static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
+				   struct perf_sample *sample)
+{
+	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+	pid_t pid, tid;
+	int cpu, ret;
+
+	cpu = sample->cpu;
+
+	if (pt->have_sched_switch == 3) {
+		/* Use the "next" task recorded in switch-out events */
+		if (!out)
+			return 0;
+		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
+			pr_err("Expecting CPU-wide context switch event\n");
+			return -EINVAL;
+		}
+		pid = event->context_switch.next_prev_pid;
+		tid = event->context_switch.next_prev_tid;
+	} else {
+		/* Use the task that is being switched in */
+		if (out)
+			return 0;
+		pid = sample->pid;
+		tid = sample->tid;
+	}
+
+	if (tid == -1) {
+		pr_err("context_switch event has no tid\n");
+		return -EINVAL;
+	}
+
+	intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+		     cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
+		     &pt->tc));
+
+	/* <= 0: error, or the tid change has been deferred to the decoder */
+	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
+	if (ret <= 0)
+		return ret;
+
+	return machine__set_current_tid(pt->machine, cpu, pid, tid);
+}
+
+/*
+ * Handle an itrace start event: with per-cpu mmaps, make the pid/tid carried
+ * by the event the current task on the sample's cpu.  Without per-cpu mmaps
+ * the event is ignored and 0 is returned.
+ */
+static int intel_pt_process_itrace_start(struct intel_pt *pt,
+					 union perf_event *event,
+					 struct perf_sample *sample)
+{
+	int rc = 0;
+
+	if (pt->per_cpu_mmaps) {
+		intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+			     sample->cpu, event->itrace_start.pid,
+			     event->itrace_start.tid, sample->time,
+			     perf_time_to_tsc(sample->time, &pt->tc));
+
+		rc = machine__set_current_tid(pt->machine, sample->cpu,
+					      event->itrace_start.pid,
+					      event->itrace_start.tid);
+	}
+
+	return rc;
+}
+
+/*
+ * Per-event hook installed as auxtrace.process_event.
+ *
+ * Decodes trace data up to the event's time before looking at the event
+ * itself, then handles the events that affect decoding: truncated AUX
+ * records, sched_switch samples, itrace start and context-switch records.
+ *
+ * Does nothing when dumping the trace.  Returns -EINVAL if the tool does not
+ * use ordered events, otherwise 0 or the first error encountered.
+ */
+static int intel_pt_process_event(struct perf_session *session,
+				  union perf_event *event,
+				  struct perf_sample *sample,
+				  struct perf_tool *tool)
+{
+	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+					   auxtrace);
+	u64 timestamp;
+	int err = 0;
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events) {
+		pr_err("Intel Processor Trace requires ordered events\n");
+		return -EINVAL;
+	}
+
+	/* A sample time of 0 or (u64)-1 is treated as "no time" */
+	if (sample->time && sample->time != (u64)-1)
+		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
+	else
+		timestamp = 0;
+
+	if (timestamp || pt->timeless_decoding) {
+		err = intel_pt_update_queues(pt);
+		if (err)
+			return err;
+	}
+
+	if (pt->timeless_decoding) {
+		/* Without timestamps, decoding is only triggered by EXIT events */
+		if (event->header.type == PERF_RECORD_EXIT) {
+			err = intel_pt_process_timeless_queues(pt,
+							       event->fork.tid,
+							       sample->time);
+		}
+	} else if (timestamp) {
+		err = intel_pt_process_queues(pt, timestamp);
+	}
+	if (err)
+		return err;
+
+	/* Report truncated AUX data as a "lost" error, if errors are wanted */
+	if (event->header.type == PERF_RECORD_AUX &&
+	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
+	    pt->synth_opts.errors) {
+		err = intel_pt_lost(pt, sample);
+		if (err)
+			return err;
+	}
+
+	if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
+		err = intel_pt_process_switch(pt, sample);
+	else if (event->header.type == PERF_RECORD_ITRACE_START)
+		err = intel_pt_process_itrace_start(pt, event, sample);
+	else if (event->header.type == PERF_RECORD_SWITCH ||
+		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
+		err = intel_pt_context_switch(pt, event, sample);
+
+	intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
+		     perf_event__name(event->header.type), event->header.type,
+		     sample->cpu, sample->time, timestamp);
+
+	return err;
+}
+
+/*
+ * Flush hook installed as auxtrace.flush_events: decode whatever is still
+ * queued.
+ *
+ * Does nothing when dumping the trace; returns -EINVAL if the tool does not
+ * use ordered events.  Timeless decoding processes all queues (tid -1) with
+ * time MAX_TIMESTAMP - 1; otherwise queues are processed up to MAX_TIMESTAMP.
+ */
+static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
+{
+	struct intel_pt *pt;
+	int rc;
+
+	if (dump_trace)
+		return 0;
+	if (!tool->ordered_events)
+		return -EINVAL;
+
+	pt = container_of(session->auxtrace, struct intel_pt, auxtrace);
+
+	rc = intel_pt_update_queues(pt);
+	if (rc < 0)
+		return rc;
+
+	if (pt->timeless_decoding)
+		rc = intel_pt_process_timeless_queues(pt, -1,
+						      MAX_TIMESTAMP - 1);
+	else
+		rc = intel_pt_process_queues(pt, MAX_TIMESTAMP);
+
+	return rc;
+}
+
+/*
+ * Hook installed as auxtrace.free_events: free every queue's private data,
+ * disable the Intel PT log and free the auxtrace queues themselves.
+ */
+static void intel_pt_free_events(struct perf_session *session)
+{
+	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+					   auxtrace);
+	struct auxtrace_queues *queues = &pt->queues;
+	unsigned int n;
+
+	for (n = 0; n < queues->nr_queues; n++) {
+		struct auxtrace_queue *q = &queues->queue_array[n];
+
+		intel_pt_free_queue(q->priv);
+		q->priv = NULL;
+	}
+
+	intel_pt_log_disable();
+	auxtrace_queues__free(queues);
+}
+
+/*
+ * Hook installed as auxtrace.free: release everything owned by the Intel PT
+ * state attached to @session and detach it from the session.
+ */
+static void intel_pt_free(struct perf_session *session)
+{
+	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+					   auxtrace);
+
+	auxtrace_heap__free(&pt->heap);
+	intel_pt_free_events(session);
+	/* pt is about to be freed; make sure the session no longer points at it */
+	session->auxtrace = NULL;
+	thread__put(pt->unknown_thread);
+	addr_filters__exit(&pt->filts);
+	zfree(&pt->filter);
+	free(pt);
+}
+
+/*
+ * Hook installed as auxtrace.process_auxtrace_event.
+ *
+ * Unless the trace data has already been queued (pt->data_queued), add the
+ * event's data to the queues, recording the current file offset (0 for a
+ * pipe).  When dumping the trace, the newly added buffer is dumped
+ * immediately.
+ *
+ * Returns 0 on success, -errno if lseek() fails, or the error from
+ * auxtrace_queues__add_event().
+ */
+static int intel_pt_process_auxtrace_event(struct perf_session *session,
+					   union perf_event *event,
+					   struct perf_tool *tool __maybe_unused)
+{
+	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+					   auxtrace);
+	struct auxtrace_buffer *buffer;
+	off_t data_offset = 0;
+	int fd, err;
+
+	if (pt->data_queued)
+		return 0;
+
+	fd = perf_data__fd(session->data);
+
+	if (!perf_data__is_pipe(session->data)) {
+		data_offset = lseek(fd, 0, SEEK_CUR);
+		if (data_offset == -1)
+			return -errno;
+	}
+
+	err = auxtrace_queues__add_event(&pt->queues, session, event,
+					 data_offset, &buffer);
+	if (err)
+		return err;
+
+	if (!dump_trace)
+		return 0;
+
+	/* Dump here now we have copied a piped trace out of the pipe */
+	if (auxtrace_buffer__get_data(buffer, fd)) {
+		intel_pt_dump_event(pt, buffer->data, buffer->size);
+		auxtrace_buffer__put_data(buffer);
+	}
+
+	return 0;
+}
+
+/*
+ * Context for synthesizing attr events: pairs a perf_tool with the session so
+ * that intel_pt_event_synth() can get from the tool pointer it is handed back
+ * to the session, using container_of().
+ */
+struct intel_pt_synth {
+	struct perf_tool dummy_tool;	/* handed to perf_event__synthesize_attr() */
+	struct perf_session *session;	/* session the synthesized event goes to */
+};
+
+/*
+ * Callback for perf_event__synthesize_attr(): recover the enclosing
+ * intel_pt_synth from @tool and deliver @event to its session (with a NULL
+ * sample).
+ */
+static int intel_pt_event_synth(struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample __maybe_unused,
+				struct machine *machine __maybe_unused)
+{
+	struct intel_pt_synth *synth;
+	struct perf_session *session;
+
+	synth = container_of(tool, struct intel_pt_synth, dummy_tool);
+	session = synth->session;
+
+	return perf_session__deliver_synth_event(session, event, NULL);
+}
+
+/*
+ * Synthesize an attr event for @attr with the single id @id and deliver it to
+ * @session.  @name is used only in the debug and error messages.
+ *
+ * Returns the result of perf_event__synthesize_attr().
+ */
+static int intel_pt_synth_event(struct perf_session *session, const char *name,
+				struct perf_event_attr *attr, u64 id)
+{
+	struct intel_pt_synth synth;
+	int rc;
+
+	pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+		 name, id, (u64)attr->sample_type);
+
+	/* Only the session is needed; the dummy tool stays zeroed */
+	memset(&synth, 0, sizeof(synth));
+	synth.session = session;
+
+	rc = perf_event__synthesize_attr(&synth.dummy_tool, attr, 1, &id,
+					 intel_pt_event_synth);
+	if (rc)
+		pr_err("%s: failed to synthesize '%s' event type\n",
+		       __func__, name);
+
+	return rc;
+}
+
+/*
+ * Give the first evsel in @evlist whose first id equals @id a copy of @name
+ * as its name, replacing any name it already has.  The result of strdup() is
+ * stored without being checked.
+ */
+static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id,
+				    const char *name)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (!evsel->id || evsel->id[0] != id)
+			continue;
+
+		if (evsel->name)
+			zfree(&evsel->name);
+		evsel->name = strdup(name);
+		return;
+	}
+}
+
+/*
+ * Return the first evsel in @evlist whose attr.type is the Intel PT PMU type
+ * and which has ids, or NULL if there is none.
+ */
+static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt,
+					 struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type != pt->pmu_type)
+			continue;
+		if (evsel->ids)
+			return evsel;
+	}
+
+	return NULL;
+}
+
+/*
+ * Synthesize the attr events for every kind of sample requested in
+ * pt->synth_opts, and record for each kind its id and sample type in @pt.
+ *
+ * A single perf_event_attr is built from the Intel PT evsel and then adjusted
+ * step by step, so the order of the sections below matters: each section
+ * inherits the attr as left by the previous ones.
+ *
+ * Returns 0 on success (including when there is no Intel PT evsel), or the
+ * error from intel_pt_synth_event().
+ */
+static int intel_pt_synth_events(struct intel_pt *pt,
+				 struct perf_session *session)
+{
+	struct perf_evlist *evlist = session->evlist;
+	struct perf_evsel *evsel = intel_pt_evsel(pt, evlist);
+	struct perf_event_attr attr;
+	u64 id;
+	int err;
+
+	if (!evsel) {
+		pr_debug("There are no selected events with Intel Processor Trace data\n");
+		return 0;
+	}
+
+	/* Base the synthesized attr on the Intel PT event's own attr */
+	memset(&attr, 0, sizeof(struct perf_event_attr));
+	attr.size = sizeof(struct perf_event_attr);
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
+	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+			    PERF_SAMPLE_PERIOD;
+	if (pt->timeless_decoding)
+		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+	else
+		attr.sample_type |= PERF_SAMPLE_TIME;
+	if (!pt->per_cpu_mmaps)
+		attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
+	attr.exclude_user = evsel->attr.exclude_user;
+	attr.exclude_kernel = evsel->attr.exclude_kernel;
+	attr.exclude_hv = evsel->attr.exclude_hv;
+	attr.exclude_host = evsel->attr.exclude_host;
+	attr.exclude_guest = evsel->attr.exclude_guest;
+	attr.sample_id_all = evsel->attr.sample_id_all;
+	attr.read_format = evsel->attr.read_format;
+
+	/*
+	 * Synthesized ids start at the PT event's first id plus a large
+	 * offset (presumably to stay clear of real event ids); 0 is avoided.
+	 */
+	id = evsel->id[0] + 1000000000;
+	if (!id)
+		id = 1;
+
+	if (pt->synth_opts.branches) {
+		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+		attr.sample_period = 1;
+		attr.sample_type |= PERF_SAMPLE_ADDR;
+		err = intel_pt_synth_event(session, "branches", &attr, id);
+		if (err)
+			return err;
+		pt->sample_branches = true;
+		pt->branches_sample_type = attr.sample_type;
+		pt->branches_id = id;
+		id += 1;
+		/* PERF_SAMPLE_ADDR applies to branch samples only */
+		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
+	}
+
+	/* Callchains and branch stacks are added after the branches event */
+	if (pt->synth_opts.callchain)
+		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+	if (pt->synth_opts.last_branch)
+		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+
+	if (pt->synth_opts.instructions) {
+		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+		/* A period given in nanoseconds is converted to ticks */
+		if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
+			attr.sample_period =
+				intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
+		else
+			attr.sample_period = pt->synth_opts.period;
+		err = intel_pt_synth_event(session, "instructions", &attr, id);
+		if (err)
+			return err;
+		pt->sample_instructions = true;
+		pt->instructions_sample_type = attr.sample_type;
+		pt->instructions_id = id;
+		id += 1;
+	}
+
+	/* The remaining events carry no period and use a sample period of 1 */
+	attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
+	attr.sample_period = 1;
+
+	if (pt->synth_opts.transactions) {
+		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+		err = intel_pt_synth_event(session, "transactions", &attr, id);
+		if (err)
+			return err;
+		pt->sample_transactions = true;
+		pt->transactions_sample_type = attr.sample_type;
+		pt->transactions_id = id;
+		intel_pt_set_event_name(evlist, id, "transactions");
+		id += 1;
+	}
+
+	/* From here on the events are PERF_TYPE_SYNTH and carry raw data */
+	attr.type = PERF_TYPE_SYNTH;
+	attr.sample_type |= PERF_SAMPLE_RAW;
+
+	if (pt->synth_opts.ptwrites) {
+		attr.config = PERF_SYNTH_INTEL_PTWRITE;
+		err = intel_pt_synth_event(session, "ptwrite", &attr, id);
+		if (err)
+			return err;
+		pt->sample_ptwrites = true;
+		pt->ptwrites_sample_type = attr.sample_type;
+		pt->ptwrites_id = id;
+		intel_pt_set_event_name(evlist, id, "ptwrite");
+		id += 1;
+	}
+
+	if (pt->synth_opts.pwr_events) {
+		/* All power events share one sample type */
+		pt->sample_pwr_events = true;
+		pt->pwr_events_sample_type = attr.sample_type;
+
+		attr.config = PERF_SYNTH_INTEL_CBR;
+		err = intel_pt_synth_event(session, "cbr", &attr, id);
+		if (err)
+			return err;
+		pt->cbr_id = id;
+		intel_pt_set_event_name(evlist, id, "cbr");
+		id += 1;
+	}
+
+	/*
+	 * NOTE(review): bit 0x10 of the PT event config presumably enables
+	 * power event packets - confirm against the PMU format definition.
+	 */
+	if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) {
+		attr.config = PERF_SYNTH_INTEL_MWAIT;
+		err = intel_pt_synth_event(session, "mwait", &attr, id);
+		if (err)
+			return err;
+		pt->mwait_id = id;
+		intel_pt_set_event_name(evlist, id, "mwait");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_PWRE;
+		err = intel_pt_synth_event(session, "pwre", &attr, id);
+		if (err)
+			return err;
+		pt->pwre_id = id;
+		intel_pt_set_event_name(evlist, id, "pwre");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_EXSTOP;
+		err = intel_pt_synth_event(session, "exstop", &attr, id);
+		if (err)
+			return err;
+		pt->exstop_id = id;
+		intel_pt_set_event_name(evlist, id, "exstop");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_PWRX;
+		err = intel_pt_synth_event(session, "pwrx", &attr, id);
+		if (err)
+			return err;
+		pt->pwrx_id = id;
+		intel_pt_set_event_name(evlist, id, "pwrx");
+		id += 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk @evlist backwards and return the first evsel found whose name is
+ * "sched:sched_switch", or NULL if there is none.
+ */
+static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry_reverse(evlist, evsel) {
+		if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0)
+			return evsel;
+	}
+
+	return NULL;
+}
+
+/* Return true if any evsel in @evlist has attr.context_switch set. */
+static bool intel_pt_find_switch(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+	bool found = false;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.context_switch) {
+			found = true;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/*
+ * perf_config() callback: pick up the boolean "intel-pt.mispred-all" setting
+ * into the struct intel_pt passed as @data.  All other variables are ignored.
+ * Always returns 0.
+ */
+static int intel_pt_perf_config(const char *var, const char *value, void *data)
+{
+	if (strcmp(var, "intel-pt.mispred-all") == 0) {
+		struct intel_pt *pt = data;
+
+		pt->mispred_all = perf_config_bool(var, value);
+	}
+
+	return 0;
+}
+
+/*
+ * printf formats used by intel_pt_print_info() to dump the auxtrace info
+ * priv[] array, indexed by the INTEL_PT_* priv index.  Each format consumes
+ * exactly one u64 argument.
+ *
+ * Every index that intel_pt_print_info() can be asked to print needs an entry
+ * here: indices left out of a designated initializer are NULL.
+ */
+static const char * const intel_pt_info_fmts[] = {
+	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
+	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
+	[INTEL_PT_TIME_MULT]		= "  Time Multiplier     %"PRIu64"\n",
+	[INTEL_PT_TIME_ZERO]		= "  Time Zero           %"PRIu64"\n",
+	[INTEL_PT_CAP_USER_TIME_ZERO]	= "  Cap Time Zero       %"PRId64"\n",
+	[INTEL_PT_TSC_BIT]		= "  TSC bit             %#"PRIx64"\n",
+	[INTEL_PT_NORETCOMP_BIT]	= "  NoRETComp bit       %#"PRIx64"\n",
+	[INTEL_PT_HAVE_SCHED_SWITCH]	= "  Have sched_switch   %"PRId64"\n",
+	[INTEL_PT_SNAPSHOT_MODE]	= "  Snapshot mode       %"PRId64"\n",
+	[INTEL_PT_PER_CPU_MMAPS]	= "  Per-cpu maps        %"PRId64"\n",
+	[INTEL_PT_MTC_BIT]		= "  MTC bit             %#"PRIx64"\n",
+	/* Printed as part of the INTEL_PT_MTC_BIT..INTEL_PT_CYC_BIT range */
+	[INTEL_PT_MTC_FREQ_BITS]	= "  MTC freq bits       %#"PRIx64"\n",
+	[INTEL_PT_TSC_CTC_N]		= "  TSC:CTC numerator   %"PRIu64"\n",
+	[INTEL_PT_TSC_CTC_D]		= "  TSC:CTC denominator %"PRIu64"\n",
+	[INTEL_PT_CYC_BIT]		= "  CYC bit             %#"PRIx64"\n",
+	[INTEL_PT_MAX_NONTURBO_RATIO]	= "  Max non-turbo ratio %"PRIu64"\n",
+	[INTEL_PT_FILTER_STR_LEN]	= "  Filter string len.  %"PRIu64"\n",
+};
+
+/*
+ * When dumping the trace, print the auxtrace info values arr[start..finish]
+ * (inclusive) to stdout using the matching intel_pt_info_fmts[] entries.
+ * Indices that have no format string are skipped.
+ */
+static void intel_pt_print_info(u64 *arr, int start, int finish)
+{
+	int i;
+
+	if (!dump_trace)
+		return;
+
+	for (i = start; i <= finish; i++) {
+		const char *fmt = intel_pt_info_fmts[i];
+
+		/*
+		 * Indices omitted from the designated initializer are NULL;
+		 * a NULL format must never reach fprintf().
+		 */
+		if (!fmt)
+			continue;
+
+		fprintf(stdout, fmt, arr[i]);
+	}
+}
+
+/*
+ * When dumping the trace, print "@name @str" to stdout with @name padded to
+ * 20 columns.  A NULL @str is printed as an empty string.
+ */
+static void intel_pt_print_info_str(const char *name, const char *str)
+{
+	const char *text = str;
+
+	if (!text)
+		text = "";
+
+	if (dump_trace)
+		fprintf(stdout, "  %-20s%s\n", name, text);
+}
+
+/*
+ * Return true if the auxtrace info event is large enough to contain
+ * priv[@pos], i.e. at least @pos + 1 u64 values after the fixed part.
+ */
+static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
+{
+	size_t needed = sizeof(struct auxtrace_info_event) +
+			(sizeof(u64) * (pos + 1));
+
+	return auxtrace_info->header.size >= needed;
+}
+
+/*
+ * Set up Intel PT decoding for @session from the auxtrace info event.
+ *
+ * Allocates the struct intel_pt, copies the recorded parameters out of the
+ * event's priv[] array (older, shorter events simply lack the later entries),
+ * parses the optional address filter string that follows the fixed entries,
+ * creates the "unknown" thread and installs the auxtrace callbacks on the
+ * session.  Unless the trace is only being dumped, it then picks the
+ * synthesis options, synthesizes the sample events and processes the
+ * auxtrace index.
+ *
+ * Returns 0 on success or a negative error code; on error everything
+ * allocated here is released and session->auxtrace is left NULL (except for
+ * failures before the queues are initialized, which do not touch it).
+ */
+int intel_pt_process_auxtrace_info(union perf_event *event,
+				   struct perf_session *session)
+{
+	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+	/* priv[0..INTEL_PT_PER_CPU_MMAPS] are read unconditionally below */
+	size_t min_sz = sizeof(u64) * (INTEL_PT_PER_CPU_MMAPS + 1);
+	struct intel_pt *pt;
+	void *info_end;
+	u64 *info;
+	int err;
+
+	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
+					min_sz)
+		return -EINVAL;
+
+	pt = zalloc(sizeof(struct intel_pt));
+	if (!pt)
+		return -ENOMEM;
+
+	addr_filters__init(&pt->filts);
+
+	err = perf_config(intel_pt_perf_config, pt);
+	if (err)
+		goto err_free;
+
+	err = auxtrace_queues__init(&pt->queues);
+	if (err)
+		goto err_free;
+
+	intel_pt_log_set_name(INTEL_PT_PMU_NAME);
+
+	pt->session = session;
+	pt->machine = &session->machines.host; /* No kvm support */
+	pt->auxtrace_type = auxtrace_info->type;
+	pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
+	pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
+	pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
+	pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
+	pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
+	pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
+	pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
+	pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
+	pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
+	pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
+	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
+			    INTEL_PT_PER_CPU_MMAPS);
+
+	/* The remaining entries are only present in larger (newer) events */
+	if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
+		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
+		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
+		pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
+		pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
+		pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
+		intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
+				    INTEL_PT_CYC_BIT);
+	}
+
+	if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
+		pt->max_non_turbo_ratio =
+			auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
+		intel_pt_print_info(&auxtrace_info->priv[0],
+				    INTEL_PT_MAX_NONTURBO_RATIO,
+				    INTEL_PT_MAX_NONTURBO_RATIO);
+	}
+
+	/*
+	 * Variable-length data starts after the last fixed entry.  The event
+	 * ends header.size bytes after its own start, not after 'info'.
+	 */
+	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
+	info_end = (void *)auxtrace_info + auxtrace_info->header.size;
+
+	if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
+		size_t len;
+
+		len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
+		intel_pt_print_info(&auxtrace_info->priv[0],
+				    INTEL_PT_FILTER_STR_LEN,
+				    INTEL_PT_FILTER_STR_LEN);
+		if (len) {
+			const char *filter = (const char *)info;
+
+			/* The string is stored NUL-terminated, padded to 8 bytes */
+			len = roundup(len + 1, 8);
+			info += len >> 3;
+			if ((void *)info > info_end) {
+				pr_err("%s: bad filter string length\n", __func__);
+				err = -EINVAL;
+				goto err_free_queues;
+			}
+			pt->filter = memdup(filter, len);
+			if (!pt->filter) {
+				err = -ENOMEM;
+				goto err_free_queues;
+			}
+			/* Swapping the copy again restores the string's byte order */
+			if (session->header.needs_swap)
+				mem_bswap_64(pt->filter, len);
+			if (pt->filter[len - 1]) {
+				pr_err("%s: filter string not null terminated\n", __func__);
+				err = -EINVAL;
+				goto err_free_queues;
+			}
+			/*
+			 * Parse the validated (and, if needed, byte-swapped)
+			 * copy rather than the raw event data.
+			 */
+			err = addr_filters__parse_bare_filter(&pt->filts,
+							      pt->filter);
+			if (err)
+				goto err_free_queues;
+		}
+		intel_pt_print_info_str("Filter string", pt->filter);
+	}
+
+	pt->timeless_decoding = intel_pt_timeless_decoding(pt);
+	pt->have_tsc = intel_pt_have_tsc(pt);
+	pt->sampling_mode = false;
+	pt->est_tsc = !pt->timeless_decoding;
+
+	pt->unknown_thread = thread__new(999999999, 999999999);
+	if (!pt->unknown_thread) {
+		err = -ENOMEM;
+		goto err_free_queues;
+	}
+
+	/*
+	 * Since this thread will not be kept in any rbtree nor in a
+	 * list, initialize its list node so that at thread__put() the
+	 * current thread lifetime assumption is kept and we don't segfault
+	 * at list_del_init().
+	 */
+	INIT_LIST_HEAD(&pt->unknown_thread->node);
+
+	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
+	if (err)
+		goto err_delete_thread;
+	if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
+		err = -ENOMEM;
+		goto err_delete_thread;
+	}
+
+	pt->auxtrace.process_event = intel_pt_process_event;
+	pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
+	pt->auxtrace.flush_events = intel_pt_flush;
+	pt->auxtrace.free_events = intel_pt_free_events;
+	pt->auxtrace.free = intel_pt_free;
+	session->auxtrace = &pt->auxtrace;
+
+	/* Dumping needs the callbacks but none of the synthesis setup */
+	if (dump_trace)
+		return 0;
+
+	if (pt->have_sched_switch == 1) {
+		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
+		if (!pt->switch_evsel) {
+			pr_err("%s: missing sched_switch event\n", __func__);
+			err = -EINVAL;
+			goto err_delete_thread;
+		}
+	} else if (pt->have_sched_switch == 2 &&
+		   !intel_pt_find_switch(session->evlist)) {
+		pr_err("%s: missing context_switch attribute flag\n", __func__);
+		err = -EINVAL;
+		goto err_delete_thread;
+	}
+
+	/* Use the user's itrace options if given, else the defaults */
+	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
+		pt->synth_opts = *session->itrace_synth_opts;
+	} else {
+		itrace_synth_opts__set_default(&pt->synth_opts);
+		if (use_browser != -1) {
+			pt->synth_opts.branches = false;
+			pt->synth_opts.callchain = true;
+		}
+		if (session->itrace_synth_opts)
+			pt->synth_opts.thread_stack =
+				session->itrace_synth_opts->thread_stack;
+	}
+
+	if (pt->synth_opts.log)
+		intel_pt_log_enable();
+
+	/* Maximum non-turbo ratio is TSC freq / 100 MHz */
+	if (pt->tc.time_mult) {
+		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
+
+		if (!pt->max_non_turbo_ratio)
+			pt->max_non_turbo_ratio =
+					(tsc_freq + 50000000) / 100000000;
+		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
+		intel_pt_log("Maximum non-turbo ratio %u\n",
+			     pt->max_non_turbo_ratio);
+		/*
+		 * A TSC frequency below 50 MHz (e.g. from a bogus time_mult)
+		 * rounds the ratio down to 0; do not divide by it.
+		 */
+		if (pt->max_non_turbo_ratio)
+			pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio /
+				      1000;
+	}
+
+	if (pt->synth_opts.calls)
+		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+				       PERF_IP_FLAG_TRACE_END;
+	if (pt->synth_opts.returns)
+		pt->branches_filter |= PERF_IP_FLAG_RETURN |
+				       PERF_IP_FLAG_TRACE_BEGIN;
+
+	/* Fall back to no callchains if they cannot be set up */
+	if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
+		symbol_conf.use_callchain = true;
+		if (callchain_register_param(&callchain_param) < 0) {
+			symbol_conf.use_callchain = false;
+			pt->synth_opts.callchain = false;
+		}
+	}
+
+	err = intel_pt_synth_events(pt, session);
+	if (err)
+		goto err_delete_thread;
+
+	err = auxtrace_queues__process_index(&pt->queues, session);
+	if (err)
+		goto err_delete_thread;
+
+	if (pt->queues.populated)
+		pt->data_queued = true;
+
+	if (pt->timeless_decoding)
+		pr_debug2("Intel PT decoding without timestamps\n");
+
+	return 0;
+
+err_delete_thread:
+	thread__zput(pt->unknown_thread);
+err_free_queues:
+	intel_pt_log_disable();
+	auxtrace_queues__free(&pt->queues);
+	session->auxtrace = NULL;
+err_free:
+	addr_filters__exit(&pt->filts);
+	zfree(&pt->filter);
+	free(pt);
+	return err;
+}
diff --git a/util/intel-pt.h b/util/intel-pt.h
new file mode 100644
index 0000000..e13b14e
--- /dev/null
+++ b/util/intel-pt.h
@@ -0,0 +1,56 @@
+/*
+ * intel_pt.h: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__PERF_INTEL_PT_H__
+#define INCLUDE__PERF_INTEL_PT_H__
+
+#define INTEL_PT_PMU_NAME "intel_pt"
+
+/*
+ * Indices into the priv[] array of the Intel PT auxtrace info event.  Each
+ * entry is a u64.  Events may be shorter than the full list, so readers check
+ * the event size before using the later entries.
+ */
+enum {
+	INTEL_PT_PMU_TYPE,
+	INTEL_PT_TIME_SHIFT,
+	INTEL_PT_TIME_MULT,
+	INTEL_PT_TIME_ZERO,
+	INTEL_PT_CAP_USER_TIME_ZERO,
+	INTEL_PT_TSC_BIT,
+	INTEL_PT_NORETCOMP_BIT,
+	INTEL_PT_HAVE_SCHED_SWITCH,
+	INTEL_PT_SNAPSHOT_MODE,
+	INTEL_PT_PER_CPU_MMAPS,
+	INTEL_PT_MTC_BIT,
+	INTEL_PT_MTC_FREQ_BITS,
+	INTEL_PT_TSC_CTC_N,
+	INTEL_PT_TSC_CTC_D,
+	INTEL_PT_CYC_BIT,
+	INTEL_PT_MAX_NONTURBO_RATIO,
+	INTEL_PT_FILTER_STR_LEN,	/* the filter string itself follows this entry */
+	INTEL_PT_AUXTRACE_PRIV_MAX,	/* one past the last index, not an entry */
+};
+
+struct auxtrace_record;
+struct perf_tool;
+union perf_event;
+struct perf_session;
+struct perf_event_attr;
+struct perf_pmu;
+
+struct auxtrace_record *intel_pt_recording_init(int *err);
+
+int intel_pt_process_auxtrace_info(union perf_event *event,
+				   struct perf_session *session);
+
+struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
+
+#endif
diff --git a/util/intlist.c b/util/intlist.c
new file mode 100644
index 0000000..89715b6
--- /dev/null
+++ b/util/intlist.c
@@ -0,0 +1,146 @@
+/*
+ * Based on intlist.c by:
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Licensed under the GPLv2.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+
+#include "intlist.h"
+
+/* rblist node_new hook: allocate an int_node for the integer packed into @entry. */
+static struct rb_node *intlist__node_new(struct rblist *rblist __maybe_unused,
+					 const void *entry)
+{
+	struct int_node *fresh = malloc(sizeof(*fresh));
+
+	/* Allocation failure is reported to the caller as a NULL rb_node. */
+	if (fresh == NULL)
+		return NULL;
+
+	/* The key travels in the pointer value itself, not behind it. */
+	fresh->i = (int)((long)entry);
+	fresh->priv = NULL;
+	return &fresh->rb_node;
+}
+
+static void int_node__delete(struct int_node *ilist)
+{
+	free(ilist);	/* releases only the node itself; ->priv is not touched here */
+}
+
+/* rblist node_delete hook: map the rb_node back to its int_node and free it. */
+static void intlist__node_delete(struct rblist *rblist __maybe_unused,
+				 struct rb_node *rb_node)
+{
+	/* rb_node is embedded in struct int_node, so recover the container. */
+	int_node__delete(container_of(rb_node, struct int_node, rb_node));
+}
+
+/* rblist node_cmp hook: negative, zero or positive as node is below, equal to or above @entry. */
+static int intlist__node_cmp(struct rb_node *rb_node, const void *entry)
+{
+	int i = (int)((long)entry);
+	struct int_node *node = container_of(rb_node, struct int_node, rb_node);
+	return (node->i > i) - (node->i < i); /* no subtraction: node->i - i can overflow int */
+}
+
+int intlist__add(struct intlist *ilist, int i)
+{
+	return rblist__add_node(&ilist->rblist, (void *)((long)i));	/* the key is carried in the pointer value itself */
+}
+
+void intlist__remove(struct intlist *ilist, struct int_node *node)
+{
+	rblist__remove_node(&ilist->rblist, &node->rb_node);	/* hands the embedded rb_node to the generic rblist code */
+}
+
+/*
+ * Look @i up in @ilist, inserting it first when @create is true.
+ * Returns the matching int_node, or NULL for a NULL list or when the
+ * underlying rblist call yields no node.
+ */
+static struct int_node *__intlist__findnew(struct intlist *ilist,
+					   int i, bool create)
+{
+	void *key = (void *)((long)i);
+	struct rb_node *found;
+
+	if (ilist == NULL)
+		return NULL;
+
+	found = create ? rblist__findnew(&ilist->rblist, key)
+		       : rblist__find(&ilist->rblist, key);
+
+	return found ? container_of(found, struct int_node, rb_node) : NULL;
+}
+
+struct int_node *intlist__find(struct intlist *ilist, int i)
+{
+	return __intlist__findnew(ilist, i, false);	/* false: lookup only, never insert */
+}
+
+struct int_node *intlist__findnew(struct intlist *ilist, int i)
+{
+	return __intlist__findnew(ilist, i, true);	/* true: goes through rblist__findnew() instead of rblist__find() */
+}
+
+/*
+ * Add each base-10 integer of the comma separated string @s to @ilist.
+ * Returns 0, -EINVAL on an unexpected separator, or intlist__add()'s error.
+ */
+static int intlist__parse_list(struct intlist *ilist, const char *s)
+{
+	for (;;) {
+		char *stop;
+		long parsed = strtol(s, &stop, 10);
+		int rc = -EINVAL;
+
+		if (*stop == ',' || *stop == '\0')
+			rc = intlist__add(ilist, parsed);
+		if (rc || *stop == '\0')
+			return rc;
+		s = stop + 1;
+	}
+}
+
+/* Allocate an intlist, optionally filled from the comma separated @slist; NULL on failure. */
+struct intlist *intlist__new(const char *slist)
+{
+	struct intlist *list = malloc(sizeof(*list));
+
+	if (list == NULL)
+		return NULL;
+
+	rblist__init(&list->rblist);
+	list->rblist.node_cmp    = intlist__node_cmp;
+	list->rblist.node_new    = intlist__node_new;
+	list->rblist.node_delete = intlist__node_delete;
+
+	if (slist != NULL && intlist__parse_list(list, slist) != 0) {
+		intlist__delete(list);
+		return NULL;
+	}
+	return list;
+}
+
+void intlist__delete(struct intlist *ilist)
+{
+	if (ilist != NULL)	/* deleting a NULL list is a no-op */
+		rblist__delete(&ilist->rblist);
+}
+
+/* Return the int_node that rblist__entry() yields for @idx, or NULL if it yields none. */
+struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx)
+{
+	struct rb_node *hit = rblist__entry(&ilist->rblist, idx);
+
+	if (hit == NULL)
+		return NULL;
+
+	/* rb_node is embedded in struct int_node. */
+	return container_of(hit, struct int_node, rb_node);
+}
diff --git a/util/intlist.h b/util/intlist.h
new file mode 100644
index 0000000..85bab87
--- /dev/null
+++ b/util/intlist.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_INTLIST_H
+#define __PERF_INTLIST_H
+
+#include <linux/rbtree.h>
+#include <stdbool.h>
+
+#include "rblist.h"
+
+struct int_node {
+	struct rb_node rb_node;
+	int i;
+	void *priv;
+};
+
+struct intlist {
+	struct rblist rblist;
+};
+
+struct intlist *intlist__new(const char *slist);
+void intlist__delete(struct intlist *ilist);
+
+void intlist__remove(struct intlist *ilist, struct int_node *in);
+int intlist__add(struct intlist *ilist, int i);
+
+struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx);
+struct int_node *intlist__find(struct intlist *ilist, int i);
+struct int_node *intlist__findnew(struct intlist *ilist, int i);
+
+static inline bool intlist__has_entry(struct intlist *ilist, int i)
+{
+	return intlist__find(ilist, i) != NULL;	/* true when a node holding @i exists */
+}
+
+static inline bool intlist__empty(const struct intlist *ilist)
+{
+	return rblist__empty(&ilist->rblist);	/* no NULL check here: @ilist is passed straight through */
+}
+
+static inline unsigned int intlist__nr_entries(const struct intlist *ilist)
+{
+	return rblist__nr_entries(&ilist->rblist);	/* no NULL check here: @ilist is passed straight through */
+}
+
+/* For intlist iteration: the node rb_first() reports, or NULL when it reports none. */
+static inline struct int_node *intlist__first(struct intlist *ilist)
+{
+	struct rb_node *head = rb_first(&ilist->rblist.entries);
+	return head == NULL ? NULL : rb_entry(head, struct int_node, rb_node);
+}
+/* Successor of @in in rb_next() order; NULL-safe so the iteration macros can call it blindly. */
+static inline struct int_node *intlist__next(struct int_node *in)
+{
+	struct rb_node *succ = in ? rb_next(&in->rb_node) : NULL;
+
+	/* Covers both "@in was NULL" and "rb_next() found nothing". */
+	return succ == NULL ? NULL : rb_entry(succ, struct int_node, rb_node);
+}
+
+/**
+ * intlist__for_each_entry      - iterate over a intlist
+ * @pos:	the &struct int_node to use as a loop cursor.
+ * @ilist:	the &struct intlist for loop.
+ */
+#define intlist__for_each_entry(pos, ilist)	\
+	for (pos = intlist__first(ilist); pos; pos = intlist__next(pos))
+
+/**
+ * intlist__for_each_entry_safe - iterate over a intlist safe against removal of
+ *                         int_node
+ * @pos:	the &struct int_node to use as a loop cursor.
+ * @n:		another &struct int_node to use as temporary storage.
+ * @ilist:	the &struct intlist for loop.
+ */
+#define intlist__for_each_entry_safe(pos, n, ilist)	\
+	for (pos = intlist__first(ilist), n = intlist__next(pos); pos;\
+	     pos = n, n = intlist__next(n))
+#endif /* __PERF_INTLIST_H */
diff --git a/util/jit.h b/util/jit.h
new file mode 100644
index 0000000..6817ffc
--- /dev/null
+++ b/util/jit.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __JIT_H__
+#define __JIT_H__
+
+#include <data.h>
+
+int jit_process(struct perf_session *session, struct perf_data *output,
+		struct machine *machine, char *filename, pid_t pid, u64 *nbytes);
+
+int jit_inject_record(const char *filename);
+
+#endif /* __JIT_H__ */
diff --git a/util/jitdump.c b/util/jitdump.c
new file mode 100644
index 0000000..a186300
--- /dev/null
+++ b/util/jitdump.c
@@ -0,0 +1,793 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <byteswap.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <linux/stringify.h>
+
+#include "util.h"
+#include "event.h"
+#include "debug.h"
+#include "evlist.h"
+#include "symbol.h"
+#include <elf.h>
+
+#include "tsc.h"
+#include "session.h"
+#include "jit.h"
+#include "jitdump.h"
+#include "genelf.h"
+#include "../builtin.h"
+
+#include "sane_ctype.h"
+
+struct jit_buf_desc {
+	struct perf_data *output;	/* where jit_inject_event() writes synthesized events */
+	struct perf_session *session;
+	struct machine *machine;
+	union jr_entry   *entry;
+	void             *buf;	/* record buffer, allocated and grown by jit_get_next_entry() */
+	uint64_t	 sample_type;	/* PERF_SAMPLE_* bits deciding which id fields the mmap2 events carry */
+	size_t           bufsize;	/* current allocation size of buf */
+	FILE             *in;	/* jitdump stream; opened and locked by jit_open() */
+	bool		 needs_bswap; /* handles cross-endianess */
+	bool		 use_arch_timestamp;	/* header had JITDUMP_FLAGS_ARCH_TIMESTAMP set */
+	void		 *debug_data;	/* pending debug entries, handed to the next code load */
+	void		 *unwinding_data;	/* pending unwinding payload, handed to the next code load */
+	uint64_t	 unwinding_size;
+	uint64_t	 unwinding_mapped_size;
+	uint64_t         eh_frame_hdr_size;
+	size_t		 nr_debug_entries;	/* entry count that goes with debug_data */
+	uint32_t         code_load_count;	/* JIT_CODE_LOAD records read so far */
+	u64		 bytes_written;	/* running total of bytes written to output */
+	struct rb_root   code_root;
+	char		 dir[PATH_MAX];	/* directory of the jitdump file, set by jit_open() */
+};
+
+struct debug_line_info {
+	unsigned long vma;
+	unsigned int lineno;
+	/* The filename format is unspecified, absolute path, relative etc. */
+	char const filename[0];
+};
+
+struct jit_tool {
+	struct perf_tool tool;
+	struct perf_data	output;
+	struct perf_data	input;
+	u64 bytes_written;
+};
+
+#define hmax(a, b) ((a) > (b) ? (a) : (b))
+#define get_jit_tool(t) (container_of((t), struct jit_tool, tool))	/* @t: pointer to the perf_tool embedded in a struct jit_tool */
+
+/*
+ * Write an ELF image for jitted symbol @sym to @filename (created/truncated).
+ * Returns -1 if open() fails, else jit_write_elf()'s result; unlinks on error.
+ */
+static int
+jit_emit_elf(char *filename,
+	     const char *sym,
+	     uint64_t code_addr,
+	     const void *code,
+	     int csize,
+	     void *debug,
+	     int nr_debug_entries,
+	     void *unwinding,
+	     uint32_t unwinding_header_size,
+	     uint32_t unwinding_size)
+{
+	int elf_fd, rc;
+
+	if (verbose > 0)
+		fprintf(stderr, "write ELF image %s\n", filename);
+
+	elf_fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644);
+	if (elf_fd == -1) {
+		pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno));
+		return -1;
+	}
+	rc = jit_write_elf(elf_fd, code_addr, sym, code, csize, debug, nr_debug_entries,
+			   unwinding, unwinding_header_size, unwinding_size);
+	close(elf_fd);
+	if (rc != 0)
+		unlink(filename);
+	return rc;
+}
+
+/* Unlock and close the jitdump stream if one is open; tolerates a NULL @jd. */
+static void jit_close(struct jit_buf_desc *jd)
+{
+	if (jd == NULL || jd->in == NULL)
+		return;
+
+	funlockfile(jd->in);
+	fclose(jd->in);
+	jd->in = NULL;
+}
+
+/*
+ * Return 0 if every session event was recorded against CLOCK_MONOTONIC, else -1.
+ */
+static int
+jit_validate_events(struct perf_session *session)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry(session->evlist, pos) {
+		if (!pos->attr.use_clockid || pos->attr.clockid != CLOCK_MONOTONIC)
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Open the jitdump file @name, lock its stream and validate the header.
+ * The byte order is detected from the magic; unknown versions, reserved
+ * flags and timestamps that cannot be matched against the perf session
+ * are rejected.  Header bytes beyond struct jitheader are consumed so the
+ * stream is left at the first record, and the directory part of @name is
+ * stored in jd->dir.
+ * Returns 0 with jd->in open and locked, -1 with jd->in closed and NULL.
+ */
+static int
+jit_open(struct jit_buf_desc *jd, const char *name)
+{
+	struct jitheader header;
+	size_t extra = 0;
+	void *buf = NULL;
+
+	jd->in = fopen(name, "r");
+	if (!jd->in)
+		return -1;
+
+	/*
+	 * protect from writer modifying the file while we are reading it
+	 */
+	flockfile(jd->in);
+
+	if (fread(&header, sizeof(header), 1, jd->in) != 1)
+		goto error;
+
+	if (header.magic != JITHEADER_MAGIC) {
+		if (header.magic != JITHEADER_MAGIC_SW)
+			goto error;
+		jd->needs_bswap = true;
+	}
+
+	if (jd->needs_bswap) {
+		header.version    = bswap_32(header.version);
+		header.total_size = bswap_32(header.total_size);
+		header.pid	  = bswap_32(header.pid);
+		header.elf_mach   = bswap_32(header.elf_mach);
+		header.timestamp  = bswap_64(header.timestamp);
+		header.flags      = bswap_64(header.flags);
+	}
+
+	jd->use_arch_timestamp = header.flags & JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
+	if (verbose > 2)
+		pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\nuse_arch_timestamp=%d\n",
+			header.version,
+			header.total_size,
+			(unsigned long long)header.timestamp,
+			header.pid,
+			header.elf_mach,
+			jd->use_arch_timestamp);
+
+	if (header.version > JITHEADER_VERSION) {
+		pr_err("wrong jitdump version %u, expected " __stringify(JITHEADER_VERSION) "\n",
+			header.version);
+		goto error;
+	}
+
+	if (header.flags & JITDUMP_FLAGS_RESERVED) {
+		pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
+		       (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED);
+		goto error;
+	}
+
+	if (jd->use_arch_timestamp && !jd->session->time_conv.time_mult) {
+		pr_err("jitdump file uses arch timestamps but there is no timestamp conversion\n");
+		goto error;
+	}
+
+	/*
+	 * validate event is using the correct clockid
+	 */
+	if (!jd->use_arch_timestamp && jit_validate_events(jd->session)) {
+		pr_err("error, jitted code must be sampled with perf record -k 1\n");
+		goto error;
+	}
+
+	/*
+	 * A newer writer may emit a larger header: consume the unknown tail.
+	 */
+	if (header.total_size > sizeof(header))
+		extra = header.total_size - sizeof(header);
+	if (extra) {
+		buf = malloc(extra);
+		if (!buf || fread(buf, extra, 1, jd->in) != 1)
+			goto error;
+	}
+
+	/*
+	 * keep dirname for generating files and mmap records
+	 */
+	if (snprintf(jd->dir, sizeof(jd->dir), "%s", name) >= (int)sizeof(jd->dir))
+		goto error;
+	dirname(jd->dir);
+
+	free(buf);
+	return 0;
+error:
+	free(buf);
+	funlockfile(jd->in);
+	fclose(jd->in);
+	jd->in = NULL;
+	return -1;
+}
+
+static union jr_entry *
+jit_get_next_entry(struct jit_buf_desc *jd)
+{
+	struct jr_prefix *prefix;
+	union jr_entry *jr;
+	void *addr;
+	size_t bs, size;
+	int id, ret;
+	/* Read the next record into jd->buf and return it, byte-swapped if needed; NULL on EOF or any failure. */
+	if (!(jd && jd->in))
+		return NULL;
+
+	if (jd->buf == NULL) {
+		size_t sz = getpagesize();
+		if (sz < sizeof(*prefix))
+			sz = sizeof(*prefix);
+
+		jd->buf = malloc(sz);
+		if (jd->buf == NULL)
+			return NULL;
+
+		jd->bufsize = sz;
+	}
+
+	prefix = jd->buf;
+
+	/*
+	 * file is still locked at this point
+	 */
+	ret = fread(prefix, sizeof(*prefix), 1, jd->in);
+	if (ret  != 1)
+		return NULL;
+
+	if (jd->needs_bswap) {
+		prefix->id   	   = bswap_32(prefix->id);
+		prefix->total_size = bswap_32(prefix->total_size);
+		prefix->timestamp  = bswap_64(prefix->timestamp);
+	}
+	id   = prefix->id;
+	size = prefix->total_size;
+
+	bs = (size_t)size;
+	if (bs < sizeof(*prefix))
+		return NULL;
+	/* an unknown id is only warned about: the record is still read and returned below */
+	if (id >= JIT_CODE_MAX) {
+		pr_warning("next_entry: unknown record type %d, skipping\n", id);
+	}
+	if (bs > jd->bufsize) {
+		void *n;
+		n = realloc(jd->buf, bs);
+		if (!n)
+			return NULL;
+		jd->buf = n;
+		jd->bufsize = bs;
+	}
+	/* the prefix already sits at the start of the buffer; the rest of the record goes right behind it */
+	addr = ((void *)jd->buf) + sizeof(*prefix);
+
+	ret = fread(addr, bs - sizeof(*prefix), 1, jd->in);
+	if (ret != 1)
+		return NULL;
+
+	jr = (union jr_entry *)jd->buf;
+	/* fix up the byte order of the type-specific fixed fields */
+	switch(id) {
+	case JIT_CODE_DEBUG_INFO:
+		if (jd->needs_bswap) {
+			uint64_t n;
+			jr->info.code_addr = bswap_64(jr->info.code_addr);
+			jr->info.nr_entry  = bswap_64(jr->info.nr_entry);
+			for (n = 0 ; n < jr->info.nr_entry; n++) {
+				jr->info.entries[n].addr    = bswap_64(jr->info.entries[n].addr);
+				jr->info.entries[n].lineno  = bswap_32(jr->info.entries[n].lineno);
+				jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim);
+			}
+		}
+		break;
+	case JIT_CODE_UNWINDING_INFO:
+		if (jd->needs_bswap) {
+			jr->unwinding.unwinding_size = bswap_64(jr->unwinding.unwinding_size);
+			jr->unwinding.eh_frame_hdr_size = bswap_64(jr->unwinding.eh_frame_hdr_size);
+			jr->unwinding.mapped_size = bswap_64(jr->unwinding.mapped_size);
+		}
+		break;
+	case JIT_CODE_CLOSE:
+		break;
+	case JIT_CODE_LOAD:
+		if (jd->needs_bswap) {
+			jr->load.pid       = bswap_32(jr->load.pid);
+			jr->load.tid       = bswap_32(jr->load.tid);
+			jr->load.vma       = bswap_64(jr->load.vma);
+			jr->load.code_addr = bswap_64(jr->load.code_addr);
+			jr->load.code_size = bswap_64(jr->load.code_size);
+			jr->load.code_index= bswap_64(jr->load.code_index);
+		}
+		jd->code_load_count++;
+		break;
+	case JIT_CODE_MOVE:
+		if (jd->needs_bswap) {
+			jr->move.pid           = bswap_32(jr->move.pid);
+			jr->move.tid           = bswap_32(jr->move.tid);
+			jr->move.vma           = bswap_64(jr->move.vma);
+			jr->move.old_code_addr = bswap_64(jr->move.old_code_addr);
+			jr->move.new_code_addr = bswap_64(jr->move.new_code_addr);
+			jr->move.code_size     = bswap_64(jr->move.code_size);
+			jr->move.code_index    = bswap_64(jr->move.code_index);
+		}
+		break;
+	case JIT_CODE_MAX:
+	default:
+		/* skip unknown record (we have read them) */
+		break;
+	}
+	return jr;
+}
+
+/* Append @event to the output perf.data and account the bytes written; 0 or -1. */
+static int
+jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
+{
+	ssize_t written = perf_data__write(jd->output, event, event->header.size);
+
+	if (written < 0)
+		return -1;
+
+	jd->bytes_written += written;
+	return 0;
+}
+
+/* Map a jitdump timestamp to perf time; identity unless arch timestamps are in use. */
+static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp)
+{
+	struct perf_tsc_conversion conv;
+
+	if (jd->use_arch_timestamp) {
+		conv.time_shift = jd->session->time_conv.time_shift;
+		conv.time_mult  = jd->session->time_conv.time_mult;
+		conv.time_zero  = jd->session->time_conv.time_zero;
+
+		/* without a multiplier no conversion is possible: report time 0 */
+		return conv.time_mult ? tsc_to_perf_time(timestamp, &conv) : 0;
+	}
+
+	return timestamp;
+}
+
+/*
+ * Handle a JIT_CODE_LOAD record: write the jitted code to an ELF file in
+ * jd->dir, then synthesize a PERF_RECORD_MMAP2 event describing it, hand
+ * it to the session and append it to the output.  Pending debug/unwinding
+ * info is consumed here, and the synthesized event is freed on every path.
+ */
+static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	struct perf_sample sample;
+	union perf_event *event;
+	struct perf_tool *tool = jd->session->tool;
+	uint64_t code, addr;
+	char *filename;
+	struct stat st;
+	size_t size;
+	u16 idr_size;
+	const char *sym;
+	uint32_t count;
+	int ret, csize, usize;
+	pid_t pid, tid;
+	struct {
+		u32 pid, tid;
+		u64 time;
+	} *id;
+
+	pid   = jr->load.pid;
+	tid   = jr->load.tid;
+	csize = jr->load.code_size;
+	usize = jd->unwinding_mapped_size;
+	addr  = jr->load.code_addr;
+	sym   = (void *)((unsigned long)jr + sizeof(jr->load));
+	code  = (unsigned long)jr + jr->load.p.total_size - csize;
+	count = jr->load.code_index;
+	idr_size = jd->machine->id_hdr_size;
+
+	event = calloc(1, sizeof(*event) + idr_size);
+	if (!event)
+		return -1;
+
+	filename = event->mmap2.filename;
+	size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so", jd->dir, pid, count);
+	size++; /* for \0 */
+
+	size = PERF_ALIGN(size, sizeof(u64));
+	ret = jit_emit_elf(filename, sym, addr, (const void *)(uintptr_t)code, csize, jd->debug_data, jd->nr_debug_entries,
+			   jd->unwinding_data, jd->eh_frame_hdr_size, jd->unwinding_size);
+
+	if (jd->debug_data && jd->nr_debug_entries) {
+		free(jd->debug_data);
+		jd->debug_data = NULL;
+		jd->nr_debug_entries = 0;
+	}
+
+	if (jd->unwinding_data && jd->eh_frame_hdr_size) {
+		free(jd->unwinding_data);
+		jd->unwinding_data = NULL;
+		jd->eh_frame_hdr_size = 0;
+		jd->unwinding_mapped_size = 0;
+		jd->unwinding_size = 0;
+	}
+
+	if (ret) {
+		ret = -1;
+		goto out;
+	}
+	if (stat(filename, &st))
+		memset(&st, 0, sizeof(st));
+
+	event->mmap2.header.type = PERF_RECORD_MMAP2;
+	event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+	event->mmap2.header.size = (sizeof(event->mmap2) -
+			(sizeof(event->mmap2.filename) - size) + idr_size);
+	event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+	event->mmap2.start = addr;
+	event->mmap2.len   = usize ? ALIGN_8(csize) + usize : csize;
+	event->mmap2.pid   = pid;
+	event->mmap2.tid   = tid;
+	event->mmap2.ino   = st.st_ino;
+	event->mmap2.maj   = major(st.st_dev);
+	event->mmap2.min   = minor(st.st_dev);
+	event->mmap2.prot  = st.st_mode;
+	event->mmap2.flags = MAP_SHARED;
+	event->mmap2.ino_generation = 1;
+
+	id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+	if (jd->sample_type & PERF_SAMPLE_TID) {
+		id->pid  = pid;
+		id->tid  = tid;
+	}
+	if (jd->sample_type & PERF_SAMPLE_TIME)
+		id->time = convert_timestamp(jd, jr->load.p.timestamp);
+
+	/*
+	 * create pseudo sample to induce dso hit increment
+	 * use first address as sample address
+	 */
+	memset(&sample, 0, sizeof(sample));
+	sample.cpumode = PERF_RECORD_MISC_USER;
+	sample.pid  = pid;
+	sample.tid  = tid;
+	sample.time = id->time;
+	sample.ip   = addr;
+
+	ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+	if (ret)
+		goto out;
+
+	ret = jit_inject_event(jd, event);
+	/*
+	 * mark dso as use to generate buildid in the header
+	 */
+	if (!ret)
+		build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+out:
+	free(event);
+	return ret;
+}
+
+/*
+ * Handle a JIT_CODE_MOVE record: synthesize a PERF_RECORD_MMAP2 event that
+ * maps the ELF file written for this code_index at its new address.  The
+ * name carries the same ".so" suffix jit_repipe_code_load() creates the
+ * file with.  The synthesized event is freed on every path.
+ */
+static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	struct perf_sample sample;
+	union perf_event *event;
+	struct perf_tool *tool = jd->session->tool;
+	char *filename;
+	size_t size;
+	struct stat st;
+	int usize;
+	u16 idr_size;
+	int ret;
+	pid_t pid, tid;
+	struct {
+		u32 pid, tid;
+		u64 time;
+	} *id;
+
+	pid = jr->move.pid;
+	tid =  jr->move.tid;
+	usize = jd->unwinding_mapped_size;
+	idr_size = jd->machine->id_hdr_size;
+
+	/*
+	 * +16 to account for sample_id_all (hack)
+	 */
+	event = calloc(1, sizeof(*event) + 16);
+	if (!event)
+		return -1;
+
+	filename = event->mmap2.filename;
+	size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so", jd->dir, pid, jr->move.code_index);
+	size++; /* for \0 */
+
+	if (stat(filename, &st))
+		memset(&st, 0, sizeof(st));
+	size = PERF_ALIGN(size, sizeof(u64));
+
+	event->mmap2.header.type = PERF_RECORD_MMAP2;
+	event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+	event->mmap2.header.size = (sizeof(event->mmap2) -
+			(sizeof(event->mmap2.filename) - size) + idr_size);
+	event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+	event->mmap2.start = jr->move.new_code_addr;
+	event->mmap2.len   = usize ? ALIGN_8(jr->move.code_size) + usize : jr->move.code_size;
+	event->mmap2.pid   = pid;
+	event->mmap2.tid   = tid;
+	event->mmap2.ino   = st.st_ino;
+	event->mmap2.maj   = major(st.st_dev);
+	event->mmap2.min   = minor(st.st_dev);
+	event->mmap2.prot  = st.st_mode;
+	event->mmap2.flags = MAP_SHARED;
+	event->mmap2.ino_generation = 1;
+
+	id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+	if (jd->sample_type & PERF_SAMPLE_TID) {
+		id->pid  = pid;
+		id->tid  = tid;
+	}
+	if (jd->sample_type & PERF_SAMPLE_TIME)
+		id->time = convert_timestamp(jd, jr->move.p.timestamp);
+
+	/*
+	 * create pseudo sample to induce dso hit increment
+	 * use first address as sample address
+	 */
+	memset(&sample, 0, sizeof(sample));
+	sample.cpumode = PERF_RECORD_MISC_USER;
+	sample.pid  = pid;
+	sample.tid  = tid;
+	sample.time = id->time;
+	sample.ip   = jr->move.new_code_addr;
+
+	ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+	if (ret)
+		goto out;
+
+	ret = jit_inject_event(jd, event);
+	if (!ret)
+		build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+out:
+	free(event);
+	return ret;
+}
+
+/*
+ * Stash the entries of a JIT_CODE_DEBUG_INFO record in @jd until the next
+ * code load consumes them.  Returns 0 on success, -1 on bad input or OOM.
+ */
+static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	void *data;
+	size_t sz;
+
+	/* a record shorter than its fixed part would make sz wrap around */
+	if (!(jd && jr) || jr->prefix.total_size < sizeof(jr->info))
+		return -1;
+
+	sz  = jr->prefix.total_size - sizeof(jr->info);
+	data = malloc(sz);
+	if (!data)
+		return -1;
+	memcpy(data, &jr->info.entries, sz);
+
+	free(jd->debug_data);	/* drop stale info that no code load claimed */
+	jd->debug_data       = data;
+	/* nr_entry, not sz: padding cannot be told apart from real entries */
+	jd->nr_debug_entries = jr->info.nr_entry;
+	return 0;
+}
+
+/* Stash a JIT_CODE_UNWINDING_INFO payload in @jd for the next code load; 0 or -1. */
+static int
+jit_repipe_unwinding_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	void *unwinding_data;
+	uint32_t unwinding_data_size;
+
+	/* a record shorter than its fixed part would make the size wrap around */
+	if (!(jd && jr) || jr->prefix.total_size < sizeof(jr->unwinding))
+		return -1;
+
+	unwinding_data_size  = jr->prefix.total_size - sizeof(jr->unwinding);
+	unwinding_data = malloc(unwinding_data_size);
+	if (!unwinding_data)
+		return -1;
+	memcpy(unwinding_data, &jr->unwinding.unwinding_data, unwinding_data_size);
+
+	free(jd->unwinding_data);	/* drop stale info that no code load claimed */
+	jd->eh_frame_hdr_size = jr->unwinding.eh_frame_hdr_size;
+	jd->unwinding_size = jr->unwinding.unwinding_size;
+	jd->unwinding_mapped_size = jr->unwinding.mapped_size;
+	jd->unwinding_data = unwinding_data;
+	return 0;
+}
+
+/*
+ * Replay every record of the opened jitdump file through its handler.
+ * Errors do not stop the loop; the value returned is the result of the
+ * last record processed (0 for record types without a handler).
+ */
+static int
+jit_process_dump(struct jit_buf_desc *jd)
+{
+	union jr_entry *rec;
+	int last = 0;
+
+	for (rec = jit_get_next_entry(jd); rec; rec = jit_get_next_entry(jd)) {
+		uint32_t kind = rec->prefix.id;
+
+		if (kind == JIT_CODE_LOAD)
+			last = jit_repipe_code_load(jd, rec);
+		else if (kind == JIT_CODE_MOVE)
+			last = jit_repipe_code_move(jd, rec);
+		else if (kind == JIT_CODE_DEBUG_INFO)
+			last = jit_repipe_debug_info(jd, rec);
+		else if (kind == JIT_CODE_UNWINDING_INFO)
+			last = jit_repipe_unwinding_info(jd, rec);
+		else
+			last = 0;
+	}
+	return last;
+}
+
+/*
+ * Open, replay and close the jitdump file @path; -1 if it cannot be opened, else 0.
+ */
+static int
+jit_inject(struct jit_buf_desc *jd, char *path)
+{
+	int rc;
+
+	if (verbose > 0)
+		fprintf(stderr, "injecting: %s\n", path);
+
+	if (jit_open(jd, path))
+		return -1;
+
+	rc = jit_process_dump(jd);
+	jit_close(jd);
+
+	if (verbose > 0)
+		fprintf(stderr, "injected: %s (%d)\n", path, rc);
+	return 0;
+}
+
+/*
+ * File must be with pattern .../jit-XXXX.dump
+ * where XXXX is the PID of the process which did the mmap()
+ * as captured in the RECORD_MMAP record
+ */
+static int
+jit_detect(char *mmap_name, pid_t pid)
+{
+	static const char marker[] = "/jit-";
+	const size_t marker_len = sizeof(marker) - 1;
+	char *base, *digits;
+	char *suffix = NULL;
+	pid_t file_pid;
+
+	if (verbose > 2)
+		fprintf(stderr, "jit marker trying : %s\n", mmap_name);
+
+	/*
+	 * locate the last path component; a name without '/' never matches
+	 */
+	base = strrchr(mmap_name, '/');
+	if (base == NULL)
+		return -1;
+
+	/*
+	 * the component must start with the "jit-" marker ...
+	 */
+	if (strncmp(base, marker, marker_len) != 0)
+		return -1;
+
+	/*
+	 * ... immediately followed by a decimal pid
+	 */
+	digits = base + marker_len;
+	if (!isdigit(*digits))
+		return -1;
+
+	file_pid = (int)strtol(digits, &suffix, 10);
+	if (suffix == NULL)
+		return -1;
+
+	/*
+	 * the pid in the name must agree with the mmap pid, except that
+	 * pid==0 (system-wide mode, synthesized) accepts any pid
+	 */
+	if (pid != 0 && file_pid != pid)
+		return -1;
+
+	/*
+	 * nothing but ".dump" may follow the pid
+	 */
+	if (strcmp(suffix, ".dump") != 0)
+		return -1;
+
+	if (verbose > 0)
+		fprintf(stderr, "jit marker found: %s\n", mmap_name);
+
+	return 0;
+}
+
+/*
+ * Entry point for mmap events: when @filename is a jitdump marker for @pid,
+ * replay that jitdump file into @output.  Returns 0 if it is not a marker,
+ * 1 after injection (*nbytes = bytes written), jit_inject()'s error otherwise.
+ */
+int
+jit_process(struct perf_session *session,
+	    struct perf_data *output,
+	    struct machine *machine,
+	    char *filename,
+	    pid_t pid,
+	    u64 *nbytes)
+{
+	struct jit_buf_desc jd;
+	int rc;
+
+	/*
+	 * first, detect marker mmap (i.e., the jitdump mmap)
+	 */
+	if (jit_detect(filename, pid))
+		return 0;
+
+	memset(&jd, 0, sizeof(jd));
+	jd.session = session;
+	jd.output  = output;
+	jd.machine = machine;
+
+	/*
+	 * track sample_type to compute id_all layout
+	 * perf sets the same sample type to all events as of now
+	 */
+	jd.sample_type = perf_evlist__first(session->evlist)->attr.sample_type;
+
+	*nbytes = 0;
+	rc = jit_inject(&jd, filename);
+	if (rc)
+		return rc;
+
+	*nbytes = jd.bytes_written;
+	return 1;
+}
diff --git a/util/jitdump.h b/util/jitdump.h
new file mode 100644
index 0000000..c6b9b67
--- /dev/null
+++ b/util/jitdump.h
@@ -0,0 +1,139 @@
+/*
+ * jitdump.h: jitted code info encapsulation file format
+ *
+ * Adapted from OProfile GPLv2 support jidump.h:
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#ifndef JITDUMP_H
+#define JITDUMP_H
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdint.h>
+
+/* JiTD */
+#define JITHEADER_MAGIC		0x4A695444
+#define JITHEADER_MAGIC_SW	0x4454694A
+
+#define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7)
+#define ALIGN_8(x) (((x) + 7) & (~7))
+
+#define JITHEADER_VERSION 1
+
+enum jitdump_flags_bits {
+	JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT,
+	JITDUMP_FLAGS_MAX_BIT,
+};
+
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP	(1ULL << JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT)
+
+#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \
+				(~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0)
+
+struct jitheader {
+	uint32_t magic;		/* characters "JiTD" (JITHEADER_MAGIC; reads as JITHEADER_MAGIC_SW when byte-swapped) */
+	uint32_t version;	/* header version */
+	uint32_t total_size;	/* total size of header */
+	uint32_t elf_mach;	/* elf mach target */
+	uint32_t pad1;		/* reserved */
+	uint32_t pid;		/* JIT process id */
+	uint64_t timestamp;	/* timestamp */
+	uint64_t flags;		/* flags */
+};
+
+enum jit_record_type {
+	JIT_CODE_LOAD		= 0,
+        JIT_CODE_MOVE           = 1,
+	JIT_CODE_DEBUG_INFO	= 2,
+	JIT_CODE_CLOSE		= 3,
+	JIT_CODE_UNWINDING_INFO	= 4,
+
+	JIT_CODE_MAX,
+};
+
+/* record prefix (mandatory in each record) */
+struct jr_prefix {
+	uint32_t id;
+	uint32_t total_size;
+	uint64_t timestamp;
+};
+
+struct jr_code_load {
+	struct jr_prefix p;
+
+	uint32_t pid;
+	uint32_t tid;
+	uint64_t vma;
+	uint64_t code_addr;
+	uint64_t code_size;
+	uint64_t code_index;
+};
+
+struct jr_code_close {
+	struct jr_prefix p;
+};
+
+struct jr_code_move {
+	struct jr_prefix p;
+
+	uint32_t pid;
+	uint32_t tid;
+	uint64_t vma;
+	uint64_t old_code_addr;
+	uint64_t new_code_addr;
+	uint64_t code_size;
+	uint64_t code_index;
+};
+
+struct debug_entry {
+	uint64_t addr;
+	int lineno;	    /* source line number starting at 1 */
+	int discrim;	    /* column discriminator, 0 is default */
+	const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */
+};
+
+struct jr_code_debug_info {
+	struct jr_prefix p;
+
+	uint64_t code_addr;
+	uint64_t nr_entry;
+	struct debug_entry entries[0];
+};
+
+struct jr_code_unwinding_info {
+	struct jr_prefix p;
+
+	uint64_t unwinding_size;
+	uint64_t eh_frame_hdr_size;
+	uint64_t mapped_size;
+	const char unwinding_data[0];
+};
+
+union jr_entry {
+        struct jr_code_debug_info info;
+        struct jr_code_close close;
+        struct jr_code_load load;
+        struct jr_code_move move;
+        struct jr_prefix prefix;
+        struct jr_code_unwinding_info unwinding;
+};
+
+/* Step over @ent and its NUL-terminated name to reach the entry that follows. */
+static inline struct debug_entry *
+debug_entry_next(struct debug_entry *ent)
+{
+	char *after = (char *)(ent + 1);
+	return (struct debug_entry *)(after + strlen(ent->name) + 1);
+}
+
+/* The file name is stored directly behind the fixed part of @ent. */
+static inline char *
+debug_entry_file(struct debug_entry *ent)
+{
+	return (char *)(ent + 1);
+}
+
+#endif /* !JITDUMP_H */
diff --git a/util/kvm-stat.h b/util/kvm-stat.h
new file mode 100644
index 0000000..7b1f065
--- /dev/null
+++ b/util/kvm-stat.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_KVM_STAT_H
+#define __PERF_KVM_STAT_H
+
+#include "../perf.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "session.h"
+#include "tool.h"
+#include "stat.h"
+
+struct event_key {
+	#define INVALID_KEY     (~0ULL)
+	u64 key;
+	int info;
+	struct exit_reasons_table *exit_reasons;
+};
+
+struct kvm_event_stats {
+	u64 time;
+	struct stats stats;
+};
+
+struct kvm_event {
+	struct list_head hash_entry;
+	struct rb_node rb;
+
+	struct event_key key;
+
+	struct kvm_event_stats total;
+
+	#define DEFAULT_VCPU_NUM 8
+	int max_vcpu;
+	struct kvm_event_stats *vcpu;
+};
+
+typedef int (*key_cmp_fun)(struct kvm_event*, struct kvm_event*, int);
+
+struct kvm_event_key {
+	const char *name;
+	key_cmp_fun key;
+};
+
+struct perf_kvm_stat;
+
+struct child_event_ops {
+	void (*get_key)(struct perf_evsel *evsel,
+			struct perf_sample *sample,
+			struct event_key *key);
+	const char *name;
+};
+
+struct kvm_events_ops {
+	bool (*is_begin_event)(struct perf_evsel *evsel,
+			       struct perf_sample *sample,
+			       struct event_key *key);
+	bool (*is_end_event)(struct perf_evsel *evsel,
+			     struct perf_sample *sample, struct event_key *key);
+	struct child_event_ops *child_ops;
+	void (*decode_key)(struct perf_kvm_stat *kvm, struct event_key *key,
+			   char *decode);
+	const char *name;
+};
+
+struct exit_reasons_table {
+	unsigned long exit_code;
+	const char *reason;
+};
+
+#define EVENTS_BITS		12
+#define EVENTS_CACHE_SIZE	(1UL << EVENTS_BITS)
+
+struct perf_kvm_stat {
+	struct perf_tool    tool;
+	struct record_opts  opts;
+	struct perf_evlist  *evlist;
+	struct perf_session *session;
+
+	const char *file_name;
+	const char *report_event;
+	const char *sort_key;
+	int trace_vcpu;
+
+	struct exit_reasons_table *exit_reasons;
+	const char *exit_reasons_isa;
+
+	struct kvm_events_ops *events_ops;
+	key_cmp_fun compare;
+	struct list_head kvm_events_cache[EVENTS_CACHE_SIZE];
+
+	u64 total_time;
+	u64 total_count;
+	u64 lost_events;
+	u64 duration;
+
+	struct intlist *pid_list;
+
+	struct rb_root result;
+
+	int timerfd;
+	unsigned int display_time;
+	bool live;
+	bool force;
+};
+
+struct kvm_reg_events_ops {
+	const char *name;
+	struct kvm_events_ops *ops;
+};
+
+void exit_event_get_key(struct perf_evsel *evsel,
+			struct perf_sample *sample,
+			struct event_key *key);
+bool exit_event_begin(struct perf_evsel *evsel,
+		      struct perf_sample *sample,
+		      struct event_key *key);
+bool exit_event_end(struct perf_evsel *evsel,
+		    struct perf_sample *sample,
+		    struct event_key *key);
+void exit_event_decode_key(struct perf_kvm_stat *kvm,
+			   struct event_key *key,
+			   char *decode);
+
+bool kvm_exit_event(struct perf_evsel *evsel);
+bool kvm_entry_event(struct perf_evsel *evsel);
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm);
+
+#define define_exit_reasons_table(name, symbols)	\
+	static struct exit_reasons_table name[] = {	\
+		symbols, { -1, NULL }	/* { -1, NULL } is appended as the final table entry */	\
+	}
+
+/*
+ * arch specific callbacks and data structures
+ */
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
+
+extern const char *kvm_events_tp[];
+extern struct kvm_reg_events_ops kvm_reg_events_ops[];
+extern const char * const kvm_skip_events[];
+extern const char *vcpu_id_str;
+extern const int decode_str_len;
+extern const char *kvm_exit_reason;
+extern const char *kvm_entry_trace;
+extern const char *kvm_exit_trace;
+
+#endif /* __PERF_KVM_STAT_H */
diff --git a/util/levenshtein.c b/util/levenshtein.c
new file mode 100644
index 0000000..a217ecf
--- /dev/null
+++ b/util/levenshtein.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "levenshtein.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * This function implements the Damerau-Levenshtein algorithm to
+ * calculate a distance between strings.
+ *
+ * Basically, it says how many letters need to be swapped, substituted,
+ * deleted from, or added to string1, at least, to get string2.
+ *
+ * The idea is to build a distance matrix for the substrings of both
+ * strings.  To avoid a large space complexity, only the last three rows
+ * are kept in memory (if swaps had the same or higher cost as one deletion
+ * plus one insertion, only two rows would be needed).
+ *
+ * At any stage, "i + 1" denotes the length of the current substring of
+ * string1 that the distance is calculated for.
+ *
+ * row2 holds the current row, row1 the previous row (i.e. for the substring
+ * of string1 of length "i"), and row0 the row before that.
+ *
+ * In other words, at the start of the big loop, row2[j + 1] contains the
+ * Damerau-Levenshtein distance between the substring of string1 of length
+ * "i" and the substring of string2 of length "j + 1".
+ *
+ * All the big loop does is determine the partial minimum-cost paths.
+ *
+ * It does so by calculating the costs of the path ending in characters
+ * i (in string1) and j (in string2), respectively, given that the last
+ * operation is a substitution, a swap, a deletion, or an insertion.
+ *
+ * This implementation allows the costs to be weighted:
+ *
+ * - w (as in "sWap")
+ * - s (as in "Substitution")
+ * - a (for insertion, AKA "Add")
+ * - d (as in "Deletion")
+ *
+ * Note that this algorithm calculates a distance _iff_ d == a.
+ */
+/*
+ * Returns the weighted Damerau-Levenshtein cost of turning string1 into
+ * string2, where w, s, a and d are the costs of a swap, a substitution,
+ * an insertion and a deletion respectively.
+ *
+ * Returns -ENOMEM if the three working rows cannot be allocated.
+ */
+int levenshtein(const char *string1, const char *string2,
+		int w, int s, int a, int d)
+{
+	int len1 = strlen(string1), len2 = strlen(string2);
+	int *row0 = malloc(sizeof(int) * (len2 + 1));
+	int *row1 = malloc(sizeof(int) * (len2 + 1));
+	int *row2 = malloc(sizeof(int) * (len2 + 1));
+	int i, j;
+
+	/* Any of the rows may have failed; free(NULL) is a no-op. */
+	if (!row0 || !row1 || !row2) {
+		free(row0);
+		free(row1);
+		free(row2);
+		return -ENOMEM;
+	}
+
+	/* Row for the empty prefix of string1: j insertions. */
+	for (j = 0; j <= len2; j++)
+		row1[j] = j * a;
+	for (i = 0; i < len1; i++) {
+		int *dummy;
+
+		/* Empty prefix of string2: i + 1 deletions. */
+		row2[0] = (i + 1) * d;
+		for (j = 0; j < len2; j++) {
+			/* substitution */
+			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+			/* swap */
+			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+					string1[i] == string2[j - 1] &&
+					row2[j + 1] > row0[j - 1] + w)
+				row2[j + 1] = row0[j - 1] + w;
+			/* deletion */
+			if (row2[j + 1] > row1[j + 1] + d)
+				row2[j + 1] = row1[j + 1] + d;
+			/* insertion */
+			if (row2[j + 1] > row2[j] + a)
+				row2[j + 1] = row2[j] + a;
+		}
+
+		/* Rotate the rows: the oldest one becomes the next scratch row. */
+		dummy = row0;
+		row0 = row1;
+		row1 = row2;
+		row2 = dummy;
+	}
+
+	/* After the rotation the last computed row is row1. */
+	i = row1[len2];
+	free(row0);
+	free(row1);
+	free(row2);
+
+	return i;
+}
diff --git a/util/levenshtein.h b/util/levenshtein.h
new file mode 100644
index 0000000..34ca173
--- /dev/null
+++ b/util/levenshtein.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_LEVENSHTEIN_H
+#define __PERF_LEVENSHTEIN_H
+
+/*
+ * Weighted Damerau-Levenshtein cost of turning string1 into string2.
+ * The four penalties are the per-operation costs, in the order
+ * swap, substitution, insertion, deletion.
+ */
+int levenshtein(const char *string1, const char *string2,
+	int swap_penalty, int substition_penalty,
+	int insertion_penalty, int deletion_penalty);
+
+#endif /* __PERF_LEVENSHTEIN_H */
diff --git a/util/libunwind/arm64.c b/util/libunwind/arm64.c
new file mode 100644
index 0000000..66756e6
--- /dev/null
+++ b/util/libunwind/arm64.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file sets up the defines needed to compile an arch specific binary
+ * from the generic one.
+ *
+ * The name of the function 'LIBUNWIND__ARCH_REG_ID' is set according to the
+ * arch name, and the definition of this function is included directly from
+ * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch specific unwind methods are exported which will
+ * be assigned to each arm64 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+
+/* Define arch specific functions & regs for libunwind, should be
+ * defined before including "unwind.h"
+ */
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arm64_reg_id(regnum)
+#define LIBUNWIND__ARCH_REG_IP PERF_REG_ARM64_PC
+#define LIBUNWIND__ARCH_REG_SP PERF_REG_ARM64_SP
+
+#include "unwind.h"
+#include "debug.h"
+#include "libunwind-aarch64.h"
+#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#include "../../arch/arm64/util/unwind-libunwind.c"
+
+/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind,
+ * assign NO_LIBUNWIND_DEBUG_FRAME_AARCH64 to it for compiling arm64
+ * unwind methods.
+ */
+#undef NO_LIBUNWIND_DEBUG_FRAME
+#ifdef NO_LIBUNWIND_DEBUG_FRAME_AARCH64
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+/*
+ * Exported handle to the arm64 build of the unwind ops.
+ * NOTE(review): _unwind_libunwind_ops is presumably defined by the
+ * "util/unwind-libunwind-local.c" include above -- confirm.
+ */
+struct unwind_libunwind_ops *
+arm64_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/util/libunwind/x86_32.c b/util/libunwind/x86_32.c
new file mode 100644
index 0000000..c5e5681
--- /dev/null
+++ b/util/libunwind/x86_32.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file sets up the defines needed to compile an arch specific binary
+ * from the generic one.
+ *
+ * The name of the function 'LIBUNWIND__ARCH_REG_ID' is set according to the
+ * arch name, and the definition of this function is included directly from
+ * 'arch/x86/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch specific unwind methods are exported which will
+ * be assigned to each x86 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+
+/* Define arch specific functions & regs for libunwind, should be
+ * defined before including "unwind.h"
+ */
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__x86_reg_id(regnum)
+#define LIBUNWIND__ARCH_REG_IP PERF_REG_X86_IP
+#define LIBUNWIND__ARCH_REG_SP PERF_REG_X86_SP
+
+#include "unwind.h"
+#include "debug.h"
+#include "libunwind-x86.h"
+#include <../../../../arch/x86/include/uapi/asm/perf_regs.h>
+
+/* HAVE_ARCH_X86_64_SUPPORT is used in 'arch/x86/util/unwind-libunwind.c'.
+ * For x86_32 we undef it, so that only the x86_32 code is compiled.
+ */
+#undef HAVE_ARCH_X86_64_SUPPORT
+#include "../../arch/x86/util/unwind-libunwind.c"
+
+/* Explicitly define NO_LIBUNWIND_DEBUG_FRAME, because non-ARM has no
+ * dwarf_find_debug_frame() function.
+ */
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+/*
+ * Exported handle to the x86_32 build of the unwind ops.
+ * NOTE(review): _unwind_libunwind_ops is presumably defined by the
+ * "util/unwind-libunwind-local.c" include above -- confirm.
+ */
+struct unwind_libunwind_ops *
+x86_32_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/util/llvm-utils.c b/util/llvm-utils.c
new file mode 100644
index 0000000..1cca0a2
--- /dev/null
+++ b/util/llvm-utils.c
@@ -0,0 +1,541 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/err.h>
+#include "debug.h"
+#include "llvm-utils.h"
+#include "config.h"
+#include "util.h"
+#include <sys/wait.h>
+
+#define CLANG_BPF_CMD_DEFAULT_TEMPLATE				\
+		"$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
+		"-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE "	\
+		"$CLANG_OPTIONS $KERNEL_INC_OPTIONS "		\
+		"-Wno-unused-value -Wno-pointer-sign "		\
+		"-working-directory $WORKING_DIR "		\
+		"-c \"$CLANG_SOURCE\" -target bpf -O2 -o -"
+
+/*
+ * Global LLVM/clang settings with their built-in defaults.  The string
+ * fields are overwritten by perf_llvm_config() when the matching "llvm.*"
+ * config key is seen.  dump_obj is not listed here and therefore starts
+ * out as false (zero-initialised).
+ */
+struct llvm_param llvm_param = {
+	.clang_path = "clang",
+	.clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE,
+	.clang_opt = NULL,
+	.kbuild_dir = NULL,
+	.kbuild_opts = NULL,
+	.user_set_param = false,
+};
+
+/*
+ * Config callback for the "llvm." section.
+ *
+ * @var:   full config key; keys that do not start with "llvm." are ignored.
+ * @value: the string assigned to that key.
+ *
+ * Stores a strdup()ed copy of @value in the matching llvm_param field (or
+ * the parsed boolean for "dump-obj") and marks llvm_param.user_set_param.
+ *
+ * Returns 0 if the key was ignored or applied, -1 for an unknown "llvm."
+ * key.
+ */
+int perf_llvm_config(const char *var, const char *value)
+{
+	if (!strstarts(var, "llvm."))
+		return 0;
+	/* Skip the "llvm." prefix; sizeof() counts the trailing NUL. */
+	var += sizeof("llvm.") - 1;
+
+	if (!strcmp(var, "clang-path"))
+		llvm_param.clang_path = strdup(value);
+	else if (!strcmp(var, "clang-bpf-cmd-template"))
+		llvm_param.clang_bpf_cmd_template = strdup(value);
+	else if (!strcmp(var, "clang-opt"))
+		llvm_param.clang_opt = strdup(value);
+	else if (!strcmp(var, "kbuild-dir"))
+		llvm_param.kbuild_dir = strdup(value);
+	else if (!strcmp(var, "kbuild-opts"))
+		llvm_param.kbuild_opts = strdup(value);
+	else if (!strcmp(var, "dump-obj"))
+		llvm_param.dump_obj = !!perf_config_bool(var, value);
+	else {
+		/* It is the option name that is invalid, not its value. */
+		pr_debug("Invalid LLVM config option: %s\n", var);
+		return -1;
+	}
+	llvm_param.user_set_param = true;
+	return 0;
+}
+
+/*
+ * Locate an executable and write its path into @output (PATH_MAX bytes).
+ *
+ * If @def is an absolute path that exists it is used as is.  If @def is a
+ * non-empty relative name it replaces @name.  The (possibly replaced) name
+ * is then looked up in every directory of $PATH.
+ *
+ * Returns 0 when found, -1 if $PATH is unset or cannot be duplicated, and
+ * -ENOENT if no directory in $PATH contains the program.
+ */
+static int
+search_program(const char *def, const char *name,
+	       char *output)
+{
+	char candidate[PATH_MAX];
+	char *path_env, *dir, *saveptr = NULL;
+	int ret = -ENOENT;
+
+	output[0] = '\0';
+
+	if (def && def[0] == '/') {
+		if (access(def, F_OK) == 0) {
+			strlcpy(output, def, PATH_MAX);
+			return 0;
+		}
+		/* Absolute default is missing: fall back to @name in $PATH. */
+	} else if (def && def[0] != '\0') {
+		name = def;
+	}
+
+	path_env = getenv("PATH");
+	if (!path_env)
+		return -1;
+	/* strtok_r() modifies its input, so work on a private copy. */
+	path_env = strdup(path_env);
+	if (!path_env)
+		return -1;
+
+	for (dir = strtok_r(path_env, ":", &saveptr); dir;
+	     dir = strtok_r(NULL, ":", &saveptr)) {
+		scnprintf(candidate, sizeof(candidate), "%s/%s", dir, name);
+		if (access(candidate, F_OK) != 0)
+			continue;
+
+		strlcpy(output, candidate, PATH_MAX);
+		ret = 0;
+		break;
+	}
+
+	free(path_env);
+	return ret;
+}
+
+/* Number of bytes requested from the pipe per fread() call. */
+#define READ_SIZE	4096
+/*
+ * Run @cmd through popen() and collect everything it writes to stdout.
+ *
+ * @p_buf:     if non-NULL, receives the malloc()ed output buffer on success
+ *             (the caller must free it); if NULL the buffer is freed here.
+ * @p_read_sz: if non-NULL, receives the number of bytes read.
+ *
+ * The buffer always carries one extra '\0' byte after the data.
+ *
+ * Returns 0 on success.  On failure returns -EINVAL (popen failed, or the
+ * command exited with a non-zero status), -ENOMEM or -EIO, and sets
+ * *p_buf to NULL and *p_read_sz to 0 where those pointers are given.
+ */
+static int
+read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz)
+{
+	int err = 0;
+	void *buf = NULL;
+	FILE *file = NULL;
+	size_t read_sz = 0, buf_sz = 0;
+	char serr[STRERR_BUFSIZE];
+
+	file = popen(cmd, "r");
+	if (!file) {
+		pr_err("ERROR: unable to popen cmd: %s\n",
+		       str_error_r(errno, serr, sizeof(serr)));
+		return -EINVAL;
+	}
+
+	while (!feof(file) && !ferror(file)) {
+		/*
+		 * Make buf_sz always have one byte extra space so we
+		 * can put '\0' there.
+		 */
+		if (buf_sz - read_sz < READ_SIZE + 1) {
+			void *new_buf;
+
+			buf_sz = read_sz + READ_SIZE + 1;
+			new_buf = realloc(buf, buf_sz);
+
+			if (!new_buf) {
+				pr_err("ERROR: failed to realloc memory\n");
+				err = -ENOMEM;
+				goto errout;
+			}
+
+			buf = new_buf;
+		}
+		read_sz += fread(buf + read_sz, 1, READ_SIZE, file);
+	}
+
+	/* Sanity check: the spare byte for the terminator must still exist. */
+	if (buf_sz - read_sz < 1) {
+		pr_err("ERROR: internal error\n");
+		err = -EINVAL;
+		goto errout;
+	}
+
+	if (ferror(file)) {
+		pr_err("ERROR: error occurred when reading from pipe: %s\n",
+		       str_error_r(errno, serr, sizeof(serr)));
+		err = -EIO;
+		goto errout;
+	}
+
+	/* file is cleared so that errout does not pclose() it a second time. */
+	err = WEXITSTATUS(pclose(file));
+	file = NULL;
+	if (err) {
+		err = -EINVAL;
+		goto errout;
+	}
+
+	/*
+	 * If buf is string, give it terminal '\0' to make our life
+	 * easier. If buf is not string, that '\0' is out of space
+	 * indicated by read_sz so caller won't even notice it.
+	 */
+	((char *)buf)[read_sz] = '\0';
+
+	if (!p_buf)
+		free(buf);
+	else
+		*p_buf = buf;
+
+	if (p_read_sz)
+		*p_read_sz = read_sz;
+	return 0;
+
+errout:
+	if (file)
+		pclose(file);
+	free(buf);
+	if (p_buf)
+		*p_buf = NULL;
+	if (p_read_sz)
+		*p_read_sz = 0;
+	return err;
+}
+
+/*
+ * Set environment variable @var to @value, overwriting any existing
+ * setting; a NULL @value removes the variable instead.  Either action is
+ * reported through pr_debug().
+ */
+static inline void
+force_set_env(const char *var, const char *value)
+{
+	if (!value) {
+		unsetenv(var);
+		pr_debug("unset env: %s\n", var);
+		return;
+	}
+
+	setenv(var, value, 1);
+	pr_debug("set env: %s=%s\n", var, value);
+}
+
+/*
+ * Print, via pr_err(), a hint about the required LLVM version, where to
+ * get it, and how to adapt 'clang-bpf-cmd-template' for an older clang.
+ */
+static void
+version_notice(void)
+{
+	pr_err(
+"     \tLLVM 3.7 or newer is required. Which can be found from http://llvm.org\n"
+"     \tYou may want to try git trunk:\n"
+"     \t\tgit clone http://llvm.org/git/llvm.git\n"
+"     \t\t     and\n"
+"     \t\tgit clone http://llvm.org/git/clang.git\n\n"
+"     \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n"
+"     \tdebian/ubuntu:\n"
+"     \t\thttp://llvm.org/apt\n\n"
+"     \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n"
+"     \toption in [llvm] section of ~/.perfconfig to:\n\n"
+"     \t  \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \\\n"
+"     \t     -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n"
+"     \t     -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n"
+"     \t(Replace /path/to/llc with path to your llc)\n\n"
+);
+}
+
+/*
+ * Work out the kernel build directory.
+ *
+ * Uses llvm_param.kbuild_dir when it is set; otherwise falls back to
+ * "/lib/modules/<release>/build" for the kernel release reported by
+ * fetch_kernel_version().  The candidate is accepted only if its
+ * include/generated/autoconf.h is readable.
+ *
+ * On success *kbuild_dir receives a malloc()ed path that the caller frees.
+ *
+ * Returns 0 on success, -EINVAL if the kernel release cannot be fetched,
+ * -ENOMEM on allocation failure, -ENOENT if autoconf.h is not readable.
+ */
+static int detect_kbuild_dir(char **kbuild_dir)
+{
+	const char *test_dir = llvm_param.kbuild_dir;
+	const char *prefix_dir = "";
+	const char *suffix_dir = "";
+	/*
+	 * _UTSNAME_LENGTH is 65.  This buffer must live at function scope:
+	 * test_dir may point into it and is used until the end of the
+	 * function.
+	 */
+	char release[128];
+
+	char *autoconf_path;
+
+	int err;
+
+	if (!test_dir) {
+		err = fetch_kernel_version(NULL, release,
+					   sizeof(release));
+		if (err)
+			return -EINVAL;
+
+		test_dir = release;
+		prefix_dir = "/lib/modules/";
+		suffix_dir = "/build";
+	}
+
+	err = asprintf(&autoconf_path, "%s%s%s/include/generated/autoconf.h",
+		       prefix_dir, test_dir, suffix_dir);
+	if (err < 0)
+		return -ENOMEM;
+
+	if (access(autoconf_path, R_OK) == 0) {
+		free(autoconf_path);
+
+		err = asprintf(kbuild_dir, "%s%s%s", prefix_dir, test_dir,
+			       suffix_dir);
+		if (err < 0)
+			return -ENOMEM;
+		return 0;
+	}
+	free(autoconf_path);
+	return -ENOENT;
+}
+
+/*
+ * Shell script run through read_from_pipe() by llvm__get_kbuild_opts(),
+ * which exports $KBUILD_DIR and $KBUILD_OPTS beforehand.
+ *
+ * It bails out unless $KBUILD_DIR is a directory containing
+ * include/generated/autoconf.h, then creates a temporary directory with a
+ * one-rule Makefile that echoes $(NOSTDINC_FLAGS) $(LINUXINCLUDE)
+ * $(EXTRA_CFLAGS), builds dummy.o there with "make -C $KBUILD_DIR
+ * M=$TMPDIR", removes the temporary directory and exits with make's
+ * status.  The echoed flags are the script's stdout.
+ */
+static const char *kinc_fetch_script =
+"#!/usr/bin/env sh\n"
+"if ! test -d \"$KBUILD_DIR\"\n"
+"then\n"
+"	exit -1\n"
+"fi\n"
+"if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n"
+"then\n"
+"	exit -1\n"
+"fi\n"
+"TMPDIR=`mktemp -d`\n"
+"if test -z \"$TMPDIR\"\n"
+"then\n"
+"    exit -1\n"
+"fi\n"
+"cat << EOF > $TMPDIR/Makefile\n"
+"obj-y := dummy.o\n"
+"\\$(obj)/%.o: \\$(src)/%.c\n"
+"\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n"
+"EOF\n"
+"touch $TMPDIR/dummy.c\n"
+"make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n"
+"RET=$?\n"
+"rm -rf $TMPDIR\n"
+"exit $RET\n";
+
+/*
+ * Obtain the kernel build directory and the include options needed to
+ * compile against it.
+ *
+ * On success *kbuild_dir and *kbuild_include_opts receive malloc()ed
+ * strings owned by the caller; on any failure both are left NULL.  The
+ * function returns nothing, so callers detect failure by checking for NULL.
+ *
+ * A successful result is remembered in two static variables and later
+ * calls hand out strdup()ed copies of it without detecting again.
+ */
+void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts)
+{
+	static char *saved_kbuild_dir;
+	static char *saved_kbuild_include_opts;
+	int err;
+
+	if (!kbuild_dir || !kbuild_include_opts)
+		return;
+
+	*kbuild_dir = NULL;
+	*kbuild_include_opts = NULL;
+
+	/* Fast path: a previous call succeeded, so copy the cached result. */
+	if (saved_kbuild_dir && saved_kbuild_include_opts &&
+	    !IS_ERR(saved_kbuild_dir) && !IS_ERR(saved_kbuild_include_opts)) {
+		*kbuild_dir = strdup(saved_kbuild_dir);
+		*kbuild_include_opts = strdup(saved_kbuild_include_opts);
+
+		if (*kbuild_dir && *kbuild_include_opts)
+			return;
+
+		zfree(kbuild_dir);
+		zfree(kbuild_include_opts);
+		/*
+		 * Don't fall through: it may breaks saved_kbuild_dir and
+		 * saved_kbuild_include_opts if detect them again when
+		 * memory is low.
+		 */
+		return;
+	}
+
+	/* An explicitly empty kbuild-dir disables detection altogether. */
+	if (llvm_param.kbuild_dir && !llvm_param.kbuild_dir[0]) {
+		pr_debug("[llvm.kbuild-dir] is set to \"\" deliberately.\n");
+		pr_debug("Skip kbuild options detection.\n");
+		goto errout;
+	}
+
+	err = detect_kbuild_dir(kbuild_dir);
+	if (err) {
+		pr_warning(
+"WARNING:\tunable to get correct kernel building directory.\n"
+"Hint:\tSet correct kbuild directory using 'kbuild-dir' option in [llvm]\n"
+"     \tsection of ~/.perfconfig or set it to \"\" to suppress kbuild\n"
+"     \tdetection.\n\n");
+		goto errout;
+	}
+
+	pr_debug("Kernel build dir is set to %s\n", *kbuild_dir);
+	/* kinc_fetch_script reads both of these from the environment. */
+	force_set_env("KBUILD_DIR", *kbuild_dir);
+	force_set_env("KBUILD_OPTS", llvm_param.kbuild_opts);
+	err = read_from_pipe(kinc_fetch_script,
+			     (void **)kbuild_include_opts,
+			     NULL);
+	if (err) {
+		pr_warning(
+"WARNING:\tunable to get kernel include directories from '%s'\n"
+"Hint:\tTry set clang include options using 'clang-bpf-cmd-template'\n"
+"     \toption in [llvm] section of ~/.perfconfig and set 'kbuild-dir'\n"
+"     \toption in [llvm] to \"\" to suppress this detection.\n\n",
+			*kbuild_dir);
+
+		free(*kbuild_dir);
+		*kbuild_dir = NULL;
+		goto errout;
+	}
+
+	pr_debug("include option is set to %s\n", *kbuild_include_opts);
+
+	/* Cache private copies; the caller owns the strings returned above. */
+	saved_kbuild_dir = strdup(*kbuild_dir);
+	saved_kbuild_include_opts = strdup(*kbuild_include_opts);
+
+	/* Keep the cache all-or-nothing if either copy failed. */
+	if (!saved_kbuild_dir || !saved_kbuild_include_opts) {
+		zfree(&saved_kbuild_dir);
+		zfree(&saved_kbuild_include_opts);
+	}
+	return;
+errout:
+	/*
+	 * Record the failure as error pointers.  The fast path above rejects
+	 * IS_ERR() values, so a later call runs the detection again.
+	 */
+	saved_kbuild_dir = ERR_PTR(-EINVAL);
+	saved_kbuild_include_opts = ERR_PTR(-EINVAL);
+}
+
+/*
+ * Return the number of configured processors as reported by
+ * sysconf(_SC_NPROCESSORS_CONF).  The value is looked up once and cached;
+ * if the lookup fails a warning is printed and 128 is used instead.
+ */
+int llvm__get_nr_cpus(void)
+{
+	static int nr_cpus_avail = 0;
+	char serr[STRERR_BUFSIZE];
+
+	if (nr_cpus_avail <= 0) {
+		nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF);
+		if (nr_cpus_avail <= 0) {
+			pr_err(
+"WARNING:\tunable to get available CPUs in this system: %s\n"
+"        \tUse 128 instead.\n", str_error_r(errno, serr, sizeof(serr)));
+			nr_cpus_avail = 128;
+		}
+	}
+
+	return nr_cpus_avail;
+}
+
+/*
+ * Write @size bytes from @obj_buf to a file named like @path but with the
+ * trailing ".c" replaced by ".o".  Nothing is written if @path does not
+ * end in ".c".  All failures are only reported as warnings.
+ */
+void llvm__dump_obj(const char *path, void *obj_buf, size_t size)
+{
+	char *dump_path = strdup(path);
+	char *ext;
+	FILE *out;
+
+	if (dump_path == NULL) {
+		pr_warning("WARNING: Not enough memory, skip object dumping\n");
+		return;
+	}
+
+	ext = strrchr(dump_path, '.');
+	if (ext == NULL || strcmp(ext, ".c") != 0) {
+		pr_warning("WARNING: invalid llvm source path: '%s', skip object dumping\n",
+			   dump_path);
+		goto out_free;
+	}
+
+	/* Turn "foo.c" into "foo.o" in place. */
+	ext[1] = 'o';
+
+	out = fopen(dump_path, "wb");
+	if (out == NULL) {
+		pr_warning("WARNING: failed to open '%s': %s, skip object dumping\n",
+			   dump_path, strerror(errno));
+		goto out_free;
+	}
+
+	pr_info("LLVM: dumping %s\n", dump_path);
+	if (fwrite(obj_buf, size, 1, out) != 1)
+		pr_warning("WARNING: failed to write to file '%s': %s, skip object dumping\n",
+			   dump_path, strerror(errno));
+	fclose(out);
+
+out_free:
+	free(dump_path);
+}
+
+/*
+ * Compile the BPF source at @path with clang and return the object code.
+ *
+ * @path:         source file; a path starting with '-' is passed through
+ *                unchanged (stdin source, used for testing), anything else
+ *                is converted to an absolute path first.
+ * @p_obj_buf:    if non-NULL, receives the malloc()ed object buffer on
+ *                success (caller frees) and NULL on failure.
+ * @p_obj_buf_sz: if non-NULL, receives the object size, 0 on failure.
+ *
+ * The compiler command is llvm_param.clang_bpf_cmd_template (or the
+ * built-in default), whose $VARIABLES are supplied through the
+ * environment set up here.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int llvm__compile_bpf(const char *path, void **p_obj_buf,
+		      size_t *p_obj_buf_sz)
+{
+	size_t obj_buf_sz;
+	void *obj_buf = NULL;
+	int err, nr_cpus_avail;
+	unsigned int kernel_version;
+	char linux_version_code_str[64];
+	const char *clang_opt = llvm_param.clang_opt;
+	char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64];
+	char serr[STRERR_BUFSIZE];
+	char *kbuild_dir = NULL, *kbuild_include_opts = NULL;
+	const char *template = llvm_param.clang_bpf_cmd_template;
+	/* Start as NULL so the error path can free them unconditionally. */
+	char *command_echo = NULL, *command_out = NULL;
+
+	if (path[0] != '-' && realpath(path, abspath) == NULL) {
+		err = errno;
+		pr_err("ERROR: problems with path %s: %s\n",
+		       path, str_error_r(err, serr, sizeof(serr)));
+		return -err;
+	}
+
+	if (!template)
+		template = CLANG_BPF_CMD_DEFAULT_TEMPLATE;
+
+	err = search_program(llvm_param.clang_path,
+			     "clang", clang_path);
+	if (err) {
+		pr_err(
+"ERROR:\tunable to find clang.\n"
+"Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
+"     \tand 'clang-path' option in [llvm] section of ~/.perfconfig.\n");
+		version_notice();
+		return -ENOENT;
+	}
+
+	/*
+	 * This is an optional work. Even it fail we can continue our
+	 * work. Needn't to check error return.
+	 */
+	llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts);
+
+	nr_cpus_avail = llvm__get_nr_cpus();
+	snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d",
+		 nr_cpus_avail);
+
+	if (fetch_kernel_version(&kernel_version, NULL, 0))
+		kernel_version = 0;
+
+	snprintf(linux_version_code_str, sizeof(linux_version_code_str),
+		 "0x%x", kernel_version);
+
+	force_set_env("NR_CPUS", nr_cpus_avail_str);
+	force_set_env("LINUX_VERSION_CODE", linux_version_code_str);
+	force_set_env("CLANG_EXEC", clang_path);
+	force_set_env("CLANG_OPTIONS", clang_opt);
+	force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts);
+	force_set_env("WORKING_DIR", kbuild_dir ? : ".");
+
+	/*
+	 * Since we may reset clang's working dir, path of source file
+	 * should be transferred into absolute path, except we want
+	 * stdin to be source file (testing).
+	 */
+	force_set_env("CLANG_SOURCE",
+		      (path[0] == '-') ? path : abspath);
+
+	pr_debug("llvm compiling command template: %s\n", template);
+
+	if (asprintf(&command_echo, "echo -n \"%s\"", template) < 0) {
+		/*
+		 * asprintf() leaves the pointer undefined on failure, and
+		 * err still holds 0 from search_program() at this point.
+		 */
+		command_echo = NULL;
+		err = -ENOMEM;
+		goto errout;
+	}
+
+	/* Let the shell expand the template so the real command can be logged. */
+	err = read_from_pipe(command_echo, (void **) &command_out, NULL);
+	if (err)
+		goto errout;
+
+	pr_debug("llvm compiling command : %s\n", command_out);
+
+	err = read_from_pipe(template, &obj_buf, &obj_buf_sz);
+	if (err) {
+		pr_err("ERROR:\tunable to compile %s\n", path);
+		pr_err("Hint:\tCheck error message shown above.\n");
+		pr_err("Hint:\tYou can also pre-compile it into .o using:\n");
+		pr_err("     \t\tclang -target bpf -O2 -c %s\n", path);
+		pr_err("     \twith proper -I and -D options.\n");
+		goto errout;
+	}
+
+	free(command_echo);
+	free(command_out);
+	free(kbuild_dir);
+	free(kbuild_include_opts);
+
+	if (!p_obj_buf)
+		free(obj_buf);
+	else
+		*p_obj_buf = obj_buf;
+
+	if (p_obj_buf_sz)
+		*p_obj_buf_sz = obj_buf_sz;
+	return 0;
+errout:
+	free(command_echo);
+	free(command_out);
+	free(kbuild_dir);
+	free(kbuild_include_opts);
+	free(obj_buf);
+	if (p_obj_buf)
+		*p_obj_buf = NULL;
+	if (p_obj_buf_sz)
+		*p_obj_buf_sz = 0;
+	return err;
+}
+
+/*
+ * Check whether clang can be located, using llvm_param.clang_path as the
+ * preferred location.  The path found is discarded; only the result code
+ * of search_program() is returned.
+ */
+int llvm__search_clang(void)
+{
+	char found[PATH_MAX];
+	int ret;
+
+	ret = search_program(llvm_param.clang_path, "clang", found);
+	return ret;
+}
diff --git a/util/llvm-utils.h b/util/llvm-utils.h
new file mode 100644
index 0000000..d3ad8de
--- /dev/null
+++ b/util/llvm-utils.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __LLVM_UTILS_H
+#define __LLVM_UTILS_H
+
+#include "debug.h"
+
+/* User-tunable settings for compiling BPF sources with clang. */
+struct llvm_param {
+	/* Path of clang executable */
+	const char *clang_path;
+	/*
+	 * Template of clang bpf compiling. The following env variables
+	 * are set before the template is run and can be used in it:
+	 *   $CLANG_EXEC:		Path to clang.
+	 *   $CLANG_OPTIONS:		Extra options to clang.
+	 *   $KERNEL_INC_OPTIONS:	Kernel include directories.
+	 *   $WORKING_DIR:		Kernel source directory.
+	 *   $CLANG_SOURCE:		Source file to be compiled.
+	 *   $NR_CPUS:			Number of CPUs.
+	 *   $LINUX_VERSION_CODE:	Kernel version code, in hex.
+	 */
+	const char *clang_bpf_cmd_template;
+	/* Will be filled in $CLANG_OPTIONS */
+	const char *clang_opt;
+	/* Where to find kbuild system */
+	const char *kbuild_dir;
+	/*
+	 * Arguments passed to make, like 'ARCH=arm' if doing cross
+	 * compiling. Should not be used for dynamic compiling.
+	 */
+	const char *kbuild_opts;
+	/*
+	 * Default is false. If set to true, write compiling result
+	 * to object file.
+	 */
+	bool dump_obj;
+	/*
+	 * Default is false. If one of the above fields is set by user
+	 * explicitly then user_set_param is set to true. This is used
+	 * for perf test. If user doesn't set anything in .perfconfig
+	 * and clang is not found, don't trigger llvm test.
+	 */
+	bool user_set_param;
+};
+
+extern struct llvm_param llvm_param;
+int perf_llvm_config(const char *var, const char *value);
+
+int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz);
+
+/* This function is for test__llvm() use only */
+int llvm__search_clang(void);
+
+/* Following functions are reused by builtin clang support */
+void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts);
+int llvm__get_nr_cpus(void);
+
+void llvm__dump_obj(const char *path, void *obj_buf, size_t size);
+#endif
diff --git a/util/lzma.c b/util/lzma.c
new file mode 100644
index 0000000..07498ea
--- /dev/null
+++ b/util/lzma.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <lzma.h>
+#include <stdio.h>
+#include <linux/compiler.h>
+#include "compress.h"
+#include "util.h"
+#include "debug.h"
+
+#define BUFSIZE 8192
+
+/*
+ * Map a liblzma return code to a constant, human readable message.
+ * Codes without a dedicated message get a generic "unknown error" text.
+ */
+static const char *lzma_strerror(lzma_ret ret)
+{
+	const char *msg;
+
+	switch ((int) ret) {
+	case LZMA_MEM_ERROR:
+		msg = "Memory allocation failed";
+		break;
+	case LZMA_OPTIONS_ERROR:
+		msg = "Unsupported decompressor flags";
+		break;
+	case LZMA_FORMAT_ERROR:
+		msg = "The input is not in the .xz format";
+		break;
+	case LZMA_DATA_ERROR:
+		msg = "Compressed file is corrupt";
+		break;
+	case LZMA_BUF_ERROR:
+		msg = "Compressed file is truncated or otherwise corrupt";
+		break;
+	default:
+		msg = "Unknown error, possibly a bug";
+		break;
+	}
+
+	return msg;
+}
+
+/*
+ * Decompress the .xz file @input and write the result to @output_fd.
+ *
+ * Concatenated streams are accepted (LZMA_CONCATENATED) and no memory
+ * limit is imposed on the decoder (UINT64_MAX).
+ *
+ * Returns 0 on success, -1 on any open, read, decode or write error; the
+ * reason is reported through pr_err().
+ */
+int lzma_decompress_to_file(const char *input, int output_fd)
+{
+	lzma_action action = LZMA_RUN;
+	lzma_stream strm   = LZMA_STREAM_INIT;
+	lzma_ret ret;
+	int err = -1;
+
+	u8 buf_in[BUFSIZE];
+	u8 buf_out[BUFSIZE];
+	FILE *infile;
+
+	infile = fopen(input, "rb");
+	if (!infile) {
+		pr_err("lzma: fopen failed on %s: '%s'\n",
+		       input, strerror(errno));
+		return -1;
+	}
+
+	ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED);
+	if (ret != LZMA_OK) {
+		pr_err("lzma: lzma_stream_decoder failed %s (%d)\n",
+			lzma_strerror(ret), ret);
+		goto err_fclose;
+	}
+
+	strm.next_in   = NULL;
+	strm.avail_in  = 0;
+	strm.next_out  = buf_out;
+	strm.avail_out = sizeof(buf_out);
+
+	while (1) {
+		/* Refill the input buffer once the decoder has drained it. */
+		if (strm.avail_in == 0 && !feof(infile)) {
+			strm.next_in  = buf_in;
+			strm.avail_in = fread(buf_in, 1, sizeof(buf_in), infile);
+
+			if (ferror(infile)) {
+				pr_err("lzma: read error: %s\n", strerror(errno));
+				goto err_lzma_end;
+			}
+
+			/* No more input: tell the decoder to finish up. */
+			if (feof(infile))
+				action = LZMA_FINISH;
+		}
+
+		ret = lzma_code(&strm, action);
+
+		/* Flush when the output buffer is full or the stream is done. */
+		if (strm.avail_out == 0 || ret == LZMA_STREAM_END) {
+			ssize_t write_size = sizeof(buf_out) - strm.avail_out;
+
+			if (writen(output_fd, buf_out, write_size) != write_size) {
+				pr_err("lzma: write error: %s\n", strerror(errno));
+				goto err_lzma_end;
+			}
+
+			strm.next_out  = buf_out;
+			strm.avail_out = sizeof(buf_out);
+		}
+
+		if (ret != LZMA_OK) {
+			if (ret == LZMA_STREAM_END)
+				break;
+
+			pr_err("lzma: failed %s\n", lzma_strerror(ret));
+			goto err_lzma_end;
+		}
+	}
+
+	err = 0;
+err_lzma_end:
+	/* Release the decoder state allocated by lzma_stream_decoder(). */
+	lzma_end(&strm);
+err_fclose:
+	fclose(infile);
+	return err;
+}
diff --git a/util/machine.c b/util/machine.c
new file mode 100644
index 0000000..32d5049
--- /dev/null
+++ b/util/machine.c
@@ -0,0 +1,2384 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <regex.h>
+#include "callchain.h"
+#include "debug.h"
+#include "event.h"
+#include "evsel.h"
+#include "hist.h"
+#include "machine.h"
+#include "map.h"
+#include "sort.h"
+#include "strlist.h"
+#include "thread.h"
+#include "vdso.h"
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "unwind.h"
+#include "linux/hash.h"
+#include "asm/bug.h"
+
+#include "sane_ctype.h"
+#include <symbol/kallsyms.h>
+
+static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
+
+/*
+ * Put @dsos into its empty state: empty list head, empty rb-tree root and
+ * an initialised rw-semaphore.
+ */
+static void dsos__init(struct dsos *dsos)
+{
+	INIT_LIST_HEAD(&dsos->head);
+	dsos->root = RB_ROOT;
+	init_rwsem(&dsos->lock);
+}
+
+/*
+ * Reset every slot of machine->threads[]: empty rb-tree, initialised lock,
+ * zero thread count, empty dead list and no cached last match.
+ */
+static void machine__threads_init(struct machine *machine)
+{
+	int slot = 0;
+
+	while (slot < THREADS__TABLE_SIZE) {
+		struct threads *bucket = &machine->threads[slot++];
+
+		bucket->entries = RB_ROOT;
+		init_rwsem(&bucket->lock);
+		bucket->nr = 0;
+		INIT_LIST_HEAD(&bucket->dead);
+		bucket->last_match = NULL;
+	}
+}
+
+/*
+ * Allocate machine->mmap_name: a fixed name for the host and for the
+ * default guest, otherwise a guest name that embeds machine->pid.
+ *
+ * Returns 0 on success, -ENOMEM if the name could not be allocated.
+ */
+static int machine__set_mmap_name(struct machine *machine)
+{
+	if (machine__is_host(machine)) {
+		machine->mmap_name = strdup("[kernel.kallsyms]");
+	} else if (machine__is_default_guest(machine)) {
+		machine->mmap_name = strdup("[guest.kernel.kallsyms]");
+	} else {
+		int len = asprintf(&machine->mmap_name,
+				   "[guest.kernel.kallsyms.%d]", machine->pid);
+
+		/* asprintf() leaves the pointer undefined on failure. */
+		if (len < 0)
+			machine->mmap_name = NULL;
+	}
+
+	if (machine->mmap_name == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * Initialise a caller-allocated struct machine.
+ *
+ * @machine:  storage to initialise; it is zeroed first.
+ * @root_dir: root directory string; a private copy is stored.
+ * @pid:      machine id.  For anything other than HOST_KERNEL_ID a thread
+ *            with comm "[guest/<pid>]" is created as well.
+ *
+ * Returns 0 on success or -ENOMEM on failure, in which case root_dir and
+ * mmap_name have been freed and reset to NULL.
+ */
+int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
+{
+	int err = -ENOMEM;
+
+	memset(machine, 0, sizeof(*machine));
+	map_groups__init(&machine->kmaps, machine);
+	RB_CLEAR_NODE(&machine->rb_node);
+	dsos__init(&machine->dsos);
+
+	machine__threads_init(machine);
+
+	machine->vdso_info = NULL;
+	machine->env = NULL;
+
+	machine->pid = pid;
+
+	machine->id_hdr_size = 0;
+	machine->kptr_restrict_warned = false;
+	machine->comm_exec = false;
+	machine->kernel_start = 0;
+
+	memset(machine->vmlinux_maps, 0, sizeof(machine->vmlinux_maps));
+
+	machine->root_dir = strdup(root_dir);
+	if (machine->root_dir == NULL)
+		return -ENOMEM;
+
+	if (machine__set_mmap_name(machine))
+		goto out;
+
+	if (pid != HOST_KERNEL_ID) {
+		struct thread *thread = machine__findnew_thread(machine, -1,
+								pid);
+		char comm[64];
+
+		if (thread == NULL)
+			goto out;
+
+		snprintf(comm, sizeof(comm), "[guest/%d]", pid);
+		thread__set_comm(thread, comm, 0);
+		thread__put(thread);
+	}
+
+	machine->current_tid = NULL;
+	err = 0;
+
+out:
+	if (err) {
+		zfree(&machine->root_dir);
+		zfree(&machine->mmap_name);
+	}
+	/* Report the failure instead of claiming success unconditionally. */
+	return err;
+}
+
+/*
+ * Allocate and initialise a machine for the host kernel and create its
+ * kernel maps.
+ *
+ * Returns the new machine, or NULL if allocation, initialisation or kernel
+ * map creation fails.
+ */
+struct machine *machine__new_host(void)
+{
+	struct machine *machine = malloc(sizeof(*machine));
+
+	if (machine == NULL)
+		return NULL;
+
+	/* Do not build kernel maps on top of a failed initialisation. */
+	if (machine__init(machine, "", HOST_KERNEL_ID) != 0)
+		goto out_delete;
+
+	if (machine__create_kernel_maps(machine) < 0)
+		goto out_delete;
+
+	return machine;
+out_delete:
+	free(machine);
+	return NULL;
+}
+
+/*
+ * Create a host machine and load /proc/kallsyms into it.
+ *
+ * Returns the machine, or NULL if it could not be created or if
+ * machine__load_kallsyms() returned a value <= 0 (the machine is deleted
+ * in that case).
+ */
+struct machine *machine__new_kallsyms(void)
+{
+	struct machine *host = machine__new_host();
+
+	if (!host)
+		return NULL;
+
+	/*
+	 * FIXME:
+	 * 1) MAP__FUNCTION will go away when we stop loading separate maps for
+	 *    functions and data objects.
+	 * 2) We should switch to machine__load_kallsyms(), i.e. not explicitly
+	 *    ask for not using the kcore parsing code, once this one is fixed
+	 *    to create a map per module.
+	 */
+	if (machine__load_kallsyms(host, "/proc/kallsyms", MAP__FUNCTION) > 0)
+		return host;
+
+	machine__delete(host);
+	return NULL;
+}
+
+/*
+ * Detach every dso from @dsos while holding the write lock: clear its
+ * rb-node and root back-pointer, unlink it from the list and drop the
+ * reference held on it.
+ */
+static void dsos__purge(struct dsos *dsos)
+{
+	struct dso *dso, *next;
+
+	down_write(&dsos->lock);
+
+	/* The _safe variant is needed because entries are unlinked in the loop. */
+	list_for_each_entry_safe(dso, next, &dsos->head, node) {
+		RB_CLEAR_NODE(&dso->rb_node);
+		dso->root = NULL;
+		list_del_init(&dso->node);
+		dso__put(dso);
+	}
+
+	up_write(&dsos->lock);
+}
+
+/*
+ * Tear down @dsos: purge all entries first (dsos__purge() takes the lock),
+ * then destroy the lock itself.
+ */
+static void dsos__exit(struct dsos *dsos)
+{
+	dsos__purge(dsos);
+	exit_rwsem(&dsos->lock);
+}
+
+/*
+ * Remove every thread from every slot of machine->threads[].  Each slot
+ * is emptied under its own write lock, which is why
+ * __machine__remove_thread() is told not to take the lock again.
+ */
+void machine__delete_threads(struct machine *machine)
+{
+	int i;
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+		struct rb_node *nd, *next;
+
+		down_write(&threads->lock);
+		/* Fetch the successor before the current node is removed. */
+		for (nd = rb_first(&threads->entries); nd; nd = next) {
+			next = rb_next(nd);
+			__machine__remove_thread(machine,
+						 rb_entry(nd, struct thread, rb_node),
+						 false);
+		}
+		up_write(&threads->lock);
+	}
+}
+
+/*
+ * Release what @machine owns (kernel maps, map groups, dsos, vdso info,
+ * the root_dir/mmap_name/current_tid allocations and the per-slot thread
+ * locks) without freeing @machine itself.  A NULL @machine is accepted.
+ */
+void machine__exit(struct machine *machine)
+{
+	int slot;
+
+	if (!machine)
+		return;
+
+	machine__destroy_kernel_maps(machine);
+	map_groups__exit(&machine->kmaps);
+	dsos__exit(&machine->dsos);
+	machine__exit_vdso(machine);
+
+	zfree(&machine->root_dir);
+	zfree(&machine->mmap_name);
+	zfree(&machine->current_tid);
+
+	for (slot = 0; slot < THREADS__TABLE_SIZE; slot++)
+		exit_rwsem(&machine->threads[slot].lock);
+}
+
+/*
+ * Release the resources of a heap-allocated @machine and free it.
+ * A NULL @machine is a no-op.
+ */
+void machine__delete(struct machine *machine)
+{
+	if (!machine)
+		return;
+
+	machine__exit(machine);
+	free(machine);
+}
+
+/*
+ * Initialise @machines: set up the embedded host machine and start with an
+ * empty guest tree.  The return value of machine__init() is not checked
+ * here.
+ */
+void machines__init(struct machines *machines)
+{
+	machine__init(&machines->host, "", HOST_KERNEL_ID);
+	machines->guests = RB_ROOT;
+}
+
+/*
+ * Release the host machine's resources.  Guest machines in
+ * machines->guests are not released here (see the XXX note below).
+ */
+void machines__exit(struct machines *machines)
+{
+	machine__exit(&machines->host);
+	/* XXX exit guest */
+}
+
+/*
+ * Allocate and initialise a machine for @pid / @root_dir and insert it
+ * into the machines->guests rb-tree, which is ordered by pid.
+ *
+ * Returns the new machine, or NULL if allocation or machine__init() fails.
+ */
+struct machine *machines__add(struct machines *machines, pid_t pid,
+			      const char *root_dir)
+{
+	struct machine *machine = malloc(sizeof(*machine));
+	struct rb_node **link = &machines->guests.rb_node;
+	struct rb_node *parent = NULL;
+
+	if (!machine)
+		return NULL;
+
+	if (machine__init(machine, root_dir, pid) != 0) {
+		free(machine);
+		return NULL;
+	}
+
+	/* Descend to the leaf position: smaller pids left, the rest right. */
+	while (*link != NULL) {
+		struct machine *pos;
+
+		parent = *link;
+		pos = rb_entry(parent, struct machine, rb_node);
+		link = pid < pos->pid ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&machine->rb_node, parent, link);
+	rb_insert_color(&machine->rb_node, &machines->guests);
+
+	return machine;
+}
+
+/* Set the comm_exec flag on the host machine and on every guest machine. */
+void machines__set_comm_exec(struct machines *machines, bool comm_exec)
+{
+	struct rb_node *nd = rb_first(&machines->guests);
+
+	machines->host.comm_exec = comm_exec;
+
+	while (nd) {
+		rb_entry(nd, struct machine, rb_node)->comm_exec = comm_exec;
+		nd = rb_next(nd);
+	}
+}
+
+/*
+ * Look up the machine for @pid.
+ *
+ * HOST_KERNEL_ID yields the host machine.  Otherwise the guest tree is
+ * searched by pid.  If no exact match exists, the result is the last
+ * machine with pid 0 that was passed on the way down, or NULL if there
+ * was none.
+ */
+struct machine *machines__find(struct machines *machines, pid_t pid)
+{
+	struct rb_node *node = machines->guests.rb_node;
+	struct machine *fallback = NULL;
+
+	if (pid == HOST_KERNEL_ID)
+		return &machines->host;
+
+	while (node != NULL) {
+		struct machine *cur = rb_entry(node, struct machine, rb_node);
+
+		if (pid == cur->pid)
+			return cur;
+
+		node = pid < cur->pid ? node->rb_left : node->rb_right;
+
+		/* Remember a pid-0 machine seen on the search path. */
+		if (!cur->pid)
+			fallback = cur;
+	}
+
+	return fallback;
+}
+
+/*
+ * Return the machine for @pid, creating it when no exact match exists.
+ *
+ * machines__find() may hand back a fallback machine whose pid differs from
+ * @pid; that is treated as a miss.  For a guest pid other than
+ * DEFAULT_GUEST_KERNEL_ID, and when symbol_conf.guestmount is set, the new
+ * machine's root directory is "<guestmount>/<pid>", which must be readable;
+ * otherwise NULL is returned and the failure is reported once per path.
+ *
+ * Returns the machine, or NULL on an inaccessible guest directory or when
+ * machines__add() fails.
+ */
+struct machine *machines__findnew(struct machines *machines, pid_t pid)
+{
+	char path[PATH_MAX];
+	const char *root_dir = "";
+	struct machine *machine = machines__find(machines, pid);
+
+	if (machine && (machine->pid == pid))
+		goto out;
+
+	if ((pid != HOST_KERNEL_ID) &&
+	    (pid != DEFAULT_GUEST_KERNEL_ID) &&
+	    (symbol_conf.guestmount)) {
+		/* Bounded: guestmount is user supplied and may be long. */
+		snprintf(path, sizeof(path), "%s/%d", symbol_conf.guestmount, pid);
+		if (access(path, R_OK)) {
+			/* Paths already complained about, kept for the process lifetime. */
+			static struct strlist *seen;
+
+			if (!seen)
+				seen = strlist__new(NULL, NULL);
+
+			/*
+			 * If the list could not be allocated, still report the
+			 * error, just without de-duplication.
+			 */
+			if (!seen || !strlist__has_entry(seen, path)) {
+				pr_err("Can't access file %s\n", path);
+				if (seen)
+					strlist__add(seen, path);
+			}
+			machine = NULL;
+			goto out;
+		}
+		root_dir = path;
+	}
+
+	machine = machines__add(machines, pid, root_dir);
+out:
+	return machine;
+}
+
+/* Invoke @process(machine, @data) on every guest machine, in rbtree order. */
+void machines__process_guests(struct machines *machines,
+			      machine__process_t process, void *data)
+{
+	struct rb_node *node = rb_first(&machines->guests);
+
+	while (node) {
+		struct machine *guest = rb_entry(node, struct machine, rb_node);
+
+		process(guest, data);
+		node = rb_next(node);
+	}
+}
+
+/* Store @id_hdr_size in the host machine and in every guest machine. */
+void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
+{
+	struct rb_node *nd = rb_first(&machines->guests);
+
+	machines->host.id_hdr_size = id_hdr_size;
+
+	while (nd) {
+		struct machine *guest = rb_entry(nd, struct machine, rb_node);
+
+		guest->id_hdr_size = id_hdr_size;
+		nd = rb_next(nd);
+	}
+}
+
+/*
+ * Fill in the pid of a thread that was created before its pid was known and
+ * make it share the map groups of its thread-group leader.
+ *
+ * Nothing is done when @pid is -1, already matches, or @th already has a
+ * known pid.  When @th is its own leader (pid == tid) only the pid is set.
+ * Otherwise the leader is looked up (created if needed) through
+ * __machine__findnew_thread() and @th->mg is switched to the leader's map
+ * groups, dropping whatever @th held before.
+ *
+ * The leader reference obtained from the lookup is released on every path
+ * out of the function.
+ */
+static void machine__update_thread_pid(struct machine *machine,
+				       struct thread *th, pid_t pid)
+{
+	struct thread *leader;
+
+	if (pid == th->pid_ || pid == -1 || th->pid_ != -1)
+		return;
+
+	th->pid_ = pid;
+
+	if (th->pid_ == th->tid)
+		return;
+
+	leader = __machine__findnew_thread(machine, th->pid_, th->pid_);
+	if (!leader)
+		goto out_err;
+
+	if (!leader->mg)
+		leader->mg = map_groups__new(machine);
+
+	if (!leader->mg)
+		goto out_err;
+
+	/* Already sharing: nothing to switch, but still drop the leader ref. */
+	if (th->mg == leader->mg)
+		goto out_put;
+
+	if (th->mg) {
+		/*
+		 * Maps are created from MMAP events which provide the pid and
+		 * tid.  Consequently there never should be any maps on a thread
+		 * with an unknown pid.  Just print an error if there are.
+		 */
+		if (!map_groups__empty(th->mg))
+			pr_err("Discarding thread maps for %d:%d\n",
+			       th->pid_, th->tid);
+		map_groups__put(th->mg);
+	}
+
+	th->mg = map_groups__get(leader->mg);
+out_put:
+	thread__put(leader);
+	return;
+out_err:
+	pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid);
+	goto out_put;
+}
+
+/*
+ * Look up the thread with @tid in @threads, optionally creating it.
+ *
+ * @machine: passed on to machine__update_thread_pid() and
+ *           thread__init_map_groups()
+ * @threads: table bucket whose rbtree and last_match cache are searched
+ * @pid:     used to fill in the pid of an existing thread, or handed to
+ *           thread__new() for a new one
+ * @tid:     thread id, the rbtree key
+ * @create:  when false, a miss returns NULL instead of allocating
+ *
+ * Returns the thread with a reference taken via thread__get(), or NULL on a
+ * miss with !@create or when creation fails.  No locking is done here.
+ *
+ * Caller must eventually drop thread->refcnt returned with a successful
+ * lookup/new thread inserted.
+ */
+static struct thread *____machine__findnew_thread(struct machine *machine,
+						  struct threads *threads,
+						  pid_t pid, pid_t tid,
+						  bool create)
+{
+	struct rb_node **p = &threads->entries.rb_node;
+	struct rb_node *parent = NULL;
+	struct thread *th;
+
+	/*
+	 * Front-end cache - TID lookups come in blocks,
+	 * so most of the time we dont have to look up
+	 * the full rbtree:
+	 */
+	th = threads->last_match;
+	if (th != NULL) {
+		if (th->tid == tid) {
+			machine__update_thread_pid(machine, th, pid);
+			return thread__get(th);
+		}
+
+		/* Cache miss: invalidate it before walking the tree. */
+		threads->last_match = NULL;
+	}
+
+	/* Binary search by tid; p/parent end up at the insertion point. */
+	while (*p != NULL) {
+		parent = *p;
+		th = rb_entry(parent, struct thread, rb_node);
+
+		if (th->tid == tid) {
+			threads->last_match = th;
+			machine__update_thread_pid(machine, th, pid);
+			return thread__get(th);
+		}
+
+		if (tid < th->tid)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	if (!create)
+		return NULL;
+
+	th = thread__new(pid, tid);
+	if (th != NULL) {
+		rb_link_node(&th->rb_node, parent, p);
+		rb_insert_color(&th->rb_node, &threads->entries);
+
+		/*
+		 * We have to initialize map_groups separately
+		 * after rb tree is updated.
+		 *
+		 * The reason is that we call machine__findnew_thread
+		 * within thread__init_map_groups to find the thread
+		 * leader and that would screwed the rb tree.
+		 */
+		if (thread__init_map_groups(th, machine)) {
+			/* Undo the insertion and drop the thread__new() reference. */
+			rb_erase_init(&th->rb_node, &threads->entries);
+			RB_CLEAR_NODE(&th->rb_node);
+			thread__put(th);
+			return NULL;
+		}
+		/*
+		 * It is now in the rbtree, get a ref
+		 */
+		thread__get(th);
+		threads->last_match = th;
+		++threads->nr;
+	}
+
+	return th;
+}
+
+/*
+ * Find or create the thread @pid/@tid in the bucket that machine__threads()
+ * selects for @tid.  Takes no lock itself; see machine__findnew_thread() for
+ * the variant that takes the bucket's write lock.
+ */
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
+{
+	return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true);
+}
+
+/*
+ * Find or create the thread @pid/@tid while holding the write lock of the
+ * bucket selected by @tid.  Returns whatever __machine__findnew_thread()
+ * returns: a referenced thread, or NULL on failure.
+ */
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
+				       pid_t tid)
+{
+	struct threads *threads = machine__threads(machine, tid);
+	struct thread *th;
+
+	down_write(&threads->lock);
+	th = __machine__findnew_thread(machine, pid, tid);
+	up_write(&threads->lock);
+	return th;
+}
+
+/*
+ * Look up, without creating, the thread @pid/@tid while holding the read lock
+ * of the bucket selected by @tid.  Returns a referenced thread or NULL.
+ *
+ * NOTE(review): the lookup helper may write threads->last_match (and update
+ * the thread's pid) although only the read lock is held here - confirm this
+ * is safe for concurrent readers.
+ */
+struct thread *machine__find_thread(struct machine *machine, pid_t pid,
+				    pid_t tid)
+{
+	struct threads *threads = machine__threads(machine, tid);
+	struct thread *th;
+
+	down_read(&threads->lock);
+	th =  ____machine__findnew_thread(machine, threads, pid, tid, false);
+	up_read(&threads->lock);
+	return th;
+}
+
+/*
+ * Pick the comm to use for @thread: thread__exec_comm() when the machine has
+ * comm_exec set, thread__comm() otherwise.
+ */
+struct comm *machine__thread_exec_comm(struct machine *machine,
+				       struct thread *thread)
+{
+	return machine->comm_exec ? thread__exec_comm(thread)
+				  : thread__comm(thread);
+}
+
+/*
+ * Handle a PERF_RECORD_COMM event: find or create the thread named by the
+ * event and record its new comm at sample->time.
+ *
+ * If the event carries PERF_RECORD_MISC_COMM_EXEC, machine->comm_exec is
+ * latched to true.  Returns 0 on success, -1 if the thread could not be
+ * obtained or __thread__set_comm() failed.
+ */
+int machine__process_comm_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample)
+{
+	struct thread *thread = machine__findnew_thread(machine,
+							event->comm.pid,
+							event->comm.tid);
+	bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC;
+	int err = 0;
+
+	if (exec)
+		machine->comm_exec = true;
+
+	if (dump_trace)
+		perf_event__fprintf_comm(event, stdout);
+
+	if (thread == NULL ||
+	    __thread__set_comm(thread, event->comm.comm, sample->time, exec)) {
+		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
+		err = -1;
+	}
+
+	/* thread may be NULL here; presumably thread__put() tolerates that. */
+	thread__put(thread);
+
+	return err;
+}
+
+/*
+ * Handle a PERF_RECORD_NAMESPACES event: find or create the thread named by
+ * the event and attach the namespaces record to it at sample->time.
+ *
+ * Warns once each if the event's nr_namespaces is larger or smaller than
+ * NR_NAMESPACES.  Returns 0 on success, -1 if the thread could not be
+ * obtained or thread__set_namespaces() failed.
+ *
+ * Note: @machine and @sample are annotated __maybe_unused although both are
+ * used below.
+ */
+int machine__process_namespaces_event(struct machine *machine __maybe_unused,
+				      union perf_event *event,
+				      struct perf_sample *sample __maybe_unused)
+{
+	struct thread *thread = machine__findnew_thread(machine,
+							event->namespaces.pid,
+							event->namespaces.tid);
+	int err = 0;
+
+	WARN_ONCE(event->namespaces.nr_namespaces > NR_NAMESPACES,
+		  "\nWARNING: kernel seems to support more namespaces than perf"
+		  " tool.\nTry updating the perf tool..\n\n");
+
+	WARN_ONCE(event->namespaces.nr_namespaces < NR_NAMESPACES,
+		  "\nWARNING: perf tool seems to support more namespaces than"
+		  " the kernel.\nTry updating the kernel..\n\n");
+
+	if (dump_trace)
+		perf_event__fprintf_namespaces(event, stdout);
+
+	if (thread == NULL ||
+	    thread__set_namespaces(thread, sample->time, &event->namespaces)) {
+		dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n");
+		err = -1;
+	}
+
+	/* thread may be NULL here; presumably thread__put() tolerates that. */
+	thread__put(thread);
+
+	return err;
+}
+
+/*
+ * Handle a PERF_RECORD_LOST event: only dumps the event's id and lost count
+ * through dump_printf().  Always returns 0.
+ */
+int machine__process_lost_event(struct machine *machine __maybe_unused,
+				union perf_event *event, struct perf_sample *sample __maybe_unused)
+{
+	dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
+		    event->lost.id, event->lost.lost);
+	return 0;
+}
+
+/*
+ * Handle a PERF_RECORD_LOST_SAMPLES event: only dumps the sample id and the
+ * event's lost count through dump_printf().  Always returns 0.
+ */
+int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
+					union perf_event *event, struct perf_sample *sample)
+{
+	dump_printf(": id:%" PRIu64 ": lost samples :%" PRIu64 "\n",
+		    sample->id, event->lost_samples.lost);
+	return 0;
+}
+
+/*
+ * Find the dso named m->name in machine->dsos, or add a new one set up as a
+ * module with @filename as its long name.  Runs under the dsos write lock.
+ *
+ * Returns the dso with an extra reference taken through dso__get() (the
+ * caller is expected to dso__put() it), or NULL if a new dso could not be
+ * added.
+ */
+static struct dso *machine__findnew_module_dso(struct machine *machine,
+					       struct kmod_path *m,
+					       const char *filename)
+{
+	struct dso *dso;
+
+	down_write(&machine->dsos.lock);
+
+	dso = __dsos__find(&machine->dsos, m->name, true);
+	if (!dso) {
+		dso = __dsos__addnew(&machine->dsos, m->name);
+		if (dso == NULL)
+			goto out_unlock;
+
+		dso__set_module_info(dso, m, machine);
+		/* NOTE(review): the strdup() result is passed on unchecked. */
+		dso__set_long_name(dso, strdup(filename), true);
+	}
+
+	dso__get(dso);
+out_unlock:
+	up_write(&machine->dsos.lock);
+	return dso;
+}
+
+/*
+ * Handle a PERF_RECORD_AUX event: prints it to stdout when dump_trace is
+ * set, nothing else.  Always returns 0.
+ */
+int machine__process_aux_event(struct machine *machine __maybe_unused,
+			       union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_aux(event, stdout);
+	return 0;
+}
+
+/*
+ * Handle a PERF_RECORD_ITRACE_START event: prints it to stdout when
+ * dump_trace is set, nothing else.  Always returns 0.
+ */
+int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
+					union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_itrace_start(event, stdout);
+	return 0;
+}
+
+/*
+ * Handle a context switch event: prints it to stdout when dump_trace is set,
+ * nothing else.  Always returns 0.
+ */
+int machine__process_switch_event(struct machine *machine __maybe_unused,
+				  union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_switch(event, stdout);
+	return 0;
+}
+
+/*
+ * Replace a bracketed placeholder long name on @dso with a copy of @filename.
+ *
+ * Only acts when the current long name starts with '[' and @filename
+ * contains a '/'.  Silently does nothing on NULL arguments or if the copy
+ * cannot be allocated.
+ */
+static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename)
+{
+	const char *copy;
+
+	if (!filename || !dso || !dso->long_name ||
+	    dso->long_name[0] != '[' || !strchr(filename, '/'))
+		return;
+
+	copy = strdup(filename);
+	if (copy)
+		dso__set_long_name(dso, copy, true);
+}
+
+/*
+ * Find the MAP__FUNCTION kernel map for the module named by @filename, or
+ * create one starting at @start and insert it into machine->kmaps.
+ *
+ * Returns the map, or NULL if @filename cannot be parsed or the dso/map
+ * cannot be created.  The reference taken by map__new2() is dropped here
+ * after insertion, so the returned pointer is backed by the reference that
+ * map_groups__insert() took.
+ */
+struct map *machine__findnew_module_map(struct machine *machine, u64 start,
+					const char *filename)
+{
+	struct map *map = NULL;
+	struct dso *dso = NULL;
+	struct kmod_path m;
+
+	if (kmod_path__parse_name(&m, filename))
+		return NULL;
+
+	map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION,
+				       m.name);
+	if (map) {
+		/*
+		 * If the map's dso is an offline module, give dso__load()
+		 * a chance to find the file path of that module by fixing
+		 * long_name.
+		 */
+		dso__adjust_kmod_long_name(map->dso, filename);
+		goto out;
+	}
+
+	dso = machine__findnew_module_dso(machine, &m, filename);
+	if (dso == NULL)
+		goto out;
+
+	map = map__new2(start, dso, MAP__FUNCTION);
+	if (map == NULL)
+		goto out;
+
+	map_groups__insert(&machine->kmaps, map);
+
+	/* Put the map here because map_groups__insert already got it */
+	map__put(map);
+out:
+	/*
+	 * put the dso here, corresponding to machine__findnew_module_dso.
+	 * dso is still NULL on some paths; presumably dso__put() accepts that.
+	 */
+	dso__put(dso);
+	free(m.name);
+	return map;
+}
+
+/*
+ * Print the dso lists of the host and of every guest machine to @fp.
+ * Returns the sum of the values returned by __dsos__fprintf().
+ */
+size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
+{
+	size_t printed = __dsos__fprintf(&machines->host.dsos.head, fp);
+	struct rb_node *node = rb_first(&machines->guests);
+
+	while (node) {
+		struct machine *guest = rb_entry(node, struct machine, rb_node);
+
+		printed += __dsos__fprintf(&guest->dsos.head, fp);
+		node = rb_next(node);
+	}
+
+	return printed;
+}
+
+/*
+ * Print the build-ids of machine @m's dsos to @fp by forwarding to
+ * __dsos__fprintf_buildid(); @skip and @parm are passed through unchanged.
+ * Returns that function's result.
+ */
+size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
+				     bool (skip)(struct dso *dso, int parm), int parm)
+{
+	return __dsos__fprintf_buildid(&m->dsos.head, fp, skip, parm);
+}
+
+/*
+ * Print the dso build-ids of the host and of every guest machine to @fp,
+ * passing @skip and @parm through.  Returns the summed per-machine results.
+ */
+size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
+				     bool (skip)(struct dso *dso, int parm), int parm)
+{
+	size_t printed = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm);
+	struct rb_node *node = rb_first(&machines->guests);
+
+	while (node) {
+		struct machine *guest = rb_entry(node, struct machine, rb_node);
+
+		printed += machine__fprintf_dsos_buildid(guest, fp, skip, parm);
+		node = rb_next(node);
+	}
+
+	return printed;
+}
+
+/*
+ * Print a numbered list of vmlinux candidates for @machine to @fp.
+ *
+ * When the kernel dso has a build-id and dso__build_id_filename() yields a
+ * name, that name is printed as entry [0].  The vmlinux_path[] entries
+ * follow, numbered from has_build_id (i.e. shifted by one when the kernel
+ * dso has a build-id).  Returns the accumulated fprintf() results.
+ */
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
+{
+	int i;
+	size_t printed = 0;
+	struct dso *kdso = machine__kernel_map(machine)->dso;
+
+	if (kdso->has_build_id) {
+		char filename[PATH_MAX];
+		if (dso__build_id_filename(kdso, filename, sizeof(filename),
+					   false))
+			printed += fprintf(fp, "[0] %s\n", filename);
+	}
+
+	for (i = 0; i < vmlinux_path__nr_entries; ++i)
+		printed += fprintf(fp, "[%d] %s\n",
+				   i + kdso->has_build_id, vmlinux_path[i]);
+
+	return printed;
+}
+
+/*
+ * Print every thread of @machine to @fp, one "Threads: N" header per table
+ * bucket followed by that bucket's threads.  Each bucket is walked under its
+ * read lock.
+ *
+ * Returns the fprintf()/thread__fprintf() results accumulated over all
+ * buckets.
+ */
+size_t machine__fprintf(struct machine *machine, FILE *fp)
+{
+	struct rb_node *nd;
+	size_t ret = 0;
+	int i;
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+
+		down_read(&threads->lock);
+
+		/* Accumulate: assigning here would discard earlier buckets. */
+		ret += fprintf(fp, "Threads: %u\n", threads->nr);
+
+		for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+			struct thread *pos = rb_entry(nd, struct thread, rb_node);
+
+			ret += thread__fprintf(pos, fp);
+		}
+
+		up_read(&threads->lock);
+	}
+	return ret;
+}
+
+/*
+ * Find or create the kernel dso for @machine.
+ *
+ * The dso name defaults to machine->mmap_name and is overridden by
+ * symbol_conf.vmlinux_name (host) or symbol_conf.default_guest_vmlinux_name
+ * (guest) when set.  If the resulting dso has no build-id yet, an attempt is
+ * made to read the running kernel's.
+ *
+ * Returns the dso from machine__findnew_kernel(), which may be NULL.
+ */
+static struct dso *machine__get_kernel(struct machine *machine)
+{
+	const char *vmlinux_name = machine->mmap_name;
+	struct dso *kernel;
+
+	if (machine__is_host(machine)) {
+		if (symbol_conf.vmlinux_name)
+			vmlinux_name = symbol_conf.vmlinux_name;
+
+		kernel = machine__findnew_kernel(machine, vmlinux_name,
+						 "[kernel]", DSO_TYPE_KERNEL);
+	} else {
+		if (symbol_conf.default_guest_vmlinux_name)
+			vmlinux_name = symbol_conf.default_guest_vmlinux_name;
+
+		kernel = machine__findnew_kernel(machine, vmlinux_name,
+						 "[guest.kernel]",
+						 DSO_TYPE_GUEST_KERNEL);
+	}
+
+	if (kernel != NULL && (!kernel->has_build_id))
+		dso__read_running_kernel_build_id(kernel, machine);
+
+	return kernel;
+}
+
+/* Holds a single start address; not referenced in the code visible nearby. */
+struct process_args {
+	u64 start;
+};
+
+/*
+ * Write the kallsyms path for @machine into @buf (at most @bufsz bytes):
+ * symbol_conf.default_guest_kallsyms for the default guest, otherwise
+ * "<root_dir>/proc/kallsyms".
+ */
+static void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+					   size_t bufsz)
+{
+	if (!machine__is_default_guest(machine)) {
+		scnprintf(buf, bufsz, "%s/proc/kallsyms", machine->root_dir);
+		return;
+	}
+
+	scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms);
+}
+
+/* NULL-terminated list of symbol names: "_text", then "_stext". */
+const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL};
+
+/* Figure out the start address of kernel map from /proc/kallsyms.
+ * Returns the name of the start symbol in *symbol_name. Pass in NULL as
+ * symbol_name if it's not that important.
+ *
+ * The names in ref_reloc_sym_names[] are tried in order and the first one
+ * found wins.  Returns 0 on success and -1 if none of them is found.
+ *
+ * Note that 0 is also returned, with *symbol_name and *start left untouched,
+ * when symbol__restricted_filename() reports the kallsyms file as
+ * restricted; callers must pre-initialise both outputs.
+ */
+static int machine__get_running_kernel_start(struct machine *machine,
+					     const char **symbol_name, u64 *start)
+{
+	char filename[PATH_MAX];
+	int i, err = -1;
+	const char *name;
+	u64 addr = 0;
+
+	machine__get_kallsyms_filename(machine, filename, PATH_MAX);
+
+	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+		return 0;
+
+	for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
+		err = kallsyms__get_function_start(filename, name, &addr);
+		if (!err)
+			break;
+	}
+
+	if (err)
+		return -1;
+
+	if (symbol_name)
+		*symbol_name = name;
+
+	*start = addr;
+	return 0;
+}
+
+/*
+ * (Re)create one vmlinux map per map type for @kernel and insert each into
+ * machine->kmaps.  Any existing kernel maps are destroyed first.
+ *
+ * Each new map starts at 0 and uses identity__map_ip for both map_ip and
+ * unmap_ip.  Returns 0 on success, -1 if a map cannot be allocated or has no
+ * kmap; maps created before the failure are left in place.
+ */
+static int
+__machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
+{
+	int type;
+
+	/* In case of renewal the kernel map, destroy previous one */
+	machine__destroy_kernel_maps(machine);
+
+	for (type = 0; type < MAP__NR_TYPES; ++type) {
+		struct kmap *kmap;
+		struct map *map;
+
+		machine->vmlinux_maps[type] = map__new2(0, kernel, type);
+		if (machine->vmlinux_maps[type] == NULL)
+			return -1;
+
+		machine->vmlinux_maps[type]->map_ip =
+			machine->vmlinux_maps[type]->unmap_ip =
+				identity__map_ip;
+		map = __machine__kernel_map(machine, type);
+		kmap = map__kmap(map);
+		if (!kmap)
+			return -1;
+
+		kmap->kmaps = &machine->kmaps;
+		map_groups__insert(&machine->kmaps, map);
+	}
+
+	return 0;
+}
+
+/*
+ * Remove every vmlinux map of @machine from machine->kmaps, drop the
+ * machine's reference on it and clear the vmlinux_maps[] slot.  Map types
+ * without a kernel map are skipped.
+ */
+void machine__destroy_kernel_maps(struct machine *machine)
+{
+	int type;
+
+	for (type = 0; type < MAP__NR_TYPES; ++type) {
+		struct kmap *kmap;
+		struct map *map = __machine__kernel_map(machine, type);
+
+		if (map == NULL)
+			continue;
+
+		kmap = map__kmap(map);
+		map_groups__remove(&machine->kmaps, map);
+		if (kmap && kmap->ref_reloc_sym) {
+			/*
+			 * ref_reloc_sym is shared among all maps, so free just
+			 * on one of them.
+			 */
+			if (type == MAP__FUNCTION) {
+				zfree((char **)&kmap->ref_reloc_sym->name);
+				zfree(&kmap->ref_reloc_sym);
+			} else
+				kmap->ref_reloc_sym = NULL;
+		}
+
+		map__put(machine->vmlinux_maps[type]);
+		machine->vmlinux_maps[type] = NULL;
+	}
+}
+
+/*
+ * Create kernel maps for guest machines.
+ *
+ * If any default-guest symbol source is configured, maps for
+ * DEFAULT_GUEST_KERNEL_ID are created.  If symbol_conf.guestmount is set,
+ * every directory entry in it whose name is a decimal pid and that has a
+ * readable "<guestmount>/<pid>/proc/kallsyms" gets kernel maps as well.
+ *
+ * Returns 0 on success, -ENOENT if the guestmount directory cannot be
+ * scanned or is empty, or the non-zero access() result for the first
+ * candidate whose kallsyms file is unreadable (scanning stops there).
+ * Failures of machines__create_kernel_maps() itself are not reported.
+ */
+int machines__create_guest_kernel_maps(struct machines *machines)
+{
+	int ret = 0;
+	struct dirent **namelist = NULL;
+	int i, items = 0;
+	char path[PATH_MAX];
+	pid_t pid;
+	char *endp;
+
+	if (symbol_conf.default_guest_vmlinux_name ||
+	    symbol_conf.default_guest_modules ||
+	    symbol_conf.default_guest_kallsyms) {
+		machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID);
+	}
+
+	if (symbol_conf.guestmount) {
+		items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL);
+		if (items <= 0) {
+			/* An empty result may still carry an allocated vector. */
+			if (items == 0)
+				free(namelist);
+			return -ENOENT;
+		}
+		for (i = 0; i < items; i++) {
+			if (!isdigit(namelist[i]->d_name[0])) {
+				/* Filter out . and .. */
+				continue;
+			}
+			/* strtol() only sets errno on failure, so clear it first. */
+			errno = 0;
+			pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10);
+			if ((*endp != '\0') ||
+			    (endp == namelist[i]->d_name) ||
+			    (errno == ERANGE)) {
+				pr_debug("invalid directory (%s). Skipping.\n",
+					 namelist[i]->d_name);
+				continue;
+			}
+			snprintf(path, sizeof(path), "%s/%s/proc/kallsyms",
+				 symbol_conf.guestmount,
+				 namelist[i]->d_name);
+			ret = access(path, R_OK);
+			if (ret) {
+				pr_debug("Can't access file %s\n", path);
+				goto failure;
+			}
+			machines__create_kernel_maps(machines, pid);
+		}
+failure:
+		/* scandir() allocates each entry as well as the vector itself. */
+		for (i = 0; i < items; i++)
+			free(namelist[i]);
+		free(namelist);
+	}
+
+	return ret;
+}
+
+/*
+ * Destroy the host machine's kernel maps, then unlink and delete every guest
+ * machine.  Note that guests are removed entirely via machine__delete(), not
+ * just stripped of their kernel maps.
+ */
+void machines__destroy_kernel_maps(struct machines *machines)
+{
+	struct rb_node *next = rb_first(&machines->guests);
+
+	machine__destroy_kernel_maps(&machines->host);
+
+	while (next) {
+		struct machine *pos = rb_entry(next, struct machine, rb_node);
+
+		/* Fetch the successor before the node is erased and freed. */
+		next = rb_next(&pos->rb_node);
+		rb_erase(&pos->rb_node, &machines->guests);
+		machine__delete(pos);
+	}
+}
+
+/*
+ * Find or create the machine for @pid and create its kernel maps.
+ * Returns -1 if no machine could be obtained, otherwise the result of
+ * machine__create_kernel_maps().
+ */
+int machines__create_kernel_maps(struct machines *machines, pid_t pid)
+{
+	struct machine *machine = machines__findnew(machines, pid);
+
+	return machine ? machine__create_kernel_maps(machine) : -1;
+}
+
+/*
+ * Load kernel symbols for @machine from the kallsyms file @filename into the
+ * kernel map's dso.  On a positive result the dso is marked loaded for @type
+ * and the map end addresses in machine->kmaps are fixed up.
+ *
+ * Returns the value of __dso__load_kallsyms().
+ */
+int machine__load_kallsyms(struct machine *machine, const char *filename,
+			     enum map_type type)
+{
+	struct map *map = machine__kernel_map(machine);
+	int ret = __dso__load_kallsyms(map->dso, filename, map, true);
+
+	if (ret > 0) {
+		dso__set_loaded(map->dso, type);
+		/*
+		 * Since /proc/kallsyms will have multiple sessions for the
+		 * kernel, with modules between them, fixup the end of all
+		 * sections.
+		 */
+		__map_groups__fixup_end(&machine->kmaps, type);
+	}
+
+	return ret;
+}
+
+/*
+ * Load kernel symbols for @machine through dso__load_vmlinux_path() and, on
+ * a positive result, mark the kernel dso as loaded for @type.
+ *
+ * Returns the value of dso__load_vmlinux_path().
+ */
+int machine__load_vmlinux_path(struct machine *machine, enum map_type type)
+{
+	struct map *map = machine__kernel_map(machine);
+	int ret = dso__load_vmlinux_path(map->dso, map);
+
+	if (ret > 0)
+		dso__set_loaded(map->dso, type);
+
+	return ret;
+}
+
+/*
+ * Read the kernel release from "<root_dir>/proc/version".
+ *
+ * The first line of the file is searched for "Linux version " and the token
+ * that follows, up to the next space, is returned.
+ *
+ * Returns a malloc()ed string the caller must free(), or NULL if the file
+ * cannot be opened or read, the prefix is missing, or strdup() fails.
+ */
+static char *get_kernel_version(const char *root_dir)
+{
+	char version[PATH_MAX];
+	FILE *file;
+	char *name, *tmp;
+	const char *prefix = "Linux version ";
+
+	/* The buffer is used for the path first, then reused for the line. */
+	snprintf(version, sizeof(version), "%s/proc/version", root_dir);
+	file = fopen(version, "r");
+	if (!file)
+		return NULL;
+
+	tmp = fgets(version, sizeof(version), file);
+	fclose(file);
+	/* On a failed read the buffer contents cannot be relied upon. */
+	if (!tmp)
+		return NULL;
+
+	name = strstr(version, prefix);
+	if (!name)
+		return NULL;
+	name += strlen(prefix);
+	tmp = strchr(name, ' ');
+	if (tmp)
+		*tmp = '\0';
+
+	return strdup(name);
+}
+
+/* True when @dso's symtab_type is one of the two kernel-module types below. */
+static bool is_kmod_dso(struct dso *dso)
+{
+	switch (dso->symtab_type) {
+	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+	case DSO_BINARY_TYPE__GUEST_KMODULE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * If @mg has a MAP__FUNCTION map named m->name, set its dso's long name to a
+ * copy of @path and read the module's build-id.
+ *
+ * Returns 0 when done or when no such map exists, -ENOMEM if @path cannot be
+ * duplicated.
+ */
+static int map_groups__set_module_path(struct map_groups *mg, const char *path,
+				       struct kmod_path *m)
+{
+	struct map *map;
+	char *long_name;
+
+	map = map_groups__find_by_name(mg, MAP__FUNCTION, m->name);
+	if (map == NULL)
+		return 0;
+
+	long_name = strdup(path);
+	if (long_name == NULL)
+		return -ENOMEM;
+
+	dso__set_long_name(map->dso, long_name, true);
+	dso__kernel_module_get_build_id(map->dso, "");
+
+	/*
+	 * Full name could reveal us kmod compression, so
+	 * we need to update the symtab_type if needed.
+	 *
+	 * NOTE(review): the increment presumably relies on each "compressed"
+	 * enumerator directly following its plain counterpart - confirm
+	 * against the enum definition.
+	 */
+	if (m->comp && is_kmod_dso(map->dso))
+		map->dso->symtab_type++;
+
+	return 0;
+}
+
+/*
+ * Recursively walk @dir_name and, for every non-directory entry that parses
+ * as a kernel module name, point the matching module map in @mg at the
+ * file's path.
+ *
+ * @depth is 0 for the top-level call; at that level the "source" and "build"
+ * entries are not descended into.  Entries that cannot be stat()ed are
+ * skipped.
+ *
+ * Returns 0 on success, -1 if @dir_name cannot be opened, or the first
+ * non-zero result from a nested call, kmod_path__parse_name() or
+ * map_groups__set_module_path(), which stops the walk.
+ */
+static int map_groups__set_modules_path_dir(struct map_groups *mg,
+				const char *dir_name, int depth)
+{
+	struct dirent *dent;
+	DIR *dir = opendir(dir_name);
+	int ret = 0;
+
+	if (!dir) {
+		pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
+		return -1;
+	}
+
+	while ((dent = readdir(dir)) != NULL) {
+		char path[PATH_MAX];
+		struct stat st;
+
+		/*sshfs might return bad dent->d_type, so we have to stat*/
+		snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
+		if (stat(path, &st))
+			continue;
+
+		if (S_ISDIR(st.st_mode)) {
+			if (!strcmp(dent->d_name, ".") ||
+			    !strcmp(dent->d_name, ".."))
+				continue;
+
+			/* Do not follow top-level source and build symlinks */
+			if (depth == 0) {
+				if (!strcmp(dent->d_name, "source") ||
+				    !strcmp(dent->d_name, "build"))
+					continue;
+			}
+
+			ret = map_groups__set_modules_path_dir(mg, path,
+							       depth + 1);
+			if (ret < 0)
+				goto out;
+		} else {
+			struct kmod_path m;
+
+			ret = kmod_path__parse_name(&m, dent->d_name);
+			if (ret)
+				goto out;
+
+			if (m.kmod)
+				ret = map_groups__set_module_path(mg, path, &m);
+
+			/* m.name is released before ret is acted upon. */
+			free(m.name);
+
+			if (ret)
+				goto out;
+		}
+	}
+
+out:
+	closedir(dir);
+	return ret;
+}
+
+/*
+ * Resolve module file paths for @machine by walking
+ * "<root_dir>/lib/modules/<kernel version>".
+ *
+ * Returns -1 if the kernel version cannot be determined, otherwise the
+ * result of map_groups__set_modules_path_dir().
+ */
+static int machine__set_modules_path(struct machine *machine)
+{
+	char *version;
+	char modules_path[PATH_MAX];
+
+	version = get_kernel_version(machine->root_dir);
+	if (!version)
+		return -1;
+
+	snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s",
+		 machine->root_dir, version);
+	free(version);
+
+	return map_groups__set_modules_path_dir(&machine->kmaps, modules_path, 0);
+}
+/*
+ * Weak default, so another definition may replace it at link time: leaves
+ * *@start untouched and returns 0.  machine__create_module() treats a
+ * negative return as failure.
+ */
+int __weak arch__fix_module_text_start(u64 *start __maybe_unused,
+				const char *name __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Callback handed to modules__parse(): create (or find) the map for module
+ * @name at @start, set its end to @start + @size and read its build-id.
+ *
+ * @arg is the struct machine.  @start may first be adjusted by
+ * arch__fix_module_text_start().  Returns 0 on success, -1 if the arch hook
+ * fails or no map can be obtained.
+ */
+static int machine__create_module(void *arg, const char *name, u64 start,
+				  u64 size)
+{
+	struct machine *machine = arg;
+	struct map *map;
+
+	if (arch__fix_module_text_start(&start, name) < 0)
+		return -1;
+
+	map = machine__findnew_module_map(machine, start, name);
+	if (map == NULL)
+		return -1;
+	map->end = start + size;
+
+	dso__kernel_module_get_build_id(map->dso, machine->root_dir);
+
+	return 0;
+}
+
+/*
+ * Create module maps for @machine from its modules list:
+ * symbol_conf.default_guest_modules for the default guest, otherwise
+ * "<root_dir>/proc/modules".
+ *
+ * Returns -1 if that file is restricted or cannot be parsed, 0 otherwise.
+ * A failure to resolve the on-disk module paths is only logged.
+ */
+static int machine__create_modules(struct machine *machine)
+{
+	char path[PATH_MAX];
+	const char *modules = path;
+
+	if (machine__is_default_guest(machine))
+		modules = symbol_conf.default_guest_modules;
+	else
+		snprintf(path, PATH_MAX, "%s/proc/modules", machine->root_dir);
+
+	if (symbol__restricted_filename(modules, "/proc/modules") ||
+	    modules__parse(modules, machine, machine__create_module))
+		return -1;
+
+	if (machine__set_modules_path(machine))
+		pr_debug("Problems setting modules path maps, continuing anyway...\n");
+
+	return 0;
+}
+
+/*
+ * Set the [@start, @end) range on every vmlinux map of @machine.  All
+ * vmlinux_maps[] slots are dereferenced, so they must already exist.
+ */
+static void machine__set_kernel_mmap(struct machine *machine,
+				     u64 start, u64 end)
+{
+	int i;
+
+	for (i = 0; i < MAP__NR_TYPES; i++) {
+		machine->vmlinux_maps[i]->start = start;
+		machine->vmlinux_maps[i]->end   = end;
+
+		/*
+		 * Be a bit paranoid here, some perf.data file came with
+		 * a zero sized synthesized MMAP event for the kernel.
+		 */
+		if (start == 0 && end == 0)
+			machine->vmlinux_maps[i]->end = ~0ULL;
+	}
+}
+
+/*
+ * Create the kernel maps (and, if symbol_conf.use_modules is set, the module
+ * maps) for @machine and give the kernel map its address range.
+ *
+ * The start address comes from machine__get_running_kernel_start(); the end
+ * is first assumed to be ~0ULL and then narrowed to the start of the next
+ * map, if there is one.  A failure to create module maps is only logged.
+ *
+ * Returns 0 on success, -1 if the kernel dso or its maps cannot be created
+ * or the ref reloc symbol cannot be set.
+ */
+int machine__create_kernel_maps(struct machine *machine)
+{
+	struct dso *kernel = machine__get_kernel(machine);
+	const char *name = NULL;
+	struct map *map;
+	u64 addr = 0;
+	int ret;
+
+	if (kernel == NULL)
+		return -1;
+
+	ret = __machine__create_kernel_maps(machine, kernel);
+	/* Drop the local dso reference before the result is checked. */
+	dso__put(kernel);
+	if (ret < 0)
+		return -1;
+
+	if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
+		if (machine__is_host(machine))
+			pr_debug("Problems creating module maps, "
+				 "continuing anyway...\n");
+		else
+			pr_debug("Problems creating module maps for guest %d, "
+				 "continuing anyway...\n", machine->pid);
+	}
+
+	if (!machine__get_running_kernel_start(machine, &name, &addr)) {
+		/* name stays NULL when the kallsyms file was restricted. */
+		if (name &&
+		    maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
+			machine__destroy_kernel_maps(machine);
+			return -1;
+		}
+
+		/* we have a real start address now, so re-order the kmaps */
+		map = machine__kernel_map(machine);
+
+		/* Hold a reference across the remove/insert pair. */
+		map__get(map);
+		map_groups__remove(&machine->kmaps, map);
+
+		/* assume it's the last in the kmaps */
+		machine__set_kernel_mmap(machine, addr, ~0ULL);
+
+		map_groups__insert(&machine->kmaps, map);
+		map__put(map);
+	}
+
+	/* update end address of the kernel map using adjacent module address */
+	map = map__next(machine__kernel_map(machine));
+	if (map)
+		machine__set_kernel_mmap(machine, addr, map->start);
+
+	return 0;
+}
+
+/*
+ * True if any dso on machine->dsos.head satisfies dso__is_kcore().
+ * The list is walked without taking machine->dsos.lock.
+ */
+static bool machine__uses_kcore(struct machine *machine)
+{
+	struct dso *dso;
+
+	list_for_each_entry(dso, &machine->dsos.head, node) {
+		if (dso__is_kcore(dso))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Handle an MMAP event that describes kernel-space memory.
+ *
+ * Three cases, decided from event->mmap.filename:
+ *  - starts with '/', or starts with '[' without matching
+ *    machine->mmap_name: treated as a module; its map is found or created
+ *    and its end set from the event length;
+ *  - matches machine->mmap_name: treated as the kernel itself; the kernel
+ *    maps are recreated and given the event's address range;
+ *  - anything else: ignored.
+ *
+ * Nothing is done at all when the machine already uses kcore maps.
+ * Returns 0 on success or when the event is ignored, -1 on failure.
+ */
+static int machine__process_kernel_mmap_event(struct machine *machine,
+					      union perf_event *event)
+{
+	struct map *map;
+	enum dso_kernel_type kernel_type;
+	bool is_kernel_mmap;
+
+	/* If we have maps from kcore then we do not need or want any others */
+	if (machine__uses_kcore(machine))
+		return 0;
+
+	if (machine__is_host(machine))
+		kernel_type = DSO_TYPE_KERNEL;
+	else
+		kernel_type = DSO_TYPE_GUEST_KERNEL;
+
+	/* Compares all but the last character of machine->mmap_name. */
+	is_kernel_mmap = memcmp(event->mmap.filename,
+				machine->mmap_name,
+				strlen(machine->mmap_name) - 1) == 0;
+	if (event->mmap.filename[0] == '/' ||
+	    (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
+		map = machine__findnew_module_map(machine, event->mmap.start,
+						  event->mmap.filename);
+		if (map == NULL)
+			goto out_problem;
+
+		map->end = map->start + event->mmap.len;
+	} else if (is_kernel_mmap) {
+		/* Whatever follows the mmap_name prefix is the symbol name. */
+		const char *symbol_name = (event->mmap.filename +
+				strlen(machine->mmap_name));
+		/*
+		 * Should be there already, from the build-id table in
+		 * the header.
+		 */
+		struct dso *kernel = NULL;
+		struct dso *dso;
+
+		down_read(&machine->dsos.lock);
+
+		list_for_each_entry(dso, &machine->dsos.head, node) {
+
+			/*
+			 * The cpumode passed to is_kernel_module is not the
+			 * cpumode of *this* event. If we insist on passing
+			 * correct cpumode to is_kernel_module, we should
+			 * record the cpumode when we adding this dso to the
+			 * linked list.
+			 *
+			 * However we don't really need passing correct
+			 * cpumode.  We know the correct cpumode must be kernel
+			 * mode (if not, we should not link it onto kernel_dsos
+			 * list).
+			 *
+			 * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN.
+			 * is_kernel_module() treats it as a kernel cpumode.
+			 */
+
+			if (!dso->kernel ||
+			    is_kernel_module(dso->long_name,
+					     PERF_RECORD_MISC_CPUMODE_UNKNOWN))
+				continue;
+
+
+			kernel = dso;
+			break;
+		}
+
+		up_read(&machine->dsos.lock);
+
+		if (kernel == NULL)
+			kernel = machine__findnew_dso(machine, machine->mmap_name);
+		if (kernel == NULL)
+			goto out_problem;
+
+		kernel->kernel = kernel_type;
+		/*
+		 * NOTE(review): dso__put() is called on failure even when
+		 * kernel came from the list walk above, where no reference
+		 * is visibly taken - confirm the refcount balance.
+		 */
+		if (__machine__create_kernel_maps(machine, kernel) < 0) {
+			dso__put(kernel);
+			goto out_problem;
+		}
+
+		if (strstr(kernel->long_name, "vmlinux"))
+			dso__set_short_name(kernel, "[kernel.vmlinux]", false);
+
+		machine__set_kernel_mmap(machine, event->mmap.start,
+					 event->mmap.start + event->mmap.len);
+
+		/*
+		 * Avoid using a zero address (kptr_restrict) for the ref reloc
+		 * symbol. Effectively having zero here means that at record
+		 * time /proc/sys/kernel/kptr_restrict was non zero.
+		 */
+		if (event->mmap.pgoff != 0) {
+			maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
+							 symbol_name,
+							 event->mmap.pgoff);
+		}
+
+		if (machine__is_default_guest(machine)) {
+			/*
+			 * preload dso of guest kernel and modules
+			 */
+			dso__load(kernel, machine__kernel_map(machine));
+		}
+	}
+	return 0;
+out_problem:
+	return -1;
+}
+
+/*
+ * Handle a PERF_RECORD_MMAP2 event.
+ *
+ * Kernel-mode events (host or guest) go to
+ * machine__process_kernel_mmap_event().  For all others, the thread named by
+ * the event is found or created, a map is built from the event's fields and
+ * inserted into that thread.  The map type is MAP__VARIABLE when the event
+ * has PERF_RECORD_MISC_MMAP_DATA set, MAP__FUNCTION otherwise.
+ *
+ * Always returns 0: a failure is reported through dump_printf() and the
+ * event is skipped.
+ */
+int machine__process_mmap2_event(struct machine *machine,
+				 union perf_event *event,
+				 struct perf_sample *sample)
+{
+	struct thread *thread;
+	struct map *map;
+	enum map_type type;
+	int ret = 0;
+
+	if (dump_trace)
+		perf_event__fprintf_mmap2(event, stdout);
+
+	if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+	    sample->cpumode == PERF_RECORD_MISC_KERNEL) {
+		ret = machine__process_kernel_mmap_event(machine, event);
+		if (ret < 0)
+			goto out_problem;
+		return 0;
+	}
+
+	thread = machine__findnew_thread(machine, event->mmap2.pid,
+					event->mmap2.tid);
+	if (thread == NULL)
+		goto out_problem;
+
+	if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
+		type = MAP__VARIABLE;
+	else
+		type = MAP__FUNCTION;
+
+	map = map__new(machine, event->mmap2.start,
+			event->mmap2.len, event->mmap2.pgoff,
+			event->mmap2.maj,
+			event->mmap2.min, event->mmap2.ino,
+			event->mmap2.ino_generation,
+			event->mmap2.prot,
+			event->mmap2.flags,
+			event->mmap2.filename, type, thread);
+
+	if (map == NULL)
+		goto out_problem_map;
+
+	ret = thread__insert_map(thread, map);
+	if (ret)
+		goto out_problem_insert;
+
+	/* Drop the local references taken by the lookup and by map__new(). */
+	thread__put(thread);
+	map__put(map);
+	return 0;
+
+out_problem_insert:
+	map__put(map);
+out_problem_map:
+	thread__put(thread);
+out_problem:
+	dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n");
+	return 0;
+}
+
+/*
+ * Handle a PERF_RECORD_MMAP event.
+ *
+ * Kernel-mode events (host or guest) go to
+ * machine__process_kernel_mmap_event().  For all others, the thread named by
+ * the event is found or created, a map is built from the event's start, len,
+ * pgoff and filename (the remaining map__new() arguments are passed as 0)
+ * and inserted into that thread.  The map type is MAP__VARIABLE when the
+ * event has PERF_RECORD_MISC_MMAP_DATA set, MAP__FUNCTION otherwise.
+ *
+ * Always returns 0: a failure is reported through dump_printf() and the
+ * event is skipped.
+ */
+int machine__process_mmap_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample)
+{
+	struct thread *thread;
+	struct map *map;
+	enum map_type type;
+	int ret = 0;
+
+	if (dump_trace)
+		perf_event__fprintf_mmap(event, stdout);
+
+	if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+	    sample->cpumode == PERF_RECORD_MISC_KERNEL) {
+		ret = machine__process_kernel_mmap_event(machine, event);
+		if (ret < 0)
+			goto out_problem;
+		return 0;
+	}
+
+	thread = machine__findnew_thread(machine, event->mmap.pid,
+					 event->mmap.tid);
+	if (thread == NULL)
+		goto out_problem;
+
+	if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
+		type = MAP__VARIABLE;
+	else
+		type = MAP__FUNCTION;
+
+	map = map__new(machine, event->mmap.start,
+			event->mmap.len, event->mmap.pgoff,
+			0, 0, 0, 0, 0, 0,
+			event->mmap.filename,
+			type, thread);
+
+	if (map == NULL)
+		goto out_problem_map;
+
+	ret = thread__insert_map(thread, map);
+	if (ret)
+		goto out_problem_insert;
+
+	/* Drop the local references taken by the lookup and by map__new(). */
+	thread__put(thread);
+	map__put(map);
+	return 0;
+
+out_problem_insert:
+	map__put(map);
+out_problem_map:
+	thread__put(thread);
+out_problem:
+	dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
+	return 0;
+}
+
+/*
+ * Unlink @th from its bucket's rbtree, move it to the bucket's dead list and
+ * drop one reference.
+ *
+ * @lock: when true the bucket's write lock is taken around the tree and list
+ *        manipulation; when false no locking is done here.
+ *
+ * BUG()s if @th's refcount is already zero.  Note that last_match is cleared
+ * before the lock is taken.
+ */
+static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
+{
+	struct threads *threads = machine__threads(machine, th->tid);
+
+	if (threads->last_match == th)
+		threads->last_match = NULL;
+
+	BUG_ON(refcount_read(&th->refcnt) == 0);
+	if (lock)
+		down_write(&threads->lock);
+	rb_erase_init(&th->rb_node, &threads->entries);
+	RB_CLEAR_NODE(&th->rb_node);
+	--threads->nr;
+	/*
+	 * Move it first to the dead_threads list, then drop the reference,
+	 * if this is the last reference, then the thread__delete destructor
+	 * will be called and we will remove it from the dead_threads list.
+	 */
+	list_add_tail(&th->node, &threads->dead);
+	if (lock)
+		up_write(&threads->lock);
+	thread__put(th);
+}
+
+/* Remove @th from @machine, taking the bucket's write lock while doing so. */
+void machine__remove_thread(struct machine *machine, struct thread *th)
+{
+	__machine__remove_thread(machine, th, true);
+}
+
+/*
+ * Handle a PERF_RECORD_FORK event.
+ *
+ * The parent thread is found or created.  If an existing parent entry has a
+ * pid that differs from the event's ppid it is removed and looked up again.
+ * Any existing thread with the child's tid is removed, a fresh child thread
+ * is created and thread__fork() is called on it with the parent.
+ *
+ * Returns 0 on success, -1 if either thread cannot be obtained or
+ * thread__fork() fails.
+ */
+int machine__process_fork_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample)
+{
+	struct thread *thread = machine__find_thread(machine,
+						     event->fork.pid,
+						     event->fork.tid);
+	struct thread *parent = machine__findnew_thread(machine,
+							event->fork.ppid,
+							event->fork.ptid);
+	int err = 0;
+
+	if (dump_trace)
+		perf_event__fprintf_task(event, stdout);
+
+	/*
+	 * There may be an existing thread that is not actually the parent,
+	 * either because we are processing events out of order, or because the
+	 * (fork) event that would have removed the thread was lost. Assume the
+	 * latter case and continue on as best we can.
+	 *
+	 * The lookup above can fail, so only inspect parent when it exists;
+	 * a NULL parent is reported through the common error path below.
+	 */
+	if (parent != NULL && parent->pid_ != (pid_t)event->fork.ppid) {
+		dump_printf("removing erroneous parent thread %d/%d\n",
+			    parent->pid_, parent->tid);
+		machine__remove_thread(machine, parent);
+		thread__put(parent);
+		parent = machine__findnew_thread(machine, event->fork.ppid,
+						 event->fork.ptid);
+	}
+
+	/* if a thread currently exists for the thread id remove it */
+	if (thread != NULL) {
+		machine__remove_thread(machine, thread);
+		thread__put(thread);
+	}
+
+	thread = machine__findnew_thread(machine, event->fork.pid,
+					 event->fork.tid);
+
+	if (thread == NULL || parent == NULL ||
+	    thread__fork(thread, parent, sample->time) < 0) {
+		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
+		err = -1;
+	}
+	thread__put(thread);
+	thread__put(parent);
+
+	return err;
+}
+
+/*
+ * Handle a PERF_RECORD_EXIT event: when the exiting pid/tid is known to
+ * @machine, pass it to thread__exited().  Always returns 0.
+ */
+int machine__process_exit_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample __maybe_unused)
+{
+	struct thread *exiting;
+
+	/* The pid/tid are read through the fork member of the event union. */
+	exiting = machine__find_thread(machine, event->fork.pid,
+				       event->fork.tid);
+
+	if (dump_trace)
+		perf_event__fprintf_task(event, stdout);
+
+	/* Unknown thread: nothing to mark. */
+	if (exiting == NULL)
+		return 0;
+
+	thread__exited(exiting);
+	thread__put(exiting);
+	return 0;
+}
+
+/*
+ * Dispatch @event to the matching machine__process_*_event() handler based
+ * on event->header.type.
+ *
+ * Returns the handler's result, or -1 for a record type that has no
+ * handler here.
+ */
+int machine__process_event(struct machine *machine, union perf_event *event,
+			   struct perf_sample *sample)
+{
+	switch (event->header.type) {
+	case PERF_RECORD_COMM:
+		return machine__process_comm_event(machine, event, sample);
+	case PERF_RECORD_MMAP:
+		return machine__process_mmap_event(machine, event, sample);
+	case PERF_RECORD_NAMESPACES:
+		return machine__process_namespaces_event(machine, event, sample);
+	case PERF_RECORD_MMAP2:
+		return machine__process_mmap2_event(machine, event, sample);
+	case PERF_RECORD_FORK:
+		return machine__process_fork_event(machine, event, sample);
+	case PERF_RECORD_EXIT:
+		return machine__process_exit_event(machine, event, sample);
+	case PERF_RECORD_LOST:
+		return machine__process_lost_event(machine, event, sample);
+	case PERF_RECORD_AUX:
+		return machine__process_aux_event(machine, event);
+	case PERF_RECORD_ITRACE_START:
+		return machine__process_itrace_start_event(machine, event);
+	case PERF_RECORD_LOST_SAMPLES:
+		return machine__process_lost_samples_event(machine, event, sample);
+	case PERF_RECORD_SWITCH:
+	case PERF_RECORD_SWITCH_CPU_WIDE:
+		/* Both switch flavours share one handler. */
+		return machine__process_switch_event(machine, event);
+	default:
+		break;
+	}
+
+	/* No handler for this record type. */
+	return -1;
+}
+
+/* True when @regex matches the name of @sym (regexec() yields 0 on a match). */
+static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
+{
+	return regexec(regex, sym->name, 0, NULL, 0) == 0;
+}
+
+/*
+ * Resolve instruction address @ip for @thread and store the outcome in
+ * @ams.  The physical address is always recorded as 0.
+ */
+static void ip__resolve_ams(struct thread *thread,
+			    struct addr_map_symbol *ams,
+			    u64 ip)
+{
+	struct addr_location loc;
+
+	memset(&loc, 0, sizeof(loc));
+
+	/*
+	 * The header.misc hint cannot tell us whether a branch stack
+	 * address is user, kernel, guest or hypervisor: branches may
+	 * straddle those boundaries.  So the lookup tries the cpumodes
+	 * one after another until one matches; if none does, the symbol
+	 * stays unknown.
+	 */
+	thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &loc);
+
+	ams->map = loc.map;
+	ams->sym = loc.sym;
+	ams->al_addr = loc.addr;
+	ams->addr = ip;
+	ams->phys_addr = 0;
+}
+
+/*
+ * Resolve data address @addr for @thread in cpumode @m and store the
+ * outcome, together with @phys_addr, in @ams.
+ */
+static void ip__resolve_data(struct thread *thread,
+			     u8 m, struct addr_map_symbol *ams,
+			     u64 addr, u64 phys_addr)
+{
+	struct addr_location loc;
+
+	memset(&loc, 0, sizeof(loc));
+
+	thread__find_addr_location(thread, m, MAP__VARIABLE, addr, &loc);
+
+	/*
+	 * Some shared data regions have the execute bit set, which puts
+	 * their mapping in the MAP__FUNCTION type array.  Retry there as
+	 * a fallback before giving up on the sample.
+	 */
+	if (!loc.map)
+		thread__find_addr_location(thread, m, MAP__FUNCTION, addr, &loc);
+
+	ams->map = loc.map;
+	ams->sym = loc.sym;
+	ams->al_addr = loc.addr;
+	ams->addr = addr;
+	ams->phys_addr = phys_addr;
+}
+
+/*
+ * Build a mem_info for @sample: the instruction address is resolved from
+ * sample->ip, the data address from sample->addr / sample->phys_addr in
+ * al->cpumode, and the raw data_src value is copied over.
+ *
+ * Returns the new mem_info, or NULL when mem_info__new() fails.
+ */
+struct mem_info *sample__resolve_mem(struct perf_sample *sample,
+				     struct addr_location *al)
+{
+	struct mem_info *info;
+
+	info = mem_info__new();
+	if (info == NULL)
+		return NULL;
+
+	ip__resolve_ams(al->thread, &info->iaddr, sample->ip);
+	ip__resolve_data(al->thread, al->cpumode, &info->daddr,
+			 sample->addr, sample->phys_addr);
+
+	info->data_src.val = sample->data_src;
+	return info;
+}
+
+/*
+ * Return the source-line string for @ip in @map, consulting the dso's
+ * srclines tree first and inserting a freshly computed entry on a miss.
+ *
+ * Returns NULL when there is no map or when callchains are keyed by
+ * function (CCKEY_FUNCTION).
+ */
+static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip)
+{
+	char *line;
+
+	if (map == NULL)
+		return NULL;
+	if (callchain_param.key == CCKEY_FUNCTION)
+		return NULL;
+
+	line = srcline__tree_find(&map->dso->srclines, ip);
+	if (line != NULL)
+		return line;
+
+	/* Miss: compute it (symbol never shown, address only for CCKEY_ADDRESS). */
+	line = get_srcline(map->dso, map__rip_2objdump(map, ip), sym,
+			   false, callchain_param.key == CCKEY_ADDRESS, ip);
+	srcline__tree_insert(&map->dso->srclines, ip, line);
+
+	return line;
+}
+
+/*
+ * Per-branch-entry loop summary filled in by save_iterations() and passed
+ * on to callchain_cursor_append() through add_callchain_ip().
+ */
+struct iterations {
+	int nr_loop_iter;	/* number of branch entries that were folded */
+	u64 cycles;		/* sum of flags.cycles over those entries */
+};
+
+/*
+ * Resolve one callchain address and append it to @cursor.
+ *
+ * @cpumode: NULL to let the lookup try the cpumodes itself; otherwise the
+ *	     current cpumode, which is updated in place when @ip is one of
+ *	     the PERF_CONTEXT_* markers (nothing is appended in that case).
+ * @parent:  set to the resolved symbol when parent matching is enabled,
+ *	     no parent was recorded yet and the symbol matches parent_regex.
+ * @root_al: when non-NULL and the symbol matches ignore_callees_regex,
+ *	     receives the resolved location and @cursor is reset.
+ * @iter:    optional loop summary forwarded to the cursor entry.
+ *
+ * Returns 0 when the entry was handled (including skipped ones), 1 when
+ * an invalid context marker was found and @cursor was reset, otherwise
+ * whatever callchain_cursor_append() returns.
+ */
+static int add_callchain_ip(struct thread *thread,
+			    struct callchain_cursor *cursor,
+			    struct symbol **parent,
+			    struct addr_location *root_al,
+			    u8 *cpumode,
+			    u64 ip,
+			    bool branch,
+			    struct branch_flags *flags,
+			    struct iterations *iter,
+			    u64 branch_from)
+{
+	struct addr_location al;
+	const char *srcline;
+	int loop_iters;
+	u64 loop_cycles;
+
+	al.filtered = 0;
+	al.sym = NULL;
+
+	if (cpumode == NULL) {
+		thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
+						   ip, &al);
+	} else if (ip < PERF_CONTEXT_MAX) {
+		thread__find_addr_location(thread, *cpumode, MAP__FUNCTION,
+					   ip, &al);
+	} else {
+		/* Context marker: switch cpumode, append nothing. */
+		switch (ip) {
+		case PERF_CONTEXT_HV:
+			*cpumode = PERF_RECORD_MISC_HYPERVISOR;
+			return 0;
+		case PERF_CONTEXT_KERNEL:
+			*cpumode = PERF_RECORD_MISC_KERNEL;
+			return 0;
+		case PERF_CONTEXT_USER:
+			*cpumode = PERF_RECORD_MISC_USER;
+			return 0;
+		default:
+			pr_debug("invalid callchain context: "
+				 "%"PRId64"\n", (s64) ip);
+			/*
+			 * The callchain looks corrupted, so throw away
+			 * everything collected so far.
+			 */
+			callchain_cursor_reset(cursor);
+			return 1;
+		}
+	}
+
+	if (al.sym == NULL) {
+		if (symbol_conf.hide_unresolved)
+			return 0;
+	} else if (perf_hpp_list.parent && !*parent &&
+		   symbol__match_regex(al.sym, &parent_regex)) {
+		*parent = al.sym;
+	} else if (have_ignore_callees && root_al &&
+		   symbol__match_regex(al.sym, &ignore_callees_regex)) {
+		/* This symbol becomes the root; its callees are forgotten. */
+		*root_al = al;
+		callchain_cursor_reset(cursor);
+	}
+
+	loop_iters = iter ? iter->nr_loop_iter : 0;
+	loop_cycles = iter ? iter->cycles : 0;
+
+	srcline = callchain_srcline(al.map, al.sym, al.addr);
+	return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
+				       branch, flags, loop_iters,
+				       loop_cycles, branch_from, srcline);
+}
+
+/*
+ * Allocate one branch_info per entry of sample->branch_stack and resolve
+ * the "to" and "from" address of each entry for al->thread, copying the
+ * entry flags along.
+ *
+ * Returns the calloc()ed array, or NULL when the allocation fails.
+ */
+struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
+					   struct addr_location *al)
+{
+	const struct branch_stack *bs = sample->branch_stack;
+	struct branch_info *bi;
+	unsigned int idx;
+
+	bi = calloc(bs->nr, sizeof(*bi));
+	if (bi == NULL)
+		return NULL;
+
+	for (idx = 0; idx < bs->nr; idx++) {
+		struct branch_info *dst = &bi[idx];
+
+		ip__resolve_ams(al->thread, &dst->to, bs->entries[idx].to);
+		ip__resolve_ams(al->thread, &dst->from, bs->entries[idx].from);
+		dst->flags = bs->entries[idx].flags;
+	}
+
+	return bi;
+}
+
+/*
+ * Record in @iter that @nr branch entries starting at @be were folded,
+ * together with the sum of their flags.cycles.
+ */
+static void save_iterations(struct iterations *iter,
+			    struct branch_entry *be, int nr)
+{
+	u64 total = 0;
+	int idx = 0;
+
+	while (idx < nr) {
+		total += be[idx].flags.cycles;
+		idx++;
+	}
+
+	iter->nr_loop_iter = nr;
+	iter->cycles = total;
+}
+
+/*
+ * Parameters of the small hash used by remove_loops(): a table of CHASHSZ
+ * unsigned char slots, indexed by hash_64(..., CHASHBITS) % CHASHSZ, where
+ * a slot holding NO_ENTRY has not been used yet.
+ */
+#define CHASHSZ 127
+#define CHASHBITS 7
+#define NO_ENTRY 0xff
+
+/*
+ * Largest branch stack accepted by thread__resolve_callchain_sample();
+ * remove_loops() asserts it does not exceed 255.
+ */
+#define PERF_MAX_BRANCH_DEPTH 127
+
+/*
+ * Remove loops.
+ *
+ * Scan the @nr branch entries in @l and, whenever the sequence of "from"
+ * addresses starting at an earlier entry repeats at the current position,
+ * drop the repeated run by moving the tail of @l (and of @iter) down over
+ * it.  A summary of the dropped run is stored in @iter via
+ * save_iterations().
+ *
+ * @l:    branch entries, modified in place
+ * @nr:   number of valid entries in @l and @iter
+ * @iter: per-entry loop summaries, modified in place
+ *
+ * Returns the number of entries left in @l.
+ */
+static int remove_loops(struct branch_entry *l, int nr,
+			struct iterations *iter)
+{
+	int i, j, off;
+	/* chash[h] is the index of the first entry whose "from" hashed to h. */
+	unsigned char chash[CHASHSZ];
+
+	memset(chash, NO_ENTRY, sizeof(chash));
+
+	/* Indices are stored in unsigned char slots. */
+	BUG_ON(PERF_MAX_BRANCH_DEPTH > 255);
+
+	for (i = 0; i < nr; i++) {
+		int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;
+
+		/* no collision handling for now */
+		if (chash[h] == NO_ENTRY) {
+			chash[h] = i;
+		} else if (l[chash[h]].from == l[i].from) {
+			bool is_loop = true;
+			/* check if it is a real loop */
+			off = 0;
+			/*
+			 * Compare entries [chash[h], i) with the run that
+			 * starts at i, stopping at the end of the array.
+			 */
+			for (j = chash[h]; j < i && i + off < nr; j++, off++)
+				if (l[j].from != l[i + off].from) {
+					is_loop = false;
+					break;
+				}
+			if (is_loop) {
+				/* j: entries remaining after the repeated run */
+				j = nr - (i + off);
+				if (j > 0) {
+					/*
+					 * Summarise the run in the slot that
+					 * the memmove below shifts to index i.
+					 */
+					save_iterations(iter + i + off,
+						l + i, off);
+
+					memmove(iter + i, iter + i + off,
+						j * sizeof(*iter));
+
+					memmove(l + i, l + i + off,
+						j * sizeof(*l));
+				}
+
+				nr -= off;
+			}
+		}
+	}
+	return nr;
+}
+
+/*
+ * Resolve LBR callstack chain sample
+ *
+ * The kernel part of sample->callchain (up to and including the
+ * PERF_CONTEXT_USER marker) is combined with the entries of
+ * sample->branch_stack, which stand in for the user part.
+ *
+ * Return:
+ * 1 on success get LBR callchain information
+ * 0 no available LBR callchain information, should try fp
+ * negative error code on other errors.
+ */
+static int resolve_lbr_callchain_sample(struct thread *thread,
+					struct callchain_cursor *cursor,
+					struct perf_sample *sample,
+					struct symbol **parent,
+					struct addr_location *root_al,
+					int max_stack)
+{
+	struct ip_callchain *chain = sample->callchain;
+	int chain_nr = min(max_stack, (int)chain->nr), i;
+	u8 cpumode = PERF_RECORD_MISC_USER;
+	u64 ip, branch_from = 0;
+
+	/* Find the PERF_CONTEXT_USER marker within the first chain_nr entries. */
+	for (i = 0; i < chain_nr; i++) {
+		if (chain->ips[i] == PERF_CONTEXT_USER)
+			break;
+	}
+
+	/* LBR only affects the user callchain */
+	if (i != chain_nr) {
+		struct branch_stack *lbr_stack = sample->branch_stack;
+		int lbr_nr = lbr_stack->nr, j, k;
+		bool branch;
+		struct branch_flags *flags;
+		/*
+		 * LBR callstack can only get user call chain.
+		 * The mix_chain_nr is kernel call chain
+		 * number plus LBR user call chain number.
+		 * i is kernel call chain number,
+		 * 1 is PERF_CONTEXT_USER,
+		 * lbr_nr + 1 is the user call chain number.
+		 * For details, please refer to the comments
+		 * in callchain__printf
+		 */
+		int mix_chain_nr = i + 1 + lbr_nr + 1;
+
+		for (j = 0; j < mix_chain_nr; j++) {
+			int err;
+			branch = false;
+			flags = NULL;
+
+			if (callchain_param.order == ORDER_CALLEE) {
+				/*
+				 * Callee order: kernel entries and the user
+				 * marker first, then entries[0].to, then the
+				 * "from" address of every LBR entry.
+				 */
+				if (j < i + 1)
+					ip = chain->ips[j];
+				else if (j > i + 1) {
+					k = j - i - 2;
+					ip = lbr_stack->entries[k].from;
+					branch = true;
+					flags = &lbr_stack->entries[k].flags;
+				} else {
+					ip = lbr_stack->entries[0].to;
+					branch = true;
+					flags = &lbr_stack->entries[0].flags;
+					branch_from =
+						lbr_stack->entries[0].from;
+				}
+			} else {
+				/*
+				 * Caller order: the same sequence walked
+				 * backwards, LBR entries from last to first.
+				 */
+				if (j < lbr_nr) {
+					k = lbr_nr - j - 1;
+					ip = lbr_stack->entries[k].from;
+					branch = true;
+					flags = &lbr_stack->entries[k].flags;
+				}
+				else if (j > lbr_nr)
+					ip = chain->ips[i + 1 - (j - lbr_nr)];
+				else {
+					ip = lbr_stack->entries[0].to;
+					branch = true;
+					flags = &lbr_stack->entries[0].flags;
+					branch_from =
+						lbr_stack->entries[0].from;
+				}
+			}
+
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al, &cpumode, ip,
+					       branch, flags, NULL,
+					       branch_from);
+			/* Negative errors propagate; positive results become 0. */
+			if (err)
+				return (err < 0) ? err : 0;
+		}
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill @cursor from the callchain recorded in @sample and, depending on
+ * the configuration, from its branch stack.
+ *
+ * Steps, in order:
+ *  1. If the evsel has a branch callstack, try
+ *     resolve_lbr_callchain_sample(); any non-zero result ends processing
+ *     (negative values are returned as they are, positive ones as 0).
+ *  2. If a branch stack is present and callchain_param.branch_callstack is
+ *     set, add the branch entries after folding loops with remove_loops().
+ *  3. Walk sample->callchain in the order given by callchain_param.order,
+ *     skipping the arch-specific index and counting at most @max_stack
+ *     entries that are not context markers.
+ *
+ * Returns 0 when done or when a helper reported a positive (non-error)
+ * stop condition, otherwise the error returned by the failing helper.
+ */
+static int thread__resolve_callchain_sample(struct thread *thread,
+					    struct callchain_cursor *cursor,
+					    struct perf_evsel *evsel,
+					    struct perf_sample *sample,
+					    struct symbol **parent,
+					    struct addr_location *root_al,
+					    int max_stack)
+{
+	struct branch_stack *branch = sample->branch_stack;
+	struct ip_callchain *chain = sample->callchain;
+	int chain_nr = 0;
+	u8 cpumode = PERF_RECORD_MISC_USER;
+	int i, j, err, nr_entries;
+	int skip_idx = -1;
+	int first_call = 0;
+
+	/*
+	 * Without a chain, chain_nr stays 0 and the check_calls loop below
+	 * never indexes it.
+	 */
+	if (chain)
+		chain_nr = chain->nr;
+
+	if (perf_evsel__has_branch_callstack(evsel)) {
+		err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
+						   root_al, max_stack);
+		if (err)
+			return (err < 0) ? err : 0;
+	}
+
+	/*
+	 * Based on DWARF debug information, some architectures skip
+	 * a callchain entry saved by the kernel.
+	 */
+	skip_idx = arch_skip_callchain_idx(thread, chain);
+
+	/*
+	 * Add branches to call stack for easier browsing. This gives
+	 * more context for a sample than just the callers.
+	 *
+	 * This uses individual histograms of paths compared to the
+	 * aggregated histograms the normal LBR mode uses.
+	 *
+	 * Limitations for now:
+	 * - No extra filters
+	 * - No annotations (should annotate somehow)
+	 */
+
+	if (branch && callchain_param.branch_callstack) {
+		int nr = min(max_stack, (int)branch->nr);
+		/* Working copies, sized by the clamped entry count. */
+		struct branch_entry be[nr];
+		struct iterations iter[nr];
+
+		if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
+			pr_warning("corrupted branch chain. skipping...\n");
+			goto check_calls;
+		}
+
+		/* Copy the entries in the configured order. */
+		for (i = 0; i < nr; i++) {
+			if (callchain_param.order == ORDER_CALLEE) {
+				be[i] = branch->entries[i];
+
+				if (chain == NULL)
+					continue;
+
+				/*
+				 * Check for overlap into the callchain.
+				 * The return address is one off compared to
+				 * the branch entry. To adjust for this
+				 * assume the calling instruction is not longer
+				 * than 8 bytes.
+				 */
+				if (i == skip_idx ||
+				    chain->ips[first_call] >= PERF_CONTEXT_MAX)
+					first_call++;
+				else if (be[i].from < chain->ips[first_call] &&
+				    be[i].from >= chain->ips[first_call] - 8)
+					first_call++;
+			} else
+				be[i] = branch->entries[branch->nr - i - 1];
+		}
+
+		memset(iter, 0, sizeof(struct iterations) * nr);
+		nr = remove_loops(be, nr, iter);
+
+		/* Each branch contributes two entries: its "to", then its "from". */
+		for (i = 0; i < nr; i++) {
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al,
+					       NULL, be[i].to,
+					       true, &be[i].flags,
+					       NULL, be[i].from);
+
+			if (!err)
+				err = add_callchain_ip(thread, cursor, parent, root_al,
+						       NULL, be[i].from,
+						       true, &be[i].flags,
+						       &iter[i], 0);
+			if (err == -EINVAL)
+				break;
+			if (err)
+				return err;
+		}
+
+		if (chain_nr == 0)
+			return 0;
+
+		chain_nr -= nr;
+	}
+
+check_calls:
+	/* nr_entries counts only real addresses, not PERF_CONTEXT_* markers. */
+	for (i = first_call, nr_entries = 0;
+	     i < chain_nr && nr_entries < max_stack; i++) {
+		u64 ip;
+
+		if (callchain_param.order == ORDER_CALLEE)
+			j = i;
+		else
+			j = chain->nr - i - 1;
+
+#ifdef HAVE_SKIP_CALLCHAIN_IDX
+		if (j == skip_idx)
+			continue;
+#endif
+		ip = chain->ips[j];
+
+		if (ip < PERF_CONTEXT_MAX)
+                       ++nr_entries;
+
+		err = add_callchain_ip(thread, cursor, parent,
+				       root_al, &cpumode, ip,
+				       false, NULL, NULL, 0);
+
+		if (err)
+			return (err < 0) ? err : 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Append one cursor entry per inlined function recorded for @ip.
+ *
+ * Returns 1 when nothing was appended (inline names disabled, no map or
+ * symbol, no inline information, or an empty inline list); otherwise the
+ * result of the last callchain_cursor_append() call, which is 0 when all
+ * entries were appended.
+ */
+static int append_inlines(struct callchain_cursor *cursor,
+			  struct map *map, struct symbol *sym, u64 ip)
+{
+	struct inline_node *node;
+	struct inline_list *entry;
+	u64 objdump_addr;
+	int ret = 1;
+
+	if (!(symbol_conf.inline_name && map && sym))
+		return 1;
+
+	objdump_addr = map__rip_2objdump(map, ip);
+
+	/* Use the per-dso tree of parsed nodes, filling it on a miss. */
+	node = inlines__tree_find(&map->dso->inlined_nodes, objdump_addr);
+	if (node == NULL) {
+		node = dso__parse_addr_inlines(map->dso, objdump_addr, sym);
+		if (node == NULL)
+			return 1;
+		inlines__tree_insert(&map->dso->inlined_nodes, node);
+	}
+
+	list_for_each_entry(entry, &node->val, list) {
+		ret = callchain_cursor_append(cursor, ip, map,
+					      entry->symbol, false,
+					      NULL, 0, 0, 0, entry->srcline);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Callback handed to unwind__get_entries(): add @entry to the callchain
+ * cursor passed through @arg.  Inlined frames are tried first; when
+ * append_inlines() reports 0 the plain entry is not added.
+ */
+static int unwind_entry(struct unwind_entry *entry, void *arg)
+{
+	struct callchain_cursor *cursor = arg;
+
+	if (entry->sym == NULL && symbol_conf.hide_unresolved)
+		return 0;
+
+	if (!append_inlines(cursor, entry->map, entry->sym, entry->ip))
+		return 0;
+
+	return callchain_cursor_append(cursor, entry->ip,
+				       entry->map, entry->sym,
+				       false, NULL, 0, 0, 0,
+				       callchain_srcline(entry->map,
+							 entry->sym,
+							 entry->ip));
+}
+
+/*
+ * Unwind the user stack captured in @sample into @cursor.
+ *
+ * Returns 0 without unwinding when the evsel did not sample both user
+ * registers and user stack, or when the sample carries neither; otherwise
+ * the result of unwind__get_entries().
+ */
+static int thread__resolve_callchain_unwind(struct thread *thread,
+					    struct callchain_cursor *cursor,
+					    struct perf_evsel *evsel,
+					    struct perf_sample *sample,
+					    int max_stack)
+{
+	/* Dwarf post unwind needs both pieces of user state sampled. */
+	if (!(evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) ||
+	    !(evsel->attr.sample_type & PERF_SAMPLE_STACK_USER))
+		return 0;
+
+	/* Nothing captured for this particular sample. */
+	if (!sample->user_regs.regs)
+		return 0;
+	if (!sample->user_stack.size)
+		return 0;
+
+	return unwind__get_entries(unwind_entry, cursor,
+				   thread, sample, max_stack);
+}
+
+/*
+ * Reset @cursor and fill it from both callchain sources of @sample: the
+ * recorded chain and the unwinder.  With ORDER_CALLEE the recorded chain
+ * goes first, otherwise the unwinder does.  The second source is only
+ * consulted when the first one returned 0.
+ *
+ * Returns 0 on success or the first non-zero result.
+ */
+int thread__resolve_callchain(struct thread *thread,
+			      struct callchain_cursor *cursor,
+			      struct perf_evsel *evsel,
+			      struct perf_sample *sample,
+			      struct symbol **parent,
+			      struct addr_location *root_al,
+			      int max_stack)
+{
+	int ret;
+
+	callchain_cursor_reset(cursor);
+
+	if (callchain_param.order != ORDER_CALLEE) {
+		ret = thread__resolve_callchain_unwind(thread, cursor,
+						       evsel, sample,
+						       max_stack);
+		if (ret == 0)
+			ret = thread__resolve_callchain_sample(thread, cursor,
+							       evsel, sample,
+							       parent, root_al,
+							       max_stack);
+		return ret;
+	}
+
+	ret = thread__resolve_callchain_sample(thread, cursor,
+					       evsel, sample,
+					       parent, root_al,
+					       max_stack);
+	if (ret == 0)
+		ret = thread__resolve_callchain_unwind(thread, cursor,
+						       evsel, sample,
+						       max_stack);
+	return ret;
+}
+
+/*
+ * Call @fn(thread, @priv) for every thread of @machine: for each bucket of
+ * the thread table, first the entries of its rb-tree, then the threads on
+ * its dead list.
+ *
+ * Iteration stops at the first non-zero return of @fn, which is passed
+ * back to the caller; 0 is returned when every call returned 0.
+ */
+int machine__for_each_thread(struct machine *machine,
+			     int (*fn)(struct thread *thread, void *p),
+			     void *priv)
+{
+	int bucket;
+
+	for (bucket = 0; bucket < THREADS__TABLE_SIZE; bucket++) {
+		struct threads *threads = &machine->threads[bucket];
+		struct rb_node *nd = rb_first(&threads->entries);
+		struct thread *thread;
+		int rc;
+
+		while (nd != NULL) {
+			thread = rb_entry(nd, struct thread, rb_node);
+			rc = fn(thread, priv);
+			if (rc)
+				return rc;
+			nd = rb_next(nd);
+		}
+
+		list_for_each_entry(thread, &threads->dead, node) {
+			rc = fn(thread, priv);
+			if (rc)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Run machine__for_each_thread() on the host machine and then on every
+ * guest machine in @machines, stopping at the first non-zero result.
+ *
+ * Returns that result, or 0 when all callbacks returned 0.
+ */
+int machines__for_each_thread(struct machines *machines,
+			      int (*fn)(struct thread *thread, void *p),
+			      void *priv)
+{
+	struct rb_node *nd;
+	int rc;
+
+	rc = machine__for_each_thread(&machines->host, fn, priv);
+	if (rc)
+		return rc;
+
+	nd = rb_first(&machines->guests);
+	while (nd != NULL) {
+		struct machine *guest = rb_entry(nd, struct machine, rb_node);
+
+		rc = machine__for_each_thread(guest, fn, priv);
+		if (rc)
+			break;
+		nd = rb_next(nd);
+	}
+
+	return rc;
+}
+
+/*
+ * Synthesize thread events for @machine according to @target: from the
+ * given thread map when the target names tasks, for all threads when it
+ * names cpus, and nothing otherwise.
+ *
+ * Returns the result of the synthesize helper that was used, or 0 when
+ * none applied.
+ */
+int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+				  struct target *target, struct thread_map *threads,
+				  perf_event__handler_t process, bool data_mmap,
+				  unsigned int proc_map_timeout,
+				  unsigned int nr_threads_synthesize)
+{
+	if (target__has_task(target))
+		return perf_event__synthesize_thread_map(tool, threads, process,
+							 machine, data_mmap,
+							 proc_map_timeout);
+
+	/* command specified */
+	if (!target__has_cpu(target))
+		return 0;
+
+	return perf_event__synthesize_threads(tool, process,
+					      machine, data_mmap,
+					      proc_map_timeout,
+					      nr_threads_synthesize);
+}
+
+/*
+ * Return the tid recorded for @cpu by machine__set_current_tid(), or -1
+ * when @cpu is outside [0, MAX_NR_CPUS) or no table has been allocated.
+ */
+pid_t machine__get_current_tid(struct machine *machine, int cpu)
+{
+	if (cpu >= 0 && cpu < MAX_NR_CPUS && machine->current_tid != NULL)
+		return machine->current_tid[cpu];
+
+	return -1;
+}
+
+/*
+ * Record @tid as the thread currently running on @cpu and store @cpu in
+ * the thread entry for @pid/@tid (created if needed).  The per-cpu table
+ * is allocated on first use with every slot set to -1.
+ *
+ * Returns 0 on success, -EINVAL for a negative or too large @cpu and
+ * -ENOMEM when the table or the thread cannot be allocated.
+ */
+int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
+			     pid_t tid)
+{
+	struct thread *thread;
+
+	if (cpu < 0)
+		return -EINVAL;
+
+	if (machine->current_tid == NULL) {
+		pid_t *tids = calloc(MAX_NR_CPUS, sizeof(*tids));
+		int slot;
+
+		if (tids == NULL)
+			return -ENOMEM;
+
+		for (slot = 0; slot < MAX_NR_CPUS; slot++)
+			tids[slot] = -1;
+
+		machine->current_tid = tids;
+	}
+
+	/* The upper bound is checked only after the table exists. */
+	if (cpu >= MAX_NR_CPUS) {
+		pr_err("Requested CPU %d too large. ", cpu);
+		pr_err("Consider raising MAX_NR_CPUS\n");
+		return -EINVAL;
+	}
+
+	machine->current_tid[cpu] = tid;
+
+	thread = machine__findnew_thread(machine, pid, tid);
+	if (thread == NULL)
+		return -ENOMEM;
+
+	thread->cpu = cpu;
+	thread__put(thread);
+
+	return 0;
+}
+
+/*
+ * Set machine->kernel_start: 2^63 by default, replaced by the start of
+ * the kernel map when that map exists and loads successfully.
+ *
+ * Returns 0 when there is no kernel map, otherwise the result of
+ * map__load().
+ */
+int machine__get_kernel_start(struct machine *machine)
+{
+	struct map *kmap = machine__kernel_map(machine);
+	int err;
+
+	/*
+	 * Only kernel addresses of a 64-bit kernel lie above 2^63.
+	 * Addresses are unsigned, so on a 32-bit system every address,
+	 * kernel ones included, is below 2^32.  On such a system an
+	 * unknown kernel mapping therefore makes all addresses count as
+	 * user space - see machine__kernel_ip().
+	 */
+	machine->kernel_start = 1ULL << 63;
+
+	if (kmap == NULL)
+		return 0;
+
+	err = map__load(kmap);
+	if (err == 0)
+		machine->kernel_start = kmap->start;
+
+	return err;
+}
+
+/*
+ * Look up @filename in @machine's dso list via dsos__findnew() and return
+ * whatever that helper returns.
+ */
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
+{
+	return dsos__findnew(&machine->dsos, filename);
+}
+
+/*
+ * Resolve the kernel function containing *@addrp.
+ *
+ * @vmachine: the struct machine, passed as void *
+ * @addrp:    in: address to look up; out: unmapped start of the symbol
+ * @modp:     out: short name of the dso when the map is a kernel module,
+ *	      NULL otherwise
+ *
+ * Returns the symbol name, or NULL (leaving @addrp and @modp untouched)
+ * when no symbol is found.
+ */
+char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
+{
+	struct machine *machine = vmachine;
+	struct map *map;
+	struct symbol *sym;
+
+	sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION,
+				      *addrp, &map);
+	if (!sym)
+		return NULL;
+
+	if (__map__is_kmodule(map))
+		*modp = (char *)map->dso->short_name;
+	else
+		*modp = NULL;
+
+	*addrp = map->unmap_ip(map, sym->start);
+	return sym->name;
+}
diff --git a/util/machine.h b/util/machine.h
new file mode 100644
index 0000000..66cc200
--- /dev/null
+++ b/util/machine.h
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MACHINE_H
+#define __PERF_MACHINE_H
+
+#include <sys/types.h>
+#include <linux/rbtree.h>
+#include "map.h"
+#include "dso.h"
+#include "event.h"
+#include "rwsem.h"
+
+struct addr_location;
+struct branch_stack;
+struct perf_evsel;
+struct perf_sample;
+struct symbol;
+struct thread;
+union perf_event;
+
+/* Native host kernel uses -1 as pid index in machine */
+#define	HOST_KERNEL_ID			(-1)
+#define	DEFAULT_GUEST_KERNEL_ID		(0)
+
+extern const char *ref_reloc_sym_names[];
+
+struct vdso_info;
+
+#define THREADS__TABLE_BITS	8
+#define THREADS__TABLE_SIZE	(1 << THREADS__TABLE_BITS)
+
+/*
+ * One bucket of a machine's thread table (see machine__threads() for how
+ * a tid selects its bucket).
+ */
+struct threads {
+	struct rb_root	  entries;	/* rb-tree of struct thread, linked by rb_node */
+	struct rw_semaphore lock;	/* taken for writing while entries/dead change */
+	unsigned int	  nr;		/* decremented when a thread is removed */
+	struct list_head  dead;		/* removed threads, linked by thread->node */
+	struct thread	  *last_match;	/* cleared when the thread it points to is removed */
+};
+
+/*
+ * State kept per machine: the host, or a guest linked into
+ * machines->guests through rb_node.
+ */
+struct machine {
+	struct rb_node	  rb_node;	/* node in machines->guests */
+	pid_t		  pid;		/* HOST_KERNEL_ID, DEFAULT_GUEST_KERNEL_ID or another pid */
+	u16		  id_hdr_size;
+	bool		  comm_exec;
+	bool		  kptr_restrict_warned;
+	char		  *root_dir;
+	char		  *mmap_name;
+	struct threads    threads[THREADS__TABLE_SIZE];	/* thread table, bucket chosen by tid */
+	struct vdso_info  *vdso_info;
+	struct perf_env   *env;
+	struct dsos	  dsos;		/* searched by machine__findnew_dso() */
+	struct map_groups kmaps;	/* searched by the machine__find_kernel_*() helpers */
+	struct map	  *vmlinux_maps[MAP__NR_TYPES];	/* kernel map per map type */
+	u64		  kernel_start;	/* 0 until machine__get_kernel_start() has run */
+	pid_t		  *current_tid;	/* per-cpu tids, allocated on first machine__set_current_tid() */
+	union { /* Tool specific area */
+		void	  *priv;
+		u64	  db_id;
+	};
+};
+
+/* Return the thread-table bucket of @machine that @tid maps to. */
+static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
+{
+	/* Go through unsigned so that tid == -1 still yields a valid index. */
+	unsigned int bucket = (unsigned int)tid % THREADS__TABLE_SIZE;
+
+	return &machine->threads[bucket];
+}
+
+/* Return @machine's kernel map for map type @type (may be NULL). */
+static inline
+struct map *__machine__kernel_map(struct machine *machine, enum map_type type)
+{
+	return machine->vmlinux_maps[type];
+}
+
+/* Return @machine's kernel map of type MAP__FUNCTION. */
+static inline
+struct map *machine__kernel_map(struct machine *machine)
+{
+	return __machine__kernel_map(machine, MAP__FUNCTION);
+}
+
+int machine__get_kernel_start(struct machine *machine);
+
+/*
+ * Return machine->kernel_start, computing it on first use (a value of 0
+ * means "not computed yet").  The result of machine__get_kernel_start()
+ * itself is not looked at.
+ */
+static inline u64 machine__kernel_start(struct machine *machine)
+{
+	if (machine->kernel_start)
+		return machine->kernel_start;
+
+	machine__get_kernel_start(machine);
+	return machine->kernel_start;
+}
+
+/* True when @ip is at or above the kernel start address of @machine. */
+static inline bool machine__kernel_ip(struct machine *machine, u64 ip)
+{
+	return ip >= machine__kernel_start(machine);
+}
+
+struct thread *machine__find_thread(struct machine *machine, pid_t pid,
+				    pid_t tid);
+struct comm *machine__thread_exec_comm(struct machine *machine,
+				       struct thread *thread);
+
+int machine__process_comm_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_exit_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_fork_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_lost_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_lost_samples_event(struct machine *machine, union perf_event *event,
+					struct perf_sample *sample);
+int machine__process_aux_event(struct machine *machine,
+			       union perf_event *event);
+int machine__process_itrace_start_event(struct machine *machine,
+					union perf_event *event);
+int machine__process_switch_event(struct machine *machine,
+				  union perf_event *event);
+int machine__process_namespaces_event(struct machine *machine,
+				      union perf_event *event,
+				      struct perf_sample *sample);
+int machine__process_mmap_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
+				 struct perf_sample *sample);
+int machine__process_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+
+typedef void (*machine__process_t)(struct machine *machine, void *data);
+
+/* The host machine plus an rb-tree of guest machines. */
+struct machines {
+	struct machine host;	/* embedded host machine */
+	struct rb_root guests;	/* struct machine entries, linked by machine->rb_node */
+};
+
+void machines__init(struct machines *machines);
+void machines__exit(struct machines *machines);
+
+void machines__process_guests(struct machines *machines,
+			      machine__process_t process, void *data);
+
+struct machine *machines__add(struct machines *machines, pid_t pid,
+			      const char *root_dir);
+struct machine *machines__find_host(struct machines *machines);
+struct machine *machines__find(struct machines *machines, pid_t pid);
+struct machine *machines__findnew(struct machines *machines, pid_t pid);
+
+void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
+void machines__set_comm_exec(struct machines *machines, bool comm_exec);
+
+struct machine *machine__new_host(void);
+struct machine *machine__new_kallsyms(void);
+int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
+void machine__exit(struct machine *machine);
+void machine__delete_threads(struct machine *machine);
+void machine__delete(struct machine *machine);
+void machine__remove_thread(struct machine *machine, struct thread *th);
+
+struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
+					   struct addr_location *al);
+struct mem_info *sample__resolve_mem(struct perf_sample *sample,
+				     struct addr_location *al);
+
+struct callchain_cursor;
+
+int thread__resolve_callchain(struct thread *thread,
+			      struct callchain_cursor *cursor,
+			      struct perf_evsel *evsel,
+			      struct perf_sample *sample,
+			      struct symbol **parent,
+			      struct addr_location *root_al,
+			      int max_stack);
+
+/*
+ * The default guest kernel is the one described by the --guestkallsyms
+ * and --guestmodules parameters.
+ *
+ * True when @machine is non-NULL and its pid is DEFAULT_GUEST_KERNEL_ID.
+ */
+static inline bool machine__is_default_guest(struct machine *machine)
+{
+	return machine != NULL && machine->pid == DEFAULT_GUEST_KERNEL_ID;
+}
+
+/* True when @machine is non-NULL and its pid is HOST_KERNEL_ID. */
+static inline bool machine__is_host(struct machine *machine)
+{
+	return machine != NULL && machine->pid == HOST_KERNEL_ID;
+}
+
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
+
+size_t machine__fprintf(struct machine *machine, FILE *fp);
+
+/*
+ * Look up @addr among @machine's kernel maps of type @type by forwarding
+ * to map_groups__find_symbol(); @mapp is passed through unchanged.
+ */
+static inline
+struct symbol *machine__find_kernel_symbol(struct machine *machine,
+					   enum map_type type, u64 addr,
+					   struct map **mapp)
+{
+	return map_groups__find_symbol(&machine->kmaps, type, addr, mapp);
+}
+
+/*
+ * Look up @name among @machine's kernel maps of type @type by forwarding
+ * to map_groups__find_symbol_by_name(); @mapp is passed through unchanged.
+ */
+static inline
+struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
+						   enum map_type type, const char *name,
+						   struct map **mapp)
+{
+	return map_groups__find_symbol_by_name(&machine->kmaps, type, name, mapp);
+}
+
+/*
+ * Look up @addr among @machine's kernel maps of type MAP__FUNCTION;
+ * @mapp is passed through to the lookup unchanged.
+ */
+static inline
+struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
+					     struct map **mapp)
+{
+	return map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION,
+				       addr, mapp);
+}
+
+/*
+ * Look up the function called @name in @machine's kernel maps by
+ * forwarding to map_groups__find_function_by_name(); @mapp is passed
+ * through unchanged.
+ */
+static inline
+struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
+						     const char *name,
+						     struct map **mapp)
+{
+	return map_groups__find_function_by_name(&machine->kmaps, name, mapp);
+}
+
+struct map *machine__findnew_module_map(struct machine *machine, u64 start,
+					const char *filename);
+int arch__fix_module_text_start(u64 *start, const char *name);
+
+int machine__load_kallsyms(struct machine *machine, const char *filename,
+			   enum map_type type);
+int machine__load_vmlinux_path(struct machine *machine, enum map_type type);
+
+size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
+				     bool (skip)(struct dso *dso, int parm), int parm);
+size_t machines__fprintf_dsos(struct machines *machines, FILE *fp);
+size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
+				     bool (skip)(struct dso *dso, int parm), int parm);
+
+void machine__destroy_kernel_maps(struct machine *machine);
+int machine__create_kernel_maps(struct machine *machine);
+
+int machines__create_kernel_maps(struct machines *machines, pid_t pid);
+int machines__create_guest_kernel_maps(struct machines *machines);
+void machines__destroy_kernel_maps(struct machines *machines);
+
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
+
+int machine__for_each_thread(struct machine *machine,
+			     int (*fn)(struct thread *thread, void *p),
+			     void *priv);
+int machines__for_each_thread(struct machines *machines,
+			      int (*fn)(struct thread *thread, void *p),
+			      void *priv);
+
+int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+				  struct target *target, struct thread_map *threads,
+				  perf_event__handler_t process, bool data_mmap,
+				  unsigned int proc_map_timeout,
+				  unsigned int nr_threads_synthesize);
+/*
+ * Convenience form of __machine__synthesize_threads(): no tool (NULL) and
+ * perf_event__process as the event handler; all other arguments are
+ * forwarded unchanged.
+ */
+static inline
+int machine__synthesize_threads(struct machine *machine, struct target *target,
+				struct thread_map *threads, bool data_mmap,
+				unsigned int proc_map_timeout,
+				unsigned int nr_threads_synthesize)
+{
+	return __machine__synthesize_threads(machine, NULL, target, threads,
+					     perf_event__process, data_mmap,
+					     proc_map_timeout,
+					     nr_threads_synthesize);
+}
+
+pid_t machine__get_current_tid(struct machine *machine, int cpu);
+int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
+			     pid_t tid);
+/*
+ * For use with libtraceevent's pevent_set_function_resolver()
+ */
+char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp);
+
+#endif /* __PERF_MACHINE_H */
diff --git a/util/map.c b/util/map.c
new file mode 100644
index 0000000..8fe5703
--- /dev/null
+++ b/util/map.c
@@ -0,0 +1,898 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "symbol.h"
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include "map.h"
+#include "thread.h"
+#include "vdso.h"
+#include "build-id.h"
+#include "util.h"
+#include "debug.h"
+#include "machine.h"
+#include <linux/string.h>
+#include "srcline.h"
+#include "namespaces.h"
+#include "unwind.h"
+
+static void __maps__insert(struct maps *maps, struct map *map);
+
+const char *map_type__name[MAP__NR_TYPES] = {	/* printable heading per enum map_type value */
+	[MAP__FUNCTION] = "Functions",
+	[MAP__VARIABLE] = "Variables",
+};
+
+static inline int is_anon_memory(const char *filename, u32 flags)
+{
+	if ((flags & MAP_HUGETLB) || strcmp(filename, "//anon") == 0)
+		return 1;	/* MAP_HUGETLB flag set, or the literal "//anon" name */
+	return strncmp(filename, "/dev/zero", strlen("/dev/zero")) == 0 ||
+	       strncmp(filename, "/anon_hugepage", strlen("/anon_hugepage")) == 0;
+}
+
+static inline int is_no_dso_memory(const char *filename)
+{
+	if (strncmp(filename, "[stack", 6) == 0 || strncmp(filename, "/SYSV", 5) == 0)
+		return 1;	/* prefix match: "[stack..." or "/SYSV..." */
+	return strcmp(filename, "[heap]") == 0;	/* exact match only */
+}
+
+static inline int is_android_lib(const char *filename)
+{
+	const int in_app_lib = strncmp(filename, "/data/app-lib", 13) == 0;	/* prefix test */
+	return in_app_lib || strncmp(filename, "/system/lib", 11) == 0;
+}
+
+static inline bool replace_android_lib(const char *filename, char *newfilename)
+{	/* Build a replacement path in @newfilename from environment variables; false if none was built */
+	const char *libname;
+	char *app_abi;
+	size_t app_abi_length, new_length;
+	size_t lib_length = 0;
+	/* libname keeps its leading '/', so lib_length counts that slash as well */
+	libname  = strrchr(filename, '/');
+	if (libname)
+		lib_length = strlen(libname);
+
+	app_abi = getenv("APP_ABI");
+	if (!app_abi)
+		return false;
+
+	app_abi_length = strlen(app_abi);
+
+	if (!strncmp(filename, "/data/app-lib", 13)) {
+		char *apk_path;
+
+		if (!app_abi_length)
+			return false;
+		/* 7 = strlen("libs/") + the '/' after the ABI + the terminating NUL */
+		new_length = 7 + app_abi_length + lib_length;
+
+		apk_path = getenv("APK_PATH");
+		if (apk_path) {
+			new_length += strlen(apk_path) + 1;	/* +1 for the '/' after apk_path */
+			if (new_length > PATH_MAX)
+				return false;
+			snprintf(newfilename, new_length,
+				 "%s/libs/%s/%s", apk_path, app_abi, libname);
+		} else {
+			if (new_length > PATH_MAX)
+				return false;
+			snprintf(newfilename, new_length,
+				 "libs/%s/%s", app_abi, libname);
+		}
+		return true;
+	}
+
+	if (!strncmp(filename, "/system/lib/", 11)) {	/* only the first 11 chars ("/system/lib") are compared */
+		char *ndk, *app;
+		const char *arch;
+		size_t ndk_length;
+		size_t app_length;
+
+		ndk = getenv("NDK_ROOT");
+		app = getenv("APP_PLATFORM");
+
+		if (!(ndk && app))
+			return false;
+
+		ndk_length = strlen(ndk);
+		app_length = strlen(app);
+
+		if (!(ndk_length && app_length && app_abi_length))
+			return false;
+		/* Map the ABI name to an arch directory by prefix; unknown ABIs are rejected */
+		arch = !strncmp(app_abi, "arm", 3) ? "arm" :
+		       !strncmp(app_abi, "mips", 4) ? "mips" :
+		       !strncmp(app_abi, "x86", 3) ? "x86" : NULL;
+
+		if (!arch)
+			return false;
+		/* 27 = strlen("/platforms/") + strlen("/arch-") + strlen("/usr/lib/") + NUL */
+		new_length = 27 + ndk_length +
+			     app_length + lib_length
+			   + strlen(arch);
+
+		if (new_length > PATH_MAX)
+			return false;
+		snprintf(newfilename, new_length,
+			"%s/platforms/%s/arch-%s/usr/lib/%s",
+			ndk, app, arch, libname);
+
+		return true;
+	}
+	return false;
+}
+
+void map__init(struct map *map, enum map_type type,
+	       u64 start, u64 end, u64 pgoff, struct dso *dso)
+{	/* Initialise an already allocated @map; its refcount starts at 1 */
+	map->type     = type;
+	map->start    = start;
+	map->end      = end;
+	map->pgoff    = pgoff;
+	map->reloc    = 0;
+	map->dso      = dso__get(dso);	/* paired with dso__zput() in map__exit() */
+	map->map_ip   = map__map_ip;
+	map->unmap_ip = map__unmap_ip;
+	RB_CLEAR_NODE(&map->rb_node);	/* not linked into any tree yet */
+	map->groups   = NULL;
+	map->erange_warned = false;
+	refcount_set(&map->refcnt, 1);
+}
+
+struct map *map__new(struct machine *machine, u64 start, u64 len,
+		     u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
+		     u64 ino_gen, u32 prot, u32 flags, char *filename,
+		     enum map_type type, struct thread *thread)
+{
+	struct map *map = malloc(sizeof(*map));
+	struct nsinfo *nsi = NULL;
+	struct nsinfo *nnsi;
+	/* Returns a map covering [start, start + len) with one reference, or NULL on failure */
+	if (map != NULL) {
+		char newfilename[PATH_MAX];
+		struct dso *dso;
+		int anon, no_dso, vdso, android;
+
+		android = is_android_lib(filename);
+		anon = is_anon_memory(filename, flags);
+		vdso = is_vdso_map(filename);
+		no_dso = is_no_dso_memory(filename);
+
+		map->maj = d_maj;
+		map->min = d_min;
+		map->ino = ino;
+		map->ino_generation = ino_gen;
+		map->prot = prot;
+		map->flags = flags;
+		nsi = nsinfo__get(thread->nsinfo);
+
+		if ((anon || no_dso) && nsi && type == MAP__FUNCTION) {
+			snprintf(newfilename, sizeof(newfilename),
+				 "/tmp/perf-%d.map", nsi->pid);
+			filename = newfilename;
+		}
+
+		if (android) {
+			if (replace_android_lib(filename, newfilename))
+				filename = newfilename;
+		}
+
+		if (vdso) {
+			/* The vdso maps are always on the host and not the
+			 * container.  Ensure that we don't use setns to look
+			 * them up.
+			 */
+			nnsi = nsinfo__copy(nsi);
+			if (nnsi) {
+				nsinfo__put(nsi);
+				nnsi->need_setns = false;
+				nsi = nnsi;
+			}
+			pgoff = 0;
+			dso = machine__findnew_vdso(machine, thread);
+		} else
+			dso = machine__findnew_dso(machine, filename);
+
+		if (dso == NULL)
+			goto out_delete;
+
+		map__init(map, type, start, start + len, pgoff, dso);
+
+		if (anon || no_dso) {
+			map->map_ip = map->unmap_ip = identity__map_ip;
+			/*
+			 * Set memory without DSO as loaded. All map__find_*
+			 * functions still return NULL, and we avoid the
+			 * unnecessary map__load warning.
+			 */
+			if (type != MAP__FUNCTION)
+				dso__set_loaded(dso, map->type);
+		}
+		nsinfo__put(dso->nsinfo);	/* a reused dso may already own one: don't leak it */
+		dso->nsinfo = nsi;
+		dso__put(dso);
+	}
+	return map;
+out_delete:
+	nsinfo__put(nsi);
+	free(map);
+	return NULL;
+}
+
+/*
+ * Constructor variant for modules (where we know from /proc/modules where
+ * they are loaded) and for vmlinux, where only after we load all the
+ * symbols we'll know where it starts and ends.
+ */
+struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
+{
+	const size_t kmap_size = dso->kernel ? sizeof(struct kmap) : 0;
+	struct map *map = calloc(1, sizeof(*map) + kmap_size);
+
+	/*
+	 * ->end is passed as 0 here; it will be filled after we load all the symbols
+	 */
+	if (map != NULL)
+		map__init(map, type, start, 0, 0, dso);
+
+	return map;
+}
+
+/*
+ * Use this and __map__is_kmodule() for map instances that are in
+ * machine->kmaps, and thus have map->groups->machine all properly set, to
+ * disambiguate between the kernel and modules.
+ *
+ * When the need arises, introduce map__is_{kernel,kmodule}() that
+ * checks (map->groups != NULL && map->groups->machine != NULL &&
+ * map->dso->kernel) before calling __map__is_{kernel,kmodule}().
+ */
+bool __map__is_kernel(const struct map *map)
+{	/* dereferences map->groups unconditionally: see the precondition above */
+	return __machine__kernel_map(map->groups->machine, map->type) == map;
+}
+
+static void map__exit(struct map *map)
+{
+	BUG_ON(!RB_EMPTY_NODE(&map->rb_node));	/* must no longer be linked in a tree */
+	dso__zput(map->dso);			/* release the dso reference held by the map */
+}
+
+void map__delete(struct map *map)
+{	/* Tear down and free @map regardless of its refcount; map__put() is the refcounted path */
+	map__exit(map);
+	free(map);
+}
+
+void map__put(struct map *map)
+{	/* Drop one reference (NULL is ignored); the last put deletes the map */
+	if (map && refcount_dec_and_test(&map->refcnt))
+		map__delete(map);
+}
+
+void map__fixup_start(struct map *map)
+{
+	struct rb_node *first = rb_first(&map->dso->symbols[map->type]);
+
+	/* An empty symbol tree leaves map->start untouched. */
+	if (first == NULL)
+		return;
+	map->start = rb_entry(first, struct symbol, rb_node)->start;
+}
+
+void map__fixup_end(struct map *map)
+{
+	struct rb_node *last = rb_last(&map->dso->symbols[map->type]);
+
+	/* An empty symbol tree leaves map->end untouched. */
+	if (last == NULL)
+		return;
+	map->end = rb_entry(last, struct symbol, rb_node)->end;
+}
+
+#define DSO__DELETED "(deleted)"
+
+int map__load(struct map *map)
+{
+	const char *name = map->dso->long_name;
+	int nr;
+	/* Returns 0 if symbols are (or already were) loaded, -1 if dso__load() fails or finds none */
+	if (dso__loaded(map->dso, map->type))
+		return 0;
+
+	nr = dso__load(map->dso, map);
+	if (nr < 0) {
+		if (map->dso->has_build_id) {
+			char sbuild_id[SBUILD_ID_SIZE];
+
+			build_id__sprintf(map->dso->build_id,
+					  sizeof(map->dso->build_id),
+					  sbuild_id);
+			pr_warning("%s with build id %s not found",
+				   name, sbuild_id);
+		} else
+			pr_warning("Failed to open %s", name);
+		/* completes the line started by either warning above */
+		pr_warning(", continuing without symbols\n");
+		return -1;
+	} else if (nr == 0) {
+#ifdef HAVE_LIBELF_SUPPORT
+		const size_t len = strlen(name);
+		const size_t real_len = len - sizeof(DSO__DELETED);	/* name minus "(deleted)" and the char before it */
+		/* real_len is only used once len is known to be large enough */
+		if (len > sizeof(DSO__DELETED) &&
+		    strcmp(name + real_len + 1, DSO__DELETED) == 0) {
+			pr_warning("%.*s was updated (is prelink enabled?). "
+				"Restart the long running apps that use it!\n",
+				   (int)real_len, name);
+		} else {
+			pr_warning("no symbols found in %s, maybe install "
+				   "a debug package?\n", name);
+		}
+#endif
+		return -1;
+	}
+
+	return 0;
+}
+
+struct symbol *map__find_symbol(struct map *map, u64 addr)
+{	/* Look up @addr in the map's dso for this map type; NULL if the map cannot be loaded */
+	if (map__load(map) < 0)
+		return NULL;
+
+	return dso__find_symbol(map->dso, map->type, addr);
+}
+
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
+{
+	if (map__load(map) < 0)
+		return NULL;
+	/* Sort the dso's symbols by name first if that has not been done yet */
+	if (!dso__sorted_by_name(map->dso, map->type))
+		dso__sort_by_name(map->dso, map->type);
+
+	return dso__find_symbol_by_name(map->dso, map->type, name);
+}
+
+struct map *map__clone(struct map *from)
+{
+	/* Kernel maps carry a trailing struct kmap (see map__new2()/map__kmap()): copy it too */
+	const bool has_kmap = from->dso && from->dso->kernel;
+	struct map *map = memdup(from, sizeof(*map) + (has_kmap ? sizeof(struct kmap) : 0));
+	if (map != NULL) {
+		refcount_set(&map->refcnt, 1);	/* the clone starts with its own single reference */
+		RB_CLEAR_NODE(&map->rb_node);	/* and is not linked into any tree */
+		dso__get(map->dso);
+		map->groups = NULL;
+	}
+	return map;
+}
+
+int map__overlap(struct map *l, struct map *r)
+{
+	struct map *lower = l, *upper = r;
+
+	/* Order the two maps by start address. */
+	if (l->start > r->start) {
+		lower = r;
+		upper = l;
+	}
+
+	/* They overlap when the lower map ends after the upper one starts. */
+	return lower->end > upper->start ? 1 : 0;
+}
+
+size_t map__fprintf(struct map *map, FILE *fp)
+{	/* One line per map: " <start>-<end> <pgoff> <dso name>", numbers in hex */
+	return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n",
+		       map->start, map->end, map->pgoff, map->dso->name);
+}
+
+size_t map__fprintf_dsoname(struct map *map, FILE *fp)
+{
+	const char *dsoname;
+	/* Without a map or a dso there is no name to print. */
+	if (!map || !map->dso)
+		dsoname = "[unknown]";
+	else if (symbol_conf.show_kernel_path && map->dso->long_name)
+		dsoname = map->dso->long_name;
+	else
+		dsoname = map->dso->name;
+
+	return fprintf(fp, "%s", dsoname);
+}
+
+int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
+			 FILE *fp)
+{
+	char *srcline;
+	int printed = 0;
+
+	if (!map || !map->dso)
+		return 0;	/* nothing to resolve the address against */
+
+	srcline = get_srcline(map->dso, map__rip_2objdump(map, addr), NULL,
+			      true, true, addr);
+	if (srcline != SRCLINE_UNKNOWN)
+		printed = fprintf(fp, "%s%s", prefix, srcline);
+	free_srcline(srcline);
+	return printed;
+}
+
+/**
+ * map__rip_2objdump - convert symbol start address to objdump address.
+ * @map: memory map
+ * @rip: symbol start address
+ *
+ * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
+ * map->dso->adjust_symbols==1 for ET_EXEC-like cases except ET_REL which is
+ * relative to section start.
+ *
+ * Return: Address suitable for passing to "objdump --start-address="
+ */
+u64 map__rip_2objdump(struct map *map, u64 rip)
+{
+	if (!map->dso->adjust_symbols)
+		return rip;	/* nothing to adjust: @rip is returned as is */
+
+	if (map->dso->rel)
+		return rip - map->pgoff;	/* presumably the ET_REL case from the header — TODO confirm */
+
+	/*
+	 * kernel modules also have DSO_TYPE_USER in dso->kernel,
+	 * but all kernel modules are ET_REL, so won't get here.
+	 */
+	if (map->dso->kernel == DSO_TYPE_USER)
+		return rip + map->dso->text_offset;
+
+	return map->unmap_ip(map, rip) - map->reloc;
+}
+
+/**
+ * map__objdump_2mem - convert objdump address to a memory address.
+ * @map: memory map
+ * @ip: objdump address
+ *
+ * Closely related to map__rip_2objdump(), this function takes an address from
+ * objdump and converts it to a memory address.  Note this assumes that @map
+ * contains the address.  To be sure the result is valid, check it forwards
+ * e.g. map__rip_2objdump(map, map->map_ip(map, map__objdump_2mem(map, ip))) == ip
+ *
+ * Return: Memory address.
+ */
+u64 map__objdump_2mem(struct map *map, u64 ip)
+{
+	if (!map->dso->adjust_symbols)
+		return map->unmap_ip(map, ip);
+
+	if (map->dso->rel)
+		return map->unmap_ip(map, ip + map->pgoff);	/* undoes the "- pgoff" of map__rip_2objdump() */
+
+	/*
+	 * kernel modules also have DSO_TYPE_USER in dso->kernel,
+	 * but all kernel modules are ET_REL, so won't get here.
+	 */
+	if (map->dso->kernel == DSO_TYPE_USER)
+		return map->unmap_ip(map, ip - map->dso->text_offset);
+
+	return ip + map->reloc;
+}
+
+static void maps__init(struct maps *maps)
+{	/* start with an empty tree and an initialised rw-semaphore */
+	maps->entries = RB_ROOT;
+	init_rwsem(&maps->lock);
+}
+
+void map_groups__init(struct map_groups *mg, struct machine *machine)
+{
+	int type;
+	/* One maps tree per map type; the group's refcount starts at 1. */
+	for (type = 0; type < MAP__NR_TYPES; ++type)
+		maps__init(&mg->maps[type]);
+	mg->machine = machine;
+	refcount_set(&mg->refcnt, 1);
+}
+
+static void __maps__purge(struct maps *maps)
+{
+	struct rb_root *root = &maps->entries;
+	struct rb_node *next = rb_first(root);
+	/* No locking here: maps__exit() calls this with maps->lock held for writing */
+	while (next) {
+		struct map *pos = rb_entry(next, struct map, rb_node);
+
+		next = rb_next(&pos->rb_node);	/* fetch the successor before pos is erased */
+		rb_erase_init(&pos->rb_node, root);
+		map__put(pos);			/* drop the reference taken by __maps__insert() */
+	}
+}
+
+static void maps__exit(struct maps *maps)
+{	/* empty the tree under the write lock */
+	down_write(&maps->lock);
+	__maps__purge(maps);
+	up_write(&maps->lock);
+}
+
+void map_groups__exit(struct map_groups *mg)
+{
+	int i;
+	/* Purge the maps of every type; @mg itself is not freed here */
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		maps__exit(&mg->maps[i]);
+}
+
+bool map_groups__empty(struct map_groups *mg)
+{
+	int i;
+	/* Empty means: no map of any type (maps__first() is called without taking the lock) */
+	for (i = 0; i < MAP__NR_TYPES; ++i) {
+		if (maps__first(&mg->maps[i]))
+			return false;
+	}
+
+	return true;
+}
+
+struct map_groups *map_groups__new(struct machine *machine)
+{
+	struct map_groups *mg = malloc(sizeof(*mg));
+	/* NULL is returned when the allocation fails */
+	if (mg != NULL)
+		map_groups__init(mg, machine);
+
+	return mg;
+}
+
+void map_groups__delete(struct map_groups *mg)
+{	/* Purge all maps and free @mg regardless of its refcount; map_groups__put() is the refcounted path */
+	map_groups__exit(mg);
+	free(mg);
+}
+
+void map_groups__put(struct map_groups *mg)
+{	/* Drop one reference (NULL is ignored); the last put deletes the group */
+	if (mg && refcount_dec_and_test(&mg->refcnt))
+		map_groups__delete(mg);
+}
+
+struct symbol *map_groups__find_symbol(struct map_groups *mg,
+				       enum map_type type, u64 addr,
+				       struct map **mapp)
+{
+	struct map *map = map_groups__find(mg, type, addr);
+
+	/* The map must be loaded before map->map_ip can be used. */
+	if (map == NULL || map__load(map) < 0)
+		return NULL;
+
+	if (mapp != NULL)
+		*mapp = map;
+
+	return map__find_symbol(map, map->map_ip(map, addr));
+}
+
+struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
+					 struct map **mapp)
+{
+	struct symbol *sym = NULL;
+	struct rb_node *nd;
+
+	down_read(&maps->lock);
+
+	/* Stop at the first map, in tree order, that knows the name. */
+	for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
+		struct map *pos = rb_entry(nd, struct map, rb_node);
+
+		sym = map__find_symbol_by_name(pos, name);
+		if (sym != NULL) {
+			/* Report the owning map too when asked for it. */
+			if (mapp != NULL)
+				*mapp = pos;
+			break;
+		}
+	}
+
+	up_read(&maps->lock);
+
+	return sym;
+}
+
+struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
+					       enum map_type type,
+					       const char *name,
+					       struct map **mapp)
+{
+	struct maps *typed_maps = &mg->maps[type];
+
+	return maps__find_symbol_by_name(typed_maps, name, mapp);	/* only this type is searched */
+}
+
+int map_groups__find_ams(struct addr_map_symbol *ams)
+{	/* Fill ams->al_addr and ams->sym from ams->addr; returns 0 if a symbol was found, else -1 */
+	if (ams->addr < ams->map->start || ams->addr >= ams->map->end) {	/* outside the current map */
+		if (ams->map->groups == NULL)
+			return -1;
+		ams->map = map_groups__find(ams->map->groups, ams->map->type,
+					    ams->addr);
+		if (ams->map == NULL)
+			return -1;
+	}
+
+	ams->al_addr = ams->map->map_ip(ams->map, ams->addr);
+	ams->sym = map__find_symbol(ams->map, ams->al_addr);
+
+	return ams->sym ? 0 : -1;
+}
+
+static size_t maps__fprintf(struct maps *maps, FILE *fp)
+{
+	size_t printed = 0;
+	struct rb_node *nd;
+	/* Walk the tree under the read lock, summing what each fprintf call returns */
+	down_read(&maps->lock);
+
+	for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
+		struct map *pos = rb_entry(nd, struct map, rb_node);
+		printed += fprintf(fp, "Map:");
+		printed += map__fprintf(pos, fp);
+		if (verbose > 2) {	/* also dump the dso at this verbosity */
+			printed += dso__fprintf(pos->dso, pos->type, fp);
+			printed += fprintf(fp, "--\n");
+		}
+	}
+
+	up_read(&maps->lock);
+
+	return printed;
+}
+
+size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
+				  FILE *fp)
+{
+	const size_t header = fprintf(fp, "%s:\n", map_type__name[type]);	/* type heading first */
+	return header + maps__fprintf(&mg->maps[type], fp);
+}
+
+size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
+{	/* Print the maps of every type in turn; returns the summed results */
+	size_t printed = 0, i;
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		printed += __map_groups__fprintf_maps(mg, i, fp);
+	return printed;
+}
+
+static void __map_groups__insert(struct map_groups *mg, struct map *map)
+{	/* Takes no lock itself (uses __maps__insert()); its caller here holds maps->lock for writing */
+	__maps__insert(&mg->maps[map->type], map);
+	map->groups = mg;
+}
+
+static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
+{
+	struct rb_root *root;
+	struct rb_node *next;
+	int err = 0;
+	/* Remove every map overlapping @map, re-inserting clones for the parts @map leaves uncovered */
+	down_write(&maps->lock);
+
+	root = &maps->entries;
+	next = rb_first(root);
+
+	while (next) {
+		struct map *pos = rb_entry(next, struct map, rb_node);
+		next = rb_next(&pos->rb_node);
+
+		if (!map__overlap(pos, map))
+			continue;
+
+		if (verbose >= 2) {
+			if (use_browser) {
+				pr_warning("overlapping maps in %s "
+					   "(disable tui for more info)\n",
+					   map->dso->name);
+			} else {
+				fputs("overlapping maps:\n", fp);
+				map__fprintf(map, fp);
+				map__fprintf(pos, fp);
+			}
+		}
+
+		rb_erase_init(&pos->rb_node, root);
+		/*
+		 * Now check if we need to create new maps for areas not
+		 * overlapped by the new map:
+		 */
+		if (map->start > pos->start) {
+			struct map *before = map__clone(pos);
+
+			if (before == NULL) {
+				err = -ENOMEM;
+				goto put_map;
+			}
+
+			before->end = map->start;
+			__map_groups__insert(pos->groups, before);
+			if (verbose >= 2 && !use_browser)
+				map__fprintf(before, fp);
+			map__put(before);
+		}
+
+		if (map->end < pos->end) {
+			struct map *after = map__clone(pos);
+
+			if (after == NULL) {
+				err = -ENOMEM;
+				goto put_map;
+			}
+
+			after->start = map->end;
+			after->pgoff += map->end - pos->start;	/* start moved: keep map__map_ip() results unchanged */
+			__map_groups__insert(pos->groups, after);
+			if (verbose >= 2 && !use_browser)
+				map__fprintf(after, fp);
+			map__put(after);
+		}
+put_map:
+		map__put(pos);	/* the reference the tree held on the erased map */
+
+		if (err)
+			goto out;
+	}
+
+	err = 0;
+out:
+	up_write(&maps->lock);
+	return err;
+}
+
+int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
+				   FILE *fp)
+{	/* operates on the maps of @map's own type only */
+	return maps__fixup_overlappings(&mg->maps[map->type], map, fp);
+}
+
+/*
+ * XXX This should not really _copy_ the maps, but refcount them.
+ */
+int map_groups__clone(struct thread *thread,
+		      struct map_groups *parent, enum map_type type)
+{
+	struct map_groups *mg = thread->mg;
+	int err = 0;
+	struct map *map;
+	struct maps *maps = &parent->maps[type];
+
+	down_read(&maps->lock);
+
+	/* Clone each parent map of @type into thread->mg; stop at the first failure. */
+	for (map = maps__first(maps); map; map = map__next(map)) {
+		struct map *new = map__clone(map);
+		if (new == NULL) {
+			err = -ENOMEM;	/* err holds 0 from the previous iteration */
+			break;
+		}
+		err = unwind__prepare_access(thread, new, NULL);
+		if (err) {
+			map__put(new);	/* not inserted yet: drop the only reference */
+			break;
+		}
+		map_groups__insert(mg, new);
+		map__put(new);	/* the tree now holds its own reference */
+	}
+	up_read(&maps->lock);
+	return err;
+}
+
+static void __maps__insert(struct maps *maps, struct map *map)
+{
+	struct rb_node **p = &maps->entries.rb_node;
+	struct rb_node *parent = NULL;
+	const u64 ip = map->start;
+	struct map *m;
+	/* Descend to a leaf slot ordered by map->start; an equal start goes to the right */
+	while (*p != NULL) {
+		parent = *p;
+		m = rb_entry(parent, struct map, rb_node);
+		if (ip < m->start)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&map->rb_node, parent, p);
+	rb_insert_color(&map->rb_node, &maps->entries);
+	map__get(map);	/* the tree holds its own reference */
+}
+
+void maps__insert(struct maps *maps, struct map *map)
+{	/* locked wrapper around __maps__insert() */
+	down_write(&maps->lock);
+	__maps__insert(maps, map);
+	up_write(&maps->lock);
+}
+
+static void __maps__remove(struct maps *maps, struct map *map)
+{	/* unlink @map and drop the reference taken by __maps__insert(); takes no lock itself */
+	rb_erase_init(&map->rb_node, &maps->entries);
+	map__put(map);
+}
+
+void maps__remove(struct maps *maps, struct map *map)
+{	/* locked wrapper around __maps__remove() */
+	down_write(&maps->lock);
+	__maps__remove(maps, map);
+	up_write(&maps->lock);
+}
+
+struct map *maps__find(struct maps *maps, u64 ip)
+{
+	struct rb_node *nd;
+	struct map *found = NULL;
+
+	down_read(&maps->lock);
+
+	nd = maps->entries.rb_node;
+	while (nd != NULL) {
+		struct map *m = rb_entry(nd, struct map, rb_node);
+
+		if (ip < m->start) {
+			nd = nd->rb_left;
+		} else if (ip >= m->end) {
+			nd = nd->rb_right;
+		} else {
+			found = m;	/* start <= ip < end */
+			break;
+		}
+	}
+
+	up_read(&maps->lock);
+	return found;
+}
+
+struct map *maps__first(struct maps *maps)
+{
+	struct rb_node *first = rb_first(&maps->entries);
+
+	if (first == NULL)
+		return NULL;	/* empty tree */
+	return rb_entry(first, struct map, rb_node);
+}
+
+struct map *map__next(struct map *map)
+{
+	struct rb_node *next = rb_next(&map->rb_node);
+
+	if (next == NULL)
+		return NULL;	/* @map was the last one in its tree */
+	return rb_entry(next, struct map, rb_node);
+}
+
+struct kmap *map__kmap(struct map *map)
+{
+	if (!map->dso || !map->dso->kernel) {
+		pr_err("Internal error: map__kmap with a non-kernel map\n");
+		return NULL;
+	}
+	return (struct kmap *)(map + 1);	/* the kmap sits right after the struct map (see map__new2()) */
+}
+
+struct map_groups *map__kmaps(struct map *map)
+{
+	struct kmap *kmap = map__kmap(map);
+	/* NULL (after logging an error) for a non-kernel map or when kmap->kmaps is unset */
+	if (!kmap || !kmap->kmaps) {
+		pr_err("Internal error: map__kmaps with a non-kernel map\n");
+		return NULL;
+	}
+	return kmap->kmaps;
+}
diff --git a/util/map.h b/util/map.h
new file mode 100644
index 0000000..0e9bbe0
--- /dev/null
+++ b/util/map.h
@@ -0,0 +1,267 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MAP_H
+#define __PERF_MAP_H
+
+#include <linux/refcount.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include "rwsem.h"
+
+enum map_type {
+	MAP__FUNCTION = 0,
+	MAP__VARIABLE,
+};
+
+#define MAP__NR_TYPES (MAP__VARIABLE + 1)	/* number of enum map_type values; sizes per-type arrays */
+
+extern const char *map_type__name[MAP__NR_TYPES];
+
+struct dso;
+struct ip_callchain;
+struct ref_reloc_sym;
+struct map_groups;
+struct machine;
+struct perf_evsel;
+
+struct map {
+	union {
+		struct rb_node	rb_node;	/* linkage in struct maps' entries tree */
+		struct list_head node;
+	};
+	u64			start;
+	u64			end;		/* exclusive: maps__find() treats ip >= end as outside */
+	u8 /* enum map_type */	type;
+	bool			erange_warned;
+	u32			priv;
+	u32			prot;
+	u32			flags;
+	u64			pgoff;
+	u64			reloc;
+	u32			maj, min; /* only valid for MMAP2 record */
+	u64			ino;      /* only valid for MMAP2 record */
+	u64			ino_generation;/* only valid for MMAP2 record */
+
+	/* ip -> dso rip */
+	u64			(*map_ip)(struct map *, u64);
+	/* dso rip -> ip */
+	u64			(*unmap_ip)(struct map *, u64);
+
+	struct dso		*dso;
+	struct map_groups	*groups;	/* set by map_groups__insert(); NULL after map__init() */
+	refcount_t		refcnt;		/* see map__get() / map__put() */
+};
+
+struct kmap {	/* allocated right behind a struct map when dso->kernel is set (see map__new2()) */
+	struct ref_reloc_sym	*ref_reloc_sym;
+	struct map_groups	*kmaps;
+};
+
+struct maps {
+	struct rb_root	 entries;	/* struct map nodes, ordered by map->start */
+	struct rw_semaphore lock;	/* taken by maps__insert/remove/find around accesses to entries */
+};
+
+struct map_groups {
+	struct maps	 maps[MAP__NR_TYPES];	/* one tree per enum map_type */
+	struct machine	 *machine;
+	refcount_t	 refcnt;		/* see map_groups__get() / map_groups__put() */
+};
+
+struct map_groups *map_groups__new(struct machine *machine);
+void map_groups__delete(struct map_groups *mg);
+bool map_groups__empty(struct map_groups *mg);
+
+static inline struct map_groups *map_groups__get(struct map_groups *mg)
+{	/* take a reference; NULL is passed through unchanged */
+	if (mg)
+		refcount_inc(&mg->refcnt);
+	return mg;
+}
+
+void map_groups__put(struct map_groups *mg);
+
+struct kmap *map__kmap(struct map *map);
+struct map_groups *map__kmaps(struct map *map);
+
+static inline u64 map__map_ip(struct map *map, u64 ip)
+{	/* default ->map_ip: rebase @ip from map->start onto map->pgoff */
+	return ip - map->start + map->pgoff;
+}
+
+static inline u64 map__unmap_ip(struct map *map, u64 ip)
+{	/* default ->unmap_ip: the inverse of map__map_ip() */
+	return ip + map->start - map->pgoff;
+}
+
+static inline u64 identity__map_ip(struct map *map __maybe_unused, u64 ip)
+{	/* no translation; map__new() installs this for anon / no-dso memory */
+	return ip;
+}
+
+static inline size_t map__size(const struct map *map)
+{	/* end - start; note the u64 difference is returned as a size_t */
+	return map->end - map->start;
+}
+
+/* rip/ip <-> addr suitable for passing to `objdump --start-address=` */
+u64 map__rip_2objdump(struct map *map, u64 rip);
+
+/* objdump address -> memory address */
+u64 map__objdump_2mem(struct map *map, u64 ip);
+
+struct symbol;
+struct thread;
+
+/* map__for_each_symbol - iterate over the symbols in the given map
+ *
+ * @map: the 'struct map *' whose symbols are iterated
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @n: the 'struct rb_node *' to use as a temporary storage
+ * Note: caller must ensure map->dso is not NULL (map is loaded).
+ */
+#define map__for_each_symbol(map, pos, n)	\
+	dso__for_each_symbol((map)->dso, pos, n, (map)->type)
+
+/* map__for_each_symbol_by_name - iterate over the symbols in the given map
+ *                                that have the given name
+ *
+ * @map: the 'struct map *' whose symbols are iterated
+ * @sym_name: the symbol name
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ */
+#define __map__for_each_symbol_by_name(map, sym_name, pos)	\
+	for (pos = map__find_symbol_by_name(map, sym_name);	\
+	     pos &&						\
+	     !symbol__match_symbol_name(pos->name, sym_name,	\
+					SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); \
+	     pos = symbol__next_by_name(pos))
+
+#define map__for_each_symbol_by_name(map, sym_name, pos)		\
+	__map__for_each_symbol_by_name(map, sym_name, (pos))
+
+void map__init(struct map *map, enum map_type type,
+	       u64 start, u64 end, u64 pgoff, struct dso *dso);
+struct map *map__new(struct machine *machine, u64 start, u64 len,
+		     u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
+		     u64 ino_gen, u32 prot, u32 flags,
+		     char *filename, enum map_type type, struct thread *thread);
+struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
+void map__delete(struct map *map);
+struct map *map__clone(struct map *map);
+
+static inline struct map *map__get(struct map *map)
+{	/* take a reference; NULL is passed through unchanged */
+	if (map)
+		refcount_inc(&map->refcnt);
+	return map;
+}
+
+void map__put(struct map *map);
+
+static inline void __map__zput(struct map **map)
+{	/* put the map and clear the caller's pointer to it */
+	map__put(*map);
+	*map = NULL;
+}
+
+#define map__zput(map) __map__zput(&map)	/* takes the pointer variable itself, not its address */
+
+int map__overlap(struct map *l, struct map *r);
+size_t map__fprintf(struct map *map, FILE *fp);
+size_t map__fprintf_dsoname(struct map *map, FILE *fp);
+int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
+			 FILE *fp);
+
+int map__load(struct map *map);
+struct symbol *map__find_symbol(struct map *map, u64 addr);
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name);
+void map__fixup_start(struct map *map);
+void map__fixup_end(struct map *map);
+
+void map__reloc_vmlinux(struct map *map);
+
+size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
+				  FILE *fp);
+void maps__insert(struct maps *maps, struct map *map);
+void maps__remove(struct maps *maps, struct map *map);
+struct map *maps__find(struct maps *maps, u64 addr);
+struct map *maps__first(struct maps *maps);
+struct map *map__next(struct map *map);
+struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
+                                         struct map **mapp);
+void map_groups__init(struct map_groups *mg, struct machine *machine);
+void map_groups__exit(struct map_groups *mg);
+int map_groups__clone(struct thread *thread,
+		      struct map_groups *parent, enum map_type type);
+size_t map_groups__fprintf(struct map_groups *mg, FILE *fp);
+
+int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name,
+				     u64 addr);
+
+static inline void map_groups__insert(struct map_groups *mg, struct map *map)
+{	/* insert into the tree for @map's type, then record the owning group */
+	maps__insert(&mg->maps[map->type], map);
+	map->groups = mg;
+}
+
+static inline void map_groups__remove(struct map_groups *mg, struct map *map)
+{	/* removes from the tree for @map's type; map->groups is left as it was */
+	maps__remove(&mg->maps[map->type], map);
+}
+
+static inline struct map *map_groups__find(struct map_groups *mg,
+					   enum map_type type, u64 addr)
+{	/* map of @type containing @addr, or NULL */
+	return maps__find(&mg->maps[type], addr);
+}
+
+static inline struct map *map_groups__first(struct map_groups *mg,
+					    enum map_type type)
+{	/* first map of @type in tree order, or NULL if there is none */
+	return maps__first(&mg->maps[type]);
+}
+
+static inline struct map *map_groups__next(struct map *map)
+{	/* alias for map__next() */
+	return map__next(map);
+}
+
+struct symbol *map_groups__find_symbol(struct map_groups *mg,
+				       enum map_type type, u64 addr,
+				       struct map **mapp);
+
+struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
+					       enum map_type type,
+					       const char *name,
+					       struct map **mapp);
+
+struct addr_map_symbol;
+
+int map_groups__find_ams(struct addr_map_symbol *ams);
+
+static inline
+struct symbol *map_groups__find_function_by_name(struct map_groups *mg,
+						 const char *name, struct map **mapp)
+{	/* by-name lookup restricted to MAP__FUNCTION maps */
+	return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp);
+}
+
+int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
+				   FILE *fp);
+
+struct map *map_groups__find_by_name(struct map_groups *mg,
+				     enum map_type type, const char *name);
+
+bool __map__is_kernel(const struct map *map);
+
+static inline bool __map__is_kmodule(const struct map *map)
+{	/* simply "not the kernel map": same precondition on map->groups as __map__is_kernel() */
+	return !__map__is_kernel(map);
+}
+
+#endif /* __PERF_MAP_H */
diff --git a/util/mem-events.c b/util/mem-events.c
new file mode 100644
index 0000000..93f74d8
--- /dev/null
+++ b/util/mem-events.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <api/fs/fs.h>
+#include <linux/kernel.h>
+#include "mem-events.h"
+#include "debug.h"
+#include "symbol.h"
+#include "sort.h"
+
+unsigned int perf_mem_events__loads_ldlat = 30;
+
+#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
+struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"mem-loads"),
+	E("ldlat-stores",	"cpu/mem-stores/P",		"mem-stores"),
+};
+#undef E
+
+#undef E
+
+static char mem_loads_name[100];
+static bool mem_loads_name__init;
+
+char *perf_mem_events__name(int i)
+{
+	if (i == PERF_MEM_EVENTS__LOAD) {
+		if (!mem_loads_name__init) {
+			mem_loads_name__init = true;
+			scnprintf(mem_loads_name, sizeof(mem_loads_name),
+				  perf_mem_events[i].name,
+				  perf_mem_events__loads_ldlat);
+		}
+		return mem_loads_name;
+	}
+
+	return (char *)perf_mem_events[i].name;
+}
+
+/* Flag for recording every mem event whose tag contains a token of @str. */
+int perf_mem_events__parse(const char *str)
+{
+	char *copy, *token, *state = NULL;
+	bool matched = false;
+	int idx;
+
+	/* strtok_r() modifies its input, so tokenize a private copy. */
+	copy = strdup(str);
+	if (copy == NULL)
+		return -ENOMEM;
+
+	for (token = strtok_r(copy, ",", &state); token != NULL;
+	     token = strtok_r(NULL, ",", &state)) {
+		for (idx = 0; idx < PERF_MEM_EVENTS__MAX; idx++) {
+			struct perf_mem_event *event = &perf_mem_events[idx];
+
+			/* A token only has to be a substring of the tag. */
+			if (strstr(event->tag, token) == NULL)
+				continue;
+
+			event->record = true;
+			matched = true;
+		}
+	}
+
+	free(copy);
+
+	if (!matched) {
+		pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
+		return -1;
+	}
+
+	return 0;
+}
+
+int perf_mem_events__init(void)
+{
+	const char *mnt = sysfs__mount();
+	bool found = false;
+	int j;
+
+	if (!mnt)
+		return -ENOENT;
+
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		char path[PATH_MAX];
+		struct perf_mem_event *e = &perf_mem_events[j];
+		struct stat st;
+
+		scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s",
+			  mnt, e->sysfs_name);
+
+		if (!stat(path, &st))
+			e->supported = found = true;
+	}
+
+	return found ? 0 : -ENOENT;
+}
+
+static const char * const tlb_access[] = {
+	"N/A",
+	"HIT",
+	"MISS",
+	"L1",
+	"L2",
+	"Walker",
+	"Fault",
+};
+
+/*
+ * Describe @mem_info's dTLB access bits in @out ("N/A" if none), within @sz.
+ */
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t l = 0, i;
+	u64 m = PERF_MEM_TLB_NA, hit, miss;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	if (mem_info)
+		m = mem_info->data_src.mem_dtlb;
+
+	hit = m & PERF_MEM_TLB_HIT;
+	miss = m & PERF_MEM_TLB_MISS;
+
+	/* already taken care of */
+	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
+
+	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		if (l)	/* bounded append, so l can never pass sz */
+			l += scnprintf(out + l, sz - l, " or ");
+		l += scnprintf(out + l, sz - l, "%s", tlb_access[i]);
+	}
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+	if (hit)
+		l += scnprintf(out + l, sz - l, " hit");
+	if (miss)
+		l += scnprintf(out + l, sz - l, " miss");
+
+	return l;
+}
+
+static const char * const mem_lvl[] = {
+	"N/A",
+	"HIT",
+	"MISS",
+	"L1",
+	"LFB",
+	"L2",
+	"L3",
+	"Local RAM",
+	"Remote RAM (1 hop)",
+	"Remote RAM (2 hops)",
+	"Remote Cache (1 hop)",
+	"Remote Cache (2 hops)",
+	"I/O",
+	"Uncached",
+};
+
+static const char * const mem_lvlnum[] = {
+	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
+	[PERF_MEM_LVLNUM_LFB] = "LFB",
+	[PERF_MEM_LVLNUM_RAM] = "RAM",
+	[PERF_MEM_LVLNUM_PMEM] = "PMEM",
+	[PERF_MEM_LVLNUM_NA] = "N/A",
+};
+
+/*
+ * Describe the memory level recorded in @mem_info in @out: an optional
+ * "Remote " prefix, the mem_lvl bits and/or mem_lvl_num joined by " or ",
+ * then " hit"/" miss".  "N/A" is printed when nothing else was.  Every
+ * append is bounded by scnprintf() so the @sz byte buffer is not overrun.
+ */
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t i, l = 0;
+	u64 m =  PERF_MEM_LVL_NA;
+	u64 hit, miss;
+	int printed;
+
+	if (mem_info)
+		m  = mem_info->data_src.mem_lvl;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	hit = m & PERF_MEM_LVL_HIT;
+	miss = m & PERF_MEM_LVL_MISS;
+
+	/* already taken care of */
+	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
+
+
+	if (mem_info && mem_info->data_src.mem_remote)
+		l += scnprintf(out + l, sz - l, "Remote ");
+
+	printed = 0;
+	for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		if (printed++)
+			l += scnprintf(out + l, sz - l, " or ");
+		l += scnprintf(out + l, sz - l, "%s", mem_lvl[i]);
+	}
+
+	if (mem_info && mem_info->data_src.mem_lvl_num) {
+		int lvl = mem_info->data_src.mem_lvl_num;
+		if (printed++)
+			l += scnprintf(out + l, sz - l, " or ");
+		if (mem_lvlnum[lvl])
+			l += scnprintf(out + l, sz - l, "%s", mem_lvlnum[lvl]);
+		else
+			l += scnprintf(out + l, sz - l, "L%d", lvl);
+	}
+
+	if (l == 0)
+		l += scnprintf(out + l, sz - l, "N/A");
+	if (hit)
+		l += scnprintf(out + l, sz - l, " hit");
+	if (miss)
+		l += scnprintf(out + l, sz - l, " miss");
+
+	return l;
+}
+
+static const char * const snoop_access[] = {
+	"N/A",
+	"None",
+	"Hit",
+	"Miss",
+	"HitM",
+};
+
+/*
+ * Describe the snoop result recorded in @mem_info in @out, alternatives
+ * joined by " or ", or "N/A" when there is none; appends are bounded by @sz.
+ */
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t i, l = 0;
+	u64 m = PERF_MEM_SNOOP_NA;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	if (mem_info)
+		m = mem_info->data_src.mem_snoop;
+
+	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		if (l)
+			l += scnprintf(out + l, sz - l, " or ");
+		l += scnprintf(out + l, sz - l, "%s", snoop_access[i]);
+	}
+	if (mem_info &&
+	     (mem_info->data_src.mem_snoopx & PERF_MEM_SNOOPX_FWD)) {
+		if (l)
+			l += scnprintf(out + l, sz - l, " or ");
+		l += scnprintf(out + l, sz - l, "Fwd");
+	}
+
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+
+	return l;
+}
+
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	u64 mask = PERF_MEM_LOCK_NA;
+	int l;
+
+	if (mem_info)
+		mask = mem_info->data_src.mem_lock;
+
+	if (mask & PERF_MEM_LOCK_NA)
+		l = scnprintf(out, sz, "N/A");
+	else if (mask & PERF_MEM_LOCK_LOCKED)
+		l = scnprintf(out, sz, "Yes");
+	else
+		l = scnprintf(out, sz, "No");
+
+	return l;
+}
+
+int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	int i = 0;
+
+	i += perf_mem__lvl_scnprintf(out, sz, mem_info);
+	i += scnprintf(out + i, sz - i, "|SNP ");
+	i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
+	i += scnprintf(out + i, sz - i, "|TLB ");
+	i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
+	i += scnprintf(out + i, sz - i, "|LCK ");
+	i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
+
+	return i;
+}
+
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
+{
+	union perf_mem_data_src *data_src = &mi->data_src;
+	u64 daddr  = mi->daddr.addr;
+	u64 op     = data_src->mem_op;
+	u64 lvl    = data_src->mem_lvl;
+	u64 snoop  = data_src->mem_snoop;
+	u64 lock   = data_src->mem_lock;
+	/*
+	 * Skylake might report unknown remote level via this
+	 * bit, consider it when evaluating remote HITMs.
+	 */
+	bool mrem  = data_src->mem_remote;
+	int err = 0;
+
+#define HITM_INC(__f)		\
+do {				\
+	stats->__f++;		\
+	stats->tot_hitm++;	\
+} while (0)
+
+#define P(a, b) PERF_MEM_##a##_##b
+
+	stats->nr_entries++;
+
+	if (lock & P(LOCK, LOCKED)) stats->locks++;
+
+	if (op & P(OP, LOAD)) {
+		/* load */
+		stats->load++;
+
+		if (!daddr) {
+			stats->ld_noadrs++;
+			return -1;
+		}
+
+		if (lvl & P(LVL, HIT)) {
+			if (lvl & P(LVL, UNC)) stats->ld_uncache++;
+			if (lvl & P(LVL, IO))  stats->ld_io++;
+			if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
+			if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
+			if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
+			if (lvl & P(LVL, L3 )) {
+				if (snoop & P(SNOOP, HITM))
+					HITM_INC(lcl_hitm);
+				else
+					stats->ld_llchit++;
+			}
+
+			if (lvl & P(LVL, LOC_RAM)) {
+				stats->lcl_dram++;
+				if (snoop & P(SNOOP, HIT))
+					stats->ld_shared++;
+				else
+					stats->ld_excl++;
+			}
+
+			if ((lvl & P(LVL, REM_RAM1)) ||
+			    (lvl & P(LVL, REM_RAM2)) ||
+			     mrem) {
+				stats->rmt_dram++;
+				if (snoop & P(SNOOP, HIT))
+					stats->ld_shared++;
+				else
+					stats->ld_excl++;
+			}
+		}
+
+		if ((lvl & P(LVL, REM_CCE1)) ||
+		    (lvl & P(LVL, REM_CCE2)) ||
+		     mrem) {
+			if (snoop & P(SNOOP, HIT))
+				stats->rmt_hit++;
+			else if (snoop & P(SNOOP, HITM))
+				HITM_INC(rmt_hitm);
+		}
+
+		if ((lvl & P(LVL, MISS)))
+			stats->ld_miss++;
+
+	} else if (op & P(OP, STORE)) {
+		/* store */
+		stats->store++;
+
+		if (!daddr) {
+			stats->st_noadrs++;
+			return -1;
+		}
+
+		if (lvl & P(LVL, HIT)) {
+			if (lvl & P(LVL, UNC)) stats->st_uncache++;
+			if (lvl & P(LVL, L1 )) stats->st_l1hit++;
+		}
+		if (lvl & P(LVL, MISS))
+			if (lvl & P(LVL, L1)) stats->st_l1miss++;
+	} else {
+		/* unparsable data_src? */
+		stats->noparse++;
+		return -1;
+	}
+
+	if (!mi->daddr.map || !mi->iaddr.map) {
+		stats->nomap++;
+		return -1;
+	}
+
+#undef P
+#undef HITM_INC
+	return err;
+}
+
+void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
+{
+	stats->nr_entries	+= add->nr_entries;
+
+	stats->locks		+= add->locks;
+	stats->store		+= add->store;
+	stats->st_uncache	+= add->st_uncache;
+	stats->st_noadrs	+= add->st_noadrs;
+	stats->st_l1hit		+= add->st_l1hit;
+	stats->st_l1miss	+= add->st_l1miss;
+	stats->load		+= add->load;
+	stats->ld_excl		+= add->ld_excl;
+	stats->ld_shared	+= add->ld_shared;
+	stats->ld_uncache	+= add->ld_uncache;
+	stats->ld_io		+= add->ld_io;
+	stats->ld_miss		+= add->ld_miss;
+	stats->ld_noadrs	+= add->ld_noadrs;
+	stats->ld_fbhit		+= add->ld_fbhit;
+	stats->ld_l1hit		+= add->ld_l1hit;
+	stats->ld_l2hit		+= add->ld_l2hit;
+	stats->ld_llchit	+= add->ld_llchit;
+	stats->lcl_hitm		+= add->lcl_hitm;
+	stats->rmt_hitm		+= add->rmt_hitm;
+	stats->tot_hitm		+= add->tot_hitm;
+	stats->rmt_hit		+= add->rmt_hit;
+	stats->lcl_dram		+= add->lcl_dram;
+	stats->rmt_dram		+= add->rmt_dram;
+	stats->nomap		+= add->nomap;
+	stats->noparse		+= add->noparse;
+}
diff --git a/util/mem-events.h b/util/mem-events.h
new file mode 100644
index 0000000..a889ec2
--- /dev/null
+++ b/util/mem-events.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MEM_EVENTS_H
+#define __PERF_MEM_EVENTS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include "stat.h"
+
+struct perf_mem_event {
+	bool		record;
+	bool		supported;
+	const char	*tag;
+	const char	*name;
+	const char	*sysfs_name;
+};
+
+enum {
+	PERF_MEM_EVENTS__LOAD,
+	PERF_MEM_EVENTS__STORE,
+	PERF_MEM_EVENTS__MAX,
+};
+
+extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
+extern unsigned int perf_mem_events__loads_ldlat;
+
+int perf_mem_events__parse(const char *str);
+int perf_mem_events__init(void);
+
+char *perf_mem_events__name(int i);
+
+struct mem_info;
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+
+int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
+
+struct c2c_stats {
+	u32	nr_entries;
+
+	u32	locks;               /* count of 'lock' transactions */
+	u32	store;               /* count of all stores in trace */
+	u32	st_uncache;          /* stores to uncacheable address */
+	u32	st_noadrs;           /* cacheable store with no address */
+	u32	st_l1hit;            /* count of stores that hit L1D */
+	u32	st_l1miss;           /* count of stores that miss L1D */
+	u32	load;                /* count of all loads in trace */
+	u32	ld_excl;             /* exclusive loads, rmt/lcl DRAM - snp none/miss */
+	u32	ld_shared;           /* shared loads, rmt/lcl DRAM - snp hit */
+	u32	ld_uncache;          /* loads to uncacheable address */
+	u32	ld_io;               /* loads to io address */
+	u32	ld_miss;             /* loads miss */
+	u32	ld_noadrs;           /* cacheable load with no address */
+	u32	ld_fbhit;            /* count of loads hitting Fill Buffer */
+	u32	ld_l1hit;            /* count of loads that hit L1D */
+	u32	ld_l2hit;            /* count of loads that hit L2D */
+	u32	ld_llchit;           /* count of loads that hit LLC */
+	u32	lcl_hitm;            /* count of loads with local HITM  */
+	u32	rmt_hitm;            /* count of loads with remote HITM */
+	u32	tot_hitm;            /* count of loads with local and remote HITM */
+	u32	rmt_hit;             /* count of loads with remote hit clean; */
+	u32	lcl_dram;            /* count of loads miss to local DRAM */
+	u32	rmt_dram;            /* count of loads miss to remote DRAM */
+	u32	nomap;               /* count of load/stores with no phys adrs */
+	u32	noparse;             /* count of unparsable data sources */
+};
+
+struct hist_entry;
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
+void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);
+
+#endif /* __PERF_MEM_EVENTS_H */
diff --git a/util/mem2node.c b/util/mem2node.c
new file mode 100644
index 0000000..c6fd81c
--- /dev/null
+++ b/util/mem2node.c
@@ -0,0 +1,134 @@
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bitmap.h>
+#include "mem2node.h"
+#include "util.h"
+
+struct phys_entry {
+	struct rb_node	rb_node;
+	u64	start;
+	u64	end;
+	u64	node;
+};
+
+static void phys_entry__insert(struct phys_entry *entry, struct rb_root *root)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct phys_entry *e;
+
+	while (*p != NULL) {
+		parent = *p;
+		e = rb_entry(parent, struct phys_entry, rb_node);
+
+		if (entry->start < e->start)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&entry->rb_node, parent, p);
+	rb_insert_color(&entry->rb_node, root);
+}
+
+static void
+phys_entry__init(struct phys_entry *entry, u64 start, u64 bsize, u64 node)
+{
+	entry->start = start;
+	entry->end   = start + bsize;
+	entry->node  = node;
+	RB_CLEAR_NODE(&entry->rb_node);
+}
+
+/* Fill @map with one entry per run of adjacent, same-node blocks in @env. */
+int mem2node__init(struct mem2node *map, struct perf_env *env)
+{
+	struct memory_node *n, *nodes = &env->memory_nodes[0];
+	struct phys_entry *entries, *tmp_entries;
+	u64 bsize = env->memory_bsize;
+	int i, j = 0, max = 0;
+
+	memset(map, 0x0, sizeof(*map));
+	map->root = RB_ROOT;
+
+	for (i = 0; i < env->nr_memory_nodes; i++) {
+		n = &nodes[i];
+		max += bitmap_weight(n->set, n->size);
+	}
+
+	entries = zalloc(sizeof(*entries) * max);
+	if (!entries)
+		return -ENOMEM;
+
+	for (i = 0; i < env->nr_memory_nodes; i++) {
+		u64 bit, start;
+
+		n = &nodes[i];
+
+		for (bit = 0; bit < n->size; bit++) {
+			if (!test_bit(bit, n->set))
+				continue;
+
+			start = bit * bsize;
+
+			/* Bits are walked in order, so merging needs no sort. */
+			if (j > 0) {
+				struct phys_entry *prev = &entries[j - 1];
+
+				if ((prev->end == start) &&
+				    (prev->node == n->node)) {
+					prev->end += bsize;
+					continue;
+				}
+			}
+
+			phys_entry__init(&entries[j++], start, bsize, n->node);
+		}
+	}
+
+	/*
+	 * Cut unused entries, due to merging.  A zero sized realloc() may free
+	 * @entries yet return NULL, leaving a dangling pointer behind for
+	 * mem2node__exit() to free again, so skip it when nothing was added.
+	 */
+	tmp_entries = j ? realloc(entries, sizeof(*entries) * j) : NULL;
+	if (tmp_entries)
+		entries = tmp_entries;
+
+	for (i = 0; i < j; i++) {
+		pr_debug("mem2node %03" PRIu64 " [0x%016" PRIx64 "-0x%016" PRIx64 "]\n",
+			 entries[i].node, entries[i].start, entries[i].end);
+
+		phys_entry__insert(&entries[i], &map->root);
+	}
+
+	map->entries = entries;
+	return 0;
+}
+
+void mem2node__exit(struct mem2node *map)
+{
+	zfree(&map->entries);
+}
+
+/*
+ * Look up the node whose entry covers address @addr.
+ * Returns the node number, or -1 if no entry in @map covers @addr.
+ */
+int mem2node__node(struct mem2node *map, u64 addr)
+{
+	struct rb_node *cur = map->root.rb_node;
+
+	while (cur) {
+		struct phys_entry *range;
+
+		range = rb_entry(cur, struct phys_entry, rb_node);
+		if (addr >= range->start && addr < range->end)
+			return (int) range->node;
+
+		/* Entries are half-open [start, end): descend towards addr. */
+		cur = addr < range->start ? cur->rb_left : cur->rb_right;
+	}
+
+	return -1;
+}
diff --git a/util/mem2node.h b/util/mem2node.h
new file mode 100644
index 0000000..59c4752
--- /dev/null
+++ b/util/mem2node.h
@@ -0,0 +1,19 @@
+#ifndef __MEM2NODE_H
+#define __MEM2NODE_H
+
+#include <linux/rbtree.h>
+#include "env.h"
+
+struct phys_entry;
+
+struct mem2node {
+	struct rb_root		 root;
+	struct phys_entry	*entries;
+	int			 cnt;
+};
+
+int  mem2node__init(struct mem2node *map, struct perf_env *env);
+void mem2node__exit(struct mem2node *map);
+int  mem2node__node(struct mem2node *map, u64 addr);
+
+#endif /* __MEM2NODE_H */
diff --git a/util/memswap.c b/util/memswap.c
new file mode 100644
index 0000000..c1317e4
--- /dev/null
+++ b/util/memswap.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <byteswap.h>
+#include "memswap.h"
+#include <linux/types.h>
+
+void mem_bswap_32(void *src, int byte_size)
+{
+	u32 *m = src;
+	while (byte_size > 0) {
+		*m = bswap_32(*m);
+		byte_size -= sizeof(u32);
+		++m;
+	}
+}
+
+void mem_bswap_64(void *src, int byte_size)
+{
+	u64 *m = src;
+
+	while (byte_size > 0) {
+		*m = bswap_64(*m);
+		byte_size -= sizeof(u64);
+		++m;
+	}
+}
diff --git a/util/memswap.h b/util/memswap.h
new file mode 100644
index 0000000..1e29ff9
--- /dev/null
+++ b/util/memswap.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_MEMSWAP_H_
+#define PERF_MEMSWAP_H_
+
+void mem_bswap_64(void *src, int byte_size);
+void mem_bswap_32(void *src, int byte_size);
+
+#endif /* PERF_MEMSWAP_H_ */
diff --git a/util/metricgroup.c b/util/metricgroup.c
new file mode 100644
index 0000000..1ddc3d1
--- /dev/null
+++ b/util/metricgroup.c
@@ -0,0 +1,492 @@
+/*
+ * Copyright (c) 2017, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+/* Manage metrics and groups of metrics from JSON files */
+
+#include "metricgroup.h"
+#include "evlist.h"
+#include "strbuf.h"
+#include "pmu.h"
+#include "expr.h"
+#include "rblist.h"
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include "pmu-events/pmu-events.h"
+#include "strlist.h"
+#include <assert.h>
+#include <ctype.h>
+
+struct metric_event *metricgroup__lookup(struct rblist *metric_events,
+					 struct perf_evsel *evsel,
+					 bool create)
+{
+	struct rb_node *nd;
+	struct metric_event me = {
+		.evsel = evsel
+	};
+
+	if (!metric_events)
+		return NULL;
+
+	nd = rblist__find(metric_events, &me);
+	if (nd)
+		return container_of(nd, struct metric_event, nd);
+	if (create) {
+		rblist__add_node(metric_events, &me);
+		nd = rblist__find(metric_events, &me);
+		if (nd)
+			return container_of(nd, struct metric_event, nd);
+	}
+	return NULL;
+}
+
+static int metric_event_cmp(struct rb_node *rb_node, const void *entry)
+{
+	struct metric_event *a = container_of(rb_node,
+					      struct metric_event,
+					      nd);
+	const struct metric_event *b = entry;
+
+	if (a->evsel == b->evsel)
+		return 0;
+	if ((char *)a->evsel < (char *)b->evsel)
+		return -1;
+	return +1;
+}
+
+static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
+					const void *entry)
+{
+	struct metric_event *me = malloc(sizeof(struct metric_event));
+
+	if (!me)
+		return NULL;
+	memcpy(me, entry, sizeof(struct metric_event));
+	me->evsel = ((struct metric_event *)entry)->evsel;
+	INIT_LIST_HEAD(&me->head);
+	return &me->nd;
+}
+
+static void metricgroup__rblist_init(struct rblist *metric_events)
+{
+	rblist__init(metric_events);
+	metric_events->node_cmp = metric_event_cmp;
+	metric_events->node_new = metric_event_new;
+}
+
+struct egroup {
+	struct list_head nd;
+	int idnum;
+	const char **ids;
+	const char *metric_name;
+	const char *metric_expr;
+};
+
+/* Find @idnum consecutive events named by @ids; return the first, or NULL. */
+static struct perf_evsel *find_evsel(struct perf_evlist *perf_evlist,
+				     const char **ids,
+				     int idnum,
+				     struct perf_evsel **metric_events)
+{
+	struct perf_evsel *ev, *start = NULL;
+	int ind = 0;
+
+	evlist__for_each_entry (perf_evlist, ev) {
+		if (strcmp(ev->name, ids[ind])) {
+			ind = 0;	/* run broke; ev may still start a new one */
+			if (strcmp(ev->name, ids[0]))
+				continue;
+		}
+		metric_events[ind] = ev;
+		if (ind == 0)
+			start = ev;
+		if (++ind == idnum) {
+			metric_events[ind] = NULL;
+			return start;
+		}
+	}
+	/*
+	 * This can happen when an alias expands to multiple events, like
+	 * for uncore events. We don't support this case for now.
+	 */
+	return NULL;
+}
+
+/* Attach a metric_expr for each resolvable group to its leading event. */
+static int metricgroup__setup_events(struct list_head *groups,
+				     struct perf_evlist *perf_evlist,
+				     struct rblist *metric_events_list)
+{
+	struct metric_event *me;
+	struct metric_expr *expr;
+	int i, ret = 0;
+	struct egroup *eg;
+	struct perf_evsel *evsel;
+
+	list_for_each_entry (eg, groups, nd) {
+		struct perf_evsel **metric_events;
+
+		metric_events = calloc(sizeof(void *), eg->idnum + 1);
+		if (!metric_events) {
+			ret = -ENOMEM;
+			break;
+		}
+		evsel = find_evsel(perf_evlist, eg->ids, eg->idnum,
+				   metric_events);
+		if (!evsel) {
+			pr_debug("Cannot resolve %s: %s\n",
+					eg->metric_name, eg->metric_expr);
+			free(metric_events);
+			continue;
+		}
+		for (i = 0; i < eg->idnum; i++)
+			metric_events[i]->collect_stat = true;
+		me = metricgroup__lookup(metric_events_list, evsel, true);
+		/* Only allocate the expression when its metric_event exists. */
+		expr = me ? malloc(sizeof(*expr)) : NULL;
+		if (!expr) {
+			/* No metric_expr owns the array yet, so drop it here. */
+			free(metric_events);
+			ret = -ENOMEM;
+			break;
+		}
+		expr->metric_expr = eg->metric_expr;
+		expr->metric_name = eg->metric_name;
+		expr->metric_events = metric_events;
+		list_add(&expr->nd, &me->head);
+	}
+	return ret;
+}
+
+static bool match_metric(const char *n, const char *list)
+{
+	int len;
+	char *m;
+
+	if (!list)
+		return false;
+	if (!strcmp(list, "all"))
+		return true;
+	if (!n)
+		return !strcasecmp(list, "No_group");
+	len = strlen(list);
+	m = strcasestr(n, list);
+	if (!m)
+		return false;
+	if ((m == n || m[-1] == ';' || m[-1] == ' ') &&
+	    (m[len] == 0 || m[len] == ';'))
+		return true;
+	return false;
+}
+
+struct mep {
+	struct rb_node nd;
+	const char *name;
+	struct strlist *metrics;
+};
+
+static int mep_cmp(struct rb_node *rb_node, const void *entry)
+{
+	struct mep *a = container_of(rb_node, struct mep, nd);
+	struct mep *b = (struct mep *)entry;
+
+	return strcmp(a->name, b->name);
+}
+
+static struct rb_node *mep_new(struct rblist *rl __maybe_unused,
+					const void *entry)
+{
+	struct mep *me = malloc(sizeof(struct mep));
+
+	if (!me)
+		return NULL;
+	memcpy(me, entry, sizeof(struct mep));
+	me->name = strdup(me->name);
+	if (!me->name)
+		goto out_me;
+	me->metrics = strlist__new(NULL, NULL);
+	if (!me->metrics)
+		goto out_name;
+	return &me->nd;
+out_name:
+	free((char *)me->name);
+out_me:
+	free(me);
+	return NULL;
+}
+
+static struct mep *mep_lookup(struct rblist *groups, const char *name)
+{
+	struct rb_node *nd;
+	struct mep me = {
+		.name = name
+	};
+	nd = rblist__find(groups, &me);
+	if (nd)
+		return container_of(nd, struct mep, nd);
+	rblist__add_node(groups, &me);
+	nd = rblist__find(groups, &me);
+	if (nd)
+		return container_of(nd, struct mep, nd);
+	return NULL;
+}
+
+static void mep_delete(struct rblist *rl __maybe_unused,
+		       struct rb_node *nd)
+{
+	struct mep *me = container_of(nd, struct mep, nd);
+
+	strlist__delete(me->metrics);
+	free((void *)me->name);
+	free(me);
+}
+
+static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
+{
+	struct str_node *sn;
+	int n = 0;
+
+	strlist__for_each_entry (sn, metrics) {
+		if (raw)
+			printf("%s%s", n > 0 ? " " : "", sn->s);
+		else
+			printf("  %s\n", sn->s);
+		n++;
+	}
+	if (raw)
+		putchar('\n');
+}
+
+void metricgroup__print(bool metrics, bool metricgroups, char *filter,
+			bool raw)
+{
+	struct pmu_events_map *map = perf_pmu__find_map(NULL);
+	struct pmu_event *pe;
+	int i;
+	struct rblist groups;
+	struct rb_node *node, *next;
+	struct strlist *metriclist = NULL;
+
+	if (!map)
+		return;
+
+	if (!metricgroups) {
+		metriclist = strlist__new(NULL, NULL);
+		if (!metriclist)
+			return;
+	}
+
+	rblist__init(&groups);
+	groups.node_new = mep_new;
+	groups.node_cmp = mep_cmp;
+	groups.node_delete = mep_delete;
+	for (i = 0; ; i++) {
+		const char *g;
+		pe = &map->table[i];
+
+		if (!pe->name && !pe->metric_group && !pe->metric_name)
+			break;
+		if (!pe->metric_expr)
+			continue;
+		g = pe->metric_group;
+		if (!g && pe->metric_name) {
+			if (pe->name)
+				continue;
+			g = "No_group";
+		}
+		if (g) {
+			char *omg;
+			char *mg = strdup(g);
+
+			if (!mg)
+				return;
+			omg = mg;
+			while ((g = strsep(&mg, ";")) != NULL) {
+				struct mep *me;
+				char *s;
+
+				if (*g == 0)
+					g = "No_group";
+				while (isspace(*g))
+					g++;
+				if (filter && !strstr(g, filter))
+					continue;
+				if (raw)
+					s = (char *)pe->metric_name;
+				else {
+					if (asprintf(&s, "%s\n\t[%s]",
+						     pe->metric_name, pe->desc) < 0)
+						return;
+				}
+
+				if (!s)
+					continue;
+
+				if (!metricgroups) {
+					strlist__add(metriclist, s);
+				} else {
+					me = mep_lookup(&groups, g);
+					if (!me)
+						continue;
+					strlist__add(me->metrics, s);
+				}
+			}
+			free(omg);
+		}
+	}
+
+	if (metricgroups && !raw)
+		printf("\nMetric Groups:\n\n");
+	else if (metrics && !raw)
+		printf("\nMetrics:\n\n");
+
+	for (node = rb_first(&groups.entries); node; node = next) {
+		struct mep *me = container_of(node, struct mep, nd);
+
+		if (metricgroups)
+			printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n");
+		if (metrics)
+			metricgroup__print_strlist(me->metrics, raw);
+		next = rb_next(node);
+		rblist__remove_node(&groups, node);
+	}
+	if (!metricgroups)
+		metricgroup__print_strlist(metriclist, raw);
+	strlist__delete(metriclist);
+}
+
+static int metricgroup__add_metric(const char *metric, struct strbuf *events,
+				   struct list_head *group_list)
+{
+	struct pmu_events_map *map = perf_pmu__find_map(NULL);
+	struct pmu_event *pe;
+	int ret = -EINVAL;
+	int i, j;
+
+	if (!map)
+		return 0;
+
+	for (i = 0; ; i++) {
+		pe = &map->table[i];
+
+		if (!pe->name && !pe->metric_group && !pe->metric_name)
+			break;
+		if (!pe->metric_expr)
+			continue;
+		if (match_metric(pe->metric_group, metric) ||
+		    match_metric(pe->metric_name, metric)) {
+			const char **ids;
+			int idnum;
+			struct egroup *eg;
+
+			pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
+
+			if (expr__find_other(pe->metric_expr,
+					     NULL, &ids, &idnum) < 0)
+				continue;
+			if (events->len > 0)
+				strbuf_addf(events, ",");
+			for (j = 0; j < idnum; j++) {
+				pr_debug("found event %s\n", ids[j]);
+				strbuf_addf(events, "%s%s",
+					j == 0 ? "{" : ",",
+					ids[j]);
+			}
+			strbuf_addf(events, "}:W");
+
+			eg = malloc(sizeof(struct egroup));
+			if (!eg) {
+				ret = -ENOMEM;
+				break;
+			}
+			eg->ids = ids;
+			eg->idnum = idnum;
+			eg->metric_name = pe->metric_name;
+			eg->metric_expr = pe->metric_expr;
+			list_add_tail(&eg->nd, group_list);
+			ret = 0;
+		}
+	}
+	return ret;
+}
+
+/* Add each metric or group named in the comma separated @list to @events. */
+static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
+				        struct list_head *group_list)
+{
+	char *llist, *nlist, *p;
+	int ret = -EINVAL;
+
+	llist = nlist = strdup(list);
+	if (!nlist)
+		return -ENOMEM;
+
+	strbuf_init(events, 100);
+	strbuf_addf(events, "%s", "");
+	while ((p = strsep(&llist, ",")) != NULL) {
+		ret = metricgroup__add_metric(p, events, group_list);
+		if (ret == -EINVAL)
+			fprintf(stderr, "Cannot find metric or group `%s'\n", p);
+		if (ret)	/* stop on any failure, -ENOMEM included */
+			break;
+	}
+	if (ret)	/* the caller returns without releasing it on error */
+		strbuf_release(events);
+	free(nlist);
+	return ret;
+}
+
+static void metricgroup__free_egroups(struct list_head *group_list)
+{
+	struct egroup *eg, *egtmp;
+	int i;
+
+	list_for_each_entry_safe (eg, egtmp, group_list, nd) {
+		for (i = 0; i < eg->idnum; i++)
+			free((char *)eg->ids[i]);
+		free(eg->ids);
+		free(eg);
+	}
+}
+
+/* Parse metric names in @str, add their events, fill in @metric_events. */
+int metricgroup__parse_groups(const struct option *opt,
+			   const char *str,
+			   struct rblist *metric_events)
+{
+	struct parse_events_error parse_error;
+	struct perf_evlist *perf_evlist = *(struct perf_evlist **)opt->value;
+	struct strbuf extra_events;
+	LIST_HEAD(group_list);
+	int ret;
+
+	if (metric_events->nr_entries == 0)
+		metricgroup__rblist_init(metric_events);
+	ret = metricgroup__add_metric_list(str, &extra_events, &group_list);
+	if (ret)	/* groups added before the failure must still be freed */
+		goto out;
+	pr_debug("adding %s\n", extra_events.buf);
+	memset(&parse_error, 0, sizeof(struct parse_events_error));
+	ret = parse_events(perf_evlist, extra_events.buf, &parse_error);
+	if (ret)
+		parse_events_print_error(&parse_error, extra_events.buf);
+	strbuf_release(&extra_events);	/* unused from here on, pass or fail */
+	if (!ret)
+		ret = metricgroup__setup_events(&group_list, perf_evlist,
+						metric_events);
+out:
+	metricgroup__free_egroups(&group_list);
+	return ret;
+}
diff --git a/util/metricgroup.h b/util/metricgroup.h
new file mode 100644
index 0000000..06854e1
--- /dev/null
+++ b/util/metricgroup.h
@@ -0,0 +1,31 @@
+#ifndef METRICGROUP_H
+#define METRICGROUP_H 1
+
+#include "linux/list.h"
+#include "rblist.h"
+#include <subcmd/parse-options.h>
+#include "evlist.h"
+#include "strbuf.h"
+
+struct metric_event {
+	struct rb_node nd;
+	struct perf_evsel *evsel;
+	struct list_head head; /* list of metric_expr */
+};
+
+struct metric_expr {
+	struct list_head nd;
+	const char *metric_expr;
+	const char *metric_name;
+	struct perf_evsel **metric_events;
+};
+
+struct metric_event *metricgroup__lookup(struct rblist *metric_events,
+					 struct perf_evsel *evsel,
+					 bool create);
+int metricgroup__parse_groups(const struct option *opt,
+			const char *str,
+			struct rblist *metric_events);
+
+void metricgroup__print(bool metrics, bool groups, char *filter, bool raw);
+#endif
diff --git a/util/mmap.c b/util/mmap.c
new file mode 100644
index 0000000..fc83267
--- /dev/null
+++ b/util/mmap.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (C) 2011-2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from evlist.c builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <sys/mman.h>
+#include <inttypes.h>
+#include <asm/bug.h>
+#include "debug.h"
+#include "event.h"
+#include "mmap.h"
+#include "util.h" /* page_size */
+
+/*
+ * Length of the whole mapping: mask + 1 bytes plus one extra page (the
+ * readers in this file treat the first page as the control page and the
+ * rest as the data area).
+ */
+size_t perf_mmap__mmap_len(struct perf_mmap *map)
+{
+	return map->mask + 1 + page_size;
+}
+
+/*
+ * Fetch the event located at *startp, given that readable data extends up
+ * to 'end'.
+ *
+ * On success the event is returned and *startp is advanced by its size.
+ * NULL is returned when less than an event header is available, when the
+ * size in the header is smaller than a header, or when the event extends
+ * beyond 'end'.
+ *
+ * The result points straight into the ring buffer, except for an event
+ * that wraps around the end of the buffer: that one is copied into
+ * map->event_copy first.
+ */
+static union perf_event *perf_mmap__read(struct perf_mmap *map,
+					 u64 *startp, u64 end)
+{
+	/* The data area starts one page into the mapping. */
+	unsigned char *data = map->base + page_size;
+	union perf_event *event = NULL;
+	/* Number of bytes between the read position and 'end'. */
+	int diff = end - *startp;
+
+	if (diff >= (int)sizeof(event->header)) {
+		size_t size;
+
+		event = (union perf_event *)&data[*startp & map->mask];
+		size = event->header.size;
+
+		if (size < sizeof(event->header) || diff < (int)size)
+			return NULL;
+
+		/*
+		 * Event straddles the mmap boundary -- header should always
+		 * be inside due to u64 alignment of output.
+		 */
+		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
+			unsigned int offset = *startp;
+			/* Never copy more than a union perf_event can hold. */
+			unsigned int len = min(sizeof(*event), size), cpy;
+			void *dst = map->event_copy;
+
+			/* Copy piecewise, restarting at offset 0 after the wrap. */
+			do {
+				cpy = min(map->mask + 1 - (offset & map->mask), len);
+				memcpy(dst, &data[offset & map->mask], cpy);
+				offset += cpy;
+				dst += cpy;
+				len -= cpy;
+			} while (len);
+
+			event = (union perf_event *)map->event_copy;
+		}
+
+		*startp += size;
+	}
+
+	return event;
+}
+
+/*
+ * Hand out the events of a ring buffer one at a time: every call returns
+ * the next event, or NULL when there is none.
+ *
+ * Intended call sequence:
+ *	perf_mmap__read_init()
+ *	while ((event = perf_mmap__read_event())) {
+ *		// process the event
+ *		perf_mmap__consume()
+ *	}
+ *	perf_mmap__read_done()
+ */
+union perf_event *perf_mmap__read_event(struct perf_mmap *map)
+{
+	union perf_event *ev;
+	const bool forward = !map->overwrite;
+
+	/* The map may have been unmapped due to a POLLHUP/POLLERR. */
+	if (refcount_read(&map->refcnt) == 0)
+		return NULL;
+
+	/* A non-overwrite ring buffer is not paused, so refresh the end. */
+	if (forward)
+		map->end = perf_mmap__read_head(map);
+
+	ev = perf_mmap__read(map, &map->start, map->end);
+
+	/* Remember how far we got; perf_mmap__consume() publishes it. */
+	if (forward)
+		map->prev = map->start;
+
+	return ev;
+}
+
+/*
+ * True when the read position has caught up with the current head and no
+ * auxtrace area is mapped.
+ */
+static bool perf_mmap__empty(struct perf_mmap *map)
+{
+	u64 head = perf_mmap__read_head(map);
+
+	if (head != map->prev)
+		return false;
+
+	return !map->auxtrace_mmap.base;
+}
+
+/* Take an additional reference on the map. */
+void perf_mmap__get(struct perf_mmap *map)
+{
+	refcount_inc(&map->refcnt);
+}
+
+/* Drop a reference; dropping the last one unmaps the ring buffer. */
+void perf_mmap__put(struct perf_mmap *map)
+{
+	/* A buffer that is still mapped must hold at least one reference. */
+	BUG_ON(map->base && refcount_read(&map->refcnt) == 0);
+
+	if (refcount_dec_and_test(&map->refcnt))
+		perf_mmap__munmap(map);
+}
+
+/*
+ * Mark everything up to map->prev as consumed and release the map once it
+ * is down to its last reference with nothing left to read.
+ */
+void perf_mmap__consume(struct perf_mmap *map)
+{
+	/* Only the non-overwrite mode publishes a tail position. */
+	if (!map->overwrite)
+		perf_mmap__write_tail(map, map->prev);
+
+	if (refcount_read(&map->refcnt) != 1)
+		return;
+
+	if (perf_mmap__empty(map))
+		perf_mmap__put(map);
+}
+
+/* Weak default: maps nothing and reports success. */
+int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
+			       struct auxtrace_mmap_params *mp __maybe_unused,
+			       void *userpg __maybe_unused,
+			       int fd __maybe_unused)
+{
+	return 0;
+}
+
+/* Weak default: nothing to unmap. */
+void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
+{
+}
+
+/* Weak default: leaves the parameters untouched. */
+void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_unused,
+				       off_t auxtrace_offset __maybe_unused,
+				       unsigned int auxtrace_pages __maybe_unused,
+				       bool auxtrace_overwrite __maybe_unused)
+{
+}
+
+/* Weak default: leaves the parameters untouched. */
+void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused,
+					  struct perf_evlist *evlist __maybe_unused,
+					  int idx __maybe_unused,
+					  bool per_cpu __maybe_unused)
+{
+}
+
+/*
+ * Unmap the ring buffer (if it is mapped) and reset base, fd and the
+ * reference count; the auxtrace area is unmapped unconditionally.
+ */
+void perf_mmap__munmap(struct perf_mmap *map)
+{
+	if (map->base != NULL) {
+		munmap(map->base, perf_mmap__mmap_len(map));
+		map->base = NULL;
+		map->fd = -1;
+		/* Readers test refcnt == 0 to detect an unmapped buffer. */
+		refcount_set(&map->refcnt, 0);
+	}
+	auxtrace_mmap__munmap(&map->auxtrace_mmap);
+}
+
+/*
+ * Map the ring buffer of 'fd' using the protection and mask from 'mp', then
+ * set up the auxtrace mapping.
+ *
+ * Returns 0 on success and -1 on failure. When the ring buffer mmap()
+ * fails, map->base is reset to NULL. When only the auxtrace mapping
+ * fails, the ring buffer is left mapped for the caller to clean up.
+ */
+int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
+{
+	/*
+	 * The last one will be done at perf_mmap__consume(), so that we
+	 * make sure we don't prevent tools from consuming every last event in
+	 * the ring buffer.
+	 *
+	 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
+	 * anymore, but the last events for it are still in the ring buffer,
+	 * waiting to be consumed.
+	 *
+	 * Tools can chose to ignore this at their own discretion, but the
+	 * evlist layer can't just drop it when filtering events in
+	 * perf_evlist__filter_pollfd().
+	 */
+	refcount_set(&map->refcnt, 2);
+	map->prev = 0;
+	/* mask must be set first: perf_mmap__mmap_len() derives the length from it. */
+	map->mask = mp->mask;
+	map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
+			 MAP_SHARED, fd, 0);
+	if (map->base == MAP_FAILED) {
+		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
+			  errno);
+		map->base = NULL;
+		return -1;
+	}
+	map->fd = fd;
+
+	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
+				&mp->auxtrace_mp, map->base, fd))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Find the end of the readable data in a full overwrite ring buffer.
+ *
+ * Starting at *start, walk from event header to event header until either
+ * a whole buffer (mask + 1 bytes) has been covered or a header with size 0
+ * is met, and store the resulting position in *end.
+ *
+ * @buf:   start of the data area
+ * @mask:  data area size minus one
+ * @start: position of the first event; not modified
+ * @end:   out: position just past the last event that can be read
+ *
+ * Every exit from the loop returns 0; the statements after the loop are
+ * not reached.
+ */
+static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
+{
+	struct perf_event_header *pheader;
+	u64 evt_head = *start;
+	int size = mask + 1;
+
+	pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
+	pheader = (struct perf_event_header *)(buf + (*start & mask));
+	while (true) {
+		if (evt_head - *start >= (unsigned int)size) {
+			pr_debug("Finished reading overwrite ring buffer: rewind\n");
+			/*
+			 * Overshot the buffer size: step back over the event
+			 * whose header was read last.
+			 */
+			if (evt_head - *start > (unsigned int)size)
+				evt_head -= pheader->size;
+			*end = evt_head;
+			return 0;
+		}
+
+		pheader = (struct perf_event_header *)(buf + (evt_head & mask));
+
+		/* A zero size marks the end of the walk. */
+		if (pheader->size == 0) {
+			pr_debug("Finished reading overwrite ring buffer: get start\n");
+			*end = evt_head;
+			return 0;
+		}
+
+		evt_head += pheader->size;
+		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
+	}
+	WARN_ONCE(1, "Shouldn't get here\n");
+	return -1;
+}
+
+/*
+ * Report the start and end of the available data in ringbuffer
+ *
+ * Sets md->start and md->end for the following reads. In overwrite mode
+ * the range runs from the current head to md->prev, otherwise from md->prev
+ * to the current head.
+ *
+ * Returns 0 when there is data to read, -EAGAIN when there is none (or
+ * when a non-overwrite buffer was overrun and its data got skipped), and
+ * -EINVAL when overwrite_rb_find_range() fails.
+ */
+static int __perf_mmap__read_init(struct perf_mmap *md)
+{
+	u64 head = perf_mmap__read_head(md);
+	u64 old = md->prev;
+	unsigned char *data = md->base + page_size;
+	unsigned long size;
+
+	md->start = md->overwrite ? head : old;
+	md->end = md->overwrite ? old : head;
+
+	if (md->start == md->end)
+		return -EAGAIN;
+
+	size = md->end - md->start;
+	/* More than a buffer's worth of data: part of it was overwritten. */
+	if (size > (unsigned long)(md->mask) + 1) {
+		if (!md->overwrite) {
+			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
+
+			/* Skip everything that is currently in the buffer. */
+			md->prev = head;
+			perf_mmap__consume(md);
+			return -EAGAIN;
+		}
+
+		/*
+		 * Backward ring buffer is full. We still have a chance to read
+		 * most of data from it.
+		 */
+		if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Prepare a map for reading. Returns -ENOENT when the map has already been
+ * unmapped (POLLHUP/POLLERR), otherwise the result of
+ * __perf_mmap__read_init().
+ */
+int perf_mmap__read_init(struct perf_mmap *map)
+{
+	return refcount_read(&map->refcnt) ? __perf_mmap__read_init(map)
+					   : -ENOENT;
+}
+
+/*
+ * Hand all currently readable data to push(), in at most two chunks (two
+ * when the data wraps around the end of the buffer), then mark it consumed.
+ *
+ * @md:   the ring buffer
+ * @to:   opaque argument passed through to push()
+ * @push: callback receiving (to, buf, size); a negative return aborts
+ *
+ * Returns 0 on success or when there is nothing to read, -1 when
+ * perf_mmap__read_init() fails for another reason or push() fails.
+ */
+int perf_mmap__push(struct perf_mmap *md, void *to,
+		    int push(void *to, void *buf, size_t size))
+{
+	/*
+	 * NOTE(review): head is sampled here, but perf_mmap__read_init()
+	 * reads the head again to compute md->start/md->end. If the head
+	 * moved in between, md->prev below would not match the range that
+	 * was pushed - confirm whether this is intended.
+	 */
+	u64 head = perf_mmap__read_head(md);
+	unsigned char *data = md->base + page_size;
+	unsigned long size;
+	void *buf;
+	int rc = 0;
+
+	rc = perf_mmap__read_init(md);
+	if (rc < 0)
+		return (rc == -EAGAIN) ? 0 : -1;
+
+	size = md->end - md->start;
+
+	/* Data wraps: first push the part up to the end of the buffer. */
+	if ((md->start & md->mask) + size != (md->end & md->mask)) {
+		buf = &data[md->start & md->mask];
+		size = md->mask + 1 - (md->start & md->mask);
+		md->start += size;
+
+		if (push(to, buf, size) < 0) {
+			rc = -1;
+			goto out;
+		}
+	}
+
+	/* Push the remaining (or only) contiguous part. */
+	buf = &data[md->start & md->mask];
+	size = md->end - md->start;
+	md->start += size;
+
+	if (push(to, buf, size) < 0) {
+		rc = -1;
+		goto out;
+	}
+
+	md->prev = head;
+	perf_mmap__consume(md);
+out:
+	return rc;
+}
+
+/*
+ * Finish a read sequence by moving map->prev to the current head.
+ *
+ * Mandatory for overwrite mode: that mode reads backward, so the last
+ * perf_mmap__read() leaves map->prev at the tail, while the next read has
+ * to end at the head.
+ */
+void perf_mmap__read_done(struct perf_mmap *map)
+{
+	/* Skip maps that were unmapped due to a POLLHUP/POLLERR. */
+	if (refcount_read(&map->refcnt))
+		map->prev = perf_mmap__read_head(map);
+}
diff --git a/util/mmap.h b/util/mmap.h
new file mode 100644
index 0000000..d82294d
--- /dev/null
+++ b/util/mmap.h
@@ -0,0 +1,101 @@
+#ifndef __PERF_MMAP_H
+#define __PERF_MMAP_H 1
+
+#include <linux/compiler.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <asm/barrier.h>
+#include <stdbool.h>
+#include "auxtrace.h"
+#include "event.h"
+
+/**
+ * struct perf_mmap - perf's ring buffer mmap details
+ *
+ * @base - start of the mapping; read as a struct perf_event_mmap_page by
+ *	   perf_mmap__read_head()/perf_mmap__write_tail()
+ * @mask - used to wrap positions into the data area (size minus one)
+ * @fd - file descriptor the buffer was mapped from
+ * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
+ * @prev - position up to which data has been read so far
+ * @start - start of the range currently being read
+ * @end - end of the range currently being read
+ * @overwrite - selects the overwrite (backward) mode of the readers
+ * @auxtrace_mmap - the associated auxtrace mapping
+ * @event_copy - holds the copy of an event that wraps around the end of the
+ *		 data area
+ */
+struct perf_mmap {
+	void		 *base;
+	int		 mask;
+	int		 fd;
+	refcount_t	 refcnt;
+	u64		 prev;
+	u64		 start;
+	u64		 end;
+	bool		 overwrite;
+	struct auxtrace_mmap auxtrace_mmap;
+	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+};
+
+/*
+ * State of the backward (overwrite) ring buffers.
+ *
+ * State machine of bkw_mmap_state:
+ *
+ *                     .________________(forbid)_____________.
+ *                     |                                     V
+ * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
+ *                     ^  ^              |   ^               |
+ *                     |  |__(forbid)____/   |___(forbid)___/|
+ *                     |                                     |
+ *                      \_________________(3)_______________/
+ *
+ * NOTREADY     : Backward ring buffers are not ready
+ * RUNNING      : Backward ring buffers are recording
+ * DATA_PENDING : We are required to collect data from backward ring buffers
+ * EMPTY        : We have collected data from backward ring buffers.
+ *
+ * (0): Setup backward ring buffer
+ * (1): Pause ring buffers for reading
+ * (2): Read from ring buffers
+ * (3): Resume ring buffers for recording
+ */
+enum bkw_mmap_state {
+	BKW_MMAP_NOTREADY,
+	BKW_MMAP_RUNNING,
+	BKW_MMAP_DATA_PENDING,
+	BKW_MMAP_EMPTY,
+};
+
+/*
+ * Parameters for perf_mmap__mmap(): 'prot' is passed to mmap(), 'mask' is
+ * copied into perf_mmap::mask and 'auxtrace_mp' is handed to
+ * auxtrace_mmap__mmap().
+ */
+struct mmap_params {
+	int			    prot, mask;
+	struct auxtrace_mmap_params auxtrace_mp;
+};
+
+int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd);
+void perf_mmap__munmap(struct perf_mmap *map);
+
+void perf_mmap__get(struct perf_mmap *map);
+void perf_mmap__put(struct perf_mmap *map);
+
+void perf_mmap__consume(struct perf_mmap *map);
+
+/*
+ * Read data_head from the control page at mm->base. The read barrier is
+ * issued after the load and before the value is returned to the caller.
+ */
+static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
+{
+	struct perf_event_mmap_page *pc = mm->base;
+	u64 head = READ_ONCE(pc->data_head);
+	rmb();
+	return head;
+}
+
+/* Store 'tail' into data_tail of the control page at md->base. */
+static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
+{
+	struct perf_event_mmap_page *pc = md->base;
+
+	/*
+	 * ensure all reads are done before we write the tail out.
+	 */
+	mb();
+	pc->data_tail = tail;
+}
+
+union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
+
+union perf_event *perf_mmap__read_event(struct perf_mmap *map);
+
+int perf_mmap__push(struct perf_mmap *md, void *to,
+		    int push(void *to, void *buf, size_t size));
+
+size_t perf_mmap__mmap_len(struct perf_mmap *map);
+
+int perf_mmap__read_init(struct perf_mmap *md);
+void perf_mmap__read_done(struct perf_mmap *map);
+#endif /*__PERF_MMAP_H */
diff --git a/util/namespaces.c b/util/namespaces.c
new file mode 100644
index 0000000..5be0217
--- /dev/null
+++ b/util/namespaces.c
@@ -0,0 +1,249 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#include "namespaces.h"
+#include "util.h"
+#include "event.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Allocate a zeroed struct namespaces with room for the link info array.
+ *
+ * With an event, the array has event->nr_namespaces entries and is copied
+ * from the event; without one it has NR_NAMESPACES zeroed entries.
+ * end_time starts out as -1. Returns NULL when the allocation fails.
+ */
+struct namespaces *namespaces__new(struct namespaces_event *event)
+{
+	struct namespaces *ns;
+	u64 links_sz;
+
+	links_sz = ((event ? event->nr_namespaces : NR_NAMESPACES) *
+		    sizeof(struct perf_ns_link_info));
+
+	ns = zalloc(sizeof(struct namespaces) + links_sz);
+	if (ns == NULL)
+		return NULL;
+
+	ns->end_time = -1;
+	if (event != NULL)
+		memcpy(ns->link_info, event->link_info, links_sz);
+
+	return ns;
+}
+
+/* Free a struct namespaces obtained from namespaces__new(). */
+void namespaces__free(struct namespaces *namespaces)
+{
+	free(namespaces);
+}
+
+/*
+ * Parse the number that follows the last tab of a /proc/<pid>/status line.
+ * Returns false, leaving *pid alone, when the line contains no tab.
+ */
+static bool nsinfo__status_pid(const char *line, pid_t *pid)
+{
+	const char *val = strrchr(line, '\t');
+
+	if (val == NULL)
+		return false;
+
+	*pid = (pid_t)strtol(val, NULL, 10);
+	return true;
+}
+
+/*
+ * Fill in the namespace related fields of @nsi for nsi->pid.
+ *
+ * - need_setns/mntns_path are set when the mount namespace of the process
+ *   differs from our own (compared via the inode of /proc/<pid>/ns/mnt).
+ * - tgid/nstgid are read from the "Tgid:" and "NStgid:" lines of
+ *   /proc/<pid>/status; nstgid falls back to tgid when there is no
+ *   "NStgid:" line.
+ *
+ * Returns 0 on success and -1 on failure. On failure need_setns and
+ * mntns_path may already have been set.
+ */
+int nsinfo__init(struct nsinfo *nsi)
+{
+	char oldns[PATH_MAX];
+	char spath[PATH_MAX];
+	char *newns = NULL;
+	char *statln = NULL;
+	struct stat old_stat;
+	struct stat new_stat;
+	FILE *f = NULL;
+	size_t linesz = 0;
+	int rv = -1;
+
+	if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
+		return rv;
+
+	if (asprintf(&newns, "/proc/%d/ns/mnt", nsi->pid) == -1)
+		return rv;
+
+	if (stat(oldns, &old_stat) < 0)
+		goto out;
+
+	if (stat(newns, &new_stat) < 0)
+		goto out;
+
+	/* Check if the mount namespaces differ, if so then indicate that we
+	 * want to switch as part of looking up dso/map data.
+	 */
+	if (old_stat.st_ino != new_stat.st_ino) {
+		nsi->need_setns = true;
+		/* Ownership of the path moves to nsi. */
+		nsi->mntns_path = newns;
+		newns = NULL;
+	}
+
+	/* If we're dealing with a process that is in a different PID namespace,
+	 * attempt to work out the innermost tgid for the process.
+	 */
+	if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX)
+		goto out;
+
+	f = fopen(spath, "r");
+	if (f == NULL)
+		goto out;
+
+	while (getline(&statln, &linesz, f) != -1) {
+		/* Use tgid if CONFIG_PID_NS is not defined. */
+		if (strstr(statln, "Tgid:") != NULL) {
+			/* A line without a tab is ignored instead of crashing. */
+			if (nsinfo__status_pid(statln, &nsi->tgid))
+				nsi->nstgid = nsi->tgid;
+		}
+
+		if (strstr(statln, "NStgid:") != NULL) {
+			/* The value after the last tab is the one used. */
+			nsinfo__status_pid(statln, &nsi->nstgid);
+			break;
+		}
+	}
+	rv = 0;
+
+out:
+	if (f != NULL)
+		(void) fclose(f);
+	free(statln);
+	free(newns);
+	return rv;
+}
+
+/*
+ * Create the namespace info for @pid, holding one reference.
+ *
+ * Returns NULL for pid 0 or when the allocation fails. A failing
+ * nsinfo__init() is not an error: the pid is kept, but need_setns is
+ * cleared.
+ */
+struct nsinfo *nsinfo__new(pid_t pid)
+{
+	struct nsinfo *nsi;
+
+	if (pid == 0)
+		return NULL;
+
+	nsi = calloc(1, sizeof(*nsi));
+	if (nsi == NULL)
+		return NULL;
+
+	nsi->pid = pid;
+	nsi->tgid = pid;
+	nsi->nstgid = pid;
+	nsi->need_setns = false;
+
+	/*
+	 * The process may exit while we look at its proc information, which
+	 * makes init fail. Keep the pid in that case, but don't try to enter
+	 * the namespace.
+	 */
+	if (nsinfo__init(nsi) == -1)
+		nsi->need_setns = false;
+
+	refcount_set(&nsi->refcnt, 1);
+	return nsi;
+}
+
+/*
+ * Duplicate @nsi, including its own copy of mntns_path. The copy starts
+ * with one reference. Returns NULL when an allocation fails.
+ */
+struct nsinfo *nsinfo__copy(struct nsinfo *nsi)
+{
+	struct nsinfo *dup = calloc(1, sizeof(*dup));
+
+	if (dup == NULL)
+		return NULL;
+
+	dup->pid = nsi->pid;
+	dup->tgid = nsi->tgid;
+	dup->nstgid = nsi->nstgid;
+	dup->need_setns = nsi->need_setns;
+
+	if (nsi->mntns_path != NULL) {
+		dup->mntns_path = strdup(nsi->mntns_path);
+		if (dup->mntns_path == NULL) {
+			free(dup);
+			return NULL;
+		}
+	}
+
+	refcount_set(&dup->refcnt, 1);
+	return dup;
+}
+
+/* Free @nsi and its mntns_path, regardless of the reference count. */
+void nsinfo__delete(struct nsinfo *nsi)
+{
+	zfree(&nsi->mntns_path);
+	free(nsi);
+}
+
+/* Take a reference on @nsi (NULL is passed through) and return it. */
+struct nsinfo *nsinfo__get(struct nsinfo *nsi)
+{
+	if (nsi == NULL)
+		return NULL;
+
+	refcount_inc(&nsi->refcnt);
+	return nsi;
+}
+
+/* Drop a reference on @nsi (NULL is allowed); the last one deletes it. */
+void nsinfo__put(struct nsinfo *nsi)
+{
+	if (nsi && refcount_dec_and_test(&nsi->refcnt))
+		nsinfo__delete(nsi);
+}
+
+/*
+ * Switch into the mount namespace described by @nsi, if it needs one.
+ *
+ * @nc receives the descriptors of the old and the new namespace for
+ * nsinfo__mountns_exit(). Both stay -1 when nothing was switched, be it
+ * because no switch is needed or because one of the steps failed.
+ */
+void nsinfo__mountns_enter(struct nsinfo *nsi,
+				  struct nscookie *nc)
+{
+	char self_ns[PATH_MAX];
+	int cur_fd;
+	int target_fd;
+
+	if (!nc)
+		return;
+
+	nc->oldns = -1;
+	nc->newns = -1;
+
+	if (nsi == NULL || !nsi->need_setns)
+		return;
+
+	if (snprintf(self_ns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
+		return;
+
+	cur_fd = open(self_ns, O_RDONLY);
+	if (cur_fd < 0)
+		return;
+
+	target_fd = open(nsi->mntns_path, O_RDONLY);
+	if (target_fd < 0) {
+		close(cur_fd);
+		return;
+	}
+
+	if (setns(target_fd, CLONE_NEWNS) < 0) {
+		close(cur_fd);
+		close(target_fd);
+		return;
+	}
+
+	/* Both descriptors stay open until nsinfo__mountns_exit(). */
+	nc->oldns = cur_fd;
+	nc->newns = target_fd;
+}
+
+/*
+ * Undo nsinfo__mountns_enter(): switch back to the saved namespace and
+ * close both descriptors. Does nothing when @nc is NULL or holds no
+ * descriptors.
+ */
+void nsinfo__mountns_exit(struct nscookie *nc)
+{
+	if (nc == NULL || nc->oldns == -1 || nc->newns == -1)
+		return;
+
+	/* The result is not checked; the descriptors are closed either way. */
+	setns(nc->oldns, CLONE_NEWNS);
+
+	if (nc->oldns > -1) {
+		close(nc->oldns);
+		nc->oldns = -1;
+	}
+
+	if (nc->newns > -1) {
+		close(nc->newns);
+		nc->newns = -1;
+	}
+}
+
+/*
+ * Resolve @path with realpath() after entering the mount namespace of
+ * @nsi, then leave that namespace again.
+ *
+ * Returns the buffer allocated by realpath(path, NULL), or NULL on
+ * failure.
+ */
+char *nsinfo__realpath(const char *path, struct nsinfo *nsi)
+{
+	char *rpath;
+	struct nscookie nsc;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	rpath = realpath(path, NULL);
+	nsinfo__mountns_exit(&nsc);
+
+	return rpath;
+}
diff --git a/util/namespaces.h b/util/namespaces.h
new file mode 100644
index 0000000..760558d
--- /dev/null
+++ b/util/namespaces.h
@@ -0,0 +1,65 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#ifndef __PERF_NAMESPACES_H
+#define __PERF_NAMESPACES_H
+
+#include <sys/types.h>
+#include <linux/perf_event.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+
+struct namespaces_event;
+
+/*
+ * Namespace link info, created by namespaces__new(). link_info is a
+ * flexible array sized at allocation time.
+ */
+struct namespaces {
+	struct list_head list;
+	u64 end_time;		/* namespaces__new() sets this to -1 */
+	struct perf_ns_link_info link_info[];
+};
+
+struct namespaces *namespaces__new(struct namespaces_event *event);
+void namespaces__free(struct namespaces *namespaces);
+
+/*
+ * Namespace information of one process.
+ *
+ * tgid and nstgid default to pid and are refined by nsinfo__init() from
+ * the "Tgid:" and "NStgid:" lines of /proc/<pid>/status. need_setns and
+ * mntns_path are set when the mount namespace of the process differs from
+ * our own; mntns_path is heap allocated and freed by nsinfo__delete().
+ */
+struct nsinfo {
+	pid_t			pid;
+	pid_t			tgid;
+	pid_t			nstgid;
+	bool			need_setns;
+	char			*mntns_path;
+	refcount_t		refcnt;
+};
+
+/*
+ * State kept between nsinfo__mountns_enter() and nsinfo__mountns_exit():
+ * open descriptors of the previous and the entered mount namespace, or -1.
+ */
+struct nscookie {
+	int			oldns;
+	int			newns;
+};
+
+int nsinfo__init(struct nsinfo *nsi);
+struct nsinfo *nsinfo__new(pid_t pid);
+struct nsinfo *nsinfo__copy(struct nsinfo *nsi);
+void nsinfo__delete(struct nsinfo *nsi);
+
+struct nsinfo *nsinfo__get(struct nsinfo *nsi);
+void nsinfo__put(struct nsinfo *nsi);
+
+void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc);
+void nsinfo__mountns_exit(struct nscookie *nc);
+
+char *nsinfo__realpath(const char *path, struct nsinfo *nsi);
+
+/* Drop the reference held in *nsip and reset the pointer to NULL. */
+static inline void __nsinfo__zput(struct nsinfo **nsip)
+{
+	if (nsip) {
+		nsinfo__put(*nsip);
+		*nsip = NULL;
+	}
+}
+
+/* Takes the pointer variable itself (an lvalue), not its address. */
+#define nsinfo__zput(nsi) __nsinfo__zput(&nsi)
+
+#endif  /* __PERF_NAMESPACES_H */
diff --git a/util/ordered-events.c b/util/ordered-events.c
new file mode 100644
index 0000000..bad9e02
--- /dev/null
+++ b/util/ordered-events.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/list.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include "ordered-events.h"
+#include "session.h"
+#include "asm/bug.h"
+#include "debug.h"
+
+/* Print through eprintf() at level n, controlled by debug_ordered_events. */
+#define pr_N(n, fmt, ...) \
+	eprintf(n, debug_ordered_events, fmt, ##__VA_ARGS__)
+
+/* Level 1 message with the pr_fmt() prefix applied. */
+#define pr(fmt, ...) pr_N(1, pr_fmt(fmt), ##__VA_ARGS__)
+
+/*
+ * Insert 'new' into oe->events, which is kept sorted by timestamp.
+ *
+ * The search starts at the previously queued event (oe->last) and walks
+ * forward or backward from there. oe->last is updated to 'new', and
+ * oe->max_timestamp is updated when 'new' ends up at the tail of the list.
+ */
+static void queue_event(struct ordered_events *oe, struct ordered_event *new)
+{
+	struct ordered_event *last = oe->last;
+	u64 timestamp = new->timestamp;
+	struct list_head *p;
+
+	++oe->nr_events;
+	oe->last = new;
+
+	pr_oe_time2(timestamp, "queue_event nr_events %u\n", oe->nr_events);
+
+	/* No previously queued event: 'new' goes to the front of the list. */
+	if (!last) {
+		list_add(&new->list, &oe->events);
+		oe->max_timestamp = timestamp;
+		return;
+	}
+
+	/*
+	 * last event might point to some random place in the list as it's
+	 * the last queued event. We expect that the new event is close to
+	 * this.
+	 */
+	if (last->timestamp <= timestamp) {
+		/* Walk forward to the first entry with a later timestamp. */
+		while (last->timestamp <= timestamp) {
+			p = last->list.next;
+			if (p == &oe->events) {
+				list_add_tail(&new->list, &oe->events);
+				oe->max_timestamp = timestamp;
+				return;
+			}
+			last = list_entry(p, struct ordered_event, list);
+		}
+		/* Insert right before that entry. */
+		list_add_tail(&new->list, &last->list);
+	} else {
+		/* Walk backward to the first entry that is not later. */
+		while (last->timestamp > timestamp) {
+			p = last->list.prev;
+			if (p == &oe->events) {
+				list_add(&new->list, &oe->events);
+				return;
+			}
+			last = list_entry(p, struct ordered_event, list);
+		}
+		/* Insert right after that entry. */
+		list_add(&new->list, &last->list);
+	}
+}
+
+/*
+ * Duplicate 'event' and account its size in oe->cur_alloc_size. Returns
+ * NULL when the allocation limit is already reached or memdup() fails.
+ */
+static union perf_event *__dup_event(struct ordered_events *oe,
+				     union perf_event *event)
+{
+	union perf_event *copy;
+
+	if (oe->cur_alloc_size >= oe->max_alloc_size)
+		return NULL;
+
+	copy = memdup(event, event->header.size);
+	if (copy)
+		oe->cur_alloc_size += event->header.size;
+
+	return copy;
+}
+
+/* Return a copy of 'event' in copy_on_queue mode, else 'event' itself. */
+static union perf_event *dup_event(struct ordered_events *oe,
+				   union perf_event *event)
+{
+	if (!oe->copy_on_queue)
+		return event;
+
+	return __dup_event(oe, event);
+}
+
+/*
+ * Free an event obtained from dup_event(). Only does something in
+ * copy_on_queue mode, where the event is a private copy.
+ */
+static void free_dup_event(struct ordered_events *oe, union perf_event *event)
+{
+	if (!event || !oe->copy_on_queue)
+		return;
+
+	oe->cur_alloc_size -= event->header.size;
+	free(event);
+}
+
+/* Number of struct ordered_event entries in one 64KiB buffer. */
+#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct ordered_event))
+/*
+ * Get an ordered_event for 'event'.
+ *
+ * The entry comes, in this order of preference, from the cache of
+ * released entries, from the current buffer, or from a newly allocated
+ * buffer. 'event' is duplicated first when copy_on_queue is set.
+ *
+ * Returns NULL when the event cannot be duplicated, when a new buffer
+ * cannot be allocated, or when the allocation limit has been reached. No
+ * duplicate is left behind in those cases.
+ */
+static struct ordered_event *alloc_event(struct ordered_events *oe,
+					 union perf_event *event)
+{
+	struct list_head *cache = &oe->cache;
+	struct ordered_event *new = NULL;
+	union perf_event *new_event;
+
+	new_event = dup_event(oe, event);
+	if (!new_event)
+		return NULL;
+
+	if (!list_empty(cache)) {
+		new = list_entry(cache->next, struct ordered_event, list);
+		list_del(&new->list);
+	} else if (oe->buffer) {
+		new = oe->buffer + oe->buffer_idx;
+		/* Buffer used up: the next call has to allocate a new one. */
+		if (++oe->buffer_idx == MAX_SAMPLE_BUFFER)
+			oe->buffer = NULL;
+	} else if (oe->cur_alloc_size < oe->max_alloc_size) {
+		size_t size = MAX_SAMPLE_BUFFER * sizeof(*new);
+
+		oe->buffer = malloc(size);
+		if (!oe->buffer) {
+			free_dup_event(oe, new_event);
+			return NULL;
+		}
+
+		pr("alloc size %" PRIu64 "B (+%zu), max %" PRIu64 "B\n",
+		   oe->cur_alloc_size, size, oe->max_alloc_size);
+
+		oe->cur_alloc_size += size;
+		list_add(&oe->buffer->list, &oe->to_free);
+
+		/*
+		 * First entry is abused to maintain the to_free list. It never
+		 * carries an event, so don't leave its pointer uninitialized.
+		 */
+		oe->buffer->event = NULL;
+		oe->buffer_idx = 2;
+		new = oe->buffer + 1;
+	} else {
+		pr("allocation limit reached %" PRIu64 "B\n", oe->max_alloc_size);
+		/* No entry available: drop the duplicate and report failure. */
+		free_dup_event(oe, new_event);
+		return NULL;
+	}
+
+	new->event = new_event;
+	return new;
+}
+
+/*
+ * Allocate an entry for 'event', stamp it with 'timestamp' and queue it.
+ * Returns the queued entry, or NULL when no entry could be allocated.
+ */
+static struct ordered_event *
+ordered_events__new_event(struct ordered_events *oe, u64 timestamp,
+		    union perf_event *event)
+{
+	struct ordered_event *entry = alloc_event(oe, event);
+
+	if (!entry)
+		return NULL;
+
+	entry->timestamp = timestamp;
+	queue_event(oe, entry);
+	return entry;
+}
+
+/*
+ * Remove 'event' from the queue: the entry is moved to the cache for reuse
+ * and its duplicated perf event (if any) is freed.
+ */
+void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event)
+{
+	list_move(&event->list, &oe->cache);
+	oe->nr_events--;
+	free_dup_event(oe, event->event);
+	event->event = NULL;
+}
+
+/*
+ * Queue 'event' for delivery in timestamp order.
+ *
+ * When no entry can be allocated, half of the queue is flushed and the
+ * allocation is retried once.
+ *
+ * Returns 0 on success, -ETIME for a timestamp of 0 or ~0ULL, and -ENOMEM
+ * when the retry fails as well.
+ */
+int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
+			  u64 timestamp, u64 file_offset)
+{
+	struct ordered_event *entry;
+
+	if (!timestamp || timestamp == ~0ULL)
+		return -ETIME;
+
+	/* Older than what was already flushed: count it, queue it anyway. */
+	if (timestamp < oe->last_flush) {
+		pr_oe_time(timestamp,      "out of order event\n");
+		pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n",
+			   oe->last_flush_type);
+
+		oe->nr_unordered_events++;
+	}
+
+	entry = ordered_events__new_event(oe, timestamp, event);
+	if (!entry) {
+		ordered_events__flush(oe, OE_FLUSH__HALF);
+		entry = ordered_events__new_event(oe, timestamp, event);
+		if (!entry)
+			return -ENOMEM;
+	}
+
+	entry->file_offset = file_offset;
+	return 0;
+}
+
+/*
+ * Deliver and remove all queued events with a timestamp up to
+ * oe->next_flush, in list order.
+ *
+ * Returns 0 on success or when the session is done, otherwise the non-zero
+ * value returned by oe->deliver(). Note that both early returns inside
+ * the loop leave before the oe->last fix-up and before
+ * ui_progress__finish().
+ */
+static int __ordered_events__flush(struct ordered_events *oe)
+{
+	struct list_head *head = &oe->events;
+	struct ordered_event *tmp, *iter;
+	u64 limit = oe->next_flush;
+	/* Sampled up front: the entry oe->last points to may get delivered. */
+	u64 last_ts = oe->last ? oe->last->timestamp : 0ULL;
+	/* A limit of ULLONG_MAX is what OE_FLUSH__FINAL sets. */
+	bool show_progress = limit == ULLONG_MAX;
+	struct ui_progress prog;
+	int ret;
+
+	if (!limit)
+		return 0;
+
+	if (show_progress)
+		ui_progress__init(&prog, oe->nr_events, "Processing time ordered events...");
+
+	list_for_each_entry_safe(iter, tmp, head, list) {
+		if (session_done())
+			return 0;
+
+		if (iter->timestamp > limit)
+			break;
+		ret = oe->deliver(oe, iter);
+		if (ret)
+			return ret;
+
+		/* The entry only moves to the cache, so it can still be read. */
+		ordered_events__delete(oe, iter);
+		oe->last_flush = iter->timestamp;
+
+		if (show_progress)
+			ui_progress__update(&prog, 1);
+	}
+
+	/* Re-point oe->last when the entry it referred to has been flushed. */
+	if (list_empty(head))
+		oe->last = NULL;
+	else if (last_ts <= limit)
+		oe->last = list_entry(head->prev, struct ordered_event, list);
+
+	if (show_progress)
+		ui_progress__finish();
+
+	return 0;
+}
+
+/*
+ * Flush queued events according to 'how':
+ *
+ *  OE_FLUSH__FINAL - flush everything
+ *  OE_FLUSH__HALF  - flush up to the timestamp halfway between the first
+ *                    queued event and the most recently queued one
+ *  OE_FLUSH__ROUND - flush up to the current oe->next_flush, then set
+ *                    next_flush to max_timestamp for the next round
+ *  OE_FLUSH__NONE  - flush up to the current oe->next_flush
+ *
+ * Returns 0 on success (or when the queue is empty), otherwise the error
+ * from __ordered_events__flush().
+ */
+int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
+{
+	/* Indexed by enum oe_flush. */
+	static const char * const str[] = {
+		"NONE",
+		"FINAL",
+		"ROUND",
+		"HALF ",
+	};
+	int err;
+
+	if (oe->nr_events == 0)
+		return 0;
+
+	switch (how) {
+	case OE_FLUSH__FINAL:
+		oe->next_flush = ULLONG_MAX;
+		break;
+
+	case OE_FLUSH__HALF:
+	{
+		struct ordered_event *first, *last;
+		struct list_head *head = &oe->events;
+
+		first = list_entry(head->next, struct ordered_event, list);
+		last = oe->last;
+
+		/* Warn if we are called before any event got allocated. */
+		if (WARN_ONCE(!last || list_empty(head), "empty queue"))
+			return 0;
+
+		oe->next_flush  = first->timestamp;
+		oe->next_flush += (last->timestamp - first->timestamp) / 2;
+		break;
+	}
+
+	case OE_FLUSH__ROUND:
+	case OE_FLUSH__NONE:
+	default:
+		break;
+	};
+
+	pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush PRE  %s, nr_events %u\n",
+		   str[how], oe->nr_events);
+	pr_oe_time(oe->max_timestamp, "max_timestamp\n");
+
+	err = __ordered_events__flush(oe);
+
+	if (!err) {
+		if (how == OE_FLUSH__ROUND)
+			oe->next_flush = oe->max_timestamp;
+
+		oe->last_flush_type = how;
+	}
+
+	pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush POST %s, nr_events %u\n",
+		   str[how], oe->nr_events);
+	pr_oe_time(oe->last_flush, "last_flush\n");
+
+	return err;
+}
+
+/*
+ * Set up the lists, the allocation accounting (no limit by default) and
+ * the deliver callback. All other fields are left as they are.
+ */
+void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver)
+{
+	INIT_LIST_HEAD(&oe->events);
+	INIT_LIST_HEAD(&oe->cache);
+	INIT_LIST_HEAD(&oe->to_free);
+	oe->max_alloc_size = (u64) -1;
+	oe->cur_alloc_size = 0;
+	oe->deliver	   = deliver;
+}
+
+/*
+ * Free all buffers allocated by alloc_event().
+ *
+ * Every entry on oe->to_free is the first element of one malloc'ed buffer.
+ * That element only serves as list linkage: alloc_event() never hands it
+ * out, so it never carries a perf event that would need freeing.
+ *
+ * NOTE(review): duplicated events of entries that are still queued are not
+ * freed here - confirm that callers flush before freeing.
+ */
+void ordered_events__free(struct ordered_events *oe)
+{
+	while (!list_empty(&oe->to_free)) {
+		struct ordered_event *event;
+
+		event = list_entry(oe->to_free.next, struct ordered_event, list);
+		list_del(&event->list);
+		free(event);
+	}
+}
+
+/*
+ * Free all buffers and reset 'oe' to a freshly initialized state, keeping
+ * only the deliver callback.
+ */
+void ordered_events__reinit(struct ordered_events *oe)
+{
+	ordered_events__deliver_t old_deliver = oe->deliver;
+
+	ordered_events__free(oe);
+	memset(oe, '\0', sizeof(*oe));
+	ordered_events__init(oe, old_deliver);
+}
diff --git a/util/ordered-events.h b/util/ordered-events.h
new file mode 100644
index 0000000..8c7a294
--- /dev/null
+++ b/util/ordered-events.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ORDERED_EVENTS_H
+#define __ORDERED_EVENTS_H
+
+#include <linux/types.h>
+
+struct perf_sample;
+
+/* One queued event, linked into one of the lists of struct ordered_events. */
+struct ordered_event {
+	u64			timestamp;
+	u64			file_offset;
+	union perf_event	*event;
+	struct list_head	list;
+};
+
+/*
+ * Flush types accepted by ordered_events__flush(). The values index a name
+ * table there, so their order matters.
+ */
+enum oe_flush {
+	OE_FLUSH__NONE,
+	OE_FLUSH__FINAL,
+	OE_FLUSH__ROUND,
+	OE_FLUSH__HALF,
+};
+
+struct ordered_events;
+
+typedef int (*ordered_events__deliver_t)(struct ordered_events *oe,
+					 struct ordered_event *event);
+
+/*
+ * A queue of events kept sorted by timestamp.
+ *
+ * last_flush     - timestamp of the event delivered most recently
+ * next_flush     - events up to this timestamp get delivered by a flush
+ * max_timestamp  - timestamp of the event most recently added at the tail
+ * max_alloc_size - allocation limit, compared against cur_alloc_size
+ * cur_alloc_size - bytes currently accounted for buffers and event copies
+ * events         - the queued events, sorted by timestamp
+ * cache          - released entries available for reuse
+ * to_free        - first entry of every allocated buffer
+ * buffer         - buffer new entries are currently taken from, or NULL
+ * last           - the most recently queued event
+ * deliver        - callback invoked for every flushed event
+ * buffer_idx     - index of the next unused entry in buffer
+ * nr_events      - number of queued events
+ * last_flush_type - 'how' of the last successful flush
+ * nr_unordered_events - events queued with a timestamp below last_flush
+ * copy_on_queue  - duplicate events when queueing them
+ */
+struct ordered_events {
+	u64			last_flush;
+	u64			next_flush;
+	u64			max_timestamp;
+	u64			max_alloc_size;
+	u64			cur_alloc_size;
+	struct list_head	events;
+	struct list_head	cache;
+	struct list_head	to_free;
+	struct ordered_event	*buffer;
+	struct ordered_event	*last;
+	ordered_events__deliver_t deliver;
+	int			buffer_idx;
+	unsigned int		nr_events;
+	enum oe_flush		last_flush_type;
+	u32			nr_unordered_events;
+	bool                    copy_on_queue;
+};
+
+int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
+			  u64 timestamp, u64 file_offset);
+void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event);
+int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);
+void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver);
+void ordered_events__free(struct ordered_events *oe);
+void ordered_events__reinit(struct ordered_events *oe);
+
+/* Set the allocation limit (max_alloc_size) of the queue. */
+static inline
+void ordered_events__set_alloc_size(struct ordered_events *oe, u64 size)
+{
+	oe->max_alloc_size = size;
+}
+
+/* Select whether events get duplicated when they are queued. */
+static inline
+void ordered_events__set_copy_on_queue(struct ordered_events *oe, bool copy)
+{
+	oe->copy_on_queue = copy;
+}
+#endif /* __ORDERED_EVENTS_H */
diff --git a/util/parse-branch-options.c b/util/parse-branch-options.c
new file mode 100644
index 0000000..bd779d9
--- /dev/null
+++ b/util/parse-branch-options.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "perf.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-branch-options.h"
+
+/* Initializer for one branch_modes[] entry: filter name 'n', mode bits 'm'. */
+#define BRANCH_OPT(n, m) \
+	{ .name = n, .mode = (m) }
+
+/* Table terminator: lookups stop at the entry with a NULL name. */
+#define BRANCH_END { .name = NULL }
+
+/* Maps a branch filter name to its PERF_SAMPLE_BRANCH_* bits. */
+struct branch_mode {
+	const char *name;
+	int mode;
+};
+
+/* Filter names accepted by parse_branch_str(), matched case-insensitively. */
+static const struct branch_mode branch_modes[] = {
+	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
+	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
+	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
+	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
+	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
+	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
+	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
+	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
+	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
+	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
+	BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP),
+	BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL),
+	BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE),
+	BRANCH_END
+};
+
+/*
+ * Parse a comma separated list of branch filter names and OR their bits
+ * into *mode.
+ *
+ * A NULL string selects PERF_SAMPLE_BRANCH_ANY. So does a list that only
+ * names privilege levels (u, k, hv); *mode is replaced by
+ * PERF_SAMPLE_BRANCH_ANY in that case.
+ *
+ * Returns 0 on success, -1 when the string cannot be duplicated or names
+ * an unknown filter; bits of the names parsed before the unknown one stay
+ * set in *mode.
+ */
+int parse_branch_str(const char *str, __u64 *mode)
+{
+#define ONLY_PLM \
+	(PERF_SAMPLE_BRANCH_USER	|\
+	 PERF_SAMPLE_BRANCH_KERNEL	|\
+	 PERF_SAMPLE_BRANCH_HV)
+
+	const struct branch_mode *br;
+	char *copy, *tok, *comma;
+	int ret = 0;
+
+	if (str == NULL) {
+		*mode = PERF_SAMPLE_BRANCH_ANY;
+		return 0;
+	}
+
+	/* because str is read-only */
+	copy = strdup(str);
+	if (copy == NULL)
+		return -1;
+
+	for (tok = copy; ; tok = comma + 1) {
+		/* Cut the current name off at the next comma, if any. */
+		comma = strchr(tok, ',');
+		if (comma)
+			*comma = '\0';
+
+		br = branch_modes;
+		while (br->name && strcasecmp(tok, br->name))
+			br++;
+
+		if (!br->name) {
+			ret = -1;
+			pr_warning("unknown branch filter %s,"
+				    " check man page\n", tok);
+			goto error;
+		}
+
+		*mode |= br->mode;
+
+		if (!comma)
+			break;
+	}
+
+	/* default to any branch */
+	if ((*mode & ~ONLY_PLM) == 0)
+		*mode = PERF_SAMPLE_BRANCH_ANY;
+error:
+	free(copy);
+	return ret;
+}
+
+/*
+ * Option callback for branch filters: parses 'str' into the __u64 that
+ * opt->value points to.
+ *
+ * Returns 0 when the option is unset, -1 when the mode has already been
+ * set, otherwise the result of parse_branch_str().
+ */
+int
+parse_branch_stack(const struct option *opt, const char *str, int unset)
+{
+	__u64 *branch_mask = (__u64 *)opt->value;
+
+	if (unset)
+		return 0;
+
+	/* cannot set it twice, -b + --branch-filter for instance */
+	return *branch_mask ? -1 : parse_branch_str(str, branch_mask);
+}
diff --git a/util/parse-branch-options.h b/util/parse-branch-options.h
new file mode 100644
index 0000000..11d1722
--- /dev/null
+++ b/util/parse-branch-options.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PARSE_BRANCH_OPTIONS_H
+#define _PERF_PARSE_BRANCH_OPTIONS_H 1
+#include <stdint.h>
+int parse_branch_stack(const struct option *opt, const char *str, int unset);
+int parse_branch_str(const char *str, __u64 *mode);
+#endif /* _PERF_PARSE_BRANCH_OPTIONS_H */
diff --git a/util/parse-events.c b/util/parse-events.c
new file mode 100644
index 0000000..2fb0272
--- /dev/null
+++ b/util/parse-events.c
@@ -0,0 +1,2616 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/hw_breakpoint.h>
+#include <linux/err.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/param.h>
+#include "term.h"
+#include "../perf.h"
+#include "evlist.h"
+#include "evsel.h"
+#include <subcmd/parse-options.h>
+#include "parse-events.h"
+#include <subcmd/exec-cmd.h>
+#include "string2.h"
+#include "strlist.h"
+#include "symbol.h"
+#include "cache.h"
+#include "header.h"
+#include "bpf-loader.h"
+#include "debug.h"
+#include <api/fs/tracing_path.h>
+#include "parse-events-bison.h"
+#define YY_EXTRA_TYPE int
+#include "parse-events-flex.h"
+#include "pmu.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "probe-file.h"
+#include "asm/bug.h"
+#include "util/parse-branch-options.h"
+#include "metricgroup.h"
+
+#define MAX_NAME_LEN 100
+
+#ifdef PARSER_DEBUG
+extern int parse_events_debug;
+#endif
+int parse_events_parse(void *parse_state, void *scanner);
+static int get_config_terms(struct list_head *head_config,
+			    struct list_head *head_terms __maybe_unused);
+
+static struct perf_pmu_event_symbol *perf_pmu_events_list;
+/*
+ * The variable indicates the number of supported pmu event symbols.
+ * 0 means not initialized and ready to init
+ * -1 means failed to init, don't try anymore
+ * >0 is the number of supported pmu event symbols
+ */
+static int perf_pmu_events_list_num;
+
+/*
+ * Names of the generic hardware events, indexed by PERF_COUNT_HW_* id.
+ * .symbol is the primary name, .alias an alternative spelling; the alias
+ * is the empty string for events that have none.
+ */
+struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = {
+		.symbol = "cpu-cycles",
+		.alias  = "cycles",
+	},
+	[PERF_COUNT_HW_INSTRUCTIONS] = {
+		.symbol = "instructions",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_CACHE_REFERENCES] = {
+		.symbol = "cache-references",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_CACHE_MISSES] = {
+		.symbol = "cache-misses",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {
+		.symbol = "branch-instructions",
+		.alias  = "branches",
+	},
+	[PERF_COUNT_HW_BRANCH_MISSES] = {
+		.symbol = "branch-misses",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_BUS_CYCLES] = {
+		.symbol = "bus-cycles",
+		.alias  = "",
+	},
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {
+		.symbol = "stalled-cycles-frontend",
+		.alias  = "idle-cycles-frontend",
+	},
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = {
+		.symbol = "stalled-cycles-backend",
+		.alias  = "idle-cycles-backend",
+	},
+	[PERF_COUNT_HW_REF_CPU_CYCLES] = {
+		.symbol = "ref-cycles",
+		.alias  = "",
+	},
+};
+
+/*
+ * Names of the software events, indexed by PERF_COUNT_SW_* id.
+ * Same layout as event_symbols_hw: .symbol is the primary name and .alias
+ * an alternative spelling, left as the empty string when there is none.
+ */
+struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
+	[PERF_COUNT_SW_CPU_CLOCK] = {
+		.symbol = "cpu-clock",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_TASK_CLOCK] = {
+		.symbol = "task-clock",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_PAGE_FAULTS] = {
+		.symbol = "page-faults",
+		.alias  = "faults",
+	},
+	[PERF_COUNT_SW_CONTEXT_SWITCHES] = {
+		.symbol = "context-switches",
+		.alias  = "cs",
+	},
+	[PERF_COUNT_SW_CPU_MIGRATIONS] = {
+		.symbol = "cpu-migrations",
+		.alias  = "migrations",
+	},
+	[PERF_COUNT_SW_PAGE_FAULTS_MIN] = {
+		.symbol = "minor-faults",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_PAGE_FAULTS_MAJ] = {
+		.symbol = "major-faults",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_ALIGNMENT_FAULTS] = {
+		.symbol = "alignment-faults",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_EMULATION_FAULTS] = {
+		.symbol = "emulation-faults",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_DUMMY] = {
+		.symbol = "dummy",
+		.alias  = "",
+	},
+	[PERF_COUNT_SW_BPF_OUTPUT] = {
+		.symbol = "bpf-output",
+		.alias  = "",
+	},
+};
+
+/*
+ * Extract one bit-field from an event config value: mask with
+ * PERF_EVENT_<name>_MASK, then shift down by PERF_EVENT_<name>_SHIFT.
+ * 'config' is expanded without parentheses, so pass a plain identifier or
+ * an already parenthesized expression.
+ */
+#define __PERF_EVENT_FIELD(config, name) \
+	((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT)
+
+/* Accessors for the individual fields of a config value. */
+#define PERF_EVENT_RAW(config)		__PERF_EVENT_FIELD(config, RAW)
+#define PERF_EVENT_CONFIG(config)	__PERF_EVENT_FIELD(config, CONFIG)
+#define PERF_EVENT_TYPE(config)		__PERF_EVENT_FIELD(config, TYPE)
+#define PERF_EVENT_ID(config)		__PERF_EVENT_FIELD(config, EVENT)
+
+/*
+ * Iterate over the sub-directories of an open directory stream, skipping
+ * "." and "..".  Only entries whose d_type is DT_DIR are visited.
+ *
+ * Expands to 'while (...) if (...)', so the statement that follows is the
+ * body of the 'if': 'continue' and 'break' act on the hidden while loop,
+ * and an 'else' placed right after the body would pair with the hidden if.
+ */
+#define for_each_subsystem(sys_dir, sys_dirent)			\
+	while ((sys_dirent = readdir(sys_dir)) != NULL)		\
+		if (sys_dirent->d_type == DT_DIR &&		\
+		    (strcmp(sys_dirent->d_name, ".")) &&	\
+		    (strcmp(sys_dirent->d_name, "..")))
+
+/*
+ * Check that <tracing_events_path>/<sys>/<evt>/id can be opened for reading.
+ *
+ * Returns 0 when the file could be opened, -EINVAL otherwise.
+ */
+static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
+{
+	char id_path[MAXPATHLEN];
+	int id_fd;
+
+	snprintf(id_path, sizeof(id_path), "%s/%s/%s/id", tracing_events_path,
+		 sys_dir->d_name, evt_dir->d_name);
+
+	id_fd = open(id_path, O_RDONLY);
+	if (id_fd >= 0) {
+		/* only existence matters, the content is not read here */
+		close(id_fd);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Iterate over the event directories of one subsystem: entries of evt_dir
+ * that are directories (d_type == DT_DIR), are not "." or "..", and for
+ * which tp_event_has_id() succeeds.
+ *
+ * Like for_each_subsystem this expands to 'while (...) if (...)', so
+ * 'continue'/'break' in the body act on the hidden while loop.
+ */
+#define for_each_event(sys_dirent, evt_dir, evt_dirent)		\
+	while ((evt_dirent = readdir(evt_dir)) != NULL)		\
+		if (evt_dirent->d_type == DT_DIR &&		\
+		    (strcmp(evt_dirent->d_name, ".")) &&	\
+		    (strcmp(evt_dirent->d_name, "..")) &&	\
+		    (!tp_event_has_id(sys_dirent, evt_dirent)))
+
+#define MAX_EVENT_LENGTH 512
+
+
+/*
+ * Allocate a tracepoint_path carrying private copies of @sys_name and
+ * @evt_name, each stored in a MAX_EVENT_LENGTH byte buffer.
+ *
+ * Returns the new object, or NULL when any allocation fails.
+ */
+static struct tracepoint_path *
+tracepoint_path__new(const char *sys_name, const char *evt_name)
+{
+	struct tracepoint_path *path = zalloc(sizeof(*path));
+
+	if (!path)
+		return NULL;
+
+	path->system = malloc(MAX_EVENT_LENGTH);
+	path->name = malloc(MAX_EVENT_LENGTH);
+	if (!path->system || !path->name) {
+		zfree(&path->system);
+		zfree(&path->name);
+		free(path);
+		return NULL;
+	}
+
+	/* snprintf() always NUL-terminates, which strncpy() does not promise */
+	snprintf(path->system, MAX_EVENT_LENGTH, "%s", sys_name);
+	snprintf(path->name, MAX_EVENT_LENGTH, "%s", evt_name);
+	return path;
+}
+
+/*
+ * Find the tracepoint whose "id" file under tracing_events_path holds
+ * @config and return its subsystem and event name.
+ *
+ * Returns a newly allocated tracepoint_path (path->system, path->name and
+ * the structure itself are heap allocated and belong to the caller), or
+ * NULL when no event matches, the events directory cannot be opened, or
+ * memory runs out.
+ */
+struct tracepoint_path *tracepoint_id_to_path(u64 config)
+{
+	struct tracepoint_path *path = NULL;
+	DIR *sys_dir, *evt_dir;
+	struct dirent *sys_dirent, *evt_dirent;
+	char id_buf[24];
+	ssize_t len;
+	int fd;
+	u64 id;
+	char evt_path[MAXPATHLEN];
+	char dir_path[MAXPATHLEN];
+
+	sys_dir = opendir(tracing_events_path);
+	if (!sys_dir)
+		return NULL;
+
+	for_each_subsystem(sys_dir, sys_dirent) {
+
+		snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
+			 sys_dirent->d_name);
+		evt_dir = opendir(dir_path);
+		if (!evt_dir)
+			continue;
+
+		for_each_event(sys_dirent, evt_dir, evt_dirent) {
+
+			scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path,
+				  evt_dirent->d_name);
+			fd = open(evt_path, O_RDONLY);
+			if (fd < 0)
+				continue;
+			/* keep one byte back for the terminator atoll() needs */
+			len = read(fd, id_buf, sizeof(id_buf) - 1);
+			close(fd);
+			if (len <= 0)
+				continue;
+			id_buf[len] = '\0';
+
+			id = atoll(id_buf);
+			if (id != config)
+				continue;
+
+			/*
+			 * Copy the names while both streams are still open:
+			 * the dirent storage is not guaranteed to outlive
+			 * closedir().
+			 */
+			path = tracepoint_path__new(sys_dirent->d_name,
+						    evt_dirent->d_name);
+			closedir(evt_dir);
+			closedir(sys_dir);
+			return path;
+		}
+		closedir(evt_dir);
+	}
+
+	closedir(sys_dir);
+	return NULL;
+}
+
+/*
+ * Split a "system:event" string into a newly allocated tracepoint_path.
+ *
+ * The part before the first ':' becomes path->system, everything after it
+ * path->name.  Returns NULL when @name has no ':' or an allocation fails;
+ * otherwise the caller owns the structure and both strings.
+ */
+struct tracepoint_path *tracepoint_name_to_path(const char *name)
+{
+	const char *sep = strchr(name, ':');
+	struct tracepoint_path *path;
+
+	if (sep == NULL)
+		return NULL;
+
+	path = zalloc(sizeof(*path));
+	if (path == NULL)
+		return NULL;
+
+	path->system = strndup(name, sep - name);
+	path->name = strdup(sep + 1);
+
+	if (!path->system || !path->name) {
+		free(path->system);
+		free(path->name);
+		free(path);
+		return NULL;
+	}
+
+	return path;
+}
+
+/*
+ * Map a PERF_TYPE_* value to a human readable label.
+ *
+ * Returns a string literal; "unknown" for any type not listed below.
+ */
+const char *event_type(int type)
+{
+	if (type == PERF_TYPE_HARDWARE)
+		return "hardware";
+
+	if (type == PERF_TYPE_SOFTWARE)
+		return "software";
+
+	if (type == PERF_TYPE_TRACEPOINT)
+		return "tracepoint";
+
+	if (type == PERF_TYPE_HW_CACHE)
+		return "hardware-cache";
+
+	return "unknown";
+}
+
+/* Returns non-zero when @term is a "name" term (PARSE_EVENTS__TERM_TYPE_NAME). */
+static int parse_events__is_name_term(struct parse_events_term *term)
+{
+	return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
+}
+
+/*
+ * Look up the user supplied event name among the config terms.
+ *
+ * Returns the string of the first "name" term on @head_terms, or NULL when
+ * the list is NULL or has no such term.  The string is not copied; it stays
+ * owned by the term.
+ */
+static char *get_config_name(struct list_head *head_terms)
+{
+	struct parse_events_term *pos;
+	char *found = NULL;
+
+	if (head_terms == NULL)
+		return NULL;
+
+	list_for_each_entry(pos, head_terms, list) {
+		if (parse_events__is_name_term(pos)) {
+			found = pos->val.str;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/*
+ * Create an evsel for @attr and append it to @list.
+ *
+ * @idx:              next evsel index; incremented once the evsel exists
+ * @name:             optional event name, duplicated when non-NULL
+ * @pmu:              optional PMU supplying the cpu map and uncore flag
+ * @config_terms:     optional list spliced onto the evsel's config_terms
+ * @auto_merge_stats: stored on the evsel as is
+ *
+ * Returns the new evsel, or NULL when it could not be allocated.
+ */
+static struct perf_evsel *
+__add_event(struct list_head *list, int *idx,
+	    struct perf_event_attr *attr,
+	    char *name, struct perf_pmu *pmu,
+	    struct list_head *config_terms, bool auto_merge_stats)
+{
+	struct cpu_map *pmu_cpus = NULL;
+	bool uncore = false;
+	struct perf_evsel *evsel;
+
+	if (pmu) {
+		pmu_cpus = pmu->cpus;
+		uncore = pmu->is_uncore;
+	}
+
+	event_attr_init(attr);
+
+	evsel = perf_evsel__new_idx(attr, *idx);
+	if (evsel == NULL)
+		return NULL;
+
+	*idx += 1;
+
+	/* cpus and own_cpus each take their own reference on the map */
+	evsel->cpus        = cpu_map__get(pmu_cpus);
+	evsel->own_cpus    = cpu_map__get(pmu_cpus);
+	evsel->system_wide = uncore;
+	evsel->auto_merge_stats = auto_merge_stats;
+
+	if (name != NULL)
+		evsel->name = strdup(name);
+
+	if (config_terms != NULL)
+		list_splice(config_terms, &evsel->config_terms);
+
+	list_add_tail(&evsel->node, list);
+	return evsel;
+}
+
+/*
+ * Add an event that is not tied to a PMU and does not auto-merge stats.
+ *
+ * Returns 0 on success, -ENOMEM when the evsel could not be created.
+ */
+static int add_event(struct list_head *list, int *idx,
+		     struct perf_event_attr *attr, char *name,
+		     struct list_head *config_terms)
+{
+	struct perf_evsel *evsel;
+
+	evsel = __add_event(list, idx, attr, name, NULL, config_terms, false);
+	if (evsel == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Find the first row of @names that has an alias which is a
+ * case-insensitive prefix of @str.
+ *
+ * @names: table of @size rows, each with up to PERF_EVSEL__MAX_ALIASES
+ *         entries; a NULL entry ends a row early
+ *
+ * Empty aliases never match.  Returns the row index, or -1 when no row
+ * matches.
+ */
+static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
+{
+	int row, col;
+
+	for (row = 0; row < size; row++) {
+		for (col = 0; col < PERF_EVSEL__MAX_ALIASES; col++) {
+			const char *alias = names[row][col];
+			int len;
+
+			if (alias == NULL)
+				break;
+
+			len = strlen(alias);
+			if (len > 0 && strncasecmp(str, alias, len) == 0)
+				return row;
+		}
+	}
+
+	return -1;
+}
+
+typedef int config_term_func_t(struct perf_event_attr *attr,
+			       struct parse_events_term *term,
+			       struct parse_events_error *err);
+static int config_term_common(struct perf_event_attr *attr,
+			      struct parse_events_term *term,
+			      struct parse_events_error *err);
+static int config_attr(struct perf_event_attr *attr,
+		       struct list_head *head,
+		       struct parse_events_error *err,
+		       config_term_func_t config_term);
+
+/*
+ * Add a hardware cache event described by up to three name components.
+ *
+ * @type:        cache name, matched against perf_evsel__hw_cache
+ * @op_result1:  optional operation or result name
+ * @op_result2:  optional operation or result name
+ * @err:         error reporting structure handed on to config_attr()
+ * @head_config: optional list of config terms
+ *
+ * A missing operation defaults to "read", a missing result to "access".
+ * The event is named after the "name" config term when there is one,
+ * otherwise "<type>[-<op_result1>[-<op_result2>]]".
+ *
+ * Returns 0 on success, -EINVAL for an unknown cache type, an operation
+ * that is invalid for the type or a bad config term, -ENOMEM on allocation
+ * failure.
+ */
+int parse_events_add_cache(struct list_head *list, int *idx,
+			   char *type, char *op_result1, char *op_result2,
+			   struct parse_events_error *err,
+			   struct list_head *head_config)
+{
+	struct perf_event_attr attr;
+	LIST_HEAD(config_terms);
+	char name[MAX_NAME_LEN], *config_name;
+	int cache_type = -1, cache_op = -1, cache_result = -1;
+	char *op_result[2] = { op_result1, op_result2 };
+	int i, n;
+
+	/*
+	 * No fallback - if we cannot get a clear cache type
+	 * then bail out:
+	 */
+	cache_type = parse_aliases(type, perf_evsel__hw_cache,
+				   PERF_COUNT_HW_CACHE_MAX);
+	if (cache_type == -1)
+		return -EINVAL;
+
+	config_name = get_config_name(head_config);
+
+	/*
+	 * scnprintf() returns what was actually stored, so 'n' can never
+	 * move past the end of 'name' and 'MAX_NAME_LEN - n' stays positive
+	 * even when the components do not fit.
+	 */
+	n = scnprintf(name, MAX_NAME_LEN, "%s", type);
+
+	for (i = 0; (i < 2) && (op_result[i]); i++) {
+		char *str = op_result[i];
+
+		n += scnprintf(name + n, MAX_NAME_LEN - n, "-%s", str);
+
+		/* the first component that parses as an operation wins */
+		if (cache_op == -1) {
+			cache_op = parse_aliases(str, perf_evsel__hw_cache_op,
+						 PERF_COUNT_HW_CACHE_OP_MAX);
+			if (cache_op >= 0) {
+				if (!perf_evsel__is_cache_op_valid(cache_type, cache_op))
+					return -EINVAL;
+				continue;
+			}
+		}
+
+		if (cache_result == -1) {
+			cache_result = parse_aliases(str, perf_evsel__hw_cache_result,
+						     PERF_COUNT_HW_CACHE_RESULT_MAX);
+			if (cache_result >= 0)
+				continue;
+		}
+	}
+
+	/*
+	 * Fall back to reads:
+	 */
+	if (cache_op == -1)
+		cache_op = PERF_COUNT_HW_CACHE_OP_READ;
+
+	/*
+	 * Fall back to accesses:
+	 */
+	if (cache_result == -1)
+		cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
+
+	memset(&attr, 0, sizeof(attr));
+	/* type in bits 0-7, operation in bits 8-15, result from bit 16 up */
+	attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
+	attr.type = PERF_TYPE_HW_CACHE;
+
+	if (head_config) {
+		if (config_attr(&attr, head_config, err,
+				config_term_common))
+			return -EINVAL;
+
+		if (get_config_terms(head_config, &config_terms))
+			return -ENOMEM;
+	}
+	return add_event(list, idx, &attr, config_name ? : name, &config_terms);
+}
+
+/*
+ * Fill @e with a message and help text for a failed tracepoint lookup.
+ *
+ * @e:    error structure to fill; nothing is done when it is NULL
+ * @err:  errno value, positive or negative
+ * @sys:  tracepoint subsystem name, used for the help text
+ * @name: tracepoint event name, used for the help text
+ */
+static void tracepoint_error(struct parse_events_error *e, int err,
+			     const char *sys, const char *name)
+{
+	const char *msg;
+	char help[BUFSIZ];
+
+	if (e == NULL)
+		return;
+
+	/*
+	 * Callers hand in either a raw errno ( > 0) or the value
+	 * decoded from an error pointer ( < 0); normalize to positive.
+	 */
+	err = abs(err);
+
+	if (err == EACCES)
+		msg = "can't access trace events";
+	else if (err == ENOENT)
+		msg = "unknown tracepoint";
+	else
+		msg = "failed to add tracepoint";
+
+	e->str = strdup(msg);
+
+	tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name);
+	e->help = strdup(help);
+}
+
+/*
+ * Create an evsel for the tracepoint @sys_name:@evt_name and append it to
+ * @list.
+ *
+ * @idx:         next evsel index; consumed (incremented) even on failure
+ * @err:         optional error structure, filled when the evsel cannot be
+ *               created
+ * @head_config: optional config terms to attach to the evsel
+ *
+ * Returns 0 on success, the error encoded by perf_evsel__newtp_idx() when
+ * the evsel cannot be created, -ENOMEM when the config terms cannot be
+ * built.
+ */
+static int add_tracepoint(struct list_head *list, int *idx,
+			  const char *sys_name, const char *evt_name,
+			  struct parse_events_error *err,
+			  struct list_head *head_config)
+{
+	struct perf_evsel *evsel;
+
+	evsel = perf_evsel__newtp_idx(sys_name, evt_name, (*idx)++);
+	if (IS_ERR(evsel)) {
+		tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name);
+		return PTR_ERR(evsel);
+	}
+
+	if (head_config) {
+		LIST_HEAD(config_terms);
+		int ret = get_config_terms(head_config, &config_terms);
+
+		/*
+		 * Hand over whatever was built, even on failure, so that no
+		 * term is left dangling on the local list head.
+		 */
+		list_splice(&config_terms, &evsel->config_terms);
+		if (ret) {
+			/*
+			 * The evsel is not on @list yet, nobody else can
+			 * release it.
+			 * NOTE(review): assumes perf_evsel__delete() also
+			 * frees evsel->config_terms - confirm.
+			 */
+			perf_evsel__delete(evsel);
+			return -ENOMEM;
+		}
+	}
+
+	list_add_tail(&evsel->node, list);
+	return 0;
+}
+
+/*
+ * Add every event of subsystem @sys_name whose name matches the glob
+ * pattern @evt_name.
+ *
+ * The entries ".", "..", "enable" and "filter" are never considered.
+ * Scanning stops at the first event that fails to be added.
+ *
+ * Returns 0 on success, -1 when the subsystem directory cannot be opened
+ * or nothing matched, otherwise the error from add_tracepoint().
+ */
+static int add_tracepoint_multi_event(struct list_head *list, int *idx,
+				      const char *sys_name, const char *evt_name,
+				      struct parse_events_error *err,
+				      struct list_head *head_config)
+{
+	char sys_path[MAXPATHLEN];
+	struct dirent *entry;
+	DIR *dir;
+	int matched = 0;
+	int ret = 0;
+
+	snprintf(sys_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name);
+	dir = opendir(sys_path);
+	if (dir == NULL) {
+		tracepoint_error(err, errno, sys_name, evt_name);
+		return -1;
+	}
+
+	while (ret == 0) {
+		const char *ent_name;
+
+		entry = readdir(dir);
+		if (entry == NULL)
+			break;
+
+		ent_name = entry->d_name;
+		if (strcmp(ent_name, ".") == 0 ||
+		    strcmp(ent_name, "..") == 0 ||
+		    strcmp(ent_name, "enable") == 0 ||
+		    strcmp(ent_name, "filter") == 0)
+			continue;
+
+		if (!strglobmatch(ent_name, evt_name))
+			continue;
+
+		matched++;
+
+		ret = add_tracepoint(list, idx, sys_name, ent_name,
+				     err, head_config);
+	}
+
+	if (matched == 0) {
+		tracepoint_error(err, ENOENT, sys_name, evt_name);
+		ret = -1;
+	}
+
+	closedir(dir);
+	return ret;
+}
+
+/*
+ * Add the tracepoint event(s) @evt_name of subsystem @sys_name.
+ *
+ * An event name containing '*' or '?' is treated as a glob pattern and
+ * may add several events; any other name adds exactly one.
+ */
+static int add_tracepoint_event(struct list_head *list, int *idx,
+				const char *sys_name, const char *evt_name,
+				struct parse_events_error *err,
+				struct list_head *head_config)
+{
+	if (strpbrk(evt_name, "*?") != NULL)
+		return add_tracepoint_multi_event(list, idx, sys_name,
+						  evt_name, err, head_config);
+
+	return add_tracepoint(list, idx, sys_name, evt_name, err, head_config);
+}
+
+/*
+ * Add @evt_name from every subsystem whose name matches the glob pattern
+ * @sys_name.
+ *
+ * The entries ".", "..", "enable", "header_event" and "header_page" of the
+ * events directory are never considered.  Scanning stops at the first
+ * subsystem for which adding fails.
+ *
+ * Returns 0 on success (including when no subsystem matched), -1 when the
+ * events directory cannot be opened, otherwise the error from
+ * add_tracepoint_event().
+ */
+static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
+				    const char *sys_name, const char *evt_name,
+				    struct parse_events_error *err,
+				    struct list_head *head_config)
+{
+	struct dirent *entry;
+	DIR *dir;
+	int ret = 0;
+
+	dir = opendir(tracing_events_path);
+	if (dir == NULL) {
+		tracepoint_error(err, errno, sys_name, evt_name);
+		return -1;
+	}
+
+	while (ret == 0) {
+		const char *ent_name;
+
+		entry = readdir(dir);
+		if (entry == NULL)
+			break;
+
+		ent_name = entry->d_name;
+		if (strcmp(ent_name, ".") == 0 ||
+		    strcmp(ent_name, "..") == 0 ||
+		    strcmp(ent_name, "enable") == 0 ||
+		    strcmp(ent_name, "header_event") == 0 ||
+		    strcmp(ent_name, "header_page") == 0)
+			continue;
+
+		if (!strglobmatch(ent_name, sys_name))
+			continue;
+
+		ret = add_tracepoint_event(list, idx, ent_name,
+					   evt_name, err, head_config);
+	}
+
+	closedir(dir);
+	return ret;
+}
+
+/*
+ * Context handed to add_bpf_event() through the opaque pointer argument of
+ * bpf__foreach_event().
+ */
+struct __add_bpf_event_param {
+	struct parse_events_state *parse_state;	/* supplies idx and error */
+	struct list_head *list;			/* new evsels are spliced here */
+	struct list_head *head_config;		/* config terms for each event */
+};
+
+/*
+ * Callback for bpf__foreach_event(): add the tracepoint @group:@event and
+ * record @fd as the BPF program fd on every evsel created for it.
+ *
+ * @_param: pointer to a struct __add_bpf_event_param
+ *
+ * The evsels are collected on a private list first, so a failure can drop
+ * them again without touching the caller's list.
+ *
+ * Returns 0 on success, otherwise the error from
+ * parse_events_add_tracepoint().
+ */
+static int add_bpf_event(const char *group, const char *event, int fd,
+			 void *_param)
+{
+	struct __add_bpf_event_param *param = _param;
+	struct parse_events_state *parse_state = param->parse_state;
+	struct perf_evsel *evsel, *next;
+	LIST_HEAD(new_evsels);
+	int err;
+
+	pr_debug("add bpf event %s:%s and attach bpf program %d\n",
+		 group, event, fd);
+
+	err = parse_events_add_tracepoint(&new_evsels, &parse_state->idx, group,
+					  event, parse_state->error,
+					  param->head_config);
+	if (!err) {
+		pr_debug("adding %s:%s\n", group, event);
+
+		list_for_each_entry(evsel, &new_evsels, node) {
+			pr_debug("adding %s:%s to %p\n",
+				 group, event, evsel);
+			evsel->bpf_fd = fd;
+		}
+		list_splice(&new_evsels, param->list);
+		return 0;
+	}
+
+	pr_debug("Failed to add BPF event %s:%s\n",
+		 group, event);
+	/* throw away whatever was created before the failure */
+	list_for_each_entry_safe(evsel, next, &new_evsels, node) {
+		list_del(&evsel->node);
+		perf_evsel__delete(evsel);
+	}
+	return err;
+}
+
+/*
+ * Probe and load the BPF object @obj, then add one event per entry that
+ * bpf__foreach_event() reports, via add_bpf_event().
+ *
+ * @parse_state: supplies the evsel index and the error structure
+ * @list:        list receiving the new evsels
+ * @head_config: config terms passed on to each added event
+ *
+ * Returns 0 on success.  On failure a negative error is returned and
+ * parse_state->error->str / ->help are set; parse_state->error is
+ * dereferenced without a NULL check.
+ */
+int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
+			      struct list_head *list,
+			      struct bpf_object *obj,
+			      struct list_head *head_config)
+{
+	int err;
+	char errbuf[BUFSIZ];
+	struct __add_bpf_event_param param = {parse_state, list, head_config};
+	/* the atexit handler is registered at most once per process */
+	static bool registered_unprobe_atexit = false;
+
+	if (IS_ERR(obj) || !obj) {
+		snprintf(errbuf, sizeof(errbuf),
+			 "Internal error: load bpf obj with NULL");
+		err = -EINVAL;
+		goto errout;
+	}
+
+	/*
+	 * Register the atexit handler before calling bpf__probe(), so that
+	 * bpf__probe() does not have to remove the probe points it already
+	 * created when it fails part way.
+	 */
+	if (!registered_unprobe_atexit) {
+		atexit(bpf__clear);
+		registered_unprobe_atexit = true;
+	}
+
+	err = bpf__probe(obj);
+	if (err) {
+		bpf__strerror_probe(obj, err, errbuf, sizeof(errbuf));
+		goto errout;
+	}
+
+	err = bpf__load(obj);
+	if (err) {
+		bpf__strerror_load(obj, err, errbuf, sizeof(errbuf));
+		goto errout;
+	}
+
+	err = bpf__foreach_event(obj, add_bpf_event, &param);
+	if (err) {
+		snprintf(errbuf, sizeof(errbuf),
+			 "Attach events in BPF object failed");
+		goto errout;
+	}
+
+	return 0;
+errout:
+	/* every failure path has filled errbuf before jumping here */
+	parse_state->error->help = strdup("(add -v to see detail)");
+	parse_state->error->str = strdup(errbuf);
+	return err;
+}
+
+/*
+ * Apply the object level config terms on @head_config to the BPF object
+ * @obj.
+ *
+ * Only PARSE_EVENTS__TERM_TYPE_USER terms are accepted.  Processing stops
+ * at the first term that is rejected, with parse_state->error describing
+ * the problem and where it sits in the event string.
+ *
+ * Returns 0 on success or when there is nothing to apply, -EINVAL for a
+ * term of the wrong type, otherwise the error from bpf__config_obj().
+ */
+static int
+parse_events_config_bpf(struct parse_events_state *parse_state,
+			struct bpf_object *obj,
+			struct list_head *head_config)
+{
+	struct parse_events_term *term;
+	char errbuf[BUFSIZ];
+	int error_pos;
+	int err;
+
+	if (head_config == NULL || list_empty(head_config))
+		return 0;
+
+	list_for_each_entry(term, head_config, list) {
+		if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
+			snprintf(errbuf, sizeof(errbuf),
+				 "Invalid config term for BPF object");
+			errbuf[BUFSIZ - 1] = '\0';
+
+			parse_state->error->idx = term->err_term;
+			parse_state->error->str = strdup(errbuf);
+			return -EINVAL;
+		}
+
+		err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos);
+		if (!err)
+			continue;
+
+		bpf__strerror_config_obj(obj, term, parse_state->evlist,
+					 &error_pos, err, errbuf,
+					 sizeof(errbuf));
+		parse_state->error->help = strdup(
+"Hint:\tValid config terms:\n"
+"     \tmap:[<arraymap>].value<indices>=[value]\n"
+"     \tmap:[<eventmap>].event<indices>=[event]\n"
+"\n"
+"     \twhere <indices> is something like [0,3...5] or [all]\n"
+"     \t(add -v to see detail)");
+		parse_state->error->str = strdup(errbuf);
+
+		/* point at the value for a bad value, else into the term */
+		parse_state->error->idx =
+			(err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) ?
+			term->err_val : term->err_term + error_pos;
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Split config terms:
+ * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ...
+ *  'call-graph=fp' is 'evt config', should be applied to each
+ *  events in bpf.c.
+ * 'map:array.value[0]=1' is 'obj config', should be processed
+ * with parse_events_config_bpf.
+ *
+ * Move object config terms from the first list to obj_head_config.
+ */
+static void
+split_bpf_config_terms(struct list_head *evt_head_config,
+		       struct list_head *obj_head_config)
+{
+	struct parse_events_term *pos, *next;
+
+	/*
+	 * Currently every possible user config term belongs to the
+	 * bpf object, so parse_events__is_hardcoded_term() happens to
+	 * be a good way to tell the two kinds apart.
+	 *
+	 * See parse_events_config_bpf() and
+	 * config_term_tracepoint().
+	 */
+	list_for_each_entry_safe(pos, next, evt_head_config, list) {
+		if (parse_events__is_hardcoded_term(pos))
+			continue;
+
+		list_move_tail(&pos->list, obj_head_config);
+	}
+}
+
+/*
+ * Prepare the BPF object named by @bpf_file_name, load it, add its events
+ * to @list and apply the object level config terms.
+ *
+ * @source:      passed on to bpf__prepare_load()
+ * @head_config: optional config terms.  Object level terms are moved to a
+ *               private list while the events are added and are always
+ *               moved back before returning, on success and on failure, so
+ *               the caller still sees every term it passed in.
+ *
+ * Returns 0 on success, a negative error otherwise.  When preparing the
+ * object fails, parse_state->error->str / ->help are set here;
+ * parse_state->error is dereferenced without a NULL check.
+ */
+int parse_events_load_bpf(struct parse_events_state *parse_state,
+			  struct list_head *list,
+			  char *bpf_file_name,
+			  bool source,
+			  struct list_head *head_config)
+{
+	int err;
+	struct bpf_object *obj;
+	LIST_HEAD(obj_head_config);
+
+	if (head_config)
+		split_bpf_config_terms(head_config, &obj_head_config);
+
+	obj = bpf__prepare_load(bpf_file_name, source);
+	if (IS_ERR(obj)) {
+		char errbuf[BUFSIZ];
+
+		err = PTR_ERR(obj);
+
+		if (err == -ENOTSUP)
+			snprintf(errbuf, sizeof(errbuf),
+				 "BPF support is not compiled");
+		else
+			bpf__strerror_prepare_load(bpf_file_name,
+						   source,
+						   -err, errbuf,
+						   sizeof(errbuf));
+
+		parse_state->error->help = strdup("(add -v to see detail)");
+		parse_state->error->str = strdup(errbuf);
+		goto out;
+	}
+
+	err = parse_events_load_bpf_obj(parse_state, list, obj, head_config);
+	if (err)
+		goto out;
+	err = parse_events_config_bpf(parse_state, obj, &obj_head_config);
+
+out:
+	/*
+	 * The caller knows nothing about obj_head_config, so join the two
+	 * lists again on every exit path.  Otherwise the terms moved away
+	 * above would be lost once the on-stack list head goes out of scope.
+	 */
+	if (head_config)
+		list_splice_tail(&obj_head_config, head_config);
+	return err;
+}
+
+/*
+ * Translate a breakpoint access string into attr->bp_type bits.
+ *
+ * @type: up to three characters out of 'r', 'w' and 'x'; may be NULL.
+ *        Characters past the third are not looked at.
+ * @attr: bp_type receives HW_BREAKPOINT_R / _W / _X accordingly
+ *
+ * When no bit ends up set, the type defaults to read + write.
+ *
+ * Returns 0 on success, -EINVAL for an unknown character or for an access
+ * type whose bit is already set.
+ */
+static int
+parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
+{
+	int i;
+
+	for (i = 0; type && i < 3 && type[i]; i++) {
+		int bit;
+
+		switch (type[i]) {
+		case 'r':
+			bit = HW_BREAKPOINT_R;
+			break;
+		case 'w':
+			bit = HW_BREAKPOINT_W;
+			break;
+		case 'x':
+			bit = HW_BREAKPOINT_X;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		/* each access type may be requested only once */
+		if (attr->bp_type & bit)
+			return -EINVAL;
+
+		attr->bp_type |= bit;
+	}
+
+	if (!attr->bp_type) /* Default */
+		attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
+
+	return 0;
+}
+
+/*
+ * Add a hardware breakpoint event.
+ *
+ * @ptr:  address to watch
+ * @type: access string, see parse_breakpoint_type()
+ * @len:  length to watch; 0 selects sizeof(long) for a pure execute
+ *        breakpoint and HW_BREAKPOINT_LEN_4 otherwise
+ *
+ * Returns 0 on success, -EINVAL for a bad access string, -ENOMEM when the
+ * event cannot be allocated.
+ */
+int parse_events_add_breakpoint(struct list_head *list, int *idx,
+				void *ptr, char *type, u64 len)
+{
+	struct perf_event_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.bp_addr = (unsigned long) ptr;
+
+	if (parse_breakpoint_type(type, &attr) != 0)
+		return -EINVAL;
+
+	/* Pick a default length when the user gave none */
+	if (len == 0)
+		len = (attr.bp_type == HW_BREAKPOINT_X) ?
+			sizeof(long) : HW_BREAKPOINT_LEN_4;
+
+	attr.bp_len = len;
+	attr.type = PERF_TYPE_BREAKPOINT;
+	attr.sample_period = 1;
+
+	return add_event(list, idx, &attr, NULL, NULL);
+}
+
+/*
+ * Check that the value of @term has the kind (number or string) given by
+ * @type.
+ *
+ * @err: optional; on mismatch it receives the position of the value and a
+ *       message naming the expected kind
+ *
+ * Returns 0 when the kinds agree, -EINVAL otherwise.
+ */
+static int check_type_val(struct parse_events_term *term,
+			  struct parse_events_error *err,
+			  int type)
+{
+	const char *msg;
+
+	if (term->type_val == type)
+		return 0;
+
+	if (err != NULL) {
+		msg = (type == PARSE_EVENTS__TERM_TYPE_NUM) ?
+			"expected numeric value" : "expected string value";
+
+		err->idx = term->err_val;
+		err->str = strdup(msg);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * User visible name of each config term, indexed by
+ * PARSE_EVENTS__TERM_TYPE_*.  Used when building error messages.
+ *
+ * Keep in sync with the term keywords in parse-events.l.
+ */
+static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
+	[PARSE_EVENTS__TERM_TYPE_USER]			= "<sysfs term>",
+	[PARSE_EVENTS__TERM_TYPE_CONFIG]		= "config",
+	[PARSE_EVENTS__TERM_TYPE_CONFIG1]		= "config1",
+	[PARSE_EVENTS__TERM_TYPE_CONFIG2]		= "config2",
+	[PARSE_EVENTS__TERM_TYPE_NAME]			= "name",
+	[PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD]		= "period",
+	[PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ]		= "freq",
+	[PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE]	= "branch_type",
+	[PARSE_EVENTS__TERM_TYPE_TIME]			= "time",
+	[PARSE_EVENTS__TERM_TYPE_CALLGRAPH]		= "call-graph",
+	[PARSE_EVENTS__TERM_TYPE_STACKSIZE]		= "stack-size",
+	[PARSE_EVENTS__TERM_TYPE_NOINHERIT]		= "no-inherit",
+	[PARSE_EVENTS__TERM_TYPE_INHERIT]		= "inherit",
+	[PARSE_EVENTS__TERM_TYPE_MAX_STACK]		= "max-stack",
+	[PARSE_EVENTS__TERM_TYPE_OVERWRITE]		= "overwrite",
+	[PARSE_EVENTS__TERM_TYPE_NOOVERWRITE]		= "no-overwrite",
+	[PARSE_EVENTS__TERM_TYPE_DRV_CFG]		= "driver-config",
+};
+
+static bool config_term_shrinked;
+
+/*
+ * Tell whether the config term @term_type may be used.
+ *
+ * Every valid term is available until parse_events__shrink_config_terms()
+ * has been called; after that only config, config1, config2, name and
+ * period are.
+ *
+ * @err: optional; when given, err->str receives the reason for a refusal
+ *
+ * Returns true when the term may be used, false when it is out of range
+ * or not available.
+ */
+static bool
+config_term_avail(int term_type, struct parse_events_error *err)
+{
+	if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) {
+		/* err is optional here just like in the default case below */
+		if (err)
+			err->str = strdup("Invalid term_type");
+		return false;
+	}
+	if (!config_term_shrinked)
+		return true;
+
+	switch (term_type) {
+	case PARSE_EVENTS__TERM_TYPE_CONFIG:
+	case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+	case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+	case PARSE_EVENTS__TERM_TYPE_NAME:
+	case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+		return true;
+	default:
+		if (!err)
+			return false;
+
+		/* term_type is validated so indexing is safe */
+		if (asprintf(&err->str, "'%s' is not usable in 'perf stat'",
+			     config_term_names[term_type]) < 0)
+			err->str = NULL;
+		return false;
+	}
+}
+
+/*
+ * Restrict the usable config terms from now on: afterwards
+ * config_term_avail() only accepts config, config1, config2, name and
+ * period.  The error message it produces suggests this is meant for
+ * 'perf stat'.
+ */
+void parse_events__shrink_config_terms(void)
+{
+	config_term_shrinked = true;
+}
+
+/*
+ * Validate one hardcoded config term and apply it to @attr where it maps
+ * directly onto an attribute field.
+ *
+ * config, config1 and config2 are stored in @attr, and a branch_type other
+ * than "no" is parsed into attr->branch_sample_type.  All other terms are
+ * only checked for the expected value kind (and range, for "time") here.
+ *
+ * @err: optional error structure, as for check_type_val(); it is only
+ *       filled in when non-NULL
+ *
+ * Returns 0 when the term is valid and available, -EINVAL otherwise.
+ */
+static int config_term_common(struct perf_event_attr *attr,
+			      struct parse_events_term *term,
+			      struct parse_events_error *err)
+{
+#define CHECK_TYPE_VAL(type)						   \
+do {									   \
+	if (check_type_val(term, err, PARSE_EVENTS__TERM_TYPE_ ## type)) \
+		return -EINVAL;						   \
+} while (0)
+
+	switch (term->type_term) {
+	case PARSE_EVENTS__TERM_TYPE_CONFIG:
+		CHECK_TYPE_VAL(NUM);
+		attr->config = term->val.num;
+		break;
+	case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+		CHECK_TYPE_VAL(NUM);
+		attr->config1 = term->val.num;
+		break;
+	case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+		CHECK_TYPE_VAL(NUM);
+		attr->config2 = term->val.num;
+		break;
+	case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+		CHECK_TYPE_VAL(STR);
+		/* "no" is accepted as is and leaves the attribute untouched */
+		if (strcmp(term->val.str, "no") &&
+		    parse_branch_str(term->val.str, &attr->branch_sample_type)) {
+			if (err) {
+				err->str = strdup("invalid branch sample type");
+				err->idx = term->err_val;
+			}
+			return -EINVAL;
+		}
+		break;
+	case PARSE_EVENTS__TERM_TYPE_TIME:
+		CHECK_TYPE_VAL(NUM);
+		if (term->val.num > 1) {
+			if (err) {
+				err->str = strdup("expected 0 or 1");
+				err->idx = term->err_val;
+			}
+			return -EINVAL;
+		}
+		break;
+	/* numeric terms that need no further checking at this point */
+	case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+	case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+	case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+	case PARSE_EVENTS__TERM_TYPE_INHERIT:
+	case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+	case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+	case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+		CHECK_TYPE_VAL(NUM);
+		break;
+	/* string terms that need no further checking at this point */
+	case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+	case PARSE_EVENTS__TERM_TYPE_NAME:
+		CHECK_TYPE_VAL(STR);
+		break;
+	default:
+		if (err) {
+			err->str = strdup("unknown term");
+			err->idx = term->err_term;
+			err->help = parse_events_formats_error_string(NULL);
+		}
+		return -EINVAL;
+	}
+
+#undef CHECK_TYPE_VAL
+
+	/*
+	 * Check term availability after the basic checking so that
+	 * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered.
+	 *
+	 * If availability were checked at the entry of this function,
+	 * the user would see "'<sysfs term>' is not usable in 'perf stat'"
+	 * when an invalid config term is given for a legacy event
+	 * (for example, instructions/badterm/...), which is confusing.
+	 */
+	if (!config_term_avail(term->type_term, err))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Config term handler for PMU events.
+ *
+ * User (sysfs) terms and driver-config terms are accepted without any
+ * checking; everything else goes through config_term_common().
+ */
+static int config_term_pmu(struct perf_event_attr *attr,
+			   struct parse_events_term *term,
+			   struct parse_events_error *err)
+{
+	switch (term->type_term) {
+	case PARSE_EVENTS__TERM_TYPE_USER:
+	case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+		/*
+		 * Always succeed for sysfs terms, as we don't know
+		 * at this point what type they need to have.
+		 */
+		return 0;
+	default:
+		return config_term_common(attr, term, err);
+	}
+}
+
+/*
+ * Config term handler for tracepoint events.
+ *
+ * Only call-graph, stack-size, inherit, no-inherit, max-stack, overwrite
+ * and no-overwrite are accepted; they are validated by
+ * config_term_common().  Any other term is rejected.
+ *
+ * @err: optional; receives position, message and the list of valid terms
+ *       when the term is rejected here
+ *
+ * Returns 0 when the term is valid, -EINVAL otherwise.
+ */
+static int config_term_tracepoint(struct perf_event_attr *attr,
+				  struct parse_events_term *term,
+				  struct parse_events_error *err)
+{
+	switch (term->type_term) {
+	case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+	case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+	case PARSE_EVENTS__TERM_TYPE_INHERIT:
+	case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+	case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+	case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+		return config_term_common(attr, term, err);
+	default:
+		if (err) {
+			err->idx = term->err_term;
+			err->str = strdup("unknown term");
+			/* keep this list in step with the cases above */
+			err->help = strdup("valid terms: call-graph,stack-size,"
+					   "inherit,no-inherit,max-stack,"
+					   "overwrite,no-overwrite\n");
+		}
+		return -EINVAL;
+	}
+}
+
+/*
+ * Run @config_term on every term of @head, stopping at the first failure.
+ *
+ * Returns 0 when every term was accepted, -EINVAL as soon as one is not
+ * (whatever non-zero value the handler returned).
+ */
+static int config_attr(struct perf_event_attr *attr,
+		       struct list_head *head,
+		       struct parse_events_error *err,
+		       config_term_func_t config_term)
+{
+	struct parse_events_term *pos;
+
+	list_for_each_entry(pos, head, list) {
+		if (config_term(attr, pos, err) != 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Build the per-evsel config term list from the parsed terms.
+ *
+ * @head_config: parsed terms (struct parse_events_term)
+ * @head_terms:  list receiving one struct perf_evsel_config_term for each
+ *               term that has an evsel level counterpart; other terms are
+ *               skipped
+ *
+ * inherit/no-inherit and overwrite/no-overwrite are each folded into a
+ * single boolean term, with the "no-" variant inverting the value.  String
+ * values are stored as pointers into the parsed term, not copied.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation fails; entries added
+ * before the failure stay on @head_terms.
+ */
+static int get_config_terms(struct list_head *head_config,
+			    struct list_head *head_terms __maybe_unused)
+{
+/* Relies on the local variable 'term' and returns from the function on OOM. */
+#define ADD_CONFIG_TERM(__type, __name, __val)			\
+do {								\
+	struct perf_evsel_config_term *__t;			\
+								\
+	__t = zalloc(sizeof(*__t));				\
+	if (!__t)						\
+		return -ENOMEM;					\
+								\
+	INIT_LIST_HEAD(&__t->list);				\
+	__t->type       = PERF_EVSEL__CONFIG_TERM_ ## __type;	\
+	__t->val.__name = __val;				\
+	__t->weak	= term->weak;				\
+	list_add_tail(&__t->list, head_terms);			\
+} while (0)
+
+	struct parse_events_term *term;
+
+	list_for_each_entry(term, head_config, list) {
+		switch (term->type_term) {
+		case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+			ADD_CONFIG_TERM(PERIOD, period, term->val.num);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+			ADD_CONFIG_TERM(FREQ, freq, term->val.num);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_TIME:
+			ADD_CONFIG_TERM(TIME, time, term->val.num);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+			ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+			ADD_CONFIG_TERM(BRANCH, branch, term->val.str);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+			ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_INHERIT:
+			ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 1 : 0);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+			ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+			ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+			ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+			ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 0 : 1);
+			break;
+		case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+			ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str);
+			break;
+		default:
+			break;
+		}
+	}
+	/* undefine the macro defined above so it does not leak out of here */
+#undef ADD_CONFIG_TERM
+	return 0;
+}
+
+/*
+ * Add the tracepoint(s) described by @sys:@event to @list.
+ *
+ * @head_config: optional config terms; validated with
+ *               config_term_tracepoint() before anything is added
+ *
+ * A subsystem name containing '*' or '?' is treated as a glob pattern over
+ * all subsystems.
+ *
+ * Returns 0 on success, -EINVAL for an invalid config term, otherwise the
+ * error of the helper that did the adding.
+ */
+int parse_events_add_tracepoint(struct list_head *list, int *idx,
+				const char *sys, const char *event,
+				struct parse_events_error *err,
+				struct list_head *head_config)
+{
+	/* scratch attr, only here so the terms can be validated */
+	struct perf_event_attr attr;
+
+	if (head_config &&
+	    config_attr(&attr, head_config, err, config_term_tracepoint))
+		return -EINVAL;
+
+	if (strpbrk(sys, "*?") != NULL)
+		return add_tracepoint_multi_sys(list, idx, sys, event,
+						err, head_config);
+
+	return add_tracepoint_event(list, idx, sys, event, err, head_config);
+}
+
+/*
+ * Add an event given directly by its numeric @type and @config.
+ *
+ * @head_config: optional config terms, validated with config_term_common()
+ *               and turned into evsel config terms
+ *
+ * The event takes its name from the "name" config term, if there is one.
+ *
+ * Returns 0 on success, -EINVAL for an invalid config term, -ENOMEM on
+ * allocation failure.
+ */
+int parse_events_add_numeric(struct parse_events_state *parse_state,
+			     struct list_head *list,
+			     u32 type, u64 config,
+			     struct list_head *head_config)
+{
+	struct perf_event_attr attr;
+	LIST_HEAD(config_terms);
+	char *evt_name;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.type = type;
+	attr.config = config;
+
+	if (head_config != NULL) {
+		if (config_attr(&attr, head_config, parse_state->error,
+				config_term_common) != 0)
+			return -EINVAL;
+
+		if (get_config_terms(head_config, &config_terms) != 0)
+			return -ENOMEM;
+	}
+
+	evt_name = get_config_name(head_config);
+
+	return add_event(list, &parse_state->idx, &attr,
+			 evt_name, &config_terms);
+}
+
+/*
+ * Add an event for the PMU called @name.
+ *
+ * @parse_state:      supplies the evsel index and the error structure
+ * @list:             list receiving the new evsel
+ * @name:             PMU name; the pointer itself is stored in
+ *                    evsel->pmu_name, the string is not copied
+ * @head_config:      optional config terms
+ * @auto_merge_stats: passed through to the evsel
+ *
+ * Returns 0 on success, -EINVAL when the PMU is unknown or a term is
+ * rejected, -ENOMEM on allocation failure.  parse_state->error is
+ * dereferenced without a NULL check when the PMU is not found.
+ */
+int parse_events_add_pmu(struct parse_events_state *parse_state,
+			 struct list_head *list, char *name,
+			 struct list_head *head_config, bool auto_merge_stats)
+{
+	struct perf_event_attr attr;
+	struct perf_pmu_info info;
+	struct perf_pmu *pmu;
+	struct perf_evsel *evsel;
+	struct parse_events_error *err = parse_state->error;
+	LIST_HEAD(config_terms);
+
+	pmu = perf_pmu__find(name);
+	if (!pmu) {
+		if (asprintf(&err->str,
+				"Cannot find PMU `%s'. Missing kernel support?",
+				name) < 0)
+			err->str = NULL;
+		return -EINVAL;
+	}
+
+	/* start from the PMU's default attr when it provides one */
+	if (pmu->default_config) {
+		memcpy(&attr, pmu->default_config,
+		       sizeof(struct perf_event_attr));
+	} else {
+		memset(&attr, 0, sizeof(attr));
+	}
+
+	/* no terms at all: a bare PMU event without name or config terms */
+	if (!head_config) {
+		attr.type = pmu->type;
+		evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats);
+		if (evsel) {
+			evsel->pmu_name = name;
+			return 0;
+		} else {
+			return -ENOMEM;
+		}
+	}
+
+	/*
+	 * NOTE(review): presumably resolves alias terms in head_config and
+	 * fills 'info' with the alias' unit/scale/etc. - confirm in pmu.c.
+	 */
+	if (perf_pmu__check_alias(pmu, head_config, &info))
+		return -EINVAL;
+
+	/*
+	 * Configure the hardcoded terms first.  User (sysfs) and driver
+	 * config terms are let through unchecked by config_term_pmu().
+	 */
+	if (config_attr(&attr, head_config, parse_state->error, config_term_pmu))
+		return -EINVAL;
+
+	if (get_config_terms(head_config, &config_terms))
+		return -ENOMEM;
+
+	/*
+	 * NOTE(review): on failure the entries already collected on
+	 * config_terms are not released here.
+	 */
+	if (perf_pmu__config(pmu, &attr, head_config, parse_state->error))
+		return -EINVAL;
+
+	evsel = __add_event(list, &parse_state->idx, &attr,
+			    get_config_name(head_config), pmu,
+			    &config_terms, auto_merge_stats);
+	if (evsel) {
+		/* carry over what perf_pmu__check_alias() reported in 'info' */
+		evsel->unit = info.unit;
+		evsel->scale = info.scale;
+		evsel->per_pkg = info.per_pkg;
+		evsel->snapshot = info.snapshot;
+		evsel->metric_expr = info.metric_expr;
+		evsel->metric_name = info.metric_name;
+		evsel->pmu_name = name;
+	}
+
+	return evsel ? 0 : -ENOMEM;
+}
+
+/*
+ * Add the event alias str for every PMU that defines an alias of that
+ * name (compared case-insensitively).
+ *
+ * On success *listp receives a malloc'ed list head carrying the new
+ * evsels and 0 is returned.  On failure (allocation failure, or no PMU
+ * accepted the alias) -1 is returned, *listp is NULL and everything
+ * allocated here has been released again.
+ */
+int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
+			       char *str, struct list_head **listp)
+{
+	struct list_head *head;
+	struct parse_events_term *term;
+	struct list_head *list;
+	struct perf_pmu *pmu = NULL;
+	struct perf_evsel *evsel, *tmp;
+	int ok = 0;
+
+	*listp = NULL;
+	/* Add it for all PMUs that support the alias */
+	list = malloc(sizeof(struct list_head));
+	if (!list)
+		return -1;
+	INIT_LIST_HEAD(list);
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		struct perf_pmu_alias *alias;
+
+		list_for_each_entry(alias, &pmu->aliases, list) {
+			if (!strcasecmp(alias->name, str)) {
+				head = malloc(sizeof(struct list_head));
+				if (!head)
+					goto out_err;
+				INIT_LIST_HEAD(head);
+				/*
+				 * There is no parser location for this
+				 * synthesized term, so pass NULL: the location
+				 * arguments are read as YYLTYPE and must not
+				 * point at an unrelated object.
+				 */
+				if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+							   str, 1, false, NULL, NULL) < 0) {
+					free(head);
+					goto out_err;
+				}
+				list_add_tail(&term->list, head);
+
+				if (!parse_events_add_pmu(parse_state, list,
+							  pmu->name, head, true)) {
+					pr_debug("%s -> %s/%s/\n", str,
+						 pmu->name, alias->str);
+					ok++;
+				}
+
+				parse_events_terms__delete(head);
+			}
+		}
+	}
+	if (!ok)
+		goto out_err;
+	*listp = list;
+	return 0;
+
+out_err:
+	/* Drop whatever earlier iterations managed to add, then the head. */
+	list_for_each_entry_safe(evsel, tmp, list, node) {
+		list_del_init(&evsel->node);
+		perf_evsel__delete(evsel);
+	}
+	free(list);
+	return -1;
+}
+
+/*
+ * Apply the modifier string event_mod to every event on list, layering
+ * it on top of each event's current settings (the "add" mode of
+ * parse_events__modifier_event()).
+ */
+int parse_events__modifier_group(struct list_head *list,
+				 char *event_mod)
+{
+	const bool add = true;
+
+	return parse_events__modifier_event(list, event_mod, add);
+}
+
+/*
+ * Turn the events on list into a group and give the leader (the first
+ * entry) a private copy of name, or NULL when no name was supplied.
+ * An empty list only triggers a one-time warning.
+ */
+void parse_events__set_leader(char *name, struct list_head *list)
+{
+	struct perf_evsel *leader;
+	char *group_name = NULL;
+
+	if (list_empty(list)) {
+		WARN_ONCE(true, "WARNING: failed to set leader: empty list");
+		return;
+	}
+
+	__perf_evlist__set_leader(list);
+
+	if (name)
+		group_name = strdup(name);
+
+	leader = list_entry(list->next, struct perf_evsel, node);
+	leader->group_name = group_name;
+}
+
+/*
+ * Called once per single event definition.
+ *
+ * Moves every entry of list_event to the tail of list_all and then
+ * releases the list_event head itself, which is therefore assumed to
+ * point to malloc'ed memory.
+ */
+void parse_events_update_lists(struct list_head *list_event,
+			       struct list_head *list_all)
+{
+	struct list_head *single = list_event;
+
+	list_splice_tail(single, list_all);
+	free(single);
+}
+
+/*
+ * Decoded form of an event modifier string, filled in by
+ * get_event_modifier() and copied onto each evsel by
+ * parse_events__modifier_event().
+ */
+struct event_modifier {
+	int eu;			/* -> attr.exclude_user */
+	int ek;			/* -> attr.exclude_kernel */
+	int eh;			/* -> attr.exclude_hv */
+	int eH;			/* -> attr.exclude_host */
+	int eG;			/* -> attr.exclude_guest */
+	int eI;			/* -> attr.exclude_idle */
+	int precise;		/* -> attr.precise_ip, number of 'p' seen (0-3) */
+	int precise_max;	/* 'P' modifier seen */
+	int exclude_GH;		/* a 'G' or 'H' modifier has been applied */
+	int sample_read;	/* 'S' modifier seen */
+	int pinned;		/* 'D' modifier seen, applied to group leaders only */
+	int weak;		/* 'W' modifier seen -> evsel->weak_group */
+};
+
+/*
+ * Decode the modifier string str into *mod.
+ *
+ * When evsel is non-NULL the exclude_*, precise_ip, pinned and
+ * exclude_GH values start from that evsel's current state, so the
+ * string is layered on top of it; with a NULL evsel everything starts
+ * at zero.  Parsing stops at the first character that is not a known
+ * modifier.
+ *
+ * Returns 0 on success, -EINVAL when more than three levels of 'p' end
+ * up requested; in that case *mod is left zeroed.
+ */
+static int get_event_modifier(struct event_modifier *mod, char *str,
+			       struct perf_evsel *evsel)
+{
+	int eu = evsel ? evsel->attr.exclude_user : 0;
+	int ek = evsel ? evsel->attr.exclude_kernel : 0;
+	int eh = evsel ? evsel->attr.exclude_hv : 0;
+	int eH = evsel ? evsel->attr.exclude_host : 0;
+	int eG = evsel ? evsel->attr.exclude_guest : 0;
+	int eI = evsel ? evsel->attr.exclude_idle : 0;
+	int precise = evsel ? evsel->attr.precise_ip : 0;
+	int precise_max = 0;
+	int sample_read = 0;
+	int pinned = evsel ? evsel->attr.pinned : 0;
+
+	/* non-zero once any of user/kernel/hv is already excluded */
+	int exclude = eu | ek | eh;
+	int exclude_GH = evsel ? evsel->exclude_GH : 0;
+	int weak = 0;
+
+	memset(mod, 0, sizeof(*mod));
+
+	while (*str) {
+		/*
+		 * u/k/h: if nothing is excluded yet, exclude all three
+		 * levels, then re-enable the one that was named.
+		 */
+		if (*str == 'u') {
+			if (!exclude)
+				exclude = eu = ek = eh = 1;
+			eu = 0;
+		} else if (*str == 'k') {
+			if (!exclude)
+				exclude = eu = ek = eh = 1;
+			ek = 0;
+		} else if (*str == 'h') {
+			if (!exclude)
+				exclude = eu = ek = eh = 1;
+			eh = 0;
+		/* G/H follow the same scheme for guest/host */
+		} else if (*str == 'G') {
+			if (!exclude_GH)
+				exclude_GH = eG = eH = 1;
+			eG = 0;
+		} else if (*str == 'H') {
+			if (!exclude_GH)
+				exclude_GH = eG = eH = 1;
+			eH = 0;
+		} else if (*str == 'I') {
+			eI = 1;
+		} else if (*str == 'p') {
+			precise++;
+			/* use of precise requires exclude_guest */
+			if (!exclude_GH)
+				eG = 1;
+		} else if (*str == 'P') {
+			precise_max = 1;
+		} else if (*str == 'S') {
+			sample_read = 1;
+		} else if (*str == 'D') {
+			pinned = 1;
+		} else if (*str == 'W') {
+			weak = 1;
+		} else
+			break;
+
+		++str;
+	}
+
+	/*
+	 * precise ip:
+	 *
+	 *  0 - SAMPLE_IP can have arbitrary skid
+	 *  1 - SAMPLE_IP must have constant skid
+	 *  2 - SAMPLE_IP requested to have 0 skid
+	 *  3 - SAMPLE_IP must have 0 skid
+	 *
+	 *  See also PERF_RECORD_MISC_EXACT_IP
+	 */
+	if (precise > 3)
+		return -EINVAL;
+
+	mod->eu = eu;
+	mod->ek = ek;
+	mod->eh = eh;
+	mod->eH = eH;
+	mod->eG = eG;
+	mod->eI = eI;
+	mod->precise = precise;
+	mod->precise_max = precise_max;
+	mod->exclude_GH = exclude_GH;
+	mod->sample_read = sample_read;
+	mod->pinned = pinned;
+	mod->weak = weak;
+
+	return 0;
+}
+
+/*
+ * Cheap sanity check of a modifier string: it must not be longer than
+ * the longest sensible combination and, 'p' aside, no character may
+ * occur more than once.  Returns 0 when acceptable, -1 otherwise.
+ */
+static int check_modifier(char *str)
+{
+	/* Longest accepted string; sizeof counts the trailing 0 byte too. */
+	static const char longest[] = "ukhGHpppPSDIW";
+	size_t len = strlen(str);
+	size_t i;
+
+	if (len > sizeof(longest) - 1)
+		return -1;
+
+	for (i = 0; i < len; i++) {
+		/* 'p' is the one modifier that may legitimately repeat */
+		if (str[i] == 'p')
+			continue;
+		if (strchr(&str[i + 1], str[i]) != NULL)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Apply the modifier string str to every evsel on list.
+ *
+ * With add == false the string is decoded once, starting from a blank
+ * state, and the same result is written to every evsel.  With
+ * add == true it is decoded per evsel on top of that evsel's current
+ * settings.  A NULL str is a no-op.
+ *
+ * Returns 0 on success, -EINVAL for a malformed modifier string.
+ */
+int parse_events__modifier_event(struct list_head *list, char *str, bool add)
+{
+	struct event_modifier mod;
+	struct perf_evsel *evsel;
+
+	if (!str)
+		return 0;
+
+	if (check_modifier(str) != 0)
+		return -EINVAL;
+
+	if (!add) {
+		if (get_event_modifier(&mod, str, NULL))
+			return -EINVAL;
+	}
+
+	__evlist__for_each_entry(list, evsel) {
+		struct perf_event_attr *attr = &evsel->attr;
+
+		if (add) {
+			if (get_event_modifier(&mod, str, evsel))
+				return -EINVAL;
+		}
+
+		attr->exclude_user   = mod.eu;
+		attr->exclude_kernel = mod.ek;
+		attr->exclude_hv     = mod.eh;
+		attr->precise_ip     = mod.precise;
+		attr->exclude_host   = mod.eH;
+		attr->exclude_guest  = mod.eG;
+		attr->exclude_idle   = mod.eI;
+
+		evsel->exclude_GH    = mod.exclude_GH;
+		evsel->sample_read   = mod.sample_read;
+		evsel->precise_max   = mod.precise_max;
+		evsel->weak_group    = mod.weak;
+
+		/* pinned is only written on group leaders */
+		if (perf_evsel__is_group_leader(evsel))
+			attr->pinned = mod.pinned;
+	}
+
+	return 0;
+}
+
+/*
+ * Give every still unnamed evsel on list its own copy of name.
+ * Evsels that already carry a name are left alone.  Always returns 0.
+ */
+int parse_events_name(struct list_head *list, char *name)
+{
+	struct perf_evsel *pos;
+
+	__evlist__for_each_entry(list, pos) {
+		if (pos->name)
+			continue;
+		pos->name = strdup(name);
+	}
+
+	return 0;
+}
+
+/*
+ * qsort()/bsearch() comparator for struct perf_pmu_event_symbol:
+ * orders entries by symbol name, ignoring case.
+ */
+static int
+comp_pmu(const void *p1, const void *p2)
+{
+	const struct perf_pmu_event_symbol *lhs = p1;
+	const struct perf_pmu_event_symbol *rhs = p2;
+
+	return strcasecmp(lhs->symbol, rhs->symbol);
+}
+
+/*
+ * Release the cached PMU event symbol table: every symbol string, then
+ * the table itself, and reset the entry count to 0.  Nothing happens
+ * when the count is not positive.
+ */
+static void perf_pmu__parse_cleanup(void)
+{
+	int i;
+
+	if (perf_pmu_events_list_num <= 0)
+		return;
+
+	for (i = 0; i < perf_pmu_events_list_num; i++)
+		zfree(&perf_pmu_events_list[i].symbol);
+
+	zfree(&perf_pmu_events_list);
+	perf_pmu_events_list_num = 0;
+}
+
+/*
+ * Store an already duplicated string and its type in the entry that the
+ * local variable 'p' points to; jumps to the local 'err' label when the
+ * duplication failed (str == NULL).
+ */
+#define SET_SYMBOL(str, stype)		\
+do {					\
+	p->symbol = str;		\
+	if (!p->symbol)			\
+		goto err;		\
+	p->type = stype;		\
+} while (0)
+
+/*
+ * Read the pmu events list from sysfs
+ * Save it into perf_pmu_events_list
+ *
+ * An alias name containing '-' contributes two entries: the part before
+ * the first '-' as a prefix symbol and the rest as a suffix symbol.
+ * The finished table is sorted with comp_pmu() for later bsearch().
+ *
+ * perf_pmu_events_list_num is set to -1 when there are no aliases at
+ * all, and stays 0 when the table cannot be allocated or filled.
+ */
+static void perf_pmu__parse_init(void)
+{
+
+	struct perf_pmu *pmu = NULL;
+	struct perf_pmu_alias *alias;
+	int len = 0;
+
+	pmu = NULL;
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		list_for_each_entry(alias, &pmu->aliases, list) {
+			if (strchr(alias->name, '-'))
+				len++;
+			len++;
+		}
+	}
+
+	if (len == 0) {
+		perf_pmu_events_list_num = -1;
+		return;
+	}
+	/*
+	 * Zero-initialised on purpose: if a strdup() below fails, the
+	 * cleanup walks the whole table and must only ever see NULL or
+	 * valid pointers in the slots that were not reached yet.
+	 */
+	perf_pmu_events_list = calloc(len, sizeof(struct perf_pmu_event_symbol));
+	if (!perf_pmu_events_list)
+		return;
+	perf_pmu_events_list_num = len;
+
+	len = 0;
+	pmu = NULL;
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		list_for_each_entry(alias, &pmu->aliases, list) {
+			struct perf_pmu_event_symbol *p = perf_pmu_events_list + len;
+			char *tmp = strchr(alias->name, '-');
+
+			if (tmp != NULL) {
+				SET_SYMBOL(strndup(alias->name, tmp - alias->name),
+						PMU_EVENT_SYMBOL_PREFIX);
+				p++;
+				SET_SYMBOL(strdup(++tmp), PMU_EVENT_SYMBOL_SUFFIX);
+				len += 2;
+			} else {
+				SET_SYMBOL(strdup(alias->name), PMU_EVENT_SYMBOL);
+				len++;
+			}
+		}
+	}
+	qsort(perf_pmu_events_list, len,
+		sizeof(struct perf_pmu_event_symbol), comp_pmu);
+
+	return;
+err:
+	perf_pmu__parse_cleanup();
+}
+
+/*
+ * Classify name against the cached list of PMU event symbols.
+ *
+ * The list is built lazily on first use.  Returns the symbol type of the
+ * matching entry (case-insensitive match), or PMU_EVENT_SYMBOL_ERR when
+ * there is no list, name is "cpu", memory is exhausted, or nothing
+ * matches.
+ */
+enum perf_pmu_event_symbol_type
+perf_pmu__parse_check(const char *name)
+{
+	struct perf_pmu_event_symbol p, *r;
+
+	/* scan kernel pmu events from sysfs if needed */
+	if (perf_pmu_events_list_num == 0)
+		perf_pmu__parse_init();
+	/*
+	 * name "cpu" could be prefix of cpu-cycles or cpu// events.
+	 * cpu-cycles has been handled by hardcode.
+	 * So it must be cpu// events, not kernel pmu event.
+	 */
+	if ((perf_pmu_events_list_num <= 0) || !strcmp(name, "cpu"))
+		return PMU_EVENT_SYMBOL_ERR;
+
+	p.symbol = strdup(name);
+	/* comp_pmu() dereferences the key, so it must not be NULL */
+	if (!p.symbol)
+		return PMU_EVENT_SYMBOL_ERR;
+
+	r = bsearch(&p, perf_pmu_events_list,
+			(size_t) perf_pmu_events_list_num,
+			sizeof(struct perf_pmu_event_symbol), comp_pmu);
+	zfree(&p.symbol);
+	return r ? r->type : PMU_EVENT_SYMBOL_ERR;
+}
+
+/*
+ * Run the event parser over str.
+ *
+ * A scanner is set up with start_token as its extra value, str is
+ * attached to it as the input buffer, and parse_events_parse() is run
+ * with parse_state as its argument.  Buffer and scanner are torn down
+ * again before returning.
+ *
+ * Returns the scanner initialisation error if that fails, otherwise the
+ * parser's return value.
+ */
+static int parse_events__scanner(const char *str, void *parse_state, int start_token)
+{
+	YY_BUFFER_STATE buffer;
+	void *scanner;
+	int ret;
+
+	ret = parse_events_lex_init_extra(start_token, &scanner);
+	if (ret)
+		return ret;
+
+	buffer = parse_events__scan_string(str, scanner);
+
+#ifdef PARSER_DEBUG
+	parse_events_debug = 1;
+#endif
+	ret = parse_events_parse(parse_state, scanner);
+
+	/* release in reverse order of creation: buffer first, then scanner */
+	parse_events__flush_buffer(buffer, scanner);
+	parse_events__delete_buffer(buffer, scanner);
+	parse_events_lex_destroy(scanner);
+	return ret;
+}
+
+/*
+ * Parse an event config string and hand back the resulting terms.
+ *
+ * On success the parsed terms are spliced onto the caller's list and
+ * the temporary list head is released; on failure whatever the parser
+ * produced is deleted and the parser's error code is returned.
+ */
+int parse_events_terms(struct list_head *terms, const char *str)
+{
+	struct parse_events_state parse_state = {
+		.terms = NULL,
+	};
+	int ret = parse_events__scanner(str, &parse_state, PE_START_TERMS);
+
+	if (ret) {
+		parse_events_terms__delete(parse_state.terms);
+		return ret;
+	}
+
+	list_splice(parse_state.terms, terms);
+	zfree(&parse_state.terms);
+	return 0;
+}
+
+/*
+ * Parse the event description str and append the resulting evsels to
+ * evlist.  The last evsel added is flagged as a command line group
+ * boundary.
+ *
+ * Returns 0 on success, -1 when parsing succeeded but produced no
+ * event, otherwise the parser's error code (details go to err).
+ */
+int parse_events(struct perf_evlist *evlist, const char *str,
+		 struct parse_events_error *err)
+{
+	struct parse_events_state parse_state = {
+		.list   = LIST_HEAD_INIT(parse_state.list),
+		.idx    = evlist->nr_entries,
+		.error  = err,
+		.evlist = evlist,
+	};
+	struct perf_evsel *last;
+	int ret;
+
+	ret = parse_events__scanner(str, &parse_state, PE_START_EVENTS);
+	perf_pmu__parse_cleanup();
+
+	if (ret) {
+		/*
+		 * There are 2 users - builtin-record and builtin-test objects.
+		 * Both call perf_evlist__delete in case of error, so we dont
+		 * need to bother.
+		 */
+		return ret;
+	}
+
+	if (list_empty(&parse_state.list)) {
+		WARN_ONCE(true, "WARNING: event parser found nothing");
+		return -1;
+	}
+
+	perf_evlist__splice_list_tail(evlist, &parse_state.list);
+	evlist->nr_groups += parse_state.nr_groups;
+
+	last = perf_evlist__last(evlist);
+	last->cmdline_group_boundary = true;
+
+	return 0;
+}
+
+#define MAX_WIDTH 1000
+/* Terminal width in columns, capped at MAX_WIDTH. */
+static int get_term_width(void)
+{
+	struct winsize ws;
+	int cols;
+
+	get_term_dimensions(&ws);
+
+	cols = ws.ws_col;
+	if (cols > MAX_WIDTH)
+		cols = MAX_WIDTH;
+
+	return cols;
+}
+
+/*
+ * Print a parse error for event to stderr.
+ *
+ * Without a precise error (err->str == NULL) only a generic message and
+ * the event string are printed.  With one, the event string is trimmed
+ * to the terminal width (cut parts are marked with '..'), a pointer
+ * line shows the error position and err->help is appended if present;
+ * err->str and err->help are freed afterwards.
+ */
+void parse_events_print_error(struct parse_events_error *err,
+			      const char *event)
+{
+	const char *str = "invalid or unsupported event: ";
+	char _buf[MAX_WIDTH];
+	char *buf = (char *) event;
+	int idx = 0;
+
+	if (err->str) {
+		/* -2 for extra '' in the final fprintf */
+		int width       = get_term_width() - 2;
+		int len_event   = strlen(event);
+		int len_str, max_len, cut = 0;
+
+		/*
+		 * Maximum error index indent, we will cut
+		 * the event string if it's bigger.
+		 */
+		int max_err_idx = 13;
+
+		/*
+		 * Let's be specific with the message when
+		 * we have the precise error.
+		 */
+		str     = "event syntax error: ";
+		len_str = strlen(str);
+		max_len = width - len_str;
+
+		/*
+		 * A narrow terminal would make max_len tiny or negative,
+		 * which must never reach strncpy() as a size.  Keep enough
+		 * room for the error position plus the trailing '..', and
+		 * never more than the buffer holds including its 0 byte.
+		 */
+		if (max_len < max_err_idx + 3)
+			max_len = max_err_idx + 3;
+		if (max_len > (int) sizeof(_buf) - 1)
+			max_len = sizeof(_buf) - 1;
+
+		buf = _buf;
+
+		/* We're cutting from the beginning. */
+		if (err->idx > max_err_idx)
+			cut = err->idx - max_err_idx;
+
+		/* Never start reading behind the end of the event string. */
+		if (cut > len_event)
+			cut = len_event;
+
+		/* strncpy() does not terminate on truncation, do it here. */
+		strncpy(buf, event + cut, max_len);
+		buf[max_len] = 0;
+
+		/* Mark cut parts with '..' on both sides. */
+		if (cut)
+			buf[0] = buf[1] = '.';
+
+		if ((len_event - cut) > max_len)
+			buf[max_len - 1] = buf[max_len - 2] = '.';
+
+		idx = len_str + err->idx - cut;
+	}
+
+	fprintf(stderr, "%s'%s'\n", str, buf);
+	if (idx) {
+		fprintf(stderr, "%*s\\___ %s\n", idx + 1, "", err->str);
+		if (err->help)
+			fprintf(stderr, "\n%s\n", err->help);
+		zfree(&err->str);
+		zfree(&err->help);
+	}
+}
+
+#undef MAX_WIDTH
+
+/*
+ * Option callback for event strings: parses str into the evlist that
+ * opt->value points to.  On failure the error is printed together with
+ * a hint to run 'perf list', and the parse error code is returned.
+ */
+int parse_events_option(const struct option *opt, const char *str,
+			int unset __maybe_unused)
+{
+	struct perf_evlist **evlistp = (struct perf_evlist **)opt->value;
+	struct parse_events_error err = { .idx = 0, };
+	int ret;
+
+	ret = parse_events(*evlistp, str, &err);
+	if (!ret)
+		return 0;
+
+	parse_events_print_error(&err, str);
+	fprintf(stderr, "Run 'perf list' for a list of valid events\n");
+
+	return ret;
+}
+
+/*
+ * Call func(evsel, arg) for the evsels that belong to the most recent
+ * command line event specification.
+ *
+ * Starting at the last evsel of evlist the list is walked backwards;
+ * the walk ends at the head of the list or as soon as the previous
+ * evsel is marked cmdline_group_boundary (that one is not visited).
+ * With an empty evlist func is called once with a NULL evsel.
+ *
+ * Returns 0 on success, -1 as soon as func reports an error.
+ */
+static int
+foreach_evsel_in_last_glob(struct perf_evlist *evlist,
+			   int (*func)(struct perf_evsel *evsel,
+				       const void *arg),
+			   const void *arg)
+{
+	struct perf_evsel *last = NULL;
+	int err;
+
+	/*
+	 * Don't return when list_empty, give func a chance to report
+	 * error when it found last == NULL.
+	 *
+	 * So no need to WARN here, let *func do this.
+	 */
+	if (evlist->nr_entries > 0)
+		last = perf_evlist__last(evlist);
+
+	do {
+		err = (*func)(last, arg);
+		if (err)
+			return -1;
+		if (!last)
+			return 0;
+
+		/* reached the first entry of the evlist */
+		if (last->node.prev == &evlist->entries)
+			return 0;
+		last = list_entry(last->node.prev, struct perf_evsel, node);
+	} while (!last->cmdline_group_boundary);
+
+	return 0;
+}
+
+/*
+ * Append the filter string passed in arg to evsel.
+ *
+ * Tracepoint events get it as a tracepoint filter.  For any other
+ * event the PMU of matching type is looked up and, if its
+ * "nr_addr_filters" file yields a non-zero value, the string is
+ * appended as an address filter.  Everything else (including a NULL
+ * evsel) is rejected with a usage message.  Returns 0 or -1.
+ */
+static int set_filter(struct perf_evsel *evsel, const void *arg)
+{
+	const char *filter = arg;
+	struct perf_pmu *pmu = NULL;
+	int nr_addr_filters = 0;
+
+	if (!evsel)
+		goto err;
+
+	if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+		if (perf_evsel__append_tp_filter(evsel, filter) >= 0)
+			return 0;
+
+		fprintf(stderr,
+			"not enough memory to hold filter string\n");
+		return -1;
+	}
+
+	/* pmu ends up NULL when no PMU of this type exists */
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		if (pmu->type == evsel->attr.type)
+			break;
+	}
+
+	if (pmu != NULL)
+		perf_pmu__scan_file(pmu, "nr_addr_filters",
+				    "%d", &nr_addr_filters);
+
+	if (nr_addr_filters == 0)
+		goto err;
+
+	if (perf_evsel__append_addr_filter(evsel, filter) >= 0)
+		return 0;
+
+	fprintf(stderr,
+		"not enough memory to hold filter string\n");
+	return -1;
+
+err:
+	fprintf(stderr,
+		"--filter option should follow a -e tracepoint or HW tracer option\n");
+
+	return -1;
+}
+
+/*
+ * Option callback for --filter: applies str via set_filter() to the
+ * evsels of the most recent event specification in the evlist that
+ * opt->value points to.
+ */
+int parse_filter(const struct option *opt, const char *str,
+		 int unset __maybe_unused)
+{
+	struct perf_evlist **evlistp = (struct perf_evlist **)opt->value;
+	const void *filter = str;
+
+	return foreach_evsel_in_last_glob(*evlistp, set_filter, filter);
+}
+
+/*
+ * Append a "common_pid != <pid of this process>" tracepoint filter to
+ * evsel.  Only tracepoint events are accepted; anything else, or a NULL
+ * evsel, yields a usage message.  Returns 0 or -1.
+ */
+static int add_exclude_perf_filter(struct perf_evsel *evsel,
+				   const void *arg __maybe_unused)
+{
+	char new_filter[64];
+	bool is_tracepoint;
+
+	is_tracepoint = evsel != NULL &&
+			evsel->attr.type == PERF_TYPE_TRACEPOINT;
+	if (!is_tracepoint) {
+		fprintf(stderr,
+			"--exclude-perf option should follow a -e tracepoint option\n");
+		return -1;
+	}
+
+	snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid());
+
+	if (perf_evsel__append_tp_filter(evsel, new_filter) >= 0)
+		return 0;
+
+	fprintf(stderr,
+		"not enough memory to hold filter string\n");
+	return -1;
+}
+
+/*
+ * Option callback for --exclude-perf: runs add_exclude_perf_filter()
+ * on the evsels of the most recent event specification in the evlist
+ * that opt->value points to.
+ */
+int exclude_perf(const struct option *opt,
+		 const char *arg __maybe_unused,
+		 int unset __maybe_unused)
+{
+	struct perf_evlist **evlistp = (struct perf_evlist **)opt->value;
+
+	return foreach_evsel_in_last_glob(*evlistp, add_exclude_perf_filter,
+					  NULL);
+}
+
+/*
+ * Human readable descriptions used when listing events.  The table is
+ * indexed with PERF_TYPE_* values (e.g. PERF_TYPE_TRACEPOINT,
+ * PERF_TYPE_HW_CACHE, PERF_TYPE_RAW, PERF_TYPE_BREAKPOINT).
+ */
+static const char * const event_type_descriptors[] = {
+	"Hardware event",
+	"Software event",
+	"Tracepoint event",
+	"Hardware cache event",
+	"Raw hardware event descriptor",
+	"Hardware breakpoint",
+};
+
+/* qsort() comparator for an array of C strings (elements are char *). */
+static int cmp_string(const void *a, const void *b)
+{
+	const char *lhs = *(const char * const *)a;
+	const char *rhs = *(const char * const *)b;
+
+	return strcmp(lhs, rhs);
+}
+
+/*
+ * Print the events from <debugfs_mount_point>/tracing/events
+ *
+ * Only subsystems matching subsys_glob and events matching event_glob
+ * are listed (a NULL glob matches everything).  The directory tree is
+ * scanned twice: the first pass counts the matches, the second one
+ * collects "subsys:event" names, which are then sorted and printed.
+ * With name_only just the names are printed, separated by spaces.
+ */
+
+void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
+			     bool name_only)
+{
+	DIR *sys_dir, *evt_dir;
+	struct dirent *sys_dirent, *evt_dirent;
+	char evt_path[MAXPATHLEN];
+	char dir_path[MAXPATHLEN];
+	char **evt_list = NULL;
+	unsigned int evt_i = 0, evt_num = 0;
+	bool evt_num_known = false;
+
+restart:
+	sys_dir = opendir(tracing_events_path);
+	if (!sys_dir)
+		return;
+
+	if (evt_num_known) {
+		evt_list = zalloc(sizeof(char *) * evt_num);
+		if (!evt_list)
+			goto out_close_sys_dir;
+	}
+
+	for_each_subsystem(sys_dir, sys_dirent) {
+		if (subsys_glob != NULL &&
+		    !strglobmatch(sys_dirent->d_name, subsys_glob))
+			continue;
+
+		snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
+			 sys_dirent->d_name);
+		evt_dir = opendir(dir_path);
+		if (!evt_dir)
+			continue;
+
+		for_each_event(sys_dirent, evt_dir, evt_dirent) {
+			if (event_glob != NULL &&
+			    !strglobmatch(evt_dirent->d_name, event_glob))
+				continue;
+
+			if (!evt_num_known) {
+				evt_num++;
+				continue;
+			}
+
+			/*
+			 * Events may have appeared since the counting pass;
+			 * never store more than was allocated.
+			 */
+			if (evt_i >= evt_num)
+				continue;
+
+			snprintf(evt_path, MAXPATHLEN, "%s:%s",
+				 sys_dirent->d_name, evt_dirent->d_name);
+
+			evt_list[evt_i] = strdup(evt_path);
+			if (evt_list[evt_i] == NULL)
+				goto out_close_evt_dir;
+			evt_i++;
+		}
+		closedir(evt_dir);
+	}
+	closedir(sys_dir);
+
+	if (!evt_num_known) {
+		/* Nothing matched: nothing to allocate, sort or print. */
+		if (evt_num == 0)
+			return;
+		evt_num_known = true;
+		goto restart;
+	}
+	/*
+	 * Events may also have vanished since the counting pass; only the
+	 * entries actually collected are valid strings.
+	 */
+	evt_num = evt_i;
+	qsort(evt_list, evt_num, sizeof(char *), cmp_string);
+	evt_i = 0;
+	while (evt_i < evt_num) {
+		if (name_only) {
+			printf("%s ", evt_list[evt_i++]);
+			continue;
+		}
+		printf("  %-50s [%s]\n", evt_list[evt_i++],
+				event_type_descriptors[PERF_TYPE_TRACEPOINT]);
+	}
+	if (evt_num && pager_in_use())
+		printf("\n");
+
+out_free:
+	/* evt_i is the number of entries that hold a string */
+	evt_num = evt_i;
+	for (evt_i = 0; evt_i < evt_num; evt_i++)
+		zfree(&evt_list[evt_i]);
+	zfree(&evt_list);
+	return;
+
+out_close_evt_dir:
+	closedir(evt_dir);
+out_close_sys_dir:
+	closedir(sys_dir);
+
+	printf("FATAL: not enough memory to print %s\n",
+			event_type_descriptors[PERF_TYPE_TRACEPOINT]);
+	if (evt_list)
+		goto out_free;
+}
+
+/*
+ * Check whether event is in <debugfs_mount_point>/tracing/events
+ *
+ * event_string is compared against "subsys:event" for every event
+ * found.  Returns 1 on an exact match, 0 otherwise (also when the
+ * events directory cannot be opened).
+ */
+
+int is_valid_tracepoint(const char *event_string)
+{
+	DIR *sys_dir, *evt_dir;
+	struct dirent *sys_dirent, *evt_dirent;
+	char evt_path[MAXPATHLEN];
+	char dir_path[MAXPATHLEN];
+	int found = 0;
+
+	sys_dir = opendir(tracing_events_path);
+	if (sys_dir == NULL)
+		return 0;
+
+	for_each_subsystem(sys_dir, sys_dirent) {
+
+		snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
+			 sys_dirent->d_name);
+		evt_dir = opendir(dir_path);
+		if (evt_dir == NULL)
+			continue;
+
+		for_each_event(sys_dirent, evt_dir, evt_dirent) {
+			snprintf(evt_path, MAXPATHLEN, "%s:%s",
+				 sys_dirent->d_name, evt_dirent->d_name);
+			if (strcmp(evt_path, event_string) == 0) {
+				found = 1;
+				goto out_close_evt_dir;
+			}
+		}
+		closedir(evt_dir);
+	}
+	goto out_close_sys_dir;
+
+out_close_evt_dir:
+	closedir(evt_dir);
+out_close_sys_dir:
+	closedir(sys_dir);
+	return found;
+}
+
+/*
+ * Probe whether an event of the given type/config can be opened for the
+ * current thread.
+ *
+ * A disabled evsel is created and opened on a one-entry thread map; if
+ * the open fails with -EACCES it is retried with exclude_kernel set.
+ * Returns false when the thread map cannot be allocated or the open
+ * fails, true otherwise (including when the evsel itself cannot be
+ * allocated).
+ */
+static bool is_event_supported(u8 type, unsigned config)
+{
+	bool ret = true;
+	int open_return;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr = {
+		.type = type,
+		.config = config,
+		.disabled = 1,
+	};
+	struct thread_map *tmap = thread_map__new_by_tid(0);
+
+	if (tmap == NULL)
+		return false;
+
+	evsel = perf_evsel__new(&attr);
+	if (evsel) {
+		open_return = perf_evsel__open(evsel, NULL, tmap);
+		ret = open_return >= 0;
+
+		if (open_return == -EACCES) {
+			/*
+			 * This happens if the paranoid value
+			 * /proc/sys/kernel/perf_event_paranoid is set to 2
+			 * Re-run with exclude_kernel set; we don't do that
+			 * by default as some ARM machines do not support it.
+			 *
+			 */
+			evsel->attr.exclude_kernel = 1;
+			ret = perf_evsel__open(evsel, NULL, tmap) >= 0;
+		}
+		perf_evsel__delete(evsel);
+	}
+
+	/* drop the reference taken by thread_map__new_by_tid() */
+	thread_map__put(tmap);
+	return ret;
+}
+
+/*
+ * List the SDT events found in the probe caches of all cached build-ids.
+ *
+ * Events are filtered by subsys_glob (group) and event_glob (event); a
+ * NULL glob matches everything.  Entries are collected as
+ * "group:event@buildid".  When neighbouring entries share the same
+ * "group:event" part, the original path and the first 12 characters of
+ * the build-id are printed as well to tell them apart.  With name_only
+ * only "group:event" is printed.
+ */
+void print_sdt_events(const char *subsys_glob, const char *event_glob,
+		      bool name_only)
+{
+	struct probe_cache *pcache;
+	struct probe_cache_entry *ent;
+	struct strlist *bidlist, *sdtlist;
+	struct strlist_config cfg = {.dont_dupstr = true};
+	struct str_node *nd, *nd2;
+	char *buf, *path, *ptr = NULL;
+	bool show_detail = false;
+	int ret;
+
+	sdtlist = strlist__new(NULL, &cfg);
+	if (!sdtlist) {
+		pr_debug("Failed to allocate new strlist for SDT\n");
+		return;
+	}
+	bidlist = build_id_cache__list_all(true);
+	if (!bidlist) {
+		pr_debug("Failed to get buildids: %d\n", errno);
+		/* don't leak the list allocated just above */
+		strlist__delete(sdtlist);
+		return;
+	}
+	strlist__for_each_entry(nd, bidlist) {
+		pcache = probe_cache__new(nd->s, NULL);
+		if (!pcache)
+			continue;
+		list_for_each_entry(ent, &pcache->entries, node) {
+			if (!ent->sdt)
+				continue;
+			if (subsys_glob &&
+			    !strglobmatch(ent->pev.group, subsys_glob))
+				continue;
+			if (event_glob &&
+			    !strglobmatch(ent->pev.event, event_glob))
+				continue;
+			ret = asprintf(&buf, "%s:%s@%s", ent->pev.group,
+					ent->pev.event, nd->s);
+			if (ret > 0)
+				strlist__add(sdtlist, buf);
+		}
+		probe_cache__delete(pcache);
+	}
+	strlist__delete(bidlist);
+
+	strlist__for_each_entry(nd, sdtlist) {
+		/* split "group:event@buildid"; buf then points at the build-id */
+		buf = strchr(nd->s, '@');
+		if (buf)
+			*(buf++) = '\0';
+		if (name_only) {
+			printf("%s ", nd->s);
+			continue;
+		}
+		/* temporarily split the next entry too, to compare names */
+		nd2 = strlist__next(nd);
+		if (nd2) {
+			ptr = strchr(nd2->s, '@');
+			if (ptr)
+				*ptr = '\0';
+			if (strcmp(nd->s, nd2->s) == 0)
+				show_detail = true;
+		}
+		if (show_detail) {
+			path = build_id_cache__origname(buf);
+			ret = asprintf(&buf, "%s@%s(%.12s)", nd->s, path, buf);
+			if (ret > 0) {
+				printf("  %-50s [%s]\n", buf, "SDT event");
+				free(buf);
+			}
+			/* the original name is an allocated copy */
+			free(path);
+		} else
+			printf("  %-50s [%s]\n", nd->s, "SDT event");
+		if (nd2) {
+			if (strcmp(nd->s, nd2->s) != 0)
+				show_detail = false;
+			/* undo the temporary split of the next entry */
+			if (ptr)
+				*ptr = '@';
+		}
+	}
+	strlist__delete(sdtlist);
+}
+
+/*
+ * Print the hardware cache events that match event_glob (NULL matches
+ * all) and that is_event_supported() accepts.
+ *
+ * Every valid (type, op, result) combination is visited twice: the
+ * first pass only counts the matches, the second pass (after the jump
+ * back to 'restart') stores the names, which are then sorted and
+ * printed.  With name_only just the names are printed, separated by
+ * spaces.
+ *
+ * Returns the number of entries that were stored in the list.
+ */
+int print_hwcache_events(const char *event_glob, bool name_only)
+{
+	unsigned int type, op, i, evt_i = 0, evt_num = 0;
+	char name[64];
+	char **evt_list = NULL;
+	bool evt_num_known = false;
+
+restart:
+	if (evt_num_known) {
+		evt_list = zalloc(sizeof(char *) * evt_num);
+		if (!evt_list)
+			goto out_enomem;
+	}
+
+	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+			/* skip invalid cache type */
+			if (!perf_evsel__is_cache_op_valid(type, op))
+				continue;
+
+			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+				__perf_evsel__hw_cache_type_op_res_name(type, op, i,
+									name, sizeof(name));
+				if (event_glob != NULL && !strglobmatch(name, event_glob))
+					continue;
+
+				/* config packs type, op and result into one value */
+				if (!is_event_supported(PERF_TYPE_HW_CACHE,
+							type | (op << 8) | (i << 16)))
+					continue;
+
+				/* counting pass: just remember how many there are */
+				if (!evt_num_known) {
+					evt_num++;
+					continue;
+				}
+
+				evt_list[evt_i] = strdup(name);
+				if (evt_list[evt_i] == NULL)
+					goto out_enomem;
+				evt_i++;
+			}
+		}
+	}
+
+	if (!evt_num_known) {
+		evt_num_known = true;
+		goto restart;
+	}
+	qsort(evt_list, evt_num, sizeof(char *), cmp_string);
+	evt_i = 0;
+	while (evt_i < evt_num) {
+		if (name_only) {
+			printf("%s ", evt_list[evt_i++]);
+			continue;
+		}
+		printf("  %-50s [%s]\n", evt_list[evt_i++],
+				event_type_descriptors[PERF_TYPE_HW_CACHE]);
+	}
+	if (evt_num && pager_in_use())
+		printf("\n");
+
+out_free:
+	/* evt_i is the number of entries that hold a string */
+	evt_num = evt_i;
+	for (evt_i = 0; evt_i < evt_num; evt_i++)
+		zfree(&evt_list[evt_i]);
+	zfree(&evt_list);
+	return evt_num;
+
+out_enomem:
+	printf("FATAL: not enough memory to print %s\n", event_type_descriptors[PERF_TYPE_HW_CACHE]);
+	if (evt_list)
+		goto out_free;
+	return evt_num;
+}
+
+/*
+ * Print the symbolic events of one type.
+ *
+ * syms is a table of max entries whose index is used as the event
+ * config.  Entries are listed when they match event_glob by symbol or
+ * alias (a NULL glob matches everything) and is_event_supported()
+ * accepts them.  The table is walked twice: first to count, then to
+ * collect the names, which are sorted and printed.  Unless name_only is
+ * set, a non-empty alias is shown as "symbol OR alias".
+ */
+void print_symbol_events(const char *event_glob, unsigned type,
+				struct event_symbol *syms, unsigned max,
+				bool name_only)
+{
+	unsigned int i, evt_i = 0, evt_num = 0;
+	char name[MAX_NAME_LEN];
+	char **evt_list = NULL;
+	bool evt_num_known = false;
+
+restart:
+	if (evt_num_known) {
+		evt_list = zalloc(sizeof(char *) * evt_num);
+		if (!evt_list)
+			goto out_enomem;
+		/* rewind: the counting pass advanced syms by max entries */
+		syms -= max;
+	}
+
+	for (i = 0; i < max; i++, syms++) {
+
+		/* an entry without a name can be neither matched nor printed */
+		if (syms->symbol == NULL)
+			continue;
+
+		if (event_glob != NULL &&
+		    !(strglobmatch(syms->symbol, event_glob) ||
+		      (syms->alias && strglobmatch(syms->alias, event_glob))))
+			continue;
+
+		if (!is_event_supported(type, i))
+			continue;
+
+		if (!evt_num_known) {
+			evt_num++;
+			continue;
+		}
+
+		/* snprintf() always terminates name, even when truncating */
+		if (!name_only && syms->alias && syms->alias[0] != '\0')
+			snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
+		else
+			snprintf(name, MAX_NAME_LEN, "%s", syms->symbol);
+
+		evt_list[evt_i] = strdup(name);
+		if (evt_list[evt_i] == NULL)
+			goto out_enomem;
+		evt_i++;
+	}
+
+	if (!evt_num_known) {
+		/* Nothing matched: nothing to allocate, sort or print. */
+		if (evt_num == 0)
+			return;
+		evt_num_known = true;
+		goto restart;
+	}
+	qsort(evt_list, evt_num, sizeof(char *), cmp_string);
+	evt_i = 0;
+	while (evt_i < evt_num) {
+		if (name_only) {
+			printf("%s ", evt_list[evt_i++]);
+			continue;
+		}
+		printf("  %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]);
+	}
+	if (evt_num && pager_in_use())
+		printf("\n");
+
+out_free:
+	/* evt_i is the number of entries that hold a string */
+	evt_num = evt_i;
+	for (evt_i = 0; evt_i < evt_num; evt_i++)
+		zfree(&evt_list[evt_i]);
+	zfree(&evt_list);
+	return;
+
+out_enomem:
+	printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]);
+	if (evt_list)
+		goto out_free;
+}
+
+/*
+ * Print the help text for the event symbols.
+ *
+ * Hardware, software, hardware cache and PMU events are always listed,
+ * filtered by event_glob.  Without a glob the generic raw/breakpoint
+ * syntax hints (unless name_only), the tracepoints, the SDT events and
+ * the metric groups follow.
+ */
+void print_events(const char *event_glob, bool name_only, bool quiet_flag,
+			bool long_desc, bool details_flag)
+{
+	print_symbol_events(event_glob, PERF_TYPE_HARDWARE,
+			    event_symbols_hw, PERF_COUNT_HW_MAX, name_only);
+
+	print_symbol_events(event_glob, PERF_TYPE_SOFTWARE,
+			    event_symbols_sw, PERF_COUNT_SW_MAX, name_only);
+
+	print_hwcache_events(event_glob, name_only);
+
+	print_pmu_events(event_glob, name_only, quiet_flag, long_desc,
+			details_flag);
+
+	/* everything below is only shown for an unfiltered listing */
+	if (event_glob)
+		return;
+
+	if (!name_only) {
+		const char *raw_desc = event_type_descriptors[PERF_TYPE_RAW];
+		const char *bp_desc = event_type_descriptors[PERF_TYPE_BREAKPOINT];
+
+		printf("  %-50s [%s]\n", "rNNN", raw_desc);
+		printf("  %-50s [%s]\n",
+		       "cpu/t1=v1[,t2=v2,t3 ...]/modifier", raw_desc);
+		if (pager_in_use())
+			printf("   (see 'man perf-list' on how to encode it)\n\n");
+
+		printf("  %-50s [%s]\n", "mem:<addr>[/len][:access]", bp_desc);
+		if (pager_in_use())
+			printf("\n");
+	}
+
+	print_tracepoint_events(NULL, NULL, name_only);
+
+	print_sdt_events(NULL, NULL, name_only);
+
+	metricgroup__print(true, true, NULL, name_only);
+}
+
+/* Non-zero for every term whose type_term is not the free-form USER type. */
+int parse_events__is_hardcoded_term(struct parse_events_term *term)
+{
+	if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Allocate a term as a copy of the template temp and store the value
+ * that goes with the template's type_val: num for numeric terms, str
+ * for string terms (the pointer is stored, not duplicated).
+ *
+ * On success the new term is returned through _term and 0 is returned.
+ * Returns -ENOMEM on allocation failure and -EINVAL for an unknown
+ * value type.
+ */
+static int new_term(struct parse_events_term **_term,
+		    struct parse_events_term *temp,
+		    char *str, u64 num)
+{
+	struct parse_events_term *term = malloc(sizeof(*term));
+
+	if (term == NULL)
+		return -ENOMEM;
+
+	*term = *temp;
+	INIT_LIST_HEAD(&term->list);
+	term->weak = false;
+
+	if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+		term->val.num = num;
+	} else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+		term->val.str = str;
+	} else {
+		free(term);
+		return -EINVAL;
+	}
+
+	*_term = term;
+	return 0;
+}
+
+/*
+ * Create a numeric term.
+ *
+ * loc_term_ and loc_val_ are optional parser locations (YYLTYPE); their
+ * first_column is recorded as the error position of the term and of its
+ * value, 0 when the location is NULL.  Returns what new_term() returns.
+ */
+int parse_events_term__num(struct parse_events_term **term,
+			   int type_term, char *config, u64 num,
+			   bool no_value,
+			   void *loc_term_, void *loc_val_)
+{
+	YYLTYPE *loc_term = loc_term_;
+	YYLTYPE *loc_val = loc_val_;
+	struct parse_events_term temp;
+
+	memset(&temp, 0, sizeof(temp));
+	temp.type_val  = PARSE_EVENTS__TERM_TYPE_NUM;
+	temp.type_term = type_term;
+	temp.config    = config;
+	temp.no_value  = no_value;
+	if (loc_term)
+		temp.err_term = loc_term->first_column;
+	if (loc_val)
+		temp.err_val = loc_val->first_column;
+
+	return new_term(term, &temp, NULL, num);
+}
+
+/*
+ * Create a string term.
+ *
+ * loc_term_ and loc_val_ are optional parser locations (YYLTYPE); their
+ * first_column is recorded as the error position of the term and of its
+ * value, 0 when the location is NULL.  Returns what new_term() returns.
+ */
+int parse_events_term__str(struct parse_events_term **term,
+			   int type_term, char *config, char *str,
+			   void *loc_term_, void *loc_val_)
+{
+	YYLTYPE *loc_term = loc_term_;
+	YYLTYPE *loc_val = loc_val_;
+	struct parse_events_term temp;
+
+	memset(&temp, 0, sizeof(temp));
+	temp.type_val  = PARSE_EVENTS__TERM_TYPE_STR;
+	temp.type_term = type_term;
+	temp.config    = config;
+	if (loc_term)
+		temp.err_term = loc_term->first_column;
+	if (loc_val)
+		temp.err_val = loc_val->first_column;
+
+	return new_term(term, &temp, str, 0);
+}
+
+/*
+ * Create a USER string term whose value is the symbol name of hardware
+ * event idx.  The term's config name is config, or "event" when config
+ * is NULL.  idx must be below PERF_COUNT_HW_MAX (BUG_ON otherwise).
+ */
+int parse_events_term__sym_hw(struct parse_events_term **term,
+			      char *config, unsigned idx)
+{
+	struct parse_events_term temp = {
+		.type_val  = PARSE_EVENTS__TERM_TYPE_STR,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+		.config    = config ? config : (char *) "event",
+	};
+	char *symbol;
+
+	BUG_ON(idx >= PERF_COUNT_HW_MAX);
+	symbol = (char *) event_symbols_hw[idx].symbol;
+
+	return new_term(term, &temp, symbol, 0);
+}
+
+/*
+ * Create a new term carrying the same value type, term type, config
+ * name, error positions and value as term.  A string value is shared
+ * with the original, not duplicated.
+ */
+int parse_events_term__clone(struct parse_events_term **new,
+			     struct parse_events_term *term)
+{
+	struct parse_events_term temp;
+
+	memset(&temp, 0, sizeof(temp));
+	temp.type_val  = term->type_val;
+	temp.type_term = term->type_term;
+	temp.config    = term->config;
+	temp.err_term  = term->err_term;
+	temp.err_val   = term->err_val;
+
+	return new_term(new, &temp, term->val.str, term->val.num);
+}
+
+/*
+ * Duplicate the term list old into a freshly allocated list.
+ *
+ * A NULL old yields a NULL *new and success.  On success *new points to
+ * a malloc'ed list head holding a clone of every term.  On failure a
+ * negative error code is returned, nothing stays allocated and *new is
+ * NULL.
+ */
+int parse_events_copy_term_list(struct list_head *old,
+				 struct list_head **new)
+{
+	struct parse_events_term *term, *n;
+	int ret;
+
+	if (!old) {
+		*new = NULL;
+		return 0;
+	}
+
+	*new = malloc(sizeof(struct list_head));
+	if (!*new)
+		return -ENOMEM;
+	INIT_LIST_HEAD(*new);
+
+	list_for_each_entry (term, old, list) {
+		ret = parse_events_term__clone(&n, term);
+		if (ret) {
+			/* drop the clones made so far together with the head */
+			parse_events_terms__delete(*new);
+			*new = NULL;
+			return ret;
+		}
+		list_add_tail(&n->list, *new);
+	}
+	return 0;
+}
+
+/*
+ * Unlink and free every term on terms, including a term's range array
+ * when it has ranges.  The list head itself is left to the caller.
+ */
+void parse_events_terms__purge(struct list_head *terms)
+{
+	struct parse_events_term *pos, *next;
+
+	list_for_each_entry_safe(pos, next, terms, list) {
+		if (pos->array.nr_ranges != 0)
+			zfree(&pos->array.ranges);
+
+		list_del_init(&pos->list);
+		free(pos);
+	}
+}
+
+/* Free all terms on the list and the list head itself; NULL is a no-op. */
+void parse_events_terms__delete(struct list_head *terms)
+{
+	if (terms != NULL) {
+		parse_events_terms__purge(terms);
+		free(terms);
+	}
+}
+
+/* Release the range storage of a and reset the pointer to NULL. */
+void parse_events__clear_array(struct parse_events_array *a)
+{
+	free(a->ranges);
+	a->ranges = NULL;
+}
+
+/*
+ * Record a parse error at position idx with a copy of the message str
+ * in the error object of parse_state.  Does nothing when no error
+ * object was supplied; warns once if the message cannot be copied.
+ */
+void parse_events_evlist_error(struct parse_events_state *parse_state,
+			       int idx, const char *str)
+{
+	struct parse_events_error *err = parse_state->error;
+
+	if (err != NULL) {
+		err->idx = idx;
+		err->str = strdup(str);
+		WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
+	}
+}
+
+/*
+ * Fill buf with a comma separated list of the config term names that
+ * are available, have a name, and whose name does not start with '<'.
+ * The list is cut short, at a name boundary, when the next name would
+ * not fit into buf_sz bytes.
+ */
+static void config_terms_list(char *buf, size_t buf_sz)
+{
+	bool need_sep = false;
+	int type;
+
+	buf[0] = '\0';
+	for (type = 0; type < __PARSE_EVENTS__TERM_TYPE_NR; type++) {
+		const char *name = config_term_names[type];
+
+		if (!config_term_avail(type, NULL) || !name || name[0] == '<')
+			continue;
+
+		/* +2: room for the separator and the terminating 0 byte */
+		if (strlen(buf) + strlen(name) + 2 >= buf_sz)
+			return;
+
+		if (need_sep)
+			strcat(buf, ",");
+		need_sep = true;
+
+		strcat(buf, name);
+	}
+}
+
+/*
+ * Build the "valid terms: ..." help string for an event.
+ * @additional_terms: extra terms (such as PMU sysfs terms) that are put
+ * in front of the built-in ones; may be NULL.
+ *
+ * Returns an allocated string, or NULL if it cannot be allocated.
+ */
+char *parse_events_formats_error_string(char *additional_terms)
+{
+	/* "no-overwrite" is the longest name */
+	char static_terms[__PARSE_EVENTS__TERM_TYPE_NR *
+			  (sizeof("no-overwrite") - 1)];
+	char *str;
+	int ret;
+
+	config_terms_list(static_terms, sizeof(static_terms));
+
+	if (additional_terms)
+		ret = asprintf(&str, "valid terms: %s,%s",
+			       additional_terms, static_terms);
+	else
+		ret = asprintf(&str, "valid terms: %s", static_terms);
+
+	if (ret < 0)
+		return NULL;
+
+	return str;
+}
diff --git a/util/parse-events.h b/util/parse-events.h
new file mode 100644
index 0000000..5015cfd
--- /dev/null
+++ b/util/parse-events.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_PARSE_EVENTS_H
+#define __PERF_PARSE_EVENTS_H
+/*
+ * Parse symbolic events/counts passed in as options:
+ */
+
+#include <linux/list.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <string.h>
+
+struct list_head;
+struct perf_evsel;
+struct perf_evlist;
+struct parse_events_error;
+
+struct option;
+
+struct tracepoint_path {
+	char *system;
+	char *name;
+	struct tracepoint_path *next;
+};
+
+struct tracepoint_path *tracepoint_id_to_path(u64 config);
+struct tracepoint_path *tracepoint_name_to_path(const char *name);
+bool have_tracepoints(struct list_head *evlist);
+
+const char *event_type(int type);
+
+int parse_events_option(const struct option *opt, const char *str, int unset);
+int parse_events(struct perf_evlist *evlist, const char *str,
+		 struct parse_events_error *error);
+int parse_events_terms(struct list_head *terms, const char *str);
+int parse_filter(const struct option *opt, const char *str, int unset);
+int exclude_perf(const struct option *opt, const char *arg, int unset);
+
+#define EVENTS_HELP_MAX (128*1024)
+
+enum perf_pmu_event_symbol_type {
+	PMU_EVENT_SYMBOL_ERR,		/* not a PMU EVENT */
+	PMU_EVENT_SYMBOL,		/* normal style PMU event */
+	PMU_EVENT_SYMBOL_PREFIX,	/* prefix of pre-suf style event */
+	PMU_EVENT_SYMBOL_SUFFIX,	/* suffix of pre-suf style event */
+};
+
+struct perf_pmu_event_symbol {
+	char	*symbol;
+	enum perf_pmu_event_symbol_type	type;
+};
+
+enum {
+	PARSE_EVENTS__TERM_TYPE_NUM,
+	PARSE_EVENTS__TERM_TYPE_STR,
+};
+
+enum {	/* term types; quoted strings are the keywords parse-events.l maps to each */
+	PARSE_EVENTS__TERM_TYPE_USER,		/* term written with a free-form name (PE_NAME) */
+	PARSE_EVENTS__TERM_TYPE_CONFIG,		/* "config" */
+	PARSE_EVENTS__TERM_TYPE_CONFIG1,	/* "config1" */
+	PARSE_EVENTS__TERM_TYPE_CONFIG2,	/* "config2" */
+	PARSE_EVENTS__TERM_TYPE_NAME,		/* "name" */
+	PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,	/* "period" */
+	PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ,	/* "freq" */
+	PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,	/* "branch_type" */
+	PARSE_EVENTS__TERM_TYPE_TIME,		/* "time" */
+	PARSE_EVENTS__TERM_TYPE_CALLGRAPH,	/* "call-graph" */
+	PARSE_EVENTS__TERM_TYPE_STACKSIZE,	/* "stack-size" */
+	PARSE_EVENTS__TERM_TYPE_NOINHERIT,	/* "no-inherit" */
+	PARSE_EVENTS__TERM_TYPE_INHERIT,	/* "inherit" */
+	PARSE_EVENTS__TERM_TYPE_MAX_STACK,	/* "max-stack" */
+	PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,	/* "no-overwrite" */
+	PARSE_EVENTS__TERM_TYPE_OVERWRITE,	/* "overwrite" */
+	PARSE_EVENTS__TERM_TYPE_DRV_CFG,	/* "@..." driver config term (PE_DRV_CFG_TERM) */
+	__PARSE_EVENTS__TERM_TYPE_NR,		/* count of the entries above; used as loop bound and array size */
+};
+
+struct parse_events_array {
+	size_t nr_ranges;	/* number of entries in ranges[] */
+	struct {
+		unsigned int start;	/* first index of the range */
+		size_t length;		/* number of consecutive indexes; 1 for a single value */
+	} *ranges;	/* malloc()ed by the grammar; NULL with nr_ranges == 0 for "[all]" */
+};
+
+struct parse_events_term {
+	char *config;	/* NOTE(review): presumably the term's name - confirm in parse-events.c */
+	struct parse_events_array array;	/* index ranges of a "name[...]=value" term */
+	union {
+		char *str;
+		u64  num;
+	} val;	/* NOTE(review): member in use presumably selected by type_val - confirm */
+	int type_val;	/* presumably PARSE_EVENTS__TERM_TYPE_NUM or _STR - confirm */
+	int type_term;	/* presumably a PARSE_EVENTS__TERM_TYPE_USER.._DRV_CFG value - confirm */
+	struct list_head list;	/* node linking the term into a term list */
+	bool used;
+	bool no_value;
+
+	/* error string indexes for within parsed string */
+	int err_term;
+	int err_val;
+
+	/* Coming from implicit alias */
+	bool weak;
+};
+
+struct parse_events_error {
+	int   idx;	/* index in the parsed string */
+	char *str;      /* string to display at the index */
+	char *help;	/* optional help string */
+};
+
+struct parse_events_state {
+	struct list_head	   list;	/* start_events passes the parsed groups here via parse_events_update_lists() */
+	int			   idx;	/* handed by address to the parse_events_add_*() helpers */
+	int			   nr_groups;	/* bumped for each group with more than one member */
+	struct parse_events_error *error;	/* optional error report; users check it for NULL */
+	struct perf_evlist	  *evlist;
+	struct list_head	  *terms;	/* set by the start_terms rule to the parsed term list */
+};
+
+void parse_events__shrink_config_terms(void);
+int parse_events__is_hardcoded_term(struct parse_events_term *term);
+int parse_events_term__num(struct parse_events_term **term,
+			   int type_term, char *config, u64 num,
+			   bool novalue,
+			   void *loc_term, void *loc_val);
+int parse_events_term__str(struct parse_events_term **term,
+			   int type_term, char *config, char *str,
+			   void *loc_term, void *loc_val);
+int parse_events_term__sym_hw(struct parse_events_term **term,
+			      char *config, unsigned idx);
+int parse_events_term__clone(struct parse_events_term **new,
+			     struct parse_events_term *term);
+void parse_events_terms__delete(struct list_head *terms);
+void parse_events_terms__purge(struct list_head *terms);
+void parse_events__clear_array(struct parse_events_array *a);
+int parse_events__modifier_event(struct list_head *list, char *str, bool add);
+int parse_events__modifier_group(struct list_head *list, char *event_mod);
+int parse_events_name(struct list_head *list, char *name);
+int parse_events_add_tracepoint(struct list_head *list, int *idx,
+				const char *sys, const char *event,
+				struct parse_events_error *error,
+				struct list_head *head_config);
+int parse_events_load_bpf(struct parse_events_state *parse_state,
+			  struct list_head *list,
+			  char *bpf_file_name,
+			  bool source,
+			  struct list_head *head_config);
+/* Provide this function for perf test */
+struct bpf_object;
+int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
+			      struct list_head *list,
+			      struct bpf_object *obj,
+			      struct list_head *head_config);
+int parse_events_add_numeric(struct parse_events_state *parse_state,
+			     struct list_head *list,
+			     u32 type, u64 config,
+			     struct list_head *head_config);
+int parse_events_add_cache(struct list_head *list, int *idx,
+			   char *type, char *op_result1, char *op_result2,
+			   struct parse_events_error *error,
+			   struct list_head *head_config);
+int parse_events_add_breakpoint(struct list_head *list, int *idx,
+				void *ptr, char *type, u64 len);
+int parse_events_add_pmu(struct parse_events_state *parse_state,
+			 struct list_head *list, char *name,
+			 struct list_head *head_config, bool auto_merge_stats);
+
+int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
+			       char *str,
+			       struct list_head **listp);
+
+int parse_events_copy_term_list(struct list_head *old,
+				 struct list_head **new);
+
+enum perf_pmu_event_symbol_type
+perf_pmu__parse_check(const char *name);
+void parse_events__set_leader(char *name, struct list_head *list);
+void parse_events_update_lists(struct list_head *list_event,
+			       struct list_head *list_all);
+void parse_events_evlist_error(struct parse_events_state *parse_state,
+			       int idx, const char *str);
+
+void print_events(const char *event_glob, bool name_only, bool quiet,
+		  bool long_desc, bool details_flag);
+
+struct event_symbol {
+	const char	*symbol;
+	const char	*alias;
+};
+extern struct event_symbol event_symbols_hw[];
+extern struct event_symbol event_symbols_sw[];
+void print_symbol_events(const char *event_glob, unsigned type,
+				struct event_symbol *syms, unsigned max,
+				bool name_only);
+void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
+			     bool name_only);
+int print_hwcache_events(const char *event_glob, bool name_only);
+void print_sdt_events(const char *subsys_glob, const char *event_glob,
+		      bool name_only);
+int is_valid_tracepoint(const char *event_string);
+
+int valid_event_mount(const char *eventfs);
+char *parse_events_formats_error_string(char *additional_terms);
+
+void parse_events_print_error(struct parse_events_error *err,
+			      const char *event);
+
+#ifdef HAVE_LIBELF_SUPPORT
+/*
+ * SDT/cached probe points start with '%', or with "sdt_" and have ':' but no '='.
+ */
+static inline bool is_sdt_event(char *str)
+{
+	if (str[0] == '%')
+		return true;
+	if (strncmp(str, "sdt_", 4) != 0)
+		return false;
+	return strchr(str, ':') != NULL && strchr(str, '=') == NULL;
+}
+#else
+static inline bool is_sdt_event(char *str __maybe_unused)
+{
+	return false;
+}
+#endif /* HAVE_LIBELF_SUPPORT */
+
+#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/util/parse-events.l b/util/parse-events.l
new file mode 100644
index 0000000..a1a01b1
--- /dev/null
+++ b/util/parse-events.l
@@ -0,0 +1,362 @@
+
+%option reentrant
+%option bison-bridge
+%option prefix="parse_events_"
+%option stack
+%option bison-locations
+%option yylineno
+%option reject
+
+%{
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "../perf.h"
+#include "parse-events.h"
+#include "parse-events-bison.h"
+
+char *parse_events_get_text(yyscan_t yyscanner);
+YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
+
+static int __value(YYSTYPE *yylval, char *str, int base, int token)
+{
+	u64 parsed;
+
+	errno = 0;	/* cleared first: errno is the only failure signal checked */
+	parsed = strtoull(str, NULL, base);
+	if (!errno) {
+		yylval->num = parsed;
+		return token;
+	}
+	return PE_ERROR;
+}
+
+/* Lex the current token text as a number in @base: PE_VALUE, or PE_ERROR on failure. */
+static int value(yyscan_t scanner, int base)
+{
+	YYSTYPE *lval = parse_events_get_lval(scanner);
+
+	return __value(lval, parse_events_get_text(scanner), base, PE_VALUE);
+}
+
+/* Lex an "r<hex>" raw event: skip the leading 'r' and parse the rest in base 16. */
+static int raw(yyscan_t scanner)
+{
+	YYSTYPE *lval = parse_events_get_lval(scanner);
+
+	return __value(lval, parse_events_get_text(scanner) + 1, 16, PE_RAW);
+}
+
+/* Store a strdup() copy of the token text in yylval->str (unchecked) and return @token. */
+static int str(yyscan_t scanner, int token)
+{
+	YYSTYPE *lval = parse_events_get_lval(scanner);
+
+	lval->str = strdup(parse_events_get_text(scanner));
+	return token;
+}
+
+/* True when @text ends in ".c" or ".o", or is longer than ".obj" and ends in it. */
+static bool isbpf_suffix(char *text)
+{
+	int len = strlen(text);
+	const char *tail = text + len;	/* points at the terminating NUL */
+
+	if (len < 2)
+		return false;
+	if (tail[-2] == '.' && (tail[-1] == 'c' || tail[-1] == 'o'))
+		return true;
+
+	return len > 4 && strcmp(tail - 4, ".obj") == 0;
+}
+
+/* A token counts as a BPF file when its suffix matches and stat() succeeds on it. */
+static bool isbpf(yyscan_t scanner)
+{
+	char *path = parse_events_get_text(scanner);
+	struct stat sb;
+
+	if (isbpf_suffix(path))
+		return stat(path, &sb) == 0;
+	return false;
+}
+
+/*
+ * Called when the lexer sees one of two kinds of input:
+ *
+ * 	@cfg1 or @cfg2=config
+ *
+ * Only the leading '@' is stripped before 'cfg1' or 'cfg2=config' is handed
+ * to bison.  In the latter case the string must stay intact so that the PMU
+ * kernel driver can determine which configurable is associated with
+ * 'config'.
+ */
+static int drv_str(yyscan_t scanner, int token)
+{
+	YYSTYPE *lval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	/* Copy everything after the '@' */
+	lval->str = strdup(&text[1]);
+	return token;
+}
+
+#define REWIND(__alloc)				\
+do {								\
+	YYSTYPE *__yylval = parse_events_get_lval(yyscanner);	\
+	char *text = parse_events_get_text(yyscanner);		\
+								\
+	if (__alloc)	/* caller wants a copy of the text */	\
+		__yylval->str = strdup(text);			\
+								\
+	yycolumn -= strlen(text); /* undo YY_USER_ACTION */	\
+	yyless(0);	/* give the whole token back to flex */	\
+} while (0)
+
+/* Copy the token text into yylval->str and pick the token for its PMU symbol class. */
+static int pmu_str_check(yyscan_t scanner)
+{
+	YYSTYPE *lval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+	enum perf_pmu_event_symbol_type kind;
+
+	lval->str = strdup(text);
+	kind = perf_pmu__parse_check(text);
+	if (kind == PMU_EVENT_SYMBOL_PREFIX)
+		return PE_PMU_EVENT_PRE;
+	if (kind == PMU_EVENT_SYMBOL_SUFFIX)
+		return PE_PMU_EVENT_SUF;
+	if (kind == PMU_EVENT_SYMBOL)
+		return PE_KERNEL_PMU_EVENT;
+	return PE_NAME;	/* anything else is an ordinary name */
+}
+
+static int sym(yyscan_t scanner, int type, int config)
+{
+	parse_events_get_lval(scanner)->num = (type << 16) + config;	/* split again by the grammar */
+	if (type == PERF_TYPE_HARDWARE)
+		return PE_VALUE_SYM_HW;
+	return PE_VALUE_SYM_SW;
+}
+
+/* Hand the term type @type to the grammar as the value of a PE_TERM token. */
+static int term(yyscan_t scanner, int type)
+{
+	parse_events_get_lval(scanner)->num = type;
+
+	return PE_TERM;
+}
+
+#define YY_USER_ACTION					\
+do {							\
+	yylloc->last_column  = yylloc->first_column;	\
+	yylloc->first_column = yycolumn;		\
+	yycolumn += yyleng;				\
+} while (0);
+
+#define USER_REJECT		\
+	yycolumn -= yyleng;	\
+	REJECT
+
+%}
+
+%x mem
+%s config
+%x event
+%x array
+
+group		[^,{}/]*[{][^}]*[}][^,{}/]*
+event_pmu	[^,{}/]+[/][^/]*[/][^,{}/]*
+event		[^,{}/]+
+bpf_object	[^,{}]+\.(o|bpf)[a-zA-Z0-9._]*
+bpf_source	[^,{}]+\.c[a-zA-Z0-9._]*
+
+num_dec		[0-9]+
+num_hex		0x[a-fA-F0-9]+
+num_raw_hex	[a-fA-F0-9]+
+name		[a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]*
+name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
+drv_cfg_term	[a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
+/* If you add a modifier you need to update check_modifier() */
+modifier_event	[ukhpPGHSDIW]+
+modifier_bp	[rwx]{1,3}
+
+%%
+
+%{
+	{
+		int start_token;
+
+		start_token = parse_events_get_extra(yyscanner);
+
+		if (start_token == PE_START_TERMS)
+			BEGIN(config);
+		else if (start_token == PE_START_EVENTS)
+			BEGIN(event);
+
+		if (start_token) {
+			parse_events_set_extra(NULL, yyscanner);
+			/*
+			 * The flex scanner does not initialize the location
+			 * variables via the scan_string interface, so we need
+			 * to do the initialization here.
+			 */
+			yycolumn = 0;
+			return start_token;
+		}
+         }
+%}
+
+<event>{
+
+{group}		{
+			BEGIN(INITIAL);
+			REWIND(0);
+		}
+
+{event_pmu}	|
+{bpf_object}	|
+{bpf_source}	|
+{event}		{
+			BEGIN(INITIAL);
+			REWIND(1);
+			return PE_EVENT_NAME;
+		}
+
+<<EOF>>		{
+			BEGIN(INITIAL);
+			REWIND(0);
+		}
+
+}
+
+<array>{
+"]"			{ BEGIN(config); return ']'; }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
+,			{ return ','; }
+"\.\.\."		{ return PE_ARRAY_RANGE; }
+}
+
+<config>{
+	/*
+	 * Please update config_term_names when new static term is added.
+	 */
+config			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
+config1			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
+config2			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
+name			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
+period			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
+freq			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); }
+branch_type		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
+time			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
+call-graph		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
+stack-size		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
+max-stack		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
+inherit			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
+no-inherit		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
+overwrite		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
+no-overwrite		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
+,			{ return ','; }
+"/"			{ BEGIN(INITIAL); return '/'; }
+{name_minus}		{ return str(yyscanner, PE_NAME); }
+\[all\]			{ return PE_ARRAY_ALL; }
+"["			{ BEGIN(array); return '['; }
+@{drv_cfg_term}		{ return drv_str(yyscanner, PE_DRV_CFG_TERM); }
+}
+
+<mem>{
+{modifier_bp}		{ return str(yyscanner, PE_MODIFIER_BP); }
+:			{ return ':'; }
+"/"			{ return '/'; }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
+	/*
+	 * We need a separate 'mem:' scanner state in order to get the specific
+	 * modifier bits parsed out. Otherwise we would need to handle PE_NAME
+	 * and we'd need to parse it manually. During the escape from <mem>
+	 * state we need to put the escaping char back, so we don't miss it.
+	 */
+.			{ unput(*yytext); BEGIN(INITIAL); }
+	/*
+	 * We destroy the scanner after reaching EOF,
+	 * but anyway just to be sure get back to INIT state.
+	 */
+<<EOF>>			{ BEGIN(INITIAL); }
+}
+
+cpu-cycles|cycles				{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); }
+stalled-cycles-frontend|idle-cycles-frontend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
+stalled-cycles-backend|idle-cycles-backend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+instructions					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); }
+cache-references				{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); }
+cache-misses					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); }
+branch-instructions|branches			{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
+branch-misses					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); }
+bus-cycles					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); }
+ref-cycles					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); }
+cpu-clock					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); }
+task-clock					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); }
+page-faults|faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); }
+minor-faults					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); }
+major-faults					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); }
+context-switches|cs				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); }
+cpu-migrations|migrations			{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); }
+alignment-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
+emulation-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
+dummy						{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+duration_time					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+bpf-output					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
+
+	/*
+	 * The kernel PMU events cycles-ct/cycles-t/mem-loads/mem-stores have to be
+	 * handled separately, because the prefix "cycles" is mixed up with cpu-cycles
+	 * and "loads"/"stores" are mixed up with the cache events.
+	 */
+cycles-ct					{ return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+cycles-t					{ return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+mem-loads					{ return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+mem-stores					{ return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+topdown-[a-z-]+					{ return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+
+L1-dcache|l1-d|l1d|L1-data		|
+L1-icache|l1-i|l1i|L1-instruction	|
+LLC|L2					|
+dTLB|d-tlb|Data-TLB			|
+iTLB|i-tlb|Instruction-TLB		|
+branch|branches|bpu|btb|bpc		|
+node					{ return str(yyscanner, PE_NAME_CACHE_TYPE); }
+
+load|loads|read				|
+store|stores|write			|
+prefetch|prefetches			|
+speculative-read|speculative-load	|
+refs|Reference|ops|access		|
+misses|miss				{ return str(yyscanner, PE_NAME_CACHE_OP_RESULT); }
+
+mem:			{ BEGIN(mem); return PE_PREFIX_MEM; }
+r{num_raw_hex}		{ return raw(yyscanner); }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
+
+{modifier_event}	{ return str(yyscanner, PE_MODIFIER_EVENT); }
+{bpf_object}		{ if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
+{bpf_source}		{ if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
+{name}			{ return pmu_str_check(yyscanner); }
+"/"			{ BEGIN(config); return '/'; }
+-			{ return '-'; }
+,			{ BEGIN(event); return ','; }
+:			{ return ':'; }
+"{"			{ BEGIN(event); return '{'; }
+"}"			{ return '}'; }
+=			{ return '='; }
+\n			{ }
+.			{ }
+
+%%
+
+int parse_events_wrap(void *scanner __maybe_unused)
+{
+	return 1;	/* non-zero tells flex there is no further input after end of input */
+}
diff --git a/util/parse-events.y b/util/parse-events.y
new file mode 100644
index 0000000..7afeb80
--- /dev/null
+++ b/util/parse-events.y
@@ -0,0 +1,697 @@
+%pure-parser
+%parse-param {void *_parse_state}
+%parse-param {void *scanner}
+%lex-param {void* scanner}
+%locations
+
+%{
+
+#define YYDEBUG 1
+
+#include <fnmatch.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include "util.h"
+#include "pmu.h"
+#include "debug.h"
+#include "parse-events.h"
+#include "parse-events-bison.h"
+
+void parse_events_error(YYLTYPE *loc, void *parse_state, void *scanner, char const *msg);
+
+#define ABORT_ON(val) \
+do { \
+	if (val) \
+		YYABORT; \
+} while (0)
+
+#define ALLOC_LIST(list) \
+do { \
+	list = malloc(sizeof(*list)); \
+	ABORT_ON(!list);              \
+	INIT_LIST_HEAD(list);         \
+} while (0)
+
+/* Bump nr_groups, but only for groups with more than one member. */
+static void inc_group_count(struct list_head *list,
+			    struct parse_events_state *parse_state)
+{
+	/* list->next being the last entry means a single-member group */
+	parse_state->nr_groups += !list_is_last(list->next, list);
+}
+
+%}
+
+%token PE_START_EVENTS PE_START_TERMS
+%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM
+%token PE_EVENT_NAME
+%token PE_NAME
+%token PE_BPF_OBJECT PE_BPF_SOURCE
+%token PE_MODIFIER_EVENT PE_MODIFIER_BP
+%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
+%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
+%token PE_ERROR
+%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%token PE_ARRAY_ALL PE_ARRAY_RANGE
+%token PE_DRV_CFG_TERM
+%type <num> PE_VALUE
+%type <num> PE_VALUE_SYM_HW
+%type <num> PE_VALUE_SYM_SW
+%type <num> PE_RAW
+%type <num> PE_TERM
+%type <str> PE_NAME
+%type <str> PE_BPF_OBJECT
+%type <str> PE_BPF_SOURCE
+%type <str> PE_NAME_CACHE_TYPE
+%type <str> PE_NAME_CACHE_OP_RESULT
+%type <str> PE_MODIFIER_EVENT
+%type <str> PE_MODIFIER_BP
+%type <str> PE_EVENT_NAME
+%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%type <str> PE_DRV_CFG_TERM
+%type <num> value_sym
+%type <head> event_config
+%type <head> opt_event_config
+%type <term> event_term
+%type <head> event_pmu
+%type <head> event_legacy_symbol
+%type <head> event_legacy_cache
+%type <head> event_legacy_mem
+%type <head> event_legacy_tracepoint
+%type <tracepoint_name> tracepoint_name
+%type <head> event_legacy_numeric
+%type <head> event_legacy_raw
+%type <head> event_bpf_file
+%type <head> event_def
+%type <head> event_mod
+%type <head> event_name
+%type <head> event
+%type <head> events
+%type <head> group_def
+%type <head> group
+%type <head> groups
+%type <array> array
+%type <array> array_term
+%type <array> array_terms
+
+%union
+{
+	char *str;
+	u64 num;
+	struct list_head *head;
+	struct parse_events_term *term;
+	struct tracepoint_name {
+		char *sys;
+		char *event;
+	} tracepoint_name;
+	struct parse_events_array array;
+}
+%%
+
+start:
+PE_START_EVENTS start_events
+|
+PE_START_TERMS  start_terms
+
+start_events: groups
+{
+	struct parse_events_state *parse_state = _parse_state;
+
+	parse_events_update_lists($1, &parse_state->list);
+}
+
+groups:
+groups ',' group
+{
+	struct list_head *list  = $1;
+	struct list_head *group = $3;
+
+	parse_events_update_lists(group, list);
+	$$ = list;
+}
+|
+groups ',' event
+{
+	struct list_head *list  = $1;
+	struct list_head *event = $3;
+
+	parse_events_update_lists(event, list);
+	$$ = list;
+}
+|
+group
+|
+event
+
+group:
+group_def ':' PE_MODIFIER_EVENT
+{
+	struct list_head *list = $1;
+
+	ABORT_ON(parse_events__modifier_group(list, $3));
+	$$ = list;
+}
+|
+group_def
+
+group_def:
+PE_NAME '{' events '}'
+{
+	struct list_head *list = $3;
+
+	inc_group_count(list, _parse_state);
+	parse_events__set_leader($1, list);
+	$$ = list;
+}
+|
+'{' events '}'
+{
+	struct list_head *list = $2;
+
+	inc_group_count(list, _parse_state);
+	parse_events__set_leader(NULL, list);
+	$$ = list;
+}
+
+events:
+events ',' event
+{
+	struct list_head *event = $3;
+	struct list_head *list  = $1;
+
+	parse_events_update_lists(event, list);
+	$$ = list;
+}
+|
+event
+
+event: event_mod
+
+event_mod:
+event_name PE_MODIFIER_EVENT
+{
+	struct list_head *list = $1;
+
+	/*
+	 * Apply the modifier to all events added by a single event
+	 * definition (more than one event may be added when a tracepoint
+	 * definition uses the '*?' wildcards).
+	 */
+	ABORT_ON(parse_events__modifier_event(list, $2, false));
+	$$ = list;
+}
+|
+event_name
+
+event_name:
+PE_EVENT_NAME event_def
+{
+	ABORT_ON(parse_events_name($2, $1));
+	free($1);
+	$$ = $2;
+}
+|
+event_def
+
+event_def: event_pmu |
+	   event_legacy_symbol |
+	   event_legacy_cache sep_dc |
+	   event_legacy_mem |
+	   event_legacy_tracepoint sep_dc |
+	   event_legacy_numeric sep_dc |
+	   event_legacy_raw sep_dc |
+	   event_bpf_file
+
+event_pmu:
+PE_NAME opt_event_config
+{
+	struct list_head *list, *orig_terms, *terms;
+
+	if (parse_events_copy_term_list($2, &orig_terms))	/* kept for the fallback below */
+		YYABORT;
+
+	ALLOC_LIST(list);
+	if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) {
+		struct perf_pmu *pmu = NULL;
+		int ok = 0;
+		char *pattern;
+
+		/* The exact name failed: retry on every PMU matching "<name>*". */
+		if (asprintf(&pattern, "%s*", $1) < 0)
+			YYABORT;
+		while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+			char *name = pmu->name;
+
+			if (!strncmp(name, "uncore_", 7) &&
+			    strncmp($1, "uncore_", 7))
+				name += 7;	/* "uncore_" may be left out by the user */
+			if (!fnmatch(pattern, name, 0)) {
+				if (parse_events_copy_term_list(orig_terms, &terms)) {
+					free(pattern);
+					YYABORT;
+				}
+				if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true))
+					ok++;
+				parse_events_terms__delete(terms);
+			}
+		}
+
+		free(pattern);
+
+		if (!ok)	/* not a single PMU accepted the event */
+			YYABORT;
+	}
+	parse_events_terms__delete($2);
+	parse_events_terms__delete(orig_terms);
+	$$ = list;
+}
+|
+PE_KERNEL_PMU_EVENT sep_dc
+{
+	struct list_head *list;
+
+	if (parse_events_multi_pmu_add(_parse_state, $1, &list) < 0)
+		YYABORT;
+	$$ = list;
+}
+|
+PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
+{
+	struct list_head *list;
+	char pmu_name[128];
+
+	snprintf(pmu_name, sizeof(pmu_name), "%s-%s", $1, $3);	/* re-join the two halves */
+	if (parse_events_multi_pmu_add(_parse_state, pmu_name, &list) < 0)
+		YYABORT;
+	$$ = list;
+}
+
+value_sym:
+PE_VALUE_SYM_HW
+|
+PE_VALUE_SYM_SW
+
+event_legacy_symbol:
+value_sym '/' event_config '/'
+{
+	struct list_head *list;
+	int type = $1 >> 16;
+	int config = $1 & 255;
+
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, $3));
+	parse_events_terms__delete($3);
+	$$ = list;
+}
+|
+value_sym sep_slash_dc
+{
+	struct list_head *list;
+	int type = $1 >> 16;
+	int config = $1 & 255;
+
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, NULL));
+	$$ = list;
+}
+
+event_legacy_cache:
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct parse_events_error *error = parse_state->error;
+	struct list_head *list;
+
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_cache(list, &parse_state->idx, $1, $3, $5, error, $6));
+	parse_events_terms__delete($6);
+	$$ = list;
+}
+|
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct parse_events_error *error = parse_state->error;
+	struct list_head *list;
+
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_cache(list, &parse_state->idx, $1, $3, NULL, error, $4));
+	parse_events_terms__delete($4);
+	$$ = list;
+}
+|
+PE_NAME_CACHE_TYPE opt_event_config
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct parse_events_error *error = parse_state->error;
+	struct list_head *list;
+
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_cache(list, &parse_state->idx, $1, NULL, NULL, error, $2));
+	parse_events_terms__delete($2);
+	$$ = list;
+}
+
+event_legacy_mem:
+PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct list_head *list;
+
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_breakpoint(list, &parse_state->idx,
+					     (void *) $2, $6, $4));
+	$$ = list;
+}
+|
+PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct list_head *list;
+
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_breakpoint(list, &parse_state->idx,
+					     (void *) $2, NULL, $4));
+	$$ = list;
+}
+|
+PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct list_head *list;
+
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_breakpoint(list, &parse_state->idx,
+					     (void *) $2, $4, 0));
+	$$ = list;
+}
+|
+PE_PREFIX_MEM PE_VALUE sep_dc
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct list_head *list;
+
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_breakpoint(list, &parse_state->idx,
+					     (void *) $2, NULL, 0));
+	$$ = list;
+}
+
+event_legacy_tracepoint:
+tracepoint_name opt_event_config
+{
+	struct parse_events_state *parse_state = _parse_state;
+	struct parse_events_error *error = parse_state->error;
+	struct list_head *list;
+
+	ALLOC_LIST(list);
+	if (error)
+		error->idx = @1.first_column;
+
+	if (parse_events_add_tracepoint(list, &parse_state->idx, $1.sys, $1.event,
+					error, $2))
+		return -1;
+
+	$$ = list;
+}
+
+tracepoint_name:
+PE_NAME '-' PE_NAME ':' PE_NAME
+{
+	struct tracepoint_name tracepoint;
+
+	/* Heap string: a buffer local to this action would dangle afterwards. */
+	ABORT_ON(asprintf(&tracepoint.sys, "%s-%s", $1, $3) < 0);
+	tracepoint.event = $5;
+	free($1);	/* lexer strdup() copies, now merged into tracepoint.sys */
+	free($3);
+	$$ = tracepoint;
+}
+|
+PE_NAME ':' PE_NAME
+{
+	struct tracepoint_name tracepoint = {$1, $3};
+
+	$$ = tracepoint;
+}
+
+event_legacy_numeric:
+PE_VALUE ':' PE_VALUE opt_event_config
+{
+	struct list_head *list;
+
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4));
+	parse_events_terms__delete($4);
+	$$ = list;
+}
+
+event_legacy_raw:
+PE_RAW opt_event_config
+{
+	struct list_head *list;
+
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, $1, $2));
+	parse_events_terms__delete($2);
+	$$ = list;
+}
+
+event_bpf_file:
+PE_BPF_OBJECT opt_event_config
+{
+	struct list_head *list;
+
+	/* Object-file token: load it with the "source" flag cleared. */
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_load_bpf(_parse_state, list, $1, false, $2));
+	/* The term list is deleted here once the load has succeeded. */
+	parse_events_terms__delete($2);
+	$$ = list;
+}
+|
+PE_BPF_SOURCE opt_event_config
+{
+	struct list_head *list;
+
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_load_bpf(_parse_state, list, $1, true, $2));
+	parse_events_terms__delete($2);
+	$$ = list;
+}
+
+opt_event_config:
+'/' event_config '/'
+{
+	$$ = $2;
+}
+|
+'/' '/'
+{
+	$$ = NULL;
+}
+|
+{
+	$$ = NULL;
+}
+
+start_terms: event_config
+{
+	struct parse_events_state *parse_state = _parse_state;
+	parse_state->terms = $1;
+}
+
+event_config:
+event_config ',' event_term
+{
+	struct list_head *head = $1;
+	struct parse_events_term *term = $3;
+
+	ABORT_ON(!head);
+	list_add_tail(&term->list, head);
+	$$ = $1;
+}
+|
+event_term
+{
+	struct list_head *head = malloc(sizeof(*head));
+	struct parse_events_term *term = $1;
+
+	ABORT_ON(!head);
+	INIT_LIST_HEAD(head);
+	list_add_tail(&term->list, head);
+	$$ = head;
+}
+
+event_term:
+PE_NAME '=' PE_NAME
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $3, &@1, &@3));
+	$$ = term;
+}
+|
+PE_NAME '=' PE_VALUE
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $3, false, &@1, &@3));
+	$$ = term;
+}
+|
+PE_NAME '=' PE_VALUE_SYM_HW
+{
+	struct parse_events_term *term;
+	int config = $3 & 255;
+
+	ABORT_ON(parse_events_term__sym_hw(&term, $1, config));
+	$$ = term;
+}
+|
+PE_NAME
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, 1, true, &@1, NULL));
+	$$ = term;
+}
+|
+PE_VALUE_SYM_HW
+{
+	struct parse_events_term *term;
+	int config = $1 & 255;
+
+	ABORT_ON(parse_events_term__sym_hw(&term, NULL, config));
+	$$ = term;
+}
+|
+PE_TERM '=' PE_NAME
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3));
+	$$ = term;
+}
+|
+PE_TERM '=' PE_VALUE
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3));
+	$$ = term;
+}
+|
+PE_TERM
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL));
+	$$ = term;
+}
+|
+PE_NAME array '=' PE_NAME
+{
+	struct parse_events_term *term;
+	int i;
+
+	ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $4, &@1, &@4));
+
+	term->array = $2;
+	$$ = term;
+}
+|
+PE_NAME array '=' PE_VALUE
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $4, false, &@1, &@4));
+	term->array = $2;
+	$$ = term;
+}
+|
+PE_DRV_CFG_TERM
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG,
+					$1, $1, &@1, NULL));
+	$$ = term;
+}
+
+array:
+'[' array_terms ']'
+{
+	$$ = $2;
+}
+|
+PE_ARRAY_ALL
+{
+	$$.nr_ranges = 0;
+	$$.ranges = NULL;
+}
+
+array_terms:
+array_terms ',' array_term
+{
+	struct parse_events_array new_array;
+
+	new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges;
+	new_array.ranges = malloc(sizeof(new_array.ranges[0]) *
+				  new_array.nr_ranges);
+	ABORT_ON(!new_array.ranges);
+	memcpy(&new_array.ranges[0], $1.ranges,
+	       $1.nr_ranges * sizeof(new_array.ranges[0]));
+	memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
+	       $3.nr_ranges * sizeof(new_array.ranges[0]));
+	free($1.ranges);
+	free($3.ranges);
+	$$ = new_array;
+}
+|
+array_term
+
+array_term:
+PE_VALUE
+{
+	struct parse_events_array array;
+
+	array.nr_ranges = 1;
+	array.ranges = malloc(sizeof(array.ranges[0]));
+	ABORT_ON(!array.ranges);
+	array.ranges[0].start = $1;
+	array.ranges[0].length = 1;
+	$$ = array;
+}
+|
+PE_VALUE PE_ARRAY_RANGE PE_VALUE
+{
+	struct parse_events_array array;
+
+	ABORT_ON($3 < $1);
+	array.nr_ranges = 1;
+	array.ranges = malloc(sizeof(array.ranges[0]));
+	ABORT_ON(!array.ranges);
+	array.ranges[0].start = $1;
+	array.ranges[0].length = $3 - $1 + 1;
+	$$ = array;
+}
+
+sep_dc: ':' |
+
+sep_slash_dc: '/' | ':' |
+
+%%
+
+void parse_events_error(YYLTYPE *loc, void *parse_state,
+			void *scanner __maybe_unused,
+			char const *msg __maybe_unused)
+{
+	parse_events_evlist_error(parse_state, loc->last_column, "parser error");	/* @msg is dropped; a fixed text is recorded */
+}
diff --git a/util/parse-regs-options.c b/util/parse-regs-options.c
new file mode 100644
index 0000000..e6599e2
--- /dev/null
+++ b/util/parse-regs-options.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "perf.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-regs-options.h"
+
+/*
+ * Option callback: parse a comma-separated register list into the 64-bit
+ * mask at opt->value ("?" prints the known names). Returns 0 or -1.
+ */
+int
+parse_regs(const struct option *opt, const char *str, int unset)
+{
+	uint64_t *mode = (uint64_t *)opt->value;
+	const struct sample_reg *r;
+	char *s, *os = NULL, *p;
+	int ret = -1;
+
+	if (unset)
+		return 0;
+
+	/* cannot set it twice */
+	if (*mode)
+		return -1;
+
+	/* str may be NULL in case no arg is passed to -I */
+	if (str) {
+		/* because str is read-only */
+		s = os = strdup(str);
+		if (!s)
+			return -1;
+
+		for (;;) {
+			p = strchr(s, ',');
+			if (p)
+				*p = '\0';
+
+			if (!strcmp(s, "?")) {
+				fprintf(stderr, "available registers: ");
+				for (r = sample_reg_masks; r->name; r++)
+					fprintf(stderr, "%s ", r->name);
+				fputc('\n', stderr);
+				/* just printing available regs; os must still be freed */
+				goto error;
+			}
+			for (r = sample_reg_masks; r->name; r++) {
+				if (!strcasecmp(s, r->name))
+					break;
+			}
+			if (!r->name) {
+				ui__warning("unknown register %s,"
+					    " check man page\n", s);
+				goto error;
+			}
+
+			*mode |= r->mask;
+			if (!p)
+				break;
+
+			s = p + 1;
+		}
+	}
+	ret = 0;
+
+	/* default to all possible regs */
+	if (*mode == 0)
+		*mode = PERF_REGS_MASK;
+error:
+	free(os);
+	return ret;
+}
diff --git a/util/parse-regs-options.h b/util/parse-regs-options.h
new file mode 100644
index 0000000..cdefb1a
--- /dev/null
+++ b/util/parse-regs-options.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PARSE_REGS_OPTIONS_H
+#define _PERF_PARSE_REGS_OPTIONS_H 1
+struct option;
+int parse_regs(const struct option *opt, const char *str, int unset);
+#endif /* _PERF_PARSE_REGS_OPTIONS_H */
diff --git a/util/path.c b/util/path.c
new file mode 100644
index 0000000..ca56ba2
--- /dev/null
+++ b/util/path.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I'm tired of doing "vsnprintf()" etc just to open a
+ * file, so here's a "return static buffer with printf"
+ * interface for paths.
+ *
+ * It's obviously not thread-safe. Sue me. But it's quite
+ * useful for doing things like
+ *
+ *   f = open(mkpath("%s/%s.perf", base, name), O_RDONLY);
+ *
+ * which is what it's designed for.
+ */
+#include "cache.h"
+#include "path.h"
+#include <linux/kernel.h>
+#include <limits.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <unistd.h>
+
+static char bad_path[] = "/bad-path/";
+/*
+ * One hack: hand out four static PATH_MAX buffers in rotation.
+ */
+static char *get_pathname(void)
+{
+	static char pathname_array[4][PATH_MAX];
+	static unsigned int idx;	/* unsigned: wraps around instead of overflowing */
+
+	return pathname_array[3 & ++idx];
+}
+
+/* Skip a leading "./" and any '/' characters that directly follow it. */
+static char *cleanup_path(char *path)
+{
+	if (memcmp(path, "./", 2) != 0)
+		return path;
+
+	for (path += 2; *path == '/'; path++)
+		;
+	return path;
+}
+
+/* printf-style formatting into one of get_pathname()'s static buffers. */
+char *mkpath(const char *fmt, ...)
+{
+	char *buf = get_pathname();
+	unsigned written;
+	va_list ap;
+
+	va_start(ap, fmt);
+	written = vsnprintf(buf, PATH_MAX, fmt, ap);
+	va_end(ap);
+	/* the unsigned compare also rejects a negative vsnprintf() result */
+	return written >= PATH_MAX ? bad_path : cleanup_path(buf);
+}
+/* Format path1, "/" and path2 into bf; the "/" is left out when path1 is empty. */
+int path__join(char *bf, size_t size, const char *path1, const char *path2)
+{
+	return scnprintf(bf, size, "%s%s%s", path1, path1[0] ? "/" : "", path2);
+}
+/* Three-component variant: a "/" follows each of path1 and path2 unless it is empty. */
+int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3)
+{
+	return scnprintf(bf, size, "%s%s%s%s%s", path1, path1[0] ? "/" : "",
+			 path2, path2[0] ? "/" : "", path3);
+}
+
+/* True only when @file can be stat()ed and names a regular file. */
+bool is_regular_file(const char *file)
+{
+	struct stat sb;
+
+	if (stat(file, &sb) != 0)
+		return false;
+	return S_ISREG(sb.st_mode);
+}
+
+/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
+bool is_directory(const char *base_path, const struct dirent *dent)
+{
+	char path[PATH_MAX];
+	struct stat st;
+	int len = snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
+
+	/* a truncated path would name some other file: treat it as "no" */
+	if (len < 0 || (size_t)len >= sizeof(path) || stat(path, &st))
+		return false;
+	return S_ISDIR(st.st_mode);
+}
diff --git a/util/path.h b/util/path.h
new file mode 100644
index 0000000..f014f90
--- /dev/null
+++ b/util/path.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PATH_H
+#define _PERF_PATH_H
+
+struct dirent;
+
+int path__join(char *bf, size_t size, const char *path1, const char *path2);
+int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3);
+
+bool is_regular_file(const char *file);
+bool is_directory(const char *base_path, const struct dirent *dent);
+
+#endif /* _PERF_PATH_H */
diff --git a/util/perf-hooks-list.h b/util/perf-hooks-list.h
new file mode 100644
index 0000000..2867c07
--- /dev/null
+++ b/util/perf-hooks-list.h
@@ -0,0 +1,3 @@
+PERF_HOOK(record_start)
+PERF_HOOK(record_end)
+PERF_HOOK(test)
diff --git a/util/perf-hooks.c b/util/perf-hooks.c
new file mode 100644
index 0000000..4f3aa8d
--- /dev/null
+++ b/util/perf-hooks.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * perf_hooks.c
+ *
+ * Copyright (C) 2016 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2016 Huawei Inc.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/perf-hooks.h"
+
+static sigjmp_buf jmpbuf;
+static const struct perf_hook_desc *current_perf_hook;
+
+void perf_hooks__invoke(const struct perf_hook_desc *desc)
+{
+	if (!(desc && desc->p_hook_func && *desc->p_hook_func))
+		return;
+	/* A non-zero sigsetjmp() result means perf_hooks__recover() jumped back here. */
+	if (sigsetjmp(jmpbuf, 1)) {
+		pr_warning("Fatal error (SEGFAULT) in perf hook '%s'\n",
+			   desc->hook_name);
+		*(current_perf_hook->p_hook_func) = NULL;	/* uninstall the failed hook */
+	} else {
+		current_perf_hook = desc;	/* arms perf_hooks__recover() */
+		(**desc->p_hook_func)(desc->hook_ctx);
+	}
+	current_perf_hook = NULL;
+}
+
+void perf_hooks__recover(void)
+{
+	if (current_perf_hook)	/* set only while perf_hooks__invoke() runs a hook */
+		siglongjmp(jmpbuf, 1);	/* resume in its sigsetjmp() failure branch */
+}
+
+#define PERF_HOOK(name)					\
+perf_hook_func_t __perf_hook_func_##name = NULL;	\
+struct perf_hook_desc __perf_hook_desc_##name =		\
+	{.hook_name = #name,				\
+	 .p_hook_func = &__perf_hook_func_##name,	\
+	 .hook_ctx = NULL};
+#include "perf-hooks-list.h"	/* one callback slot plus descriptor per listed hook */
+#undef PERF_HOOK
+/* Second pass over the same list: collect the descriptors' addresses in a table. */
+#define PERF_HOOK(name)		\
+	&__perf_hook_desc_##name,
+
+static struct perf_hook_desc *perf_hooks[] = {
+#include "perf-hooks-list.h"
+};
+#undef PERF_HOOK
+
+int perf_hooks__set_hook(const char *hook_name,
+			 perf_hook_func_t hook_func,
+			 void *hook_ctx)
+{
+	unsigned int i;
+
+	/* install hook_func/hook_ctx on the descriptor called hook_name */
+	for (i = 0; i < ARRAY_SIZE(perf_hooks); i++) {
+		if (!strcmp(hook_name, perf_hooks[i]->hook_name)) {
+			if (*(perf_hooks[i]->p_hook_func))
+				pr_warning("Overwrite existing hook: %s\n", hook_name);
+			*(perf_hooks[i]->p_hook_func) = hook_func;
+			perf_hooks[i]->hook_ctx = hook_ctx;
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+perf_hook_func_t perf_hooks__get_hook(const char *hook_name)
+{
+	unsigned int i;
+
+	/* linear scan of the descriptor table built from perf-hooks-list.h */
+	for (i = 0; i < ARRAY_SIZE(perf_hooks); i++) {
+		if (!strcmp(hook_name, perf_hooks[i]->hook_name))
+			return *(perf_hooks[i]->p_hook_func);
+	}
+	/* unknown name: an ERR_PTR, whereas NULL means "no hook installed" */
+	return ERR_PTR(-ENOENT);
+}
diff --git a/util/perf-hooks.h b/util/perf-hooks.h
new file mode 100644
index 0000000..27fbec6
--- /dev/null
+++ b/util/perf-hooks.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UTIL_PERF_HOOKS_H
+#define PERF_UTIL_PERF_HOOKS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*perf_hook_func_t)(void *ctx);
+struct perf_hook_desc {
+	const char * const hook_name;		/* name the hook is looked up by */
+	perf_hook_func_t * const p_hook_func;	/* slot holding the installed callback, or NULL */
+	void *hook_ctx;				/* handed to the callback as ctx */
+};
+
+extern void perf_hooks__invoke(const struct perf_hook_desc *);
+extern void perf_hooks__recover(void);
+
+#define PERF_HOOK(name)					\
+extern struct perf_hook_desc __perf_hook_desc_##name;	\
+static inline void perf_hooks__invoke_##name(void)	\
+{ 							\
+	perf_hooks__invoke(&__perf_hook_desc_##name);	\
+}
+
+#include "perf-hooks-list.h"
+#undef PERF_HOOK
+
+extern int
+perf_hooks__set_hook(const char *hook_name,
+		     perf_hook_func_t hook_func,
+		     void *hook_ctx);
+
+extern perf_hook_func_t
+perf_hooks__get_hook(const char *hook_name);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/perf_regs.c b/util/perf_regs.c
new file mode 100644
index 0000000..2acfcc5
--- /dev/null
+++ b/util/perf_regs.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include "perf_regs.h"
+#include "event.h"
+
+const struct sample_reg __weak sample_reg_masks[] = {
+	SMPL_REG_END
+};
+
+int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
+				 char **new_op __maybe_unused)
+{
+	return SDT_ARG_SKIP;	/* weak default: always SDT_ARG_SKIP; a strong definition may replace it */
+}
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
+{
+	int i, idx = 0;
+	u64 mask = regs->mask;
+
+	/* 1ULL << id is undefined unless id is a valid bit number of the mask */
+	if (id < 0 || id >= (int)(8 * sizeof(mask)))
+		return -EINVAL;
+	if (regs->cache_mask & (1ULL << id))
+		goto out;
+	if (!(mask & (1ULL << id)))
+		return -EINVAL;
+	/* regs->regs[] is packed: the slot is the number of lower mask bits set */
+	for (i = 0; i < id; i++) {
+		if (mask & (1ULL << i))
+			idx++;
+	}
+	regs->cache_mask |= (1ULL << id);
+	regs->cache_regs[id] = regs->regs[idx];
+out:
+	*valp = regs->cache_regs[id];
+	return 0;
+}
+#endif
diff --git a/util/perf_regs.h b/util/perf_regs.h
new file mode 100644
index 0000000..c9319f8
--- /dev/null
+++ b/util/perf_regs.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_REGS_H
+#define __PERF_REGS_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+struct regs_dump;
+
+struct sample_reg {
+	const char *name;
+	uint64_t mask;
+};
+#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) }
+#define SMPL_REG_END { .name = NULL }
+
+extern const struct sample_reg sample_reg_masks[];
+
+enum {
+	SDT_ARG_VALID = 0,
+	SDT_ARG_SKIP,
+};
+
+int arch_sdt_arg_parse_op(char *old_op, char **new_op);
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+#include <perf_regs.h>
+
+int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
+
+#else
+#define PERF_REGS_MASK	0
+#define PERF_REGS_MAX	0
+
+static inline const char *perf_reg_name(int id __maybe_unused)
+{
+	return NULL;
+}
+
+static inline int perf_reg_value(u64 *valp __maybe_unused,
+				 struct regs_dump *regs __maybe_unused,
+				 int id __maybe_unused)
+{
+	return 0;
+}
+#endif /* HAVE_PERF_REGS_SUPPORT */
+#endif /* __PERF_REGS_H */
diff --git a/util/pmu.c b/util/pmu.c
new file mode 100644
index 0000000..d2fb597
--- /dev/null
+++ b/util/pmu.c
@@ -0,0 +1,1446 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/list.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <dirent.h>
+#include <api/fs/fs.h>
+#include <locale.h>
+#include <regex.h>
+#include "util.h"
+#include "pmu.h"
+#include "parse-events.h"
+#include "cpumap.h"
+#include "header.h"
+#include "pmu-events/pmu-events.h"
+#include "cache.h"
+#include "string2.h"
+
+struct perf_pmu_format {
+	char *name;		/* term name matched by pmu_find_format() */
+	int value;		/* PERF_PMU_FORMAT_VALUE_*: selects attr config, config1 or config2 */
+	DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);	/* bits of that word the term occupies */
+	struct list_head list;
+};
+
+#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
+
+int perf_pmu_parse(struct list_head *list, char *name);
+extern FILE *perf_pmu_in;
+
+static LIST_HEAD(pmus);
+
+/*
+ * Feed every entry of directory 'dir' (except "." and "..") to
+ * perf_pmu_parse(), stopping at the first failure. Returns 0 or non-zero.
+ */
+int perf_pmu__format_parse(char *dir, struct list_head *head)
+{
+	struct dirent *evt_ent;
+	DIR *format_dir;
+	int ret = 0;
+
+	format_dir = opendir(dir);
+	if (!format_dir)
+		return -EINVAL;
+
+	while (!ret && (evt_ent = readdir(format_dir))) {
+		char path[PATH_MAX];
+		char *name = evt_ent->d_name;
+		FILE *file;
+
+		if (!strcmp(name, ".") || !strcmp(name, ".."))
+			continue;
+
+		snprintf(path, PATH_MAX, "%s/%s", dir, name);
+
+		ret = -EINVAL;	/* result if fopen() fails */
+		file = fopen(path, "r");
+		if (!file)
+			break;
+
+		perf_pmu_in = file;	/* presumably the stream perf_pmu_parse() reads - confirm */
+		ret = perf_pmu_parse(head, name);
+		fclose(file);
+	}
+
+	closedir(format_dir);
+	return ret;
+}
+
+/*
+ * Load the format definitions of PMU @name into @format. They are expected
+ * in the sysfs attribute group
+ * /sys/bus/event_source/devices/<dev>/format.
+ * Returns 0 (also when that directory is absent) or -1.
+ */
+static int pmu_format(const char *name, struct list_head *format)
+{
+	const char *sysfs = sysfs__mountpoint();
+	char path[PATH_MAX];
+	struct stat st;
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, PATH_MAX,
+		 "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name);
+
+	/* no error if format does not exist */
+	if (stat(path, &st) < 0)
+		return 0;
+
+	/* every parse failure is reported as a plain -1 */
+	return perf_pmu__format_parse(path, format) ? -1 : 0;
+}
+
+static int convert_scale(const char *scale, char **end, double *sval)
+{
+	char *lc;
+	int ret = 0;
+
+	/*
+	 * save current locale (a NULL locale argument only queries it)
+	 */
+	lc = setlocale(LC_NUMERIC, NULL);
+
+	/*
+	 * The lc string may be allocated in static storage,
+	 * so get a dynamic copy to make it survive setlocale
+	 * call below.
+	 */
+	lc = strdup(lc);
+	if (!lc) {
+		ret = -ENOMEM;	/* *sval is left untouched on this path */
+		goto out;
+	}
+
+	/*
+	 * force to C locale to ensure kernel
+	 * scale string is converted correctly.
+	 * kernel uses default C locale.
+	 */
+	setlocale(LC_NUMERIC, "C");
+
+	*sval = strtod(scale, end);	/* end, when non-NULL, receives the first unparsed char */
+
+out:
+	/* restore locale (with lc == NULL this is just another query) */
+	setlocale(LC_NUMERIC, lc);
+	free(lc);
+	return ret;
+}
+
+/* Read "<dir>/<name>.scale" and convert its text into alias->scale. */
+static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *name)
+{
+	struct stat st;
+	ssize_t sret;
+	char scale[128], path[PATH_MAX];
+	int fd, ret = -1;
+
+	snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return -1;
+
+	if (fstat(fd, &st) < 0)
+		goto error;
+	sret = read(fd, scale, sizeof(scale)-1);
+	/* an empty file holds no scale, and scale[sret - 1] would be scale[-1] */
+	if (sret <= 0)
+		goto error;
+
+	if (scale[sret - 1] == '\n')
+		scale[sret - 1] = '\0';
+	else
+		scale[sret] = '\0';
+
+	ret = convert_scale(scale, NULL, &alias->scale);
+error:
+	close(fd);
+	return ret;
+}
+
+/* Read "<dir>/<name>.unit" into alias->unit; -1 if it cannot be opened or read. */
+static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *name)
+{
+	char path[PATH_MAX];
+	ssize_t sret;
+	int fd;
+
+	snprintf(path, PATH_MAX, "%s/%s.unit", dir, name);
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return -1;
+
+	sret = read(fd, alias->unit, UNIT_MAX_LEN);
+	/* an empty file holds no unit, and unit[sret - 1] would be unit[-1] */
+	if (sret <= 0)
+		goto error;
+	close(fd);
+	/* NOTE(review): unit[sret] assumes unit[] has room for UNIT_MAX_LEN + 1 bytes */
+	if (alias->unit[sret - 1] == '\n')
+		alias->unit[sret - 1] = '\0';
+	else
+		alias->unit[sret] = '\0';
+	return 0;
+error:
+	close(fd);
+	alias->unit[0] = '\0';
+	return -1;
+}
+
+static int
+perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, char *dir, char *name)
+{
+	char path[PATH_MAX];
+	int fd;
+
+	snprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name);
+	/* The file is opened only as a probe; its contents are never read. */
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return -1;
+
+	close(fd);
+
+	alias->per_pkg = true;	/* being able to open the file is what sets the flag */
+	return 0;
+}
+
+static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
+				    char *dir, char *name)
+{
+	char path[PATH_MAX];
+	int fd;
+
+	snprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name);
+	/* The file is opened only as a probe; its contents are never read. */
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return -1;
+
+	alias->snapshot = true;	/* being able to open the file is what sets the flag */
+	close(fd);
+	return 0;
+}
+
+/* Build an alias for @name out of event string @val and append it to @list. */
+static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
+				 char *desc, char *val,
+				 char *long_desc, char *topic,
+				 char *unit, char *perpkg,
+				 char *metric_expr,
+				 char *metric_name)
+{
+	struct perf_pmu_alias *alias;
+	double scale = 1.0;
+	int ret, num;
+
+	/* converted up front so a failure leaks nothing; leaves @unit at the unit name */
+	if (unit && convert_scale(unit, &unit, &scale) < 0)
+		return -1;
+
+	alias = malloc(sizeof(*alias));
+	if (!alias)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&alias->terms);
+	alias->scale = 1.0;
+	alias->unit[0] = '\0';
+	alias->per_pkg = false;
+	alias->snapshot = false;
+	ret = parse_events_terms(&alias->terms, val);
+	if (ret) {
+		pr_err("Cannot parse alias %s: %d\n", val, ret);
+		free(alias);
+		return ret;
+	}
+
+	alias->name = strdup(name);
+	if (dir) {
+		/* load unit name, scale, per-pkg and snapshot info if available */
+		perf_pmu__parse_unit(alias, dir, name);
+		perf_pmu__parse_scale(alias, dir, name);
+		perf_pmu__parse_per_pkg(alias, dir, name);
+		perf_pmu__parse_snapshot(alias, dir, name);
+	}
+	alias->metric_expr = metric_expr ? strdup(metric_expr) : NULL;
+	alias->metric_name = metric_name ? strdup(metric_name): NULL;
+	alias->desc = desc ? strdup(desc) : NULL;
+	alias->long_desc = long_desc ? strdup(long_desc) :
+				desc ? strdup(desc) : NULL;
+	alias->topic = topic ? strdup(topic) : NULL;
+	if (unit) {
+		alias->scale = scale;
+		snprintf(alias->unit, sizeof(alias->unit), "%s", unit);
+	}
+	/* only an explicit @perpkg may override what the sysfs probe above found */
+	if (perpkg)
+		alias->per_pkg = sscanf(perpkg, "%d", &num) == 1 && num == 1;
+	alias->str = strdup(val);
+	list_add_tail(&alias->list, list);
+	return 0;
+}
+
+static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file)
+{
+	char buf[256];
+	int ret;
+
+	/* read at most sizeof(buf) - 1 bytes so that the terminator below fits */
+	ret = fread(buf, 1, sizeof(buf) - 1, file);
+	if (ret == 0)
+		return -EINVAL;
+	buf[ret] = 0;
+
+	return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL,
+				     NULL, NULL, NULL);
+}
+
+/* True when @name is longer than, and ends in, one of the info-file suffixes. */
+static inline bool pmu_alias_info_file(char *name)
+{
+	static const char * const suffixes[] = {
+		".unit", ".scale", ".per-pkg", ".snapshot",
+	};
+	size_t len = strlen(name), i;
+
+	for (i = 0; i < sizeof(suffixes) / sizeof(suffixes[0]); i++) {
+		size_t slen = strlen(suffixes[i]);
+
+		if (len > slen && !strcmp(name + len - slen, suffixes[i]))
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Create an alias for each sysfs attribute found in directory 'dir'.
+ * Entries that fail are logged and skipped; only opendir() failure is an error.
+ */
+static int pmu_aliases_parse(char *dir, struct list_head *head)
+{
+	struct dirent *evt_ent;
+	DIR *event_dir;
+
+	event_dir = opendir(dir);
+	if (!event_dir)
+		return -EINVAL;
+
+	while ((evt_ent = readdir(event_dir))) {
+		char path[PATH_MAX];
+		char *name = evt_ent->d_name;
+		FILE *file;
+
+		if (!strcmp(name, ".") || !strcmp(name, ".."))
+			continue;
+
+		/*
+		 * skip info files parsed in __perf_pmu__new_alias()
+		 */
+		if (pmu_alias_info_file(name))
+			continue;
+
+		scnprintf(path, PATH_MAX, "%s/%s", dir, name);
+
+		file = fopen(path, "r");
+		if (!file) {
+			pr_debug("Cannot open %s\n", path);
+			continue;
+		}
+		/* a bad entry is only logged; the scan carries on */
+		if (perf_pmu__new_alias(head, dir, name, file) < 0)
+			pr_debug("Cannot set up %s\n", name);
+		fclose(file);
+	}
+
+	closedir(event_dir);
+	return 0;
+}
+
+/*
+ * Collect the event aliases of PMU @name into @head. They are expected in the
+ * sysfs attribute group /sys/bus/event_source/devices/<dev>/events.
+ * Returns 0 (also when that directory is absent) or -1.
+ */
+static int pmu_aliases(const char *name, struct list_head *head)
+{
+	const char *sysfs = sysfs__mountpoint();
+	char path[PATH_MAX];
+	struct stat st;
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, PATH_MAX,
+		 "%s/bus/event_source/devices/%s/events", sysfs, name);
+
+	/* no error if 'events' does not exist */
+	if (stat(path, &st) < 0)
+		return 0;
+
+	/* every parse failure is reported as a plain -1 */
+	return pmu_aliases_parse(path, head) ? -1 : 0;
+}
+
+static int pmu_alias_terms(struct perf_pmu_alias *alias,
+			   struct list_head *terms)
+{
+	struct parse_events_term *term, *cloned;
+	LIST_HEAD(list);	/* clones are staged here until all of them succeeded */
+	int ret;
+
+	list_for_each_entry(term, &alias->terms, list) {
+		ret = parse_events_term__clone(&cloned, term);
+		if (ret) {
+			parse_events_terms__purge(&list);	/* drop the clones made so far */
+			return ret;
+		}
+		/*
+		 * Weak terms don't override command line options,
+		 * which we don't want for implicit terms in aliases.
+		 */
+		cloned->weak = true;
+		list_add_tail(&cloned->list, &list);
+	}
+	list_splice(&list, terms);	/* hand the finished clones to the caller's list */
+	return 0;
+}
+
+/*
+ * Read the pmu type value of 'name' into *type from the sysfs attribute
+ * /sys/bus/event_source/devices/<dev>/type.
+ * Returns 0 on success and a negative value otherwise.
+ */
+static int pmu_type(const char *name, __u32 *type)
+{
+	struct stat st;
+	char path[PATH_MAX];
+	FILE *file;
+	int ret = 0;
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, PATH_MAX,
+		 "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name);
+
+	if (stat(path, &st) < 0)
+		return -1;
+
+	file = fopen(path, "r");
+	if (!file)
+		return -EINVAL;
+	/* exactly one unsigned decimal number must be converted */
+	if (1 != fscanf(file, "%u", type))
+		ret = -1;
+
+	fclose(file);
+	return ret;
+}
+
+/* Add all pmus in sysfs to pmu list: */
+static void pmu_read_sysfs(void)
+{
+	char path[PATH_MAX];
+	DIR *dir;
+	struct dirent *dent;
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return;
+
+	snprintf(path, PATH_MAX,
+		 "%s" EVENT_SOURCE_DEVICE_PATH, sysfs);
+
+	dir = opendir(path);
+	if (!dir)
+		return;
+	/* Each directory entry is taken as a pmu name; lookup failures are ignored. */
+	while ((dent = readdir(dir))) {
+		if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
+			continue;
+		/* add to static LIST_HEAD(pmus): */
+		perf_pmu__find(dent->d_name);
+	}
+
+	closedir(dir);
+}
+
+static struct cpu_map *__pmu_cpumask(const char *path)
+{
+	FILE *file;
+	struct cpu_map *cpus;
+	/* Returns NULL when the file cannot be opened; else whatever cpu_map__read() gives. */
+	file = fopen(path, "r");
+	if (!file)
+		return NULL;
+
+	cpus = cpu_map__read(file);
+	fclose(file);
+	return cpus;
+}
+
+/*
+ * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64)
+ * may have a "cpus" file.
+ */
+#define CPUS_TEMPLATE_UNCORE	"%s/bus/event_source/devices/%s/cpumask"
+#define CPUS_TEMPLATE_CPU	"%s/bus/event_source/devices/%s/cpus"
+
+static struct cpu_map *pmu_cpumask(const char *name)
+{
+	char path[PATH_MAX];
+	struct cpu_map *cpus;
+	const char *sysfs = sysfs__mountpoint();
+	const char *templates[] = {
+		CPUS_TEMPLATE_UNCORE,
+		CPUS_TEMPLATE_CPU,
+		NULL
+	};
+	const char **template;
+	/* Try "cpumask" first, then "cpus"; the first file that yields a map wins. */
+	if (!sysfs)
+		return NULL;
+
+	for (template = templates; *template; template++) {
+		snprintf(path, PATH_MAX, *template, sysfs, name);
+		cpus = __pmu_cpumask(path);
+		if (cpus)
+			return cpus;
+	}
+
+	return NULL;
+}
+
+static bool pmu_is_uncore(const char *name)
+{
+	char path[PATH_MAX];
+	struct cpu_map *cpus;
+	const char *sysfs = sysfs__mountpoint();
+	if (!sysfs)	/* as in pmu_cpumask(): never hand a NULL mountpoint to "%s" */
+		return false;
+	snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name);
+	cpus = __pmu_cpumask(path);	/* uncore means: a readable "cpumask" file exists */
+	cpu_map__put(cpus);
+	return !!cpus;
+}
+
+/*
+ *  PMU CORE devices have a name other than "cpu" in sysfs on some
+ *  platforms. Despite the function name, two cases are recognized here:
+ *  a "cpus" attribute (arm) or an existing device named "cpum_*" (s390).
+ */
+static int is_arm_pmu_core(const char *name)
+{
+	struct stat st;
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return 0;
+
+	/* Look for cpu sysfs (specific to arm) */
+	scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus",
+				sysfs, name);
+	if (stat(path, &st) == 0)
+		return 1;
+
+	/* Look for cpu sysfs (specific to s390) */
+	scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s",
+		  sysfs, name);
+	if (stat(path, &st) == 0 && !strncmp(name, "cpum_", 5))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Return the CPU id as a raw string.
+ *
+ * Each architecture should provide a more precise id string that
+ * can be used to match the architecture's "mapfile".
+ */
+char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+	return NULL;	/* weak default: no id available */
+}
+
+/* Return zero when mapcpuid, compiled as a POSIX extended regular
+ * expression, matches the whole of the cpuid string.
+ * Otherwise (including an invalid expression) return non-zero.
+ */
+int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
+{
+	regex_t re;
+	regmatch_t pmatch[1];
+	int match;
+
+	if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) {
+		/* Warn unable to generate match particular string. */
+		pr_info("Invalid regular expression %s\n", mapcpuid);
+		return 1;
+	}
+
+	match = !regexec(&re, cpuid, 1, pmatch, 0);	/* regexec() returns 0 on a match */
+	regfree(&re);
+	if (match) {
+		size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
+
+		/* Verify the entire string matched. */
+		if (match_len == strlen(cpuid))
+			return 0;
+	}
+	return 1;
+}
+
+static char *perf_pmu__getcpuid(struct perf_pmu *pmu)
+{
+	char *cpuid;
+	static bool printed;	/* the debug line below is printed once per process */
+	/* PERF_CPUID in the environment takes precedence over get_cpuid_str(). */
+	cpuid = getenv("PERF_CPUID");
+	if (cpuid)
+		cpuid = strdup(cpuid);	/* copy, since perf_pmu__find_map() free()s the result */
+	if (!cpuid)
+		cpuid = get_cpuid_str(pmu);
+	if (!cpuid)
+		return NULL;
+
+	if (!printed) {
+		pr_debug("Using CPUID %s\n", cpuid);
+		printed = true;
+	}
+	return cpuid;
+}
+
+/*
+ * Return the pmu_events_map[] entry whose cpuid pattern matches this
+ * machine's CPUID string, or NULL when there is none.
+ */
+struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu)
+{
+	char *cpuid = perf_pmu__getcpuid(pmu);
+	struct pmu_events_map *map;
+
+	/* on some platforms which uses cpus map, cpuid can be NULL for
+	 * PMUs other than CORE PMUs.
+	 */
+	if (!cpuid)
+		return NULL;
+
+	/* the array is terminated by an entry without an event table */
+	for (map = &pmu_events_map[0]; map->table; map++) {
+		if (!strcmp_cpuid_str(map->cpuid, cpuid))
+			break;
+	}
+	if (!map->table)
+		map = NULL;
+
+	free(cpuid);
+	return map;
+}
+
+/*
+ * From the pmu_events_map, find the table of PMU events that corresponds
+ * to the current running CPU. Then, add all PMU events from that table
+ * as aliases.
+ */
+static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
+{
+	int i;
+	struct pmu_events_map *map;
+	struct pmu_event *pe;
+	const char *name = pmu->name;
+	const char *pname;
+
+	map = perf_pmu__find_map(pmu);
+	if (!map)
+		return;
+
+	/*
+	 * Found a matching PMU events table. Create aliases
+	 */
+	i = 0;
+	while (1) {
+		/* A nameless entry ends the table unless it carries metric information. */
+		pe = &map->table[i++];
+		if (!pe->name) {
+			if (pe->metric_group || pe->metric_name)
+				continue;
+			break;
+		}
+
+		if (!is_arm_pmu_core(name)) {
+			pname = pe->pmu ? pe->pmu : "cpu";	/* events without a pmu belong to "cpu" */
+			if (strncmp(pname, name, strlen(pname)))	/* pname must be a prefix of name */
+				continue;
+		}
+
+		/* need type casts to override 'const' */
+		__perf_pmu__new_alias(head, NULL, (char *)pe->name,
+				(char *)pe->desc, (char *)pe->event,
+				(char *)pe->long_desc, (char *)pe->topic,
+				(char *)pe->unit, (char *)pe->perpkg,
+				(char *)pe->metric_expr,
+				(char *)pe->metric_name);	/* result ignored: best effort */
+	}
+}
+
+struct perf_event_attr * __weak
+perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+{
+	return NULL;	/* weak default: no default config; a strong definition may replace it */
+}
+
+static struct perf_pmu *pmu_lookup(const char *name)
+{
+	struct perf_pmu *pmu;
+	LIST_HEAD(format);
+	LIST_HEAD(aliases);
+	__u32 type;
+
+	/*
+	 * The pmu data we store & need consists of the pmu
+	 * type value and format definitions. Load both right
+	 * now.
+	 */
+	if (pmu_format(name, &format))
+		return NULL;
+
+	/*
+	 * Check the type first to avoid unnecessary work.
+	 */
+	if (pmu_type(name, &type))
+		return NULL;	/* NOTE(review): entries already on 'format' are not released here */
+
+	if (pmu_aliases(name, &aliases))
+		return NULL;	/* NOTE(review): nor are 'format'/'aliases' entries here */
+
+	pmu = zalloc(sizeof(*pmu));
+	if (!pmu)
+		return NULL;
+
+	pmu->cpus = pmu_cpumask(name);
+	pmu->name = strdup(name);	/* NOTE(review): result unchecked; pmu_find() strcmp()s it */
+	pmu->type = type;
+	pmu->is_uncore = pmu_is_uncore(name);
+	pmu_add_cpu_aliases(&aliases, pmu);
+
+	INIT_LIST_HEAD(&pmu->format);
+	INIT_LIST_HEAD(&pmu->aliases);
+	list_splice(&format, &pmu->format);
+	list_splice(&aliases, &pmu->aliases);
+	list_add_tail(&pmu->list, &pmus);	/* from here on pmu_find() returns it */
+
+	pmu->default_config = perf_pmu__get_default_config(pmu);
+
+	return pmu;
+}
+
+static struct perf_pmu *pmu_find(const char *name)
+{
+	struct perf_pmu *pmu;
+	/* Exact-name search of the already loaded pmus; no sysfs access here. */
+	list_for_each_entry(pmu, &pmus, list)
+		if (!strcmp(pmu->name, name))
+			return pmu;
+
+	return NULL;
+}
+
+struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu)
+{
+	/*
+	 * pmu iterator: If pmu is NULL, we start at the begin,
+	 * otherwise return the next pmu. Returns NULL on end.
+	 */
+	if (!pmu) {
+		pmu_read_sysfs();	/* a fresh iteration first loads every pmu found in sysfs */
+		pmu = list_prepare_entry(pmu, &pmus, list);
+	}
+	list_for_each_entry_continue(pmu, &pmus, list)
+		return pmu;	/* the loop body returns at once: first entry after the cursor */
+	return NULL;
+}
+
+struct perf_pmu *perf_pmu__find(const char *name)
+{
+	/*
+	 * Once PMU is loaded it stays in the list,
+	 * so we keep us from multiple reading/parsing
+	 * the pmu format definitions.
+	 */
+	struct perf_pmu *cached = pmu_find(name);
+
+	/* first request for this name: load it via pmu_lookup() */
+	if (!cached)
+		return pmu_lookup(name);
+
+	return cached;
+}
+
+static struct perf_pmu_format *
+pmu_find_format(struct list_head *formats, const char *name)
+{
+	struct perf_pmu_format *format;
+	/* Exact, case-sensitive match on the format name; NULL when absent. */
+	list_for_each_entry(format, formats, list)
+		if (!strcmp(format->name, name))
+			return format;
+
+	return NULL;
+}
+
+__u64 perf_pmu__format_bits(struct list_head *formats, const char *name)
+{
+	struct perf_pmu_format *format = pmu_find_format(formats, name);
+	__u64 bits = 0;
+	int fbit;
+
+	if (!format)
+		return 0;	/* unknown format name: empty mask */
+	/* Turn the format's bitmap into a plain 64-bit mask, bit for bit. */
+	for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS)
+		bits |= 1ULL << fbit;
+
+	return bits;
+}
+
+/*
+ * Sets value based on the format definition (format parameter)
+ * and unformatted value (value parameter).
+ */
+static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
+			     bool zero)
+{
+	unsigned long fbit, vbit;
+
+	for (fbit = 0, vbit = 0; fbit < PERF_PMU_FORMAT_BITS; fbit++) {
+		/* consecutive bits of value (vbit) land on the set bits of format (fbit) */
+		if (!test_bit(fbit, format))
+			continue;
+
+		if (value & (1llu << vbit++))
+			*v |= (1llu << fbit);
+		else if (zero)	/* clear bits of *v only when asked to */
+			*v &= ~(1llu << fbit);
+	}
+}
+
+/* Largest value that fits in the format: one value bit per bit set in it. */
+static __u64 pmu_format_max_value(const unsigned long *format)
+{
+	int w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
+
+	if (!w)
+		return 0;
+	/* 1ULL << 64 would be undefined, so a full-width format is special */
+	return w < 64 ? (1ULL << w) - 1 : ~0ULL;
+}
+
+/*
+ * Term is a string term, and might be a param-term. Try to look up its value
+ * in the remaining terms.
+ * - We have a term like "base-or-format-term=param-term",
+ * - We need to find the value supplied for "param-term" (with param-term named
+ *   in a config string) later on in the term list.
+ */
+static int pmu_resolve_param_term(struct parse_events_term *term,
+				  struct list_head *head_terms,
+				  __u64 *value)
+{
+	struct parse_events_term *t;
+
+	list_for_each_entry(t, head_terms, list) {
+		if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+			if (!strcmp(t->config, term->config)) {
+				t->used = true;	/* makes pmu_config_term() skip this term later */
+				*value = t->val.num;
+				return 0;
+			}
+		}
+	}
+
+	if (verbose > 0)
+		printf("Required parameter '%s' not specified\n", term->config);
+
+	return -1;
+}
+
+static char *pmu_formats_string(struct list_head *formats)
+{
+	struct perf_pmu_format *format;
+	char *str = NULL;
+	struct strbuf buf = STRBUF_INIT;
+	unsigned i = 0;
+
+	if (!formats)
+		return NULL;
+
+	/* sysfs exported terms */
+	list_for_each_entry(format, formats, list)
+		if (strbuf_addf(&buf, i++ ? ",%s" : "%s", format->name) < 0)
+			goto error;
+	/* success: take the comma-separated buffer; on error str stays NULL */
+	str = strbuf_detach(&buf, NULL);
+error:
+	strbuf_release(&buf);
+
+	return str;
+}
+
+/*
+ * Apply one user term to attr->config, ->config1 or ->config2, whichever
+ * the matching format selects. Returns 0 or -EINVAL (details in *err).
+ */
+static int pmu_config_term(struct list_head *formats,
+			   struct perf_event_attr *attr,
+			   struct parse_events_term *term,
+			   struct list_head *head_terms,
+			   bool zero, struct parse_events_error *err)
+{
+	struct perf_pmu_format *format;
+	__u64 *vp;
+	__u64 val, max_val;
+
+	/*
+	 * If this is a parameter we've already used for parameterized-eval,
+	 * skip it in normal eval.
+	 */
+	if (term->used)
+		return 0;
+
+	/*
+	 * Hardcoded terms should be already in, so nothing
+	 * to be done for them.
+	 */
+	if (parse_events__is_hardcoded_term(term))
+		return 0;
+
+	format = pmu_find_format(formats, term->config);
+	if (!format) {
+		if (verbose > 0)
+			printf("Invalid event/parameter '%s'\n", term->config);
+		if (err) {
+			char *pmu_term = pmu_formats_string(formats);
+
+			err->idx  = term->err_term;
+			err->str  = strdup("unknown term");
+			err->help = parse_events_formats_error_string(pmu_term);
+			free(pmu_term);
+		}
+		return -EINVAL;
+	}
+
+	switch (format->value) {	/* pick the attr word this format writes to */
+	case PERF_PMU_FORMAT_VALUE_CONFIG:
+		vp = &attr->config;
+		break;
+	case PERF_PMU_FORMAT_VALUE_CONFIG1:
+		vp = &attr->config1;
+		break;
+	case PERF_PMU_FORMAT_VALUE_CONFIG2:
+		vp = &attr->config2;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * Either directly use a numeric term, or try to translate string terms
+	 * using event parameters.
+	 */
+	if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+		if (term->no_value &&
+		    bitmap_weight(format->bits, PERF_PMU_FORMAT_BITS) > 1) {
+			if (err) {
+				err->idx = term->err_val;
+				err->str = strdup("no value assigned for term");
+			}
+			return -EINVAL;
+		}
+
+		val = term->val.num;
+	} else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+		if (strcmp(term->val.str, "?")) {	/* "?" is the only string value accepted */
+			if (verbose > 0) {
+				pr_info("Invalid sysfs entry %s=%s\n",
+						term->config, term->val.str);
+			}
+			if (err) {
+				err->idx = term->err_val;
+				err->str = strdup("expected numeric value");
+			}
+			return -EINVAL;
+		}
+
+		if (pmu_resolve_param_term(term, head_terms, &val))
+			return -EINVAL;
+	} else
+		return -EINVAL;
+
+	max_val = pmu_format_max_value(format->bits);
+	if (val > max_val) {
+		if (err) {
+			err->idx = term->err_val;
+			if (asprintf(&err->str,
+				     "value too big for format, maximum is %llu",
+				     (unsigned long long)max_val) < 0)
+				err->str = strdup("value too big for format");
+			return -EINVAL;
+		}
+		/*
+		 * Assume we don't care if !err, in which case the value will be
+		 * silently truncated.
+		 */
+	}
+
+	pmu_format_value(format->bits, val, vp, zero);
+	return 0;
+}
+
+int perf_pmu__config_terms(struct list_head *formats,
+			   struct perf_event_attr *attr,
+			   struct list_head *head_terms,
+			   bool zero, struct parse_events_error *err)
+{
+	struct parse_events_term *t;
+
+	/* Apply every term in list order; any failure is reported as -EINVAL. */
+	list_for_each_entry(t, head_terms, list) {
+		if (pmu_config_term(formats, attr, t, head_terms, zero, err))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill in an event's 'attr' from two sources:
+ * 1) the user's input - the terms queued on head_terms
+ * 2) the PMU's format definitions - pmu->format
+ */
+int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
+		     struct list_head *head_terms,
+		     struct parse_events_error *err)
+{
+	attr->type = pmu->type;
+
+	/* The 'zero' argument is true when the PMU has a default_config. */
+	return perf_pmu__config_terms(&pmu->format, attr, head_terms,
+				      pmu->default_config != NULL, err);
+}
+
+static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
+					     struct parse_events_term *term)
+{
+	struct perf_pmu_alias *cur;
+	char *wanted;
+
+	if (parse_events__is_hardcoded_term(term))
+		return NULL;
+	switch (term->type_val) {
+	case PARSE_EVENTS__TERM_TYPE_NUM:
+		/* Numeric: the value must be 1 and the name not a format. */
+		if (term->val.num != 1 ||
+		    pmu_find_format(&pmu->format, term->config))
+			return NULL;
+		wanted = term->config;
+		break;
+	case PARSE_EVENTS__TERM_TYPE_STR:
+		if (strcasecmp(term->config, "event"))
+			return NULL;
+		wanted = term->val.str;
+		break;
+	default:
+		return NULL;
+	}
+	list_for_each_entry(cur, &pmu->aliases, list)
+		if (!strcasecmp(cur->name, wanted))
+			return cur;
+	return NULL;
+}
+
+
+static int check_info_data(struct perf_pmu_alias *alias,
+			   struct perf_pmu_info *info)
+{
+	const bool has_unit = alias->unit[0] != '\0';
+
+	/*
+	 * Unit, scale and snapshot may each come from at most one
+	 * alias of an event definition; a second provider is an error.
+	 */
+	if (info->unit && has_unit)
+		return -EINVAL;
+	if (info->scale && alias->scale)
+		return -EINVAL;
+	if (info->snapshot && alias->snapshot)
+		return -EINVAL;
+	/* Copy over whichever of the three this alias provides. */
+	if (has_unit)
+		info->unit = alias->unit;
+	if (alias->scale)
+		info->scale = alias->scale;
+	if (alias->snapshot)
+		info->snapshot = alias->snapshot;
+	return 0;
+}
+
+/*
+ * Replace every alias found on the head_terms list with the terms
+ * defined for that alias, collecting unit/scale/etc. into 'info'.
+ */
+int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
+			  struct perf_pmu_info *info)
+{
+	struct parse_events_term *term, *h;
+	struct perf_pmu_alias *alias;
+	int ret;
+
+	info->per_pkg = false;
+
+	/*
+	 * Mark unit and scale as not set
+	 * (different from default values, see below)
+	 */
+	info->unit     = NULL;
+	info->scale    = 0.0;
+	info->snapshot = false;
+	info->metric_expr = NULL;
+	info->metric_name = NULL;
+
+	list_for_each_entry_safe(term, h, head_terms, list) {
+		alias = pmu_find_alias(pmu, term);
+		if (!alias)
+			continue;
+		ret = pmu_alias_terms(alias, &term->list);
+		if (ret)
+			return ret;
+
+		ret = check_info_data(alias, info);
+		if (ret)
+			return ret;
+
+		if (alias->per_pkg)
+			info->per_pkg = true;
+		info->metric_expr = alias->metric_expr;
+		info->metric_name = alias->metric_name;
+		/* The alias term itself is dropped from the list and freed. */
+		list_del(&term->list);
+		free(term);
+	}
+
+	/*
+	 * If no unit or scale was found in the aliases, then
+	 * set defaults as for evsel;
+	 * unit cannot be left NULL.
+	 */
+	if (info->unit == NULL)
+		info->unit   = "";
+
+	if (info->scale == 0.0)
+		info->scale  = 1.0;
+
+	return 0;
+}
+
+int perf_pmu__new_format(struct list_head *list, char *name,
+			 int config, unsigned long *bits)
+{
+	struct perf_pmu_format *format = zalloc(sizeof(*format));
+
+	if (format)
+		format->name = strdup(name);	/* private copy of @name */
+	if (!format || !format->name) {	/* never queue a nameless format */
+		free(format);
+		return -ENOMEM;
+	}
+	format->value = config;
+	memcpy(format->bits, bits, sizeof(format->bits));
+	list_add_tail(&format->list, list);	/* @list now owns the format */
+	return 0;
+}
+
+void perf_pmu__set_format(unsigned long *bits, long from, long to)
+{
+	long b;	/* bit index; a zero @to selects the single bit @from */
+
+	if (!to)
+		to = from;
+	memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS));
+	/* Skip out-of-range bits so a bad format cannot overrun @bits */
+	for (b = from; b >= 0 && b <= to && b < PERF_PMU_FORMAT_BITS; b++)
+		set_bit(b, bits);
+}
+
+static int sub_non_neg(int a, int b)
+{
+	/* Difference a - b, except that the result is clamped
+	 * to 0 when b is larger than a. */
+	return (a >= b) ? a - b : 0;
+}
+
+static char *format_alias(char *buf, int len, struct perf_pmu *pmu,
+			  struct perf_pmu_alias *alias)
+{
+	struct parse_events_term *term;
+	int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name);
+
+	list_for_each_entry(term, &alias->terms, list) {
+		if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+			used += snprintf(buf + used, sub_non_neg(len, used),
+					",%s=%s", term->config,
+					term->val.str);
+	}
+	/* Close with '/' and a NUL while there is still room in buf. */
+	if (sub_non_neg(len, used) > 0) {
+		buf[used] = '/';
+		used++;
+	}
+	if (sub_non_neg(len, used) > 0) {
+		buf[used] = '\0';
+		used++;
+	} else
+		buf[len - 1] = '\0';	/* out of room: terminate at the last byte */
+
+	return buf;	/* always the caller's buffer */
+}
+
+static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu,
+			     struct perf_pmu_alias *alias)
+{
+	snprintf(buf, len, "%s OR %s/%s/", alias->name, pmu->name, alias->name);
+	return buf;	/* the caller's buffer; snprintf() limits output to len */
+}
+
+struct sevent {
+	char *name;		/* allocated copy; print_pmu_events() frees it */
+	char *desc;		/* alias->desc or alias->long_desc; may be NULL */
+	char *topic;		/* alias->topic */
+	char *str;		/* alias->str */
+	char *pmu;		/* pmu->name */
+	char *metric_expr;	/* alias->metric_expr */
+	char *metric_name;	/* alias->metric_name */
+};
+
+static int cmp_sevent(const void *a, const void *b)
+{
+	const struct sevent *lhs = a, *rhs = b;
+	const int l_desc = lhs->desc != NULL, r_desc = rhs->desc != NULL;
+	int by_topic = 0;
+
+	/* Entries that carry a description sort after those that do not. */
+	if (l_desc != r_desc)
+		return l_desc - r_desc;
+
+	/* Topics are only compared when both entries have one. */
+	if (lhs->topic && rhs->topic)
+		by_topic = strcmp(lhs->topic, rhs->topic);
+
+	return by_topic ? by_topic : strcmp(lhs->name, rhs->name);
+}
+
+static void wordwrap(char *s, int start, int max, int corr)
+{
+	int column = start;	/* running output column */
+	int n;
+
+	while (*s) {
+		int wlen = strcspn(s, " \t");	/* length up to next blank/tab */
+
+		if (column + wlen >= max && column > start) {
+			printf("\n%*s", start, "");	/* new line, indented by start */
+			column = start + corr;
+		}
+		n = printf("%s%.*s", column > start ? " " : "", wlen, s);
+		if (n <= 0)	/* nothing printed or output error: stop */
+			break;
+		s += wlen;
+		column += n;
+		s = ltrim(s);	/* presumably skips the whitespace before the next word */
+	}
+}
+
+/*
+ * List the aliases of every PMU (plus "<pmu>//" for selectable PMUs),
+ * sorted and filtered by @event_glob, in the layout the flags ask for.
+ */
+void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
+			bool long_desc, bool details_flag)
+{
+	struct perf_pmu *pmu;
+	struct perf_pmu_alias *alias;
+	char buf[1024];
+	int printed = 0;
+	int len, j;
+	struct sevent *aliases;
+	int numdesc = 0;
+	int columns = pager_get_columns();
+	char *topic = NULL;
+
+	pmu = NULL;
+	len = 0;
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		list_for_each_entry(alias, &pmu->aliases, list)
+			len++;
+		if (pmu->selectable)
+			len++;
+	}
+	aliases = zalloc(sizeof(struct sevent) * len);
+	if (!aliases)
+		goto out_enomem;
+	pmu = NULL;
+	j = 0;
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		list_for_each_entry(alias, &pmu->aliases, list) {
+			char *name = alias->desc ? alias->name :
+				format_alias(buf, sizeof(buf), pmu, alias);
+			bool is_cpu = !strcmp(pmu->name, "cpu");
+
+			if (event_glob != NULL &&
+			    !(strglobmatch_nocase(name, event_glob) ||
+			      (!is_cpu && strglobmatch_nocase(alias->name,
+						       event_glob)) ||
+			      (alias->topic &&
+			       strglobmatch_nocase(alias->topic, event_glob))))
+				continue;
+
+			if (is_cpu && !name_only && !alias->desc)
+				name = format_alias_or(buf, sizeof(buf), pmu, alias);
+
+			/* 'name' is final (it may point into buf): keep a copy */
+			aliases[j].name = strdup(name);
+			if (!aliases[j].name)
+				goto out_enomem;
+
+			aliases[j].desc = long_desc ? alias->long_desc :
+						alias->desc;
+			aliases[j].topic = alias->topic;
+			aliases[j].str = alias->str;
+			aliases[j].pmu = pmu->name;
+			aliases[j].metric_expr = alias->metric_expr;
+			aliases[j].metric_name = alias->metric_name;
+			j++;
+		}
+		if (pmu->selectable &&
+		    (event_glob == NULL || strglobmatch(pmu->name, event_glob))) {
+			char *s;
+			if (asprintf(&s, "%s//", pmu->name) < 0)
+				goto out_enomem;
+			aliases[j].name = s;
+			j++;
+		}
+	}
+	len = j;
+	qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
+	for (j = 0; j < len; j++) {
+		/* Skip duplicates */
+		if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name))
+			continue;
+		if (name_only) {
+			printf("%s ", aliases[j].name);
+			continue;
+		}
+		if (aliases[j].desc && !quiet_flag) {
+			if (numdesc++ == 0)
+				printf("\n");
+			if (aliases[j].topic && (!topic ||
+					strcmp(topic, aliases[j].topic))) {
+				printf("%s%s:\n", topic ? "\n" : "",
+						aliases[j].topic);
+				topic = aliases[j].topic;
+			}
+			printf("  %-50s\n", aliases[j].name);
+			printf("%*s", 8, "[");
+			wordwrap(aliases[j].desc, 8, columns, 0);
+			printf("]\n");
+			if (details_flag) {
+				printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str);
+				if (aliases[j].metric_name)
+					printf(" MetricName: %s", aliases[j].metric_name);
+				if (aliases[j].metric_expr)
+					printf(" MetricExpr: %s", aliases[j].metric_expr);
+				putchar('\n');
+			}
+		} else
+			printf("  %-50s [Kernel PMU event]\n", aliases[j].name);
+		printed++;
+	}
+	if (printed && pager_in_use())
+		printf("\n");
+out_free:
+	for (j = 0; j < len; j++)
+		zfree(&aliases[j].name);
+	zfree(&aliases);
+	return;
+
+out_enomem:
+	printf("FATAL: not enough memory to print PMU events\n");
+	if (aliases)
+		goto out_free;
+}
+
+bool pmu_have_event(const char *pname, const char *name)
+{
+	struct perf_pmu *pmu = NULL;
+	struct perf_pmu_alias *alias;
+
+	/* Scan the aliases of every PMU whose name is exactly pname. */
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		if (strcmp(pname, pmu->name) != 0)
+			continue;
+		list_for_each_entry(alias, &pmu->aliases, list)
+			if (strcmp(alias->name, name) == 0)
+				return true;
+	}
+	return false;
+}
+
+static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
+{
+	const char *sysfs = sysfs__mountpoint();
+	char path[PATH_MAX];
+	struct stat st;
+
+	if (!sysfs)
+		return NULL;
+
+	/* <sysfs mount point><event source dir><pmu name>/<name> */
+	snprintf(path, sizeof(path), "%s" EVENT_SOURCE_DEVICE_PATH "%s/%s",
+		 sysfs, pmu->name, name);
+
+	/* Only try to open the file when stat() can see it. */
+	if (stat(path, &st) < 0)
+		return NULL;
+	return fopen(path, "r");
+}
+
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
+			...)
+{
+	FILE *fp = perf_pmu__open_file(pmu, name);
+	va_list ap;
+	int nr;
+
+	/* EOF tells the caller that the file could not be opened. */
+	if (!fp)
+		return EOF;
+	va_start(ap, fmt);
+	nr = vfscanf(fp, fmt, ap);
+	va_end(ap);
+	fclose(fp);
+	return nr;
+}
diff --git a/util/pmu.h b/util/pmu.h
new file mode 100644
index 0000000..76fecec
--- /dev/null
+++ b/util/pmu.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PMU_H
+#define __PMU_H
+
+#include <linux/bitmap.h>
+#include <linux/compiler.h>
+#include <linux/perf_event.h>
+#include <stdbool.h>
+#include "evsel.h"
+#include "parse-events.h"
+
+enum {
+	PERF_PMU_FORMAT_VALUE_CONFIG,	/* term value goes into attr->config */
+	PERF_PMU_FORMAT_VALUE_CONFIG1,	/* term value goes into attr->config1 */
+	PERF_PMU_FORMAT_VALUE_CONFIG2,	/* term value goes into attr->config2 */
+};
+
+#define PERF_PMU_FORMAT_BITS 64
+
+struct perf_event_attr;
+
+struct perf_pmu {
+	char *name;
+	__u32 type;		  /* copied to attr->type by perf_pmu__config() */
+	bool selectable;	  /* listed as "<name>//" by print_pmu_events() */
+	bool is_uncore;
+	struct perf_event_attr *default_config; /* non-NULL: 'zero' is passed as true */
+	struct cpu_map *cpus;
+	struct list_head format;  /* HEAD struct perf_pmu_format -> list */
+	struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
+	struct list_head list;    /* ELEM */
+	int (*set_drv_config)	(struct perf_evsel_config_term *term);
+};
+
+struct perf_pmu_info {
+	const char *unit;		/* "" when no alias supplied a unit */
+	const char *metric_expr;
+	const char *metric_name;
+	double scale;			/* 1.0 when no alias supplied a scale */
+	bool per_pkg;
+	bool snapshot;
+};
+
+#define UNIT_MAX_LEN	31 /* max length for event unit name */
+
+struct perf_pmu_alias {
+	char *name;
+	char *desc;		/* may be NULL; listing then formats a name instead */
+	char *long_desc;	/* used in place of desc when long_desc is requested */
+	char *topic;
+	char *str;
+	struct list_head terms; /* HEAD struct parse_events_term -> list */
+	struct list_head list;  /* ELEM */
+	char unit[UNIT_MAX_LEN+1];	/* empty string: alias defines no unit */
+	double scale;			/* 0: alias defines no scale */
+	bool per_pkg;
+	bool snapshot;
+	char *metric_expr;
+	char *metric_name;
+};
+
+struct perf_pmu *perf_pmu__find(const char *name);
+int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
+		     struct list_head *head_terms,
+		     struct parse_events_error *error);
+int perf_pmu__config_terms(struct list_head *formats,
+			   struct perf_event_attr *attr,
+			   struct list_head *head_terms,
+			   bool zero, struct parse_events_error *error);
+__u64 perf_pmu__format_bits(struct list_head *formats, const char *name);
+int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
+			  struct perf_pmu_info *info);
+struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
+				  struct list_head *head_terms);
+int perf_pmu_wrap(void);
+void perf_pmu_error(struct list_head *list, char *name, char const *msg);
+
+int perf_pmu__new_format(struct list_head *list, char *name,
+			 int config, unsigned long *bits);
+void perf_pmu__set_format(unsigned long *bits, long from, long to);
+int perf_pmu__format_parse(char *dir, struct list_head *head);
+
+struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
+
+void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
+		      bool long_desc, bool details_flag);
+bool pmu_have_event(const char *pname, const char *name);
+
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4);
+
+int perf_pmu__test(void);
+
+struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
+
+struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);
+
+#endif /* __PMU_H */
diff --git a/util/pmu.l b/util/pmu.l
new file mode 100644
index 0000000..a15d9fb
--- /dev/null
+++ b/util/pmu.l
@@ -0,0 +1,43 @@
+%option prefix="perf_pmu_"
+
+%{
+#include <stdlib.h>
+#include <linux/bitops.h>
+#include "pmu.h"
+#include "pmu-bison.h"
+
+/* Convert the current token text to a number and hand it to the parser. */
+static int value(int base)
+{
+	unsigned long num;	/* same type as strtoul()'s result and lval.num */
+
+	errno = 0;
+	num = strtoul(perf_pmu_text, NULL, base);
+	if (errno)
+		return PP_ERROR;
+	perf_pmu_lval.num = num;
+	return PP_VALUE;
+}
+
+%}
+
+num_dec         [0-9]+
+
+%%
+
+{num_dec}	{ return value(10); }
+config		{ return PP_CONFIG; }
+config1		{ return PP_CONFIG1; }
+config2		{ return PP_CONFIG2; }
+-		{ return '-'; }
+:		{ return ':'; }
+,		{ return ','; }
+.		{ ; }
+\n		{ ; }
+
+%%
+
+int perf_pmu_wrap(void)
+{
+	return 1;	/* yywrap hook (prefix perf_pmu_): non-zero = no more input */
+}
diff --git a/util/pmu.y b/util/pmu.y
new file mode 100644
index 0000000..bfd7e85
--- /dev/null
+++ b/util/pmu.y
@@ -0,0 +1,92 @@
+
+%parse-param {struct list_head *format}
+%parse-param {char *name}
+
+%{
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/bitmap.h>
+#include <string.h>
+#include "pmu.h"
+
+extern int perf_pmu_lex (void);
+
+#define ABORT_ON(val) \
+do { \
+        if (val) \
+                YYABORT; \
+} while (0) /* aborts the parse when val is non-zero; caller adds the ';' */
+
+%}
+
+%token PP_CONFIG PP_CONFIG1 PP_CONFIG2
+%token PP_VALUE PP_ERROR
+%type <num> PP_VALUE
+%type <bits> bit_term
+%type <bits> bits
+
+%union
+{
+	unsigned long num;
+	DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
+}
+
+%%
+
+format:
+format format_term
+|
+format_term
+
+format_term:
+PP_CONFIG ':' bits
+{	/* "config:<bits>" */
+	ABORT_ON(perf_pmu__new_format(format, name,
+				      PERF_PMU_FORMAT_VALUE_CONFIG,
+				      $3));
+}
+|
+PP_CONFIG1 ':' bits
+{	/* "config1:<bits>" */
+	ABORT_ON(perf_pmu__new_format(format, name,
+				      PERF_PMU_FORMAT_VALUE_CONFIG1,
+				      $3));
+}
+|
+PP_CONFIG2 ':' bits
+{	/* "config2:<bits>" */
+	ABORT_ON(perf_pmu__new_format(format, name,
+				      PERF_PMU_FORMAT_VALUE_CONFIG2,
+				      $3));
+}
+
+bits:
+bits ',' bit_term
+{
+	bitmap_or($$, $1, $3, PERF_PMU_FORMAT_BITS); /* width as declared in %union */
+}
+|
+bit_term
+{
+	memcpy($$, $1, sizeof($1));
+}
+
+bit_term:
+PP_VALUE '-' PP_VALUE
+{
+	perf_pmu__set_format($$, $1, $3);	/* bit range $1..$3 */
+}
+|
+PP_VALUE
+{
+	perf_pmu__set_format($$, $1, 0);	/* 'to' of 0: just the single bit $1 */
+}
+
+%%
+
+void perf_pmu_error(struct list_head *list __maybe_unused,
+		    char *name __maybe_unused,
+		    char const *msg __maybe_unused)
+{	/* empty body: the message is not reported anywhere */
+}
diff --git a/util/print_binary.c b/util/print_binary.c
new file mode 100644
index 0000000..23e3670
--- /dev/null
+++ b/util/print_binary.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "print_binary.h"
+#include <linux/log2.h>
+#include "sane_ctype.h"
+
+int binary__fprintf(unsigned char *data, size_t len,
+		    size_t bytes_per_line, binary__fprintf_t printer,
+		    void *extra, FILE *fp)
+{
+	size_t i, j, mask;
+	int printed = 0;
+
+	if (!printer)
+		return 0;
+
+	bytes_per_line = roundup_pow_of_two(bytes_per_line);
+	mask = bytes_per_line - 1;
+	/* Returns the sum of what @printer reports, one call per item. */
+	printed += printer(BINARY_PRINT_DATA_BEGIN, 0, extra, fp);
+	for (i = 0; i < len; i++) {
+		if ((i & mask) == 0) {
+			printed += printer(BINARY_PRINT_LINE_BEGIN, -1, extra, fp);
+			printed += printer(BINARY_PRINT_ADDR, i, extra, fp);
+		}
+
+		printed += printer(BINARY_PRINT_NUM_DATA, data[i], extra, fp);
+
+		if (((i & mask) == mask) || i == len - 1) {
+			for (j = 0; j < mask-(i & mask); j++)
+				printed += printer(BINARY_PRINT_NUM_PAD, -1, extra, fp);
+			/* The separator is output too, so it is counted as well. */
+			printed += printer(BINARY_PRINT_SEP, i, extra, fp);
+			for (j = i & ~mask; j <= i; j++)
+				printed += printer(BINARY_PRINT_CHAR_DATA, data[j], extra, fp);
+			for (j = 0; j < mask-(i & mask); j++)
+				printed += printer(BINARY_PRINT_CHAR_PAD, i, extra, fp);
+			printed += printer(BINARY_PRINT_LINE_END, -1, extra, fp);
+		}
+	}
+	printed += printer(BINARY_PRINT_DATA_END, -1, extra, fp);
+	return printed;
+}
+
+int is_printable_array(char *p, unsigned int len)
+{
+	unsigned int pos;
+
+	/* Must be a non-empty buffer whose final byte is the NUL. */
+	if (!p || !len || p[len - 1] != 0)
+		return 0;
+
+	/* Every byte before that NUL has to be printable or a space. */
+	for (pos = 0; pos + 1 < len; pos++) {
+		if (!(isprint(p[pos]) || isspace(p[pos])))
+			return 0;
+	}
+	return 1;
+}
diff --git a/util/print_binary.h b/util/print_binary.h
new file mode 100644
index 0000000..2a1554a
--- /dev/null
+++ b/util/print_binary.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_PRINT_BINARY_H
+#define PERF_PRINT_BINARY_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+enum binary_printer_ops {
+	BINARY_PRINT_DATA_BEGIN,	/* once, before the first byte */
+	BINARY_PRINT_LINE_BEGIN,	/* at the start of every output line */
+	BINARY_PRINT_ADDR,		/* val: offset of the line's first byte */
+	BINARY_PRINT_NUM_DATA,		/* val: one data byte */
+	BINARY_PRINT_NUM_PAD,		/* filler for a byte missing from a short line */
+	BINARY_PRINT_SEP,		/* between the two passes over a line */
+	BINARY_PRINT_CHAR_DATA,		/* val: one data byte, second pass */
+	BINARY_PRINT_CHAR_PAD,		/* second-pass filler for a short line */
+	BINARY_PRINT_LINE_END,		/* at the end of every output line */
+	BINARY_PRINT_DATA_END,		/* once, after the last byte */
+};
+
+typedef int (*binary__fprintf_t)(enum binary_printer_ops op,
+				 unsigned int val, void *extra, FILE *fp);
+
+int binary__fprintf(unsigned char *data, size_t len,
+		    size_t bytes_per_line, binary__fprintf_t printer,
+		    void *extra, FILE *fp);
+
+static inline void print_binary(unsigned char *data, size_t len,
+				size_t bytes_per_line, binary__fprintf_t printer,
+				void *extra)
+{
+	binary__fprintf(data, len, bytes_per_line, printer, extra, stdout); /* count ignored */
+}
+
+int is_printable_array(char *p, unsigned int len);
+
+#endif /* PERF_PRINT_BINARY_H */
diff --git a/util/probe-event.c b/util/probe-event.c
new file mode 100644
index 0000000..e1dbc98
--- /dev/null
+++ b/util/probe-event.c
@@ -0,0 +1,3545 @@
+/*
+ * probe-event.c : perf-probe definition to probe_events format converter
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <inttypes.h>
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <elf.h>
+
+#include "util.h"
+#include "event.h"
+#include "strlist.h"
+#include "strfilter.h"
+#include "debug.h"
+#include "cache.h"
+#include "color.h"
+#include "symbol.h"
+#include "thread.h"
+#include <api/fs/fs.h>
+#include "trace-event.h"	/* For __maybe_unused */
+#include "probe-event.h"
+#include "probe-finder.h"
+#include "probe-file.h"
+#include "session.h"
+#include "string2.h"
+
+#include "sane_ctype.h"
+
+#define PERFPROBE_GROUP "probe"
+
+bool probe_event_dry_run;	/* Dry run flag */
+struct probe_conf probe_conf;
+
+#define semantic_error(msg ...) pr_err("Semantic error :" msg)
+
+int e_snprintf(char *str, size_t size, const char *format, ...)
+{
+	va_list args;
+	int len;
+
+	va_start(args, format);
+	len = vsnprintf(str, size, format, args);
+	va_end(args);
+	/* Output that did not fit in 'size' bytes is reported as -E2BIG. */
+	return (len >= (int)size) ? -E2BIG : len;
+}
+
+static struct machine *host_machine;
+
+/* Initialize symbol maps and, unless user_only, host_machine; < 0 on error */
+int init_probe_symbol_maps(bool user_only)
+{
+	int ret;
+
+	symbol_conf.sort_by_name = true;
+	symbol_conf.allow_aliases = true;
+	ret = symbol__init(NULL);
+	if (ret < 0) {
+		pr_debug("Failed to init symbol map.\n");
+		goto out;
+	}
+
+	if (host_machine || user_only)	/* already initialized, or not wanted */
+		return 0;
+
+	if (symbol_conf.vmlinux_name)
+		pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
+
+	host_machine = machine__new_host();
+	if (!host_machine) {
+		pr_debug("machine__new_host() failed.\n");
+		symbol__exit();	/* undo the symbol__init() above */
+		ret = -1;
+	}
+out:
+	if (ret < 0)
+		pr_warning("Failed to init vmlinux path.\n");
+	return ret;
+}
+
+void exit_probe_symbol_maps(void)
+{
+	machine__delete(host_machine);
+	host_machine = NULL;	/* so init_probe_symbol_maps() creates a new one */
+	symbol__exit();
+}
+
+static struct symbol *__find_kernel_function_by_name(const char *name,
+						     struct map **mapp)
+{	/* by-name lookup forwarded to host_machine */
+	return machine__find_kernel_function_by_name(host_machine, name, mapp);
+}
+
+static struct symbol *__find_kernel_function(u64 addr, struct map **mapp)
+{	/* by-address lookup forwarded to host_machine */
+	return machine__find_kernel_function(host_machine, addr, mapp);
+}
+
+static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
+{
+	/* kmap->ref_reloc_sym should be set if host_machine is initialized */
+	struct map *kernel_map = machine__kernel_map(host_machine);
+	struct kmap *kmap;
+
+	/* The kernel map has to load before its kmap is consulted. */
+	if (map__load(kernel_map) < 0)
+		return NULL;
+
+	kmap = map__kmap(kernel_map);
+
+	return kmap ? kmap->ref_reloc_sym : NULL;
+}
+
+static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
+					     bool reloc, bool reladdr)
+{
+	struct ref_reloc_sym *ref = kernel_get_ref_reloc_sym();
+	struct symbol *sym;
+	struct map *map;
+
+	/* ref_reloc_sym is just a label, so it is answered directly. */
+	if (ref && !strcmp(name, ref->name)) {
+		*addr = reloc ? ref->addr : ref->unrelocated_addr;
+		return 0;
+	}
+	sym = __find_kernel_function_by_name(name, &map);
+	if (!sym)
+		return -ENOENT;
+	/* Unmapped start, minus relocation and/or map start on request. */
+	*addr = map->unmap_ip(map, sym->start);
+	*addr -= reloc ? 0 : map->reloc;
+	*addr -= reladdr ? map->start : 0;
+	return 0;
+}
+
+static struct map *kernel_get_module_map(const char *module)
+{
+	struct map_groups *grp = &host_machine->kmaps;
+	struct maps *maps = &grp->maps[MAP__FUNCTION];
+	struct map *pos;
+
+	/* A file path -- this is an offline module */
+	if (module && strchr(module, '/'))
+		return dso__new_map(module);
+
+	if (!module)
+		module = "kernel";	/* NULL selects the entry named "kernel" */
+
+	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+		/* short_name is "[module]": compare the text inside the brackets */
+		if (strncmp(pos->dso->short_name + 1, module,
+			    pos->dso->short_name_len - 2) == 0 &&
+		    module[pos->dso->short_name_len - 2] == '\0') {
+			map__get(pos);	/* presumably takes a reference for the caller */
+			return pos;
+		}
+	}
+	return NULL;	/* no map with that name */
+}
+
+struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user)
+{
+	struct map *map;
+
+	/* Kernel side: the target names a module (NULL means the kernel). */
+	if (!user)
+		return kernel_get_module_map(target);
+
+	/* User side: map the executable and tag its dso with the namespace. */
+	map = dso__new_map(target);
+	if (map && map->dso)
+		map->dso->nsinfo = nsinfo__get(nsi);
+	return map;
+}
+
+static int convert_exec_to_group(const char *exec, char **result)
+{
+	char *ptr1, *ptr2, *exec_copy;
+	char buf[64];
+	int ret;
+
+	exec_copy = strdup(exec);	/* private copy: it is modified below */
+	if (!exec_copy)
+		return -ENOMEM;
+
+	ptr1 = basename(exec_copy);
+	if (!ptr1) {
+		ret = -EINVAL;
+		goto out;
+	}
+	/* Cut the name at the first character that is not alphanumeric or '_'. */
+	for (ptr2 = ptr1; *ptr2 != '\0'; ptr2++) {
+		if (!isalnum(*ptr2) && *ptr2 != '_') {
+			*ptr2 = '\0';
+			break;
+		}
+	}
+
+	ret = e_snprintf(buf, 64, "%s_%s", PERFPROBE_GROUP, ptr1);
+	if (ret < 0)	/* includes -E2BIG when the name does not fit in buf */
+		goto out;
+
+	*result = strdup(buf);	/* on success the caller owns *result */
+	ret = *result ? 0 : -ENOMEM;
+
+out:
+	free(exec_copy);
+	return ret;
+}
+
+static void clear_perf_probe_point(struct perf_probe_point *pp)
+{
+	free(pp->file);		/* only the strings are freed ... */
+	free(pp->function);	/* ... *pp itself is left as it is, */
+	free(pp->lazy_line);	/* and the pointers are not reset */
+}
+
+static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
+{
+	int i;
+
+	for (i = 0; i < ntevs; i++)	/* tevs is an array of ntevs entries */
+		clear_probe_trace_event(tevs + i);
+}
+
+static bool kprobe_blacklist__listed(unsigned long address);
+static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
+{
+	u64 etext_addr = 0;
+
+	/* Get the address of _etext for checking non-probable text symbol */
+	if (kernel_get_symbol_address_by_name("_etext", &etext_addr,
+					      false, false) == 0 &&
+	    etext_addr < address) {
+		pr_warning("%s is out of .text, skip it.\n", symbol);
+		return true;
+	}
+
+	if (kprobe_blacklist__listed(address)) {
+		pr_warning("%s is blacklisted function, skip it.\n", symbol);
+		return true;
+	}
+	return false;
+}
+
+/*
+ * @module can be module name or module file path. In case of path,
+ * inspect elf and find out what is actual module name.
+ * Caller has to free mod_name after using it.
+ */
+static char *find_module_name(const char *module)
+{
+	int fd;
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *data;
+	Elf_Scn *sec;
+	char *mod_name = NULL;
+	size_t name_offset;
+
+	fd = open(module, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL)
+		goto elf_err;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL)
+		goto ret_err;
+
+	sec = elf_section_by_name(elf, &ehdr, &shdr,
+			".gnu.linkonce.this_module", NULL);
+	if (!sec)
+		goto ret_err;
+
+	data = elf_getdata(sec, NULL);
+	if (!data || !data->d_buf)
+		goto ret_err;
+
+	/*
+	 * NOTE:
+	 * '.gnu.linkonce.this_module' section of kernel module elf directly
+	 * maps to 'struct module' from linux/module.h. This section contains
+	 * actual module name which will be used by kernel after loading it.
+	 * But, we cannot use 'struct module' here since linux/module.h is not
+	 * exposed to user-space. Offset of 'name' has remained same from long
+	 * time, so hardcoding it here. The copy is bounded by the section size.
+	 */
+	if (ehdr.e_ident[EI_CLASS] == ELFCLASS32)
+		name_offset = 12;
+	else	/* expect ELFCLASS64 by default */
+		name_offset = 24;
+	if (data->d_size > name_offset)
+		mod_name = strndup((char *)data->d_buf + name_offset,
+				   data->d_size - name_offset);
+ret_err:
+	elf_end(elf);
+elf_err:
+	close(fd);
+	return mod_name;
+}
+
+#ifdef HAVE_DWARF_SUPPORT
+
+static int kernel_get_module_dso(const char *module, struct dso **pdso)
+{
+	struct dso *dso;
+	struct map *map;
+	const char *vmlinux_name;
+	int ret = 0;
+
+	if (module) {
+		char module_name[128];
+
+		snprintf(module_name, sizeof(module_name), "[%s]", module);
+		map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name);
+		if (map) {
+			dso = map->dso;
+			goto found;
+		}
+		pr_debug("Failed to find module %s.\n", module);
+		return -ENOENT;	/* *pdso is left untouched on this path */
+	}
+	/* No module given: use the kernel map and load vmlinux into its dso. */
+	map = machine__kernel_map(host_machine);
+	dso = map->dso;
+
+	vmlinux_name = symbol_conf.vmlinux_name;
+	dso->load_errno = 0;
+	if (vmlinux_name)
+		ret = dso__load_vmlinux(dso, map, vmlinux_name, false);
+	else
+		ret = dso__load_vmlinux_path(dso, map);
+found:
+	*pdso = dso;	/* also set when the vmlinux load above returned an error */
+	return ret;
+}
+
+/*
+ * Some binaries like glibc have special symbols which are on the symbol
+ * table, but not in the debuginfo. If we can find the address of the
+ * symbol from map, we can translate the address back to the probe point.
+ */
+static int find_alternative_probe_point(struct debuginfo *dinfo,
+					struct perf_probe_point *pp,
+					struct perf_probe_point *result,
+					const char *target, struct nsinfo *nsi,
+					bool uprobes)
+{
+	struct map *map = NULL;
+	struct symbol *sym;
+	u64 address = 0;
+	int ret = -ENOENT;
+
+	/* This can work only for function-name based one */
+	if (!pp->function || pp->file)
+		return -ENOTSUP;
+
+	map = get_target_map(target, nsi, uprobes);
+	if (!map)
+		return -EINVAL;
+
+	/* Find the address of given function */
+	map__for_each_symbol_by_name(map, pp->function, sym) {
+		if (uprobes)
+			address = sym->start;
+		else
+			address = map->unmap_ip(map, sym->start) - map->reloc;
+		break;	/* only the first matching symbol is used */
+	}
+	if (!address) {
+		ret = -ENOENT;
+		goto out;
+	}
+	pr_debug("Symbol %s address found : %" PRIx64 "\n",
+			pp->function, address);
+	/* Translate the address back to a probe point through debuginfo. */
+	ret = debuginfo__find_probe_point(dinfo, (unsigned long)address,
+					  result);
+	if (ret <= 0)
+		ret = (!ret) ? -ENOENT : ret;	/* 0 is turned into -ENOENT */
+	else {
+		result->offset += pp->offset;	/* re-apply the requested offset, */
+		result->line += pp->line;	/* line and retprobe setting */
+		result->retprobe = pp->retprobe;
+		ret = 0;
+	}
+
+out:
+	map__put(map);
+	return ret;
+
+}
+
+static int get_alternative_probe_event(struct debuginfo *dinfo,
+				       struct perf_probe_event *pev,
+				       struct perf_probe_point *tmp)
+{
+	int ret;
+
+	/* Park the original point in 'tmp'; restore it if the search fails. */
+	*tmp = pev->point;
+	memset(&pev->point, 0, sizeof(pev->point));
+	ret = find_alternative_probe_point(dinfo, tmp, &pev->point, pev->target,
+					   pev->nsi, pev->uprobes);
+	if (ret < 0)
+		pev->point = *tmp;
+	return ret;
+}
+
+static int get_alternative_line_range(struct debuginfo *dinfo,
+				      struct line_range *lr,
+				      const char *target, bool user)
+{
+	struct perf_probe_point pp = { .function = lr->function,
+				       .file = lr->file,
+				       .line = lr->start };
+	struct perf_probe_point result;
+	int ret, len = 0;
+
+	memset(&result, 0, sizeof(result));
+
+	if (lr->end != INT_MAX)	/* INT_MAX end: no length to carry over */
+		len = lr->end - lr->start;
+	ret = find_alternative_probe_point(dinfo, &pp, &result,
+					   target, NULL, user);
+	if (!ret) {
+		lr->function = result.function;	/* lr takes over result's strings */
+		lr->file = result.file;
+		lr->start = result.line;
+		if (lr->end != INT_MAX)
+			lr->end = lr->start + len;	/* keep the original length */
+		clear_perf_probe_point(&pp);	/* frees the strings lr held before */
+	}
+	return ret;
+}
+
+/* Open new debuginfo of given module; NULL on failure (reported unless silent) */
+static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi,
+					bool silent)
+{
+	const char *path = module;
+	char reason[STRERR_BUFSIZE];
+	struct debuginfo *ret = NULL;
+	struct dso *dso = NULL;
+	struct nscookie nsc;
+	int err;
+
+	if (!module || !strchr(module, '/')) {	/* not a path: kernel or module name */
+		err = kernel_get_module_dso(module, &dso);
+		if (err < 0) {
+			if (!dso || dso->load_errno == 0) {
+				if (!str_error_r(-err, reason, STRERR_BUFSIZE))
+					strcpy(reason, "(unknown)");
+			} else
+				dso__strerror_load(dso, reason, STRERR_BUFSIZE);
+			if (!silent)
+				pr_err("Failed to find the path for %s: %s\n",
+					module ?: "kernel", reason);
+			return NULL;
+		}
+		path = dso->long_name;	/* open the file the dso refers to */
+	}
+	nsinfo__mountns_enter(nsi, &nsc);	/* the open happens between enter/exit */
+	ret = debuginfo__new(path);
+	if (!ret && !silent) {
+		pr_warning("The %s file has no debug information.\n", path);
+		if (!module || !strtailcmp(path, ".ko"))
+			pr_warning("Rebuild with CONFIG_DEBUG_INFO=y, ");
+		else
+			pr_warning("Rebuild with -g, ");
+		pr_warning("or install an appropriate debuginfo package.\n");
+	}
+	nsinfo__mountns_exit(&nsc);
+	return ret;
+}
+
+/* For caching the last debuginfo */
+static struct debuginfo *debuginfo_cache;
+static char *debuginfo_cache_path;
+
+static struct debuginfo *debuginfo_cache__open(const char *module, bool silent)
+{
+	const char *path = module;
+
+	/* If the module is NULL, it should be the kernel. */
+	if (!module)
+		path = "kernel";
+
+	if (debuginfo_cache_path && !strcmp(debuginfo_cache_path, path))
+		goto out;
+
+	/* Release the previously cached debuginfo so it is not leaked */
+	debuginfo__delete(debuginfo_cache);
+	debuginfo_cache = NULL;
+	/* Copy module path */
+	free(debuginfo_cache_path);
+	debuginfo_cache_path = strdup(path);
+	if (!debuginfo_cache_path)
+		goto out;
+
+	debuginfo_cache = open_debuginfo(module, NULL, silent);
+	if (!debuginfo_cache)
+		zfree(&debuginfo_cache_path);
+out:
+	return debuginfo_cache;
+}
+
+static void debuginfo_cache__exit(void)
+{
+	debuginfo__delete(debuginfo_cache);	/* release the cached debuginfo */
+	debuginfo_cache = NULL;
+	zfree(&debuginfo_cache_path);		/* and forget the path it was cached under */
+}
+
+/* Store in *@address the .text section's sh_addr - sh_offset of ELF file @exec.
+ * Returns 0, -errno if open() fails, -EINVAL if elf_begin() fails, else -ENOENT. */
+static int get_text_start_address(const char *exec, unsigned long *address,
+				  struct nsinfo *nsi)
+{
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	int fd, ret = -ENOENT;
+	struct nscookie nsc;
+
+	nsinfo__mountns_enter(nsi, &nsc);	/* only the open() runs between mountns enter/exit */
+	fd = open(exec, O_RDONLY);
+	nsinfo__mountns_exit(&nsc);
+	if (fd < 0)
+		return -errno;	/* NOTE(review): errno is read after nsinfo__mountns_exit(); confirm it is not clobbered */
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		ret = -EINVAL;
+		goto out_close;
+	}
+
+	if (gelf_getehdr(elf, &ehdr) == NULL)
+		goto out;	/* ret is still -ENOENT */
+
+	if (!elf_section_by_name(elf, &ehdr, &shdr, ".text", NULL))
+		goto out;	/* ret is still -ENOENT */
+
+	*address = shdr.sh_addr - shdr.sh_offset;
+	ret = 0;
+out:
+	elf_end(elf);
+out_close:
+	close(fd);
+	return ret;
+}
+
+/* Convert trace point @tp to probe point @pp with debuginfo.
+ * Returns 0 on success, or a negative error code (-ENOENT if nothing matched).
+ */
+static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
+					    struct perf_probe_point *pp,
+					    bool is_kprobe)
+{
+	struct debuginfo *dinfo = NULL;
+	unsigned long stext = 0;
+	u64 addr = tp->address;
+	int ret = -ENOENT;
+
+	/* convert the address to dwarf address */
+	if (!is_kprobe) {
+		if (!addr) {
+			ret = -EINVAL;
+			goto error;
+		}
+		ret = get_text_start_address(tp->module, &stext, NULL);
+		if (ret < 0)
+			goto error;
+		addr += stext;
+	} else if (tp->symbol) {
+		/* If the module is given, this returns relative address */
+		ret = kernel_get_symbol_address_by_name(tp->symbol, &addr,
+							false, !!tp->module);
+		if (ret != 0)
+			goto error;
+		addr += tp->offset;
+	}
+
+	pr_debug("try to find information at %" PRIx64 " in %s\n", addr,
+		 tp->module ? : "kernel");
+
+	dinfo = debuginfo_cache__open(tp->module, verbose <= 0);	/* cached: not deleted here */
+	if (dinfo)
+		ret = debuginfo__find_probe_point(dinfo,
+						 (unsigned long)addr, pp);
+	else
+		ret = -ENOENT;
+
+	if (ret > 0) {	/* only a positive result counts as found */
+		pp->retprobe = tp->retprobe;
+		return 0;
+	}
+error:
+	pr_debug("Failed to find corresponding probes from debuginfo.\n");
+	return ret ? : -ENOENT;	/* a zero result is reported as -ENOENT */
+}
+
+/* Adjust symbol name and address of @tp using @map; -ENOENT if no symbol, -ENOMEM */
+static int post_process_probe_trace_point(struct probe_trace_point *tp,
+					   struct map *map, unsigned long offs)
+{
+	struct symbol *sym;
+	u64 addr = tp->address - offs;
+
+	sym = map__find_symbol(map, addr);
+	if (!sym)
+		return -ENOENT;
+
+	if (strcmp(sym->name, tp->symbol)) {	/* NOTE(review): assumes tp->symbol is non-NULL */
+		/* If we have no realname, use symbol for it */
+		if (!tp->realname)
+			tp->realname = tp->symbol;
+		else
+			free(tp->symbol);
+		tp->symbol = strdup(sym->name);
+		if (!tp->symbol)
+			return -ENOMEM;
+	}
+	tp->offset = addr - sym->start;	/* offset from the start of the symbol found in @map */
+	tp->address -= offs;
+
+	return 0;
+}
+
+/*
+ * Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions
+ * and generates new symbols with suffixes such as .constprop.N or .isra.N.
+ * Since those symbols are not recorded in DWARF, we have to find the
+ * correct generated symbols from the offline ELF binary. For online kernel
+ * or uprobes we don't need this because those are rebased on _text, or
+ * already a section relative address. Returns 0 or a negative error code.
+ */
+static int
+post_process_offline_probe_trace_events(struct probe_trace_event *tevs,
+					int ntevs, const char *pathname)
+{
+	struct map *map;
+	unsigned long stext = 0;
+	int i, ret = 0;
+
+	/* Prepare a map for offline binary */
+	map = dso__new_map(pathname);
+	if (!map || get_text_start_address(pathname, &stext, NULL) < 0) {
+		pr_warning("Failed to get ELF symbols for %s\n", pathname);
+		if (map)	/* only the ELF lookup failed: don't leak the map */
+			map__put(map);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ntevs; i++) {
+		ret = post_process_probe_trace_point(&tevs[i].point, map, stext);
+		if (ret < 0)
+			break;
+	}
+	map__put(map);
+	return ret;
+}
+
+/* Turn @tevs into uprobe events on @exec, subtracting its text start from each address */
+static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
+					  int ntevs, const char *exec,
+					  struct nsinfo *nsi)
+{
+	unsigned long text_start = 0;
+	int idx, err;
+
+	if (exec == NULL)
+		return 0;
+	err = get_text_start_address(exec, &text_start, nsi);
+	if (err < 0)
+		return err;
+
+	for (idx = 0; idx < ntevs; idx++) {
+		struct probe_trace_event *tev = &tevs[idx];
+
+		/* point.address is the address of point.symbol + point.offset */
+		tev->point.address -= text_start;
+		tev->point.module = strdup(exec);
+		if (tev->point.module == NULL)
+			return -ENOMEM;
+		tev->uprobes = true;
+	}
+
+	return err;
+}
+
+static int
+post_process_module_probe_trace_events(struct probe_trace_event *tevs,
+				       int ntevs, const char *module,
+				       struct debuginfo *dinfo)
+{
+	Dwarf_Addr text_offs = 0;
+	int i, ret = 0;
+	char *mod_name = NULL;
+	struct map *map;
+
+	if (!module)
+		return 0;
+
+	map = get_target_map(module, NULL, false);
+	if (!map || debuginfo__get_text_offset(dinfo, &text_offs, true) < 0) {
+		pr_warning("Failed to get ELF symbols for %s\n", module);
+		return -EINVAL;
+	}
+
+	mod_name = find_module_name(module);
+	for (i = 0; i < ntevs; i++) {
+		ret = post_process_probe_trace_point(&tevs[i].point,
+						map, (unsigned long)text_offs);
+		if (ret < 0)
+			break;
+		tevs[i].point.module =
+			strdup(mod_name ? mod_name : module);
+		if (!tevs[i].point.module) {
+			ret = -ENOMEM;
+			break;
+		}
+	}
+
+	free(mod_name);
+	map__put(map);
+
+	return ret;
+}
+
+/* Re-express kernel @tevs as reloc_sym + offset; returns the number skipped, or <0 on error */
+static int
+post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
+				       int ntevs)
+{
+	struct ref_reloc_sym *reloc_sym;
+	char *tmp;
+	int i, skipped = 0;
+
+	/* An offline kernel is post-processed against its vmlinux file instead */
+	if (symbol_conf.ignore_vmlinux_buildid)
+		return post_process_offline_probe_trace_events(tevs, ntevs,
+						symbol_conf.vmlinux_name);
+
+	reloc_sym = kernel_get_ref_reloc_sym();
+	if (!reloc_sym) {
+		pr_warning("Relocated base symbol is not found!\n");
+		return -EINVAL;
+	}
+	for (i = 0; i < ntevs; i++) {
+		if (!tevs[i].point.address)
+			continue;
+		if (tevs[i].point.retprobe && !kretprobe_offset_is_supported())
+			continue;
+		/* If we found a wrong one, mark it by NULL symbol */
+		if (kprobe_warn_out_range(tevs[i].point.symbol,
+					  tevs[i].point.address)) {
+			tmp = NULL;
+			skipped++;
+		} else {
+			tmp = strdup(reloc_sym->name);
+			if (!tmp)
+				return -ENOMEM;
+		}
+		/* If we have no realname, use symbol for it */
+		if (!tevs[i].point.realname)
+			tevs[i].point.realname = tevs[i].point.symbol;
+		else
+			free(tevs[i].point.symbol);
+		tevs[i].point.symbol = tmp;
+		tevs[i].point.offset = tevs[i].point.address -
+				       reloc_sym->unrelocated_addr;
+	}
+	return skipped;
+}
+
+void __weak
+arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused,
+				      int ntevs __maybe_unused)
+{	/* __weak default that does nothing */
+}
+
+/* Post process @tevs as uprobe, module or kernel events, then run the arch hook */
+static int post_process_probe_trace_events(struct perf_probe_event *pev,
+					   struct probe_trace_event *tevs,
+					   int ntevs, const char *module,
+					   bool uprobe, struct debuginfo *dinfo)
+{
+	int res;
+
+	if (uprobe) {
+		res = add_exec_to_probe_trace_events(tevs, ntevs, module,
+						     pev->nsi);
+	} else if (!module) {
+		res = post_process_kernel_probe_trace_events(tevs, ntevs);
+	} else {
+		/* Currently ref_reloc_sym based probe is not for drivers */
+		res = post_process_module_probe_trace_events(tevs, ntevs,
+							     module, dinfo);
+	}
+	if (res < 0)
+		return res;
+	arch__post_process_probe_trace_events(pev, ntevs);
+	return res;
+}
+
+/* Try to find perf_probe_event with debuginfo */
+static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
+					  struct probe_trace_event **tevs)
+{
+	bool need_dwarf = perf_probe_event_need_dwarf(pev);
+	struct perf_probe_point tmp;
+	struct debuginfo *dinfo;
+	int ntevs, ret = 0;
+
+	dinfo = open_debuginfo(pev->target, pev->nsi, !need_dwarf);
+	if (!dinfo) {
+		if (need_dwarf)
+			return -ENOENT;
+		pr_debug("Could not open debuginfo. Try to use symbols.\n");
+		return 0;
+	}
+
+	pr_debug("Try to find probe point from debuginfo.\n");
+	/* Searching trace events corresponding to a probe event */
+	ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
+
+	if (ntevs == 0)	{  /* Not found, retry with an alternative */
+		ret = get_alternative_probe_event(dinfo, pev, &tmp);
+		if (!ret) {
+			ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
+			/*
+			 * Write back to the original probe_event for
+			 * setting appropriate (user given) event name
+			 */
+			clear_perf_probe_point(&pev->point);
+			memcpy(&pev->point, &tmp, sizeof(tmp));
+		}
+	}
+
+	if (ntevs > 0) {	/* Succeeded to find trace events */
+		pr_debug("Found %d probe_trace_events.\n", ntevs);
+		ret = post_process_probe_trace_events(pev, *tevs, ntevs,
+					pev->target, pev->uprobes, dinfo);
+		if (ret < 0 || ret == ntevs) {
+			pr_debug("Post processing failed or all events are skipped. (%d)\n", ret);
+			clear_probe_trace_events(*tevs, ntevs);
+			zfree(tevs);
+			ntevs = 0;
+		}
+	}
+
+	debuginfo__delete(dinfo);
+
+	if (ntevs == 0)	{	/* No error but failed to find probe point. */
+		pr_warning("Probe point '%s' not found.\n",
+			   synthesize_perf_probe_point(&pev->point));
+		return -ENOENT;
+	} else if (ntevs < 0) {
+		/* Error path : ntevs < 0 */
+		pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
+		if (ntevs == -EBADF)
+			pr_warning("Warning: No dwarf info found in the vmlinux - "
+				"please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
+		if (!need_dwarf) {
+			pr_debug("Trying to use symbols.\n");
+			return 0;
+		}
+	}
+	return ntevs;
+}
+
+#define LINEBUF_SIZE 256	/* size of the fgets() buffer in __show_one_line() */
+#define NR_ADDITIONAL_LINES 2	/* lines added past the current one when no end line was given */
+
+/* Read one source line from @fp, printing it unless @skip: 1 done, 0 EOF, -1 read error */
+static int __show_one_line(FILE *fp, int l, bool skip, bool show_num)
+{
+	char buf[LINEBUF_SIZE], sbuf[STRERR_BUFSIZE];
+	const char *color = show_num ? "" : PERF_COLOR_BLUE;
+	const char *prefix = NULL;
+
+	do {
+		if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+			goto error;
+		if (skip)
+			continue;	/* goes to the while() test, so the whole line is still consumed */
+		if (!prefix) {	/* print the prefix once, before the first chunk */
+			prefix = show_num ? "%7d  " : "         ";
+			color_fprintf(stdout, color, prefix, l);
+		}
+		color_fprintf(stdout, color, "%s", buf);
+	} while (strchr(buf, '\n') == NULL);	/* a line longer than the buffer takes several reads */
+
+	return 1;
+error:
+	if (ferror(fp)) {
+		pr_warning("File read error: %s\n",
+			   str_error_r(errno, sbuf, sizeof(sbuf)));
+		return -1;
+	}
+	return 0;
+}
+
+static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
+{
+	int rv = __show_one_line(fp, l, skip, show_num);
+	if (rv == 0) {
+		pr_warning("Source file is shorter than expected.\n");
+		rv = -1;
+	}
+	return rv;
+}
+
+#define show_one_line_with_num(f,l)	_show_one_line(f,l,false,true)
+#define show_one_line(f,l)		_show_one_line(f,l,false,false)
+#define skip_one_line(f,l)		_show_one_line(f,l,true,false)
+#define show_one_line_or_eof(f,l)	__show_one_line(f,l,false,false)
+
+/*
+ * Show line-range always requires debuginfo to find source file and
+ * line number. Returns a negative error code on failure.
+ */
+static int __show_line_range(struct line_range *lr, const char *module,
+			     bool user)
+{
+	int l = 1;
+	struct int_node *ln;
+	struct debuginfo *dinfo;
+	FILE *fp;
+	int ret;
+	char *tmp;
+	char sbuf[STRERR_BUFSIZE];
+
+	/* Search a line range */
+	dinfo = open_debuginfo(module, NULL, false);
+	if (!dinfo)
+		return -ENOENT;
+
+	ret = debuginfo__find_line_range(dinfo, lr);
+	if (!ret) {	/* Not found, retry with an alternative */
+		ret = get_alternative_line_range(dinfo, lr, module, user);
+		if (!ret)
+			ret = debuginfo__find_line_range(dinfo, lr);
+	}
+	debuginfo__delete(dinfo);
+	if (ret == 0 || ret == -ENOENT) {
+		pr_warning("Specified source line is not found.\n");
+		return -ENOENT;
+	} else if (ret < 0) {
+		pr_warning("Debuginfo analysis failed.\n");
+		return ret;
+	}
+
+	/* Convert source file path */
+	tmp = lr->path;
+	ret = get_real_path(tmp, lr->comp_dir, &lr->path);
+	/* Free old path when new path is assigned */
+	if (tmp != lr->path)
+		free(tmp);
+
+	if (ret < 0) {
+		pr_warning("Failed to find source file path.\n");
+		return ret;
+	}
+
+	setup_pager();
+
+	if (lr->function)
+		fprintf(stdout, "<%s@%s:%d>\n", lr->function, lr->path,
+			lr->start - lr->offset);
+	else
+		fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
+
+	fp = fopen(lr->path, "r");
+	if (fp == NULL) {
+		ret = -errno;	/* capture before the warning below can clobber errno */
+		pr_warning("Failed to open %s: %s\n", lr->path,
+			   str_error_r(-ret, sbuf, sizeof(sbuf)));
+		return ret;
+	}
+	/* Skip to starting line number */
+	while (l < lr->start) {
+		ret = skip_one_line(fp, l++);
+		if (ret < 0)
+			goto end;
+	}
+
+	intlist__for_each_entry(ln, lr->line_list) {
+		for (; ln->i > l; l++) {	/* lines before the next listed one get no number */
+			ret = show_one_line(fp, l - lr->offset);
+			if (ret < 0)
+				goto end;
+		}
+		ret = show_one_line_with_num(fp, l++ - lr->offset);
+		if (ret < 0)
+			goto end;
+	}
+
+	if (lr->end == INT_MAX)	/* no end line given: show a few more lines */
+		lr->end = l + NR_ADDITIONAL_LINES;
+	while (l <= lr->end) {
+		ret = show_one_line_or_eof(fp, l++ - lr->offset);
+		if (ret <= 0)	/* here end-of-file just stops the listing */
+			break;
+	}
+end:
+	fclose(fp);
+	return ret;
+}
+
+/* Show the source lines described by @lr; returns a negative error code on failure */
+int show_line_range(struct line_range *lr, const char *module,
+		    struct nsinfo *nsi, bool user)
+{
+	int ret;
+	struct nscookie nsc;
+
+	ret = init_probe_symbol_maps(user);
+	if (ret < 0)
+		return ret;
+	nsinfo__mountns_enter(nsi, &nsc);	/* __show_line_range() runs between mountns enter/exit */
+	ret = __show_line_range(lr, module, user);
+	nsinfo__mountns_exit(&nsc);
+	exit_probe_symbol_maps();
+	return ret;
+}
+
+/* Print the variables available at @pev's probe point that pass @_filter; <0 on error */
+static int show_available_vars_at(struct debuginfo *dinfo,
+				  struct perf_probe_event *pev,
+				  struct strfilter *_filter)
+{
+	char *buf;
+	int ret, i, nvars;
+	struct str_node *node;
+	struct variable_list *vls = NULL, *vl;
+	struct perf_probe_point tmp;
+	const char *var;
+
+	buf = synthesize_perf_probe_point(&pev->point);
+	if (!buf)
+		return -EINVAL;
+	pr_debug("Searching variables at %s\n", buf);
+	ret = debuginfo__find_available_vars_at(dinfo, pev, &vls);
+	if (!ret) {  /* Not found, retry with an alternative */
+		ret = get_alternative_probe_event(dinfo, pev, &tmp);
+		if (!ret) {
+			ret = debuginfo__find_available_vars_at(dinfo, pev,
+								&vls);
+			/* Release the old probe_point */
+			clear_perf_probe_point(&tmp);
+		}
+	}
+	if (ret <= 0) {
+		if (ret == 0 || ret == -ENOENT) {
+			pr_err("Failed to find the address of %s\n", buf);
+			ret = -ENOENT;
+		} else
+			pr_warning("Debuginfo analysis failed.\n");
+		goto end;
+	}
+
+	/* Some variables are found */
+	fprintf(stdout, "Available variables at %s\n", buf);
+	for (i = 0; i < ret; i++) {	/* a positive ret is used as the number of entries in vls */
+		vl = &vls[i];
+		/*
+		 * A probe point might be converted to
+		 * several trace points.
+		 */
+		fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
+			vl->point.offset);
+		zfree(&vl->point.symbol);
+		nvars = 0;
+		if (vl->vars) {
+			strlist__for_each_entry(node, vl->vars) {
+				/* NOTE(review): assumes every entry contains a tab; the filter sees the text after it */
+				var = strchr(node->s, '\t') + 1;
+				if (strfilter__compare(_filter, var)) {
+					fprintf(stdout, "\t\t%s\n", node->s);
+					nvars++;
+				}
+			}
+			strlist__delete(vl->vars);
+		}
+		if (nvars == 0)
+			fprintf(stdout, "\t\t(No matched variables)\n");
+	}
+	free(vls);
+end:
+	free(buf);
+	return ret;
+}
+
+/* Show available variables on each of the @npevs given probe points; <0 on error */
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+			struct strfilter *_filter)
+{
+	int i, ret = 0;
+	struct debuginfo *dinfo;
+
+	ret = init_probe_symbol_maps(pevs->uprobes);	/* uprobes flag comes from the first event only */
+	if (ret < 0)
+		return ret;
+
+	dinfo = open_debuginfo(pevs->target, pevs->nsi, false);	/* likewise target and nsi */
+	if (!dinfo) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	setup_pager();
+
+	for (i = 0; i < npevs && ret >= 0; i++)	/* stop at the first failing event */
+		ret = show_available_vars_at(dinfo, &pevs[i], _filter);
+
+	debuginfo__delete(dinfo);
+out:
+	exit_probe_symbol_maps();
+	return ret;
+}
+
+#else	/* !HAVE_DWARF_SUPPORT */
+
+static void debuginfo_cache__exit(void)
+{	/* empty stub for the !HAVE_DWARF_SUPPORT build */
+}
+
+static int
+find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
+				 struct perf_probe_point *pp __maybe_unused,
+				 bool is_kprobe __maybe_unused)
+{
+	return -ENOSYS;	/* stub for the !HAVE_DWARF_SUPPORT build */
+}
+
+static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
+				struct probe_trace_event **tevs __maybe_unused)
+{
+	if (perf_probe_event_need_dwarf(pev)) {	/* !HAVE_DWARF_SUPPORT: such events cannot be handled */
+		pr_warning("Debuginfo-analysis is not supported.\n");
+		return -ENOSYS;
+	}
+
+	return 0;	/* same value the DWARF version returns when it falls back to symbols */
+}
+
+int show_line_range(struct line_range *lr __maybe_unused,
+		    const char *module __maybe_unused,
+		    struct nsinfo *nsi __maybe_unused,
+		    bool user __maybe_unused)
+{
+	pr_warning("Debuginfo-analysis is not supported.\n");
+	return -ENOSYS;	/* stub for the !HAVE_DWARF_SUPPORT build */
+}
+
+int show_available_vars(struct perf_probe_event *pevs __maybe_unused,
+			int npevs __maybe_unused,
+			struct strfilter *filter __maybe_unused)
+{
+	pr_warning("Debuginfo-analysis is not supported.\n");
+	return -ENOSYS;	/* stub for the !HAVE_DWARF_SUPPORT build */
+}
+#endif
+
+void line_range__clear(struct line_range *lr)
+{
+	free(lr->function);	/* release the strings held by @lr ... */
+	free(lr->file);
+	free(lr->path);
+	free(lr->comp_dir);
+	intlist__delete(lr->line_list);	/* ... and its line list */
+	memset(lr, 0, sizeof(*lr));	/* zero every field, including the pointers just freed */
+}
+
+/* Zero @lr and attach a fresh line list to it. */
+int line_range__init(struct line_range *lr)
+{
+	memset(lr, 0, sizeof(*lr));
+	lr->line_list = intlist__new(NULL);
+
+	/* -ENOMEM when the list could not be created, 0 otherwise */
+	return lr->line_list ? 0 : -ENOMEM;
+}
+
+/* Parse the number at *@ptr into *@val and advance *@ptr past it; 0 or -EINVAL */
+static int parse_line_num(char **ptr, int *val, const char *what)
+{
+	const char *start = *ptr;
+	errno = 0;
+	*val = strtol(*ptr, ptr, 0);	/* NOTE(review): the long result is narrowed to int unchecked */
+	if (errno || *ptr == start) {	/* conversion error, or nothing was consumed */
+		semantic_error("'%s' is not a valid number.\n", what);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* True if @name starts with a letter or '_' and continues with letters, digits or '_' */
+static bool is_c_func_name(const char *name)
+{
+	const char *p = name;
+	if (*p != '_' && !isalpha(*p))
+		return false;
+	for (p++; *p != '\0'; p++)
+		if (*p != '_' && !isalpha(*p) && !isdigit(*p))
+			return false;
+	return true;
+}
+
+/*
+ * Stuff 'lr' according to the line range described by 'arg'.
+ * The line range syntax is described by (returns 0 or a negative error):
+ *
+ *         SRC[:SLN[+NUM|-ELN]]
+ *         FNC[@SRC][:SLN[+NUM|-ELN]]
+ */
+int parse_line_range_desc(const char *arg, struct line_range *lr)
+{
+	char *range, *file, *name = strdup(arg);
+	int err;
+
+	if (!name)
+		return -ENOMEM;
+
+	lr->start = 0;
+	lr->end = INT_MAX;	/* INT_MAX stands for "no end line given" */
+
+	range = strchr(name, ':');
+	if (range) {
+		*range++ = '\0';
+
+		err = parse_line_num(&range, &lr->start, "start line");
+		if (err)
+			goto err;
+
+		if (*range == '+' || *range == '-') {
+			const char c = *range++;
+
+			err = parse_line_num(&range, &lr->end, "end line");
+			if (err)
+				goto err;
+
+			if (c == '+') {
+				lr->end += lr->start;
+				/*
+				 * Adjust the number of lines here.
+				 * If the number of lines == 1, the
+				 * end of line should be equal to
+				 * the start of line.
+				 */
+				lr->end--;
+			}
+		}
+
+		pr_debug("Line range is %d to %d\n", lr->start, lr->end);
+
+		err = -EINVAL;
+		if (lr->start > lr->end) {
+			semantic_error("Start line must be smaller"
+				       " than end line.\n");
+			goto err;
+		}
+		if (*range != '\0') {
+			semantic_error("Tailing with invalid str '%s'.\n", range);
+			goto err;
+		}
+	}
+
+	file = strchr(name, '@');
+	if (file) {
+		*file = '\0';
+		lr->file = strdup(++file);
+		if (lr->file == NULL) {
+			err = -ENOMEM;
+			goto err;
+		}
+		lr->function = name;	/* @lr keeps the strdup()ed buffer from here on */
+	} else if (strchr(name, '/') || strchr(name, '.'))
+		lr->file = name;	/* a '/' or '.' makes it a file name */
+	else if (is_c_func_name(name))/* We reuse it for checking funcname */
+		lr->function = name;
+	else {	/* Invalid name */
+		semantic_error("'%s' is not a valid function name.\n", name);
+		err = -EINVAL;
+		goto err;
+	}
+
+	return 0;
+err:
+	free(name);	/* on every error path @name has not been stored in @lr */
+	return err;
+}
+
+/* Split "[GROUP:]EVENT" at *@arg into pev->group and pev->event; 0, -ENOMEM or -EINVAL */
+static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
+{
+	char *ptr;
+
+	ptr = strpbrk_esc(*arg, ":");
+	if (ptr) {
+		*ptr = '\0';
+		if (!pev->sdt && !is_c_func_name(*arg))	/* non-SDT names must look like C identifiers */
+			goto ng_name;
+		pev->group = strdup_esc(*arg);
+		if (!pev->group)
+			return -ENOMEM;
+		*arg = ptr + 1;
+	} else
+		pev->group = NULL;
+
+	pev->event = strdup_esc(*arg);
+	if (pev->event == NULL)
+		return -ENOMEM;
+	if (!pev->sdt && !is_c_func_name(pev->event)) {
+		zfree(&pev->event);
+ng_name:	/* also reached by the goto above, when the group name is bad */
+		zfree(&pev->group);
+		semantic_error("%s is bad for event name -it must "
+			       "follow C symbol-naming rule.\n", *arg);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Parse probepoint definition @arg (modified in place) into @pev; returns 0 or <0 */
+static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
+{
+	struct perf_probe_point *pp = &pev->point;
+	char *ptr, *tmp;
+	char c, nc = 0;
+	bool file_spec = false;
+	int ret;
+
+	/*
+	 * <Syntax>
+	 * perf probe [GRP:][EVENT=]SRC[:LN|;PTN]
+	 * perf probe [GRP:][EVENT=]FUNC[@SRC][+OFFS|%return|:LN|;PAT]
+	 * perf probe %[GRP:]SDT_EVENT
+	 */
+	if (!arg)
+		return -EINVAL;
+
+	if (is_sdt_event(arg)) {
+		pev->sdt = true;
+		if (arg[0] == '%')
+			arg++;
+	}
+
+	ptr = strpbrk_esc(arg, ";=@+%");
+	if (pev->sdt) {
+		if (ptr) {
+			if (*ptr != '@') {
+				semantic_error("%s must be an SDT name.\n",
+					       arg);
+				return -EINVAL;
+			}
+			/* This must be a target file name or build id */
+			tmp = build_id_cache__complement(ptr + 1);
+			if (tmp) {
+				pev->target = build_id_cache__origname(tmp);
+				free(tmp);
+			} else
+				pev->target = strdup_esc(ptr + 1);
+			if (!pev->target)
+				return -ENOMEM;
+			*ptr = '\0';
+		}
+		ret = parse_perf_probe_event_name(&arg, pev);
+		if (ret == 0) {
+			if (asprintf(&pev->point.function, "%%%s", pev->event) < 0)
+				ret = -errno;
+		}
+		return ret;	/* an SDT event takes no further options */
+	}
+
+	if (ptr && *ptr == '=') {	/* Event name */
+		*ptr = '\0';
+		tmp = ptr + 1;
+		ret = parse_perf_probe_event_name(&arg, pev);
+		if (ret < 0)
+			return ret;
+
+		arg = tmp;
+	}
+
+	/*
+	 * Check arg is function or file name and copy it.
+	 *
+	 * We consider arg to be a file spec if and only if it satisfies
+	 * all of the below criteria:
+	 * - it does not include any of "+@%",
+	 * - it includes one of ":;", and
+	 * - it has a period '.' in the name.
+	 *
+	 * Otherwise, we consider arg to be a function specification.
+	 */
+	if (!strpbrk_esc(arg, "+@%")) {
+		ptr = strpbrk_esc(arg, ";:");
+		/* This is a file spec if it includes a '.' before ; or : */
+		if (ptr && memchr(arg, '.', ptr - arg))
+			file_spec = true;
+	}
+
+	ptr = strpbrk_esc(arg, ";:+@%");
+	if (ptr) {
+		nc = *ptr;	/* remember which separator introduces the next option */
+		*ptr++ = '\0';
+	}
+
+	if (arg[0] == '\0')
+		tmp = NULL;
+	else {
+		tmp = strdup_esc(arg);
+		if (tmp == NULL)
+			return -ENOMEM;
+	}
+
+	if (file_spec)
+		pp->file = tmp;
+	else {
+		pp->function = tmp;
+
+		/*
+		 * Keep pp->function even if this is absolute address,
+		 * so it can mark whether abs_address is valid.
+		 * This makes 'perf probe lib.bin 0x0' possible.
+		 *
+		 * Note that checking length of tmp is not needed
+		 * because when we access tmp[1] we know tmp[0] is '0',
+		 * so tmp[1] should always be valid (but could be '\0').
+		 */
+		if (tmp && !strncmp(tmp, "0x", 2)) {
+			pp->abs_address = strtoul(pp->function, &tmp, 0);
+			if (*tmp != '\0') {
+				semantic_error("Invalid absolute address.\n");
+				return -EINVAL;
+			}
+		}
+	}
+
+	/* Parse other options */
+	while (ptr) {
+		arg = ptr;
+		c = nc;
+		if (c == ';') {	/* Lazy pattern must be the last part */
+			pp->lazy_line = strdup(arg); /* let leave escapes */
+			if (pp->lazy_line == NULL)
+				return -ENOMEM;
+			break;
+		}
+		ptr = strpbrk_esc(arg, ";:+@%");
+		if (ptr) {
+			nc = *ptr;
+			*ptr++ = '\0';
+		}
+		switch (c) {
+		case ':':	/* Line number */
+			pp->line = strtoul(arg, &tmp, 0);
+			if (*tmp != '\0') {
+				semantic_error("There is non-digit char"
+					       " in line number.\n");
+				return -EINVAL;
+			}
+			break;
+		case '+':	/* Byte offset from a symbol */
+			pp->offset = strtoul(arg, &tmp, 0);
+			if (*tmp != '\0') {
+				semantic_error("There is non-digit character"
+						" in offset.\n");
+				return -EINVAL;
+			}
+			break;
+		case '@':	/* File name */
+			if (pp->file) {
+				semantic_error("SRC@SRC is not allowed.\n");
+				return -EINVAL;
+			}
+			pp->file = strdup_esc(arg);
+			if (pp->file == NULL)
+				return -ENOMEM;
+			break;
+		case '%':	/* Probe places */
+			if (strcmp(arg, "return") == 0) {
+				pp->retprobe = 1;
+			} else {	/* Others not supported yet */
+				semantic_error("%%%s is not supported.\n", arg);
+				return -ENOTSUP;
+			}
+			break;
+		default:	/* Buggy case */
+			pr_err("This program has a bug at %s:%d.\n",
+				__FILE__, __LINE__);
+			return -ENOTSUP;
+			break;
+		}
+	}
+
+	/* Exclusion check */
+	if (pp->lazy_line && pp->line) {
+		semantic_error("Lazy pattern can't be used with"
+			       " line number.\n");
+		return -EINVAL;
+	}
+
+	if (pp->lazy_line && pp->offset) {
+		semantic_error("Lazy pattern can't be used with offset.\n");
+		return -EINVAL;
+	}
+
+	if (pp->line && pp->offset) {
+		semantic_error("Offset can't be used with line number.\n");
+		return -EINVAL;
+	}
+
+	if (!pp->line && !pp->lazy_line && pp->file && !pp->function) {
+		semantic_error("File always requires line number or "
+			       "lazy pattern.\n");
+		return -EINVAL;
+	}
+
+	if (pp->offset && !pp->function) {
+		semantic_error("Offset requires an entry function.\n");
+		return -EINVAL;
+	}
+
+	if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) {
+		semantic_error("Offset/Line/Lazy pattern can't be used with "
+			       "return probe.\n");
+		return -EINVAL;
+	}
+
+	pr_debug("symbol:%s file:%s line:%d offset:%lu return:%d lazy:%s\n",
+		 pp->function, pp->file, pp->line, pp->offset, pp->retprobe,
+		 pp->lazy_line);
+	return 0;
+}
+
+/* Parse perf-probe event argument @str ("[NAME=]VAR[FIELDS][:TYPE]") into @arg; 0 or <0 */
+static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
+{
+	char *tmp, *goodname;
+	struct perf_probe_arg_field **fieldp;
+
+	pr_debug("parsing arg: %s into ", str);
+
+	tmp = strchr(str, '=');
+	if (tmp) {
+		arg->name = strndup(str, tmp - str);
+		if (arg->name == NULL)
+			return -ENOMEM;
+		pr_debug("name:%s ", arg->name);
+		str = tmp + 1;
+	}
+
+	tmp = strchr(str, ':');
+	if (tmp) {	/* Type setting */
+		*tmp = '\0';
+		arg->type = strdup(tmp + 1);
+		if (arg->type == NULL)
+			return -ENOMEM;
+		pr_debug("type:%s ", arg->type);
+	}
+
+	tmp = strpbrk(str, "-.[");
+	if (!is_c_varname(str) || !tmp) {
+		/* A variable, register, symbol or special value */
+		arg->var = strdup(str);
+		if (arg->var == NULL)
+			return -ENOMEM;
+		pr_debug("%s\n", arg->var);
+		return 0;
+	}
+
+	/* Structure fields or array element */
+	arg->var = strndup(str, tmp - str);
+	if (arg->var == NULL)
+		return -ENOMEM;
+	goodname = arg->var;
+	pr_debug("%s, ", arg->var);
+	fieldp = &arg->field;
+
+	do {	/* one field per pass; tmp points at the next '-', '.' or '[' */
+		*fieldp = zalloc(sizeof(struct perf_probe_arg_field));
+		if (*fieldp == NULL)
+			return -ENOMEM;
+		if (*tmp == '[') {	/* Array */
+			str = tmp;
+			(*fieldp)->index = strtol(str + 1, &tmp, 0);
+			(*fieldp)->ref = true;
+			if (*tmp != ']' || tmp == str + 1) {
+				semantic_error("Array index must be a"
+						" number.\n");
+				return -EINVAL;
+			}
+			tmp++;
+			if (*tmp == '\0')
+				tmp = NULL;
+		} else {		/* Structure */
+			if (*tmp == '.') {
+				str = tmp + 1;
+				(*fieldp)->ref = false;
+			} else if (tmp[1] == '>') {	/* "->" */
+				str = tmp + 2;
+				(*fieldp)->ref = true;
+			} else {
+				semantic_error("Argument parse error: %s\n",
+					       str);
+				return -EINVAL;
+			}
+			tmp = strpbrk(str, "-.[");
+		}
+		if (tmp) {	/* more fields follow: name this one and move on */
+			(*fieldp)->name = strndup(str, tmp - str);
+			if ((*fieldp)->name == NULL)
+				return -ENOMEM;
+			if (*str != '[')
+				goodname = (*fieldp)->name;
+			pr_debug("%s(%d), ", (*fieldp)->name, (*fieldp)->ref);
+			fieldp = &(*fieldp)->next;
+		}
+	} while (tmp);
+	(*fieldp)->name = strdup(str);	/* the last field takes the rest of the string */
+	if ((*fieldp)->name == NULL)
+		return -ENOMEM;
+	if (*str != '[')
+		goodname = (*fieldp)->name;
+	pr_debug("%s(%d)\n", (*fieldp)->name, (*fieldp)->ref);
+
+	/* If no name is specified, set the last field name (not array index)*/
+	if (!arg->name) {
+		arg->name = strdup(goodname);
+		if (arg->name == NULL)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/* Parse perf-probe event command @cmd into @pev; returns 0 or a negative error code */
+int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev)
+{
+	char **argv;
+	int argc, i, ret = 0;
+
+	argv = argv_split(cmd, &argc);
+	if (!argv) {
+		pr_debug("Failed to split arguments.\n");
+		return -ENOMEM;
+	}
+	if (argc - 1 > MAX_PROBE_ARGS) {	/* argv[0] is the probe point, the rest are arguments */
+		semantic_error("Too many probe arguments (%d).\n", argc - 1);
+		ret = -ERANGE;
+		goto out;
+	}
+	/* Parse probe point */
+	ret = parse_perf_probe_point(argv[0], pev);
+	if (ret < 0)
+		goto out;
+
+	/* Copy arguments and ensure return probe has no C argument */
+	pev->nargs = argc - 1;
+	pev->args = zalloc(sizeof(struct perf_probe_arg) * pev->nargs);
+	if (pev->args == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	for (i = 0; i < pev->nargs && ret >= 0; i++) {	/* stops at the first failing argument */
+		ret = parse_perf_probe_arg(argv[i + 1], &pev->args[i]);
+		if (ret >= 0 &&
+		    is_c_varname(pev->args[i].var) && pev->point.retprobe) {
+			semantic_error("You can't specify local variable for"
+				       " kretprobe.\n");
+			ret = -EINVAL;
+		}
+	}
+out:
+	argv_free(argv);
+
+	return ret;
+}
+
+/* True when at least one ARG is a C variable, $params or $vars. */
+bool perf_probe_with_var(struct perf_probe_event *pev)
+{
+	int idx;
+	for (idx = 0; idx < pev->nargs; idx++) {
+		const char *var = pev->args[idx].var;
+		if (is_c_varname(var) || strcmp(var, PROBE_ARG_PARAMS) == 0 ||
+		    strcmp(var, PROBE_ARG_VARS) == 0)
+			return true;
+	}
+	return false;
+}
+
+/* Tell whether @pev can only be resolved with debuginfo */
+bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)
+{
+	const struct perf_probe_point *pp = &pev->point;
+
+	/* A source file, a line number or a lazy pattern needs it ... */
+	if (pp->file || pp->line || pp->lazy_line)
+		return true;
+	/* ... and so does any argument that perf_probe_with_var() accepts */
+	return perf_probe_with_var(pev);
+}
+
+/* Parse probe_events event into struct probe_point */
+int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
+{
+	struct probe_trace_point *tp = &tev->point;
+	char pr;
+	char *p;
+	char *argv0_str = NULL, *fmt, *fmt1_str, *fmt2_str, *fmt3_str;
+	int ret, i, argc;
+	char **argv;
+
+	pr_debug("Parsing probe_events: %s\n", cmd);
+	argv = argv_split(cmd, &argc);
+	if (!argv) {
+		pr_debug("Failed to split arguments.\n");
+		return -ENOMEM;
+	}
+	if (argc < 2) {
+		semantic_error("Too few probe arguments.\n");
+		ret = -ERANGE;
+		goto out;
+	}
+
+	/* Scan event and group name. */
+	argv0_str = strdup(argv[0]);
+	if (argv0_str == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	fmt1_str = strtok_r(argv0_str, ":", &fmt);
+	fmt2_str = strtok_r(NULL, "/", &fmt);
+	fmt3_str = strtok_r(NULL, " \t", &fmt);
+	if (fmt1_str == NULL || strlen(fmt1_str) != 1 || fmt2_str == NULL
+	    || fmt3_str == NULL) {
+		semantic_error("Failed to parse event name: %s\n", argv[0]);
+		ret = -EINVAL;
+		goto out;
+	}
+	pr = fmt1_str[0];
+	tev->group = strdup(fmt2_str);
+	tev->event = strdup(fmt3_str);
+	if (tev->group == NULL || tev->event == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	pr_debug("Group:%s Event:%s probe:%c\n", tev->group, tev->event, pr);
+
+	tp->retprobe = (pr == 'r');
+
+	/* Scan module name(if there), function name and offset */
+	p = strchr(argv[1], ':');
+	if (p) {
+		tp->module = strndup(argv[1], p - argv[1]);
+		if (!tp->module) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		tev->uprobes = (tp->module[0] == '/');
+		p++;
+	} else
+		p = argv[1];
+	fmt1_str = strtok_r(p, "+", &fmt);
+	/* only the address started with 0x */
+	if (fmt1_str[0] == '0')	{
+		/*
+		 * Fix a special case:
+		 * if address == 0, kernel reports something like:
+		 * p:probe_libc/abs_0 /lib/libc-2.18.so:0x          (null) arg1=%ax
+		 * Newer kernel may fix that, but we want to
+		 * support old kernel also.
+		 */
+		if (strcmp(fmt1_str, "0x") == 0) {
+			if (!argv[2] || strcmp(argv[2], "(null)")) {
+				ret = -EINVAL;
+				goto out;
+			}
+			tp->address = 0;
+
+			free(argv[2]);
+			for (i = 2; argv[i + 1] != NULL; i++)
+				argv[i] = argv[i + 1];
+
+			argv[i] = NULL;
+			argc -= 1;
+		} else
+			tp->address = strtoul(fmt1_str, NULL, 0);
+	} else {
+		/* Only the symbol-based probe has offset */
+		tp->symbol = strdup(fmt1_str);
+		if (tp->symbol == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		fmt2_str = strtok_r(NULL, "", &fmt);
+		if (fmt2_str == NULL)
+			tp->offset = 0;
+		else
+			tp->offset = strtoul(fmt2_str, NULL, 10);
+	}
+
+	tev->nargs = argc - 2;
+	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+	if (tev->args == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	for (i = 0; i < tev->nargs; i++) {
+		p = strchr(argv[i + 2], '=');
+		if (p)	/* We don't need which register is assigned. */
+			*p++ = '\0';
+		else
+			p = argv[i + 2];
+		tev->args[i].name = strdup(argv[i + 2]);
+		/* TODO: parse regs and offset */
+		tev->args[i].value = strdup(p);
+		if (tev->args[i].name == NULL || tev->args[i].value == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+	ret = 0;
+out:
+	free(argv0_str);
+	argv_free(argv);
+	return ret;
+}
+
+/*
+ * Compose only a probe argument.
+ *
+ * The output is "name=var" when both are set, otherwise whichever one is
+ * set, followed by every entry of the field list and an optional ":type".
+ * A field whose name starts with '[' is appended verbatim; any other field
+ * is prefixed with "->" (field->ref set) or ".".
+ *
+ * Returns the string detached from the strbuf, or NULL on failure.
+ */
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa)
+{
+	struct perf_probe_arg_field *fld;
+	struct strbuf sb;
+	char *result = NULL;
+	int rc;
+
+	if (strbuf_init(&sb, 64) < 0)
+		return NULL;
+
+	/* Head of the argument */
+	if (!pa->name || !pa->var)
+		rc = strbuf_addstr(&sb, pa->name ?: pa->var);
+	else
+		rc = strbuf_addf(&sb, "%s=%s", pa->name, pa->var);
+	if (rc)
+		goto done;
+
+	/* Field accessors, in list order */
+	for (fld = pa->field; fld; fld = fld->next) {
+		if (fld->name[0] != '[')
+			rc = strbuf_addf(&sb, "%s%s", fld->ref ? "->" : ".",
+					 fld->name);
+		else
+			rc = strbuf_addstr(&sb, fld->name);
+		if (rc)
+			goto done;
+	}
+
+	if (pa->type && strbuf_addf(&sb, ":%s", pa->type) < 0)
+		goto done;
+
+	result = strbuf_detach(&sb, NULL);
+done:
+	strbuf_release(&sb);
+	return result;
+}
+
+/*
+ * Compose only the probe point (no arguments).
+ *
+ * Emits the function name with at most one of "+offset", ":line" or
+ * "%return" (checked in that order), then "@file" where only the tail of a
+ * long path is shown.  When there is no function, ":line" follows the file.
+ *
+ * Returns the string detached from the strbuf, or NULL on failure.
+ */
+char *synthesize_perf_probe_point(struct perf_probe_point *pp)
+{
+	struct strbuf sb;
+	char *shown, *tail, *result = NULL;
+	int n, rc = 0;
+
+	if (strbuf_init(&sb, 64) < 0)
+		return NULL;
+
+	if (pp->function) {
+		if (strbuf_addstr(&sb, pp->function) < 0)
+			goto done;
+		if (pp->offset)
+			rc = strbuf_addf(&sb, "+%lu", pp->offset);
+		else if (pp->line)
+			rc = strbuf_addf(&sb, ":%d", pp->line);
+		else if (pp->retprobe)
+			rc = strbuf_addstr(&sb, "%return");
+		if (rc)
+			goto done;
+	}
+
+	if (pp->file) {
+		shown = pp->file;
+		n = strlen(shown);
+		if (n > 30) {
+			/*
+			 * Keep the last 30 characters, preferring to start
+			 * just after the first '/' found inside them.
+			 */
+			tail = pp->file + n - 30;
+			shown = strchr(tail, '/');
+			if (shown)
+				shown++;
+			else
+				shown = tail;
+		}
+		rc = strbuf_addf(&sb, "@%s", shown);
+		if (rc)
+			goto done;
+		if (!pp->function && pp->line) {
+			rc = strbuf_addf(&sb, ":%d", pp->line);
+			if (rc)
+				goto done;
+		}
+	}
+
+	result = strbuf_detach(&sb, NULL);
+done:
+	strbuf_release(&sb);
+	return result;
+}
+
+/*
+ * Compose a whole perf-probe command: an optional "group:event=" prefix
+ * (PERFPROBE_GROUP is used when pev->group is NULL), the probe point, and
+ * every argument separated by a single space.
+ *
+ * Returns the string detached from the strbuf, or NULL on failure.
+ */
+char *synthesize_perf_probe_command(struct perf_probe_event *pev)
+{
+	struct strbuf buf;
+	char *tmp = NULL, *ret = NULL;
+	int i;
+
+	if (strbuf_init(&buf, 64))
+		return NULL;
+	if (pev->event)
+		if (strbuf_addf(&buf, "%s:%s=", pev->group ?: PERFPROBE_GROUP,
+				pev->event) < 0)
+			goto out;
+
+	tmp = synthesize_perf_probe_point(&pev->point);
+	if (!tmp || strbuf_addstr(&buf, tmp) < 0)
+		goto out;
+	free(tmp);
+	tmp = NULL;
+
+	for (i = 0; i < pev->nargs; i++) {
+		tmp = synthesize_perf_probe_arg(pev->args + i);
+		if (!tmp || strbuf_addf(&buf, " %s", tmp) < 0)
+			goto out;
+		free(tmp);
+		tmp = NULL;
+	}
+
+	ret = strbuf_detach(&buf, NULL);
+out:
+	/*
+	 * tmp is only non-NULL here when appending it failed; release it so
+	 * the error path does not leak the intermediate string.
+	 */
+	free(tmp);
+	strbuf_release(&buf);
+	return ret;
+}
+
+/*
+ * Append the "<offset>(" prefix for every element of a dereference chain.
+ * The chain is walked recursively so that the last element is written
+ * first.  Returns the depth reached at the end of the chain, or a negative
+ * value if appending failed.
+ */
+static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref,
+					    struct strbuf *buf, int depth)
+{
+	int rc;
+
+	if (ref->next != NULL) {
+		depth = __synthesize_probe_trace_arg_ref(ref->next, buf,
+							 depth + 1);
+		if (depth < 0)
+			return depth;
+	}
+
+	rc = strbuf_addf(buf, "%+ld(", ref->offset);
+	if (rc < 0)
+		return rc;
+	return depth;
+}
+
+/*
+ * Append one trace argument to @buf as " [name=]<refs>value<closers>[:type]".
+ *
+ * When the value starts with '@' and a reference exists, the first
+ * reference is folded into the value as "value+offset" instead of being
+ * written as a dereference.
+ *
+ * Returns 0 on success or the error reported by the strbuf helpers.
+ */
+static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
+				      struct strbuf *buf)
+{
+	bool sym_with_ref = (arg->value[0] == '@' && arg->ref);
+	struct probe_trace_arg_ref *chain = arg->ref;
+	int nest = 0, rc;
+
+	/* Argument name or separator */
+	if (!arg->name)
+		rc = strbuf_addch(buf, ' ');
+	else
+		rc = strbuf_addf(buf, " %s=", arg->name);
+	if (rc)
+		return rc;
+
+	/* Special case: @XXX consumes the first reference itself */
+	if (sym_with_ref)
+		chain = chain->next;
+
+	/* Opening part of each dereference */
+	if (chain) {
+		nest = __synthesize_probe_trace_arg_ref(chain, buf, 1);
+		if (nest < 0)
+			return nest;
+	}
+
+	/* The value itself */
+	if (sym_with_ref)
+		rc = strbuf_addf(buf, "%s%+ld", arg->value, arg->ref->offset);
+	else
+		rc = strbuf_addstr(buf, arg->value);
+
+	/* One closing parenthesis per dereference */
+	for (; !rc && nest > 0; nest--)
+		rc = strbuf_addch(buf, ')');
+
+	/* Optional type suffix */
+	if (!rc && arg->type)
+		rc = strbuf_addf(buf, ":%s", arg->type);
+
+	return rc;
+}
+
+/*
+ * Compose the command line for a probe_trace_event:
+ *   "<p|r>:<group>/<event> <location> <args...>"
+ *
+ * The location is "module:0xADDR" for uprobes, "[module:]0xADDR" when the
+ * symbol starts with "0x", and "[module:]symbol+offset" otherwise.
+ *
+ * Returns the string detached from the strbuf, or NULL on failure.
+ */
+char *synthesize_probe_trace_command(struct probe_trace_event *tev)
+{
+	struct probe_trace_point *tp = &tev->point;
+	struct strbuf sb;
+	char *result = NULL;
+	int idx, rc;
+
+	/* Uprobes must have tp->module */
+	if (tev->uprobes && !tp->module)
+		return NULL;
+
+	if (strbuf_init(&sb, 32) < 0)
+		return NULL;
+
+	if (strbuf_addf(&sb, "%c:%s/%s ", tp->retprobe ? 'r' : 'p',
+			tev->group, tev->event) < 0)
+		goto fail;
+
+	if (tev->uprobes) {
+		/*
+		 * If tp->address == 0, then this point must be an
+		 * absolute address uprobe.
+		 * try_to_find_absolute_address() should have made
+		 * tp->symbol to "0x0".
+		 */
+		if (!tp->address &&
+		    (!tp->symbol || strcmp(tp->symbol, "0x0")))
+			goto fail;
+
+		/* Use the tp->address for uprobes */
+		rc = strbuf_addf(&sb, "%s:0x%lx", tp->module, tp->address);
+	} else {
+		const char *mod = tp->module ?: "";
+		const char *sep = tp->module ? ":" : "";
+
+		if (strncmp(tp->symbol, "0x", 2) == 0)
+			/* Absolute address. See try_to_find_absolute_address() */
+			rc = strbuf_addf(&sb, "%s%s0x%lx", mod, sep,
+					 tp->address);
+		else
+			rc = strbuf_addf(&sb, "%s%s%s+%lu", mod, sep,
+					 tp->symbol, tp->offset);
+	}
+	if (rc)
+		goto fail;
+
+	for (idx = 0; idx < tev->nargs; idx++) {
+		if (synthesize_probe_trace_arg(&tev->args[idx], &sb) < 0)
+			goto fail;
+	}
+
+	result = strbuf_detach(&sb, NULL);
+fail:
+	strbuf_release(&sb);
+	return result;
+}
+
+/*
+ * Resolve a trace point to "function + offset" using symbol maps.
+ *
+ * For kprobes the address is taken from tp->address, or looked up from
+ * tp->symbol when the address is zero, and tp->offset is added before the
+ * kernel function lookup.  Otherwise a map is created for tp->module and
+ * searched at tp->address; that map is dropped before returning.
+ *
+ * Returns 0 on success, -ENOENT if nothing was found, -ENOMEM if the
+ * function name could not be duplicated.
+ */
+static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
+					  struct perf_probe_point *pp,
+					  bool is_kprobe)
+{
+	struct symbol *found = NULL;
+	struct map *map = NULL;
+	u64 where = tp->address;
+	int rc = -ENOENT;
+
+	if (is_kprobe) {
+		if (tp->symbol && !where &&
+		    kernel_get_symbol_address_by_name(tp->symbol,
+						      &where, true, false) < 0)
+			goto done;
+		if (where) {
+			where += tp->offset;
+			found = __find_kernel_function(where, &map);
+		}
+	} else {
+		map = dso__new_map(tp->module);
+		if (!map)
+			goto done;
+		found = map__find_symbol(map, where);
+	}
+
+	if (found) {
+		pp->retprobe = tp->retprobe;
+		pp->offset = where - map->unmap_ip(map, found->start);
+		pp->function = strdup(found->name);
+		rc = pp->function ? 0 : -ENOMEM;
+	}
+
+done:
+	/* Only the map created here for the non-kprobe case is released */
+	if (map && !is_kprobe)
+		map__put(map);
+
+	return rc;
+}
+
+/*
+ * Convert a trace point into a perf probe point.
+ *
+ * The DWARF-based lookup is tried first, then the map-based one.  If both
+ * fail, the point falls back to the raw symbol and offset, or to the
+ * address printed as a hexadecimal string when there is no symbol.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int convert_to_perf_probe_point(struct probe_trace_point *tp,
+				       struct perf_probe_point *pp,
+				       bool is_kprobe)
+{
+	char addrbuf[128];
+	int rc;
+
+	if (find_perf_probe_point_from_dwarf(tp, pp, is_kprobe) == 0 ||
+	    find_perf_probe_point_from_map(tp, pp, is_kprobe) == 0)
+		return 0;
+
+	pr_debug("Failed to find probe point from both of dwarf and map.\n");
+
+	if (!tp->symbol) {
+		rc = e_snprintf(addrbuf, sizeof(addrbuf), "0x%" PRIx64,
+				(u64)tp->address);
+		if (rc < 0)
+			return rc;
+		pp->function = strdup(addrbuf);
+		pp->offset = 0;
+	} else {
+		pp->function = strdup(tp->symbol);
+		pp->offset = tp->offset;
+	}
+	if (!pp->function)
+		return -ENOMEM;
+
+	pp->retprobe = tp->retprobe;
+
+	return 0;
+}
+
+/*
+ * Convert a probe_trace_event into a perf_probe_event.
+ *
+ * Copies the event and group names, converts the probe point and gives
+ * every argument a name: the trace argument's own name if it has one,
+ * otherwise its synthesized text.
+ *
+ * On any failure @pev is released with clear_perf_probe_event(), so the
+ * caller never sees a half-built event.  This assumes @pev was zeroed or
+ * otherwise consistent on entry.
+ *
+ * Returns a non-negative value on success or a negative error code.
+ */
+static int convert_to_perf_probe_event(struct probe_trace_event *tev,
+			       struct perf_probe_event *pev, bool is_kprobe)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int i, ret;
+
+	/* Convert event/group name */
+	pev->event = strdup(tev->event);
+	pev->group = strdup(tev->group);
+	if (pev->event == NULL || pev->group == NULL) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/* Convert trace_point to probe_point */
+	ret = convert_to_perf_probe_point(&tev->point, &pev->point, is_kprobe);
+	if (ret < 0)
+		goto error;
+
+	/* Convert trace_arg to probe_arg */
+	pev->args = zalloc(sizeof(struct perf_probe_arg) * tev->nargs);
+	if (pev->args == NULL) {
+		/*
+		 * Keep nargs at zero so that clear_perf_probe_event() does
+		 * not walk a NULL array.
+		 */
+		pev->nargs = 0;
+		ret = -ENOMEM;
+		goto error;
+	}
+	pev->nargs = tev->nargs;
+
+	for (i = 0; i < tev->nargs && ret >= 0; i++) {
+		if (tev->args[i].name)
+			pev->args[i].name = strdup(tev->args[i].name);
+		else {
+			if ((ret = strbuf_init(&buf, 32)) < 0)
+				goto error;
+			ret = synthesize_probe_trace_arg(&tev->args[i], &buf);
+			pev->args[i].name = strbuf_detach(&buf, NULL);
+		}
+		if (pev->args[i].name == NULL && ret >= 0)
+			ret = -ENOMEM;
+	}
+error:
+	if (ret < 0)
+		clear_perf_probe_event(pev);
+
+	return ret;
+}
+
+/*
+ * Release everything owned by @pev (names, target, probe point, arguments
+ * and their field lists) and zero the structure.
+ */
+void clear_perf_probe_event(struct perf_probe_event *pev)
+{
+	struct perf_probe_arg_field *fld, *following;
+	struct perf_probe_arg *arg;
+	int idx;
+
+	free(pev->event);
+	free(pev->group);
+	free(pev->target);
+	clear_perf_probe_point(&pev->point);
+
+	for (idx = 0; idx < pev->nargs; idx++) {
+		arg = &pev->args[idx];
+
+		free(arg->name);
+		free(arg->var);
+		free(arg->type);
+
+		/* Fetch the successor before each node is freed */
+		for (fld = arg->field; fld; fld = following) {
+			following = fld->next;
+			free(fld->name);
+			free(fld);
+		}
+	}
+	free(pev->args);
+	memset(pev, 0, sizeof(*pev));
+}
+
+/*
+ * Duplicate @str and evaluate to the copy; a NULL @str evaluates to NULL.
+ * If strdup() fails, control jumps to @label in the enclosing function.
+ * Note that @str is evaluated twice.
+ */
+#define strdup_or_goto(str, label)	\
+({ char *__p = NULL; if (str && !(__p = strdup(str))) goto label; __p; })
+
+/*
+ * Deep-copy a probe point: the three strings are duplicated and the scalar
+ * members copied.  If a duplication fails, @dst is passed to
+ * clear_perf_probe_point() and -ENOMEM is returned.
+ */
+static int perf_probe_point__copy(struct perf_probe_point *dst,
+				  struct perf_probe_point *src)
+{
+	/* Strings first; each one may bail out to the cleanup label */
+	dst->file = strdup_or_goto(src->file, nomem);
+	dst->function = strdup_or_goto(src->function, nomem);
+	dst->lazy_line = strdup_or_goto(src->lazy_line, nomem);
+
+	/* Plain values */
+	dst->line = src->line;
+	dst->retprobe = src->retprobe;
+	dst->offset = src->offset;
+
+	return 0;
+
+nomem:
+	clear_perf_probe_point(dst);
+	return -ENOMEM;
+}
+
+/*
+ * Deep-copy a probe argument, including its linked list of fields.
+ *
+ * Nothing is released here on failure; whatever was already copied stays
+ * attached to @dst and -ENOMEM is returned.
+ */
+static int perf_probe_arg__copy(struct perf_probe_arg *dst,
+				struct perf_probe_arg *src)
+{
+	struct perf_probe_arg_field *from, *to;
+	struct perf_probe_arg_field **link = &dst->field;
+
+	dst->name = strdup_or_goto(src->name, nomem);
+	dst->var = strdup_or_goto(src->var, nomem);
+	dst->type = strdup_or_goto(src->type, nomem);
+
+	for (from = src->field; from; from = from->next) {
+		/* Hook the new node into the list before filling it in */
+		to = zalloc(sizeof(*to));
+		*link = to;
+		if (!to)
+			goto nomem;
+		to->name = strdup_or_goto(from->name, nomem);
+		to->index = from->index;
+		to->ref = from->ref;
+		link = &to->next;
+	}
+	return 0;
+nomem:
+	return -ENOMEM;
+}
+
+/*
+ * Deep-copy a perf_probe_event: names, target, uprobes flag, probe point
+ * and every argument.  On any failure @dst is released with
+ * clear_perf_probe_event() and -ENOMEM is returned.
+ */
+int perf_probe_event__copy(struct perf_probe_event *dst,
+			   struct perf_probe_event *src)
+{
+	int n;
+
+	dst->event = strdup_or_goto(src->event, fail);
+	dst->group = strdup_or_goto(src->group, fail);
+	dst->target = strdup_or_goto(src->target, fail);
+	dst->uprobes = src->uprobes;
+
+	if (perf_probe_point__copy(&dst->point, &src->point) < 0)
+		goto fail;
+
+	dst->args = zalloc(sizeof(struct perf_probe_arg) * src->nargs);
+	if (dst->args == NULL)
+		goto fail;
+	dst->nargs = src->nargs;
+
+	for (n = 0; n < src->nargs; n++) {
+		if (perf_probe_arg__copy(&dst->args[n], &src->args[n]) < 0)
+			goto fail;
+	}
+	return 0;
+
+fail:
+	clear_perf_probe_event(dst);
+	return -ENOMEM;
+}
+
+/*
+ * Release everything owned by @tev (names, probe point strings, arguments
+ * and their reference chains) and zero the structure.
+ */
+void clear_probe_trace_event(struct probe_trace_event *tev)
+{
+	struct probe_trace_arg_ref *cur, *following;
+	struct probe_trace_arg *arg;
+	int idx;
+
+	free(tev->event);
+	free(tev->group);
+	free(tev->point.symbol);
+	free(tev->point.realname);
+	free(tev->point.module);
+
+	for (idx = 0; idx < tev->nargs; idx++) {
+		arg = &tev->args[idx];
+
+		free(arg->name);
+		free(arg->value);
+		free(arg->type);
+
+		/* Fetch the successor before each node is freed */
+		for (cur = arg->ref; cur; cur = following) {
+			following = cur->next;
+			free(cur);
+		}
+	}
+	free(tev->args);
+	memset(tev, 0, sizeof(*tev));
+}
+
+/* One address range read from the kprobes blacklist file */
+struct kprobe_blacklist_node {
+	struct list_head list;	/* link in the blacklist */
+	unsigned long start;	/* first address of the range (inclusive) */
+	unsigned long end;	/* end of the range (exclusive in lookups) */
+	char *symbol;		/* heap copy of the symbol name, or "unknown" */
+};
+
+/* Unlink and free every node on @blacklist, leaving the list empty. */
+static void kprobe_blacklist__delete(struct list_head *blacklist)
+{
+	struct kprobe_blacklist_node *victim;
+
+	for (;;) {
+		if (list_empty(blacklist))
+			break;
+
+		victim = list_first_entry(blacklist,
+					  struct kprobe_blacklist_node, list);
+		list_del(&victim->list);
+		free(victim->symbol);
+		free(victim);
+	}
+}
+
+/*
+ * Read "<debugfs>/kprobes/blacklist" and append one node per line to
+ * @blacklist.  Each line must start with "0x<start>-0x<end>"; the text
+ * after the first tab is kept as the symbol name ("unknown" if there is
+ * no tab).
+ *
+ * Returns the number of entries loaded, or a negative error code, in
+ * which case the whole list is emptied again.
+ */
+static int kprobe_blacklist__load(struct list_head *blacklist)
+{
+	struct kprobe_blacklist_node *node;
+	const char *__debugfs = debugfs__mountpoint();
+	char buf[PATH_MAX], *p;
+	size_t len;
+	FILE *fp;
+	int ret;
+
+	if (__debugfs == NULL)
+		return -ENOTSUP;
+
+	ret = e_snprintf(buf, PATH_MAX, "%s/kprobes/blacklist", __debugfs);
+	if (ret < 0)
+		return ret;
+
+	fp = fopen(buf, "r");
+	if (!fp)
+		return -errno;
+
+	ret = 0;
+	while (fgets(buf, PATH_MAX, fp)) {
+		node = zalloc(sizeof(*node));
+		if (!node) {
+			ret = -ENOMEM;
+			break;
+		}
+		INIT_LIST_HEAD(&node->list);
+		/* Linked in first so the error paths below free it too */
+		list_add_tail(&node->list, blacklist);
+		if (sscanf(buf, "0x%lx-0x%lx", &node->start, &node->end) != 2) {
+			ret = -EINVAL;
+			break;
+		}
+		p = strchr(buf, '\t');
+		if (p) {
+			p++;
+			/*
+			 * Strip the trailing newline.  The name may be empty
+			 * (tab at the very end of the data), so check the
+			 * length before indexing its last character.
+			 */
+			len = strlen(p);
+			if (len > 0 && p[len - 1] == '\n')
+				p[len - 1] = '\0';
+		} else
+			p = (char *)"unknown";
+		node->symbol = strdup(p);
+		if (!node->symbol) {
+			ret = -ENOMEM;
+			break;
+		}
+		pr_debug2("Blacklist: 0x%lx-0x%lx, %s\n",
+			  node->start, node->end, node->symbol);
+		ret++;
+	}
+	if (ret < 0)
+		kprobe_blacklist__delete(blacklist);
+	fclose(fp);
+
+	return ret;
+}
+
+/*
+ * Return the first node whose range [start, end) contains @address, or
+ * NULL when no range matches.
+ */
+static struct kprobe_blacklist_node *
+kprobe_blacklist__find_by_address(struct list_head *blacklist,
+				  unsigned long address)
+{
+	struct kprobe_blacklist_node *pos;
+
+	list_for_each_entry(pos, blacklist, list) {
+		if (address < pos->start || address >= pos->end)
+			continue;
+		return pos;
+	}
+
+	return NULL;
+}
+
+/* File-wide blacklist; filled by kprobe_blacklist__init() while it is empty */
+static LIST_HEAD(kprobe_blacklist);
+
+/*
+ * Load the file-wide blacklist unless it already has entries.  A load
+ * failure is only reported at debug level.
+ */
+static void kprobe_blacklist__init(void)
+{
+	int loaded;
+
+	if (!list_empty(&kprobe_blacklist))
+		return;
+
+	loaded = kprobe_blacklist__load(&kprobe_blacklist);
+	if (loaded < 0)
+		pr_debug("No kprobe blacklist support, ignored\n");
+}
+
+/* Free every entry of the file-wide blacklist. */
+static void kprobe_blacklist__release(void)
+{
+	kprobe_blacklist__delete(&kprobe_blacklist);
+}
+
+/* Tell whether @address falls inside any range of the file-wide blacklist. */
+static bool kprobe_blacklist__listed(unsigned long address)
+{
+	struct kprobe_blacklist_node *hit;
+
+	hit = kprobe_blacklist__find_by_address(&kprobe_blacklist, address);
+	return hit != NULL;
+}
+
+/*
+ * Append a one-line description of a probe event to @result:
+ *   "  group:event          (on <point>[ in <module>][ with <args>...])"
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int perf_probe_event__sprintf(const char *group, const char *event,
+				     struct perf_probe_event *pev,
+				     const char *module,
+				     struct strbuf *result)
+{
+	char *text;
+	int idx, rc;
+
+	/* "group:event", padded to a fixed column */
+	if (asprintf(&text, "%s:%s", group, event) < 0)
+		return -errno;
+	rc = strbuf_addf(result, "  %-20s (on ", text);
+	free(text);
+	if (rc)
+		return rc;
+
+	/* Synthesize only event probe point */
+	text = synthesize_perf_probe_point(&pev->point);
+	if (text == NULL)
+		return -ENOMEM;
+	rc = strbuf_addstr(result, text);
+	free(text);
+	if (rc)
+		return rc;
+
+	if (module) {
+		rc = strbuf_addf(result, " in %s", module);
+		if (rc)
+			return rc;
+	}
+
+	if (pev->nargs > 0) {
+		rc = strbuf_add(result, " with", 5);
+		if (rc)
+			return rc;
+		for (idx = 0; idx < pev->nargs; idx++) {
+			text = synthesize_perf_probe_arg(&pev->args[idx]);
+			if (text == NULL)
+				return -ENOMEM;
+			rc = strbuf_addf(result, " %s", text);
+			free(text);
+			if (rc)
+				return rc;
+		}
+	}
+
+	return strbuf_addch(result, ')');
+}
+
+/*
+ * Show an event: format it with perf_probe_event__sprintf() and print the
+ * line through printf() when @use_stdout is set, through pr_info()
+ * otherwise.  Nothing is printed if formatting fails.
+ */
+int show_perf_probe_event(const char *group, const char *event,
+			  struct perf_probe_event *pev,
+			  const char *module, bool use_stdout)
+{
+	struct strbuf line = STRBUF_INIT;
+	int rc;
+
+	rc = perf_probe_event__sprintf(group, event, pev, module, &line);
+	if (rc < 0)
+		goto done;
+
+	if (!use_stdout)
+		pr_info("%s\n", line.buf);
+	else
+		printf("%s\n", line.buf);
+done:
+	strbuf_release(&line);
+
+	return rc;
+}
+
+/*
+ * Check @tev against @filter: first by the event name alone, then by
+ * "group:event".  Returns false if the combined name cannot be formatted.
+ */
+static bool filter_probe_trace_event(struct probe_trace_event *tev,
+				     struct strfilter *filter)
+{
+	char full_name[128];
+	int rc;
+
+	/* At first, check the event name itself */
+	if (strfilter__compare(filter, tev->event))
+		return true;
+
+	/* Next, check the combination of name and group */
+	rc = e_snprintf(full_name, sizeof(full_name), "%s:%s",
+			tev->group, tev->event);
+	if (rc < 0)
+		return false;
+
+	return strfilter__compare(filter, full_name);
+}
+
+/*
+ * Print every probe read from @fd that passes @filter.
+ *
+ * Each raw line is parsed, filtered, converted and printed on stdout.
+ * Both event structures are cleared after every entry, and the first
+ * negative result stops the walk.
+ */
+static int __show_perf_probe_events(int fd, bool is_kprobe,
+				    struct strfilter *filter)
+{
+	struct probe_trace_event tev;
+	struct perf_probe_event pev;
+	struct strlist *rawlist;
+	struct str_node *pos;
+	int rc = 0;
+
+	memset(&tev, 0, sizeof(tev));
+	memset(&pev, 0, sizeof(pev));
+
+	rawlist = probe_file__get_rawlist(fd);
+	if (rawlist == NULL)
+		return -ENOMEM;
+
+	strlist__for_each_entry(pos, rawlist) {
+		rc = parse_probe_trace_command(pos->s, &tev);
+		if (rc >= 0 && filter_probe_trace_event(&tev, filter)) {
+			rc = convert_to_perf_probe_event(&tev, &pev,
+							 is_kprobe);
+			if (rc >= 0)
+				rc = show_perf_probe_event(pev.group,
+							   pev.event, &pev,
+							   tev.point.module,
+							   true);
+		}
+
+		clear_perf_probe_event(&pev);
+		clear_probe_trace_event(&tev);
+		if (rc < 0)
+			break;
+	}
+	strlist__delete(rawlist);
+	/* Cleanup cached debuginfo if needed */
+	debuginfo_cache__exit();
+
+	return rc;
+}
+
+/*
+ * List up current perf-probe events.
+ *
+ * With probe_conf.cache set, the probe caches are listed instead.
+ * Otherwise the kprobe and uprobe event files are opened and every event
+ * matching @filter is printed, kprobes first.
+ *
+ * Returns a non-negative value on success or a negative error code.
+ */
+int show_perf_probe_events(struct strfilter *filter)
+{
+	int kp_fd, up_fd, ret;
+
+	setup_pager();
+
+	if (probe_conf.cache)
+		return probe_cache__show_all_caches(filter);
+
+	ret = init_probe_symbol_maps(false);
+	if (ret < 0)
+		return ret;
+
+	ret = probe_file__open_both(&kp_fd, &up_fd, 0);
+	if (ret < 0)
+		goto out;	/* the symbol maps still have to be released */
+
+	if (kp_fd >= 0)
+		ret = __show_perf_probe_events(kp_fd, true, filter);
+	if (up_fd >= 0 && ret >= 0)
+		ret = __show_perf_probe_events(up_fd, false, filter);
+
+	/* 0 is a valid descriptor, so close on the same test used above */
+	if (kp_fd >= 0)
+		close(kp_fd);
+	if (up_fd >= 0)
+		close(up_fd);
+out:
+	exit_probe_symbol_maps();
+
+	return ret;
+}
+
+/*
+ * Build an event name in @buf that is not yet present in @namelist.
+ *
+ * The name is derived from @base: a leading '.' is skipped and the name is
+ * cut at the first '.' or '@' that is not its first character.  Return
+ * probes get a "__return" suffix.  If that name is taken and
+ * @allow_suffix is set, "_1", "_2", ... are tried below MAX_EVENT_INDEX.
+ *
+ * Returns a non-negative value on success; -ENOMEM, -EEXIST, -ERANGE,
+ * -EINVAL or the e_snprintf() error otherwise.
+ */
+static int get_new_event_name(char *buf, size_t len, const char *base,
+			      struct strlist *namelist, bool ret_event,
+			      bool allow_suffix)
+{
+	const char *tail = ret_event ? "__return" : "";
+	char *stem, *cut;
+	int idx, rc;
+
+	if (base[0] == '.')
+		base++;
+	stem = strdup(base);
+	if (stem == NULL)
+		return -ENOMEM;
+
+	/* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */
+	cut = strpbrk(stem, ".@");
+	if (cut != NULL && cut != stem)
+		*cut = '\0';
+
+	/* Try no suffix number */
+	rc = e_snprintf(buf, len, "%s%s", stem, tail);
+	if (rc < 0) {
+		pr_debug("snprintf() failed: %d\n", rc);
+		goto done;
+	}
+	if (!strlist__has_entry(namelist, buf))
+		goto done;
+
+	if (!allow_suffix) {
+		pr_warning("Error: event \"%s\" already exists.\n"
+			   " Hint: Remove existing event by 'perf probe -d'\n"
+			   "       or force duplicates by 'perf probe -f'\n"
+			   "       or set 'force=yes' in BPF source.\n",
+			   buf);
+		rc = -EEXIST;
+		goto done;
+	}
+
+	/* Try to add suffix */
+	idx = 1;
+	while (idx < MAX_EVENT_INDEX) {
+		rc = e_snprintf(buf, len, "%s_%d", stem, idx);
+		if (rc < 0) {
+			pr_debug("snprintf() failed: %d\n", rc);
+			goto done;
+		}
+		if (!strlist__has_entry(namelist, buf))
+			break;
+		idx++;
+	}
+	if (idx == MAX_EVENT_INDEX) {
+		pr_warning("Too many events are on the same function.\n");
+		rc = -ERANGE;
+	}
+
+done:
+	free(stem);
+
+	/* Final validation */
+	if (rc >= 0 && !is_c_func_name(buf)) {
+		pr_warning("Internal error: \"%s\" is an invalid event name.\n",
+			   buf);
+		rc = -EINVAL;
+	}
+
+	return rc;
+}
+
+/*
+ * Warn if the current kernel's uprobe implementation is old.
+ *
+ * Only uprobe events with arguments, whose command can be synthesized,
+ * are checked.  The first argument value matching "[$@+-]*" triggers the
+ * warning.
+ */
+static void warn_uprobe_event_compat(struct probe_trace_event *tev)
+{
+	char *cmd = synthesize_probe_trace_command(tev);
+	int idx;
+
+	/* Old uprobe event doesn't support memory dereference */
+	if (cmd && tev->uprobes && tev->nargs != 0) {
+		for (idx = 0; idx < tev->nargs; idx++) {
+			if (!strglobmatch(tev->args[idx].value, "[$@+-]*"))
+				continue;
+			pr_warning("Please upgrade your kernel to at least "
+				   "3.14 to have access to feature %s\n",
+				   tev->args[idx].value);
+			break;
+		}
+	}
+
+	free(cmd);
+}
+
+/*
+ * Set new name from original perf_probe_event and namelist.
+ *
+ * The event name comes from @pev (unless it is an SDT event), else from
+ * @tev, else from the probed function or the real symbol name.  The group
+ * is chosen the same way, defaulting to PERFPROBE_GROUP.  The event name
+ * is then made unique against @namelist and added to it.
+ *
+ * On success @tev owns fresh copies of both names and the strings it held
+ * before are freed.  On failure @tev is left untouched.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int probe_trace_event__set_name(struct probe_trace_event *tev,
+				       struct perf_probe_event *pev,
+				       struct strlist *namelist,
+				       bool allow_suffix)
+{
+	const char *event, *group;
+	char *new_event, *new_group;
+	char buf[64];
+	int ret;
+
+	/* If probe_event or trace_event already have the name, reuse it */
+	if (pev->event && !pev->sdt)
+		event = pev->event;
+	else if (tev->event)
+		event = tev->event;
+	else {
+		/* Or generate new one from probe point */
+		if (pev->point.function &&
+			(strncmp(pev->point.function, "0x", 2) != 0) &&
+			!strisglob(pev->point.function))
+			event = pev->point.function;
+		else
+			event = tev->point.realname;
+	}
+	if (pev->group && !pev->sdt)
+		group = pev->group;
+	else if (tev->group)
+		group = tev->group;
+	else
+		group = PERFPROBE_GROUP;
+
+	/* Get an unused new event name */
+	ret = get_new_event_name(buf, 64, event, namelist,
+				 tev->point.retprobe, allow_suffix);
+	if (ret < 0)
+		return ret;
+
+	event = buf;
+
+	/*
+	 * Duplicate before releasing anything: @group may still point at
+	 * tev->group, and the old strings would leak if simply overwritten.
+	 */
+	new_event = strdup(event);
+	new_group = strdup(group);
+	if (new_event == NULL || new_group == NULL) {
+		free(new_event);
+		free(new_group);
+		return -ENOMEM;
+	}
+	free(tev->event);
+	free(tev->group);
+	tev->event = new_event;
+	tev->group = new_group;
+
+	/* Add added event name to namelist */
+	strlist__add(namelist, event);
+	return 0;
+}
+
+/*
+ * Open the kprobe or uprobe event file read-write and read the names of
+ * the events it currently holds into *@namelist.
+ *
+ * Returns the open descriptor, the negative value from probe_file__open(),
+ * or -ENOMEM (with the descriptor closed) if the names cannot be read.
+ */
+static int __open_probe_file_and_namelist(bool uprobe,
+					  struct strlist **namelist)
+{
+	int flags = PF_FL_RW | (uprobe ? PF_FL_UPROBE : 0);
+	int fd = probe_file__open(flags);
+
+	if (fd < 0)
+		return fd;
+
+	/* Get current event names */
+	*namelist = probe_file__get_namelist(fd);
+	if (*namelist != NULL)
+		return fd;
+
+	pr_debug("Failed to get current event list.\n");
+	close(fd);
+	return -ENOMEM;
+}
+
+/*
+ * Register @ntevs trace events derived from @pev.
+ *
+ * The event file matching pev->uprobes is opened up front; the other one
+ * is opened on demand when a trace event needs it.  Every event gets a
+ * unique name and is written inside the mount namespace of pev->nsi.
+ * When everything succeeded and probe_conf.cache is set, the events are
+ * also stored in the probe cache (a cache failure is only a warning).
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int __add_probe_trace_events(struct perf_probe_event *pev,
+				     struct probe_trace_event *tevs,
+				     int ntevs, bool allow_suffix)
+{
+	int i, fd[2] = {-1, -1}, up, ret;
+	struct probe_trace_event *tev = NULL;
+	struct probe_cache *cache = NULL;
+	struct strlist *namelist[2] = {NULL, NULL};
+	struct nscookie nsc;
+
+	up = pev->uprobes ? 1 : 0;
+	fd[up] = __open_probe_file_and_namelist(up, &namelist[up]);
+	if (fd[up] < 0)
+		return fd[up];
+
+	ret = 0;
+	for (i = 0; i < ntevs; i++) {
+		tev = &tevs[i];
+		up = tev->uprobes ? 1 : 0;
+		if (fd[up] == -1) {	/* Open the kprobe/uprobe_events */
+			fd[up] = __open_probe_file_and_namelist(up,
+								&namelist[up]);
+			if (fd[up] < 0) {
+				/* Report the failure, not a stale 0 */
+				ret = fd[up];
+				goto close_out;
+			}
+		}
+		/* Skip if the symbol is out of .text or blacklisted */
+		if (!tev->point.symbol && !pev->uprobes)
+			continue;
+
+		/* Set new name for tev (and update namelist) */
+		ret = probe_trace_event__set_name(tev, pev, namelist[up],
+						  allow_suffix);
+		if (ret < 0)
+			break;
+
+		nsinfo__mountns_enter(pev->nsi, &nsc);
+		ret = probe_file__add_event(fd[up], tev);
+		nsinfo__mountns_exit(&nsc);
+		if (ret < 0)
+			break;
+
+		/*
+		 * Probes after the first probe which comes from same
+		 * user input are always allowed to add suffix, because
+		 * there might be several addresses corresponding to
+		 * one code line.
+		 */
+		allow_suffix = true;
+	}
+	if (ret == -EINVAL && pev->uprobes)
+		warn_uprobe_event_compat(tev);
+	if (ret == 0 && probe_conf.cache) {
+		cache = probe_cache__new(pev->target, pev->nsi);
+		if (!cache ||
+		    probe_cache__add_entry(cache, pev, tevs, ntevs) < 0 ||
+		    probe_cache__commit(cache) < 0)
+			pr_warning("Failed to add event to probe cache\n");
+		probe_cache__delete(cache);
+	}
+
+close_out:
+	for (up = 0; up < 2; up++) {
+		strlist__delete(namelist[up]);
+		if (fd[up] >= 0)
+			close(fd[up]);
+	}
+	return ret;
+}
+
+/*
+ * Count the symbols of @map whose normalized name matches the glob @name
+ * and, if @syms is given, store up to probe_conf.max_probes of them.
+ *
+ * Unless @name itself contains '@', the "@version" part of a symbol name
+ * is cut off before matching.
+ *
+ * Returns the number of matches (which may exceed probe_conf.max_probes),
+ * 0 if the map cannot be loaded, or -ENOMEM.
+ */
+static int find_probe_functions(struct map *map, char *name,
+				struct symbol **syms)
+{
+	int found = 0;
+	struct symbol *sym;
+	struct rb_node *tmp;
+	const char *norm, *ver;
+	char *buf = NULL;
+	bool cut_version = true;
+
+	if (map__load(map) < 0)
+		return 0;
+
+	/* If user gives a version, don't cut off the version from symbols */
+	if (strchr(name, '@'))
+		cut_version = false;
+
+	map__for_each_symbol(map, sym, tmp) {
+		norm = arch__normalize_symbol_name(sym->name);
+		if (!norm)
+			continue;
+
+		if (cut_version) {
+			/* We don't care about default symbol or not */
+			ver = strchr(norm, '@');
+			if (ver) {
+				buf = strndup(norm, ver - norm);
+				if (!buf)
+					return -ENOMEM;
+				norm = buf;
+			}
+		}
+
+		if (strglobmatch(norm, name)) {
+			found++;
+			/*
+			 * The caller accepts exactly max_probes matches, so
+			 * the last slot (index max_probes - 1) must be
+			 * filled as well.
+			 */
+			if (syms && found <= probe_conf.max_probes)
+				syms[found - 1] = sym;
+		}
+		if (buf)
+			zfree(&buf);
+	}
+
+	return found;
+}
+
+/*
+ * Weak default that does nothing.  It is called once for every trace event
+ * built from map symbols; presumably an architecture can override it to
+ * adjust the event (the overrides are not visible here).
+ */
+void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
+				struct probe_trace_event *tev __maybe_unused,
+				struct map *map __maybe_unused,
+				struct symbol *sym __maybe_unused) { }
+
+/*
+ * Find probe function addresses from map.
+ * Return an error or the number of found probe_trace_event
+ *
+ * On success *tevs points to a zero-allocated array with one entry per
+ * matched symbol.  On failure after that allocation, the array is cleared
+ * and *tevs is reset through zfree().
+ */
+static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
+					    struct probe_trace_event **tevs)
+{
+	struct map *map = NULL;
+	struct ref_reloc_sym *reloc_sym = NULL;
+	struct symbol *sym;
+	struct symbol **syms = NULL;
+	struct probe_trace_event *tev;
+	struct perf_probe_point *pp = &pev->point;
+	struct probe_trace_point *tp;
+	int num_matched_functions;
+	int ret, i, j, skipped = 0;
+	char *mod_name;
+
+	map = get_target_map(pev->target, pev->nsi, pev->uprobes);
+	if (!map) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Room for at most probe_conf.max_probes matched symbols */
+	syms = malloc(sizeof(struct symbol *) * probe_conf.max_probes);
+	if (!syms) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Load matched symbols: Since the different local symbols may have
+	 * same name but different addresses, this lists all the symbols.
+	 */
+	num_matched_functions = find_probe_functions(map, pp->function, syms);
+	if (num_matched_functions <= 0) {
+		/* NOTE(review): a negative (-ENOMEM) result is also reported as -ENOENT */
+		pr_err("Failed to find symbol %s in %s\n", pp->function,
+			pev->target ? : "kernel");
+		ret = -ENOENT;
+		goto out;
+	} else if (num_matched_functions > probe_conf.max_probes) {
+		pr_err("Too many functions matched in %s\n",
+			pev->target ? : "kernel");
+		ret = -E2BIG;
+		goto out;
+	}
+
+	/* Note that the symbols in the kmodule are not relocated */
+	if (!pev->uprobes && !pev->target &&
+			(!pp->retprobe || kretprobe_offset_is_supported())) {
+		reloc_sym = kernel_get_ref_reloc_sym();
+		if (!reloc_sym) {
+			pr_warning("Relocated base symbol is not found!\n");
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	/* Setup result trace-probe-events */
+	*tevs = zalloc(sizeof(*tev) * num_matched_functions);
+	if (!*tevs) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* From here on ret counts the trace events filled in so far */
+	ret = 0;
+
+	for (j = 0; j < num_matched_functions; j++) {
+		sym = syms[j];
+
+		tev = (*tevs) + ret;
+		tp = &tev->point;
+		if (ret == num_matched_functions) {
+			pr_warning("Too many symbols are listed. Skip it.\n");
+			break;
+		}
+		ret++;
+
+		if (pp->offset > sym->end - sym->start) {
+			pr_warning("Offset %ld is bigger than the size of %s\n",
+				   pp->offset, sym->name);
+			ret = -ENOENT;
+			goto err_out;
+		}
+		/* Add one probe point */
+		tp->address = map->unmap_ip(map, sym->start) + pp->offset;
+
+		/* Check the kprobe (not in module) is within .text  */
+		if (!pev->uprobes && !pev->target &&
+		    kprobe_warn_out_range(sym->name, tp->address)) {
+			tp->symbol = NULL;	/* Skip it */
+			skipped++;
+		} else if (reloc_sym) {
+			/* Express the address relative to the relocation reference symbol */
+			tp->symbol = strdup_or_goto(reloc_sym->name, nomem_out);
+			tp->offset = tp->address - reloc_sym->addr;
+		} else {
+			tp->symbol = strdup_or_goto(sym->name, nomem_out);
+			tp->offset = pp->offset;
+		}
+		tp->realname = strdup_or_goto(sym->name, nomem_out);
+
+		tp->retprobe = pp->retprobe;
+		if (pev->target) {
+			if (pev->uprobes) {
+				tev->point.module = strdup_or_goto(pev->target,
+								   nomem_out);
+			} else {
+				/* Use the found module name, else the target string itself */
+				mod_name = find_module_name(pev->target);
+				tev->point.module =
+					strdup(mod_name ? mod_name : pev->target);
+				free(mod_name);
+				if (!tev->point.module)
+					goto nomem_out;
+			}
+		}
+		tev->uprobes = pev->uprobes;
+		tev->nargs = pev->nargs;
+		if (tev->nargs) {
+			tev->args = zalloc(sizeof(struct probe_trace_arg) *
+					   tev->nargs);
+			if (tev->args == NULL)
+				goto nomem_out;
+		}
+		/* Copy name, value and type of every argument */
+		for (i = 0; i < tev->nargs; i++) {
+			if (pev->args[i].name)
+				tev->args[i].name =
+					strdup_or_goto(pev->args[i].name,
+							nomem_out);
+
+			tev->args[i].value = strdup_or_goto(pev->args[i].var,
+							    nomem_out);
+			if (pev->args[i].type)
+				tev->args[i].type =
+					strdup_or_goto(pev->args[i].type,
+							nomem_out);
+		}
+		arch__fix_tev_from_maps(pev, tev, map, sym);
+	}
+	/* Every candidate was skipped: nothing usable was found */
+	if (ret == skipped) {
+		ret = -ENOENT;
+		goto err_out;
+	}
+
+out:
+	map__put(map);
+	free(syms);
+	return ret;
+
+nomem_out:
+	ret = -ENOMEM;
+err_out:
+	clear_probe_trace_events(*tevs, num_matched_functions);
+	zfree(tevs);
+	goto out;
+}
+
+/*
+ * Build a single trace event for a probe given as an absolute address,
+ * i.e. when pev->point.function starts with "0x" and no debuginfo is
+ * needed.
+ *
+ * Returns 1 with *tevs pointing to the new event, -EINVAL if @pev is not
+ * an absolute-address probe, -EACCES if a kprobe address fails the range
+ * check, or -ENOMEM.  On failure *tevs is cleared and set to NULL.
+ */
+static int try_to_find_absolute_address(struct perf_probe_event *pev,
+					struct probe_trace_event **tevs)
+{
+	struct perf_probe_point *pp = &pev->point;
+	struct probe_trace_event *tev;
+	struct probe_trace_point *tp;
+	int i, err;
+
+	if (!(pev->point.function && !strncmp(pev->point.function, "0x", 2)))
+		return -EINVAL;
+	if (perf_probe_event_need_dwarf(pev))
+		return -EINVAL;
+
+	/*
+	 * This is 'perf probe /lib/libc.so 0xabcd'. Try to probe at
+	 * absolute address.
+	 *
+	 * Only one tev can be generated by this.
+	 */
+	*tevs = zalloc(sizeof(*tev));
+	if (!*tevs)
+		return -ENOMEM;
+
+	tev = *tevs;
+	tp = &tev->point;
+
+	/*
+	 * Don't use tp->offset, use address directly, because
+	 * in synthesize_probe_trace_command() address cannot be
+	 * zero.
+	 */
+	tp->address = pev->point.abs_address;
+	tp->retprobe = pp->retprobe;
+	tev->uprobes = pev->uprobes;
+
+	err = -ENOMEM;
+	/*
+	 * Give it a '0x' leading symbol name.
+	 * In __add_probe_trace_events, a NULL symbol is interpreted as
+	 * invalid.
+	 */
+	if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0)
+		goto errout;
+
+	/* For kprobe, check range */
+	if ((!tev->uprobes) &&
+	    (kprobe_warn_out_range(tev->point.symbol,
+				   tev->point.address))) {
+		err = -EACCES;
+		goto errout;
+	}
+
+	if (asprintf(&tp->realname, "abs_%lx", tp->address) < 0)
+		goto errout;
+
+	if (pev->target) {
+		tp->module = strdup(pev->target);
+		if (!tp->module)
+			goto errout;
+	}
+
+	/*
+	 * Test the source event: tev was just zero-allocated, so checking
+	 * tev->group here would never copy the group.
+	 */
+	if (pev->group) {
+		tev->group = strdup(pev->group);
+		if (!tev->group)
+			goto errout;
+	}
+
+	if (pev->event) {
+		tev->event = strdup(pev->event);
+		if (!tev->event)
+			goto errout;
+	}
+
+	tev->nargs = pev->nargs;
+	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+	if (!tev->args)
+		goto errout;
+
+	for (i = 0; i < tev->nargs; i++)
+		copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]);
+
+	return 1;
+
+errout:
+	clear_probe_trace_events(*tevs, 1);
+	*tevs = NULL;
+	return err;
+}
+
+/*
+ * Concatenate two arrays: returns a newly malloc()ed buffer holding the
+ * @sz_a bytes of @a followed by the @sz_b bytes of @b, or NULL if the
+ * allocation fails.
+ */
+static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
+{
+	char *joined = malloc(sz_a + sz_b);
+
+	if (joined == NULL)
+		return NULL;
+
+	memcpy(joined, a, sz_a);
+	memcpy(joined + sz_a, b, sz_b);
+	return joined;
+}
+
+/*
+ * Append the @ntevs2 events of *@tevs2 to the array *@tevs / *@ntevs.
+ *
+ * If the destination is empty it simply takes over the source array.
+ * Otherwise a merged array is allocated, unless the total would exceed
+ * probe_conf.max_probes.  The source array is always released and
+ * *@tevs2 reset; its events are cleared as well when merging failed.
+ *
+ * Returns 0, -E2BIG or -ENOMEM.
+ */
+static int
+concat_probe_trace_events(struct probe_trace_event **tevs, int *ntevs,
+			  struct probe_trace_event **tevs2, int ntevs2)
+{
+	struct probe_trace_event *merged;
+	int rc = 0;
+
+	/* Nothing collected yet: hand the source array over as is */
+	if (*ntevs == 0) {
+		*tevs = *tevs2;
+		*ntevs = ntevs2;
+		*tevs2 = NULL;
+		return 0;
+	}
+
+	if (*ntevs + ntevs2 > probe_conf.max_probes) {
+		rc = -E2BIG;
+	} else {
+		/* Concatenate the array of probe_trace_event */
+		merged = memcat(*tevs, (*ntevs) * sizeof(**tevs),
+				*tevs2, ntevs2 * sizeof(**tevs2));
+		if (merged == NULL) {
+			rc = -ENOMEM;
+		} else {
+			free(*tevs);
+			*tevs = merged;
+			*ntevs += ntevs2;
+		}
+	}
+
+	if (rc < 0)
+		clear_probe_trace_events(*tevs2, ntevs2);
+	zfree(tevs2);
+
+	return rc;
+}
+
+/*
+ * Try to find probe_trace_event from given probe caches. Return the number
+ * of cached events found, if an error occurs return the error.
+ *
+ * Entries without an event or group name are ignored.  The rest are
+ * matched by glob against pev->event and, when set, pev->group.  If
+ * anything was found and @target starts with '/', @pev is marked as a
+ * uprobe event.
+ */
+static int find_cached_events(struct perf_probe_event *pev,
+			      struct probe_trace_event **tevs,
+			      const char *target)
+{
+	struct probe_cache *cache;
+	struct probe_cache_entry *entry;
+	struct probe_trace_event *fetched = NULL;
+	int total = 0;
+	int rc = 0;
+
+	cache = probe_cache__new(target, pev->nsi);
+	/* Return 0 ("not found") if the target has no probe cache. */
+	if (cache == NULL)
+		return 0;
+
+	for_each_probe_cache_entry(entry, cache) {
+		/* Skip the cache entry which has no name */
+		if (!entry->pev.event || !entry->pev.group)
+			continue;
+		if (pev->group && !strglobmatch(entry->pev.group, pev->group))
+			continue;
+		if (!strglobmatch(entry->pev.event, pev->event))
+			continue;
+
+		rc = probe_cache_entry__get_event(entry, &fetched);
+		if (rc > 0)
+			rc = concat_probe_trace_events(tevs, &total,
+						       &fetched, rc);
+		if (rc < 0)
+			break;
+	}
+	probe_cache__delete(cache);
+
+	if (rc < 0) {
+		clear_probe_trace_events(*tevs, total);
+		zfree(tevs);
+		return rc;
+	}
+
+	if (total > 0 && target && target[0] == '/')
+		pev->uprobes = true;
+
+	return total;
+}
+
+/*
+ * Run find_cached_events() against every build-id cache and merge the
+ * results into *tevs. Returns the total number of events, or a negative
+ * error (then *tevs is cleared and set to NULL).
+ */
+static int find_cached_events_all(struct perf_probe_event *pev,
+				   struct probe_trace_event **tevs)
+{
+	struct probe_trace_event *found = NULL;
+	struct strlist *bidlist;
+	struct str_node *nd;
+	char *origname;
+	int total = 0;
+	int ret;
+
+	/* Get the buildid list of all valid caches */
+	bidlist = build_id_cache__list_all(true);
+	if (!bidlist) {
+		/* Latch errno before anything else can overwrite it */
+		ret = -errno;
+		pr_debug("Failed to get buildids: %d\n", ret);
+		return ret;
+	}
+
+	ret = 0;
+	strlist__for_each_entry(nd, bidlist) {
+		origname = build_id_cache__origname(nd->s);
+		ret = find_cached_events(pev, &found, origname);
+		/* A cache with no matching event is simply skipped */
+		if (ret > 0)
+			ret = concat_probe_trace_events(tevs, &total,
+							&found, ret);
+		free(origname);
+		if (ret < 0)
+			break;
+	}
+	strlist__delete(bidlist);
+
+	if (ret < 0) {
+		clear_probe_trace_events(*tevs, total);
+		zfree(tevs);
+		return ret;
+	}
+
+	return total;
+}
+
+/*
+ * Look up the trace events for pev in the probe cache of pev->target.
+ *
+ * SDT/cached events are delegated to find_cached_events{,_all}(). For any
+ * other event the command strings of the matching cache entry are parsed
+ * into a newly allocated array stored in *tevs.
+ *
+ * Returns the number of events found (0 when there is no cache or no
+ * matching entry) or a negative error code. When parsing a cached command
+ * fails, the partially built array is released and *tevs is reset to NULL:
+ * the caller keeps using *tevs for its other lookup methods, so leaving it
+ * set would leak the array. This relies on clear_probe_trace_events()
+ * coping with partially filled events, as the other error paths in this
+ * file already do.
+ */
+static int find_probe_trace_events_from_cache(struct perf_probe_event *pev,
+					      struct probe_trace_event **tevs)
+{
+	struct probe_cache *cache;
+	struct probe_cache_entry *entry;
+	struct probe_trace_event *tev;
+	struct str_node *node;
+	int ret, i;
+
+	if (pev->sdt) {
+		/* For SDT/cached events, we use special search functions */
+		if (!pev->target)
+			return find_cached_events_all(pev, tevs);
+		else
+			return find_cached_events(pev, tevs, pev->target);
+	}
+	cache = probe_cache__new(pev->target, pev->nsi);
+	if (!cache)
+		return 0;
+
+	entry = probe_cache__find(cache, pev);
+	if (!entry) {
+		/* SDT must be in the cache */
+		ret = pev->sdt ? -ENOENT : 0;
+		goto out;
+	}
+
+	ret = strlist__nr_entries(entry->tevlist);
+	if (ret > probe_conf.max_probes) {
+		pr_debug("Too many entries matched in the cache of %s\n",
+			 pev->target ? : "kernel");
+		ret = -E2BIG;
+		goto out;
+	}
+
+	*tevs = zalloc(ret * sizeof(*tev));
+	if (!*tevs) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	i = 0;
+	strlist__for_each_entry(node, entry->tevlist) {
+		tev = &(*tevs)[i++];
+		ret = parse_probe_trace_command(node->s, tev);
+		if (ret < 0) {
+			/* Drop everything parsed so far, including tev */
+			clear_probe_trace_events(*tevs, i);
+			zfree(tevs);
+			goto out;
+		}
+		/* Set the uprobes attribute as same as original */
+		tev->uprobes = pev->uprobes;
+	}
+	ret = i;
+
+out:
+	probe_cache__delete(cache);
+	return ret;
+}
+
+/*
+ * Convert one perf_probe_event into probe_trace_events stored in *tevs.
+ * The lookup methods are tried in this order: absolute address, probe
+ * cache, debuginfo, symbol map. Returns the number of events found or a
+ * negative error code.
+ */
+static int convert_to_probe_trace_events(struct perf_probe_event *pev,
+					 struct probe_trace_event **tevs)
+{
+	int ret;
+
+	/* Non-SDT events without a group get a default group name */
+	if (!pev->group && !pev->sdt) {
+		if (pev->uprobes) {
+			ret = convert_exec_to_group(pev->target, &pev->group);
+		} else {
+			pev->group = strdup(PERFPROBE_GROUP);
+			ret = pev->group ? 0 : -ENOMEM;
+		}
+		if (ret) {
+			pr_warning("Failed to make a group name.\n");
+			return ret;
+		}
+	}
+
+	ret = try_to_find_absolute_address(pev, tevs);
+	if (ret > 0)
+		return ret;
+
+	/* The probe cache is consulted before anything more expensive */
+	ret = find_probe_trace_events_from_cache(pev, tevs);
+	if (ret > 0)
+		return ret;	/* Found in probe cache */
+	if (pev->sdt)		/* SDT can be found only in the cache */
+		return ret ? ret : -ENOENT;
+
+	/* Convert perf_probe_event with debuginfo */
+	ret = try_to_find_probe_trace_events(pev, tevs);
+	if (ret)
+		return ret;	/* Found in debuginfo or got an error */
+
+	return find_probe_trace_events_from_map(pev, tevs);
+}
+
+/*
+ * Convert each of the npevs events in pevs into trace events, storing the
+ * result in pevs[i].tevs / pevs[i].ntevs.
+ *
+ * Returns 0 on success or the first negative error from the conversion.
+ * The kprobe blacklist is released on both paths; previously an early
+ * return on error skipped the release.
+ */
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+	int i, ret = 0;
+
+	/* Loop 1: convert all events */
+	for (i = 0; i < npevs; i++) {
+		/* Init kprobe blacklist if needed */
+		if (!pevs[i].uprobes)
+			kprobe_blacklist__init();
+		/* Convert with or without debuginfo */
+		ret = convert_to_probe_trace_events(&pevs[i], &pevs[i].tevs);
+		if (ret < 0)
+			break;
+		pevs[i].ntevs = ret;
+		ret = 0;
+	}
+	/* This just releases the blacklist, and only if it was allocated */
+	kprobe_blacklist__release();
+
+	return ret;
+}
+
+/*
+ * Print the synthesized command string of one trace event on stdout.
+ * Returns 0, or -EINVAL if the command could not be synthesized.
+ */
+static int show_probe_trace_event(struct probe_trace_event *tev)
+{
+	char *cmd;
+
+	cmd = synthesize_probe_trace_command(tev);
+	if (cmd == NULL) {
+		pr_debug("Failed to synthesize probe trace event.\n");
+		return -EINVAL;
+	}
+
+	/* The definition always goes to stdout */
+	printf("%s\n", cmd);
+	free(cmd);
+	return 0;
+}
+
+/*
+ * Name and print every converted trace event of every perf_probe_event in
+ * pevs. Stops at the first failure and returns its error code; returns 0
+ * when everything was shown.
+ */
+int show_probe_trace_events(struct perf_probe_event *pevs, int npevs)
+{
+	struct strlist *namelist = strlist__new(NULL, NULL);
+	struct probe_trace_event *tev;
+	struct perf_probe_event *pev;
+	int i, j, ret = 0;
+
+	if (!namelist)
+		return -ENOMEM;
+
+	for (j = 0; j < npevs; j++) {
+		pev = &pevs[j];
+		for (i = 0; i < pev->ntevs; i++) {
+			tev = &pev->tevs[i];
+			/* Skip if the symbol is out of .text or blacklisted */
+			if (!tev->point.symbol && !pev->uprobes)
+				continue;
+
+			/* Set new name for tev (and update namelist) */
+			ret = probe_trace_event__set_name(tev, pev,
+							  namelist, true);
+			if (ret)
+				goto out;
+
+			ret = show_probe_trace_event(tev);
+			if (ret)
+				goto out;
+		}
+	}
+out:
+	strlist__delete(namelist);
+
+	return ret;
+}
+
+/*
+ * Add the already converted trace events of each perf_probe_event.
+ * Stops at the first negative result and returns the last result seen
+ * (0 when npevs is 0).
+ */
+int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+	int i, ret = 0;
+
+	/* Loop 2: add all events */
+	for (i = 0; i < npevs && ret >= 0; i++)
+		ret = __add_probe_trace_events(&pevs[i], pevs[i].tevs,
+					       pevs[i].ntevs,
+					       probe_conf.force_add);
+
+	return ret;
+}
+
+/*
+ * Release everything attached to each perf_probe_event in pevs: its trace
+ * events, its namespace reference and its own contents.
+ */
+void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+	struct perf_probe_event *pev;
+	int i, j;
+
+	/* Loop 3: cleanup and free trace events  */
+	for (i = 0; i < npevs; i++) {
+		pev = &pevs[i];
+
+		for (j = 0; j < pev->ntevs; j++)
+			clear_probe_trace_event(&pev->tevs[j]);
+		zfree(&pev->tevs);
+		pev->ntevs = 0;
+
+		nsinfo__zput(pev->nsi);
+		clear_perf_probe_event(pev);
+	}
+}
+
+/*
+ * Convert, apply and clean up a set of perf_probe_events in one go.
+ * The symbol maps are initialized from the first event's uprobes flag.
+ * Returns the result of the conversion, or of the apply step if the
+ * conversion succeeded.
+ */
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+	int err;
+
+	err = init_probe_symbol_maps(pevs->uprobes);
+	if (err < 0)
+		return err;
+
+	err = convert_perf_probe_events(pevs, npevs);
+	if (!err)
+		err = apply_perf_probe_events(pevs, npevs);
+
+	/* Cleanup happens whether or not the steps above succeeded */
+	cleanup_perf_probe_events(pevs, npevs);
+	exit_probe_symbol_maps();
+
+	return err;
+}
+
+/*
+ * Delete the kprobe and uprobe events matching filter.
+ *
+ * -ENOENT from either probe file is not treated as an error. Any other
+ * failure is returned; a kprobe failure skips the uprobe deletion.
+ */
+int del_perf_probe_events(struct strfilter *filter)
+{
+	int kfd = -1, ufd = -1;
+	int ret, uret;
+	char *str = strfilter__string(filter);
+
+	if (!str)
+		return -EINVAL;
+
+	/* Get current event names */
+	ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW);
+	if (ret < 0)
+		goto out;
+
+	ret = probe_file__del_events(kfd, filter);
+	if (ret >= 0 || ret == -ENOENT) {
+		uret = probe_file__del_events(ufd, filter);
+		ret = (uret < 0 && uret != -ENOENT) ? uret : 0;
+	}
+
+	if (kfd >= 0)
+		close(kfd);
+	if (ufd >= 0)
+		close(ufd);
+out:
+	free(str);
+
+	return ret;
+}
+
+/*
+ * Print, one per line on stdout, the names of all symbols in the map of
+ * target that pass _filter.
+ *
+ * Returns a negative value if the symbol maps cannot be initialized,
+ * -EINVAL if no map is found for target, the non-zero result of
+ * map__load() if loading fails, and 0 otherwise. Once
+ * init_probe_symbol_maps() has succeeded, exit_probe_symbol_maps() is
+ * called on every return path (the missing-map path used to skip it).
+ */
+int show_available_funcs(const char *target, struct nsinfo *nsi,
+			 struct strfilter *_filter, bool user)
+{
+	struct rb_node *nd;
+	struct map *map;
+	int ret;
+
+	ret = init_probe_symbol_maps(user);
+	if (ret < 0)
+		return ret;
+
+	/* Get a symbol map */
+	map = get_target_map(target, nsi, user);
+	if (!map) {
+		pr_err("Failed to get a map for %s\n", (target) ? : "kernel");
+		ret = -EINVAL;
+		goto out_exit;
+	}
+
+	ret = map__load(map);
+	if (ret) {
+		if (ret == -2) {
+			char *str = strfilter__string(_filter);
+			pr_err("Failed to find symbols matched to \"%s\"\n",
+			       str);
+			free(str);
+		} else
+			pr_err("Failed to load symbols in %s\n",
+			       (target) ? : "kernel");
+		goto end;
+	}
+	if (!dso__sorted_by_name(map->dso, map->type))
+		dso__sort_by_name(map->dso, map->type);
+
+	/* Show all (filtered) symbols */
+	setup_pager();
+
+	for (nd = rb_first(&map->dso->symbol_names[map->type]); nd; nd = rb_next(nd)) {
+		struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
+
+		if (strfilter__compare(_filter, pos->sym.name))
+			printf("%s\n", pos->sym.name);
+	}
+
+end:
+	map__put(map);
+out_exit:
+	exit_probe_symbol_maps();
+
+	return ret;
+}
+
+/*
+ * Fill tvar from pvar with strdup()ed copies: value from pvar->var, type
+ * from pvar->type (only when set) and name from pvar->name (NULL when
+ * unset). Returns 0, or -ENOMEM as soon as one copy fails; fields copied
+ * before the failure are left in tvar.
+ */
+int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
+			    struct perf_probe_arg *pvar)
+{
+	tvar->value = strdup(pvar->var);
+	if (!tvar->value)
+		return -ENOMEM;
+
+	if (pvar->type) {
+		tvar->type = strdup(pvar->type);
+		if (!tvar->type)
+			return -ENOMEM;
+	}
+
+	if (!pvar->name) {
+		tvar->name = NULL;
+		return 0;
+	}
+
+	tvar->name = strdup(pvar->name);
+	return tvar->name ? 0 : -ENOMEM;
+}
diff --git a/util/probe-event.h b/util/probe-event.h
new file mode 100644
index 0000000..45b14f0
--- /dev/null
+++ b/util/probe-event.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PROBE_EVENT_H
+#define _PROBE_EVENT_H
+
+#include <linux/compiler.h>
+#include <stdbool.h>
+#include "intlist.h"
+#include "namespaces.h"
+
+/*
+ * Probe related configurations
+ *
+ * A single global instance (probe_conf) is shared by the probe code.
+ * NOTE(review): the meaning of the uncommented flags is not established
+ * by the code visible here - confirm against their users.
+ */
+struct probe_conf {
+	bool	show_ext_vars;
+	bool	show_location_range;
+	bool	force_add;	/* Passed through to __add_probe_trace_events() */
+	bool	no_inlines;
+	bool	cache;
+	int	max_probes;	/* Upper bound on trace events per lookup */
+};
+extern struct probe_conf probe_conf;
+extern bool probe_event_dry_run;
+
+struct symbol;
+
+/* kprobe-tracer and uprobe-tracer tracing point */
+struct probe_trace_point {
+	char		*realname;	/* function real name (if needed) */
+	char		*symbol;	/* Base symbol */
+	char		*module;	/* Module name */
+	unsigned long	offset;		/* Offset from symbol */
+	unsigned long	address;	/* Actual address of the trace point */
+	bool		retprobe;	/* Return probe flag */
+};
+
+/* probe-tracer tracing argument referencing offset */
+struct probe_trace_arg_ref {
+	struct probe_trace_arg_ref	*next;	/* Next reference */
+	long				offset;	/* Offset value */
+};
+
+/* kprobe-tracer and uprobe-tracer tracing argument */
+struct probe_trace_arg {
+	char				*name;	/* Argument name */
+	char				*value;	/* Base value */
+	char				*type;	/* Type name */
+	struct probe_trace_arg_ref	*ref;	/* Referencing offset */
+};
+
+/* kprobe-tracer and uprobe-tracer tracing event (point + arg) */
+struct probe_trace_event {
+	char				*event;	/* Event name */
+	char				*group;	/* Group name */
+	struct probe_trace_point	point;	/* Trace point */
+	int				nargs;	/* Number of args */
+	bool				uprobes;	/* uprobes only */
+	struct probe_trace_arg		*args;	/* Arguments */
+};
+
+/* Perf probe probing point */
+struct perf_probe_point {
+	char		*file;		/* File path */
+	char		*function;	/* Function name */
+	int		line;		/* Line number */
+	bool		retprobe;	/* Return probe flag */
+	char		*lazy_line;	/* Lazy matching pattern */
+	unsigned long	offset;		/* Offset from function entry */
+	unsigned long	abs_address;	/* Absolute address of the point */
+};
+
+/* Perf probe probing argument field chain */
+struct perf_probe_arg_field {
+	struct perf_probe_arg_field	*next;	/* Next field */
+	char				*name;	/* Name of the field */
+	long				index;	/* Array index number */
+	bool				ref;	/* Referencing flag */
+};
+
+/* Perf probe probing argument */
+struct perf_probe_arg {
+	char				*name;	/* Argument name */
+	char				*var;	/* Variable name */
+	char				*type;	/* Type name */
+	struct perf_probe_arg_field	*field;	/* Structure fields */
+};
+
+/* Perf probe probing event (point + arg) */
+struct perf_probe_event {
+	char			*event;	/* Event name */
+	char			*group;	/* Group name */
+	struct perf_probe_point	point;	/* Probe point */
+	int			nargs;	/* Number of arguments */
+	bool			sdt;	/* SDT/cached event flag */
+	bool			uprobes;	/* Uprobe event flag */
+	char			*target;	/* Target binary */
+	struct perf_probe_arg	*args;	/* Arguments */
+	struct probe_trace_event *tevs;	/* Converted trace events (set by convert_perf_probe_events()) */
+	int			ntevs;	/* Number of entries in tevs */
+	struct nsinfo		*nsi;	/* Target namespace */
+};
+
+/* Line range */
+struct line_range {
+	char			*file;		/* File name */
+	char			*function;	/* Function name */
+	int			start;		/* Start line number */
+	int			end;		/* End line number */
+	int			offset;		/* Start line offset */
+	char			*path;		/* Real path name */
+	char			*comp_dir;	/* Compile directory */
+	struct intlist		*line_list;	/* Visible lines */
+};
+
+struct strlist;
+
+/* List of variables */
+struct variable_list {
+	struct probe_trace_point	point;	/* Actual probepoint */
+	struct strlist			*vars;	/* Available variables */
+};
+
+struct map;
+int init_probe_symbol_maps(bool user_only);
+void exit_probe_symbol_maps(void);
+
+/* Command string to events */
+int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev);
+int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev);
+
+/* Events to command string */
+char *synthesize_perf_probe_command(struct perf_probe_event *pev);
+char *synthesize_probe_trace_command(struct probe_trace_event *tev);
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa);
+char *synthesize_perf_probe_point(struct perf_probe_point *pp);
+
+int perf_probe_event__copy(struct perf_probe_event *dst,
+			   struct perf_probe_event *src);
+
+bool perf_probe_with_var(struct perf_probe_event *pev);
+
+/* Check the perf_probe_event needs debuginfo */
+bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
+
+/* Release event contents */
+void clear_perf_probe_event(struct perf_probe_event *pev);
+void clear_probe_trace_event(struct probe_trace_event *tev);
+
+/* Command string to line-range */
+int parse_line_range_desc(const char *cmd, struct line_range *lr);
+
+/* Release line range members */
+void line_range__clear(struct line_range *lr);
+
+/* Initialize line range */
+int line_range__init(struct line_range *lr);
+
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int show_probe_trace_events(struct perf_probe_event *pevs, int npevs);
+void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+
+struct strfilter;
+
+int del_perf_probe_events(struct strfilter *filter);
+
+int show_perf_probe_event(const char *group, const char *event,
+			  struct perf_probe_event *pev,
+			  const char *module, bool use_stdout);
+int show_perf_probe_events(struct strfilter *filter);
+int show_line_range(struct line_range *lr, const char *module,
+		    struct nsinfo *nsi, bool user);
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+			struct strfilter *filter);
+int show_available_funcs(const char *module, struct nsinfo *nsi,
+			 struct strfilter *filter, bool user);
+void arch__fix_tev_from_maps(struct perf_probe_event *pev,
+			     struct probe_trace_event *tev, struct map *map,
+			     struct symbol *sym);
+
+/* If there is no space to write, returns -E2BIG. */
+int e_snprintf(char *str, size_t size, const char *format, ...) __printf(3, 4);
+
+/* Maximum index number of event-name postfix */
+#define MAX_EVENT_INDEX	1024
+
+int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
+			    struct perf_probe_arg *pvar);
+
+struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user);
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+					   int ntevs);
+
+#endif /*_PROBE_EVENT_H */
diff --git a/util/probe-file.c b/util/probe-file.c
new file mode 100644
index 0000000..4ae1123
--- /dev/null
+++ b/util/probe-file.c
@@ -0,0 +1,1067 @@
+/*
+ * probe-file.c : operate ftrace k/uprobe events files
+ *
+ * Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include "util.h"
+#include "event.h"
+#include "strlist.h"
+#include "strfilter.h"
+#include "debug.h"
+#include "cache.h"
+#include "color.h"
+#include "symbol.h"
+#include "thread.h"
+#include <api/fs/tracing_path.h>
+#include "probe-event.h"
+#include "probe-file.h"
+#include "session.h"
+#include "perf_regs.h"
+#include "string2.h"
+
+/* 4096 - 2 ('\n' + '\0') */
+#define MAX_CMDLEN 4094
+
+/*
+ * Warn about a failure to open kprobe_events or uprobe_events.
+ * err is the negative error code of the open attempt; uprobe selects
+ * which of the two files the message talks about.
+ */
+static void print_open_warning(int err, bool uprobe)
+{
+	char sbuf[STRERR_BUFSIZE];
+	const char kind = uprobe ? 'u' : 'k';
+
+	switch (err) {
+	case -ENOENT:
+		pr_warning("%cprobe_events file does not exist"
+			   " - please rebuild kernel with %s.\n",
+			   kind,
+			   uprobe ? "CONFIG_UPROBE_EVENTS"
+				  : "CONFIG_KPROBE_EVENTS");
+		break;
+	case -ENOTSUP:
+		pr_warning("Tracefs or debugfs is not mounted.\n");
+		break;
+	default:
+		pr_warning("Failed to open %cprobe_events: %s\n",
+			   kind,
+			   str_error_r(-err, sbuf, sizeof(sbuf)));
+		break;
+	}
+}
+
+/*
+ * Warn that neither kprobe_events nor uprobe_events could be opened.
+ * kerr and uerr are the negative error codes of the two open attempts.
+ */
+static void print_both_open_warning(int kerr, int uerr)
+{
+	char sbuf[STRERR_BUFSIZE];
+
+	/* Both kprobes and uprobes are disabled, warn it. */
+	if (kerr == -ENOTSUP && uerr == -ENOTSUP) {
+		pr_warning("Tracefs or debugfs is not mounted.\n");
+		return;
+	}
+
+	if (kerr == -ENOENT && uerr == -ENOENT) {
+		pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS "
+			   "or/and CONFIG_UPROBE_EVENTS.\n");
+		return;
+	}
+
+	/* Mixed or other errors: report each file separately */
+	pr_warning("Failed to open kprobe events: %s.\n",
+		   str_error_r(-kerr, sbuf, sizeof(sbuf)));
+	pr_warning("Failed to open uprobe events: %s.\n",
+		   str_error_r(-uerr, sbuf, sizeof(sbuf)));
+}
+
+/*
+ * Open trace_file below tracing_path.
+ *
+ * The file is opened O_RDWR | O_APPEND only when readwrite is set and
+ * probe_event_dry_run is not; otherwise it is opened read-only. Returns
+ * the file descriptor, -errno if open() fails, or the negative result of
+ * e_snprintf() if the path cannot be built.
+ */
+int open_trace_file(const char *trace_file, bool readwrite)
+{
+	char path[PATH_MAX];
+	int flags = O_RDONLY;
+	int ret;
+
+	ret = e_snprintf(path, PATH_MAX, "%s/%s",
+			 tracing_path, trace_file);
+	if (ret < 0)
+		return ret;
+
+	pr_debug("Opening %s write=%d\n", path, readwrite);
+	if (readwrite && !probe_event_dry_run)
+		flags = O_RDWR | O_APPEND;
+
+	ret = open(path, flags, 0);
+	if (ret < 0)
+		ret = -errno;
+
+	return ret;
+}
+
+/* Open the "kprobe_events" trace file; see open_trace_file() for the result */
+static int open_kprobe_events(bool readwrite)
+{
+	return open_trace_file("kprobe_events", readwrite);
+}
+
+/* Open the "uprobe_events" trace file; see open_trace_file() for the result */
+static int open_uprobe_events(bool readwrite)
+{
+	return open_trace_file("uprobe_events", readwrite);
+}
+
+/*
+ * Open uprobe_events (PF_FL_UPROBE set in flag) or kprobe_events,
+ * read-write when PF_FL_RW is set. Prints a warning on failure and
+ * returns the descriptor or the negative error code.
+ */
+int probe_file__open(int flag)
+{
+	bool uprobe = flag & PF_FL_UPROBE;
+	bool readwrite = flag & PF_FL_RW;
+	int fd;
+
+	fd = uprobe ? open_uprobe_events(readwrite)
+		    : open_kprobe_events(readwrite);
+	if (fd < 0)
+		print_open_warning(fd, uprobe);
+
+	return fd;
+}
+
+/*
+ * Open both kprobe_events and uprobe_events, storing the results (a
+ * descriptor or a negative error code each) in *kfd and *ufd.
+ *
+ * Returns 0 if at least one file was opened. If both fail, a warning is
+ * printed and the kprobe error is returned. -EINVAL if kfd or ufd is NULL.
+ */
+int probe_file__open_both(int *kfd, int *ufd, int flag)
+{
+	bool readwrite;
+
+	if (!kfd || !ufd)
+		return -EINVAL;
+
+	readwrite = flag & PF_FL_RW;
+	*kfd = open_kprobe_events(readwrite);
+	*ufd = open_uprobe_events(readwrite);
+
+	if (*kfd >= 0 || *ufd >= 0)
+		return 0;
+
+	print_both_open_warning(*kfd, *ufd);
+	return *kfd;
+}
+
+/*
+ * Get raw string list of current kprobe_events or uprobe_events.
+ *
+ * Reads fd line by line through a dup()ed descriptor (so fd itself stays
+ * open) and returns a new strlist with one entry per line, newline
+ * stripped. Returns NULL if fd is negative or on any failure.
+ *
+ * The newline is located with strchr(), as probe_cache__load() does,
+ * instead of indexing strlen() - 1, which reads before the buffer when
+ * fgets() hands back an empty string.
+ */
+struct strlist *probe_file__get_rawlist(int fd)
+{
+	int ret, fddup;
+	FILE *fp;
+	char buf[MAX_CMDLEN];
+	char *p;
+	struct strlist *sl;
+
+	if (fd < 0)
+		return NULL;
+
+	sl = strlist__new(NULL, NULL);
+	if (sl == NULL)
+		return NULL;
+
+	fddup = dup(fd);
+	if (fddup < 0)
+		goto out_free_sl;
+
+	fp = fdopen(fddup, "r");
+	if (!fp)
+		goto out_close_fddup;
+
+	while (fgets(buf, MAX_CMDLEN, fp)) {
+		p = strchr(buf, '\n');
+		if (p)
+			*p = '\0';
+		ret = strlist__add(sl, buf);
+		if (ret < 0) {
+			pr_debug("strlist__add failed (%d)\n", ret);
+			goto out_close_fp;
+		}
+	}
+	fclose(fp);
+
+	return sl;
+
+out_close_fp:
+	/* fclose() also closes fddup, so skip the plain close() below */
+	fclose(fp);
+	goto out_free_sl;
+out_close_fddup:
+	close(fddup);
+out_free_sl:
+	strlist__delete(sl);
+	return NULL;
+}
+
+/*
+ * Build a strlist of the event names currently defined in the probe file
+ * behind fd. Each raw line is parsed as a trace command; the entry is
+ * "group:event" when include_group is set and just the event name
+ * otherwise.
+ *
+ * Returns NULL if the raw list cannot be read, if the result list cannot
+ * be allocated, or if parsing/adding any entry fails.
+ */
+static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
+{
+	char buf[128];
+	struct strlist *sl, *rawlist;
+	struct str_node *ent;
+	struct probe_trace_event tev;
+	int ret = 0;
+
+	memset(&tev, 0, sizeof(tev));
+	rawlist = probe_file__get_rawlist(fd);
+	if (!rawlist)
+		return NULL;
+	sl = strlist__new(NULL, NULL);
+	if (!sl) {
+		/* Allocation failed; do not hand NULL to strlist__add() */
+		strlist__delete(rawlist);
+		return NULL;
+	}
+	strlist__for_each_entry(ent, rawlist) {
+		ret = parse_probe_trace_command(ent->s, &tev);
+		if (ret < 0)
+			break;
+		if (include_group) {
+			ret = e_snprintf(buf, 128, "%s:%s", tev.group,
+					tev.event);
+			if (ret >= 0)
+				ret = strlist__add(sl, buf);
+		} else
+			ret = strlist__add(sl, tev.event);
+		clear_probe_trace_event(&tev);
+		if (ret < 0)
+			break;
+	}
+	strlist__delete(rawlist);
+
+	if (ret < 0) {
+		strlist__delete(sl);
+		return NULL;
+	}
+	return sl;
+}
+
+/*
+ * Get current perf-probe event names
+ *
+ * Returns the event names (without the group prefix) of the probe file
+ * behind fd as a new strlist, or NULL on failure.
+ */
+struct strlist *probe_file__get_namelist(int fd)
+{
+	return __probe_file__get_namelist(fd, false);
+}
+
+/*
+ * Synthesize the command string for tev and write it to fd, unless
+ * probe_event_dry_run is set. Returns -EINVAL if the command cannot be
+ * synthesized, -errno if fewer bytes than the command length were
+ * written, and 0 otherwise.
+ */
+int probe_file__add_event(int fd, struct probe_trace_event *tev)
+{
+	char sbuf[STRERR_BUFSIZE];
+	char *cmd = synthesize_probe_trace_command(tev);
+	size_t len;
+	int ret = 0;
+
+	if (cmd == NULL) {
+		pr_debug("Failed to synthesize probe trace event.\n");
+		return -EINVAL;
+	}
+
+	pr_debug("Writing event: %s\n", cmd);
+	if (probe_event_dry_run)
+		goto out;
+
+	len = strlen(cmd);
+	if (write(fd, cmd, len) < (int)len) {
+		/* Latch errno before the warning can disturb it */
+		ret = -errno;
+		pr_warning("Failed to write event: %s\n",
+			   str_error_r(errno, sbuf, sizeof(sbuf)));
+	}
+out:
+	free(cmd);
+
+	return ret;
+}
+
+/*
+ * Delete one event, named "group:event" in ent->s, by writing
+ * "-:group/event" to fd. Returns 0 on success; on failure a warning is
+ * printed and a negative error code returned.
+ */
+static int __del_trace_probe_event(int fd, struct str_node *ent)
+{
+	char cmd[128];
+	char *sep;
+	int ret;
+
+	/* Convert from perf-probe event to trace-probe event */
+	ret = e_snprintf(cmd, 128, "-:%s", ent->s);
+	if (ret >= 0) {
+		/* Look for the group separator behind the leading "-:" */
+		sep = strchr(cmd + 2, ':');
+		if (sep) {
+			*sep = '/';
+
+			pr_debug("Writing event: %s\n", cmd);
+			ret = write(fd, cmd, strlen(cmd));
+			if (ret >= 0)
+				return 0;
+			ret = -errno;
+		} else {
+			pr_debug("Internal error: %s should have ':' but not.\n",
+				 ent->s);
+			ret = -ENOTSUP;
+		}
+	}
+
+	/* The command buffer is reused for the error string */
+	pr_warning("Failed to delete event: %s\n",
+		   str_error_r(-ret, cmd, sizeof(cmd)));
+	return ret;
+}
+
+/*
+ * Add to plist every "group:event" name from the probe file behind fd
+ * that passes filter, either by its event part alone or by the full
+ * "group:event" string.
+ *
+ * Returns 0 if at least one name matched, -ENOENT if none matched or the
+ * name list could not be read, and -EINVAL if plist is NULL.
+ */
+int probe_file__get_events(int fd, struct strfilter *filter,
+			   struct strlist *plist)
+{
+	struct strlist *namelist;
+	struct str_node *ent;
+	const char *sep;
+	bool hit;
+	int ret = -ENOENT;
+
+	if (!plist)
+		return -EINVAL;
+
+	namelist = __probe_file__get_namelist(fd, true);
+	if (!namelist)
+		return -ENOENT;
+
+	strlist__for_each_entry(ent, namelist) {
+		hit = false;
+
+		/* Try the bare event name first, then the whole string */
+		sep = strchr(ent->s, ':');
+		if (sep)
+			hit = strfilter__compare(filter, sep + 1);
+		if (!hit)
+			hit = strfilter__compare(filter, ent->s);
+		if (!hit)
+			continue;
+
+		strlist__add(plist, ent->s);
+		ret = 0;
+	}
+	strlist__delete(namelist);
+
+	return ret;
+}
+
+/*
+ * Delete every event named in namelist from the probe file behind fd.
+ * Stops at, and returns, the first negative result; otherwise returns the
+ * result of the last deletion (0 for an empty list).
+ */
+int probe_file__del_strlist(int fd, struct strlist *namelist)
+{
+	struct str_node *ent;
+	int err = 0;
+
+	strlist__for_each_entry(ent, namelist) {
+		err = __del_trace_probe_event(fd, ent);
+		if (err < 0)
+			return err;
+	}
+
+	return err;
+}
+
+/*
+ * Delete all events in the probe file behind fd that match filter.
+ *
+ * Returns -ENOMEM if the temporary name list cannot be allocated, the
+ * negative result of probe_file__get_events() if no event could be
+ * collected, or the result of probe_file__del_strlist(). The temporary
+ * list is freed on every path (it used to leak when collection failed).
+ */
+int probe_file__del_events(int fd, struct strfilter *filter)
+{
+	struct strlist *namelist;
+	int ret;
+
+	namelist = strlist__new(NULL, NULL);
+	if (!namelist)
+		return -ENOMEM;
+
+	ret = probe_file__get_events(fd, filter, namelist);
+	if (ret < 0)
+		goto out;
+
+	ret = probe_file__del_strlist(fd, namelist);
+out:
+	strlist__delete(namelist);
+
+	return ret;
+}
+
+/*
+ * Free a cache entry and everything it owns. NULL is accepted.
+ * Caller must ensure to remove this entry from list
+ */
+static void probe_cache_entry__delete(struct probe_cache_entry *entry)
+{
+	if (!entry)
+		return;
+
+	BUG_ON(!list_empty(&entry->node));
+
+	strlist__delete(entry->tevlist);
+	clear_perf_probe_event(&entry->pev);
+	zfree(&entry->spev);
+	free(entry);
+}
+
+/*
+ * Allocate a cache entry with an empty trace-event list. When pev is
+ * given, the entry also gets the synthesized command string of pev and a
+ * copy of pev. Returns NULL if any allocation or copy fails.
+ */
+static struct probe_cache_entry *
+probe_cache_entry__new(struct perf_probe_event *pev)
+{
+	struct probe_cache_entry *entry;
+
+	entry = zalloc(sizeof(*entry));
+	if (!entry)
+		return NULL;
+
+	INIT_LIST_HEAD(&entry->node);
+	entry->tevlist = strlist__new(NULL, NULL);
+	if (!entry->tevlist) {
+		free(entry);
+		return NULL;
+	}
+
+	/* Without a source event the entry is returned empty */
+	if (!pev)
+		return entry;
+
+	entry->spev = synthesize_perf_probe_command(pev);
+	if (!entry->spev ||
+	    perf_probe_event__copy(&entry->pev, pev) < 0) {
+		probe_cache_entry__delete(entry);
+		return NULL;
+	}
+
+	return entry;
+}
+
+/*
+ * Parse the cached command strings of entry into a newly allocated array
+ * of probe_trace_events stored in *tevs.
+ *
+ * Returns the number of events, -E2BIG if the entry holds more than
+ * probe_conf.max_probes commands, -ENOMEM if the array cannot be
+ * allocated, or the negative result of parse_probe_trace_command().
+ *
+ * A parse failure used to be swallowed: the count returned then included
+ * the half-parsed event. Now the events parsed so far are cleared, the
+ * array is freed, *tevs is reset to NULL and the error is returned.
+ */
+int probe_cache_entry__get_event(struct probe_cache_entry *entry,
+				 struct probe_trace_event **tevs)
+{
+	struct probe_trace_event *tev;
+	struct str_node *node;
+	int ret, i;
+
+	ret = strlist__nr_entries(entry->tevlist);
+	if (ret > probe_conf.max_probes)
+		return -E2BIG;
+
+	*tevs = zalloc(ret * sizeof(*tev));
+	if (!*tevs)
+		return -ENOMEM;
+
+	i = 0;
+	strlist__for_each_entry(node, entry->tevlist) {
+		tev = &(*tevs)[i++];
+		ret = parse_probe_trace_command(node->s, tev);
+		if (ret < 0)
+			goto out_err;
+	}
+	return i;
+
+out_err:
+	/* i already counts the event whose parsing failed */
+	while (i > 0)
+		clear_probe_trace_event(&(*tevs)[--i]);
+	zfree(tevs);
+	return ret;
+}
+
+/*
+ * Open (creating it if needed) the "probes" file in the build-id cache
+ * directory of target and store the descriptor in pcache->fd.
+ *
+ * target may be a cached build-id string, a path name, or NULL /
+ * DSO__NAME_KALLSYMS for the kernel.
+ * For the kernel probe caches, pass target = NULL or DSO__NAME_KALLSYMS
+ *
+ * Returns the descriptor, or a negative value on failure.
+ */
+static int probe_cache__open(struct probe_cache *pcache, const char *target,
+			     struct nsinfo *nsi)
+{
+	char cpath[PATH_MAX];
+	char sbuildid[SBUILD_ID_SIZE];
+	char *dir_name = NULL;
+	bool is_kallsyms = false;
+	int ret, fd;
+	struct nscookie nsc;
+
+	if (target && build_id_cache__cached(target)) {
+		/* This is a cached buildid */
+		/* NOTE(review): strncpy() does not terminate sbuildid if target fills it - confirm the length is bounded */
+		strncpy(sbuildid, target, SBUILD_ID_SIZE);
+		dir_name = build_id_cache__linkname(sbuildid, NULL, 0);
+		goto found;
+	}
+
+	if (!target || !strcmp(target, DSO__NAME_KALLSYMS)) {
+		target = DSO__NAME_KALLSYMS;
+		is_kallsyms = true;
+		ret = sysfs__sprintf_build_id("/", sbuildid);
+	} else {
+		/* The build-id is read with the target's mount namespace entered */
+		nsinfo__mountns_enter(nsi, &nsc);
+		ret = filename__sprintf_build_id(target, sbuildid);
+		nsinfo__mountns_exit(&nsc);
+	}
+
+	if (ret < 0) {
+		pr_debug("Failed to get build-id from %s.\n", target);
+		return ret;
+	}
+
+	/* If we have no buildid cache, make it */
+	if (!build_id_cache__cached(sbuildid)) {
+		ret = build_id_cache__add_s(sbuildid, target, nsi,
+					    is_kallsyms, NULL);
+		if (ret < 0) {
+			pr_debug("Failed to add build-id cache: %s\n", target);
+			return ret;
+		}
+	}
+
+	dir_name = build_id_cache__cachedir(sbuildid, target, nsi, is_kallsyms,
+					    false);
+found:
+	if (!dir_name) {
+		pr_debug("Failed to get cache from %s\n", target);
+		return -ENOMEM;
+	}
+
+	snprintf(cpath, PATH_MAX, "%s/probes", dir_name);
+	fd = open(cpath, O_CREAT | O_RDWR, 0644);
+	/* NOTE(review): on failure this returns open()'s -1, not -errno */
+	if (fd < 0)
+		pr_debug("Failed to open cache(%d): %s\n", fd, cpath);
+	free(dir_name);
+	pcache->fd = fd;
+
+	return fd;
+}
+
+/*
+ * Read the cache file behind pcache->fd and append one entry per header
+ * line to pcache->entries.
+ *
+ * A line starting with '#' (perf probe event) or '%' (SDT event) starts a
+ * new entry; the rest of that line is parsed as a perf probe command.
+ * Every other line is added to the trace-event list of the entry started
+ * most recently.
+ *
+ * Returns 0 on success, -errno if dup() fails, -EINVAL if fdopen() fails
+ * or a trace-event line precedes any header line, -ENOMEM on allocation
+ * failure, or the negative result of parse_perf_probe_command().
+ */
+static int probe_cache__load(struct probe_cache *pcache)
+{
+	struct probe_cache_entry *entry = NULL;
+	char buf[MAX_CMDLEN], *p;
+	int ret = 0, fddup;
+	FILE *fp;
+
+	/* Read through a duplicate so fclose() leaves pcache->fd open */
+	fddup = dup(pcache->fd);
+	if (fddup < 0)
+		return -errno;
+	fp = fdopen(fddup, "r");
+	if (!fp) {
+		close(fddup);
+		return -EINVAL;
+	}
+
+	while (!feof(fp)) {
+		if (!fgets(buf, MAX_CMDLEN, fp))
+			break;
+		p = strchr(buf, '\n');
+		if (p)
+			*p = '\0';
+		/* #perf_probe_event or %sdt_event */
+		if (buf[0] == '#' || buf[0] == '%') {
+			entry = probe_cache_entry__new(NULL);
+			if (!entry) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			if (buf[0] == '%')
+				entry->sdt = true;
+			/* Skip the one-character marker */
+			entry->spev = strdup(buf + 1);
+			if (entry->spev)
+				ret = parse_perf_probe_command(buf + 1,
+								&entry->pev);
+			else
+				ret = -ENOMEM;
+			if (ret < 0) {
+				probe_cache_entry__delete(entry);
+				goto out;
+			}
+			list_add_tail(&entry->node, &pcache->entries);
+		} else {	/* trace_probe_event */
+			if (!entry) {
+				ret = -EINVAL;
+				goto out;
+			}
+			/* NOTE(review): the result of strlist__add() is ignored here */
+			strlist__add(entry->tevlist, buf);
+		}
+	}
+out:
+	fclose(fp);
+	return ret;
+}
+
+/*
+ * Allocate an empty probe cache: no entries, and fd set to -EINVAL to
+ * mark that no file is open. Returns NULL on allocation failure.
+ */
+static struct probe_cache *probe_cache__alloc(void)
+{
+	struct probe_cache *pcache;
+
+	pcache = zalloc(sizeof(*pcache));
+	if (!pcache)
+		return NULL;
+
+	INIT_LIST_HEAD(&pcache->entries);
+	pcache->fd = -EINVAL;
+	return pcache;
+}
+
+/* Unlink and free every entry of pcache; pcache itself stays allocated */
+void probe_cache__purge(struct probe_cache *pcache)
+{
+	struct probe_cache_entry *entry, *n;
+
+	/* The _safe iterator is needed because entries are freed while walking */
+	list_for_each_entry_safe(entry, n, &pcache->entries, node) {
+		/* probe_cache_entry__delete() requires an unlinked entry */
+		list_del_init(&entry->node);
+		probe_cache_entry__delete(entry);
+	}
+}
+
+/*
+ * Free pcache together with all of its entries and close its file.
+ * NULL is accepted.
+ *
+ * probe_cache__alloc() marks "no file" with a negative fd, so every
+ * non-negative value is a descriptor this cache owns - including 0, which
+ * the former "fd > 0" test never closed.
+ */
+void probe_cache__delete(struct probe_cache *pcache)
+{
+	if (!pcache)
+		return;
+
+	probe_cache__purge(pcache);
+	if (pcache->fd >= 0)
+		close(pcache->fd);
+	free(pcache);
+}
+
+/*
+ * Allocate a probe cache for target, open its cache file and load the
+ * entries. Returns NULL if any of these steps fails.
+ */
+struct probe_cache *probe_cache__new(const char *target, struct nsinfo *nsi)
+{
+	struct probe_cache *pcache;
+	int ret;
+
+	pcache = probe_cache__alloc();
+	if (!pcache)
+		return NULL;
+
+	ret = probe_cache__open(pcache, target, nsi);
+	if (ret < 0) {
+		pr_debug("Cache open error: %d\n", ret);
+	} else {
+		ret = probe_cache__load(pcache);
+		if (ret >= 0)
+			return pcache;
+		pr_debug("Cache read error: %d\n", ret);
+	}
+
+	/* Either step failed: throw the half-built cache away */
+	probe_cache__delete(pcache);
+	return NULL;
+}
+
+/*
+ * NULL-tolerant string equality: two NULLs are equal, a NULL never equals
+ * a string, and two strings are compared with strcmp().
+ */
+static bool streql(const char *a, const char *b)
+{
+	if (a && b)
+		return strcmp(a, b) == 0;
+
+	/* At least one side is NULL: equal only if both are */
+	return a == b;
+}
+
+/*
+ * Find the cache entry that corresponds to pev.
+ *
+ * For an SDT pev, an entry matches when its event name equals pev->event
+ * and, if pev has a group, the group names are equal as well. Otherwise
+ * an entry matches when pev has an event name and both group and event
+ * are equal, or when the entry's command string equals the command
+ * synthesized from pev.
+ *
+ * Returns the first matching entry, or NULL if there is none or the
+ * command string cannot be synthesized.
+ */
+struct probe_cache_entry *
+probe_cache__find(struct probe_cache *pcache, struct perf_probe_event *pev)
+{
+	struct probe_cache_entry *entry = NULL;
+	struct probe_cache_entry *hit = NULL;
+	char *cmd = synthesize_perf_probe_command(pev);
+
+	if (!cmd)
+		return NULL;
+
+	for_each_probe_cache_entry(entry, pcache) {
+		if (pev->sdt) {
+			if (entry->pev.event &&
+			    streql(entry->pev.event, pev->event) &&
+			    (!pev->group ||
+			     streql(entry->pev.group, pev->group))) {
+				hit = entry;
+				break;
+			}
+			continue;
+		}
+
+		/* Hit if same event name ... */
+		if (pev->event &&
+		    streql(entry->pev.group, pev->group) &&
+		    streql(entry->pev.event, pev->event)) {
+			hit = entry;
+			break;
+		}
+		/* ... or same command-string */
+		if (!strcmp(entry->spev, cmd)) {
+			hit = entry;
+			break;
+		}
+	}
+
+	free(cmd);
+	return hit;
+}
+
+/*
+ * Find the first cache entry whose group and event names both equal the
+ * given ones (NULL only equals NULL). Returns NULL if none matches.
+ */
+struct probe_cache_entry *
+probe_cache__find_by_name(struct probe_cache *pcache,
+			  const char *group, const char *event)
+{
+	struct probe_cache_entry *entry = NULL;
+
+	for_each_probe_cache_entry(entry, pcache) {
+		if (!streql(entry->pev.group, group))
+			continue;
+		if (streql(entry->pev.event, event))
+			return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Add a cache entry for pev holding the command strings of its ntevs
+ * trace events, replacing any entry that probe_cache__find() returns for
+ * pev. Trace events without a symbol are left out.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments and -ENOMEM if the entry
+ * or one of the command strings cannot be created.
+ */
+int probe_cache__add_entry(struct probe_cache *pcache,
+			   struct perf_probe_event *pev,
+			   struct probe_trace_event *tevs, int ntevs)
+{
+	struct probe_cache_entry *entry = NULL;
+	char *cmd;
+	int i, ret;
+
+	if (!pcache || !pev || !tevs || ntevs <= 0) {
+		ret = -EINVAL;
+		goto out_err;
+	}
+
+	/* Remove old cache entry */
+	entry = probe_cache__find(pcache, pev);
+	if (entry) {
+		list_del_init(&entry->node);
+		probe_cache_entry__delete(entry);
+	}
+
+	/* Every failure from here on is reported as out of memory */
+	ret = -ENOMEM;
+	entry = probe_cache_entry__new(pev);
+	if (!entry)
+		goto out_err;
+
+	for (i = 0; i < ntevs; i++) {
+		if (!tevs[i].point.symbol)
+			continue;
+
+		cmd = synthesize_probe_trace_command(&tevs[i]);
+		if (!cmd)
+			goto out_err;
+		/* The list is given the string, then our copy is released */
+		strlist__add(entry->tevlist, cmd);
+		free(cmd);
+	}
+	list_add_tail(&entry->node, &pcache->entries);
+	pr_debug("Added probe cache: %d\n", ntevs);
+	return 0;
+
+out_err:
+	pr_debug("Failed to add probe caches\n");
+	probe_cache_entry__delete(entry);
+	return ret;
+}
+
+#ifdef HAVE_GELF_GETNOTE_SUPPORT
+/* Return the first address of an SDT note, from its 32- or 64-bit array */
+static unsigned long long sdt_note__get_addr(struct sdt_note *note)
+{
+	if (note->bit32)
+		return (unsigned long long)note->addr.a32[0];
+
+	return (unsigned long long)note->addr.a64[0];
+}
+
+/*
+ * Type suffix for an SDT argument, looked up by sdt_arg_parse_size() at
+ * index (size + 8) for a size in the range -8..8. Only the slots for
+ * -8, -4, -2, -1, 1, 2, 4 and 8 hold a suffix; all others are empty.
+ */
+static const char * const type_to_suffix[] = {
+	":s64", "", "", "", ":s32", "", ":s16", ":s8",
+	"", ":u8", ":u16", "", ":u32", "", "", "", ":u64"
+};
+
+/*
+ * Convert the decimal size string at n_ptr into the matching uprobe type
+ * suffix and store it in *suffix. Returns 0, or -1 (leaving *suffix
+ * untouched) when the number lies outside -8..8.
+ */
+static int sdt_arg_parse_size(char *n_ptr, const char **suffix)
+{
+	long size = strtol(n_ptr, NULL, 10);
+
+	if (size >= -8 && size <= 8) {
+		/* The table is indexed from -8, hence the offset */
+		*suffix = type_to_suffix[size + 8];
+		return 0;
+	}
+
+	pr_debug4("Failed to get a valid sdt type\n");
+	return -1;
+}
+
+/*
+ * Append the @i-th SDT argument (zero-based; emitted as arg<i+1>) to @buf.
+ *
+ * @arg is in N@OP format, where N is the size of the argument and OP is the
+ * actual assembly operand.  N may be omitted, in which case @arg is just OP
+ * (without '@').  Nothing is appended unless arch_sdt_arg_parse_op()
+ * reports SDT_ARG_VALID.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg)
+{
+	const char *suffix = "";
+	char *new_op = NULL;
+	char *desc, *at, *op;
+	int ret = -1;
+
+	desc = strdup(arg);
+	if (desc == NULL) {
+		pr_debug4("Allocation error\n");
+		return ret;
+	}
+
+	at = strchr(desc, '@');
+	if (at == NULL) {
+		/* No size given: the whole string is the operand. */
+		op = desc;
+	} else {
+		/* Split "N@OP" in place into the size and the operand. */
+		*at = '\0';
+		op = at + 1;
+
+		if (sdt_arg_parse_size(desc, &suffix) != 0)
+			goto out;
+	}
+
+	ret = arch_sdt_arg_parse_op(op, &new_op);
+	if (ret < 0)
+		goto out;
+
+	if (ret == SDT_ARG_VALID) {
+		ret = strbuf_addf(buf, " arg%d=%s%s", i + 1, new_op, suffix);
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = 0;
+out:
+	free(desc);
+	free(new_op);
+	return ret;
+}
+
+/*
+ * Build the probe definition string for one SDT note:
+ *   "p:<sdtgrp>/<name> <pathname>:0x<addr>[ arg1=... arg2=...]"
+ *
+ * Returns a newly allocated string that the caller must free(), or NULL if
+ * any allocation or argument conversion fails.
+ */
+static char *synthesize_sdt_probe_command(struct sdt_note *note,
+					const char *pathname,
+					const char *sdtgrp)
+{
+	struct strbuf buf;
+	char *ret = NULL, **args = NULL;
+	int i, args_count = 0;
+
+	if (strbuf_init(&buf, 32) < 0)
+		return NULL;
+
+	if (strbuf_addf(&buf, "p:%s/%s %s:0x%llx",
+				sdtgrp, note->name, pathname,
+				sdt_note__get_addr(note)) < 0)
+		goto error;
+
+	if (note->args) {
+		args = argv_split(note->args, &args_count);
+		/* Without this check a failed split would be dereferenced. */
+		if (!args)
+			goto error;
+
+		for (i = 0; i < args_count; ++i) {
+			if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0)
+				goto error;
+		}
+	}
+
+	ret = strbuf_detach(&buf, NULL);
+error:
+	/* The split vector is ours to release on success and failure alike. */
+	if (args)
+		argv_free(args);
+	strbuf_release(&buf);
+	return ret;
+}
+
+/*
+ * Read the SDT notes of @pathname and record each one in @pcache.
+ *
+ * Notes are grouped under "sdt_<provider>"; a cache entry is created for a
+ * group/name pair that is not cached yet, and the synthesized probe command
+ * is added to that entry's command list.
+ *
+ * Returns a negative value on failure and a non-negative value otherwise.
+ * If processing a note fails, the entry being worked on is removed from the
+ * cache and the scan stops.
+ */
+int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname)
+{
+	struct probe_cache_entry *entry = NULL;
+	struct list_head sdtlist;
+	struct sdt_note *note;
+	char *buf;
+	char sdtgrp[64];
+	int ret, err;
+
+	INIT_LIST_HEAD(&sdtlist);
+	ret = get_sdt_note_list(&sdtlist, pathname);
+	if (ret < 0) {
+		pr_debug4("Failed to get sdt note: %d\n", ret);
+		return ret;
+	}
+	list_for_each_entry(note, &sdtlist, note_list) {
+		ret = snprintf(sdtgrp, 64, "sdt_%s", note->provider);
+		if (ret < 0)
+			break;
+		/* Try to find same-name entry */
+		entry = probe_cache__find_by_name(pcache, sdtgrp, note->name);
+		if (!entry) {
+			entry = probe_cache_entry__new(NULL);
+			if (!entry) {
+				ret = -ENOMEM;
+				break;
+			}
+			entry->sdt = true;
+			ret = asprintf(&entry->spev, "%s:%s=%s", sdtgrp,
+					note->name, note->name);
+			if (ret < 0) {
+				/*
+				 * The pointer is unspecified after a failed
+				 * asprintf(); reset it so the cleanup below
+				 * never releases garbage.
+				 */
+				entry->spev = NULL;
+				break;
+			}
+			entry->pev.event = strdup(note->name);
+			entry->pev.group = strdup(sdtgrp);
+			/* Both names are compared on later lookups. */
+			if (!entry->pev.event || !entry->pev.group) {
+				ret = -ENOMEM;
+				break;
+			}
+			list_add_tail(&entry->node, &pcache->entries);
+		}
+		buf = synthesize_sdt_probe_command(note, pathname, sdtgrp);
+		if (!buf) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		err = strlist__add(entry->tevlist, buf);
+		free(buf);
+		/*
+		 * Only an allocation failure aborts the scan; any other
+		 * result is ignored, as it was before.
+		 */
+		if (err == -ENOMEM) {
+			ret = err;
+			break;
+		}
+		entry = NULL;
+	}
+	/* A non-NULL entry here means the loop stopped on an error. */
+	if (entry) {
+		list_del_init(&entry->node);
+		probe_cache_entry__delete(entry);
+	}
+	cleanup_sdt_note_list(&sdtlist);
+	return ret;
+}
+#endif
+
+/*
+ * Append one cache entry to @fd: a header line made of a one-character
+ * prefix ('%' for SDT entries, '#' otherwise) followed by the event
+ * definition, then one line per cached command.
+ *
+ * If a write comes up short, the file is truncated back to the size it had
+ * on entry.  Returns 0 when everything was written, the fstat() error if
+ * the size cannot be read, and otherwise the rollback result (-1 after a
+ * partial write, -2 if the truncation itself fails).
+ */
+static int probe_cache_entry__write(struct probe_cache_entry *entry, int fd)
+{
+	const char *prefix = "#";
+	struct str_node *snode;
+	struct iovec iov[3];
+	struct stat st;
+	int ret;
+
+	if (entry->sdt)
+		prefix = "%";
+
+	/* Remember the current size so a partial write can be undone */
+	ret = fstat(fd, &st);
+	if (ret < 0)
+		return ret;
+
+	pr_debug("Writing cache: %s%s\n", prefix, entry->spev);
+
+	/* Header line: prefix, event definition, newline */
+	iov[0].iov_base = (void *)prefix;
+	iov[0].iov_len = 1;
+	iov[1].iov_base = entry->spev;
+	iov[1].iov_len = strlen(entry->spev);
+	iov[2].iov_base = (void *)"\n";
+	iov[2].iov_len = 1;
+	ret = writev(fd, iov, 3);
+	if (ret < (int)iov[1].iov_len + 2)
+		goto rollback;
+
+	/* One line per cached command */
+	strlist__for_each_entry(snode, entry->tevlist) {
+		iov[0].iov_base = (void *)snode->s;
+		iov[0].iov_len = strlen(snode->s);
+		iov[1].iov_base = (void *)"\n";
+		iov[1].iov_len = 1;
+		ret = writev(fd, iov, 2);
+		if (ret < (int)iov[0].iov_len + 1)
+			goto rollback;
+	}
+	return 0;
+
+rollback:
+	/* Undo the partial write so the cache file is not corrupted */
+	if (ret > 0)
+		ret = -1;
+	if (ftruncate(fd, st.st_size) < 0)
+		ret = -2;
+
+	return ret;
+}
+
+/*
+ * Rewrite the cache file behind @pcache from scratch: rewind, truncate to
+ * zero length, then write every entry in list order.
+ *
+ * Returns a negative value as soon as any step fails, otherwise the result
+ * of the last step performed.
+ */
+int probe_cache__commit(struct probe_cache *pcache)
+{
+	struct probe_cache_entry *entry;
+	int ret;
+
+	/* TBD: if we do not update existing entries, skip it */
+	ret = lseek(pcache->fd, 0, SEEK_SET);
+	if (ret < 0)
+		return ret;
+
+	ret = ftruncate(pcache->fd, 0);
+	if (ret < 0)
+		return ret;
+
+	for_each_probe_cache_entry(entry, pcache) {
+		ret = probe_cache_entry__write(entry, pcache->fd);
+		pr_debug("Cache committed: %d\n", ret);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
+}
+
+/*
+ * Match @entry against @filter.  Entries that carry an event name are
+ * matched as "<group>:<event>"; all others are matched by their definition
+ * string.
+ */
+static bool probe_cache_entry__compare(struct probe_cache_entry *entry,
+				       struct strfilter *filter)
+{
+	char name[128];
+
+	if (!entry->pev.event)
+		return strfilter__compare(filter, entry->spev);
+
+	snprintf(name, sizeof(name), "%s:%s",
+		 entry->pev.group, entry->pev.event);
+	return strfilter__compare(filter, name);
+}
+
+/* Remove and free every entry of @pcache that matches @filter; returns 0. */
+int probe_cache__filter_purge(struct probe_cache *pcache,
+			      struct strfilter *filter)
+{
+	struct probe_cache_entry *entry, *next;
+
+	/* The _safe iterator is needed because entries are unlinked here. */
+	list_for_each_entry_safe(entry, next, &pcache->entries, node) {
+		if (!probe_cache_entry__compare(entry, filter))
+			continue;
+
+		pr_info("Removed cached event: %s\n", entry->spev);
+		list_del_init(&entry->node);
+		probe_cache_entry__delete(entry);
+	}
+	return 0;
+}
+
+/* Print the definition of each entry in @pcache matching @filter; returns 0. */
+static int probe_cache__show_entries(struct probe_cache *pcache,
+				     struct strfilter *filter)
+{
+	struct probe_cache_entry *entry;
+
+	for_each_probe_cache_entry(entry, pcache) {
+		if (!probe_cache_entry__compare(entry, filter))
+			continue;
+
+		printf("%s\n", entry->spev);
+	}
+	return 0;
+}
+
+/*
+ * Show all cached probes: for each name returned by
+ * build_id_cache__list_all(), open its probe cache and, if it has any
+ * entries, print a heading followed by the entries matching @filter.
+ *
+ * Returns 0 on success or -EINVAL if the list cannot be obtained.  Caches
+ * that cannot be opened are skipped.
+ */
+int probe_cache__show_all_caches(struct strfilter *filter)
+{
+	struct probe_cache *pcache;
+	struct strlist *bidlist;
+	struct str_node *nd;
+	char *buf = strfilter__string(filter);
+
+	/* Passing NULL for %s is undefined, so substitute a placeholder. */
+	pr_debug("list cache with filter: %s\n", buf ? buf : "(null)");
+	free(buf);
+
+	bidlist = build_id_cache__list_all(true);
+	if (!bidlist) {
+		pr_debug("Failed to get buildids: %d\n", errno);
+		return -EINVAL;
+	}
+	strlist__for_each_entry(nd, bidlist) {
+		pcache = probe_cache__new(nd->s, NULL);
+		if (!pcache)
+			continue;
+		if (!list_empty(&pcache->entries)) {
+			/* NOTE(review): presumably NULL when the original name cannot be resolved. */
+			buf = build_id_cache__origname(nd->s);
+			printf("%s (%s):\n", buf ? buf : "(null)", nd->s);
+			free(buf);
+			probe_cache__show_entries(pcache, filter);
+		}
+		probe_cache__delete(pcache);
+	}
+	strlist__delete(bidlist);
+
+	return 0;
+}
+
+/*
+ * Features looked up in the tracing "README" file.  Each value is an index
+ * into ftrace_readme_table[]; FTRACE_README_END is the number of features,
+ * not a feature itself.
+ */
+enum ftrace_readme {
+	FTRACE_README_PROBE_TYPE_X = 0,
+	FTRACE_README_KRETPROBE_OFFSET,
+	FTRACE_README_END,
+};
+
+/*
+ * One slot per enum ftrace_readme value: the glob pattern that a README
+ * line must match, and whether such a line has been seen.  All slots start
+ * as unavailable and are updated by scan_ftrace_readme().
+ */
+static struct {
+	const char *pattern;
+	bool avail;
+} ftrace_readme_table[] = {
+#define DEFINE_TYPE(idx, pat)			\
+	[idx] = {.pattern = pat, .avail = false}
+	DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"),
+	DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"),
+};
+
+/*
+ * Report whether feature @type is advertised by the tracing "README" file.
+ *
+ * The file is read once, on the first call that manages to open it; every
+ * line is matched against the patterns in ftrace_readme_table[] and the
+ * results are kept for later calls.  Returns false if the file cannot be
+ * opened or @type is out of range.
+ */
+static bool scan_ftrace_readme(enum ftrace_readme type)
+{
+	static bool scanned = false;
+	char *line = NULL;
+	size_t cap = 0;
+	FILE *fp;
+	int fd;
+
+	if (!scanned) {
+		fd = open_trace_file("README", false);
+		if (fd < 0)
+			return false;
+
+		fp = fdopen(fd, "r");
+		if (!fp) {
+			close(fd);
+			return false;
+		}
+
+		while (getline(&line, &cap, fp) > 0) {
+			for (enum ftrace_readme i = 0; i < FTRACE_README_END; i++) {
+				/* Once found, a feature stays available. */
+				if (ftrace_readme_table[i].avail)
+					continue;
+				ftrace_readme_table[i].avail =
+					strglobmatch(line, ftrace_readme_table[i].pattern);
+			}
+		}
+		scanned = true;
+
+		fclose(fp);
+		free(line);
+	}
+
+	if (type >= FTRACE_README_END)
+		return false;
+
+	return ftrace_readme_table[type].avail;
+}
+
+/*
+ * Tell whether probe argument type @type can be used.  Out-of-range values
+ * are never available, PROBE_TYPE_X depends on what the tracing README
+ * advertises, and every other type is always available.
+ */
+bool probe_type_is_available(enum probe_type type)
+{
+	if (type >= PROBE_TYPE_END)
+		return false;
+
+	if (type != PROBE_TYPE_X)
+		return true;
+
+	return scan_ftrace_readme(FTRACE_README_PROBE_TYPE_X);
+}
+
+/* Tell whether the tracing README matches the kretprobe-offset pattern. */
+bool kretprobe_offset_is_supported(void)
+{
+	bool supported = scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET);
+
+	return supported;
+}
diff --git a/util/probe-file.h b/util/probe-file.h
new file mode 100644
index 0000000..63f29b1
--- /dev/null
+++ b/util/probe-file.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PROBE_FILE_H
+#define __PROBE_FILE_H
+
+#include "probe-event.h"
+
+struct strlist;
+struct strfilter;
+
+/*
+ * Cache of probe definitions: one cached event together with the probe
+ * commands generated for it.
+ */
+struct probe_cache_entry {
+	struct list_head	node;		/* link in probe_cache.entries */
+	bool			sdt;		/* set for entries created from SDT notes */
+	struct perf_probe_event pev;		/* event; its group/event names are used for lookup */
+	char			*spev;		/* event definition string, as written to the cache file */
+	struct strlist		*tevlist;	/* probe command strings for this event */
+};
+
+/* An open probe cache: the backing file plus its entries in memory. */
+struct probe_cache {
+	int	fd;			/* cache file; rewritten by probe_cache__commit() */
+	struct list_head entries;	/* list of struct probe_cache_entry */
+};
+
+/*
+ * Probe argument type classes that probe_type_is_available() can be asked
+ * about.  PROBE_TYPE_END is the number of classes, not a class itself.
+ * NOTE(review): U/S/X presumably stand for the unsigned, signed and
+ * hexadecimal integer types - confirm against the users of this enum.
+ */
+enum probe_type {
+	PROBE_TYPE_U = 0,
+	PROBE_TYPE_S,
+	PROBE_TYPE_X,
+	PROBE_TYPE_STRING,
+	PROBE_TYPE_BITFIELD,
+	PROBE_TYPE_END,
+};
+
+/*
+ * NOTE(review): presumably the flag bits accepted by probe_file__open()
+ * and probe_file__open_both() - confirm against their definitions.
+ */
+#define PF_FL_UPROBE	1
+#define PF_FL_RW	2
+
+/*
+ * Iterate over every entry of @pcache (a struct probe_cache pointer).
+ * The argument is parenthesized so that any pointer expression can be
+ * passed, not just a plain identifier.
+ */
+#define for_each_probe_cache_entry(entry, pcache) \
+	list_for_each_entry(entry, &(pcache)->entries, node)
+
+/* probe-file.c depends on libelf */
+#ifdef HAVE_LIBELF_SUPPORT
+int open_trace_file(const char *trace_file, bool readwrite);
+int probe_file__open(int flag);
+int probe_file__open_both(int *kfd, int *ufd, int flag);
+struct strlist *probe_file__get_namelist(int fd);
+struct strlist *probe_file__get_rawlist(int fd);
+int probe_file__add_event(int fd, struct probe_trace_event *tev);
+
+int probe_file__del_events(int fd, struct strfilter *filter);
+int probe_file__get_events(int fd, struct strfilter *filter,
+				  struct strlist *plist);
+int probe_file__del_strlist(int fd, struct strlist *namelist);
+
+int probe_cache_entry__get_event(struct probe_cache_entry *entry,
+				 struct probe_trace_event **tevs);
+
+struct probe_cache *probe_cache__new(const char *target, struct nsinfo *nsi);
+int probe_cache__add_entry(struct probe_cache *pcache,
+			   struct perf_probe_event *pev,
+			   struct probe_trace_event *tevs, int ntevs);
+int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname);
+int probe_cache__commit(struct probe_cache *pcache);
+void probe_cache__purge(struct probe_cache *pcache);
+void probe_cache__delete(struct probe_cache *pcache);
+int probe_cache__filter_purge(struct probe_cache *pcache,
+			      struct strfilter *filter);
+struct probe_cache_entry *probe_cache__find(struct probe_cache *pcache,
+					    struct perf_probe_event *pev);
+struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
+					const char *group, const char *event);
+int probe_cache__show_all_caches(struct strfilter *filter);
+bool probe_type_is_available(enum probe_type type);
+bool kretprobe_offset_is_supported(void);
+#else	/* ! HAVE_LIBELF_SUPPORT */
+/*
+ * Stubs used when libelf support is not built in: no cache can ever be
+ * opened, and deleting one is a no-op.
+ */
+static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused)
+{
+	return NULL;
+}
+#define probe_cache__delete(pcache) do {} while (0)
+#endif
+#endif
diff --git a/util/probe-finder.c b/util/probe-finder.c
new file mode 100644
index 0000000..c37fbef
--- /dev/null
+++ b/util/probe-finder.c
@@ -0,0 +1,1881 @@
+/*
+ * probe-finder.c : C expression to kprobe event converter
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <inttypes.h>
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <dwarf-regs.h>
+
+#include <linux/bitops.h>
+#include "event.h"
+#include "dso.h"
+#include "debug.h"
+#include "intlist.h"
+#include "util.h"
+#include "strlist.h"
+#include "symbol.h"
+#include "probe-finder.h"
+#include "probe-file.h"
+#include "string2.h"
+
+/* Kprobe tracer basic type is up to u64 */
+#define MAX_BASIC_TYPE_BITS	64
+
+/* Dwarf FL wrappers */
+static char *debuginfo_path;	/* Currently dummy */
+
+/*
+ * libdwfl callbacks passed to dwfl_begin() when a debuginfo file is opened
+ * offline; all of them are the stock implementations shipped with libdwfl.
+ */
+static const Dwfl_Callbacks offline_callbacks = {
+	.find_debuginfo = dwfl_standard_find_debuginfo,
+	.debuginfo_path = &debuginfo_path,
+
+	.section_address = dwfl_offline_section_address,
+
+	/* We use this table for core files too.  */
+	.find_elf = dwfl_build_id_find_elf,
+};
+
+/*
+ * Get a Dwarf from offline image.
+ *
+ * Opens @path and fills in @dbg (dwfl session, module, Dwarf handle and
+ * bias).  Returns 0 on success, the negative open() result if the file
+ * cannot be opened, or -ENOENT if libdwfl cannot load it; on those libdwfl
+ * failures @dbg is zeroed again.
+ */
+static int debuginfo__init_offline_dwarf(struct debuginfo *dbg,
+					 const char *path)
+{
+	bool fd_consumed = false;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	dbg->dwfl = dwfl_begin(&offline_callbacks);
+	if (!dbg->dwfl)
+		goto error;
+
+	dwfl_report_begin(dbg->dwfl);
+	dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd);
+	if (!dbg->mod)
+		goto error;
+	/* libdwfl takes over the descriptor only once the report succeeded. */
+	fd_consumed = true;
+
+	dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias);
+	if (!dbg->dbg)
+		goto error;
+
+	dwfl_report_end(dbg->dwfl, NULL, NULL);
+
+	return 0;
+error:
+	if (dbg->dwfl)
+		dwfl_end(dbg->dwfl);
+	/*
+	 * Close the descriptor ourselves unless libdwfl owns it; this also
+	 * covers a failed dwfl_report_offline(), which used to leak it.
+	 */
+	if (!fd_consumed)
+		close(fd);
+	memset(dbg, 0, sizeof(*dbg));
+
+	return -ENOENT;
+}
+
+/*
+ * Allocate a debuginfo and load the DWARF of @path into it.
+ * Returns NULL if the allocation or the load fails.
+ */
+static struct debuginfo *__debuginfo__new(const char *path)
+{
+	struct debuginfo *dbg;
+
+	dbg = zalloc(sizeof(*dbg));
+	if (!dbg)
+		return NULL;
+
+	if (debuginfo__init_offline_dwarf(dbg, path) < 0) {
+		free(dbg);
+		return NULL;
+	}
+
+	pr_debug("Open Debuginfo file: %s\n", path);
+	return dbg;
+}
+
+/*
+ * Debuginfo file locations that debuginfo__new() tries, in this order,
+ * before falling back to the given binary itself.  The list ends with
+ * DSO_BINARY_TYPE__NOT_FOUND, which serves as the terminator.
+ */
+enum dso_binary_type distro_dwarf_types[] = {
+	DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+	DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+	DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+	DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+/*
+ * Open the debuginfo for the binary at @path.  Each location listed in
+ * distro_dwarf_types[] is tried first; if none can be opened, @path itself
+ * is opened.  Returns NULL if nothing could be opened.
+ */
+struct debuginfo *debuginfo__new(const char *path)
+{
+	char buf[PATH_MAX], nil = '\0';
+	struct debuginfo *dinfo = NULL;
+	struct dso *dso;
+	int i;
+
+	/* Try to open distro debuginfo files */
+	dso = dso__new(path);
+	if (dso) {
+		for (i = 0; distro_dwarf_types[i] != DSO_BINARY_TYPE__NOT_FOUND; i++) {
+			if (dso__read_binary_type_filename(dso,
+							   distro_dwarf_types[i],
+							   &nil, buf, PATH_MAX) < 0)
+				continue;
+
+			dinfo = __debuginfo__new(buf);
+			if (dinfo)
+				break;
+		}
+		dso__put(dso);
+	}
+
+	if (dinfo)
+		return dinfo;
+
+	/* if failed to open all distro debuginfo, open given binary */
+	return __debuginfo__new(path);
+}
+
+/* End the dwfl session of @dbg, if any, and free @dbg.  NULL is accepted. */
+void debuginfo__delete(struct debuginfo *dbg)
+{
+	if (!dbg)
+		return;
+
+	if (dbg->dwfl)
+		dwfl_end(dbg->dwfl);
+	free(dbg);
+}
+
+/*
+ * Probe finder related functions
+ */
+
+/*
+ * Allocate a zeroed dereference descriptor with its offset set to @offs.
+ * Returns NULL if the allocation fails.
+ */
+static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs)
+{
+	struct probe_trace_arg_ref *ref = zalloc(sizeof(*ref));
+
+	if (ref == NULL)
+		return NULL;
+
+	ref->offset = offs;
+	return ref;
+}
+
+/*
+ * Convert a location into trace_arg.
+ * If tvar == NULL, this just checks variable can be converted.
+ * If vr_die has no location at addr, addr is the entry address of sp_die
+ * and vr_die is a formal parameter, do a heuristic search over the
+ * following addresses for the location fuzzed by function entry mcount.
+ *
+ * Returns 0 on success.  Returns -ERANGE instead of 0 when
+ * probe_conf.show_location_range is set and a DW_TAG_variable has no
+ * location at addr but does have one at a later address of sp_die.
+ * Otherwise returns a negative errno (-EINVAL, -ENOENT, -ENOTSUP, -ENOMEM).
+ */
+static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
+				     Dwarf_Op *fb_ops, Dwarf_Die *sp_die,
+				     unsigned int machine,
+				     struct probe_trace_arg *tvar)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Addr tmp = 0;
+	Dwarf_Op *op;
+	size_t nops;
+	unsigned int regn;
+	Dwarf_Word offs = 0;
+	bool ref = false;
+	const char *regs;
+	int ret, ret2 = 0;
+
+	/* External variables are referred to by name; see static_var below */
+	if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL)
+		goto static_var;
+
+	/* TODO: handle more than 1 exprs */
+	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+		return -EINVAL;	/* Broken DIE ? */
+	if (dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0) {
+		/* No location at addr: tmp becomes the entry address of sp_die */
+		ret = dwarf_entrypc(sp_die, &tmp);
+		if (ret)
+			return -ENOENT;
+
+		if (probe_conf.show_location_range &&
+			(dwarf_tag(vr_die) == DW_TAG_variable)) {
+			/* Remembered and returned in place of 0 on success */
+			ret2 = -ERANGE;
+		} else if (addr != tmp ||
+			dwarf_tag(vr_die) != DW_TAG_formal_parameter) {
+			return -ENOENT;
+		}
+
+		/* tmp is reused as the upper bound of the search below */
+		ret = dwarf_highpc(sp_die, &tmp);
+		if (ret)
+			return -ENOENT;
+		/*
+		 * This is fuzzed by fentry mcount. We try to find the
+		 * parameter location at the earliest address.
+		 */
+		for (addr += 1; addr <= tmp; addr++) {
+			if (dwarf_getlocation_addr(&attr, addr, &op,
+						   &nops, 1) > 0)
+				goto found;
+		}
+		return -ENOENT;
+	}
+found:
+	if (nops == 0)
+		/* TODO: Support const_value */
+		return -ENOENT;
+
+	if (op->atom == DW_OP_addr) {
+static_var:
+		if (!tvar)
+			return ret2;
+		/* Static variables on memory (not stack), make @varname */
+		ret = strlen(dwarf_diename(vr_die));
+		/* ret + 2: room for the leading '@' and the trailing NUL */
+		tvar->value = zalloc(ret + 2);
+		if (tvar->value == NULL)
+			return -ENOMEM;
+		snprintf(tvar->value, ret + 2, "@%s", dwarf_diename(vr_die));
+		/* offs is still 0 on every path that reaches this point */
+		tvar->ref = alloc_trace_arg_ref((long)offs);
+		if (tvar->ref == NULL)
+			return -ENOMEM;
+		return ret2;
+	}
+
+	/* If this is based on frame buffer, set the offset */
+	if (op->atom == DW_OP_fbreg) {
+		if (fb_ops == NULL)
+			return -ENOTSUP;
+		ref = true;
+		offs = op->number;
+		/* Continue decoding with the first frame base operation */
+		op = &fb_ops[0];
+	}
+
+	/* Decode the register number; breg forms also add a memory offset */
+	if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) {
+		regn = op->atom - DW_OP_breg0;
+		offs += op->number;
+		ref = true;
+	} else if (op->atom >= DW_OP_reg0 && op->atom <= DW_OP_reg31) {
+		regn = op->atom - DW_OP_reg0;
+	} else if (op->atom == DW_OP_bregx) {
+		regn = op->number;
+		offs += op->number2;
+		ref = true;
+	} else if (op->atom == DW_OP_regx) {
+		regn = op->number;
+	} else {
+		pr_debug("DW_OP %x is not supported.\n", op->atom);
+		return -ENOTSUP;
+	}
+
+	/* Check-only mode: the location is convertible, nothing to fill in */
+	if (!tvar)
+		return ret2;
+
+	regs = get_dwarf_regstr(regn, machine);
+	if (!regs) {
+		/* This should be a bug in DWARF or this tool */
+		pr_warning("Mapping for the register number %u "
+			   "missing on this architecture.\n", regn);
+		return -ENOTSUP;
+	}
+
+	tvar->value = strdup(regs);
+	if (tvar->value == NULL)
+		return -ENOMEM;
+
+	/* A dereference is recorded only for memory-based locations */
+	if (ref) {
+		tvar->ref = alloc_trace_arg_ref((long)offs);
+		if (tvar->ref == NULL)
+			return -ENOMEM;
+	}
+	return ret2;
+}
+
+/*
+ * Convert a byte count into a bit count.  The division by sizeof(long)
+ * makes the result a size_t.
+ */
+#define BYTES_TO_BITS(nb)	((nb) * BITS_PER_LONG / sizeof(long))
+
+/*
+ * Set tvar->type to the type string for the variable vr_die.
+ *
+ * @cast is the type requested by the user, or NULL:
+ *  - a cast other than "string", "x", "s" or "u" is copied as it is;
+ *  - a bitfield is described as "b<bitsize>@<bitoffset>/<container bits>";
+ *  - "string" requires a pointer or array of (unsigned) char, and adds one
+ *    more dereference for a pointer;
+ *  - otherwise the type is a prefix character ('s', 'u' or 'x') followed by
+ *    the bit width, which is capped at MAX_BASIC_TYPE_BITS.
+ *
+ * Returns 0 on success (tvar->type is left unset if the type has no size)
+ * or a negative errno on failure.
+ */
+static int convert_variable_type(Dwarf_Die *vr_die,
+				 struct probe_trace_arg *tvar,
+				 const char *cast)
+{
+	struct probe_trace_arg_ref **ref_ptr = &tvar->ref;
+	Dwarf_Die type;
+	char buf[16];
+	char sbuf[STRERR_BUFSIZE];
+	int bsize, boffs, total;
+	int ret;
+	char prefix;
+
+	/* TODO: check all types */
+	if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "x") != 0 &&
+	    strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
+		/* Non string type is OK */
+		/* and respect signedness/hexadecimal cast */
+		tvar->type = strdup(cast);
+		return (tvar->type == NULL) ? -ENOMEM : 0;
+	}
+
+	bsize = dwarf_bitsize(vr_die);
+	if (bsize > 0) {
+		/* This is a bitfield */
+		boffs = dwarf_bitoffset(vr_die);
+		total = dwarf_bytesize(vr_die);
+		if (boffs < 0 || total < 0)
+			return -ENOENT;
+		ret = snprintf(buf, 16, "b%d@%d/%zd", bsize, boffs,
+				BYTES_TO_BITS(total));
+		goto formatted;
+	}
+
+	if (die_get_real_type(vr_die, &type) == NULL) {
+		pr_warning("Failed to get a type information of %s.\n",
+			   dwarf_diename(vr_die));
+		return -ENOENT;
+	}
+
+	pr_debug("%s type is %s.\n",
+		 dwarf_diename(vr_die), dwarf_diename(&type));
+
+	if (cast && strcmp(cast, "string") == 0) {	/* String type */
+		/* ret holds the DWARF tag of the variable's type here */
+		ret = dwarf_tag(&type);
+		if (ret != DW_TAG_pointer_type &&
+		    ret != DW_TAG_array_type) {
+			pr_warning("Failed to cast into string: "
+				   "%s(%s) is not a pointer nor array.\n",
+				   dwarf_diename(vr_die), dwarf_diename(&type));
+			return -EINVAL;
+		}
+		/* Replace type with the pointed-to or element type */
+		if (die_get_real_type(&type, &type) == NULL) {
+			pr_warning("Failed to get a type"
+				   " information.\n");
+			return -ENOENT;
+		}
+		if (ret == DW_TAG_pointer_type) {
+			/* Walk to the end of the existing dereference chain */
+			while (*ref_ptr)
+				ref_ptr = &(*ref_ptr)->next;
+			/* Add new reference with offset +0 */
+			*ref_ptr = zalloc(sizeof(struct probe_trace_arg_ref));
+			if (*ref_ptr == NULL) {
+				pr_warning("Out of memory error\n");
+				return -ENOMEM;
+			}
+		}
+		if (!die_compare_name(&type, "char") &&
+		    !die_compare_name(&type, "unsigned char")) {
+			pr_warning("Failed to cast into string: "
+				   "%s is not (unsigned) char *.\n",
+				   dwarf_diename(vr_die));
+			return -EINVAL;
+		}
+		tvar->type = strdup(cast);
+		return (tvar->type == NULL) ? -ENOMEM : 0;
+	}
+
+	/*
+	 * Pick the prefix: an explicit u/s/x cast wins (x only if supported),
+	 * otherwise signed types get 's' and the rest 'x' or 'u'.
+	 */
+	if (cast && (strcmp(cast, "u") == 0))
+		prefix = 'u';
+	else if (cast && (strcmp(cast, "s") == 0))
+		prefix = 's';
+	else if (cast && (strcmp(cast, "x") == 0) &&
+		 probe_type_is_available(PROBE_TYPE_X))
+		prefix = 'x';
+	else
+		prefix = die_is_signed_type(&type) ? 's' :
+			 probe_type_is_available(PROBE_TYPE_X) ? 'x' : 'u';
+
+	ret = dwarf_bytesize(&type);
+	if (ret <= 0)
+		/* No size ... try to use default type */
+		return 0;
+	ret = BYTES_TO_BITS(ret);
+
+	/* Check the bitwidth */
+	if (ret > MAX_BASIC_TYPE_BITS) {
+		pr_info("%s exceeds max-bitwidth. Cut down to %d bits.\n",
+			dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
+		ret = MAX_BASIC_TYPE_BITS;
+	}
+	ret = snprintf(buf, 16, "%c%d", prefix, ret);
+
+formatted:
+	/* ret is the snprintf() result: negative on error, >= 16 if truncated */
+	if (ret < 0 || ret >= 16) {
+		if (ret >= 16)
+			ret = -E2BIG;
+		pr_warning("Failed to convert variable type: %s\n",
+			   str_error_r(-ret, sbuf, sizeof(sbuf)));
+		return ret;
+	}
+	tvar->type = strdup(buf);
+	if (tvar->type == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * Resolve the field access chain @field (array indexes, '.' and '->'
+ * members) applied to the variable vr_die, adding the resulting offsets and
+ * dereferences to the reference chain at *ref_ptr.
+ *
+ * @varname is only used in messages.  @die_mem receives the DIE of the
+ * member (or, for an index, the array/pointer type) that was resolved last.
+ * The function calls itself for unnamed members and for field->next.
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
+				    struct perf_probe_arg_field *field,
+				    struct probe_trace_arg_ref **ref_ptr,
+				    Dwarf_Die *die_mem)
+{
+	struct probe_trace_arg_ref *ref = *ref_ptr;
+	Dwarf_Die type;
+	Dwarf_Word offs;
+	int ret, tag;
+
+	pr_debug("converting %s in %s\n", field->name, varname);
+	if (die_get_real_type(vr_die, &type) == NULL) {
+		pr_warning("Failed to get the type of %s.\n", varname);
+		return -ENOENT;
+	}
+	pr_debug2("Var real type: %s (%x)\n", dwarf_diename(&type),
+		  (unsigned)dwarf_dieoffset(&type));
+	tag = dwarf_tag(&type);
+
+	if (field->name[0] == '[' &&
+	    (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)) {
+		/* Save original type for next field or type */
+		memcpy(die_mem, &type, sizeof(*die_mem));
+		/* Get the type of this array */
+		if (die_get_real_type(&type, &type) == NULL) {
+			pr_warning("Failed to get the type of %s.\n", varname);
+			return -ENOENT;
+		}
+		pr_debug2("Array real type: %s (%x)\n", dwarf_diename(&type),
+			 (unsigned)dwarf_dieoffset(&type));
+		if (tag == DW_TAG_pointer_type) {
+			/* Indexing through a pointer needs one more dereference */
+			ref = zalloc(sizeof(struct probe_trace_arg_ref));
+			if (ref == NULL)
+				return -ENOMEM;
+			if (*ref_ptr)
+				(*ref_ptr)->next = ref;
+			else
+				*ref_ptr = ref;
+		}
+		/*
+		 * NOTE(review): for DW_TAG_array_type, ref is whatever
+		 * *ref_ptr held on entry and is not checked for NULL here -
+		 * confirm that callers never reach this with an empty chain.
+		 */
+		ref->offset += dwarf_bytesize(&type) * field->index;
+		goto next;
+	} else if (tag == DW_TAG_pointer_type) {
+		/* Check the pointer and dereference */
+		if (!field->ref) {
+			pr_err("Semantic error: %s must be referred by '->'\n",
+			       field->name);
+			return -EINVAL;
+		}
+		/* Get the type pointed by this pointer */
+		if (die_get_real_type(&type, &type) == NULL) {
+			pr_warning("Failed to get the type of %s.\n", varname);
+			return -ENOENT;
+		}
+		/* Verify it is a data structure  */
+		tag = dwarf_tag(&type);
+		if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
+			pr_warning("%s is not a data structure nor a union.\n",
+				   varname);
+			return -EINVAL;
+		}
+
+		/* Add the dereference for '->' to the chain */
+		ref = zalloc(sizeof(struct probe_trace_arg_ref));
+		if (ref == NULL)
+			return -ENOMEM;
+		if (*ref_ptr)
+			(*ref_ptr)->next = ref;
+		else
+			*ref_ptr = ref;
+	} else {
+		/* Verify it is a data structure  */
+		if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
+			pr_warning("%s is not a data structure nor a union.\n",
+				   varname);
+			return -EINVAL;
+		}
+		if (field->name[0] == '[') {
+			pr_err("Semantic error: %s is not a pointer"
+			       " nor array.\n", varname);
+			return -EINVAL;
+		}
+		/* While processing unnamed field, we don't care about this */
+		if (field->ref && dwarf_diename(vr_die)) {
+			pr_err("Semantic error: %s must be referred by '.'\n",
+			       field->name);
+			return -EINVAL;
+		}
+		if (!ref) {
+			pr_warning("Structure on a register is not "
+				   "supported yet.\n");
+			return -ENOTSUP;
+		}
+	}
+
+	if (die_find_member(&type, field->name, die_mem) == NULL) {
+		pr_warning("%s(type:%s) has no member %s.\n", varname,
+			   dwarf_diename(&type), field->name);
+		return -EINVAL;
+	}
+
+	/* Get the offset of the field */
+	if (tag == DW_TAG_union_type) {
+		offs = 0;
+	} else {
+		ret = die_get_data_member_location(die_mem, &offs);
+		if (ret < 0) {
+			pr_warning("Failed to get the offset of %s.\n",
+				   field->name);
+			return ret;
+		}
+	}
+	ref->offset += (long)offs;
+
+	/* If this member is unnamed, we need to reuse this field */
+	if (!dwarf_diename(die_mem))
+		return convert_variable_fields(die_mem, varname, field,
+						&ref, die_mem);
+
+next:
+	/* Converting next field */
+	if (field->next)
+		return convert_variable_fields(die_mem, field->name,
+					field->next, &ref, die_mem);
+	else
+		return 0;
+}
+
+/*
+ * Convert the variable vr_die into pf->tvar in kprobe event format:
+ * first its location, then any field accesses, and finally its type.
+ * Returns 0 on success or a negative errno on failure.
+ */
+static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
+{
+	Dwarf_Die die_mem;
+	int ret;
+
+	pr_debug("Converting variable %s into trace event.\n",
+		 dwarf_diename(vr_die));
+
+	ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
+					&pf->sp_die, pf->machine, pf->tvar);
+	switch (ret) {
+	case -ENOENT:
+	case -EINVAL:
+		pr_err("Failed to find the location of the '%s' variable at this address.\n"
+		       " Perhaps it has been optimized out.\n"
+		       " Use -V with the --range option to show '%s' location range.\n",
+		       pf->pvar->var, pf->pvar->var);
+		break;
+	case -ENOTSUP:
+		pr_err("Sorry, we don't support this variable location yet.\n");
+		break;
+	case 0:
+		if (pf->pvar->field) {
+			ret = convert_variable_fields(vr_die, pf->pvar->var,
+						      pf->pvar->field,
+						      &pf->tvar->ref,
+						      &die_mem);
+			/* The type is taken from the member resolved last. */
+			vr_die = &die_mem;
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (ret == 0)
+		ret = convert_variable_type(vr_die, pf->tvar, pf->pvar->type);
+	/* *expr will be cached in libdw. Don't free it. */
+	return ret;
+}
+
+/*
+ * Find the variable named by pf->pvar in the scope sc_die (falling back to
+ * the compile unit) and convert it into pf->tvar.  Arguments that are not C
+ * variable names are copied as they are.
+ * Returns 0 on success or a negative errno on failure.
+ */
+static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+	Dwarf_Die vr_die;
+	char *name, *sep;
+
+	/* Copy raw parameters */
+	if (!is_c_varname(pf->pvar->var))
+		return copy_to_probe_trace_arg(pf->tvar, pf->pvar);
+
+	if (pf->pvar->name) {
+		pf->tvar->name = strdup(pf->pvar->name);
+	} else {
+		name = synthesize_perf_probe_arg(pf->pvar);
+		if (!name)
+			return -ENOMEM;
+		/* Change type separator to _ */
+		sep = strchr(name, ':');
+		if (sep)
+			*sep = '_';
+		pf->tvar->name = name;
+	}
+	if (pf->tvar->name == NULL)
+		return -ENOMEM;
+
+	pr_debug("Searching '%s' variable in context.\n", pf->pvar->var);
+	/*
+	 * Search child die for local variables and parameters first, then
+	 * search again in global variables.
+	 */
+	if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die) &&
+	    !die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die)) {
+		pr_warning("Failed to find '%s' in this function.\n",
+			   pf->pvar->var);
+		return -ENOENT;
+	}
+
+	return convert_variable(&vr_die, pf);
+}
+
+/*
+ * Convert subprogram DIE to trace point: fill @tp with the symbol name,
+ * the offset of @paddr from the start of that symbol, and @paddr itself.
+ *
+ * The symbol is the name of sp_die or, if it has none, the symbol table
+ * entry found at @paddr in @mod.  For a return probe, @paddr must be the
+ * start of the symbol.  Returns 0 on success or a negative errno.
+ */
+static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod,
+				  Dwarf_Addr paddr, bool retprobe,
+				  const char *function,
+				  struct probe_trace_point *tp)
+{
+	Dwarf_Addr entry_addr, end_addr;
+	const char *name;
+	GElf_Sym sym;
+
+	/* Verify the address is correct */
+	if (dwarf_entrypc(sp_die, &entry_addr) != 0) {
+		pr_warning("Failed to get entry address of %s\n",
+			   dwarf_diename(sp_die));
+		return -ENOENT;
+	}
+	if (dwarf_highpc(sp_die, &end_addr) != 0) {
+		pr_warning("Failed to get end address of %s\n",
+			   dwarf_diename(sp_die));
+		return -ENOENT;
+	}
+	if (paddr > end_addr) {
+		pr_warning("Offset specified is greater than size of %s\n",
+			   dwarf_diename(sp_die));
+		return -EINVAL;
+	}
+
+	name = dwarf_diename(sp_die);
+	if (name == NULL) {
+		/* Try to get the symbol name from symtab */
+		name = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+		if (name == NULL) {
+			pr_warning("Failed to find symbol at 0x%lx\n",
+				   (unsigned long)paddr);
+			return -ENOENT;
+		}
+		/* Offsets are then relative to the symtab symbol. */
+		entry_addr = sym.st_value;
+	}
+
+	tp->offset = (unsigned long)(paddr - entry_addr);
+	tp->address = (unsigned long)paddr;
+	tp->symbol = strdup(name);
+	if (tp->symbol == NULL)
+		return -ENOMEM;
+
+	if (!retprobe)
+		return 0;
+
+	/* Return probe must be on the head of a subprogram */
+	if (entry_addr != paddr) {
+		pr_warning("Failed to find \"%s%%return\",\n"
+			   " because %s is an inlined function and"
+			   " has no return point.\n", function,
+			   function);
+		return -EINVAL;
+	}
+	tp->retprobe = true;
+
+	return 0;
+}
+
+/*
+ * Call probe_finder callback with scope DIE.
+ *
+ * Before the callback runs, pf->sp_die is set to the real subprogram that
+ * contains pf->addr and pf->fb_ops to its frame base operations (NULL if
+ * they cannot be obtained).  pf->fb_ops is reset to NULL afterwards.
+ *
+ * Returns the callback's result, 0 if the address turns out to be a tail
+ * call (which is ignored), -EINVAL if sc_die is NULL, or -ENOENT if no
+ * containing function or call frame is found.
+ */
+static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+	Dwarf_Attribute fb_attr;
+	Dwarf_Frame *frame = NULL;
+	size_t nops;
+	int ret;
+
+	if (!sc_die) {
+		pr_err("Caller must pass a scope DIE. Program error.\n");
+		return -EINVAL;
+	}
+
+	/* If not a real subprogram, find a real one */
+	if (!die_is_func_def(sc_die)) {
+		if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
+			if (die_find_tailfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
+				pr_warning("Ignoring tail call from %s\n",
+						dwarf_diename(&pf->sp_die));
+				return 0;
+			} else {
+				pr_warning("Failed to find probe point in any "
+					   "functions.\n");
+				return -ENOENT;
+			}
+		}
+	} else
+		memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die));
+
+	/* Get the frame base attribute/ops from subprogram */
+	/*
+	 * NOTE(review): the result of dwarf_attr() is not checked, so fb_attr
+	 * may be passed on unset when the DIE has no DW_AT_frame_base -
+	 * confirm that dwarf_getlocation_addr() copes with that.
+	 */
+	dwarf_attr(&pf->sp_die, DW_AT_frame_base, &fb_attr);
+	ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1);
+	if (ret <= 0 || nops == 0) {
+		pf->fb_ops = NULL;
+#if _ELFUTILS_PREREQ(0, 142)
+	} else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
+		   (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) {
+		/* The frame base is the CFA: take the ops from the CFI, trying cfi_eh first */
+		if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 &&
+		     (dwarf_cfi_addrframe(pf->cfi_dbg, pf->addr, &frame) != 0)) ||
+		    dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
+			pr_warning("Failed to get call frame on 0x%jx\n",
+				   (uintmax_t)pf->addr);
+			free(frame);
+			return -ENOENT;
+		}
+#endif
+	}
+
+	/* Call finder's callback handler */
+	ret = pf->callback(sc_die, pf);
+
+	/* Since *pf->fb_ops can be a part of frame. we should free it here. */
+	free(frame);
+	pf->fb_ops = NULL;
+
+	return ret;
+}
+
+/* Search criteria and result shared between find_best_scope() and its callback */
+struct find_scope_param {
+	const char *function;	/* function name to match, or NULL to match by line */
+	const char *file;	/* declaration file the DIE must have, or NULL */
+	int line;		/* target line number (used when function is NULL) */
+	int diff;		/* smallest (line - declaration line) seen so far */
+	Dwarf_Die *die_mem;	/* where the chosen DIE is copied */
+	bool found;		/* true once die_mem holds a candidate */
+};
+
+/*
+ * Callback of find_best_scope(): decide whether the function DIE fn_die is
+ * the scope described by @data (a struct find_scope_param).
+ *
+ * Returns 1 when the function name matches, which is the value used to end
+ * the walk; returns 0 otherwise.  In line-number mode the closest function
+ * declared before the target line is kept as a candidate and the walk goes
+ * on.
+ */
+static int find_best_scope_cb(Dwarf_Die *fn_die, void *data)
+{
+	struct find_scope_param *fsp = data;
+	const char *file;
+	int lno;
+
+	/* Skip if declared file name does not match */
+	if (fsp->file) {
+		file = dwarf_decl_file(fn_die);
+		if (!file || strcmp(fsp->file, file) != 0)
+			return 0;
+	}
+	/* If the function name is given, that's what user expects */
+	if (fsp->function) {
+		if (die_match_name(fn_die, fsp->function)) {
+			memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+			fsp->found = true;
+			return 1;
+		}
+	} else {
+		/*
+		 * With the line number, find the nearest declared DIE.
+		 * A DIE without a declaration line cannot be compared, so
+		 * it is skipped rather than reading lno uninitialized.
+		 */
+		if (dwarf_decl_line(fn_die, &lno) != 0)
+			return 0;
+		if (lno < fsp->line && fsp->diff > fsp->line - lno) {
+			/* Keep a candidate and continue */
+			fsp->diff = fsp->line - lno;
+			memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+			fsp->found = true;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Find the function scope that best fits the probe point described by @pf
+ * among the functions at pf->addr.  Returns @die_mem filled with the chosen
+ * DIE, or NULL if none fits.
+ */
+static Dwarf_Die *find_best_scope(struct probe_finder *pf, Dwarf_Die *die_mem)
+{
+	struct find_scope_param fsp;
+
+	fsp.function = pf->pev->point.function;
+	fsp.file = pf->fname;
+	fsp.line = pf->lno;
+	fsp.diff = INT_MAX;
+	fsp.die_mem = die_mem;
+	fsp.found = false;
+
+	cu_walk_functions_at(&pf->cu_die, pf->addr, find_best_scope_cb, &fsp);
+
+	if (!fsp.found)
+		return NULL;
+
+	return die_mem;
+}
+
+static int probe_point_line_walker(const char *fname, int lineno,
+				   Dwarf_Addr addr, void *data)
+{
+	struct probe_finder *finder = data;
+	Dwarf_Die scope;
+	int err;
+
+	/* Line-walk callback: act only on entries for the wanted line and file */
+	if (lineno != finder->lno || strtailcmp(fname, finder->fname) != 0)
+		return 0;
+
+	finder->addr = addr;
+	if (!find_best_scope(finder, &scope)) {
+		pr_warning("Failed to find scope of probe point.\n");
+		return -ENOENT;
+	}
+	err = call_probe_finder(&scope, finder);
+	if (err < 0)
+		return err;
+
+	/* No error: keep walking, the line may also be in an inline function */
+	return 0;
+}
+
+/* Find probe points by line: walk the lines of pf->cu_die with probe_point_line_walker() */
+static int find_probe_point_by_line(struct probe_finder *pf)
+{
+	return die_walk_lines(&pf->cu_die, probe_point_line_walker, pf);
+}
+
+/* Add to @list each 1-based line number of @fname matching lazy pattern @pat; returns the match count or -errno */
+static int find_lazy_match_lines(struct intlist *list,
+				 const char *fname, const char *pat)
+{
+	FILE *fp;
+	char *line = NULL;
+	size_t line_len = 0;
+	ssize_t len;
+	int count = 0, linenum = 1, err;
+	char sbuf[STRERR_BUFSIZE];
+
+	fp = fopen(fname, "r");
+	if (!fp) {
+		err = errno;	/* save: the logging below may overwrite errno */
+		pr_warning("Failed to open %s: %s\n", fname,
+			   str_error_r(err, sbuf, sizeof(sbuf)));
+		return -err;
+	}
+
+	while ((len = getline(&line, &line_len, fp)) > 0) {
+		if (line[len - 1] == '\n')
+			line[len - 1] = '\0';
+
+		if (strlazymatch(line, pat)) {
+			intlist__add(list, linenum);
+			count++;
+		}
+		linenum++;
+	}
+
+	if (ferror(fp))
+		count = -errno;
+	free(line);
+	fclose(fp);
+
+	if (count == 0)
+		pr_debug("No matched lines found in %s.\n", fname);
+	return count;
+}
+
+static int probe_point_lazy_walker(const char *fname, int lineno,
+				   Dwarf_Addr addr, void *data)
+{
+	struct probe_finder *finder = data;
+	Dwarf_Die scope;
+	int err;
+
+	/* Line-walk callback: act only on cached pattern hits in the wanted file */
+	if (!intlist__has_entry(finder->lcache, lineno))
+		return 0;
+	if (strtailcmp(fname, finder->fname) != 0)
+		return 0;
+
+	pr_debug("Probe line found: line:%d addr:0x%llx\n",
+		 lineno, (unsigned long long)addr);
+	finder->addr = addr;
+	finder->lno = lineno;
+	if (!find_best_scope(finder, &scope)) {
+		pr_warning("Failed to find scope of probe point.\n");
+		return -ENOENT;
+	}
+
+	err = call_probe_finder(&scope, finder);
+	if (err < 0)
+		return err;
+
+	/* No error: go on, the lazy pattern will match other lines too */
+	return 0;
+}
+
+/* Find probe points from lazy pattern; pf->lcache is filled from the source file while it is still empty */
+static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+	int ret = 0;
+	char *fpath;
+
+	if (intlist__empty(pf->lcache)) {	/* no matched lines cached yet */
+		const char *comp_dir;
+
+		comp_dir = cu_get_comp_dir(&pf->cu_die);
+		ret = get_real_path(pf->fname, comp_dir, &fpath);
+		if (ret < 0) {
+			pr_warning("Failed to find source file path.\n");
+			return ret;
+		}
+
+		/* Matching lazy line pattern */
+		ret = find_lazy_match_lines(pf->lcache, fpath,
+					    pf->pev->point.lazy_line);
+		free(fpath);
+		if (ret <= 0)	/* error, or no line matched: nothing to walk */
+			return ret;
+	}
+
+	return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
+}
+
+static void skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+	struct perf_probe_point *pp = &pf->pev->point;
+	/* Hand pf->addr to die_skip_prologue() only if every check below passes */
+	/* Not uprobe? */
+	if (!pf->pev->uprobes)
+		return;
+
+	/* Compiled with optimization? */
+	if (die_is_optimized_target(&pf->cu_die))
+		return;
+
+	/* Don't know entrypc? */
+	if (!pf->addr)
+		return;
+
+	/* Only FUNC and FUNC@SRC are eligible. */
+	if (!pp->function || pp->line || pp->retprobe || pp->lazy_line ||
+	    pp->offset || pp->abs_address)
+		return;
+
+	/* Not interested in func parameter? */
+	if (!perf_probe_with_var(pf->pev))
+		return;
+
+	pr_info("Target program is compiled without optimization. Skipping prologue.\n"
+		"Probe on address 0x%" PRIx64 " to force probing at the function entry.\n\n",
+		pf->addr);	/* printed before the address is adjusted */
+
+	die_skip_prologue(sp_die, &pf->cu_die, &pf->addr);
+}
+
+static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
+{
+	struct probe_finder *pf = data;
+	struct perf_probe_point *pp = &pf->pev->point;
+	Dwarf_Addr addr;
+	int ret;
+	/* Instance-walk callback: set up a probe for one inlined instance (@in_die) */
+	if (pp->lazy_line)
+		ret = find_probe_point_lazy(in_die, pf);
+	else {
+		/* Get probe address */
+		if (dwarf_entrypc(in_die, &addr) != 0) {
+			pr_warning("Failed to get entry address of %s.\n",
+				   dwarf_diename(in_die));
+			return -ENOENT;
+		}
+		if (addr == 0) {
+			pr_debug("%s has no valid entry address. skipped.\n",
+				 dwarf_diename(in_die));
+			return -ENOENT;
+		}
+		pf->addr = addr;
+		pf->addr += pp->offset;	/* user-requested offset from the entry */
+		pr_debug("found inline addr: 0x%jx\n",
+			 (uintmax_t)pf->addr);
+
+		ret = call_probe_finder(in_die, pf);
+	}
+
+	return ret;
+}
+
+/* Callback parameter with return value for libdw */
+struct dwarf_callback_param {
+	void *data;	/* Finder passed through to the callback */
+	int retval;	/* Result the callback stores for its caller */
+};
+
+/* Function-DIE callback (dwarf_getfuncs() and the pubnames fast path): probe @sp_die if it matches pp */
+static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
+{
+	struct dwarf_callback_param *param = data;
+	struct probe_finder *pf = param->data;
+	struct perf_probe_point *pp = &pf->pev->point;
+	const char *fname = dwarf_decl_file(sp_die);
+
+	/* Check tag and diename */
+	if (!die_is_func_def(sp_die) ||
+	    !die_match_name(sp_die, pp->function))
+		return DWARF_CB_OK;
+	/* Check declared file; a DIE without one cannot satisfy a file filter */
+	if (pp->file && (!fname || strtailcmp(pp->file, fname)))
+		return DWARF_CB_OK;
+
+	pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die),
+		 (unsigned long)dwarf_dieoffset(sp_die));
+	pf->fname = fname;
+	if (pp->line) { /* Function relative line */
+		dwarf_decl_line(sp_die, &pf->lno);
+		pf->lno += pp->line;
+		param->retval = find_probe_point_by_line(pf);
+	} else if (die_is_func_instance(sp_die)) {
+		/* Instances should have an entry address, but the lookup can */
+		/* fail or yield 0; skip the DIE rather than use a stale pf->addr */
+		if (dwarf_entrypc(sp_die, &pf->addr) != 0 ||
+		    pf->addr == 0) {
+			pr_debug("%s has no entry PC. Skipped\n",
+				 dwarf_diename(sp_die));
+			param->retval = 0;
+		/* Real function */
+		} else if (pp->lazy_line)
+			param->retval = find_probe_point_lazy(sp_die, pf);
+		else {
+			skip_prologue(sp_die, pf);
+			pf->addr += pp->offset;
+			/* TODO: Check the address in this function */
+			param->retval = call_probe_finder(sp_die, pf);
+		}
+	} else if (!probe_conf.no_inlines) {
+		/* Inlined function: search instances */
+		param->retval = die_walk_instances(sp_die,
+					probe_point_inline_cb, (void *)pf);
+		/* This could be a non-existed inline definition */
+		if (param->retval == -ENOENT)
+			param->retval = 0;
+	}
+
+	/* We need to find other candidates */
+	if (strisglob(pp->function) && param->retval >= 0) {
+		param->retval = 0;	/* We have to clear the result */
+		return DWARF_CB_OK;
+	}
+
+	return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */
+}
+
+static int find_probe_point_by_func(struct probe_finder *pf)
+{
+	struct dwarf_callback_param cb_arg = { .data = pf, .retval = 0 };
+	/* The callback reports through cb_arg.retval, which is what is returned */
+	dwarf_getfuncs(&pf->cu_die, probe_point_search_cb, &cb_arg, 0);
+	return cb_arg.retval;
+}
+
+struct pubname_callback_param {	/* In/out state for pubname_search_cb() */
+	char *function;		/* Function name to look for */
+	char *file;		/* Declaration file filter, or NULL for none */
+	Dwarf_Die *cu_die;	/* Out: buffer for the CU DIE of the match */
+	Dwarf_Die *sp_die;	/* Out: buffer for the subprogram DIE */
+	int found;		/* Set to 1 when a match was found */
+};
+
+static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
+{
+	struct pubname_callback_param *param = data;
+	const char *fname;
+
+	/*
+	 * Stop at the first subprogram matching param->function (and
+	 * param->file, if set); its DIE and CU DIE are left in param.
+	 */
+	if (!dwarf_offdie(dbg, gl->die_offset, param->sp_die) ||
+	    dwarf_tag(param->sp_die) != DW_TAG_subprogram ||
+	    !die_match_name(param->sp_die, param->function) ||
+	    !dwarf_offdie(dbg, gl->cu_offset, param->cu_die))
+		return DWARF_CB_OK;
+
+	/* A DIE without a declared file cannot satisfy a file filter */
+	fname = param->file ? dwarf_decl_file(param->sp_die) : NULL;
+	if (param->file && (!fname || strtailcmp(param->file, fname)))
+		return DWARF_CB_OK;
+
+	param->found = 1;
+	return DWARF_CB_ABORT;
+}
+
+static int debuginfo__find_probe_location(struct debuginfo *dbg,
+				  struct probe_finder *pf)
+{
+	struct perf_probe_point *pp = &pf->pev->point;
+	Dwarf_Off off, noff;
+	size_t cuhl;
+	Dwarf_Die *diep;
+	int ret = 0;
+
+	off = 0;
+	pf->lcache = intlist__new(NULL);
+	if (!pf->lcache)
+		return -ENOMEM;
+
+	/* Fastpath: lookup by function name from .debug_pubnames section */
+	if (pp->function && !strisglob(pp->function)) {
+		struct pubname_callback_param pubname_param = {
+			.function = pp->function,
+			.file	  = pp->file,
+			.cu_die	  = &pf->cu_die,
+			.sp_die	  = &pf->sp_die,
+			.found	  = 0,
+		};
+		struct dwarf_callback_param probe_param = {
+			.data = pf,
+		};
+
+		dwarf_getpubnames(dbg->dbg, pubname_search_cb,
+				  &pubname_param, 0);
+		if (pubname_param.found) {
+			ret = probe_point_search_cb(&pf->sp_die, &probe_param);
+			if (ret)
+				goto found;
+		}
+	}
+
+	/* Loop on CUs (Compilation Unit) */
+	while (!dwarf_nextcu(dbg->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
+		/* Get the DIE(Debugging Information Entry) of this CU */
+		diep = dwarf_offdie(dbg->dbg, off + cuhl, &pf->cu_die);
+		if (!diep)
+			continue;
+
+		/* Check if target file is included. */
+		if (pp->file)
+			pf->fname = cu_find_realpath(&pf->cu_die, pp->file);
+		else
+			pf->fname = NULL;
+
+		if (!pp->file || pf->fname) {
+			if (pp->function)
+				ret = find_probe_point_by_func(pf);
+			else if (pp->lazy_line)
+				ret = find_probe_point_lazy(&pf->cu_die, pf);
+			else {
+				pf->lno = pp->line;
+				ret = find_probe_point_by_line(pf);
+			}
+			if (ret < 0)
+				break;
+		}
+		off = noff;
+	}
+
+found:
+	intlist__delete(pf->lcache);
+	pf->lcache = NULL;
+
+	return ret;
+}
+
+/* Find probe points from debuginfo, fetching call frame information first if none is set */
+static int debuginfo__find_probes(struct debuginfo *dbg,
+				  struct probe_finder *pf)
+{
+	GElf_Ehdr ehdr;
+	Elf *elf;
+
+	/* CFI is already attached to this finder: nothing to set up */
+	if (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)
+		return debuginfo__find_probe_location(dbg, pf);
+
+	/* Get the call frame information from this dwarf */
+	elf = dwarf_getelf(dbg->dbg);
+	if (!elf)
+		return -EINVAL;
+	if (!gelf_getehdr(elf, &ehdr))
+		return -EINVAL;
+
+	pf->machine = ehdr.e_machine;
+
+#if _ELFUTILS_PREREQ(0, 142)
+	{
+		GElf_Shdr shdr;
+
+		/* Take .eh_frame CFI only when that section is SHT_PROGBITS */
+		if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
+		    shdr.sh_type == SHT_PROGBITS)
+			pf->cfi_eh = dwarf_getcfi_elf(elf);
+
+		/* CFI obtained from the DWARF handle itself */
+		pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
+	}
+#endif
+
+	return debuginfo__find_probe_location(dbg, pf);
+}
+
+struct local_vars_finder {	/* State for expanding $vars/$params */
+	struct probe_finder *pf;	/* Finder supplying addr, fb_ops, sp_die */
+	struct perf_probe_arg *args;	/* Output array being filled */
+	bool vars;			/* Also take DW_TAG_variable DIEs */
+	int max_args;			/* Capacity recorded for args[] */
+	int nargs;			/* Entries of args[] used so far */
+	int ret;			/* Error set by the callback, else 0 */
+};
+
+/* Child-DIE callback: append each parameter (and, for $vars, local) locatable at pf->addr to vf->args */
+static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
+{
+	struct local_vars_finder *vf = data;
+	struct probe_finder *pf = vf->pf;
+	int tag = dwarf_tag(die_mem);
+
+	if (tag == DW_TAG_formal_parameter ||
+	    (tag == DW_TAG_variable && vf->vars)) {
+		if (convert_variable_location(die_mem, pf->addr,
+					      pf->fb_ops, &pf->sp_die,
+					      pf->machine, NULL) == 0) {
+			char *name = (char *)dwarf_diename(die_mem);
+			/* No name, or vf->args[] already full: end the walk */
+			if (!name || vf->nargs >= vf->max_args) {
+				vf->ret = name ? -ERANGE : -ENOMEM;
+				return DIE_FIND_CB_END;
+			}
+			pr_debug(" %s", name);
+			vf->args[vf->nargs++].var = name;
+		}
+	}
+
+	if (dwarf_haspc(die_mem, vf->pf->addr))
+		return DIE_FIND_CB_CONTINUE;
+	else
+		return DIE_FIND_CB_SIBLING;
+}
+
+/* Copy pf->pev's arguments into @args, expanding $vars/$params from @sc_die; returns the count or <0 */
+static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
+			     struct perf_probe_arg *args)
+{
+	Dwarf_Die die_mem;
+	int i, n = 0;
+	struct local_vars_finder vf = {.pf = pf, .args = args, .vars = false,
+				.max_args = MAX_PROBE_ARGS, .ret = 0};
+
+	for (i = 0; i < pf->pev->nargs; i++) {
+		/* var never be NULL */
+		if (strcmp(pf->pev->args[i].var, PROBE_ARG_VARS) == 0)
+			vf.vars = true;
+		else if (strcmp(pf->pev->args[i].var, PROBE_ARG_PARAMS) != 0) {
+			/* Copy normal argument, unless @args is already full */
+			if (n >= vf.max_args)
+				return -ERANGE;
+			args[n++] = pf->pev->args[i];
+			continue;
+		}
+		pr_debug("Expanding %s into:", pf->pev->args[i].var);
+		vf.nargs = n;
+		/* Special local variables */
+		die_find_child(sc_die, copy_variables_cb, &vf, &die_mem);
+		pr_debug(" (%d)\n", vf.nargs - n);
+		if (vf.ret < 0)
+			return vf.ret;
+		n = vf.nargs;
+	}
+	return n;
+}
+
+/* Probe-finder callback: add a found probe point (scope @sc_die) into the trace event list tf->tevs */
+static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+	struct trace_event_finder *tf =
+			container_of(pf, struct trace_event_finder, pf);
+	struct perf_probe_point *pp = &pf->pev->point;
+	struct probe_trace_event *tev;
+	struct perf_probe_arg *args = NULL;
+	int ret, i;
+
+	/* Check number of tevs */
+	if (tf->ntevs == tf->max_tevs) {
+		pr_warning("Too many( > %d) probe point found.\n",
+			   tf->max_tevs);
+		return -ERANGE;
+	}
+	tev = &tf->tevs[tf->ntevs++];	/* slot is given back at end: on failure */
+
+	/* Trace point should be converted from subprogram DIE */
+	ret = convert_to_trace_point(&pf->sp_die, tf->mod, pf->addr,
+				     pp->retprobe, pp->function, &tev->point);
+	if (ret < 0)
+		goto end;
+
+	tev->point.realname = strdup(dwarf_diename(sc_die));
+	if (!tev->point.realname) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
+		 tev->point.offset);
+
+	/* Expand special probe argument if exist */
+	args = zalloc(sizeof(struct perf_probe_arg) * MAX_PROBE_ARGS);
+	if (args == NULL) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	ret = expand_probe_args(sc_die, pf, args);
+	if (ret < 0)
+		goto end;
+
+	tev->nargs = ret;	/* number of arguments after expansion */
+	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+	if (tev->args == NULL) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	/* Find each argument */
+	for (i = 0; i < tev->nargs; i++) {
+		pf->pvar = &args[i];
+		pf->tvar = &tev->args[i];
+		/* Variable should be found from scope DIE */
+		ret = find_variable(sc_die, pf);
+		if (ret != 0)
+			break;
+	}
+
+end:
+	if (ret) {	/* any nonzero result undoes the slot taken above */
+		clear_probe_trace_event(tev);
+		tf->ntevs--;
+	}
+	free(args);
+	return ret;
+}
+
+/* Find probe_trace_events specified by perf_probe_event from debuginfo */
+int debuginfo__find_trace_events(struct debuginfo *dbg,
+				 struct perf_probe_event *pev,
+				 struct probe_trace_event **tevs)
+{
+	struct trace_event_finder tf = {
+			.pf = {.pev = pev, .callback = add_probe_trace_event},
+			.max_tevs = probe_conf.max_probes, .mod = dbg->mod};
+	int ret, i;
+
+	/* Allocate result tevs array */
+	*tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs);
+	if (*tevs == NULL)
+		return -ENOMEM;
+
+	tf.tevs = *tevs;
+	tf.ntevs = 0;
+
+	ret = debuginfo__find_probes(dbg, &tf.pf);
+	if (ret < 0) {
+		for (i = 0; i < tf.ntevs; i++)
+			clear_probe_trace_event(&tf.tevs[i]);
+		zfree(tevs);
+		return ret;
+	}
+
+	return (ret < 0) ? ret : tf.ntevs;
+}
+
+/* Child-DIE callback: add each variable/parameter usable at af->pf.addr to the newest variable list */
+static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
+{
+	struct available_var_finder *af = data;
+	struct variable_list *vl;
+	struct strbuf buf = STRBUF_INIT;
+	int tag, ret;
+
+	vl = &af->vls[af->nvls - 1];	/* list most recently added to af->vls */
+
+	tag = dwarf_tag(die_mem);
+	if (tag == DW_TAG_formal_parameter ||
+	    tag == DW_TAG_variable) {
+		ret = convert_variable_location(die_mem, af->pf.addr,
+						af->pf.fb_ops, &af->pf.sp_die,
+						af->pf.machine, NULL);
+		if (ret == 0 || ret == -ERANGE) {	/* -ERANGE entries get the [INV] tag below */
+			int ret2;
+			bool externs = !af->child;
+
+			if (strbuf_init(&buf, 64) < 0)
+				goto error;
+
+			if (probe_conf.show_location_range) {
+				if (!externs)
+					ret2 = strbuf_add(&buf,
+						ret ? "[INV]\t" : "[VAL]\t", 6);
+				else
+					ret2 = strbuf_add(&buf, "[EXT]\t", 6);
+				if (ret2)
+					goto error;
+			}
+
+			ret2 = die_get_varname(die_mem, &buf);
+
+			if (!ret2 && probe_conf.show_location_range &&
+				!externs) {
+				if (strbuf_addch(&buf, '\t') < 0)
+					goto error;
+				ret2 = die_get_var_range(&af->pf.sp_die,
+							die_mem, &buf);
+			}
+
+			pr_debug("Add new var: %s\n", buf.buf);
+			if (ret2 == 0) {
+				strlist__add(vl->vars,
+					strbuf_detach(&buf, NULL));
+			}
+			strbuf_release(&buf);
+		}
+	}
+
+	if (af->child && dwarf_haspc(die_mem, af->pf.addr))
+		return DIE_FIND_CB_CONTINUE;
+	else
+		return DIE_FIND_CB_SIBLING;
+error:
+	strbuf_release(&buf);
+	pr_debug("Error in strbuf\n");
+	return DIE_FIND_CB_END;
+}
+
+/* Probe-finder callback: add the variables found at this probe point (scope @sc_die) to af->vls */
+static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+	struct available_var_finder *af =
+			container_of(pf, struct available_var_finder, pf);
+	struct perf_probe_point *pp = &pf->pev->point;
+	struct variable_list *vl;
+	Dwarf_Die die_mem;
+	int ret;
+
+	/* Check number of variable lists */
+	if (af->nvls == af->max_vls) {
+		pr_warning("Too many( > %d) probe point found.\n", af->max_vls);
+		return -ERANGE;
+	}
+	vl = &af->vls[af->nvls++];	/* the slot stays counted even if a step below fails */
+
+	/* Trace point should be converted from subprogram DIE */
+	ret = convert_to_trace_point(&pf->sp_die, af->mod, pf->addr,
+				     pp->retprobe, pp->function, &vl->point);
+	if (ret < 0)
+		return ret;
+
+	pr_debug("Probe point found: %s+%lu\n", vl->point.symbol,
+		 vl->point.offset);
+
+	/* Find local variables */
+	vl->vars = strlist__new(NULL, NULL);
+	if (vl->vars == NULL)
+		return -ENOMEM;
+	af->child = true;
+	die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem);
+
+	/* Find external variables */
+	if (!probe_conf.show_ext_vars)
+		goto out;
+	/* Don't need to search child DIE for external vars. */
+	af->child = false;
+	die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem);
+
+out:
+	if (strlist__empty(vl->vars)) {	/* nothing collected: keep no list */
+		strlist__delete(vl->vars);
+		vl->vars = NULL;
+	}
+
+	return ret;
+}
+
+/*
+ * Find available variables at given probe point.
+ * Return the number of found probe points, with the lists in *vls.
+ * Return 0 if there is no matched probe point.
+ * Return <0 if an error occurs; *vls is then released with zfree().
+ */
+int debuginfo__find_available_vars_at(struct debuginfo *dbg,
+				      struct perf_probe_event *pev,
+				      struct variable_list **vls)
+{
+	struct available_var_finder finder = {
+		.pf = { .pev = pev, .callback = add_available_vars },
+		.mod = dbg->mod,
+		.max_vls = probe_conf.max_probes,
+	};
+	int err, idx;
+
+	/* Result array, sized for the configured maximum number of probes */
+	finder.vls = zalloc(sizeof(struct variable_list) * finder.max_vls);
+	*vls = finder.vls;
+	if (finder.vls == NULL)
+		return -ENOMEM;
+
+	err = debuginfo__find_probes(dbg, &finder.pf);
+	if (err >= 0)
+		return finder.nvls;
+
+	/* Free vlist for error, newest entry first */
+	for (idx = finder.nvls - 1; idx >= 0; idx--) {
+		zfree(&finder.vls[idx].point.symbol);
+		strlist__delete(finder.vls[idx].vars);
+	}
+
+	zfree(vls);
+	return err;
+}
+
+/* For the kernel module: store the .text relocation section's address in *offs (left untouched if there is none) */
+int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
+				bool adjust_offset)
+{
+	int n, i;
+	Elf32_Word shndx;
+	Elf_Scn *scn;
+	Elf *elf;
+	GElf_Shdr mem, *shdr;
+	const char *p;
+
+	elf = dwfl_module_getelf(dbg->mod, &dbg->bias);
+	if (!elf)
+		return -EINVAL;
+
+	/* Get the number of relocations */
+	n = dwfl_module_relocations(dbg->mod);
+	if (n < 0)
+		return -ENOENT;
+	/* Search the relocation related .text section */
+	for (i = 0; i < n; i++) {
+		p = dwfl_module_relocation_info(dbg->mod, i, &shndx);
+		if (p && strcmp(p, ".text") == 0) {	/* the lookup can return NULL */
+			/* OK, get the section header */
+			scn = elf_getscn(elf, shndx);
+			if (!scn)
+				return -ENOENT;
+			shdr = gelf_getshdr(scn, &mem);
+			if (!shdr)
+				return -ENOENT;
+			*offs = shdr->sh_addr;
+			if (adjust_offset)
+				*offs -= shdr->sh_offset;
+		}
+	}
+	return 0;
+}
+
+/* Reverse search */
+int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr,
+				struct perf_probe_point *ppt)
+{
+	Dwarf_Die cudie, spdie, indie;
+	Dwarf_Addr _addr = 0, baseaddr = 0;
+	const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp;
+	int baseline = 0, lineno = 0, ret = 0;
+
+	/* We always need to relocate the address for aranges */
+	if (debuginfo__get_text_offset(dbg, &baseaddr, false) == 0)
+		addr += baseaddr;
+	/* Find cu die */
+	if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) {
+		pr_warning("Failed to find debug information for address %lx\n",
+			   addr);
+		ret = -EINVAL;
+		goto end;
+	}
+
+	/* Find a corresponding line (filename and lineno) */
+	cu_find_lineinfo(&cudie, addr, &fname, &lineno);
+	/* Don't care whether it failed or not */
+
+	/* Find a corresponding function (name, baseline and baseaddr) */
+	if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) {
+		/* Get function entry information */
+		func = basefunc = dwarf_diename(&spdie);
+		if (!func ||
+		    dwarf_entrypc(&spdie, &baseaddr) != 0 ||
+		    dwarf_decl_line(&spdie, &baseline) != 0) {
+			lineno = 0;
+			goto post;
+		}
+
+		fname = dwarf_decl_file(&spdie);
+		if (addr == (unsigned long)baseaddr) {
+			/* Function entry - Relative line number is 0 */
+			lineno = baseline;
+			goto post;
+		}
+
+		/* Track down the inline functions step by step */
+		while (die_find_top_inlinefunc(&spdie, (Dwarf_Addr)addr,
+						&indie)) {
+			/* There is an inline function */
+			if (dwarf_entrypc(&indie, &_addr) == 0 &&
+			    _addr == addr) {
+				/*
+				 * addr is at an inline function entry.
+				 * In this case, lineno should be the call-site
+				 * line number. (overwrite lineinfo)
+				 */
+				lineno = die_get_call_lineno(&indie);
+				fname = die_get_call_file(&indie);
+				break;
+			} else {
+				/*
+				 * addr is in an inline function body.
+				 * Since lineno points one of the lines
+				 * of the inline function, baseline should
+				 * be the entry line of the inline function.
+				 */
+				tmp = dwarf_diename(&indie);
+				if (!tmp ||
+				    dwarf_decl_line(&indie, &baseline) != 0)
+					break;
+				func = tmp;
+				spdie = indie;
+			}
+		}
+		/* Verify the lineno and baseline are in a same file */
+		tmp = dwarf_decl_file(&spdie);
+		if (!tmp || strcmp(tmp, fname) != 0)
+			lineno = 0;
+	}
+
+post:
+	/* Make a relative line number or an offset */
+	if (lineno)
+		ppt->line = lineno - baseline;
+	else if (basefunc) {
+		ppt->offset = addr - (unsigned long)baseaddr;
+		func = basefunc;
+	}
+
+	/* Duplicate strings */
+	if (func) {
+		ppt->function = strdup(func);
+		if (ppt->function == NULL) {
+			ret = -ENOMEM;
+			goto end;
+		}
+	}
+	if (fname) {
+		ppt->file = strdup(fname);
+		if (ppt->file == NULL) {
+			zfree(&ppt->function);
+			ret = -ENOMEM;
+			goto end;
+		}
+	}
+end:
+	if (ret == 0 && (fname || func))
+		ret = 1;	/* Found a point */
+	return ret;
+}
+
+/* Add @lineno to lr->line_list, storing a copy of @src as lr->path if none is set yet */
+static int line_range_add_line(const char *src, unsigned int lineno,
+			       struct line_range *lr)
+{
+	/* Copy source path only once; later calls keep the stored one */
+	if (lr->path == NULL)
+		lr->path = strdup(src);
+	if (lr->path == NULL)
+		return -ENOMEM;
+
+	return intlist__add(lr->line_list, lineno);
+}
+
+static int line_range_walk_cb(const char *fname, int lineno,
+			      Dwarf_Addr addr __maybe_unused,
+			      void *data)
+{
+	struct line_finder *finder = data;
+	int rc;
+
+	/* Line-walk callback: ignore other files and lines outside the range */
+	if (strtailcmp(fname, finder->fname) != 0)
+		return 0;
+	if (lineno < finder->lno_s || lineno > finder->lno_e)
+		return 0;
+
+	/* -EEXIST from adding the line is not treated as a failure */
+	rc = line_range_add_line(fname, lineno, finder->lr);
+	return (rc < 0 && rc != -EEXIST) ? rc : 0;
+}
+
+/* Find line range from its line number, walking @sp_die or, if that is NULL, lf->cu_die */
+static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
+{
+	Dwarf_Die *scope = sp_die ? sp_die : &lf->cu_die;
+	int err = die_walk_lines(scope, line_range_walk_cb, lf);
+
+	if (err < 0) {
+		/* The walk failed: drop the path recorded so far */
+		zfree(&lf->lr->path);
+		return err;
+	}
+
+	/* Lines are not found */
+	if (intlist__empty(lf->lr->line_list))
+		return 0;
+	lf->found = 1;
+	return 1;
+}
+
+static int line_range_inline_cb(Dwarf_Die *in_die, void *data)
+{
+	int rc = find_line_range_by_line(in_die, data);
+
+	/*
+	 * Every inlined instance has to be checked (paths can be optimized
+	 * out per instance); only an error is passed back to the caller.
+	 */
+	if (rc < 0)
+		return rc;
+	return 0;
+}
+
+/* Function-DIE callback: when @sp_die defines lr->function, turn lr into absolute lines and collect them */
+static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
+{
+	struct dwarf_callback_param *param = data;
+	struct line_finder *lf = param->data;
+	struct line_range *lr = lf->lr;
+	const char *fname = dwarf_decl_file(sp_die);
+
+	/* Check declared file; a DIE without one cannot satisfy a file filter */
+	if (lr->file && (!fname || strtailcmp(lr->file, fname)))
+		return DWARF_CB_OK;
+	if (die_is_func_def(sp_die) &&
+	    die_match_name(sp_die, lr->function)) {
+		lf->fname = fname;
+		dwarf_decl_line(sp_die, &lr->offset);
+		pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
+		lf->lno_s = lr->offset + lr->start;
+		if (lf->lno_s < 0)	/* Overflow */
+			lf->lno_s = INT_MAX;
+		lf->lno_e = lr->offset + lr->end;
+		if (lf->lno_e < 0)	/* Overflow */
+			lf->lno_e = INT_MAX;
+		pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e);
+		lr->start = lf->lno_s;
+		lr->end = lf->lno_e;
+		if (!die_is_func_instance(sp_die))
+			param->retval = die_walk_instances(sp_die,
+						line_range_inline_cb, lf);
+		else
+			param->retval = find_line_range_by_line(sp_die, lf);
+		return DWARF_CB_ABORT;
+	}
+	return DWARF_CB_OK;
+}
+
+static int find_line_range_by_func(struct line_finder *lf)
+{
+	struct dwarf_callback_param cb_arg = { .data = lf, .retval = 0 };
+	dwarf_getfuncs(&lf->cu_die, line_range_search_cb, &cb_arg, 0);
+	return cb_arg.retval;	/* set by the callback, 0 if it never matched */
+}
+
+int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr)
+{
+	struct line_finder lf = {.lr = lr, .found = 0};
+	int ret = 0;
+	Dwarf_Off off = 0, noff;
+	size_t cuhl;
+	Dwarf_Die *diep;
+	const char *comp_dir;
+
+	/* Fastpath: lookup by function name from .debug_pubnames section */
+	if (lr->function) {
+		struct pubname_callback_param pubname_param = {
+			.function = lr->function, .file = lr->file,
+			.cu_die = &lf.cu_die, .sp_die = &lf.sp_die, .found = 0};
+		struct dwarf_callback_param line_range_param = {
+			.data = (void *)&lf, .retval = 0};
+
+		dwarf_getpubnames(dbg->dbg, pubname_search_cb,
+				  &pubname_param, 0);
+		if (pubname_param.found) {
+			line_range_search_cb(&lf.sp_die, &line_range_param);
+			if (lf.found)
+				goto found;
+		}
+	}
+
+	/* Loop on CUs (Compilation Unit) */
+	while (!lf.found && ret >= 0) {
+		if (dwarf_nextcu(dbg->dbg, off, &noff, &cuhl,
+				 NULL, NULL, NULL) != 0)
+			break;
+
+		/* Get the DIE(Debugging Information Entry) of this CU */
+		diep = dwarf_offdie(dbg->dbg, off + cuhl, &lf.cu_die);
+		if (!diep)
+			continue;
+
+		/* Check if target file is included. */
+		if (lr->file)
+			lf.fname = cu_find_realpath(&lf.cu_die, lr->file);
+		else
+			lf.fname = 0;
+
+		if (!lr->file || lf.fname) {
+			if (lr->function)
+				ret = find_line_range_by_func(&lf);
+			else {
+				lf.lno_s = lr->start;
+				lf.lno_e = lr->end;
+				ret = find_line_range_by_line(NULL, &lf);
+			}
+		}
+		off = noff;
+	}
+
+found:
+	/* Store comp_dir */
+	if (lf.found) {
+		comp_dir = cu_get_comp_dir(&lf.cu_die);
+		if (comp_dir) {
+			lr->comp_dir = strdup(comp_dir);
+			if (!lr->comp_dir)
+				ret = -ENOMEM;
+		}
+	}
+
+	pr_debug("path: %s\n", lr->path);
+	return (ret < 0) ? ret : lf.found;
+}
+
+/*
+ * Find a src file from a DWARF tag path. Prepend optional source path prefix
+ * and chop off leading directories that do not exist. Result is passed back as
+ * a newly allocated path in *new_path on success.
+ * Return 0 if file was found and readable, -errno otherwise.
+ */
+int get_real_path(const char *raw_path, const char *comp_dir,
+			 char **new_path)
+{
+	const char *prefix = symbol_conf.source_prefix;
+	int err;
+
+	if (!prefix) {
+		if (raw_path[0] != '/' && comp_dir)
+			/* If not an absolute path, try to use comp_dir */
+			prefix = comp_dir;
+		else {
+			if (access(raw_path, R_OK) == 0) {
+				*new_path = strdup(raw_path);
+				return *new_path ? 0 : -ENOMEM;
+			} else
+				return -errno;
+		}
+	}
+
+	*new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
+	if (!*new_path)
+		return -ENOMEM;
+
+	for (;;) {
+		sprintf(*new_path, "%s/%s", prefix, raw_path);
+		if (access(*new_path, R_OK) == 0)
+			return 0;
+		err = errno;	/* save now: freeing the buffer may change errno */
+
+		if (!symbol_conf.source_prefix) {
+			/* In case of searching comp_dir, don't retry */
+			zfree(new_path);
+			return -err;
+		}
+
+		switch (err) {
+		case ENAMETOOLONG:
+		case ENOENT:
+		case EROFS:
+		case EFAULT:
+			raw_path = strchr(++raw_path, '/');
+			if (!raw_path) {
+				zfree(new_path);
+				return -ENOENT;
+			}
+			continue;
+		default:
+			zfree(new_path);
+			return -err;
+		}
+	}
+}
diff --git a/util/probe-finder.h b/util/probe-finder.h
new file mode 100644
index 0000000..1625298
--- /dev/null
+++ b/util/probe-finder.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PROBE_FINDER_H
+#define _PROBE_FINDER_H
+
+#include <stdbool.h>
+#include "intlist.h"
+#include "probe-event.h"
+#include "sane_ctype.h"
+
+#define MAX_PROBE_BUFFER	1024
+#define MAX_PROBES		 128
+#define MAX_PROBE_ARGS		 128
+
+#define PROBE_ARG_VARS		"$vars"
+#define PROBE_ARG_PARAMS	"$params"
+
+static inline int is_c_varname(const char *name)
+{
+	/* TODO: so far only the first character is checked */
+	return name[0] == '_' || isalpha(name[0]);
+}
+
+#ifdef HAVE_DWARF_SUPPORT
+
+#include "dwarf-aux.h"
+
+/* TODO: export debuginfo data structure even if no dwarf support */
+
+/* debug information structure */
+struct debuginfo {
+	Dwarf		*dbg;	/* libdw handle passed to the dwarf_*() calls */
+	Dwfl_Module	*mod;	/* libdwfl module passed to dwfl_module_*() */
+	Dwfl		*dwfl;
+	Dwarf_Addr	bias;	/* Written by dwfl_module_getelf() */
+};
+
+/* This also tries to open distro debuginfo */
+struct debuginfo *debuginfo__new(const char *path);
+void debuginfo__delete(struct debuginfo *dbg);
+
+/* Find probe_trace_events specified by perf_probe_event from debuginfo */
+int debuginfo__find_trace_events(struct debuginfo *dbg,
+				 struct perf_probe_event *pev,
+				 struct probe_trace_event **tevs);
+
+/* Find a perf_probe_point from debuginfo */
+int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr,
+				struct perf_probe_point *ppt);
+
+int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
+			       bool adjust_offset);
+
+/* Find a line range */
+int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr);
+
+/* Find available variables */
+int debuginfo__find_available_vars_at(struct debuginfo *dbg,
+				      struct perf_probe_event *pev,
+				      struct variable_list **vls);
+
+/* Find a src file from a DWARF tag path */
+int get_real_path(const char *raw_path, const char *comp_dir,
+			 char **new_path);
+
+struct probe_finder {	/* Shared state of one probe-point search */
+	struct perf_probe_event	*pev;		/* Target probe event */
+
+	/* Callback when a probe point is found */
+	int (*callback)(Dwarf_Die *sc_die, struct probe_finder *pf);
+
+	/* For function searching */
+	int			lno;		/* Line number */
+	Dwarf_Addr		addr;		/* Address */
+	const char		*fname;		/* Real file name */
+	Dwarf_Die		cu_die;		/* Current CU */
+	Dwarf_Die		sp_die;		/* Subprogram DIE used for trace point conversion */
+	struct intlist		*lcache;	/* Line cache for lazy match */
+
+	/* For variable searching */
+#if _ELFUTILS_PREREQ(0, 142)
+	/* Call Frame Information from .eh_frame */
+	Dwarf_CFI		*cfi_eh;
+	/* Call Frame Information from .debug_frame */
+	Dwarf_CFI		*cfi_dbg;
+#endif
+	Dwarf_Op		*fb_ops;	/* Frame base attribute */
+	unsigned int		machine;	/* Target machine arch */
+	struct perf_probe_arg	*pvar;		/* Current target variable */
+	struct probe_trace_arg	*tvar;		/* Current result variable */
+};
+
+struct trace_event_finder {	/* Finder that collects probe_trace_events */
+	struct probe_finder	pf;		/* Embedded; recovered with container_of() */
+	Dwfl_Module		*mod;		/* For solving symbols */
+	struct probe_trace_event *tevs;		/* Found trace events */
+	int			ntevs;		/* Number of trace events */
+	int			max_tevs;	/* Max number of trace events */
+};
+
+struct available_var_finder {	/* Finder that collects available variables */
+	struct probe_finder	pf;		/* Embedded; recovered with container_of() */
+	Dwfl_Module		*mod;		/* For solving symbols */
+	struct variable_list	*vls;		/* Found variable lists */
+	int			nvls;		/* Number of variable lists */
+	int			max_vls;	/* Max no. of variable lists */
+	bool			child;		/* Search child scopes */
+};
+
+struct line_finder {	/* State of one line-range search */
+	struct line_range	*lr;		/* Target line range */
+
+	const char		*fname;		/* File name */
+	int			lno_s;		/* Start line number */
+	int			lno_e;		/* End line number */
+	Dwarf_Die		cu_die;		/* Current CU */
+	Dwarf_Die		sp_die;		/* Subprogram DIE from the pubnames lookup */
+	int			found;		/* Set to 1 once lines were collected */
+};
+
+#endif /* HAVE_DWARF_SUPPORT */
+
+#endif /*_PROBE_FINDER_H */
diff --git a/util/pstack.c b/util/pstack.c
new file mode 100644
index 0000000..797fe1a
--- /dev/null
+++ b/util/pstack.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple pointer stack
+ *
+ * (c) 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "util.h"
+#include "pstack.h"
+#include "debug.h"
+#include <linux/kernel.h>
+#include <stdlib.h>
+
+/*
+ * Fixed-capacity stack of opaque pointers.  The header and the slot array
+ * share one allocation (see pstack__new()); slots [0, top) are in use.
+ */
+struct pstack {
+	unsigned short	top;		/* Number of entries currently stored */
+	unsigned short	max_nr_entries;	/* Capacity of entries[] */
+	void		*entries[];	/* C99 flexible array member */
+};
+
+/*
+ * Allocate a zero-filled pstack with room for @max_nr_entries pointers.
+ * Returns NULL when the allocation fails.
+ */
+struct pstack *pstack__new(unsigned short max_nr_entries)
+{
+	size_t slots_size = max_nr_entries * sizeof(void *);
+	struct pstack *stack = zalloc(sizeof(*stack) + slots_size);
+
+	if (stack == NULL)
+		return NULL;
+
+	stack->max_nr_entries = max_nr_entries;
+	return stack;
+}
+
+/* Release @pstack with free(); the stored pointers themselves are not touched. */
+void pstack__delete(struct pstack *pstack)
+{
+	free(pstack);
+}
+
+/* Return true when no entries are stored (top == 0). */
+bool pstack__empty(const struct pstack *pstack)
+{
+	return pstack->top == 0;
+}
+
+/*
+ * Remove the topmost entry equal to @key, sliding the entries above it down
+ * by one slot.  Logs an error through pr_err() when @key is not stored.
+ */
+void pstack__remove(struct pstack *pstack, void *key)
+{
+	unsigned short pos;
+
+	/* Walk from the top towards the bottom of the stack. */
+	for (pos = pstack->top; pos > 0; pos--) {
+		unsigned short idx = pos - 1;
+		/* Number of entries stacked above slot idx. */
+		unsigned short above = pstack->top - pos;
+
+		if (pstack->entries[idx] != key)
+			continue;
+
+		if (above > 0)
+			memmove(&pstack->entries[idx], &pstack->entries[pos],
+				above * sizeof(void *));
+		pstack->top--;
+		return;
+	}
+
+	pr_err("%s: %p not on the pstack!\n", __func__, key);
+}
+
+/*
+ * Store @key on top of the stack.  When the stack is already at capacity
+ * the entry is dropped and an error is logged through pr_err().
+ */
+void pstack__push(struct pstack *pstack, void *key)
+{
+	if (pstack->top != pstack->max_nr_entries) {
+		pstack->entries[pstack->top] = key;
+		pstack->top++;
+		return;
+	}
+
+	pr_err("%s: top=%d, overflow!\n", __func__, pstack->top);
+}
+
+/*
+ * Remove and return the top entry, clearing its slot.  On an empty stack an
+ * error is logged through pr_err() and NULL is returned.
+ */
+void *pstack__pop(struct pstack *pstack)
+{
+	unsigned short slot;
+	void *popped;
+
+	if (pstack->top == 0) {
+		pr_err("%s: underflow!\n", __func__);
+		return NULL;
+	}
+
+	pstack->top--;
+	slot = pstack->top;
+	popped = pstack->entries[slot];
+	pstack->entries[slot] = NULL;
+	return popped;
+}
+
+/* Return the top entry without removing it, or NULL when the stack is empty. */
+void *pstack__peek(struct pstack *pstack)
+{
+	return pstack->top != 0 ? pstack->entries[pstack->top - 1] : NULL;
+}
diff --git a/util/pstack.h b/util/pstack.h
new file mode 100644
index 0000000..8729b8b
--- /dev/null
+++ b/util/pstack.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PSTACK_
+#define _PERF_PSTACK_
+
+#include <stdbool.h>
+
+/* Opaque to users; the layout is private to the implementation file. */
+struct pstack;
+/* Create a stack with room for max_nr_entries pointers. */
+struct pstack *pstack__new(unsigned short max_nr_entries);
+/* Destroy a stack created by pstack__new(). */
+void pstack__delete(struct pstack *pstack);
+/* True when the stack holds no entries. */
+bool pstack__empty(const struct pstack *pstack);
+/* Remove a specific entry identified by its pointer value. */
+void pstack__remove(struct pstack *pstack, void *key);
+/* Push an entry on top of the stack. */
+void pstack__push(struct pstack *pstack, void *key);
+/* Remove and return the top entry. */
+void *pstack__pop(struct pstack *pstack);
+/* Return the top entry without removing it. */
+void *pstack__peek(struct pstack *pstack);
+
+#endif /* _PERF_PSTACK_ */
diff --git a/util/python-ext-sources b/util/python-ext-sources
new file mode 100644
index 0000000..7aa0ea6
--- /dev/null
+++ b/util/python-ext-sources
@@ -0,0 +1,33 @@
+#
+# List of files needed by perf python extension
+#
+# Each source file must be placed on its own line so that it can be
+# processed by Makefile and util/setup.py accordingly.
+#
+
+util/python.c
+util/ctype.c
+util/evlist.c
+util/evsel.c
+util/cpumap.c
+util/memswap.c
+util/mmap.c
+util/namespaces.c
+../lib/bitmap.c
+../lib/find_bit.c
+../lib/hweight.c
+../lib/vsprintf.c
+util/thread_map.c
+util/util.c
+util/xyarray.c
+util/cgroup.c
+util/parse-branch-options.c
+util/rblist.c
+util/counts.c
+util/print_binary.c
+util/strlist.c
+util/trace-event.c
+../lib/rbtree.c
+util/string.c
+util/symbol_fprintf.c
+util/units.c
diff --git a/util/python.c b/util/python.c
new file mode 100644
index 0000000..863b614
--- /dev/null
+++ b/util/python.c
@@ -0,0 +1,1358 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <Python.h>
+#include <structmember.h>
+#include <inttypes.h>
+#include <poll.h>
+#include <linux/err.h>
+#include "evlist.h"
+#include "callchain.h"
+#include "evsel.h"
+#include "event.h"
+#include "cpumap.h"
+#include "print_binary.h"
+#include "thread_map.h"
+
+/*
+ * Python 2/3 compatibility shims: map the _PyUnicode_ and _PyLong_ helper
+ * names used in this file onto the PyString/PyInt API for Python 2 and onto
+ * the PyUnicode/PyLong API for Python 3.
+ *
+ * NOTE(review): _PyUnicode_AsString is only defined here for Python 2; the
+ * Python 3 branch relies on that name being supplied from elsewhere
+ * (presumably the CPython headers) - confirm for the targeted versions.
+ */
+#if PY_MAJOR_VERSION < 3
+#define _PyUnicode_FromString(arg) \
+  PyString_FromString(arg)
+#define _PyUnicode_AsString(arg) \
+  PyString_AsString(arg)
+#define _PyUnicode_FromFormat(...) \
+  PyString_FromFormat(__VA_ARGS__)
+#define _PyLong_FromLong(arg) \
+  PyInt_FromLong(arg)
+
+#else
+
+#define _PyUnicode_FromString(arg) \
+  PyUnicode_FromString(arg)
+#define _PyUnicode_FromFormat(...) \
+  PyUnicode_FromFormat(__VA_ARGS__)
+#define _PyLong_FromLong(arg) \
+  PyLong_FromLong(arg)
+#endif
+
+#ifndef Py_TYPE
+#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
+#endif
+
+/*
+ * Provide these two so that we don't have to link against callchain.c and
+ * start dragging hist.c, etc.
+ */
+struct callchain_param callchain_param;
+
+int parse_callchain_record(const char *arg __maybe_unused,
+			   struct callchain_param *param __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Support debug printing even though util/debug.c is not linked.  That means
+ * implementing 'verbose' and 'eprintf'.
+ */
+int verbose;
+
+/*
+ * Print a printf-style message to stderr when @var is at least @level.
+ * Returns vfprintf()'s result, or 0 when the message is suppressed.
+ */
+int eprintf(int level, int var, const char *fmt, ...)
+{
+	va_list ap;
+	int printed;
+
+	if (var < level)
+		return 0;
+
+	va_start(ap, fmt);
+	printed = vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	return printed;
+}
+
+/* Define PyVarObject_HEAD_INIT for python 2.5 */
+#ifndef PyVarObject_HEAD_INIT
+# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
+#endif
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initperf(void);
+#else
+PyMODINIT_FUNC PyInit_perf(void);
+#endif
+
+/*
+ * Build a PyMemberDef initializer named after @member, located at the offset
+ * of @member within struct @type, measured from pyrf_event.event.
+ */
+#define member_def(type, member, ptype, help) \
+	{ #member, ptype, \
+	  offsetof(struct pyrf_event, event) + offsetof(struct type, member), \
+	  0, help }
+
+/*
+ * Build a PyMemberDef initializer named @name, located at the offset of
+ * @member within struct perf_sample, measured from pyrf_event.sample.
+ */
+#define sample_member_def(name, member, ptype, help) \
+	{ #name, ptype, \
+	  offsetof(struct pyrf_event, sample) + offsetof(struct perf_sample, member), \
+	  0, help }
+
+/*
+ * Python object shared by all perf.*_event types: the event selector the
+ * event was matched to, the parsed sample, and a copy of the raw event.
+ */
+struct pyrf_event {
+	PyObject_HEAD
+	struct perf_evsel *evsel;	/* set by evlist.read_on_cpu() */
+	struct perf_sample sample;	/* filled by perf_evsel__parse_sample() */
+	union perf_event   event;	/* copied in by pyrf_event__new() */
+};
+
+/*
+ * PyMemberDef entries common to every event type, exposing the parsed
+ * perf_sample fields as sample_* attributes.  The expansion ends with a
+ * trailing comma so it can be followed directly by further entries.
+ */
+#define sample_members \
+	sample_member_def(sample_ip, ip, T_ULONGLONG, "event ip"),			 \
+	sample_member_def(sample_pid, pid, T_INT, "event pid"),			 \
+	sample_member_def(sample_tid, tid, T_INT, "event tid"),			 \
+	sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"),		 \
+	sample_member_def(sample_addr, addr, T_ULONGLONG, "event addr"),		 \
+	sample_member_def(sample_id, id, T_ULONGLONG, "event id"),			 \
+	sample_member_def(sample_stream_id, stream_id, T_ULONGLONG, "event stream id"), \
+	sample_member_def(sample_period, period, T_ULONGLONG, "event period"),		 \
+	sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"),
+
+static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object.");
+
+/* Attributes exposed by perf.mmap_event objects. */
+static PyMemberDef pyrf_mmap_event__members[] = {
+	sample_members
+	member_def(perf_event_header, type, T_UINT, "event type"),
+	/*
+	 * perf_event_header.misc is a 16-bit field; T_UINT would read four
+	 * bytes and fold the adjacent header.size into the value.
+	 */
+	member_def(perf_event_header, misc, T_USHORT, "event misc"),
+	member_def(mmap_event, pid, T_UINT, "event pid"),
+	member_def(mmap_event, tid, T_UINT, "event tid"),
+	member_def(mmap_event, start, T_ULONGLONG, "start of the map"),
+	member_def(mmap_event, len, T_ULONGLONG, "map length"),
+	member_def(mmap_event, pgoff, T_ULONGLONG, "page offset"),
+	member_def(mmap_event, filename, T_STRING_INPLACE, "backing store"),
+	{ .name = NULL, },
+};
+
+/*
+ * repr() for perf.mmap_event: format the mmap fields into a temporary
+ * heap string and convert it to a Python string.  Raises MemoryError when
+ * asprintf() fails.
+ */
+static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent)
+{
+	struct mmap_event *mm = &pevent->event.mmap;
+	PyObject *repr;
+	char *buf;
+	int n;
+
+	n = asprintf(&buf, "{ type: mmap, pid: %u, tid: %u, start: %#" PRIx64 ", "
+			   "length: %#" PRIx64 ", offset: %#" PRIx64 ", "
+			   "filename: %s }",
+		     mm->pid, mm->tid, mm->start, mm->len, mm->pgoff,
+		     mm->filename);
+	if (n < 0)
+		return PyErr_NoMemory();
+
+	repr = _PyUnicode_FromString(buf);
+	free(buf);
+	return repr;
+}
+
+static PyTypeObject pyrf_mmap_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.mmap_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_mmap_event__doc,
+	.tp_members	= pyrf_mmap_event__members,
+	.tp_repr	= (reprfunc)pyrf_mmap_event__repr,
+};
+
+static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object.");
+
+static PyMemberDef pyrf_task_event__members[] = {
+	sample_members
+	member_def(perf_event_header, type, T_UINT, "event type"),
+	member_def(fork_event, pid, T_UINT, "event pid"),
+	member_def(fork_event, ppid, T_UINT, "event ppid"),
+	member_def(fork_event, tid, T_UINT, "event tid"),
+	member_def(fork_event, ptid, T_UINT, "event ptid"),
+	member_def(fork_event, time, T_ULONGLONG, "timestamp"),
+	{ .name = NULL, },
+};
+
+/*
+ * repr() for perf.task_event: labelled "fork" for PERF_RECORD_FORK and
+ * "exit" for any other record type handled by this Python type.
+ */
+static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent)
+{
+	struct fork_event *task = &pevent->event.fork;
+	const char *kind = "exit";
+
+	if (pevent->event.header.type == PERF_RECORD_FORK)
+		kind = "fork";
+
+	return _PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, "
+				   "ptid: %u, time: %" PRIu64 "}",
+				   kind, task->pid, task->ppid, task->tid,
+				   task->ptid, task->time);
+}
+
+static PyTypeObject pyrf_task_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.task_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_task_event__doc,
+	.tp_members	= pyrf_task_event__members,
+	.tp_repr	= (reprfunc)pyrf_task_event__repr,
+};
+
+static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object.");
+
+static PyMemberDef pyrf_comm_event__members[] = {
+	sample_members
+	member_def(perf_event_header, type, T_UINT, "event type"),
+	member_def(comm_event, pid, T_UINT, "event pid"),
+	member_def(comm_event, tid, T_UINT, "event tid"),
+	member_def(comm_event, comm, T_STRING_INPLACE, "process name"),
+	{ .name = NULL, },
+};
+
+/* repr() for perf.comm_event: shows the pid, tid and comm fields. */
+static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent)
+{
+	struct comm_event *ce = &pevent->event.comm;
+
+	return _PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }",
+				   ce->pid, ce->tid, ce->comm);
+}
+
+static PyTypeObject pyrf_comm_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.comm_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_comm_event__doc,
+	.tp_members	= pyrf_comm_event__members,
+	.tp_repr	= (reprfunc)pyrf_comm_event__repr,
+};
+
+static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object.");
+
+static PyMemberDef pyrf_throttle_event__members[] = {
+	sample_members
+	member_def(perf_event_header, type, T_UINT, "event type"),
+	member_def(throttle_event, time, T_ULONGLONG, "timestamp"),
+	member_def(throttle_event, id, T_ULONGLONG, "event id"),
+	member_def(throttle_event, stream_id, T_ULONGLONG, "event stream id"),
+	{ .name = NULL, },
+};
+
+/*
+ * repr() for perf.throttle_event: labelled "throttle" for
+ * PERF_RECORD_THROTTLE and "unthrottle" otherwise.
+ */
+static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent)
+{
+	/*
+	 * pyrf_throttle_event__members addresses struct throttle_event from
+	 * the start of the event, so the struct overlays the whole record.
+	 * Skipping past the header first would shift every field read here.
+	 */
+	struct throttle_event *te = (struct throttle_event *)&pevent->event;
+
+	return _PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64
+				   ", stream_id: %" PRIu64 " }",
+				   pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un",
+				   te->time, te->id, te->stream_id);
+}
+
+static PyTypeObject pyrf_throttle_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.throttle_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_throttle_event__doc,
+	.tp_members	= pyrf_throttle_event__members,
+	.tp_repr	= (reprfunc)pyrf_throttle_event__repr,
+};
+
+static char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object.");
+
+static PyMemberDef pyrf_lost_event__members[] = {
+	sample_members
+	member_def(lost_event, id, T_ULONGLONG, "event id"),
+	member_def(lost_event, lost, T_ULONGLONG, "number of lost events"),
+	{ .name = NULL, },
+};
+
+/*
+ * repr() for perf.lost_event: format the id and lost fields into a
+ * temporary heap string and convert it to a Python string.  Raises
+ * MemoryError when asprintf() fails.
+ */
+static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent)
+{
+	struct lost_event *le = &pevent->event.lost;
+	PyObject *repr;
+	char *buf;
+
+	if (asprintf(&buf, "{ type: lost, id: %#" PRIx64 ", "
+			   "lost: %#" PRIx64 " }",
+		     le->id, le->lost) < 0)
+		return PyErr_NoMemory();
+
+	repr = _PyUnicode_FromString(buf);
+	free(buf);
+	return repr;
+}
+
+static PyTypeObject pyrf_lost_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.lost_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_lost_event__doc,
+	.tp_members	= pyrf_lost_event__members,
+	.tp_repr	= (reprfunc)pyrf_lost_event__repr,
+};
+
+static char pyrf_read_event__doc[] = PyDoc_STR("perf read event object.");
+
+static PyMemberDef pyrf_read_event__members[] = {
+	sample_members
+	member_def(read_event, pid, T_UINT, "event pid"),
+	member_def(read_event, tid, T_UINT, "event tid"),
+	{ .name = NULL, },
+};
+
+/* repr() for perf.read_event: shows only the pid and tid fields. */
+static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent)
+{
+	struct read_event *re = &pevent->event.read;
+
+	/*
+	 * FIXME: return the array of read values,
+	 * making this method useful ;-)
+	 */
+	return _PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }",
+				   re->pid, re->tid);
+}
+
+static PyTypeObject pyrf_read_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.read_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_read_event__doc,
+	.tp_members	= pyrf_read_event__members,
+	.tp_repr	= (reprfunc)pyrf_read_event__repr,
+};
+
+static char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object.");
+
+static PyMemberDef pyrf_sample_event__members[] = {
+	sample_members
+	member_def(perf_event_header, type, T_UINT, "event type"),
+	{ .name = NULL, },
+};
+
+/*
+ * repr() for perf.sample_event.  The text is constant, so it is converted
+ * directly instead of going through an asprintf()/free() round trip.
+ */
+static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent __maybe_unused)
+{
+	return _PyUnicode_FromString("{ type: sample }");
+}
+
+/* True when the event's evsel has attr.type PERF_TYPE_TRACEPOINT. */
+static bool is_tracepoint(struct pyrf_event *pevent)
+{
+	return pevent->evsel->attr.type == PERF_TYPE_TRACEPOINT;
+}
+
+/*
+ * Convert one tracepoint field of the sample's raw data into a Python object:
+ * array fields become a string or a bytearray, scalar fields an integer.
+ */
+static PyObject*
+tracepoint_field(struct pyrf_event *pe, struct format_field *field)
+{
+	struct pevent *pevent = field->event->pevent;
+	void *data = pe->sample.raw_data;
+	PyObject *ret = NULL;
+	unsigned long long val;
+	unsigned int offset, len;
+
+	if (field->flags & FIELD_IS_ARRAY) {
+		offset = field->offset;
+		len    = field->size;
+		if (field->flags & FIELD_IS_DYNAMIC) {
+			/*
+			 * The value stored at the field location packs the
+			 * real length (upper 16 bits) and offset (lower 16).
+			 */
+			val     = pevent_read_number(pevent, data + offset, len);
+			offset  = val;
+			len     = offset >> 16;
+			offset &= 0xffff;
+		}
+		if (field->flags & FIELD_IS_STRING &&
+		    is_printable_array(data + offset, len)) {
+			ret = _PyUnicode_FromString((char *)data + offset);
+		} else {
+			ret = PyByteArray_FromStringAndSize((const char *) data + offset, len);
+			/* Note: this clears the string flag on the shared field. */
+			field->flags &= ~FIELD_IS_STRING;
+		}
+	} else {
+		val = pevent_read_number(pevent, data + field->offset,
+					 field->size);
+		/*
+		 * NOTE(review): val is narrowed to (unsigned) long before
+		 * conversion - confirm this is acceptable where long is
+		 * narrower than long long.
+		 */
+		if (field->flags & FIELD_IS_POINTER)
+			ret = PyLong_FromUnsignedLong((unsigned long) val);
+		else if (field->flags & FIELD_IS_SIGNED)
+			ret = PyLong_FromLong((long) val);
+		else
+			ret = PyLong_FromUnsignedLong((unsigned long) val);
+	}
+
+	return ret;
+}
+
+/*
+ * Look up @attr_name among the tracepoint format fields of the event's evsel
+ * and return its value as a Python object.
+ *
+ * Returns NULL when the name cannot be resolved to a field, in which case
+ * the caller falls back to generic attribute lookup.  The tracepoint format
+ * is resolved from attr.config on first use and cached in evsel->tp_format.
+ */
+static PyObject*
+get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
+{
+	struct perf_evsel *evsel = pevent->evsel;
+	struct format_field *field;
+	PyObject *name_obj, *ret = NULL;
+	const char *str;
+
+	/* PyObject_Str() returns a new reference that must be released. */
+	name_obj = PyObject_Str(attr_name);
+	if (name_obj == NULL) {
+		/* Leave no stale error behind for the fallback lookup. */
+		PyErr_Clear();
+		return NULL;
+	}
+
+	/* str borrows from name_obj, so name_obj must outlive its use. */
+	str = _PyUnicode_AsString(name_obj);
+	if (str == NULL) {
+		PyErr_Clear();
+		goto out;
+	}
+
+	if (!evsel->tp_format) {
+		struct event_format *tp_format;
+
+		tp_format = trace_event__tp_format_id(evsel->attr.config);
+		if (!tp_format)
+			goto out;
+
+		evsel->tp_format = tp_format;
+	}
+
+	field = pevent_find_any_field(evsel->tp_format, str);
+	if (field)
+		ret = tracepoint_field(pevent, field);
+out:
+	Py_DECREF(name_obj);
+	return ret;
+}
+
+/*
+ * tp_getattro for perf.sample_event: for tracepoint events try the
+ * tracepoint fields first, otherwise (or when that yields NULL) use
+ * PyObject_GenericGetAttr().
+ */
+static PyObject*
+pyrf_sample_event__getattro(struct pyrf_event *pevent, PyObject *attr_name)
+{
+	PyObject *obj = NULL;
+
+	if (is_tracepoint(pevent))
+		obj = get_tracepoint_field(pevent, attr_name);
+
+	return obj ?: PyObject_GenericGetAttr((PyObject *) pevent, attr_name);
+}
+
+static PyTypeObject pyrf_sample_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.sample_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_sample_event__doc,
+	.tp_members	= pyrf_sample_event__members,
+	.tp_repr	= (reprfunc)pyrf_sample_event__repr,
+	.tp_getattro	= (getattrofunc) pyrf_sample_event__getattro,
+};
+
+static char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object.");
+
+static PyMemberDef pyrf_context_switch_event__members[] = {
+	sample_members
+	member_def(perf_event_header, type, T_UINT, "event type"),
+	member_def(context_switch_event, next_prev_pid, T_UINT, "next/prev pid"),
+	member_def(context_switch_event, next_prev_tid, T_UINT, "next/prev tid"),
+	{ .name = NULL, },
+};
+
+/*
+ * repr() for perf.context_switch_event.  switch_out is 1 when
+ * PERF_RECORD_MISC_SWITCH_OUT is set in header.misc, else 0.  Raises
+ * MemoryError when asprintf() fails.
+ */
+static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent)
+{
+	struct context_switch_event *cs = &pevent->event.context_switch;
+	PyObject *repr;
+	char *buf;
+
+	if (asprintf(&buf, "{ type: context_switch, next_prev_pid: %u, next_prev_tid: %u, switch_out: %u }",
+		     cs->next_prev_pid,
+		     cs->next_prev_tid,
+		     !!(pevent->event.header.misc & PERF_RECORD_MISC_SWITCH_OUT)) < 0)
+		return PyErr_NoMemory();
+
+	repr = _PyUnicode_FromString(buf);
+	free(buf);
+	return repr;
+}
+
+static PyTypeObject pyrf_context_switch_event__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.context_switch_event",
+	.tp_basicsize	= sizeof(struct pyrf_event),
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_context_switch_event__doc,
+	.tp_members	= pyrf_context_switch_event__members,
+	.tp_repr	= (reprfunc)pyrf_context_switch_event__repr,
+};
+
+/*
+ * Give every event type the generic tp_new and finalize it with
+ * PyType_Ready().  Stops at, and returns, the first negative result.
+ */
+static int pyrf_event__setup_types(void)
+{
+	/* Listed in the order the types are readied. */
+	PyTypeObject *types[] = {
+		&pyrf_mmap_event__type,
+		&pyrf_lost_event__type,
+		&pyrf_task_event__type,
+		&pyrf_comm_event__type,
+		&pyrf_throttle_event__type,
+		&pyrf_read_event__type,
+		&pyrf_sample_event__type,
+		&pyrf_context_switch_event__type,
+	};
+	const size_t nr_types = sizeof(types) / sizeof(types[0]);
+	size_t i;
+	int err = 0;
+
+	for (i = 0; i < nr_types; i++)
+		types[i]->tp_new = PyType_GenericNew;
+
+	for (i = 0; i < nr_types; i++) {
+		err = PyType_Ready(types[i]);
+		if (err < 0)
+			break;
+	}
+
+	return err;
+}
+
+static PyTypeObject *pyrf_event__type[] = {
+	[PERF_RECORD_MMAP]	 = &pyrf_mmap_event__type,
+	[PERF_RECORD_LOST]	 = &pyrf_lost_event__type,
+	[PERF_RECORD_COMM]	 = &pyrf_comm_event__type,
+	[PERF_RECORD_EXIT]	 = &pyrf_task_event__type,
+	[PERF_RECORD_THROTTLE]	 = &pyrf_throttle_event__type,
+	[PERF_RECORD_UNTHROTTLE] = &pyrf_throttle_event__type,
+	[PERF_RECORD_FORK]	 = &pyrf_task_event__type,
+	[PERF_RECORD_READ]	 = &pyrf_read_event__type,
+	[PERF_RECORD_SAMPLE]	 = &pyrf_sample_event__type,
+	[PERF_RECORD_SWITCH]	 = &pyrf_context_switch_event__type,
+	[PERF_RECORD_SWITCH_CPU_WIDE]  = &pyrf_context_switch_event__type,
+};
+
+/*
+ * Create the Python event object matching @event's record type and copy the
+ * raw event into it.
+ *
+ * Returns NULL, without setting a Python exception itself, for record types
+ * that have no Python type; also returns NULL when allocation fails.
+ */
+static PyObject *pyrf_event__new(union perf_event *event)
+{
+	struct pyrf_event *pevent;
+	PyTypeObject *ptype;
+
+	if ((event->header.type < PERF_RECORD_MMAP ||
+	     event->header.type > PERF_RECORD_SAMPLE) &&
+	    !(event->header.type == PERF_RECORD_SWITCH ||
+	      event->header.type == PERF_RECORD_SWITCH_CPU_WIDE))
+		return NULL;
+
+	ptype = pyrf_event__type[event->header.type];
+	pevent = PyObject_New(struct pyrf_event, ptype);
+	if (pevent != NULL) {
+		size_t size = event->header.size;
+
+		/*
+		 * header.size describes the record in the ring buffer and may
+		 * exceed the fixed-size union embedded in the object; never
+		 * copy past the end of the destination.
+		 */
+		if (size > sizeof(pevent->event))
+			size = sizeof(pevent->event);
+		memcpy(&pevent->event, event, size);
+	}
+	return (PyObject *)pevent;
+}
+
+struct pyrf_cpu_map {
+	PyObject_HEAD
+
+	struct cpu_map *cpus;
+};
+
+/*
+ * tp_init for perf.cpu_map: accepts an optional "cpustr" string and builds
+ * the map with cpu_map__new(); cpustr stays NULL when omitted.
+ *
+ * Returns 0 on success, -1 on failure.
+ * NOTE(review): the cpu_map__new() failure path returns -1 without calling
+ * any PyErr_* function here - confirm an exception is set elsewhere.
+ */
+static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus,
+			      PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = { "cpustr", NULL };
+	char *cpustr = NULL;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s",
+					 kwlist, &cpustr))
+		return -1;
+
+	pcpus->cpus = cpu_map__new(cpustr);
+	if (pcpus->cpus == NULL)
+		return -1;
+	return 0;
+}
+
+/*
+ * tp_dealloc for perf.cpu_map: drop the map reference with cpu_map__put(),
+ * then free the Python object through the type's tp_free slot.
+ */
+static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus)
+{
+	cpu_map__put(pcpus->cpus);
+	Py_TYPE(pcpus)->tp_free((PyObject*)pcpus);
+}
+
+/* sq_length for perf.cpu_map: the map's nr field. */
+static Py_ssize_t pyrf_cpu_map__length(PyObject *obj)
+{
+	struct pyrf_cpu_map *pcpus = (void *)obj;
+
+	return pcpus->cpus->nr;
+}
+
+/*
+ * sq_item for perf.cpu_map: return entry @i of the map as a Python int.
+ *
+ * Raises IndexError for an index outside [0, nr).  The exception is what
+ * tells Python's sequence iteration to stop, and the lower bound keeps a
+ * still-negative index from reading before the start of the array.
+ */
+static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
+{
+	struct pyrf_cpu_map *pcpus = (void *)obj;
+
+	if (i < 0 || i >= pcpus->cpus->nr) {
+		PyErr_SetString(PyExc_IndexError, "cpu_map index out of range");
+		return NULL;
+	}
+
+	return Py_BuildValue("i", pcpus->cpus->map[i]);
+}
+
+static PySequenceMethods pyrf_cpu_map__sequence_methods = {
+	.sq_length = pyrf_cpu_map__length,
+	.sq_item   = pyrf_cpu_map__item,
+};
+
+static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object.");
+
+static PyTypeObject pyrf_cpu_map__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.cpu_map",
+	.tp_basicsize	= sizeof(struct pyrf_cpu_map),
+	.tp_dealloc	= (destructor)pyrf_cpu_map__delete,
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_cpu_map__doc,
+	.tp_as_sequence	= &pyrf_cpu_map__sequence_methods,
+	.tp_init	= (initproc)pyrf_cpu_map__init,
+};
+
+static int pyrf_cpu_map__setup_types(void)
+{
+	pyrf_cpu_map__type.tp_new = PyType_GenericNew;
+	return PyType_Ready(&pyrf_cpu_map__type);
+}
+
+struct pyrf_thread_map {
+	PyObject_HEAD
+
+	struct thread_map *threads;
+};
+
+/*
+ * tp_init for perf.thread_map: accepts optional "pid", "tid" and "uid"
+ * integers (defaults -1, -1 and UINT_MAX) and builds the map with
+ * thread_map__new().
+ *
+ * Returns 0 on success, -1 on failure.
+ * NOTE(review): the thread_map__new() failure path returns -1 without
+ * calling any PyErr_* function here - confirm an exception is set elsewhere.
+ */
+static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
+				 PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = { "pid", "tid", "uid", NULL };
+	int pid = -1, tid = -1, uid = UINT_MAX;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii",
+					 kwlist, &pid, &tid, &uid))
+		return -1;
+
+	pthreads->threads = thread_map__new(pid, tid, uid);
+	if (pthreads->threads == NULL)
+		return -1;
+	return 0;
+}
+
+/*
+ * tp_dealloc for perf.thread_map: drop the map reference with
+ * thread_map__put(), then free the Python object through tp_free.
+ */
+static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads)
+{
+	thread_map__put(pthreads->threads);
+	Py_TYPE(pthreads)->tp_free((PyObject*)pthreads);
+}
+
+/* sq_length for perf.thread_map: the map's nr field. */
+static Py_ssize_t pyrf_thread_map__length(PyObject *obj)
+{
+	struct pyrf_thread_map *pthreads = (void *)obj;
+
+	return pthreads->threads->nr;
+}
+
+/*
+ * sq_item for perf.thread_map: return entry @i of the map as a Python int.
+ *
+ * Raises IndexError for an index outside [0, nr).  The exception is what
+ * tells Python's sequence iteration to stop, and the lower bound keeps a
+ * still-negative index from reading before the start of the array.
+ */
+static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
+{
+	struct pyrf_thread_map *pthreads = (void *)obj;
+
+	if (i < 0 || i >= pthreads->threads->nr) {
+		PyErr_SetString(PyExc_IndexError, "thread_map index out of range");
+		return NULL;
+	}
+
+	return Py_BuildValue("i", pthreads->threads->map[i]);
+}
+
+static PySequenceMethods pyrf_thread_map__sequence_methods = {
+	.sq_length = pyrf_thread_map__length,
+	.sq_item   = pyrf_thread_map__item,
+};
+
+static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object.");
+
+static PyTypeObject pyrf_thread_map__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.thread_map",
+	.tp_basicsize	= sizeof(struct pyrf_thread_map),
+	.tp_dealloc	= (destructor)pyrf_thread_map__delete,
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_thread_map__doc,
+	.tp_as_sequence	= &pyrf_thread_map__sequence_methods,
+	.tp_init	= (initproc)pyrf_thread_map__init,
+};
+
+static int pyrf_thread_map__setup_types(void)
+{
+	pyrf_thread_map__type.tp_new = PyType_GenericNew;
+	return PyType_Ready(&pyrf_thread_map__type);
+}
+
+struct pyrf_evsel {
+	PyObject_HEAD
+
+	struct perf_evsel evsel;
+};
+
+/*
+ * tp_init for perf.evsel: build a perf_event_attr from the optional keyword
+ * arguments and initialize the embedded evsel with it.
+ *
+ * Defaults: hardware CPU-cycles event, sample_type PERIOD|TID, freq = 1 and
+ * sample_id_all = 1; every other flag is 0.  sample_freq and sample_period
+ * share storage in the attr, so passing both non-zero raises ValueError.
+ *
+ * Returns 0 on success, -1 with a Python exception set on failure.
+ */
+static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
+			    PyObject *args, PyObject *kwargs)
+{
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.sample_type = PERF_SAMPLE_PERIOD | PERF_SAMPLE_TID,
+	};
+	static char *kwlist[] = {
+		"type",
+		"config",
+		"sample_freq",
+		"sample_period",
+		"sample_type",
+		"read_format",
+		"disabled",
+		"inherit",
+		"pinned",
+		"exclusive",
+		"exclude_user",
+		"exclude_kernel",
+		"exclude_hv",
+		"exclude_idle",
+		"mmap",
+		"context_switch",
+		"comm",
+		"freq",
+		"inherit_stat",
+		"enable_on_exec",
+		"task",
+		"watermark",
+		"precise_ip",
+		"mmap_data",
+		"sample_id_all",
+		"wakeup_events",
+		"bp_type",
+		"bp_addr",
+		"bp_len",
+		 NULL
+	};
+	u64 sample_period = 0;
+	u32 disabled = 0,
+	    inherit = 0,
+	    pinned = 0,
+	    exclusive = 0,
+	    exclude_user = 0,
+	    exclude_kernel = 0,
+	    exclude_hv = 0,
+	    exclude_idle = 0,
+	    mmap = 0,
+	    context_switch = 0,
+	    comm = 0,
+	    freq = 1,
+	    inherit_stat = 0,
+	    enable_on_exec = 0,
+	    task = 0,
+	    watermark = 0,
+	    precise_ip = 0,
+	    mmap_data = 0,
+	    sample_id_all = 1;
+	int idx = 0;
+
+	/*
+	 * NOTE(review): &idx is passed below but kwlist has no matching
+	 * keyword, so idx appears to always stay 0 - confirm intent.
+	 */
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs,
+					 "|iKiKKiiiiiiiiiiiiiiiiiiiiiiKK", kwlist,
+					 &attr.type, &attr.config, &attr.sample_freq,
+					 &sample_period, &attr.sample_type,
+					 &attr.read_format, &disabled, &inherit,
+					 &pinned, &exclusive, &exclude_user,
+					 &exclude_kernel, &exclude_hv, &exclude_idle,
+					 &mmap, &context_switch, &comm, &freq, &inherit_stat,
+					 &enable_on_exec, &task, &watermark,
+					 &precise_ip, &mmap_data, &sample_id_all,
+					 &attr.wakeup_events, &attr.bp_type,
+					 &attr.bp_addr, &attr.bp_len, &idx))
+		return -1;
+
+	/* union... */
+	if (sample_period != 0) {
+		if (attr.sample_freq != 0) {
+			/* Returning -1 with no exception set is an API error. */
+			PyErr_SetString(PyExc_ValueError,
+					"sample_freq and sample_period are mutually exclusive");
+			return -1;
+		}
+		attr.sample_period = sample_period;
+	}
+
+	/* Bitfields */
+	attr.disabled	    = disabled;
+	attr.inherit	    = inherit;
+	attr.pinned	    = pinned;
+	attr.exclusive	    = exclusive;
+	attr.exclude_user   = exclude_user;
+	attr.exclude_kernel = exclude_kernel;
+	attr.exclude_hv	    = exclude_hv;
+	attr.exclude_idle   = exclude_idle;
+	attr.mmap	    = mmap;
+	attr.context_switch = context_switch;
+	attr.comm	    = comm;
+	attr.freq	    = freq;
+	attr.inherit_stat   = inherit_stat;
+	attr.enable_on_exec = enable_on_exec;
+	attr.task	    = task;
+	attr.watermark	    = watermark;
+	attr.precise_ip	    = precise_ip;
+	attr.mmap_data	    = mmap_data;
+	attr.sample_id_all  = sample_id_all;
+	attr.size	    = sizeof(attr);
+
+	perf_evsel__init(&pevsel->evsel, &attr, idx);
+	return 0;
+}
+
+/*
+ * tp_dealloc for perf.evsel: tear down the embedded evsel with
+ * perf_evsel__exit(), then free the Python object through tp_free.
+ */
+static void pyrf_evsel__delete(struct pyrf_evsel *pevsel)
+{
+	perf_evsel__exit(&pevsel->evsel);
+	Py_TYPE(pevsel)->tp_free((PyObject*)pevsel);
+}
+
+/*
+ * evsel.open(cpus=None, threads=None, group=0, inherit=0): open the event
+ * on the given cpu and thread maps.  Returns None, or raises OSError from
+ * errno when perf_evsel__open() fails.
+ *
+ * NOTE(review): "group" is parsed but not referenced afterwards, and the
+ * cpus/threads objects are cast without a type check - callers presumably
+ * must pass perf.cpu_map / perf.thread_map instances; confirm.
+ */
+static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
+				  PyObject *args, PyObject *kwargs)
+{
+	struct perf_evsel *evsel = &pevsel->evsel;
+	struct cpu_map *cpus = NULL;
+	struct thread_map *threads = NULL;
+	PyObject *pcpus = NULL, *pthreads = NULL;
+	int group = 0, inherit = 0;
+	static char *kwlist[] = { "cpus", "threads", "group", "inherit", NULL };
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,
+					 &pcpus, &pthreads, &group, &inherit))
+		return NULL;
+
+	if (pthreads != NULL)
+		threads = ((struct pyrf_thread_map *)pthreads)->threads;
+
+	if (pcpus != NULL)
+		cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
+
+	/* Overrides whatever "inherit" was given at construction time. */
+	evsel->attr.inherit = inherit;
+	/*
+	 * This will group just the fds for this single evsel, to group
+	 * multiple events, use evlist.open().
+	 */
+	if (perf_evsel__open(evsel, cpus, threads) < 0) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+static PyMethodDef pyrf_evsel__methods[] = {
+	{
+		.ml_name  = "open",
+		.ml_meth  = (PyCFunction)pyrf_evsel__open,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("open the event selector file descriptor table.")
+	},
+	{ .ml_name = NULL, }
+};
+
+static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object.");
+
+static PyTypeObject pyrf_evsel__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.evsel",
+	.tp_basicsize	= sizeof(struct pyrf_evsel),
+	.tp_dealloc	= (destructor)pyrf_evsel__delete,
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_evsel__doc,
+	.tp_methods	= pyrf_evsel__methods,
+	.tp_init	= (initproc)pyrf_evsel__init,
+};
+
+static int pyrf_evsel__setup_types(void)
+{
+	pyrf_evsel__type.tp_new = PyType_GenericNew;
+	return PyType_Ready(&pyrf_evsel__type);
+}
+
+struct pyrf_evlist {
+	PyObject_HEAD
+
+	struct perf_evlist evlist;
+};
+
+/*
+ * tp_init for perf.evlist: takes two required positional arguments, a cpu
+ * map object and a thread map object, and initializes the embedded evlist
+ * with the maps they wrap.  Keyword arguments are ignored.
+ *
+ * NOTE(review): both objects are cast without a type check - callers
+ * presumably must pass perf.cpu_map / perf.thread_map instances; confirm.
+ */
+static int pyrf_evlist__init(struct pyrf_evlist *pevlist,
+			     PyObject *args, PyObject *kwargs __maybe_unused)
+{
+	PyObject *pcpus = NULL, *pthreads = NULL;
+	struct cpu_map *cpus;
+	struct thread_map *threads;
+
+	if (!PyArg_ParseTuple(args, "OO", &pcpus, &pthreads))
+		return -1;
+
+	threads = ((struct pyrf_thread_map *)pthreads)->threads;
+	cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
+	perf_evlist__init(&pevlist->evlist, cpus, threads);
+	return 0;
+}
+
+/*
+ * tp_dealloc for perf.evlist: tear down the embedded evlist with
+ * perf_evlist__exit(), then free the Python object through tp_free.
+ */
+static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
+{
+	perf_evlist__exit(&pevlist->evlist);
+	Py_TYPE(pevlist)->tp_free((PyObject*)pevlist);
+}
+
+/*
+ * evlist.mmap(pages=128, overwrite=False): map the event buffers with
+ * perf_evlist__mmap().  Returns None, or raises OSError from errno.
+ *
+ * NOTE(review): "overwrite" is parsed but not passed on to
+ * perf_evlist__mmap() - it currently has no effect here.
+ */
+static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
+				   PyObject *args, PyObject *kwargs)
+{
+	struct perf_evlist *evlist = &pevlist->evlist;
+	static char *kwlist[] = { "pages", "overwrite", NULL };
+	int pages = 128, overwrite = false;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist,
+					 &pages, &overwrite))
+		return NULL;
+
+	if (perf_evlist__mmap(evlist, pages) < 0) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*
+ * evlist.poll(timeout=-1): call perf_evlist__poll() with the given timeout
+ * and return its result as a Python int.  Raises OSError from errno when
+ * the result is negative.
+ */
+static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
+				   PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = { "timeout", NULL };
+	int timeout = -1;
+	int nr_ready;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
+		return NULL;
+
+	nr_ready = perf_evlist__poll(&pevlist->evlist, timeout);
+	if (nr_ready >= 0)
+		return Py_BuildValue("i", nr_ready);
+
+	PyErr_SetFromErrno(PyExc_OSError);
+	return NULL;
+}
+
+/*
+ * evlist.get_pollfd(): return a list of Python file objects, one per entry
+ * of the evlist's pollfd table.
+ *
+ * Returns NULL with an exception set on failure; the partially built list
+ * is released on every error path.
+ */
+static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
+					 PyObject *args __maybe_unused,
+					 PyObject *kwargs __maybe_unused)
+{
+	struct perf_evlist *evlist = &pevlist->evlist;
+	PyObject *list = PyList_New(0);
+	int i;
+
+	/* PyList_New() has already set the exception. */
+	if (list == NULL)
+		return NULL;
+
+	for (i = 0; i < evlist->pollfd.nr; ++i) {
+		PyObject *file;
+#if PY_MAJOR_VERSION < 3
+		FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r");
+
+		if (fp == NULL)
+			goto free_list;
+
+		file = PyFile_FromFile(fp, "perf", "r", NULL);
+#else
+		file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 1);
+#endif
+		if (file == NULL)
+			goto free_list;
+
+		if (PyList_Append(list, file) != 0) {
+			Py_DECREF(file);
+			goto free_list;
+		}
+
+		/* The list now holds its own reference to the file. */
+		Py_DECREF(file);
+	}
+
+	return list;
+free_list:
+	Py_DECREF(list);
+	return PyErr_NoMemory();
+}
+
+
+/*
+ * evlist.add(evsel): append the evsel embedded in the given object to the
+ * list and return the list's entry count after the addition.
+ *
+ * NOTE(review): the argument is cast without a type check - callers
+ * presumably must pass a perf.evsel instance; confirm.
+ */
+static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist,
+				  PyObject *args,
+				  PyObject *kwargs __maybe_unused)
+{
+	struct perf_evlist *evlist = &pevlist->evlist;
+	PyObject *pevsel;
+	struct perf_evsel *evsel;
+
+	if (!PyArg_ParseTuple(args, "O", &pevsel))
+		return NULL;
+
+	/* Take a reference: the evlist now points into this Python object. */
+	Py_INCREF(pevsel);
+	evsel = &((struct pyrf_evsel *)pevsel)->evsel;
+	evsel->idx = evlist->nr_entries;
+	perf_evlist__add(evlist, evsel);
+
+	return Py_BuildValue("i", evlist->nr_entries);
+}
+
+/*
+ * evlist.read_on_cpu(cpu, sample_id_all=1): read one event from the mmap
+ * slot selected by @cpu.
+ *
+ * Returns a perf.*_event object, or None when no event is available or the
+ * event matches no evsel in the list.  Raises MemoryError when no event
+ * object could be created and OSError when the sample cannot be parsed.
+ * The event object is released on every path that does not return it.
+ *
+ * NOTE(review): "cpu" indexes evlist->mmap[] unchecked and "sample_id_all"
+ * is parsed but not referenced afterwards - confirm both are intended.
+ */
+static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
+					  PyObject *args, PyObject *kwargs)
+{
+	struct perf_evlist *evlist = &pevlist->evlist;
+	union perf_event *event;
+	int sample_id_all = 1, cpu;
+	static char *kwlist[] = { "cpu", "sample_id_all", NULL };
+	struct perf_mmap *md;
+	int err;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
+					 &cpu, &sample_id_all))
+		return NULL;
+
+	md = &evlist->mmap[cpu];
+	if (perf_mmap__read_init(md) < 0)
+		goto end;
+
+	event = perf_mmap__read_event(md);
+	if (event != NULL) {
+		PyObject *pyevent = pyrf_event__new(event);
+		struct pyrf_event *pevent = (struct pyrf_event *)pyevent;
+		struct perf_evsel *evsel;
+
+		if (pyevent == NULL)
+			return PyErr_NoMemory();
+
+		evsel = perf_evlist__event2evsel(evlist, event);
+		if (!evsel) {
+			/* Nothing will own the new object: drop it. */
+			Py_DECREF(pyevent);
+			Py_INCREF(Py_None);
+			return Py_None;
+		}
+
+		pevent->evsel = evsel;
+
+		err = perf_evsel__parse_sample(evsel, event, &pevent->sample);
+
+		/* Consume the event only after we parsed it out. */
+		perf_mmap__consume(md);
+
+		if (err) {
+			Py_DECREF(pyevent);
+			return PyErr_Format(PyExc_OSError,
+					    "perf: can't parse sample, err=%d", err);
+		}
+		return pyevent;
+	}
+end:
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*
+ * evlist.open(group=0): open the file descriptors of every event in the
+ * list.
+ *
+ * When 'group' is non-zero the group leader links are set up first.
+ * Returns None on success, NULL with OSError (from errno) set on failure.
+ */
+static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
+				   PyObject *args, PyObject *kwargs)
+{
+	struct perf_evlist *evlist = &pevlist->evlist;
+	int group = 0;
+	static char *kwlist[] = { "group", NULL };
+
+	/*
+	 * One optional int: the format must describe exactly the one keyword
+	 * and the one 'int' target passed below.
+	 */
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &group))
+		return NULL;
+
+	if (group)
+		perf_evlist__set_leader(evlist);
+
+	if (perf_evlist__open(evlist) < 0) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*
+ * Method table of the perf.evlist type.  Every entry accepts positional and
+ * keyword arguments; the table ends with an entry whose name is NULL.
+ */
+static PyMethodDef pyrf_evlist__methods[] = {
+	{
+		.ml_name  = "mmap",
+		.ml_meth  = (PyCFunction)pyrf_evlist__mmap,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("mmap the file descriptor table.")
+	},
+	{
+		.ml_name  = "open",
+		.ml_meth  = (PyCFunction)pyrf_evlist__open,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("open the file descriptors.")
+	},
+	{
+		.ml_name  = "poll",
+		.ml_meth  = (PyCFunction)pyrf_evlist__poll,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("poll the file descriptor table.")
+	},
+	{
+		.ml_name  = "get_pollfd",
+		.ml_meth  = (PyCFunction)pyrf_evlist__get_pollfd,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("get the poll file descriptor table.")
+	},
+	{
+		.ml_name  = "add",
+		.ml_meth  = (PyCFunction)pyrf_evlist__add,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("adds an event selector to the list.")
+	},
+	{
+		.ml_name  = "read_on_cpu",
+		.ml_meth  = (PyCFunction)pyrf_evlist__read_on_cpu,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("reads an event.")
+	},
+	{ .ml_name = NULL, }
+};
+
+/* sq_length slot: len(evlist) is the number of entries in the list. */
+static Py_ssize_t pyrf_evlist__length(PyObject *obj)
+{
+	return ((struct pyrf_evlist *)obj)->evlist.nr_entries;
+}
+
+/*
+ * sq_item slot: evlist[i] returns a new reference to the i-th event selector.
+ *
+ * An index outside [0, nr_entries) raises IndexError.  The exception must be
+ * set explicitly: returning NULL without one is reported by the interpreter
+ * as a SystemError, and the legacy iteration protocol relies on IndexError
+ * to detect the end of the sequence.
+ */
+static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i)
+{
+	struct pyrf_evlist *pevlist = (void *)obj;
+	struct perf_evsel *pos;
+
+	/*
+	 * A negative index would never hit zero in the walk below, leaving
+	 * 'pos' computed from the list head instead of a real entry.
+	 */
+	if (i < 0 || i >= pevlist->evlist.nr_entries) {
+		PyErr_SetString(PyExc_IndexError, "Index out of range");
+		return NULL;
+	}
+
+	evlist__for_each_entry(&pevlist->evlist, pos) {
+		if (i-- == 0)
+			break;
+	}
+
+	/* The evsel is embedded in its Python wrapper: map it back. */
+	return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel));
+}
+
+/* Sequence protocol of perf.evlist: only len() and indexing are provided. */
+static PySequenceMethods pyrf_evlist__sequence_methods = {
+	.sq_length = pyrf_evlist__length,
+	.sq_item   = pyrf_evlist__item,
+};
+
+static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object.");
+
+/*
+ * Type object for "perf.evlist".  It can be subclassed (Py_TPFLAGS_BASETYPE)
+ * and wires up the sequence protocol, the method table, the initializer and
+ * the destructor defined in this file.
+ */
+static PyTypeObject pyrf_evlist__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.evlist",
+	.tp_basicsize	= sizeof(struct pyrf_evlist),
+	.tp_dealloc	= (destructor)pyrf_evlist__delete,
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_as_sequence	= &pyrf_evlist__sequence_methods,
+	.tp_doc		= pyrf_evlist__doc,
+	.tp_methods	= pyrf_evlist__methods,
+	.tp_init	= (initproc)pyrf_evlist__init,
+};
+
+/*
+ * Finish the perf.evlist type: install the generic allocator as tp_new and
+ * hand the type to PyType_Ready(), whose result is returned.
+ */
+static int pyrf_evlist__setup_types(void)
+{
+	pyrf_evlist__type.tp_new = PyType_GenericNew;
+	return PyType_Ready(&pyrf_evlist__type);
+}
+
+/*
+ * PERF_CONST(FOO) expands to the table entry { "FOO", PERF_FOO }, i.e. the
+ * name is stored without the PERF_ prefix.
+ */
+#define PERF_CONST(name) { #name, PERF_##name }
+
+/*
+ * Name/value pairs that the module init function inserts into the module
+ * dictionary as integers.  The table ends with an entry whose name is NULL.
+ */
+static struct {
+	const char *name;
+	int	    value;
+} perf__constants[] = {
+	PERF_CONST(TYPE_HARDWARE),
+	PERF_CONST(TYPE_SOFTWARE),
+	PERF_CONST(TYPE_TRACEPOINT),
+	PERF_CONST(TYPE_HW_CACHE),
+	PERF_CONST(TYPE_RAW),
+	PERF_CONST(TYPE_BREAKPOINT),
+
+	PERF_CONST(COUNT_HW_CPU_CYCLES),
+	PERF_CONST(COUNT_HW_INSTRUCTIONS),
+	PERF_CONST(COUNT_HW_CACHE_REFERENCES),
+	PERF_CONST(COUNT_HW_CACHE_MISSES),
+	PERF_CONST(COUNT_HW_BRANCH_INSTRUCTIONS),
+	PERF_CONST(COUNT_HW_BRANCH_MISSES),
+	PERF_CONST(COUNT_HW_BUS_CYCLES),
+	PERF_CONST(COUNT_HW_CACHE_L1D),
+	PERF_CONST(COUNT_HW_CACHE_L1I),
+	PERF_CONST(COUNT_HW_CACHE_LL),
+	PERF_CONST(COUNT_HW_CACHE_DTLB),
+	PERF_CONST(COUNT_HW_CACHE_ITLB),
+	PERF_CONST(COUNT_HW_CACHE_BPU),
+	PERF_CONST(COUNT_HW_CACHE_OP_READ),
+	PERF_CONST(COUNT_HW_CACHE_OP_WRITE),
+	PERF_CONST(COUNT_HW_CACHE_OP_PREFETCH),
+	PERF_CONST(COUNT_HW_CACHE_RESULT_ACCESS),
+	PERF_CONST(COUNT_HW_CACHE_RESULT_MISS),
+
+	PERF_CONST(COUNT_HW_STALLED_CYCLES_FRONTEND),
+	PERF_CONST(COUNT_HW_STALLED_CYCLES_BACKEND),
+
+	PERF_CONST(COUNT_SW_CPU_CLOCK),
+	PERF_CONST(COUNT_SW_TASK_CLOCK),
+	PERF_CONST(COUNT_SW_PAGE_FAULTS),
+	PERF_CONST(COUNT_SW_CONTEXT_SWITCHES),
+	PERF_CONST(COUNT_SW_CPU_MIGRATIONS),
+	PERF_CONST(COUNT_SW_PAGE_FAULTS_MIN),
+	PERF_CONST(COUNT_SW_PAGE_FAULTS_MAJ),
+	PERF_CONST(COUNT_SW_ALIGNMENT_FAULTS),
+	PERF_CONST(COUNT_SW_EMULATION_FAULTS),
+	PERF_CONST(COUNT_SW_DUMMY),
+
+	PERF_CONST(SAMPLE_IP),
+	PERF_CONST(SAMPLE_TID),
+	PERF_CONST(SAMPLE_TIME),
+	PERF_CONST(SAMPLE_ADDR),
+	PERF_CONST(SAMPLE_READ),
+	PERF_CONST(SAMPLE_CALLCHAIN),
+	PERF_CONST(SAMPLE_ID),
+	PERF_CONST(SAMPLE_CPU),
+	PERF_CONST(SAMPLE_PERIOD),
+	PERF_CONST(SAMPLE_STREAM_ID),
+	PERF_CONST(SAMPLE_RAW),
+
+	PERF_CONST(FORMAT_TOTAL_TIME_ENABLED),
+	PERF_CONST(FORMAT_TOTAL_TIME_RUNNING),
+	PERF_CONST(FORMAT_ID),
+	PERF_CONST(FORMAT_GROUP),
+
+	PERF_CONST(RECORD_MMAP),
+	PERF_CONST(RECORD_LOST),
+	PERF_CONST(RECORD_COMM),
+	PERF_CONST(RECORD_EXIT),
+	PERF_CONST(RECORD_THROTTLE),
+	PERF_CONST(RECORD_UNTHROTTLE),
+	PERF_CONST(RECORD_FORK),
+	PERF_CONST(RECORD_READ),
+	PERF_CONST(RECORD_SAMPLE),
+	PERF_CONST(RECORD_MMAP2),
+	PERF_CONST(RECORD_AUX),
+	PERF_CONST(RECORD_ITRACE_START),
+	PERF_CONST(RECORD_LOST_SAMPLES),
+	PERF_CONST(RECORD_SWITCH),
+	PERF_CONST(RECORD_SWITCH_CPU_WIDE),
+
+	PERF_CONST(RECORD_MISC_SWITCH_OUT),
+	{ .name = NULL, },
+};
+
+/*
+ * perf.tracepoint(sys, name): look up the format of tracepoint sys:name and
+ * return its id as a Python integer, or -1 when the lookup yields an error
+ * pointer.
+ */
+static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
+				  PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = { "sys", "name", NULL };
+	struct event_format *format;
+	char *subsystem = NULL;
+	char *event_name = NULL;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss", kwlist,
+					 &subsystem, &event_name))
+		return NULL;
+
+	format = trace_event__tp_format(subsystem, event_name);
+
+	return _PyLong_FromLong(IS_ERR(format) ? -1 : format->id);
+}
+
+/* Module-level functions of "perf"; the table ends with a NULL-named entry. */
+static PyMethodDef perf__methods[] = {
+	{
+		.ml_name  = "tracepoint",
+		.ml_meth  = (PyCFunction) pyrf__tracepoint,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("Get tracepoint config.")
+	},
+	{ .ml_name = NULL, }
+};
+
+/*
+ * Module entry point: initperf() under Python 2, PyInit_perf() under
+ * Python 3.
+ *
+ * Creates the "perf" module, readies the extension types, publishes them as
+ * module attributes and exports every perf__constants[] entry as an integer
+ * in the module dictionary.
+ *
+ * NOTE(review): under Python 3 the module pointer is returned even when an
+ * exception is pending; confirm whether NULL should be returned instead.
+ */
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initperf(void)
+#else
+PyMODINIT_FUNC PyInit_perf(void)
+#endif
+{
+	PyObject *obj;
+	int i;
+	PyObject *dict;
+#if PY_MAJOR_VERSION < 3
+	PyObject *module = Py_InitModule("perf", perf__methods);
+#else
+	static struct PyModuleDef moduledef = {
+		PyModuleDef_HEAD_INIT,
+		"perf",			/* m_name */
+		"",			/* m_doc */
+		-1,			/* m_size */
+		perf__methods,		/* m_methods */
+		NULL,			/* m_reload */
+		NULL,			/* m_traverse */
+		NULL,			/* m_clear */
+		NULL,			/* m_free */
+	};
+	PyObject *module = PyModule_Create(&moduledef);
+#endif
+
+	if (module == NULL ||
+	    pyrf_event__setup_types() < 0 ||
+	    pyrf_evlist__setup_types() < 0 ||
+	    pyrf_evsel__setup_types() < 0 ||
+	    pyrf_thread_map__setup_types() < 0 ||
+	    pyrf_cpu_map__setup_types() < 0)
+#if PY_MAJOR_VERSION < 3
+		return;
+#else
+		return module;
+#endif
+
+	/* The page_size is placed in util object. */
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	/*
+	 * PyModule_AddObject() steals a reference on success, hence the
+	 * Py_INCREF() in front of every registration of a static type.
+	 */
+	Py_INCREF(&pyrf_evlist__type);
+	PyModule_AddObject(module, "evlist", (PyObject*)&pyrf_evlist__type);
+
+	Py_INCREF(&pyrf_evsel__type);
+	PyModule_AddObject(module, "evsel", (PyObject*)&pyrf_evsel__type);
+
+	Py_INCREF(&pyrf_mmap_event__type);
+	PyModule_AddObject(module, "mmap_event", (PyObject *)&pyrf_mmap_event__type);
+
+	Py_INCREF(&pyrf_lost_event__type);
+	PyModule_AddObject(module, "lost_event", (PyObject *)&pyrf_lost_event__type);
+
+	Py_INCREF(&pyrf_comm_event__type);
+	PyModule_AddObject(module, "comm_event", (PyObject *)&pyrf_comm_event__type);
+
+	/* Registered once; a second identical registration was redundant. */
+	Py_INCREF(&pyrf_task_event__type);
+	PyModule_AddObject(module, "task_event", (PyObject *)&pyrf_task_event__type);
+
+	Py_INCREF(&pyrf_throttle_event__type);
+	PyModule_AddObject(module, "throttle_event", (PyObject *)&pyrf_throttle_event__type);
+
+	Py_INCREF(&pyrf_read_event__type);
+	PyModule_AddObject(module, "read_event", (PyObject *)&pyrf_read_event__type);
+
+	Py_INCREF(&pyrf_sample_event__type);
+	PyModule_AddObject(module, "sample_event", (PyObject *)&pyrf_sample_event__type);
+
+	Py_INCREF(&pyrf_context_switch_event__type);
+	PyModule_AddObject(module, "switch_event", (PyObject *)&pyrf_context_switch_event__type);
+
+	Py_INCREF(&pyrf_thread_map__type);
+	PyModule_AddObject(module, "thread_map", (PyObject*)&pyrf_thread_map__type);
+
+	Py_INCREF(&pyrf_cpu_map__type);
+	PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type);
+
+	dict = PyModule_GetDict(module);
+	if (dict == NULL)
+		goto error;
+
+	/* Export the constants; the dictionary takes its own reference. */
+	for (i = 0; perf__constants[i].name != NULL; i++) {
+		obj = _PyLong_FromLong(perf__constants[i].value);
+		if (obj == NULL)
+			goto error;
+		PyDict_SetItemString(dict, perf__constants[i].name, obj);
+		Py_DECREF(obj);
+	}
+
+error:
+	if (PyErr_Occurred())
+		PyErr_SetString(PyExc_ImportError, "perf: Init failed!");
+#if PY_MAJOR_VERSION >= 3
+	return module;
+#endif
+}
+
+/*
+ * Dummy, to avoid dragging all the test_attr infrastructure in the python
+ * binding.  The body is intentionally empty: every argument is ignored.
+ */
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+                     int fd, int group_fd, unsigned long flags)
+{
+}
diff --git a/util/quote.c b/util/quote.c
new file mode 100644
index 0000000..22eaa20
--- /dev/null
+++ b/util/quote.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include "strbuf.h"
+#include "quote.h"
+#include "util.h"
+
+/* Help to copy the thing properly quoted for the shell safety.
+ * any single quote is replaced with '\'', any exclamation point
+ * is replaced with '\!', and the whole thing is enclosed in a
+ * single quote pair.
+ *
+ * E.g.
+ *  original     sq_quote     result
+ *  name     ==> name      ==> 'name'
+ *  a b      ==> a b       ==> 'a b'
+ *  a'b      ==> a'\''b    ==> 'a'\''b'
+ *  a!b      ==> a'\!'b    ==> 'a'\!'b'
+ */
+/* Non-zero for the two characters that must leave the quoted run: ' and !. */
+static inline int need_bs_quote(char c)
+{
+	switch (c) {
+	case '\'':
+	case '!':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Append the single-quoted form of @src to @dst.
+ *
+ * Runs free of ' and ! are copied as they are; each ' or ! is emitted as
+ * '\c' (close quote, backslash-escaped character, reopen quote).  When @src
+ * is @dst's own buffer, that buffer is detached first and freed at the end.
+ * Returns the first non-zero strbuf result, or 0.
+ */
+static int sq_quote_buf(struct strbuf *dst, const char *src)
+{
+	char *detached = NULL;
+	int ret;
+
+	if (dst->buf == src)
+		detached = strbuf_detach(dst, NULL);
+
+	for (ret = strbuf_addch(dst, '\''); !ret && *src; ) {
+		const size_t plain = strcspn(src, "'!");
+
+		ret = strbuf_add(dst, src, plain);
+		src += plain;
+
+		for (; !ret && need_bs_quote(*src); src++)
+			ret = strbuf_addf(dst, "'\\%c\'", *src);
+	}
+
+	if (!ret)
+		ret = strbuf_addch(dst, '\'');
+
+	free(detached);
+	return ret;
+}
+
+/*
+ * Append every string of the NULL-terminated @argv to @dst, each one
+ * preceded by a space and single-quoted by sq_quote_buf().
+ *
+ * Returns 0 on success, the first non-zero strbuf result on failure, or
+ * -ENOSPC as soon as @maxlen is non-zero and @dst has grown beyond it.
+ */
+int sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
+{
+	int i, ret;
+
+	/* Copy into destination buffer. */
+	ret = strbuf_grow(dst, 255);
+	for (i = 0; !ret && argv[i]; ++i) {
+		ret = strbuf_addch(dst, ' ');
+		if (ret)
+			break;
+		ret = sq_quote_buf(dst, argv[i]);
+		/* The length limit is checked before 'ret' is looked at again. */
+		if (maxlen && dst->len > maxlen)
+			return -ENOSPC;
+	}
+	return ret;
+}
diff --git a/util/quote.h b/util/quote.h
new file mode 100644
index 0000000..274bf26
--- /dev/null
+++ b/util/quote.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_QUOTE_H
+#define __PERF_QUOTE_H
+
+#include <stddef.h>
+
+/* Help to copy the thing properly quoted for the shell safety.
+ * any single quote is replaced with '\'', any exclamation point
+ * is replaced with '\!', and the whole thing is enclosed in a
+ * single quote pair.
+ *
+ * For example, if you are passing the result to system() as an
+ * argument:
+ *
+ * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1))
+ *
+ * would be appropriate.  If the system() is going to call ssh to
+ * run the command on the other side:
+ *
+ * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1));
+ * sprintf(rcmd, "ssh %s %s", sq_quote(host), sq_quote(cmd));
+ *
+ * Note that the above examples leak memory!  Remember to free result from
+ * sq_quote() in a real application.
+ */
+
+struct strbuf;
+
+int sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
+
+#endif /* __PERF_QUOTE_H */
diff --git a/util/rb_resort.h b/util/rb_resort.h
new file mode 100644
index 0000000..a920f70
--- /dev/null
+++ b/util/rb_resort.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_RESORT_RB_H_
+#define _PERF_RESORT_RB_H_
+/*
+ * Template for creating a class to resort an existing rb_tree according to
+ * a new sort criteria, that must be present in the entries of the source
+ * rb_tree.
+ *
+ * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Quick example, resorting threads by its shortname:
+ *
+ * First define the prefix (threads) to be used for the functions and data
+ * structures created, and provide an expression for the sorting, then the
+ * fields to be present in each of the entries in the new, sorted, rb_tree.
+ *
+ * The body of the init function should collect the fields, maybe
+ * pre-calculating them from multiple entries in the original 'entry' from
+ * the rb_tree used as a source for the entries to be sorted:
+
+DEFINE_RESORT_RB(threads, strcmp(a->thread->shortname,
+				    b->thread->shortname) < 0,
+	struct thread *thread;
+)
+{
+	entry->thread = rb_entry(nd, struct thread, rb_node);
+}
+
+ * After this it is just a matter of instantiating it and iterating it,
+ * for a few data structures with existing rb_trees, such as 'struct machine',
+ * helpers are available to get the rb_root and the nr_entries:
+
+	DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr, hash_bucket);
+
+ * This will instantiate the new rb_tree and a cursor for it, that can be used as:
+
+	struct rb_node *nd;
+
+	resort_rb__for_each_entry(nd, threads) {
+		struct thread *t = threads_entry->thread;
+		printf("%s: %d\n", t->shortname, t->tid);
+	}
+
+ * Then delete it:
+
+	resort_rb__delete(threads);
+
+ * The name of the data structures and functions will have a _sorted suffix
+ * right before the method names, i.e. will look like:
+ *
+ * 	struct threads_sorted_entry {}
+ * 	threads_sorted__insert()
+ */
+
+/*
+ * DEFINE_RESORT_RB(__name, __comp, fields...) generates:
+ *  - struct __name_sorted_entry: an rb_node followed by the given fields;
+ *  - __name_sorted__cmp(): evaluates __comp with 'a' and 'b' pointing at the
+ *    two entries; a non-zero result places 'a' to the left of 'b';
+ *  - struct __name_sorted: the new rb_root plus a trailing array of entries;
+ *  - __name_sorted__insert(), __name_sorted__sort(), __name_sorted__new()
+ *    and __name_sorted__delete();
+ *  - the opening of __name_sorted__init_entry(), whose body the macro user
+ *    writes right after the macro invocation.
+ *
+ * __name_sorted__new() allocates room for nr_entries entries in one block
+ * and returns NULL when that allocation fails.
+ */
+#define DEFINE_RESORT_RB(__name, __comp, ...)					\
+struct __name##_sorted_entry {							\
+	struct rb_node	rb_node;						\
+	__VA_ARGS__								\
+};										\
+static void __name##_sorted__init_entry(struct rb_node *nd,			\
+					struct __name##_sorted_entry *entry);	\
+										\
+static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb)	\
+{										\
+	struct __name##_sorted_entry *a, *b;					\
+	a = rb_entry(nda, struct __name##_sorted_entry, rb_node);		\
+	b = rb_entry(ndb, struct __name##_sorted_entry, rb_node);		\
+	return __comp;								\
+}										\
+										\
+struct __name##_sorted {							\
+       struct rb_root		    entries;					\
+       struct __name##_sorted_entry nd[0];					\
+};										\
+										\
+static void __name##_sorted__insert(struct __name##_sorted *sorted,		\
+				      struct rb_node *sorted_nd)		\
+{										\
+	struct rb_node **p = &sorted->entries.rb_node, *parent = NULL;		\
+	while (*p != NULL) {							\
+		parent = *p;							\
+		if (__name##_sorted__cmp(sorted_nd, parent))			\
+			p = &(*p)->rb_left;					\
+		else								\
+			p = &(*p)->rb_right;					\
+	}									\
+	rb_link_node(sorted_nd, parent, p);					\
+	rb_insert_color(sorted_nd, &sorted->entries);				\
+}										\
+										\
+static void __name##_sorted__sort(struct __name##_sorted *sorted,		\
+				    struct rb_root *entries)			\
+{										\
+	struct rb_node *nd;							\
+	unsigned int i = 0;							\
+	for (nd = rb_first(entries); nd; nd = rb_next(nd)) {			\
+		struct __name##_sorted_entry *snd = &sorted->nd[i++];		\
+		__name##_sorted__init_entry(nd, snd);				\
+		__name##_sorted__insert(sorted, &snd->rb_node);			\
+	}									\
+}										\
+										\
+static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries,	\
+						    int nr_entries)		\
+{										\
+	struct __name##_sorted *sorted;						\
+	sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries);	\
+	if (sorted) {								\
+		sorted->entries = RB_ROOT;					\
+		__name##_sorted__sort(sorted, entries);				\
+	}									\
+	return sorted;								\
+}										\
+										\
+static void __name##_sorted__delete(struct __name##_sorted *sorted)		\
+{										\
+	free(sorted);								\
+}										\
+										\
+static void __name##_sorted__init_entry(struct rb_node *nd,			\
+					struct __name##_sorted_entry *entry)
+
+/*
+ * Declares the cursor '__name_entry' and the variable '__name', and ends
+ * with the name of the constructor: the user appends the argument list
+ * "(rb_root pointer, nr_entries)" right after the macro invocation.
+ */
+#define DECLARE_RESORT_RB(__name)						\
+struct __name##_sorted_entry *__name##_entry;					\
+struct __name##_sorted *__name = __name##_sorted__new
+
+/*
+ * Walk the resorted tree in order.  On every iteration '__nd' is the current
+ * rb_node and '__name_entry' is the entry that contains it; the loop stops
+ * when '__nd' becomes NULL.
+ */
+#define resort_rb__for_each_entry(__nd, __name)					\
+	for (__nd = rb_first(&__name->entries);					\
+	     __name##_entry = rb_entry(__nd, struct __name##_sorted_entry,	\
+				       rb_node), __nd;				\
+	     __nd = rb_next(__nd))
+
+/* Free the resorted tree and reset the '__name' variable to NULL. */
+#define resort_rb__delete(__name)						\
+	__name##_sorted__delete(__name), __name = NULL
+
+/*
+ * Helpers for other classes that contains both an rbtree and the
+ * number of entries in it:
+ */
+
+/*
+ * For 'struct intlist': resort the entries of __ilist->rblist, sized by its
+ * nr_entries.
+ */
+#define DECLARE_RESORT_RB_INTLIST(__name, __ilist)				\
+	DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries,			\
+				  __ilist->rblist.nr_entries)
+
+/*
+ * For 'struct machine->threads': resort the entries of the threads bucket
+ * selected by 'hash_bucket', sized by that bucket's 'nr'.
+ */
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket)	\
+	DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries,	\
+				  __machine->threads[hash_bucket].nr)
+
+#endif /* _PERF_RESORT_RB_H_ */
diff --git a/util/rblist.c b/util/rblist.c
new file mode 100644
index 0000000..0efc325
--- /dev/null
+++ b/util/rblist.c
@@ -0,0 +1,133 @@
+/*
+ * Based on strlist.c by:
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Licensed under the GPLv2.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "rblist.h"
+
+/*
+ * Add a node created from @new_entry to the list.
+ *
+ * Returns 0 on success, -EEXIST when node_cmp() reports an existing match
+ * and -ENOMEM when node_new() returns NULL.
+ */
+int rblist__add_node(struct rblist *rblist, const void *new_entry)
+{
+	struct rb_node **link = &rblist->entries.rb_node;
+	struct rb_node *parent = NULL;
+	struct rb_node *node;
+
+	/* Descend to the empty link where the new node belongs. */
+	while (*link != NULL) {
+		int cmp;
+
+		parent = *link;
+		cmp = rblist->node_cmp(parent, new_entry);
+		if (cmp == 0)
+			return -EEXIST;
+
+		link = cmp > 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	node = rblist->node_new(rblist, new_entry);
+	if (node == NULL)
+		return -ENOMEM;
+
+	rb_link_node(node, parent, link);
+	rb_insert_color(node, &rblist->entries);
+	rblist->nr_entries++;
+
+	return 0;
+}
+
+/*
+ * Unlink @rb_node from the tree, account for it, then hand it to the list's
+ * node_delete() callback.
+ */
+void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node)
+{
+	struct rb_root *root = &rblist->entries;
+
+	rb_erase(rb_node, root);
+	rblist->nr_entries--;
+	rblist->node_delete(rblist, rb_node);
+}
+
+/*
+ * Look up the node matching @entry.  When there is none and @create is set,
+ * build one with node_new() and insert it.
+ *
+ * Returns the found or newly inserted node, or NULL when nothing matches
+ * and either @create is false or node_new() failed.
+ */
+static struct rb_node *__rblist__findnew(struct rblist *rblist,
+					 const void *entry,
+					 bool create)
+{
+	struct rb_node **link = &rblist->entries.rb_node;
+	struct rb_node *parent = NULL;
+	struct rb_node *node;
+
+	while (*link != NULL) {
+		int cmp;
+
+		parent = *link;
+		cmp = rblist->node_cmp(parent, entry);
+		if (cmp == 0)
+			return parent;
+
+		link = cmp > 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	if (!create)
+		return NULL;
+
+	node = rblist->node_new(rblist, entry);
+	if (node == NULL)
+		return NULL;
+
+	rb_link_node(node, parent, link);
+	rb_insert_color(node, &rblist->entries);
+	rblist->nr_entries++;
+
+	return node;
+}
+
+/* Return the node matching @entry, or NULL; the list is never modified. */
+struct rb_node *rblist__find(struct rblist *rblist, const void *entry)
+{
+	return __rblist__findnew(rblist, entry, false);
+}
+
+/*
+ * Return the node matching @entry, creating and inserting one when there is
+ * no match.  NULL means the node could not be created.
+ */
+struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry)
+{
+	return __rblist__findnew(rblist, entry, true);
+}
+
+/*
+ * Reset @rblist to an empty tree with zero entries.  The callbacks are left
+ * untouched.  A NULL @rblist is accepted and ignored.
+ */
+void rblist__init(struct rblist *rblist)
+{
+	if (rblist == NULL)
+		return;
+
+	rblist->entries = RB_ROOT;
+	rblist->nr_entries = 0;
+}
+
+/* Remove every node from the list, in tree order. */
+void rblist__exit(struct rblist *rblist)
+{
+	struct rb_node *node = rb_first(&rblist->entries);
+
+	while (node != NULL) {
+		/* Fetch the successor first: removal invalidates 'node'. */
+		struct rb_node *following = rb_next(node);
+
+		rblist__remove_node(rblist, node);
+		node = following;
+	}
+}
+
+/* Empty the list and free it.  A NULL @rblist is accepted and ignored. */
+void rblist__delete(struct rblist *rblist)
+{
+	if (rblist == NULL)
+		return;
+
+	rblist__exit(rblist);
+	free(rblist);
+}
+
+/*
+ * Return the node at position @idx in tree order (0 is the first node), or
+ * NULL when the list holds @idx nodes or fewer.
+ */
+struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx)
+{
+	struct rb_node *node = rb_first(&rblist->entries);
+
+	while (node != NULL && idx != 0) {
+		node = rb_next(node);
+		idx--;
+	}
+
+	return node;
+}
diff --git a/util/rblist.h b/util/rblist.h
new file mode 100644
index 0000000..76df15c
--- /dev/null
+++ b/util/rblist.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_RBLIST_H
+#define __PERF_RBLIST_H
+
+#include <linux/rbtree.h>
+#include <stdbool.h>
+
+/*
+ * create node structs of the form:
+ * struct my_node {
+ *     struct rb_node rb_node;
+ *     ... my data ...
+ * };
+ *
+ * create list structs of the form:
+ * struct mylist {
+ *     struct rblist rblist;
+ *     ... my data ...
+ * };
+ */
+
+/*
+ * A red-black tree plus an entry counter and three user callbacks.
+ * rblist__init() only resets 'entries' and 'nr_entries'.
+ */
+struct rblist {
+	struct rb_root entries;
+	unsigned int   nr_entries;
+
+	/*
+	 * Compare an existing node with an entry.  The lookups descend left
+	 * on a result > 0, right on a result < 0, and treat 0 as a match.
+	 */
+	int (*node_cmp)(struct rb_node *rbn, const void *entry);
+	/* Build the node for a new entry; NULL is treated as failure. */
+	struct rb_node *(*node_new)(struct rblist *rlist, const void *new_entry);
+	/* Called on a node after it has been unlinked from the tree. */
+	void (*node_delete)(struct rblist *rblist, struct rb_node *rb_node);
+};
+
+void rblist__init(struct rblist *rblist);
+void rblist__exit(struct rblist *rblist);
+void rblist__delete(struct rblist *rblist);
+int rblist__add_node(struct rblist *rblist, const void *new_entry);
+void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node);
+struct rb_node *rblist__find(struct rblist *rblist, const void *entry);
+struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry);
+struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx);
+
+/* True when the list holds no entries. */
+static inline bool rblist__empty(const struct rblist *rblist)
+{
+	return !rblist->nr_entries;
+}
+
+/* Number of entries currently accounted for in the list. */
+static inline unsigned int rblist__nr_entries(const struct rblist *rblist)
+{
+	return rblist->nr_entries;
+}
+
+#endif /* __PERF_RBLIST_H */
diff --git a/util/record.c b/util/record.c
new file mode 100644
index 0000000..9cfc7bf
--- /dev/null
+++ b/util/record.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "evlist.h"
+#include "evsel.h"
+#include "cpumap.h"
+#include "parse-events.h"
+#include <errno.h>
+#include <api/fs/fs.h>
+#include <subcmd/parse-options.h>
+#include "util.h"
+#include "cloexec.h"
+
+typedef void (*setup_probe_fn_t)(struct perf_evsel *evsel);
+
+/*
+ * Check whether the attr tweak applied by @fn is accepted for event @str on
+ * @cpu.
+ *
+ * The event is opened once unmodified, then once more after @fn() changed
+ * its attr.  Returns 0 when the second open succeeds, -EINVAL when it fails
+ * with EINVAL, -ENOMEM when no evlist can be allocated and -EAGAIN for any
+ * other failure.  Once an open with pid -1 fails with EACCES, pid 0 is used
+ * for that and every later call (the pid is static).
+ */
+static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
+{
+	static pid_t pid = -1;
+	unsigned long flags = perf_event_open_cloexec_flag();
+	struct perf_evlist *evlist = perf_evlist__new();
+	struct perf_evsel *evsel;
+	int err = -EAGAIN;
+	int fd;
+
+	if (!evlist)
+		return -ENOMEM;
+
+	if (parse_events(evlist, str, NULL))
+		goto out_delete;
+
+	evsel = perf_evlist__first(evlist);
+
+	/* First make sure the unmodified event can be opened at all. */
+	for (;;) {
+		fd = sys_perf_event_open(&evsel->attr, pid, cpu, -1, flags);
+		if (fd >= 0)
+			break;
+		if (pid != -1 || errno != EACCES)
+			goto out_delete;
+		pid = 0;
+	}
+	close(fd);
+
+	fn(evsel);
+
+	/* Now try again with the attr modified by the probe function. */
+	fd = sys_perf_event_open(&evsel->attr, pid, cpu, -1, flags);
+	if (fd >= 0) {
+		close(fd);
+		err = 0;
+	} else if (errno == EINVAL) {
+		err = -EINVAL;
+	}
+
+out_delete:
+	perf_evlist__delete(evlist);
+	return err;
+}
+
+/*
+ * Run perf_do_probe_api() for @fn on the first CPU of the default cpu map,
+ * trying each candidate event in turn while the result is -EAGAIN.
+ * Returns true as soon as one probe succeeds.
+ */
+static bool perf_probe_api(setup_probe_fn_t fn)
+{
+	const char *candidates[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
+	struct cpu_map *cpus = cpu_map__new(NULL);
+	int cpu, idx = 0;
+
+	if (!cpus)
+		return false;
+	cpu = cpus->map[0];
+	cpu_map__put(cpus);
+
+	for (;;) {
+		int ret = perf_do_probe_api(fn, cpu, candidates[idx++]);
+
+		if (!ret)
+			return true;
+		if (ret != -EAGAIN || !candidates[idx])
+			break;
+	}
+
+	return false;
+}
+
+/* Probe tweak: request PERF_SAMPLE_IDENTIFIER in the sample type. */
+static void perf_probe_sample_identifier(struct perf_evsel *evsel)
+{
+	evsel->attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
+}
+
+/* Probe tweak: set the comm_exec attr bit. */
+static void perf_probe_comm_exec(struct perf_evsel *evsel)
+{
+	evsel->attr.comm_exec = 1;
+}
+
+/* Probe tweak: set the context_switch attr bit. */
+static void perf_probe_context_switch(struct perf_evsel *evsel)
+{
+	evsel->attr.context_switch = 1;
+}
+
+/* True when an event with PERF_SAMPLE_IDENTIFIER set can be opened. */
+bool perf_can_sample_identifier(void)
+{
+	return perf_probe_api(perf_probe_sample_identifier);
+}
+
+/* True when an event with attr.comm_exec set can be opened. */
+static bool perf_can_comm_exec(void)
+{
+	return perf_probe_api(perf_probe_comm_exec);
+}
+
+/* True when an event with attr.context_switch set can be opened. */
+bool perf_can_record_switch_events(void)
+{
+	return perf_probe_api(perf_probe_context_switch);
+}
+
+/*
+ * True when a cpu-clock software event that excludes the kernel can be
+ * opened for all processes (pid -1) on the first CPU of the default map.
+ */
+bool perf_can_record_cpu_wide(void)
+{
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_CPU_CLOCK,
+		.exclude_kernel = 1,
+	};
+	struct cpu_map *cpus = cpu_map__new(NULL);
+	bool opened;
+	int cpu, fd;
+
+	if (!cpus)
+		return false;
+	cpu = cpus->map[0];
+	cpu_map__put(cpus);
+
+	fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+	opened = fd >= 0;
+	if (opened)
+		close(fd);
+
+	return opened;
+}
+
+/*
+ * Configure every evsel of @evlist from @opts and @callchain, then decide
+ * whether sample ids (and PERF_SAMPLE_IDENTIFIER) are needed and record the
+ * resulting id positions in the evlist.
+ */
+void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
+			 struct callchain_param *callchain)
+{
+	struct perf_evsel *evsel;
+	bool want_identifier = false;
+	bool want_sample_id = opts->sample_id;
+	bool comm_exec_ok;
+
+	/*
+	 * Leader links have to be in place before the attributes are
+	 * configured, because some of them depend on that information.
+	 */
+	if (opts->group)
+		perf_evlist__set_leader(evlist);
+
+	if (evlist->cpus->map[0] < 0)
+		opts->no_inherit = true;
+
+	comm_exec_ok = perf_can_comm_exec();
+
+	evlist__for_each_entry(evlist, evsel) {
+		perf_evsel__config(evsel, opts, callchain);
+		if (comm_exec_ok && evsel->tracking)
+			evsel->attr.comm_exec = 1;
+	}
+
+	if (opts->full_auxtrace) {
+		/*
+		 * Selected events get synthesized and parsed with arbitrary
+		 * sample types, so matching the id must always be possible.
+		 */
+		want_identifier = perf_can_sample_identifier();
+		want_sample_id = true;
+	} else if (evlist->nr_entries > 1) {
+		struct perf_evsel *first = perf_evlist__first(evlist);
+
+		/* Probe only when some evsel differs from the first one. */
+		evlist__for_each_entry(evlist, evsel) {
+			if (evsel->attr.sample_type != first->attr.sample_type) {
+				want_identifier = perf_can_sample_identifier();
+				break;
+			}
+		}
+		want_sample_id = true;
+	}
+
+	if (want_sample_id) {
+		evlist__for_each_entry(evlist, evsel)
+			perf_evsel__set_sample_id(evsel, want_identifier);
+	}
+
+	perf_evlist__set_id_pos(evlist);
+}
+
+/*
+ * Read the kernel/perf_event_max_sample_rate sysctl into @rate and return
+ * the result of sysctl__read_int().
+ */
+static int get_max_rate(unsigned int *rate)
+{
+	return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
+}
+
+/*
+ * Settle opts->freq and opts->default_interval from the user supplied
+ * values and cap the frequency at the kernel's maximum sample rate.
+ *
+ * Returns -1 when both frequency and count are zero, or when a user supplied
+ * frequency exceeds the maximum and opts->strict_freq is set; 0 otherwise,
+ * including when the maximum rate cannot be read.
+ */
+static int record_opts__config_freq(struct record_opts *opts)
+{
+	/* UINT_MAX / ULLONG_MAX mean "not given by the user". */
+	bool user_freq = opts->user_freq != UINT_MAX;
+	unsigned int max_rate;
+
+	if (opts->user_interval != ULLONG_MAX)
+		opts->default_interval = opts->user_interval;
+	if (user_freq)
+		opts->freq = opts->user_freq;
+
+	/*
+	 * User specified count overrides default frequency.
+	 */
+	if (opts->default_interval)
+		opts->freq = 0;
+	else if (opts->freq) {
+		opts->default_interval = opts->freq;
+	} else {
+		pr_err("frequency and count are zero, aborting\n");
+		return -1;
+	}
+
+	/* Without a readable limit there is nothing to cap against. */
+	if (get_max_rate(&max_rate))
+		return 0;
+
+	/*
+	 * User specified frequency is over current maximum.
+	 */
+	if (user_freq && (max_rate < opts->freq)) {
+		if (opts->strict_freq) {
+			pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
+			       "       Please use -F freq option with a lower value or consider\n"
+			       "       tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
+			       max_rate);
+			return -1;
+		} else {
+			pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
+				   "         The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
+				   "         The kernel will lower it when perf's interrupts take too long.\n"
+				   "         Use --strict-freq to disable this throttling, refusing to record.\n",
+				   max_rate, opts->freq, max_rate);
+
+			opts->freq = max_rate;
+		}
+	}
+
+	/*
+	 * Default frequency is over current maximum.
+	 */
+	if (max_rate < opts->freq) {
+		pr_warning("Lowering default frequency rate to %u.\n"
+			   "Please consider tweaking "
+			   "/proc/sys/kernel/perf_event_max_sample_rate.\n",
+			   max_rate);
+		opts->freq = max_rate;
+	}
+
+	return 0;
+}
+
+/* Validate and settle @opts; currently only the frequency settings. */
+int record_opts__config(struct record_opts *opts)
+{
+	return record_opts__config_freq(opts);
+}
+
+/*
+ * True when the last event parsed from @str can be opened.
+ *
+ * The CPU used is the first one of @evlist's cpu map, or, when @evlist is
+ * NULL or its map is empty, the first one of the default map (0 when that
+ * map cannot be created).  An EACCES failure with pid -1 is retried once
+ * with pid 0.
+ */
+bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str)
+{
+	struct perf_evlist *scratch = perf_evlist__new();
+	struct perf_evsel *evsel;
+	bool ret = false;
+	pid_t pid = -1;
+	int fd, cpu;
+
+	if (!scratch)
+		return false;
+
+	if (parse_events(scratch, str, NULL))
+		goto out_delete;
+
+	evsel = perf_evlist__last(scratch);
+
+	if (evlist && !cpu_map__empty(evlist->cpus)) {
+		cpu = evlist->cpus->map[0];
+	} else {
+		struct cpu_map *cpus = cpu_map__new(NULL);
+
+		cpu =  cpus ? cpus->map[0] : 0;
+		cpu_map__put(cpus);
+	}
+
+	for (;;) {
+		fd = sys_perf_event_open(&evsel->attr, pid, cpu, -1,
+					 perf_event_open_cloexec_flag());
+		if (fd >= 0)
+			break;
+		if (pid != -1 || errno != EACCES)
+			goto out_delete;
+		pid = 0;
+	}
+	close(fd);
+	ret = true;
+
+out_delete:
+	perf_evlist__delete(scratch);
+	return ret;
+}
+
+/*
+ * Option callback for the sampling frequency: store it in the user_freq
+ * field of the record_opts that opt->value points to.
+ *
+ * "max" (case insensitive) selects the kernel's current maximum sample
+ * rate; anything else is converted with atoi().  Returns 0 on success,
+ * -EINVAL when @str is NULL and -1 when the maximum rate cannot be read.
+ */
+int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
+{
+	unsigned int freq;
+	struct record_opts *opts = opt->value;
+
+	if (!str)
+		return -EINVAL;
+
+	if (strcasecmp(str, "max") == 0) {
+		if (get_max_rate(&freq)) {
+			pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
+			return -1;
+		}
+		/* 'freq' is unsigned: print it with %u, not %d. */
+		pr_info("info: Using a maximum frequency rate of %'u Hz\n", freq);
+	} else {
+		/* NOTE(review): atoi() cannot report malformed input. */
+		freq = atoi(str);
+	}
+
+	opts->user_freq = freq;
+	return 0;
+}
diff --git a/util/rwsem.c b/util/rwsem.c
new file mode 100644
index 0000000..5e52e7b
--- /dev/null
+++ b/util/rwsem.c
@@ -0,0 +1,32 @@
+#include "util.h"
+#include "rwsem.h"
+
+/*
+ * Initialize the lock with default attributes; returns the
+ * pthread_rwlock_init() result.
+ */
+int init_rwsem(struct rw_semaphore *sem)
+{
+	return pthread_rwlock_init(&sem->lock, NULL);
+}
+
+/* Destroy the lock; returns the pthread_rwlock_destroy() result. */
+int exit_rwsem(struct rw_semaphore *sem)
+{
+	return pthread_rwlock_destroy(&sem->lock);
+}
+
+/*
+ * Take the lock for reading.  Nothing is locked, and 0 is returned, while
+ * perf_singlethreaded is set.
+ */
+int down_read(struct rw_semaphore *sem)
+{
+	if (perf_singlethreaded)
+		return 0;
+
+	return pthread_rwlock_rdlock(&sem->lock);
+}
+
+/*
+ * Release a read lock.  Nothing is unlocked, and 0 is returned, while
+ * perf_singlethreaded is set.
+ */
+int up_read(struct rw_semaphore *sem)
+{
+	if (perf_singlethreaded)
+		return 0;
+
+	return pthread_rwlock_unlock(&sem->lock);
+}
+
+/*
+ * Take the lock for writing.  Nothing is locked, and 0 is returned, while
+ * perf_singlethreaded is set.
+ */
+int down_write(struct rw_semaphore *sem)
+{
+	if (perf_singlethreaded)
+		return 0;
+
+	return pthread_rwlock_wrlock(&sem->lock);
+}
+
+/*
+ * Release a write lock.  Nothing is unlocked, and 0 is returned, while
+ * perf_singlethreaded is set.
+ */
+int up_write(struct rw_semaphore *sem)
+{
+	if (perf_singlethreaded)
+		return 0;
+
+	return pthread_rwlock_unlock(&sem->lock);
+}
diff --git a/util/rwsem.h b/util/rwsem.h
new file mode 100644
index 0000000..94565ad
--- /dev/null
+++ b/util/rwsem.h
@@ -0,0 +1,19 @@
+#ifndef _PERF_RWSEM_H
+#define _PERF_RWSEM_H
+
+#include <pthread.h>
+
+/* Reader/writer semaphore implemented on top of a pthread rwlock. */
+struct rw_semaphore {
+	pthread_rwlock_t lock;
+};
+
+int init_rwsem(struct rw_semaphore *sem);
+int exit_rwsem(struct rw_semaphore *sem);
+
+int down_read(struct rw_semaphore *sem);
+int up_read(struct rw_semaphore *sem);
+
+int down_write(struct rw_semaphore *sem);
+int up_write(struct rw_semaphore *sem);
+
+#endif /* _PERF_RWSEM_H */
diff --git a/util/sane_ctype.h b/util/sane_ctype.h
new file mode 100644
index 0000000..c2b42ff
--- /dev/null
+++ b/util/sane_ctype.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_SANE_CTYPE_H
+#define _PERF_SANE_CTYPE_H
+
+extern const char *graph_line;
+extern const char *graph_dotted_line;
+extern const char *spaces;
+extern const char *dots;
+
+/* Sane ctype - no locale, and works with signed chars */
+#undef isascii
+#undef isspace
+#undef isdigit
+#undef isxdigit
+#undef isalpha
+#undef isprint
+#undef isalnum
+#undef islower
+#undef isupper
+#undef tolower
+#undef toupper
+
+extern unsigned char sane_ctype[256];
+#define GIT_SPACE		0x01
+#define GIT_DIGIT		0x02
+#define GIT_ALPHA		0x04
+#define GIT_GLOB_SPECIAL	0x08
+#define GIT_REGEX_SPECIAL	0x10
+#define GIT_PRINT_EXTRA		0x20
+#define GIT_PRINT		0x3E
+/*
+ * Table-driven replacements for the <ctype.h> classifiers.  sane_istest()
+ * indexes sane_ctype[] with the argument converted to unsigned char, so
+ * negative char values are safe.
+ *
+ * Macro arguments are parenthesized wherever they are used as operands.
+ * NOTE: isxdigit(), islower() and isupper() evaluate their argument more
+ * than once, so do not pass expressions with side effects.
+ */
+#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
+#define isascii(x) (((x) & ~0x7f) == 0)
+#define isspace(x) sane_istest(x,GIT_SPACE)
+#define isdigit(x) sane_istest(x,GIT_DIGIT)
+#define isxdigit(x)	\
+	(sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G')
+#define isalpha(x) sane_istest(x,GIT_ALPHA)
+#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
+#define isprint(x) sane_istest(x,GIT_PRINT)
+#define islower(x) (sane_istest(x,GIT_ALPHA) && ((x) & 0x20))
+#define isupper(x) (sane_istest(x,GIT_ALPHA) && !((x) & 0x20))
+#define tolower(x) sane_case((unsigned char)(x), 0x20)
+#define toupper(x) sane_case((unsigned char)(x), 0)
+
+/*
+ * Case conversion helper: for a GIT_ALPHA character, clear bit 0x20 and OR
+ * in @high (0x20 yields lower case, 0 upper case).  Any other value is
+ * returned unchanged.
+ */
+static inline int sane_case(int x, int high)
+{
+	if (!sane_istest(x, GIT_ALPHA))
+		return x;
+
+	return (x & ~0x20) | high;
+}
+
+#endif /* _PERF_SANE_CTYPE_H */
diff --git a/util/scripting-engines/Build b/util/scripting-engines/Build
new file mode 100644
index 0000000..82d28c6
--- /dev/null
+++ b/util/scripting-engines/Build
@@ -0,0 +1,6 @@
+libperf-$(CONFIG_LIBPERL)   += trace-event-perl.o
+libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o
+
+CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default
+
+CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow
diff --git a/util/scripting-engines/trace-event-perl.c b/util/scripting-engines/trace-event-perl.c
new file mode 100644
index 0000000..7b79c41
--- /dev/null
+++ b/util/scripting-engines/trace-event-perl.c
@@ -0,0 +1,752 @@
+/*
+ * trace-event-perl.  Feed perf script events to an embedded Perl interpreter.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/time64.h>
+
+#include <stdbool.h>
+/* perl needs the following define, right after including stdbool.h */
+#define HAS_BOOL
+#include <EXTERN.h>
+#include <perl.h>
+
+#include "../../perf.h"
+#include "../callchain.h"
+#include "../machine.h"
+#include "../thread.h"
+#include "../event.h"
+#include "../trace-event.h"
+#include "../evsel.h"
+#include "../debug.h"
+
+void boot_Perf__Trace__Context(pTHX_ CV *cv);
+void boot_DynaLoader(pTHX_ CV *cv);
+typedef PerlInterpreter * INTERP;
+
+void xs_init(pTHX);
+
+/* Register the Perf::Trace::Context and DynaLoader bootstrap XS subs. */
+void xs_init(pTHX)
+{
+	const char *src = __FILE__;
+	dXSUB_SYS;
+	newXS("Perf::Trace::Context::bootstrap",
+	      boot_Perf__Trace__Context, src);
+	newXS("DynaLoader::boot_DynaLoader", boot_DynaLoader, src);
+}
+
+INTERP my_perl;
+
+#define TRACE_EVENT_TYPE_MAX				\
+	((1 << (sizeof(unsigned short) * 8)) - 1)
+
+static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
+
+extern struct scripting_context *scripting_context;
+
+static char *cur_field_name;
+static int zero_flag_atom;
+
+/* Call main::define_symbolic_value(ev, field, value, str) if it exists. */
+static void define_symbolic_value(const char *ev_name, const char *field_name,
+				  const char *field_value,
+				  const char *field_str)
+{
+	const char *sub = "main::define_symbolic_value";
+	unsigned long long num;
+	dSP;
+
+	num = eval_flag(field_value);
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+	XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+	XPUSHs(sv_2mortal(newSVuv(num)));
+	XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
+	PUTBACK;
+
+	if (get_cv(sub, 0))	/* the sub is optional */
+		call_pv(sub, G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+/* Define each entry of a print_flag_sym list; a NULL list is a no-op. */
+static void define_symbolic_values(struct print_flag_sym *field,
+				   const char *ev_name, const char *field_name)
+{
+	for (; field; field = field->next)
+		define_symbolic_value(ev_name, field_name, field->value,
+				      field->str);
+}
+
+/* Announce a symbolic field to main::define_symbolic_field, if defined. */
+static void define_symbolic_field(const char *ev_name, const char *field_name)
+{
+	const char *sub = "main::define_symbolic_field";
+	dSP;
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+	XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+	PUTBACK;
+
+	if (get_cv(sub, 0))	/* the sub is optional */
+		call_pv(sub, G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+/* Call main::define_flag_value(ev, field, value, str) if it exists. */
+static void define_flag_value(const char *ev_name, const char *field_name,
+			      const char *field_value,
+			      const char *field_str)
+{
+	const char *sub = "main::define_flag_value";
+	unsigned long long num;
+	dSP;
+
+	num = eval_flag(field_value);
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+	XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+	XPUSHs(sv_2mortal(newSVuv(num)));
+	XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
+	PUTBACK;
+
+	if (get_cv(sub, 0))	/* the sub is optional */
+		call_pv(sub, G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+/* Define each entry of a print_flag_sym list; a NULL list is a no-op. */
+static void define_flag_values(struct print_flag_sym *field,
+			       const char *ev_name, const char *field_name)
+{
+	for (; field; field = field->next)
+		define_flag_value(ev_name, field_name, field->value,
+				  field->str);
+}
+
+/* Announce a flag field and its delimiter to main::define_flag_field. */
+static void define_flag_field(const char *ev_name, const char *field_name,
+			      const char *delim)
+{
+	const char *sub = "main::define_flag_field";
+	dSP;
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+	XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+	XPUSHs(sv_2mortal(newSVpv(delim, 0)));
+	PUTBACK;
+
+	if (get_cv(sub, 0))	/* the sub is optional */
+		call_pv(sub, G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+/* Walk print_arg nodes, reporting flag and symbol fields to the script. */
+static void define_event_symbols(struct event_format *event,
+				 const char *ev_name, struct print_arg *args)
+{
+	if (args == NULL)
+		return;
+
+	switch (args->type) {
+	case PRINT_NULL:
+		break;
+	case PRINT_ATOM:	/* reported as flag value "0" of cur_field_name */
+		define_flag_value(ev_name, cur_field_name, "0",
+				  args->atom.atom);
+		zero_flag_atom = 0;
+		break;
+	case PRINT_FIELD:	/* remember the name; FLAGS/SYMBOL below read it */
+		free(cur_field_name);
+		cur_field_name = strdup(args->field.name);
+		break;
+	case PRINT_FLAGS:
+		define_event_symbols(event, ev_name, args->flags.field);
+		define_flag_field(ev_name, cur_field_name, args->flags.delim);
+		define_flag_values(args->flags.flags, ev_name, cur_field_name);
+		break;
+	case PRINT_SYMBOL:
+		define_event_symbols(event, ev_name, args->symbol.field);
+		define_symbolic_field(ev_name, cur_field_name);
+		define_symbolic_values(args->symbol.symbols, ev_name,
+				       cur_field_name);
+		break;
+	case PRINT_HEX:
+	case PRINT_HEX_STR:
+		define_event_symbols(event, ev_name, args->hex.field);
+		define_event_symbols(event, ev_name, args->hex.size);
+		break;
+	case PRINT_INT_ARRAY:
+		define_event_symbols(event, ev_name, args->int_array.field);
+		define_event_symbols(event, ev_name, args->int_array.count);
+		define_event_symbols(event, ev_name, args->int_array.el_size);
+		break;
+	case PRINT_BSTRING:
+	case PRINT_DYNAMIC_ARRAY:
+	case PRINT_DYNAMIC_ARRAY_LEN:
+	case PRINT_STRING:
+	case PRINT_BITMASK:
+		break;
+	case PRINT_TYPE:
+		define_event_symbols(event, ev_name, args->typecast.item);
+		break;
+	case PRINT_OP:
+		if (strcmp(args->op.op, ":") == 0)
+			zero_flag_atom = 1;
+		define_event_symbols(event, ev_name, args->op.left);
+		define_event_symbols(event, ev_name, args->op.right);
+		break;
+	case PRINT_FUNC:
+	default:
+		pr_err("Unsupported print arg type\n");
+		/* note: returning here also skips args->next */
+		return;
+	}
+
+	if (args->next)
+		define_event_symbols(event, ev_name, args->next);
+}
+
+/* Return a new ref to an array of per-frame hashes ("ip", "sym", "dso"). */
+static SV *perl_process_callchain(struct perf_sample *sample,
+				  struct perf_evsel *evsel,
+				  struct addr_location *al)
+{
+	AV *list;
+
+	list = newAV();
+	if (!list)
+		goto exit;
+
+	if (!symbol_conf.use_callchain || !sample->callchain)
+		goto exit;	/* no callchain: the array stays empty */
+
+	if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+				      sample, NULL, NULL, scripting_max_stack) != 0) {
+		pr_err("Failed to resolve callchain. Skipping\n");
+		goto exit;
+	}
+	callchain_cursor_commit(&callchain_cursor);
+
+	while (1) {
+		HV *elem;
+		struct callchain_cursor_node *node;
+		node = callchain_cursor_current(&callchain_cursor);
+		if (!node)
+			break;	/* end of the chain */
+
+		elem = newHV();
+		if (!elem)
+			goto exit;
+
+		if (!hv_stores(elem, "ip", newSVuv(node->ip))) {
+			hv_undef(elem);
+			goto exit;
+		}
+
+		if (node->sym) {
+			HV *sym = newHV();
+			if (!sym) {
+				hv_undef(elem);
+				goto exit;
+			}
+			if (!hv_stores(sym, "start",   newSVuv(node->sym->start)) ||
+			    !hv_stores(sym, "end",     newSVuv(node->sym->end)) ||
+			    !hv_stores(sym, "binding", newSVuv(node->sym->binding)) ||
+			    !hv_stores(sym, "name",    newSVpvn(node->sym->name,
+								node->sym->namelen)) ||
+			    !hv_stores(elem, "sym",    newRV_noinc((SV*)sym))) {
+				hv_undef(sym);
+				hv_undef(elem);
+				goto exit;
+			}
+		}
+
+		if (node->map) {
+			struct map *map = node->map;
+			const char *dsoname = "[unknown]";
+			if (map && map->dso) {
+				if (symbol_conf.show_kernel_path && map->dso->long_name)
+					dsoname = map->dso->long_name;
+				else
+					dsoname = map->dso->name;
+			}
+			if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) {
+				hv_undef(elem);
+				goto exit;
+			}
+		}
+
+		callchain_cursor_advance(&callchain_cursor);
+		av_push(list, newRV_noinc((SV*)elem));
+	}
+
+exit:	/* error paths land here too and return the frames pushed so far */
+	return newRV_noinc((SV*)list);
+}
+
+/*
+ * Call the script's "<system>::<name>" sub for a tracepoint sample, falling
+ * back to main::trace_unhandled when only that one is defined.
+ */
+static void perl_process_tracepoint(struct perf_sample *sample,
+				    struct perf_evsel *evsel,
+				    struct addr_location *al)
+{
+	struct thread *thread = al->thread;
+	struct event_format *event = evsel->tp_format;
+	struct format_field *field;
+	static char handler[256];
+	unsigned long long val;
+	unsigned long s, ns;
+	int pid;
+	int cpu = sample->cpu;
+	void *data = sample->raw_data;
+	unsigned long long nsecs = sample->time;
+	const char *comm = thread__comm_str(thread);
+	dSP;
+
+	if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+		return;
+
+	if (!event) {
+		pr_debug("ug! no event found for type %" PRIu64, (u64)evsel->attr.config);
+		return;
+	}
+
+	pid = raw_field_value(event, "common_pid", data);
+	/* Bounded write: a long system/event name must not overrun handler[]. */
+	snprintf(handler, sizeof(handler), "%s::%s", event->system, event->name);
+
+	if (!test_and_set_bit(event->id, events_defined))
+		define_event_symbols(event, handler, event->print_fmt.args);
+
+	s = nsecs / NSEC_PER_SEC;
+	ns = nsecs - s * NSEC_PER_SEC;
+
+	scripting_context->event_data = data;
+	scripting_context->pevent = evsel->tp_format->pevent;
+
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+
+	XPUSHs(sv_2mortal(newSVpv(handler, 0)));
+	XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
+	XPUSHs(sv_2mortal(newSVuv(cpu)));
+	XPUSHs(sv_2mortal(newSVuv(s)));
+	XPUSHs(sv_2mortal(newSVuv(ns)));
+	XPUSHs(sv_2mortal(newSViv(pid)));
+	XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+	XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
+	/* common fields other than pid can be accessed via xsub fns */
+	for (field = event->format.fields; field; field = field->next) {
+		if (field->flags & FIELD_IS_STRING) {
+			int offset;
+			if (field->flags & FIELD_IS_DYNAMIC) {
+				offset = *(int *)(data + field->offset);
+				offset &= 0xffff;
+			} else
+				offset = field->offset;
+			XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
+		} else { /* FIELD_IS_NUMERIC */
+			val = read_size(event, data + field->offset,
+					field->size);
+			if (field->flags & FIELD_IS_SIGNED)
+				XPUSHs(sv_2mortal(newSViv(val)));
+			else
+				XPUSHs(sv_2mortal(newSVuv(val)));
+		}
+	}
+
+	PUTBACK;
+	if (get_cv(handler, 0))
+		call_pv(handler, G_SCALAR);
+	else if (get_cv("main::trace_unhandled", 0)) {
+		/* NOTE(review): pushed after PUTBACK; confirm the sub sees these */
+		XPUSHs(sv_2mortal(newSVpv(handler, 0)));
+		XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
+		XPUSHs(sv_2mortal(newSVuv(cpu)));
+		XPUSHs(sv_2mortal(newSVuv(nsecs)));
+		XPUSHs(sv_2mortal(newSViv(pid)));
+		XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+		XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
+		call_pv("main::trace_unhandled", G_SCALAR);
+	}
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+static void perl_process_event_generic(union perf_event *event,
+				       struct perf_sample *sample,
+				       struct perf_evsel *evsel)
+{
+	dSP;
+	/* Nothing to do unless the script defines process_event. */
+	if (!get_cv("process_event", 0))
+		return;
+	/* Arguments: byte copies of the event, attr, sample and raw data. */
+	ENTER;
+	SAVETMPS;
+	PUSHMARK(SP);
+	XPUSHs(sv_2mortal(newSVpvn((const char *)event, event->header.size)));
+	XPUSHs(sv_2mortal(newSVpvn((const char *)&evsel->attr, sizeof(evsel->attr))));
+	XPUSHs(sv_2mortal(newSVpvn((const char *)sample, sizeof(*sample))));
+	XPUSHs(sv_2mortal(newSVpvn((const char *)sample->raw_data, sample->raw_size)));
+	PUTBACK;
+	call_pv("process_event", G_SCALAR);
+	SPAGAIN;
+	PUTBACK;
+	FREETMPS;
+	LEAVE;
+}
+
+/* .process_event hook: run the tracepoint handler, then process_event(). */
+static void perl_process_event(union perf_event *event,
+			       struct perf_sample *sample,
+			       struct perf_evsel *evsel, struct addr_location *al)
+{
+	perl_process_tracepoint(sample, evsel, al);
+	perl_process_event_generic(event, sample, evsel);
+}
+
+static void run_start_sub(void)
+{
+	dSP; /* access to Perl stack */
+	PUSHMARK(SP);
+	/* Call the script's optional main::trace_begin sub with no arguments. */
+	if (get_cv("main::trace_begin", 0))
+		call_pv("main::trace_begin", G_DISCARD | G_NOARGS);
+}
+
+/*
+ * Start trace script: build a Perl argv of "", script, argv[0..argc-1], parse
+ * and run the script, then call its optional main::trace_begin sub.
+ * Returns 0 on success, -ENOMEM if argv cannot be allocated, -1 otherwise.
+ */
+static int perl_start_script(const char *script, int argc, const char **argv)
+{
+	const char **command_line;
+	int i;
+
+	command_line = malloc((argc + 2) * sizeof(*command_line));
+	if (!command_line)
+		return -ENOMEM;
+
+	command_line[0] = "";
+	command_line[1] = script;
+	for (i = 0; i < argc; i++)
+		command_line[i + 2] = argv[i];
+
+	my_perl = perl_alloc();
+	perl_construct(my_perl);
+
+	if (perl_parse(my_perl, xs_init, argc + 2, (char **)command_line,
+		       (char **)NULL))
+		goto error;
+
+	if (perl_run(my_perl))
+		goto error;
+
+	/* ERRSV is Perl's $@: a pending error also counts as a failed start. */
+	if (SvTRUE(ERRSV))
+		goto error;
+
+	run_start_sub();
+
+	free(command_line);
+	return 0;
+
+error:
+	perl_free(my_perl);
+	free(command_line);
+	return -1;
+}
+
+static int perl_flush_script(void)
+{
+	return 0;	/* no-op: this function does no work and always succeeds */
+}
+
+/*
+ * Stop trace script: call optional main::trace_end, then free my_perl.
+ */
+static int perl_stop_script(void)
+{
+	dSP; /* access to Perl stack */
+	PUSHMARK(SP);
+
+	if (get_cv("main::trace_end", 0))
+		call_pv("main::trace_end", G_DISCARD | G_NOARGS);
+
+	perl_destruct(my_perl);	/* tear the interpreter down before freeing it */
+	perl_free(my_perl);
+
+	return 0;
+}
+
+/*
+ * Write "<outfile>.pl": a template Perl script with one handler sub per event
+ * returned by trace_find_next_event(). Returns 0, or -1 if fopen() fails.
+ */
+static int perl_generate_script(struct pevent *pevent, const char *outfile)
+{
+	struct event_format *event = NULL;
+	struct format_field *f;
+	char fname[PATH_MAX];
+	int not_first, count;
+	FILE *ofp;
+
+	snprintf(fname, sizeof(fname), "%s.pl", outfile);
+	ofp = fopen(fname, "w");
+	if (ofp == NULL) {
+		fprintf(stderr, "couldn't open %s\n", fname);
+		return -1;
+	}
+
+	fprintf(ofp, "# perf script event handlers, "
+		"generated by perf script -g perl\n");
+
+	fprintf(ofp, "# Licensed under the terms of the GNU GPL"
+		" License version 2\n\n");
+
+	fprintf(ofp, "# The common_* event handler fields are the most useful "
+		"fields common to\n");
+
+	fprintf(ofp, "# all events.  They don't necessarily correspond to "
+		"the 'common_*' fields\n");
+
+	fprintf(ofp, "# in the format files.  Those fields not available as "
+		"handler params can\n");
+
+	fprintf(ofp, "# be retrieved using Perl functions of the form "
+		"common_*($context).\n");
+
+	fprintf(ofp, "# See Context.pm for the list of available "
+		"functions.\n\n");
+
+	fprintf(ofp, "use lib \"$ENV{'PERF_EXEC_PATH'}/scripts/perl/"
+		"Perf-Trace-Util/lib\";\n");
+
+	fprintf(ofp, "use lib \"./Perf-Trace-Util/lib\";\n");
+	fprintf(ofp, "use Perf::Trace::Core;\n");
+	fprintf(ofp, "use Perf::Trace::Context;\n");
+	fprintf(ofp, "use Perf::Trace::Util;\n\n");
+
+	fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");
+	fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n");
+
+	fprintf(ofp, "\n\
+sub print_backtrace\n\
+{\n\
+	my $callchain = shift;\n\
+	for my $node (@$callchain)\n\
+	{\n\
+		if(exists $node->{sym})\n\
+		{\n\
+			printf( \"\\t[\\%%x] \\%%s\\n\", $node->{ip}, $node->{sym}{name});\n\
+		}\n\
+		else\n\
+		{\n\
+			printf( \"\\t[\\%%x]\\n\", $node->{ip});\n\
+		}\n\
+	}\n\
+}\n\n\
+");
+
+	while ((event = trace_find_next_event(pevent, event))) {
+		fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
+		fprintf(ofp, "\tmy (");
+
+		fprintf(ofp, "$event_name, ");
+		fprintf(ofp, "$context, ");
+		fprintf(ofp, "$common_cpu, ");
+		fprintf(ofp, "$common_secs, ");
+		fprintf(ofp, "$common_nsecs,\n");
+		fprintf(ofp, "\t    $common_pid, ");
+		fprintf(ofp, "$common_comm, ");
+		fprintf(ofp, "$common_callchain,\n\t    ");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+			if (++count % 5 == 0)
+				fprintf(ofp, "\n\t    ");
+
+			fprintf(ofp, "$%s", f->name);
+		}
+		fprintf(ofp, ") = @_;\n\n");
+
+		fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
+			"$common_secs, $common_nsecs,\n\t             "
+			"$common_pid, $common_comm, $common_callchain);\n\n");
+
+		fprintf(ofp, "\tprintf(\"");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+			if (count && count % 4 == 0) {
+				fprintf(ofp, "\".\n\t       \"");
+			}
+			count++;
+
+			fprintf(ofp, "%s=", f->name);
+			if (f->flags & FIELD_IS_STRING ||
+			    f->flags & FIELD_IS_FLAG ||
+			    f->flags & FIELD_IS_SYMBOLIC)
+				fprintf(ofp, "%%s");
+			else if (f->flags & FIELD_IS_SIGNED)
+				fprintf(ofp, "%%d");
+			else
+				fprintf(ofp, "%%u");
+		}
+
+		fprintf(ofp, "\\n\",\n\t       ");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+
+			if (++count % 5 == 0)
+				fprintf(ofp, "\n\t       ");
+
+			if (f->flags & FIELD_IS_FLAG) {
+				if ((count - 1) % 5 != 0) {
+					fprintf(ofp, "\n\t       ");
+					count = 4;
+				}
+				fprintf(ofp, "flag_str(\"");
+				fprintf(ofp, "%s::%s\", ", event->system,
+					event->name);
+				fprintf(ofp, "\"%s\", $%s)", f->name, f->name);
+			} else if (f->flags & FIELD_IS_SYMBOLIC) {
+				if ((count - 1) % 5 != 0) {
+					fprintf(ofp, "\n\t       ");
+					count = 4;
+				}
+				fprintf(ofp, "symbol_str(\"");
+				fprintf(ofp, "%s::%s\", ", event->system,
+					event->name);
+				fprintf(ofp, "\"%s\", $%s)", f->name, f->name);
+			} else
+				fprintf(ofp, "$%s", f->name);
+		}
+
+		fprintf(ofp, ");\n\n");
+
+		fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+
+		fprintf(ofp, "}\n\n");
+	}
+
+	fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, "
+		"$common_cpu, $common_secs, $common_nsecs,\n\t    "
+		"$common_pid, $common_comm, $common_callchain) = @_;\n\n");
+
+	fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
+		"$common_secs, $common_nsecs,\n\t             $common_pid, "
+		"$common_comm, $common_callchain);\n");
+	fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+	fprintf(ofp, "}\n\n");
+
+	fprintf(ofp, "sub print_header\n{\n"
+		"\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n"
+		"\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t       "
+		"$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}\n");
+
+	fprintf(ofp,
+		"\n# Packed byte string args of process_event():\n"
+		"#\n"
+		"# $event:\tunion perf_event\tutil/event.h\n"
+		"# $attr:\tstruct perf_event_attr\tlinux/perf_event.h\n"
+		"# $sample:\tstruct perf_sample\tutil/event.h\n"
+		"# $raw_data:\tperf_sample->raw_data\tutil/event.h\n"
+		"\n"
+		"sub process_event\n"
+		"{\n"
+		"\tmy ($event, $attr, $sample, $raw_data) = @_;\n"
+		"\n"
+		"\tmy @event\t= unpack(\"LSS\", $event);\n"
+		"\tmy @attr\t= unpack(\"LLQQQQQLLQQ\", $attr);\n"
+		"\tmy @sample\t= unpack(\"QLLQQQQQLL\", $sample);\n"
+		"\tmy @raw_data\t= unpack(\"C*\", $raw_data);\n"
+		"\n"
+		"\tuse Data::Dumper;\n"
+		"\tprint Dumper \\@event, \\@attr, \\@sample, \\@raw_data;\n"
+		"}\n");
+
+	fclose(ofp);
+
+	fprintf(stderr, "generated Perl script: %s\n", fname);
+
+	return 0;
+}
+
+struct scripting_ops perl_scripting_ops = {	/* hooks implemented above */
+	.name = "Perl",
+	.start_script = perl_start_script,
+	.flush_script = perl_flush_script,
+	.stop_script = perl_stop_script,
+	.process_event = perl_process_event,
+	.generate_script = perl_generate_script,
+};
diff --git a/util/scripting-engines/trace-event-python.c b/util/scripting-engines/trace-event-python.c
new file mode 100644
index 0000000..10dd5fc
--- /dev/null
+++ b/util/scripting-engines/trace-event-python.c
@@ -0,0 +1,1551 @@
+/*
+ * trace-event-python.  Feed trace events to an embedded Python interpreter.
+ *
+ * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <Python.h>
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/compiler.h>
+#include <linux/time64.h>
+
+#include "../../perf.h"
+#include "../debug.h"
+#include "../callchain.h"
+#include "../evsel.h"
+#include "../util.h"
+#include "../event.h"
+#include "../thread.h"
+#include "../comm.h"
+#include "../machine.h"
+#include "../db-export.h"
+#include "../thread-stack.h"
+#include "../trace-event.h"
+#include "../call-path.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "print_binary.h"
+#include "stat.h"
+
+#if PY_MAJOR_VERSION < 3
+#define _PyUnicode_FromString(arg) \
+  PyString_FromString(arg)
+#define _PyUnicode_FromStringAndSize(arg1, arg2) \
+  PyString_FromStringAndSize((arg1), (arg2))
+#define _PyBytes_FromStringAndSize(arg1, arg2) \
+  PyString_FromStringAndSize((arg1), (arg2))
+#define _PyLong_FromLong(arg) \
+  PyInt_FromLong(arg)
+#define _PyLong_AsLong(arg) \
+  PyInt_AsLong(arg)
+#define _PyCapsule_New(arg1, arg2, arg3) \
+  PyCObject_FromVoidPtr((arg1), (arg2))
+
+PyMODINIT_FUNC initperf_trace_context(void);
+#else
+#define _PyUnicode_FromString(arg) \
+  PyUnicode_FromString(arg)
+#define _PyUnicode_FromStringAndSize(arg1, arg2) \
+  PyUnicode_FromStringAndSize((arg1), (arg2))
+#define _PyBytes_FromStringAndSize(arg1, arg2) \
+  PyBytes_FromStringAndSize((arg1), (arg2))
+#define _PyLong_FromLong(arg) \
+  PyLong_FromLong(arg)
+#define _PyLong_AsLong(arg) \
+  PyLong_AsLong(arg)
+#define _PyCapsule_New(arg1, arg2, arg3) \
+  PyCapsule_New((arg1), (arg2), (arg3))
+
+PyMODINIT_FUNC PyInit_perf_trace_context(void);
+#endif
+
+#define TRACE_EVENT_TYPE_MAX				\
+	((1 << (sizeof(unsigned short) * 8)) - 1)
+
+static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
+
+#define MAX_FIELDS	64
+#define N_COMMON_FIELDS	7
+
+extern struct scripting_context *scripting_context;
+
+static char *cur_field_name;
+static int zero_flag_atom;
+
+static PyObject *main_module, *main_dict;
+
+struct tables {
+	struct db_export	dbe;
+	PyObject		*evsel_handler;
+	PyObject		*machine_handler;
+	PyObject		*thread_handler;
+	PyObject		*comm_handler;
+	PyObject		*comm_thread_handler;
+	PyObject		*dso_handler;
+	PyObject		*symbol_handler;
+	PyObject		*branch_type_handler;
+	PyObject		*sample_handler;
+	PyObject		*call_path_handler;
+	PyObject		*call_return_handler;
+	bool			db_export_mode;
+};
+
+static struct tables tables_global;
+
+static void handler_call_die(const char *handler_name) __noreturn;
+static void handler_call_die(const char *handler_name)
+{
+	/* Print the pending Python exception, name the handler, then die. */
+	PyErr_Print();
+	fprintf(stderr, "handler: %s\n", handler_name ? handler_name : "?");
+	Py_FatalError("problem in Python trace event handler");
+	abort();	/* not reached: Py_FatalError() does not return */
+}
+
+/*
+ * Insert val into the dictionary and decrement the reference counter.
+ * This is necessary for dictionaries since PyDict_SetItemString() does not
+ * steal a reference, as opposed to PyTuple_SetItem().
+ */
+static void pydict_set_item_string_decref(PyObject *dict, const char *key, PyObject *val)
+{
+	PyDict_SetItemString(dict, key, val);	/* result is not checked */
+	Py_DECREF(val);
+}
+
+/* Look up a callable named handler_name in main_dict; NULL if there is none. */
+static PyObject *get_handler(const char *handler_name)
+{
+	PyObject *obj = PyDict_GetItemString(main_dict, handler_name);
+
+	if (!obj || !PyCallable_Check(obj))
+		return NULL;
+	return obj;
+}
+
+/* Return the handler's co_argcount, or 0 if it cannot be determined. */
+static int get_argument_count(PyObject *handler)
+{
+	int arg_count = 0;
+
+	/*
+	 * The attribute for the code object is func_code in Python 2,
+	 * whereas it is __code__ in Python 3.0+.
+	 */
+	PyObject *code_obj = PyObject_GetAttrString(handler, "func_code");
+	if (PyErr_Occurred()) {
+		PyErr_Clear();
+		code_obj = PyObject_GetAttrString(handler,
+			"__code__");
+	}
+	PyErr_Clear();	/* a failed __code__ lookup is ignored as well */
+	if (code_obj) {
+		PyObject *arg_count_obj = PyObject_GetAttrString(code_obj,
+			"co_argcount");
+		if (arg_count_obj) {
+			arg_count = (int) _PyLong_AsLong(arg_count_obj);
+			Py_DECREF(arg_count_obj);
+		}
+		Py_DECREF(code_obj);
+	}
+	return arg_count;
+}
+
+/* Call handler with args and drop the result; a failed call is fatal. */
+static void call_object(PyObject *handler, PyObject *args, const char *die_msg)
+{
+	PyObject *result = PyObject_CallObject(handler, args);
+
+	if (!result)
+		handler_call_die(die_msg);
+	Py_DECREF(result);
+}
+
+/* Call the script's handler_name function with args, if it defines one. */
+static void try_call_object(const char *handler_name, PyObject *args)
+{
+	PyObject *callee = get_handler(handler_name);
+
+	if (callee)
+		call_object(callee, args, handler_name);
+}
+
+/*
+ * Pass (ev_name, field_name, eval_flag(field_value), field_str) to the script's
+ * define_symbolic_value (PRINT_SYMBOL) or define_flag_value (otherwise).
+ */
+static void define_value(enum print_arg_type field_type, const char *ev_name,
+			 const char *field_name, const char *field_value,
+			 const char *field_str)
+{
+	const char *handler_name;
+	unsigned long long value;
+	PyObject *t;
+
+	handler_name = field_type == PRINT_SYMBOL ? "define_symbolic_value" :
+						    "define_flag_value";
+
+	t = PyTuple_New(4);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	value = eval_flag(field_value);
+
+	PyTuple_SetItem(t, 0, _PyUnicode_FromString(ev_name));
+	PyTuple_SetItem(t, 1, _PyUnicode_FromString(field_name));
+	PyTuple_SetItem(t, 2, _PyLong_FromLong(value));
+	PyTuple_SetItem(t, 3, _PyUnicode_FromString(field_str));
+
+	try_call_object(handler_name, t);
+	Py_DECREF(t);
+}
+
+/*
+ * Define each entry of a print_flag_sym list; a NULL list is a no-op.
+ */
+static void define_values(enum print_arg_type field_type,
+			  struct print_flag_sym *field,
+			  const char *ev_name, const char *field_name)
+{
+	for (; field; field = field->next)
+		define_value(field_type, ev_name, field_name, field->value,
+			     field->str);
+}
+
+/*
+ * Announce a field to the script: define_symbolic_field for PRINT_SYMBOL,
+ * define_flag_field otherwise. The handler gets (ev_name, field_name), plus
+ * delim as a third argument only when field_type is PRINT_FLAGS.
+ */
+static void define_field(enum print_arg_type field_type, const char *ev_name,
+			 const char *field_name, const char *delim)
+{
+	const bool with_delim = field_type == PRINT_FLAGS;
+	const char *handler_name = "define_flag_field";
+	PyObject *t;
+
+	if (field_type == PRINT_SYMBOL)
+		handler_name = "define_symbolic_field";
+
+	t = PyTuple_New(with_delim ? 3 : 2);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	PyTuple_SetItem(t, 0, _PyUnicode_FromString(ev_name));
+	PyTuple_SetItem(t, 1, _PyUnicode_FromString(field_name));
+	if (with_delim)
+		PyTuple_SetItem(t, 2, _PyUnicode_FromString(delim));
+
+	try_call_object(handler_name, t);
+
+	Py_DECREF(t);
+}
+
+/* Walk print_arg nodes, reporting flag and symbol fields to the script. */
+static void define_event_symbols(struct event_format *event,
+				 const char *ev_name, struct print_arg *args)
+{
+	if (args == NULL)
+		return;
+
+	switch (args->type) {
+	case PRINT_NULL:
+		break;
+	case PRINT_ATOM:	/* reported as flag value "0" of cur_field_name */
+		define_value(PRINT_FLAGS, ev_name, cur_field_name, "0",
+			     args->atom.atom);
+		zero_flag_atom = 0;
+		break;
+	case PRINT_FIELD:	/* remember the name; FLAGS/SYMBOL below read it */
+		free(cur_field_name);
+		cur_field_name = strdup(args->field.name);
+		break;
+	case PRINT_FLAGS:
+		define_event_symbols(event, ev_name, args->flags.field);
+		define_field(PRINT_FLAGS, ev_name, cur_field_name,
+			     args->flags.delim);
+		define_values(PRINT_FLAGS, args->flags.flags, ev_name,
+			      cur_field_name);
+		break;
+	case PRINT_SYMBOL:
+		define_event_symbols(event, ev_name, args->symbol.field);
+		define_field(PRINT_SYMBOL, ev_name, cur_field_name, NULL);
+		define_values(PRINT_SYMBOL, args->symbol.symbols, ev_name,
+			      cur_field_name);
+		break;
+	case PRINT_HEX:
+	case PRINT_HEX_STR:
+		define_event_symbols(event, ev_name, args->hex.field);
+		define_event_symbols(event, ev_name, args->hex.size);
+		break;
+	case PRINT_INT_ARRAY:
+		define_event_symbols(event, ev_name, args->int_array.field);
+		define_event_symbols(event, ev_name, args->int_array.count);
+		define_event_symbols(event, ev_name, args->int_array.el_size);
+		break;
+	case PRINT_STRING:
+		break;
+	case PRINT_TYPE:
+		define_event_symbols(event, ev_name, args->typecast.item);
+		break;
+	case PRINT_OP:
+		if (strcmp(args->op.op, ":") == 0)
+			zero_flag_atom = 1;
+		define_event_symbols(event, ev_name, args->op.left);
+		define_event_symbols(event, ev_name, args->op.right);
+		break;
+	default:
+		/* gcc warns for these? */
+	case PRINT_BSTRING:
+	case PRINT_DYNAMIC_ARRAY:
+	case PRINT_DYNAMIC_ARRAY_LEN:
+	case PRINT_FUNC:
+	case PRINT_BITMASK:
+		/* unsupported: no message is printed, and args->next is skipped */
+		return;
+	}
+
+	if (args->next)
+		define_event_symbols(event, ev_name, args->next);
+}
+
+/*
+ * Convert a numeric field to a Python int, or to a list of ints when the
+ * field is an array. A zero arraylen yields an empty list.
+ */
+static PyObject *get_field_numeric_entry(struct event_format *event,
+		struct format_field *field, void *data)
+{
+	bool is_array = field->flags & FIELD_IS_ARRAY;
+	PyObject *obj = NULL, *list = NULL;
+	unsigned int item_size = field->size, n_items = 1, i;
+	unsigned long long val;
+
+	if (is_array) {
+		list = PyList_New(field->arraylen);
+		if (!list)
+			Py_FatalError("couldn't create Python list");
+		n_items = field->arraylen;
+		/* guard the division against a zero arraylen */
+		item_size = n_items ? field->size / n_items : 0;
+	}
+
+	for (i = 0; i < n_items; i++) {
+		val = read_size(event, data + field->offset + i * item_size,
+				item_size);
+		if (field->flags & FIELD_IS_SIGNED) {
+			if ((long long)val >= LONG_MIN &&
+					(long long)val <= LONG_MAX)
+				obj = _PyLong_FromLong(val);
+			else
+				obj = PyLong_FromLongLong(val);
+		} else {
+			if (val <= LONG_MAX)
+				obj = _PyLong_FromLong(val);
+			else
+				obj = PyLong_FromUnsignedLongLong(val);
+		}
+		if (is_array)
+			PyList_SET_ITEM(list, i, obj);
+	}
+	return is_array ? list : obj;
+}
+
+/* Return a new list of per-frame dicts for the sample's callchain. */
+static PyObject *python_process_callchain(struct perf_sample *sample,
+					 struct perf_evsel *evsel,
+					 struct addr_location *al)
+{
+	PyObject *pylist;
+
+	pylist = PyList_New(0);
+	if (!pylist)
+		Py_FatalError("couldn't create Python list");
+
+	if (!symbol_conf.use_callchain || !sample->callchain)
+		goto exit;	/* no callchain: the list stays empty */
+
+	if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+				      sample, NULL, NULL,
+				      scripting_max_stack) != 0) {
+		pr_err("Failed to resolve callchain. Skipping\n");
+		goto exit;
+	}
+	callchain_cursor_commit(&callchain_cursor);
+
+	while (1) {
+		PyObject *pyelem;
+		struct callchain_cursor_node *node;
+		node = callchain_cursor_current(&callchain_cursor);
+		if (!node)
+			break;	/* end of the chain */
+
+		pyelem = PyDict_New();
+		if (!pyelem)
+			Py_FatalError("couldn't create Python dictionary");
+
+		/* one dict per frame: "ip", plus "sym" and "dso" when known */
+		pydict_set_item_string_decref(pyelem, "ip",
+				PyLong_FromUnsignedLongLong(node->ip));
+
+		if (node->sym) {
+			PyObject *pysym  = PyDict_New();
+			if (!pysym)
+				Py_FatalError("couldn't create Python dictionary");
+			pydict_set_item_string_decref(pysym, "start",
+					PyLong_FromUnsignedLongLong(node->sym->start));
+			pydict_set_item_string_decref(pysym, "end",
+					PyLong_FromUnsignedLongLong(node->sym->end));
+			pydict_set_item_string_decref(pysym, "binding",
+					_PyLong_FromLong(node->sym->binding));
+			pydict_set_item_string_decref(pysym, "name",
+					_PyUnicode_FromStringAndSize(node->sym->name,
+							node->sym->namelen));
+			pydict_set_item_string_decref(pyelem, "sym", pysym);
+		}
+
+		if (node->map) {
+			struct map *map = node->map;
+			const char *dsoname = "[unknown]";
+			if (map && map->dso) {
+				if (symbol_conf.show_kernel_path && map->dso->long_name)
+					dsoname = map->dso->long_name;
+				else
+					dsoname = map->dso->name;
+			}
+			pydict_set_item_string_decref(pyelem, "dso",
+					_PyUnicode_FromString(dsoname));
+		}
+
+		callchain_cursor_advance(&callchain_cursor);
+		PyList_Append(pylist, pyelem);
+		Py_DECREF(pyelem);	/* drop our own reference to the dict */
+	}
+
+exit:
+	return pylist;
+}
+
+/* Return a new 2-tuple (id, value) built from one sample read value. */
+static PyObject *get_sample_value_as_tuple(struct sample_read_value *value)
+{
+	PyObject *pair = PyTuple_New(2);
+
+	if (pair == NULL)
+		Py_FatalError("couldn't create Python tuple");
+
+	PyTuple_SetItem(pair, 0, PyLong_FromUnsignedLongLong(value->id));
+	PyTuple_SetItem(pair, 1, PyLong_FromUnsignedLongLong(value->value));
+
+	return pair;
+}
+
+/*
+ * Add the counter read data of @sample to @dict_sample.
+ *
+ * "time_enabled" and "time_running" are added only when the event's
+ * read_format requests them.  "values" is always added: a list of (id, value)
+ * tuples with one entry per group member for PERF_FORMAT_GROUP, or a single
+ * entry otherwise.
+ */
+static void set_sample_read_in_dict(PyObject *dict_sample,
+					 struct perf_sample *sample,
+					 struct perf_evsel *evsel)
+{
+	const u64 fmt = evsel->attr.read_format;
+	const bool grouped = fmt & PERF_FORMAT_GROUP;
+	PyObject *list;
+	unsigned int idx;
+
+	if (fmt & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		pydict_set_item_string_decref(dict_sample, "time_enabled",
+			PyLong_FromUnsignedLongLong(sample->read.time_enabled));
+
+	if (fmt & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		pydict_set_item_string_decref(dict_sample, "time_running",
+			PyLong_FromUnsignedLongLong(sample->read.time_running));
+
+	list = grouped ? PyList_New(sample->read.group.nr) : PyList_New(1);
+	if (list == NULL)
+		Py_FatalError("couldn't create Python list");
+
+	if (!grouped) {
+		PyList_SET_ITEM(list, 0,
+				get_sample_value_as_tuple(&sample->read.one));
+	} else {
+		for (idx = 0; idx < sample->read.group.nr; idx++) {
+			PyObject *pair;
+
+			pair = get_sample_value_as_tuple(&sample->read.group.values[idx]);
+			PyList_SET_ITEM(list, idx, pair);
+		}
+	}
+
+	pydict_set_item_string_decref(dict_sample, "values", list);
+}
+
+/*
+ * Build the "perf sample" dict handed to script handlers.
+ *
+ * Keys: "ev_name", "attr" (the raw event attribute structure), "sample" (a
+ * nested dict with pid/tid/cpu/ip/time/period/phys_addr plus the read
+ * values), "raw_buf", "comm", optionally "dso" and "symbol", and "callchain".
+ *
+ * @callchain is stored under "callchain" through
+ * pydict_set_item_string_decref(), so the caller must pass a reference it is
+ * prepared to give up.
+ *
+ * Returns a new reference to the dict.
+ */
+static PyObject *get_perf_sample_dict(struct perf_sample *sample,
+					 struct perf_evsel *evsel,
+					 struct addr_location *al,
+					 PyObject *callchain)
+{
+	PyObject *dict, *dict_sample;
+
+	dict = PyDict_New();
+	if (!dict)
+		Py_FatalError("couldn't create Python dictionary");
+
+	dict_sample = PyDict_New();
+	if (!dict_sample)
+		Py_FatalError("couldn't create Python dictionary");
+
+	pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel)));
+	/*
+	 * The attribute structure is binary data, not text: export it as a
+	 * bytes object, exactly like "raw_buf" below.  Building a text object
+	 * from arbitrary bytes can fail and yield NULL.
+	 */
+	pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize(
+			(const char *)&evsel->attr, sizeof(evsel->attr)));
+
+	pydict_set_item_string_decref(dict_sample, "pid",
+			_PyLong_FromLong(sample->pid));
+	pydict_set_item_string_decref(dict_sample, "tid",
+			_PyLong_FromLong(sample->tid));
+	pydict_set_item_string_decref(dict_sample, "cpu",
+			_PyLong_FromLong(sample->cpu));
+	pydict_set_item_string_decref(dict_sample, "ip",
+			PyLong_FromUnsignedLongLong(sample->ip));
+	pydict_set_item_string_decref(dict_sample, "time",
+			PyLong_FromUnsignedLongLong(sample->time));
+	pydict_set_item_string_decref(dict_sample, "period",
+			PyLong_FromUnsignedLongLong(sample->period));
+	pydict_set_item_string_decref(dict_sample, "phys_addr",
+			PyLong_FromUnsignedLongLong(sample->phys_addr));
+	set_sample_read_in_dict(dict_sample, sample, evsel);
+	pydict_set_item_string_decref(dict, "sample", dict_sample);
+
+	pydict_set_item_string_decref(dict, "raw_buf", _PyBytes_FromStringAndSize(
+			(const char *)sample->raw_data, sample->raw_size));
+	pydict_set_item_string_decref(dict, "comm",
+			_PyUnicode_FromString(thread__comm_str(al->thread)));
+	/* Same guard as the callchain code: a map may come without a dso. */
+	if (al->map && al->map->dso) {
+		pydict_set_item_string_decref(dict, "dso",
+			_PyUnicode_FromString(al->map->dso->name));
+	}
+	if (al->sym) {
+		pydict_set_item_string_decref(dict, "symbol",
+			_PyUnicode_FromString(al->sym->name));
+	}
+
+	pydict_set_item_string_decref(dict, "callchain", callchain);
+
+	return dict;
+}
+
+/*
+ * Deliver one tracepoint sample to the script.
+ *
+ * The handler named "<system>__<event>" is called with positional arguments:
+ * handler name, context, cpu, secs, nsecs, pid, comm, callchain, followed by
+ * one argument per event field.  When the script has no such handler,
+ * "trace_unhandled" is called instead with the handler name, the context and
+ * a single dict holding all of the values above.  In both cases the perf
+ * sample dict is appended when the handler takes exactly one more argument.
+ *
+ * Nothing is done when neither handler exists.
+ */
+static void python_process_tracepoint(struct perf_sample *sample,
+				      struct perf_evsel *evsel,
+				      struct addr_location *al)
+{
+	struct event_format *event = evsel->tp_format;
+	PyObject *handler, *context, *t, *obj = NULL, *callchain;
+	PyObject *dict = NULL, *all_entries_dict = NULL;
+	static char handler_name[256];
+	struct format_field *field;
+	unsigned long s, ns;
+	unsigned n = 0;
+	int pid;
+	int cpu = sample->cpu;
+	void *data = sample->raw_data;
+	unsigned long long nsecs = sample->time;
+	const char *comm = thread__comm_str(al->thread);
+	const char *default_handler_name = "trace_unhandled";
+
+	if (!event) {
+		snprintf(handler_name, sizeof(handler_name),
+			 "ug! no event found for type %" PRIu64, (u64)evsel->attr.config);
+		Py_FatalError(handler_name);
+	}
+
+	pid = raw_field_value(event, "common_pid", data);
+
+	/* Bounded: system and event names come from the trace data. */
+	snprintf(handler_name, sizeof(handler_name), "%s__%s",
+		 event->system, event->name);
+
+	if (!test_and_set_bit(event->id, events_defined))
+		define_event_symbols(event, handler_name, event->print_fmt.args);
+
+	handler = get_handler(handler_name);
+	if (!handler) {
+		handler = get_handler(default_handler_name);
+		if (!handler)
+			return;
+		/* Fallback handler: values are passed in a dict, not positionally. */
+		dict = PyDict_New();
+		if (!dict)
+			Py_FatalError("couldn't create Python dict");
+	}
+
+	t = PyTuple_New(MAX_FIELDS);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+
+	s = nsecs / NSEC_PER_SEC;
+	ns = nsecs - s * NSEC_PER_SEC;
+
+	scripting_context->event_data = data;
+	scripting_context->pevent = evsel->tp_format->pevent;
+
+	context = _PyCapsule_New(scripting_context, NULL, NULL);
+
+	PyTuple_SetItem(t, n++, _PyUnicode_FromString(handler_name));
+	PyTuple_SetItem(t, n++, context);
+
+	/* ip unwinding */
+	callchain = python_process_callchain(sample, evsel, al);
+	/* Need an additional reference for the perf_sample dict */
+	Py_INCREF(callchain);
+
+	if (!dict) {
+		PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu));
+		PyTuple_SetItem(t, n++, _PyLong_FromLong(s));
+		PyTuple_SetItem(t, n++, _PyLong_FromLong(ns));
+		PyTuple_SetItem(t, n++, _PyLong_FromLong(pid));
+		PyTuple_SetItem(t, n++, _PyUnicode_FromString(comm));
+		PyTuple_SetItem(t, n++, callchain);
+	} else {
+		pydict_set_item_string_decref(dict, "common_cpu", _PyLong_FromLong(cpu));
+		pydict_set_item_string_decref(dict, "common_s", _PyLong_FromLong(s));
+		pydict_set_item_string_decref(dict, "common_ns", _PyLong_FromLong(ns));
+		pydict_set_item_string_decref(dict, "common_pid", _PyLong_FromLong(pid));
+		pydict_set_item_string_decref(dict, "common_comm", _PyUnicode_FromString(comm));
+		pydict_set_item_string_decref(dict, "common_callchain", callchain);
+	}
+	for (field = event->format.fields; field; field = field->next) {
+		unsigned int offset, len;
+		unsigned long long val;
+
+		if (field->flags & FIELD_IS_ARRAY) {
+			offset = field->offset;
+			len    = field->size;
+			if (field->flags & FIELD_IS_DYNAMIC) {
+				/* The value read packs length (high 16 bits) and offset (low 16). */
+				val     = pevent_read_number(scripting_context->pevent,
+							     data + offset, len);
+				offset  = val;
+				len     = offset >> 16;
+				offset &= 0xffff;
+			}
+			if (field->flags & FIELD_IS_STRING &&
+			    is_printable_array(data + offset, len)) {
+				obj = _PyUnicode_FromString((char *) data + offset);
+			} else {
+				obj = PyByteArray_FromStringAndSize((const char *) data + offset, len);
+				field->flags &= ~FIELD_IS_STRING;
+			}
+		} else { /* FIELD_IS_NUMERIC */
+			obj = get_field_numeric_entry(event, field, data);
+		}
+		if (!dict)
+			PyTuple_SetItem(t, n++, obj);
+		else
+			pydict_set_item_string_decref(dict, field->name, obj);
+
+	}
+
+	if (dict)
+		PyTuple_SetItem(t, n++, dict);
+
+	if (get_argument_count(handler) == (int) n + 1) {
+		all_entries_dict = get_perf_sample_dict(sample, evsel, al,
+			callchain);
+		PyTuple_SetItem(t, n++,	all_entries_dict);
+	} else {
+		/* Nobody took the extra callchain reference; drop it. */
+		Py_DECREF(callchain);
+	}
+
+	if (_PyTuple_Resize(&t, n) == -1)
+		Py_FatalError("error resizing Python tuple");
+
+	if (!dict)
+		call_object(handler, t, handler_name);
+	else
+		call_object(handler, t, default_handler_name);
+
+	/*
+	 * PyTuple_SetItem() stole the references to dict and all_entries_dict,
+	 * so the tuple owns them: releasing the tuple releases both.  Dropping
+	 * them here as well would release each object twice.
+	 */
+	Py_DECREF(t);
+}
+
+/* Allocate a Python tuple with @sz slots; allocation failure is fatal. */
+static PyObject *tuple_new(unsigned int sz)
+{
+	PyObject *tuple = PyTuple_New(sz);
+
+	if (tuple == NULL)
+		Py_FatalError("couldn't create Python tuple");
+
+	return tuple;
+}
+
+/*
+ * Store @val in slot @pos of tuple @t as a Python integer and return the
+ * result of PyTuple_SetItem().  The integer constructor is selected at
+ * compile time from BITS_PER_LONG.
+ *
+ * NOTE(review): both constructors appear to take a signed argument, so
+ * values above the signed 64-bit range presumably come out negative - confirm
+ * this is what the consumers expect.
+ */
+static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val)
+{
+#if BITS_PER_LONG == 64
+	PyObject *num = _PyLong_FromLong(val);
+
+	return PyTuple_SetItem(t, pos, num);
+#elif BITS_PER_LONG == 32
+	PyObject *num = PyLong_FromLongLong(val);
+
+	return PyTuple_SetItem(t, pos, num);
+#endif
+}
+
+/* Store the signed 32-bit @val in slot @pos of tuple @t. */
+static int tuple_set_s32(PyObject *t, unsigned int pos, s32 val)
+{
+	PyObject *num = _PyLong_FromLong(val);
+
+	return PyTuple_SetItem(t, pos, num);
+}
+
+/* Store the C string @s in slot @pos of tuple @t as a Python string. */
+static int tuple_set_string(PyObject *t, unsigned int pos, const char *s)
+{
+	PyObject *str = _PyUnicode_FromString(s);
+
+	return PyTuple_SetItem(t, pos, str);
+}
+
+/* Pass the row (db_id, name) for @evsel to the script's evsel_table handler. */
+static int python_export_evsel(struct db_export *dbe, struct perf_evsel *evsel)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *row = tuple_new(2);
+	unsigned int col = 0;
+
+	tuple_set_u64(row, col++, evsel->db_id);
+	tuple_set_string(row, col++, perf_evsel__name(evsel));
+
+	call_object(tables->evsel_handler, row, "evsel_table");
+	Py_DECREF(row);
+
+	return 0;
+}
+
+/*
+ * Pass the row (db_id, pid, root_dir) for @machine to the script's
+ * machine_table handler.  A missing root_dir is exported as "".
+ */
+static int python_export_machine(struct db_export *dbe,
+				 struct machine *machine)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	const char *root_dir = "";
+	PyObject *row = tuple_new(3);
+	unsigned int col = 0;
+
+	if (machine->root_dir)
+		root_dir = machine->root_dir;
+
+	tuple_set_u64(row, col++, machine->db_id);
+	tuple_set_s32(row, col++, machine->pid);
+	tuple_set_string(row, col++, root_dir);
+
+	call_object(tables->machine_handler, row, "machine_table");
+	Py_DECREF(row);
+
+	return 0;
+}
+
+/*
+ * Pass the row (thread db_id, machine db_id, main thread db_id, pid, tid)
+ * to the script's thread_table handler.
+ */
+static int python_export_thread(struct db_export *dbe, struct thread *thread,
+				u64 main_thread_db_id, struct machine *machine)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *row = tuple_new(5);
+	unsigned int col = 0;
+
+	tuple_set_u64(row, col++, thread->db_id);
+	tuple_set_u64(row, col++, machine->db_id);
+	tuple_set_u64(row, col++, main_thread_db_id);
+	tuple_set_s32(row, col++, thread->pid_);
+	tuple_set_s32(row, col++, thread->tid);
+
+	call_object(tables->thread_handler, row, "thread_table");
+	Py_DECREF(row);
+
+	return 0;
+}
+
+/* Pass the row (db_id, comm string) to the script's comm_table handler. */
+static int python_export_comm(struct db_export *dbe, struct comm *comm)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *row = tuple_new(2);
+	unsigned int col = 0;
+
+	tuple_set_u64(row, col++, comm->db_id);
+	tuple_set_string(row, col++, comm__str(comm));
+
+	call_object(tables->comm_handler, row, "comm_table");
+	Py_DECREF(row);
+
+	return 0;
+}
+
+/*
+ * Pass the row (db_id, comm db_id, thread db_id) to the script's
+ * comm_thread_table handler.
+ */
+static int python_export_comm_thread(struct db_export *dbe, u64 db_id,
+				     struct comm *comm, struct thread *thread)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *row = tuple_new(3);
+	unsigned int col = 0;
+
+	tuple_set_u64(row, col++, db_id);
+	tuple_set_u64(row, col++, comm->db_id);
+	tuple_set_u64(row, col++, thread->db_id);
+
+	call_object(tables->comm_thread_handler, row, "comm_thread_table");
+	Py_DECREF(row);
+
+	return 0;
+}
+
+/*
+ * Pass the row (dso db_id, machine db_id, short name, long name, build id
+ * string) to the script's dso_table handler.
+ */
+static int python_export_dso(struct db_export *dbe, struct dso *dso,
+			     struct machine *machine)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	char sbuild_id[SBUILD_ID_SIZE];
+	unsigned int col = 0;
+	PyObject *row;
+
+	/* Render the build id as text before the row is assembled. */
+	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+
+	row = tuple_new(5);
+	tuple_set_u64(row, col++, dso->db_id);
+	tuple_set_u64(row, col++, machine->db_id);
+	tuple_set_string(row, col++, dso->short_name);
+	tuple_set_string(row, col++, dso->long_name);
+	tuple_set_string(row, col++, sbuild_id);
+
+	call_object(tables->dso_handler, row, "dso_table");
+	Py_DECREF(row);
+
+	return 0;
+}
+
+/*
+ * Pass the row (symbol db_id, dso db_id, start, end, binding, name) to the
+ * script's symbol_table handler.  The symbol's db_id is read from the
+ * per-symbol private area returned by symbol__priv().
+ */
+static int python_export_symbol(struct db_export *dbe, struct symbol *sym,
+				struct dso *dso)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	u64 *sym_db_id = symbol__priv(sym);
+	PyObject *row = tuple_new(6);
+	unsigned int col = 0;
+
+	tuple_set_u64(row, col++, *sym_db_id);
+	tuple_set_u64(row, col++, dso->db_id);
+	tuple_set_u64(row, col++, sym->start);
+	tuple_set_u64(row, col++, sym->end);
+	tuple_set_s32(row, col++, sym->binding);
+	tuple_set_string(row, col++, sym->name);
+
+	call_object(tables->symbol_handler, row, "symbol_table");
+	Py_DECREF(row);
+
+	return 0;
+}
+
+/* Pass the row (branch_type, name) to the script's branch_type_table handler. */
+static int python_export_branch_type(struct db_export *dbe, u32 branch_type,
+				     const char *name)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *row = tuple_new(2);
+	unsigned int col = 0;
+
+	tuple_set_s32(row, col++, branch_type);
+	tuple_set_string(row, col++, name);
+
+	call_object(tables->branch_type_handler, row, "branch_type_table");
+	Py_DECREF(row);
+
+	return 0;
+}
+
+/*
+ * Pass one 22-column row describing the exported sample @es to the script's
+ * sample_table handler.  The column order below is the handler's argument
+ * order.
+ */
+static int python_export_sample(struct db_export *dbe,
+				struct export_sample *es)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *row = tuple_new(22);
+	unsigned int col = 0;
+
+	/* Identity of the sample and of the objects it refers to. */
+	tuple_set_u64(row, col++, es->db_id);
+	tuple_set_u64(row, col++, es->evsel->db_id);
+	tuple_set_u64(row, col++, es->al->machine->db_id);
+	tuple_set_u64(row, col++, es->al->thread->db_id);
+	tuple_set_u64(row, col++, es->comm_db_id);
+	tuple_set_u64(row, col++, es->dso_db_id);
+	tuple_set_u64(row, col++, es->sym_db_id);
+	tuple_set_u64(row, col++, es->offset);
+	tuple_set_u64(row, col++, es->sample->ip);
+	tuple_set_u64(row, col++, es->sample->time);
+	tuple_set_s32(row, col++, es->sample->cpu);
+
+	/* Target address information. */
+	tuple_set_u64(row, col++, es->addr_dso_db_id);
+	tuple_set_u64(row, col++, es->addr_sym_db_id);
+	tuple_set_u64(row, col++, es->addr_offset);
+	tuple_set_u64(row, col++, es->sample->addr);
+
+	/* Remaining sample payload. */
+	tuple_set_u64(row, col++, es->sample->period);
+	tuple_set_u64(row, col++, es->sample->weight);
+	tuple_set_u64(row, col++, es->sample->transaction);
+	tuple_set_u64(row, col++, es->sample->data_src);
+	tuple_set_s32(row, col++, es->sample->flags & PERF_BRANCH_MASK);
+	tuple_set_s32(row, col++, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
+	tuple_set_u64(row, col++, es->call_path_id);
+
+	call_object(tables->sample_handler, row, "sample_table");
+	Py_DECREF(row);
+
+	return 0;
+}
+
+/*
+ * Pass the row (call path db_id, parent db_id, symbol db_id, ip) to the
+ * script's call_path_table handler.  A missing parent or symbol is exported
+ * as id 0.
+ */
+static int python_export_call_path(struct db_export *dbe, struct call_path *cp)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	u64 parent_db_id = 0;
+	u64 sym_db_id = 0;
+	unsigned int col = 0;
+	PyObject *row;
+
+	if (cp->parent)
+		parent_db_id = cp->parent->db_id;
+	if (cp->sym)
+		sym_db_id = *(u64 *)symbol__priv(cp->sym);
+
+	row = tuple_new(4);
+	tuple_set_u64(row, col++, cp->db_id);
+	tuple_set_u64(row, col++, parent_db_id);
+	tuple_set_u64(row, col++, sym_db_id);
+	tuple_set_u64(row, col++, cp->ip);
+
+	call_object(tables->call_path_handler, row, "call_path_table");
+	Py_DECREF(row);
+
+	return 0;
+}
+
+/*
+ * Pass one 11-column row describing the call/return pair @cr to the script's
+ * call_return_table handler.  A missing comm is exported as id 0.
+ */
+static int python_export_call_return(struct db_export *dbe,
+				     struct call_return *cr)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	u64 comm_db_id = 0;
+	unsigned int col = 0;
+	PyObject *row;
+
+	if (cr->comm)
+		comm_db_id = cr->comm->db_id;
+
+	row = tuple_new(11);
+	tuple_set_u64(row, col++, cr->db_id);
+	tuple_set_u64(row, col++, cr->thread->db_id);
+	tuple_set_u64(row, col++, comm_db_id);
+	tuple_set_u64(row, col++, cr->cp->db_id);
+	tuple_set_u64(row, col++, cr->call_time);
+	tuple_set_u64(row, col++, cr->return_time);
+	tuple_set_u64(row, col++, cr->branch_count);
+	tuple_set_u64(row, col++, cr->call_ref);
+	tuple_set_u64(row, col++, cr->return_ref);
+	/* NOTE(review): cp->parent is dereferenced unchecked - confirm it cannot be NULL here. */
+	tuple_set_u64(row, col++, cr->cp->parent->db_id);
+	tuple_set_s32(row, col++, cr->flags);
+
+	call_object(tables->call_return_handler, row, "call_return_table");
+	Py_DECREF(row);
+
+	return 0;
+}
+
+/*
+ * Call-return processor callback: @data is the struct db_export registered
+ * together with this callback; forward @cr to db_export__call_return().
+ */
+static int python_process_call_return(struct call_return *cr, void *data)
+{
+	return db_export__call_return((struct db_export *)data, cr);
+}
+
+/*
+ * Deliver a non-tracepoint sample to the script's "process_event" handler,
+ * passing the perf sample dict as the only argument.  Nothing is done when
+ * the script does not define that handler.
+ */
+static void python_process_general_event(struct perf_sample *sample,
+					 struct perf_evsel *evsel,
+					 struct addr_location *al)
+{
+	PyObject *handler, *t, *dict, *callchain;
+	static char handler_name[64];
+	unsigned n = 0;
+
+	snprintf(handler_name, sizeof(handler_name), "%s", "process_event");
+
+	handler = get_handler(handler_name);
+	if (!handler)
+		return;
+
+	/*
+	 * Use the MAX_FIELDS to make the function expandable, though
+	 * currently there is only one item for the tuple.
+	 */
+	t = PyTuple_New(MAX_FIELDS);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	/* ip unwinding */
+	callchain = python_process_callchain(sample, evsel, al);
+	dict = get_perf_sample_dict(sample, evsel, al, callchain);
+
+	PyTuple_SetItem(t, n++, dict);
+	if (_PyTuple_Resize(&t, n) == -1)
+		Py_FatalError("error resizing Python tuple");
+
+	call_object(handler, t, handler_name);
+
+	/*
+	 * PyTuple_SetItem() stole the reference to dict, so releasing the
+	 * tuple releases the dict too; dropping it here as well would release
+	 * it twice.
+	 */
+	Py_DECREF(t);
+}
+
+/*
+ * Entry point for every sample: tracepoints go to their per-event handler;
+ * everything else is either exported (db export mode) or passed to the
+ * generic event handler.
+ */
+static void python_process_event(union perf_event *event,
+				 struct perf_sample *sample,
+				 struct perf_evsel *evsel,
+				 struct addr_location *al)
+{
+	if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+		python_process_tracepoint(sample, evsel, al);
+		return;
+	}
+
+	/* Reserve for future process_hw/sw/raw APIs */
+	if (!tables_global.db_export_mode) {
+		python_process_general_event(sample, evsel, al);
+		return;
+	}
+
+	db_export__sample(&tables_global.dbe, event, sample, evsel, al);
+}
+
+/*
+ * Write the stat handler name for @evsel into @str (at most @size bytes):
+ * "stat__" followed by the event name, with every ':' replaced by '_'.
+ */
+static void get_handler_name(char *str, size_t size,
+			     struct perf_evsel *evsel)
+{
+	char *colon;
+
+	scnprintf(str, size, "stat__%s", perf_evsel__name(evsel));
+
+	for (colon = strchr(str, ':'); colon != NULL;
+	     colon = strchr(colon + 1, ':'))
+		*colon = '_';
+}
+
+/*
+ * Call the script's per-counter stat handler (named by get_handler_name())
+ * with the arguments (cpu, thread, tstamp, val, ena, run).  Only a debug
+ * message is emitted when the script does not define the handler.
+ */
+static void
+process_stat(struct perf_evsel *counter, int cpu, int thread, u64 tstamp,
+	     struct perf_counts_values *count)
+{
+	PyObject *handler, *t;
+	static char handler_name[256];
+	int n = 0;
+
+	get_handler_name(handler_name, sizeof(handler_name),
+			 counter);
+
+	/*
+	 * Look the handler up before allocating the argument tuple, so that
+	 * nothing is left to release when the handler is missing.
+	 */
+	handler = get_handler(handler_name);
+	if (!handler) {
+		pr_debug("can't find python handler %s\n", handler_name);
+		return;
+	}
+
+	t = PyTuple_New(MAX_FIELDS);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu));
+	PyTuple_SetItem(t, n++, _PyLong_FromLong(thread));
+
+	tuple_set_u64(t, n++, tstamp);
+	tuple_set_u64(t, n++, count->val);
+	tuple_set_u64(t, n++, count->ena);
+	tuple_set_u64(t, n++, count->run);
+
+	/* Shrink the tuple to the number of slots actually filled. */
+	if (_PyTuple_Resize(&t, n) == -1)
+		Py_FatalError("error resizing Python tuple");
+
+	call_object(handler, t, handler_name);
+
+	Py_DECREF(t);
+}
+
+/*
+ * Report the counts of @counter to the script.  In AGGR_GLOBAL mode a single
+ * aggregated value is reported with cpu and thread set to -1; otherwise one
+ * value is reported for every (thread, cpu) pair.
+ */
+static void python_process_stat(struct perf_stat_config *config,
+				struct perf_evsel *counter, u64 tstamp)
+{
+	struct thread_map *threads = counter->threads;
+	struct cpu_map *cpus = counter->cpus;
+	int cpu_idx, thread_idx;
+
+	if (config->aggr_mode == AGGR_GLOBAL) {
+		process_stat(counter, -1, -1, tstamp, &counter->counts->aggr);
+		return;
+	}
+
+	for (thread_idx = 0; thread_idx < threads->nr; thread_idx++)
+		for (cpu_idx = 0; cpu_idx < cpus->nr; cpu_idx++)
+			process_stat(counter, cpus->map[cpu_idx],
+				     thread_map__pid(threads, thread_idx),
+				     tstamp,
+				     perf_counts(counter->counts, cpu_idx,
+						 thread_idx));
+}
+
+/*
+ * Call the script's "stat__interval" handler with the timestamp @tstamp.
+ * Only a debug message is emitted when the script does not define it.
+ */
+static void python_process_stat_interval(u64 tstamp)
+{
+	PyObject *handler, *t;
+	static const char handler_name[] = "stat__interval";
+	int n = 0;
+
+	/*
+	 * Look the handler up before allocating the argument tuple, so that
+	 * nothing is left to release when the handler is missing.
+	 */
+	handler = get_handler(handler_name);
+	if (!handler) {
+		pr_debug("can't find python handler %s\n", handler_name);
+		return;
+	}
+
+	t = PyTuple_New(MAX_FIELDS);
+	if (!t)
+		Py_FatalError("couldn't create Python tuple");
+
+	tuple_set_u64(t, n++, tstamp);
+
+	/* Shrink the tuple to the number of slots actually filled. */
+	if (_PyTuple_Resize(&t, n) == -1)
+		Py_FatalError("error resizing Python tuple");
+
+	call_object(handler, t, handler_name);
+
+	Py_DECREF(t);
+}
+
+/*
+ * Take references to the "__main__" module and its dict (kept in main_module
+ * and main_dict), then invoke the script's "trace_begin" through
+ * try_call_object().
+ *
+ * Returns 0 on success, -1 when the module or its dict cannot be obtained.
+ */
+static int run_start_sub(void)
+{
+	main_module = PyImport_AddModule("__main__");
+	if (!main_module)
+		return -1;
+	Py_INCREF(main_module);
+
+	main_dict = PyModule_GetDict(main_module);
+	if (main_dict) {
+		Py_INCREF(main_dict);
+		try_call_object("trace_begin", NULL);
+		return 0;
+	}
+
+	/* No dict: give back the module reference taken above. */
+	Py_XDECREF(main_dict);
+	Py_XDECREF(main_module);
+	return -1;
+}
+
+/*
+ * SET_TABLE_HANDLER_(name, handler_name, table_name): look up the script
+ * function called <table_name> and store it in tables-><handler_name>; when
+ * it exists, install python_export_<name> as the tables->dbe.export_<name>
+ * callback.  Expects a variable named "tables" in the calling scope.
+ */
+#define SET_TABLE_HANDLER_(name, handler_name, table_name) do {		\
+	tables->handler_name = get_handler(#table_name);		\
+	if (tables->handler_name)					\
+		tables->dbe.export_ ## name = python_export_ ## name;	\
+} while (0)
+
+/* Shorthand deriving <name>_handler and <name>_table from @name. */
+#define SET_TABLE_HANDLER(name) \
+	SET_TABLE_HANDLER_(name, name ## _handler, name ## _table)
+
+/*
+ * Return whether the script's __main__ dict defines @name with a true value.
+ * A missing name counts as false; a failing truth test is reported through
+ * handler_call_die().
+ */
+static bool main_dict_flag_is_true(const char *name)
+{
+	PyObject *flag = PyDict_GetItemString(main_dict, name);
+	int truth;
+
+	if (!flag)
+		return false;
+
+	truth = PyObject_IsTrue(flag);
+	if (truth == -1)
+		handler_call_die(name);
+
+	return truth != 0;
+}
+
+/*
+ * Reset @tables and, when the script sets "perf_db_export_mode", switch to
+ * database export mode: create the optional call-return processor
+ * ("perf_db_export_calls") and call path root ("perf_db_export_callchains"),
+ * then hook up an export callback for every table handler the script defines.
+ */
+static void set_table_handlers(struct tables *tables)
+{
+	memset(tables, 0, sizeof(*tables));
+	if (db_export__init(&tables->dbe))
+		Py_FatalError("failed to initialize export");
+
+	if (!main_dict_flag_is_true("perf_db_export_mode"))
+		return;
+
+	/* handle export calls */
+	tables->dbe.crp = NULL;
+	if (main_dict_flag_is_true("perf_db_export_calls")) {
+		tables->dbe.crp =
+			call_return_processor__new(python_process_call_return,
+						   &tables->dbe);
+		if (!tables->dbe.crp)
+			Py_FatalError("failed to create calls processor");
+	}
+
+	/* handle export callchains */
+	tables->dbe.cpr = NULL;
+	if (main_dict_flag_is_true("perf_db_export_callchains")) {
+		/*
+		 * Attempt to use the call path root from the call return
+		 * processor, if the call return processor is in use. Otherwise,
+		 * we allocate a new call path root. This prevents exporting
+		 * duplicate call path ids when both are in use simultaneously.
+		 */
+		tables->dbe.cpr = tables->dbe.crp ? tables->dbe.crp->cpr
+						  : call_path_root__new();
+
+		if (!tables->dbe.cpr)
+			Py_FatalError("failed to create call path root");
+	}
+
+	tables->db_export_mode = true;
+	/*
+	 * Reserve per symbol space for symbol->db_id via symbol__priv()
+	 */
+	symbol_conf.priv_size = sizeof(u64);
+
+	SET_TABLE_HANDLER(evsel);
+	SET_TABLE_HANDLER(machine);
+	SET_TABLE_HANDLER(thread);
+	SET_TABLE_HANDLER(comm);
+	SET_TABLE_HANDLER(comm_thread);
+	SET_TABLE_HANDLER(dso);
+	SET_TABLE_HANDLER(symbol);
+	SET_TABLE_HANDLER(branch_type);
+	SET_TABLE_HANDLER(sample);
+	SET_TABLE_HANDLER(call_path);
+	SET_TABLE_HANDLER(call_return);
+}
+
+/*
+ * Release the argv array built by python_start_script().  Before Python 3 the
+ * entries are borrowed strings, so only the array is freed; from Python 3 on
+ * each of the @num entries is released with PyMem_RawFree() first.
+ */
+#if PY_MAJOR_VERSION < 3
+static void _free_command_line(const char **command_line, int num)
+{
+	free(command_line);
+}
+#else
+static void _free_command_line(wchar_t **command_line, int num)
+{
+	int idx = 0;
+
+	while (idx < num) {
+		PyMem_RawFree(command_line[idx]);
+		idx++;
+	}
+
+	free(command_line);
+}
+#endif
+
+
+/*
+ * Start trace script
+ */
+/*
+ * Initialize the interpreter, run @script with sys.argv set to the script
+ * name followed by the @argc entries of @argv, call its trace_begin and set
+ * up the database export handlers.
+ *
+ * Returns 0 on success and non-zero on failure; the interpreter is finalized
+ * again on every failure that happens after it was initialized.
+ */
+static int python_start_script(const char *script, int argc, const char **argv)
+{
+	struct tables *tables = &tables_global;
+#if PY_MAJOR_VERSION < 3
+	const char **command_line;
+#else
+	wchar_t **command_line;
+#endif
+	char buf[PATH_MAX];
+	int i, err = 0;
+	FILE *fp;
+
+#if PY_MAJOR_VERSION < 3
+	command_line = malloc((argc + 1) * sizeof(const char *));
+	if (!command_line) {
+		fprintf(stderr, "Can't allocate python command line\n");
+		return -1;
+	}
+	command_line[0] = script;
+	for (i = 1; i < argc + 1; i++)
+		command_line[i] = argv[i - 1];
+#else
+	/* Zeroed, so a partially filled array can still be freed safely. */
+	command_line = calloc(argc + 1, sizeof(wchar_t *));
+	if (!command_line) {
+		fprintf(stderr, "Can't allocate python command line\n");
+		return -1;
+	}
+	for (i = 0; i < argc + 1; i++) {
+		command_line[i] = Py_DecodeLocale(i ? argv[i - 1] : script, NULL);
+		if (!command_line[i]) {
+			fprintf(stderr, "Can't decode python command line\n");
+			_free_command_line(command_line, argc + 1);
+			return -1;
+		}
+	}
+#endif
+
+	Py_Initialize();
+
+#if PY_MAJOR_VERSION < 3
+	initperf_trace_context();
+	PySys_SetArgv(argc + 1, (char **)command_line);
+#else
+	PyInit_perf_trace_context();
+	PySys_SetArgv(argc + 1, command_line);
+#endif
+
+	fp = fopen(script, "r");
+	if (!fp) {
+		/* Bounded: the script path may be as long as the buffer itself. */
+		snprintf(buf, sizeof(buf), "Can't open python script \"%s\"", script);
+		perror(buf);
+		err = -1;
+		goto error;
+	}
+
+	err = PyRun_SimpleFile(fp, script);
+	/* PyRun_SimpleFile() leaves the stream open; it is ours to close. */
+	fclose(fp);
+	if (err) {
+		fprintf(stderr, "Error running python script %s\n", script);
+		goto error;
+	}
+
+	err = run_start_sub();
+	if (err) {
+		fprintf(stderr, "Error starting python script %s\n", script);
+		goto error;
+	}
+
+	set_table_handlers(tables);
+
+	if (tables->db_export_mode) {
+		err = db_export__branch_types(&tables->dbe);
+		if (err)
+			goto error;
+	}
+
+	_free_command_line(command_line, argc + 1);
+
+	return err;
+error:
+	Py_Finalize();
+	_free_command_line(command_line, argc + 1);
+
+	return err;
+}
+
+/* Flush the database export state; returns what db_export__flush() returns. */
+static int python_flush_script(void)
+{
+	return db_export__flush(&tables_global.dbe);
+}
+
+/*
+ * Stop trace script
+ */
+/*
+ * Invoke the script's "trace_end" through try_call_object(), tear down the
+ * export state, drop the references taken on __main__ and its dict, and
+ * finalize the interpreter.  Always returns 0.
+ */
+static int python_stop_script(void)
+{
+	try_call_object("trace_end", NULL);
+
+	db_export__exit(&tables_global.dbe);
+
+	Py_XDECREF(main_dict);
+	Py_XDECREF(main_module);
+	Py_Finalize();
+
+	return 0;
+}
+
+/*
+ * Write a skeleton Python script to "<outfile>.py".
+ *
+ * The script contains trace_begin/trace_end stubs, one handler per event
+ * returned by trace_find_next_event() that prints the event's fields, a
+ * trace_unhandled() fallback and the print_header()/get_dict_as_string()
+ * helpers.
+ *
+ * Returns 0 on success, -1 when the file name does not fit in PATH_MAX or
+ * the file cannot be opened for writing.
+ */
+static int python_generate_script(struct pevent *pevent, const char *outfile)
+{
+	struct event_format *event = NULL;
+	struct format_field *f;
+	char fname[PATH_MAX];
+	int not_first, count;
+	FILE *ofp;
+	int len;
+
+	/* Bounded: outfile comes from the command line and may be arbitrarily long. */
+	len = snprintf(fname, sizeof(fname), "%s.py", outfile);
+	if (len < 0 || (size_t)len >= sizeof(fname)) {
+		fprintf(stderr, "output file name too long: %s.py\n", outfile);
+		return -1;
+	}
+
+	ofp = fopen(fname, "w");
+	if (ofp == NULL) {
+		fprintf(stderr, "couldn't open %s\n", fname);
+		return -1;
+	}
+	fprintf(ofp, "# perf script event handlers, "
+		"generated by perf script -g python\n");
+
+	fprintf(ofp, "# Licensed under the terms of the GNU GPL"
+		" License version 2\n\n");
+
+	fprintf(ofp, "# The common_* event handler fields are the most useful "
+		"fields common to\n");
+
+	fprintf(ofp, "# all events.  They don't necessarily correspond to "
+		"the 'common_*' fields\n");
+
+	fprintf(ofp, "# in the format files.  Those fields not available as "
+		"handler params can\n");
+
+	fprintf(ofp, "# be retrieved using Python functions of the form "
+		"common_*(context).\n");
+
+	fprintf(ofp, "# See the perf-script-python Documentation for the list "
+		"of available functions.\n\n");
+
+	fprintf(ofp, "import os\n");
+	fprintf(ofp, "import sys\n\n");
+
+	fprintf(ofp, "sys.path.append(os.environ['PERF_EXEC_PATH'] + \\\n");
+	fprintf(ofp, "\t'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')\n");
+	fprintf(ofp, "\nfrom perf_trace_context import *\n");
+	fprintf(ofp, "from Core import *\n\n\n");
+
+	fprintf(ofp, "def trace_begin():\n");
+	fprintf(ofp, "\tprint \"in trace_begin\"\n\n");
+
+	fprintf(ofp, "def trace_end():\n");
+	fprintf(ofp, "\tprint \"in trace_end\"\n\n");
+
+	while ((event = trace_find_next_event(pevent, event))) {
+		/* Handler signature: the common arguments, then one per field. */
+		fprintf(ofp, "def %s__%s(", event->system, event->name);
+		fprintf(ofp, "event_name, ");
+		fprintf(ofp, "context, ");
+		fprintf(ofp, "common_cpu,\n");
+		fprintf(ofp, "\tcommon_secs, ");
+		fprintf(ofp, "common_nsecs, ");
+		fprintf(ofp, "common_pid, ");
+		fprintf(ofp, "common_comm,\n\t");
+		fprintf(ofp, "common_callchain, ");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+			if (++count % 5 == 0)
+				fprintf(ofp, "\n\t");
+
+			fprintf(ofp, "%s", f->name);
+		}
+		if (not_first++)
+			fprintf(ofp, ", ");
+		if (++count % 5 == 0)
+			fprintf(ofp, "\n\t\t");
+		fprintf(ofp, "perf_sample_dict");
+
+		fprintf(ofp, "):\n");
+
+		fprintf(ofp, "\t\tprint_header(event_name, common_cpu, "
+			"common_secs, common_nsecs,\n\t\t\t"
+			"common_pid, common_comm)\n\n");
+
+		/* Format string of the generated print: one "name=<conv>" per field. */
+		fprintf(ofp, "\t\tprint \"");
+
+		not_first = 0;
+		count = 0;
+
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+			if (count && count % 3 == 0) {
+				fprintf(ofp, "\" \\\n\t\t\"");
+			}
+			count++;
+
+			fprintf(ofp, "%s=", f->name);
+			if (f->flags & FIELD_IS_STRING ||
+			    f->flags & FIELD_IS_FLAG ||
+			    f->flags & FIELD_IS_ARRAY ||
+			    f->flags & FIELD_IS_SYMBOLIC)
+				fprintf(ofp, "%%s");
+			else if (f->flags & FIELD_IS_SIGNED)
+				fprintf(ofp, "%%d");
+			else
+				fprintf(ofp, "%%u");
+		}
+
+		fprintf(ofp, "\" %% \\\n\t\t(");
+
+		not_first = 0;
+		count = 0;
+
+		/* Argument list of the generated print, in the same field order. */
+		for (f = event->format.fields; f; f = f->next) {
+			if (not_first++)
+				fprintf(ofp, ", ");
+
+			if (++count % 5 == 0)
+				fprintf(ofp, "\n\t\t");
+
+			if (f->flags & FIELD_IS_FLAG) {
+				if ((count - 1) % 5 != 0) {
+					fprintf(ofp, "\n\t\t");
+					count = 4;
+				}
+				fprintf(ofp, "flag_str(\"");
+				fprintf(ofp, "%s__%s\", ", event->system,
+					event->name);
+				fprintf(ofp, "\"%s\", %s)", f->name,
+					f->name);
+			} else if (f->flags & FIELD_IS_SYMBOLIC) {
+				if ((count - 1) % 5 != 0) {
+					fprintf(ofp, "\n\t\t");
+					count = 4;
+				}
+				fprintf(ofp, "symbol_str(\"");
+				fprintf(ofp, "%s__%s\", ", event->system,
+					event->name);
+				fprintf(ofp, "\"%s\", %s)", f->name,
+					f->name);
+			} else
+				fprintf(ofp, "%s", f->name);
+		}
+
+		fprintf(ofp, ")\n\n");
+
+		fprintf(ofp, "\t\tprint 'Sample: {'+"
+			"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n");
+
+		fprintf(ofp, "\t\tfor node in common_callchain:");
+		fprintf(ofp, "\n\t\t\tif 'sym' in node:");
+		fprintf(ofp, "\n\t\t\t\tprint \"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name'])");
+		fprintf(ofp, "\n\t\t\telse:");
+		fprintf(ofp, "\n\t\t\t\tprint \"\t[%%x]\" %% (node['ip'])\n\n");
+		fprintf(ofp, "\t\tprint \"\\n\"\n\n");
+
+	}
+
+	fprintf(ofp, "def trace_unhandled(event_name, context, "
+		"event_fields_dict, perf_sample_dict):\n");
+
+	fprintf(ofp, "\t\tprint get_dict_as_string(event_fields_dict)\n");
+	fprintf(ofp, "\t\tprint 'Sample: {'+"
+		"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n");
+
+	fprintf(ofp, "def print_header("
+		"event_name, cpu, secs, nsecs, pid, comm):\n"
+		"\tprint \"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
+		"(event_name, cpu, secs, nsecs, pid, comm),\n\n");
+
+	fprintf(ofp, "def get_dict_as_string(a_dict, delimiter=' '):\n"
+		"\treturn delimiter.join"
+		"(['%%s=%%s'%%(k,str(v))for k,v in sorted(a_dict.items())])\n");
+
+	fclose(ofp);
+
+	fprintf(stderr, "generated Python script: %s\n", fname);
+
+	return 0;
+}
+
+/*
+ * Operations table of the Python scripting engine: binds each scripting_ops
+ * hook to the corresponding python_* implementation in this file.
+ */
+struct scripting_ops python_scripting_ops = {
+	.name			= "Python",
+	.start_script		= python_start_script,
+	.flush_script		= python_flush_script,
+	.stop_script		= python_stop_script,
+	.process_event		= python_process_event,
+	.process_stat		= python_process_stat,
+	.process_stat_interval	= python_process_stat_interval,
+	.generate_script	= python_generate_script,
+};
diff --git a/util/session.c b/util/session.c
new file mode 100644
index 0000000..f4a7a43
--- /dev/null
+++ b/util/session.c
@@ -0,0 +1,2247 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <traceevent/event-parse.h>
+#include <api/fs/fs.h>
+
+#include <byteswap.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include "evlist.h"
+#include "evsel.h"
+#include "memswap.h"
+#include "session.h"
+#include "tool.h"
+#include "sort.h"
+#include "util.h"
+#include "cpumap.h"
+#include "perf_regs.h"
+#include "asm/bug.h"
+#include "auxtrace.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "stat.h"
+
+static int perf_session__deliver_event(struct perf_session *session,
+				       union perf_event *event,
+				       struct perf_tool *tool,
+				       u64 file_offset);
+
+/*
+ * Read the header of the session's data and validate the event list.
+ *
+ * The evlist consistency checks (sample_type, sample_id_all, read_format)
+ * are skipped for pipes and for files carrying the HEADER_STAT feature.
+ *
+ * Returns 0 on success, -1 after logging an error otherwise.
+ */
+static int perf_session__open(struct perf_session *session)
+{
+	struct perf_data *data = session->data;
+
+	if (perf_session__read_header(session) < 0) {
+		pr_err("incompatible file format (rerun with -v to learn more)\n");
+		return -1;
+	}
+
+	if (perf_data__is_pipe(data))
+		return 0;
+
+	if (perf_header__has_feat(&session->header, HEADER_STAT))
+		return 0;
+
+	/* First failing check reports itself; later checks are not run. */
+	if (!perf_evlist__valid_sample_type(session->evlist))
+		pr_err("non matching sample_type\n");
+	else if (!perf_evlist__valid_sample_id_all(session->evlist))
+		pr_err("non matching sample_id_all\n");
+	else if (!perf_evlist__valid_read_format(session->evlist))
+		pr_err("non matching read_format\n");
+	else
+		return 0;
+
+	return -1;
+}
+
+/*
+ * Propagate the id header size computed from the session's evlist to
+ * the session's machines.
+ */
+void perf_session__set_id_hdr_size(struct perf_session *session)
+{
+	u16 size;
+
+	size = perf_evlist__id_hdr_size(session->evlist);
+	machines__set_id_hdr_size(&session->machines, size);
+}
+
+/*
+ * Create the kernel maps for the host machine and, if that did not fail,
+ * for the guest machines.
+ *
+ * Returns the negative result of the host step if it failed, otherwise
+ * the result of the guest step.
+ */
+int perf_session__create_kernel_maps(struct perf_session *session)
+{
+	int err = machine__create_kernel_maps(&session->machines.host);
+
+	if (err < 0)
+		return err;
+
+	return machines__create_guest_kernel_maps(&session->machines);
+}
+
+/* Destroy the kernel maps of all machines tracked by @session. */
+static void perf_session__destroy_kernel_maps(struct perf_session *session)
+{
+	machines__destroy_kernel_maps(&session->machines);
+}
+
+/* Return true if any evsel in the session's evlist has attr.comm_exec set. */
+static bool perf_session__has_comm_exec(struct perf_session *session)
+{
+	struct perf_evsel *pos;
+	bool found = false;
+
+	evlist__for_each_entry(session->evlist, pos) {
+		if (pos->attr.comm_exec) {
+			found = true;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/*
+ * Tell the session's machines whether any event in the evlist was
+ * recorded with comm_exec.
+ */
+static void perf_session__set_comm_exec(struct perf_session *session)
+{
+	machines__set_comm_exec(&session->machines,
+				perf_session__has_comm_exec(session));
+}
+
+/*
+ * Delivery callback handed to ordered_events__init(): recover the
+ * session embedding @oe and deliver the queued event through it.
+ */
+static int ordered_events__deliver_event(struct ordered_events *oe,
+					 struct ordered_event *event)
+{
+	struct perf_session *s;
+
+	s = container_of(oe, struct perf_session, ordered_events);
+
+	return perf_session__deliver_event(s, event->event, s->tool,
+					   event->file_offset);
+}
+
+/*
+ * Allocate a session and, when @data is given, open it.
+ *
+ * @data:   perf data to open; may be NULL, in which case the host
+ *          machine's env is pointed at the global perf_env.
+ * @repipe: stored in session->repipe.
+ * @tool:   stored in session->tool (may be NULL); its ordered_events
+ *          flag may be cleared here, see the end of the function.
+ *
+ * Returns the new session, or NULL if allocation, opening @data or
+ * reading/validating its header failed.
+ */
+struct perf_session *perf_session__new(struct perf_data *data,
+				       bool repipe, struct perf_tool *tool)
+{
+	struct perf_session *session = zalloc(sizeof(*session));
+
+	if (!session)
+		goto out;
+
+	session->repipe = repipe;
+	session->tool   = tool;
+	INIT_LIST_HEAD(&session->auxtrace_index);
+	machines__init(&session->machines);
+	ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
+
+	if (data) {
+		/* session->data is still NULL here: nothing to close on failure. */
+		if (perf_data__open(data))
+			goto out_delete;
+
+		session->data = data;
+
+		if (perf_data__is_read(data)) {
+			/*
+			 * session->data is set, so perf_session__delete()
+			 * closes @data on this path. Closing it here as
+			 * well would close it twice.
+			 */
+			if (perf_session__open(session) < 0)
+				goto out_delete;
+
+			/*
+			 * set session attributes that are present in perf.data
+			 * but not in pipe-mode.
+			 */
+			if (!data->is_pipe) {
+				perf_session__set_id_hdr_size(session);
+				perf_session__set_comm_exec(session);
+			}
+		}
+	} else  {
+		session->machines.host.env = &perf_env;
+	}
+
+	if (!data || perf_data__is_write(data)) {
+		/*
+		 * In O_RDONLY mode this will be performed when reading the
+		 * kernel MMAP event, in perf_event__process_mmap().
+		 */
+		if (perf_session__create_kernel_maps(session) < 0)
+			pr_warning("Cannot read kernel map\n");
+	}
+
+	/*
+	 * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
+	 * processed, so perf_evlist__sample_id_all is not meaningful here.
+	 */
+	if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
+	    tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) {
+		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
+		tool->ordered_events = false;
+	}
+
+	return session;
+
+ out_delete:
+	perf_session__delete(session);
+ out:
+	return NULL;
+}
+
+/* Delete the threads of the host machine only (guests are not touched here). */
+static void perf_session__delete_threads(struct perf_session *session)
+{
+	machine__delete_threads(&session->machines.host);
+}
+
+/*
+ * Release everything hanging off @session, close its data (if any was
+ * attached) and free the session itself. A NULL @session is ignored.
+ */
+void perf_session__delete(struct perf_session *session)
+{
+	if (!session)
+		return;
+
+	auxtrace__free(session);
+	auxtrace_index__free(&session->auxtrace_index);
+	perf_session__destroy_kernel_maps(session);
+	perf_session__delete_threads(session);
+	perf_env__exit(&session->header.env);
+	machines__exit(&session->machines);
+
+	if (session->data != NULL)
+		perf_data__close(session->data);
+
+	free(session);
+}
+
+/* Default tracing_data handler: logs "unhandled" in dump mode, returns 0. */
+static int process_event_synth_tracing_data_stub(struct perf_tool *tool
+						 __maybe_unused,
+						 union perf_event *event
+						 __maybe_unused,
+						 struct perf_session *session
+						__maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+/* Default attr handler: logs "unhandled" in dump mode, returns 0. */
+static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
+					 union perf_event *event __maybe_unused,
+					 struct perf_evlist **pevlist
+					 __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+/*
+ * Default event_update handler: when dump_trace is set, print the event
+ * to stdout; then log "unhandled" and return 0.
+ */
+static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused,
+						 union perf_event *event __maybe_unused,
+						 struct perf_evlist **pevlist
+						 __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_event_update(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+/* Default handler for sample-style callbacks: logs "unhandled", returns 0. */
+static int process_event_sample_stub(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event __maybe_unused,
+				     struct perf_sample *sample __maybe_unused,
+				     struct perf_evsel *evsel __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+/* Generic default event handler: logs "unhandled", returns 0. */
+static int process_event_stub(struct perf_tool *tool __maybe_unused,
+			      union perf_event *event __maybe_unused,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+/*
+ * finished_round handler used when the tool does not want ordered
+ * events (see perf_tool__fill_defaults()): logs "unhandled", returns 0.
+ */
+static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
+				       union perf_event *event __maybe_unused,
+				       struct ordered_events *oe __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int process_finished_round(struct perf_tool *tool,
+				  union perf_event *event,
+				  struct ordered_events *oe);
+
+/*
+ * Read and discard @n bytes from @fd, at most 4096 bytes per read().
+ *
+ * Returns 0 once @n bytes were consumed; if read() returns 0 or a
+ * negative value first, that value is returned instead.
+ */
+static int skipn(int fd, off_t n)
+{
+	char scratch[4096];
+
+	while (n > 0) {
+		ssize_t got = read(fd, scratch, min(n, (off_t)sizeof(scratch)));
+
+		if (got <= 0)
+			return got;
+
+		n -= got;
+	}
+
+	return 0;
+}
+
+/*
+ * Default auxtrace handler: logs "unhandled" and, when the session data
+ * is a pipe, reads and discards event->auxtrace.size bytes from it (the
+ * result of skipn() is not checked).
+ *
+ * Returns event->auxtrace.size.
+ */
+static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
+				       union perf_event *event,
+				       struct perf_session *session
+				       __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	if (perf_data__is_pipe(session->data))
+		skipn(perf_data__fd(session->data), event->auxtrace.size);
+	return event->auxtrace.size;
+}
+
+/* Default handler for session-level callbacks: logs "unhandled", returns 0. */
+static int process_event_op2_stub(struct perf_tool *tool __maybe_unused,
+				  union perf_event *event __maybe_unused,
+				  struct perf_session *session __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+
+/*
+ * Default thread_map handler: prints the event to stdout when dump_trace
+ * is set, then logs "unhandled" and returns 0.
+ */
+static
+int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused,
+				  union perf_event *event __maybe_unused,
+				  struct perf_session *session __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_thread_map(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+/*
+ * Default cpu_map handler: prints the event to stdout when dump_trace
+ * is set, then logs "unhandled" and returns 0.
+ */
+static
+int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused,
+			       union perf_event *event __maybe_unused,
+			       struct perf_session *session __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_cpu_map(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+/*
+ * Default stat_config handler: prints the event to stdout when
+ * dump_trace is set, then logs "unhandled" and returns 0.
+ */
+static
+int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused,
+				   union perf_event *event __maybe_unused,
+				   struct perf_session *session __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_stat_config(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+/*
+ * Default stat handler: prints the event to stdout when dump_trace is
+ * set, then logs "unhandled" and returns 0.
+ */
+static int process_stat_stub(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event __maybe_unused,
+			     struct perf_session *perf_session
+			     __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_stat(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+/*
+ * Default stat_round handler: prints the event to stdout when dump_trace
+ * is set, then logs "unhandled" and returns 0.
+ */
+static int process_stat_round_stub(struct perf_tool *tool __maybe_unused,
+				   union perf_event *event __maybe_unused,
+				   struct perf_session *perf_session
+				   __maybe_unused)
+{
+	if (dump_trace)
+		perf_event__fprintf_stat_round(event, stdout);
+
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+/*
+ * Install a default handler for every callback of @tool that is still
+ * NULL; callbacks already set by the caller are left alone.
+ *
+ * Most defaults are the local "unhandled" stubs. lost, lost_samples,
+ * aux, itrace_start and context_switch default to perf_event__process_*
+ * handlers instead.
+ */
+void perf_tool__fill_defaults(struct perf_tool *tool)
+{
+	if (tool->sample == NULL)
+		tool->sample = process_event_sample_stub;
+	if (tool->mmap == NULL)
+		tool->mmap = process_event_stub;
+	if (tool->mmap2 == NULL)
+		tool->mmap2 = process_event_stub;
+	if (tool->comm == NULL)
+		tool->comm = process_event_stub;
+	if (tool->namespaces == NULL)
+		tool->namespaces = process_event_stub;
+	if (tool->fork == NULL)
+		tool->fork = process_event_stub;
+	if (tool->exit == NULL)
+		tool->exit = process_event_stub;
+	if (tool->lost == NULL)
+		tool->lost = perf_event__process_lost;
+	if (tool->lost_samples == NULL)
+		tool->lost_samples = perf_event__process_lost_samples;
+	if (tool->aux == NULL)
+		tool->aux = perf_event__process_aux;
+	if (tool->itrace_start == NULL)
+		tool->itrace_start = perf_event__process_itrace_start;
+	if (tool->context_switch == NULL)
+		tool->context_switch = perf_event__process_switch;
+	if (tool->read == NULL)
+		tool->read = process_event_sample_stub;
+	if (tool->throttle == NULL)
+		tool->throttle = process_event_stub;
+	if (tool->unthrottle == NULL)
+		tool->unthrottle = process_event_stub;
+	if (tool->attr == NULL)
+		tool->attr = process_event_synth_attr_stub;
+	if (tool->event_update == NULL)
+		tool->event_update = process_event_synth_event_update_stub;
+	if (tool->tracing_data == NULL)
+		tool->tracing_data = process_event_synth_tracing_data_stub;
+	if (tool->build_id == NULL)
+		tool->build_id = process_event_op2_stub;
+	/* Only tools that asked for ordered events get the flushing handler. */
+	if (tool->finished_round == NULL) {
+		if (tool->ordered_events)
+			tool->finished_round = process_finished_round;
+		else
+			tool->finished_round = process_finished_round_stub;
+	}
+	if (tool->id_index == NULL)
+		tool->id_index = process_event_op2_stub;
+	if (tool->auxtrace_info == NULL)
+		tool->auxtrace_info = process_event_op2_stub;
+	if (tool->auxtrace == NULL)
+		tool->auxtrace = process_event_auxtrace_stub;
+	if (tool->auxtrace_error == NULL)
+		tool->auxtrace_error = process_event_op2_stub;
+	if (tool->thread_map == NULL)
+		tool->thread_map = process_event_thread_map_stub;
+	if (tool->cpu_map == NULL)
+		tool->cpu_map = process_event_cpu_map_stub;
+	if (tool->stat_config == NULL)
+		tool->stat_config = process_event_stat_config_stub;
+	if (tool->stat == NULL)
+		tool->stat = process_stat_stub;
+	if (tool->stat_round == NULL)
+		tool->stat_round = process_stat_round_stub;
+	if (tool->time_conv == NULL)
+		tool->time_conv = process_event_op2_stub;
+	if (tool->feature == NULL)
+		tool->feature = process_event_op2_stub;
+}
+
+/*
+ * Byte-swap, as 64-bit words, everything from @data up to the end of
+ * @event (as given by event->header.size). The span must be a multiple
+ * of 8 bytes.
+ */
+static void swap_sample_id_all(union perf_event *event, void *data)
+{
+	int nbytes = ((void *) event + event->header.size) - data;
+
+	BUG_ON(nbytes % sizeof(u64));
+	mem_bswap_64(data, nbytes);
+}
+
+/*
+ * Swap routine for records whose payload consists solely of 64-bit
+ * words: everything after the header is swapped word by word.
+ */
+static void perf_event__all64_swap(union perf_event *event,
+				   bool sample_id_all __maybe_unused)
+{
+	void *payload = &event->header + 1;
+
+	mem_bswap_64(payload, event->header.size - sizeof(event->header));
+}
+
+/*
+ * Swap a COMM record: pid and tid, plus - when @sample_id_all is set -
+ * the trailing words that follow the u64-aligned comm string.
+ */
+static void perf_event__comm_swap(union perf_event *event, bool sample_id_all)
+{
+	void *tail;
+
+	event->comm.pid = bswap_32(event->comm.pid);
+	event->comm.tid = bswap_32(event->comm.tid);
+
+	if (!sample_id_all)
+		return;
+
+	/* Step over the NUL-terminated string, padded to a u64 boundary. */
+	tail = &event->comm.comm;
+	tail += PERF_ALIGN(strlen(tail) + 1, sizeof(u64));
+	swap_sample_id_all(event, tail);
+}
+
+/*
+ * Swap an MMAP record: the fixed fields, plus - when @sample_id_all is
+ * set - the trailing words that follow the u64-aligned filename.
+ */
+static void perf_event__mmap_swap(union perf_event *event,
+				  bool sample_id_all)
+{
+	void *tail;
+
+	event->mmap.pid	  = bswap_32(event->mmap.pid);
+	event->mmap.tid	  = bswap_32(event->mmap.tid);
+	event->mmap.start = bswap_64(event->mmap.start);
+	event->mmap.len	  = bswap_64(event->mmap.len);
+	event->mmap.pgoff = bswap_64(event->mmap.pgoff);
+
+	if (!sample_id_all)
+		return;
+
+	/* Step over the NUL-terminated filename, padded to a u64 boundary. */
+	tail = &event->mmap.filename;
+	tail += PERF_ALIGN(strlen(tail) + 1, sizeof(u64));
+	swap_sample_id_all(event, tail);
+}
+
+/*
+ * Swap an MMAP2 record: the fixed fields, plus - when @sample_id_all is
+ * set - the trailing words that follow the u64-aligned filename.
+ */
+static void perf_event__mmap2_swap(union perf_event *event,
+				  bool sample_id_all)
+{
+	void *tail;
+
+	event->mmap2.pid   = bswap_32(event->mmap2.pid);
+	event->mmap2.tid   = bswap_32(event->mmap2.tid);
+	event->mmap2.start = bswap_64(event->mmap2.start);
+	event->mmap2.len   = bswap_64(event->mmap2.len);
+	event->mmap2.pgoff = bswap_64(event->mmap2.pgoff);
+	event->mmap2.maj   = bswap_32(event->mmap2.maj);
+	event->mmap2.min   = bswap_32(event->mmap2.min);
+	event->mmap2.ino   = bswap_64(event->mmap2.ino);
+
+	if (!sample_id_all)
+		return;
+
+	/* Step over the NUL-terminated filename, padded to a u64 boundary. */
+	tail = &event->mmap2.filename;
+	tail += PERF_ALIGN(strlen(tail) + 1, sizeof(u64));
+	swap_sample_id_all(event, tail);
+}
+/*
+ * Swap a FORK/EXIT record (both are registered with this routine) and,
+ * when @sample_id_all is set, the words following the fixed part.
+ */
+static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
+{
+	event->fork.pid	 = bswap_32(event->fork.pid);
+	event->fork.tid	 = bswap_32(event->fork.tid);
+	event->fork.ppid = bswap_32(event->fork.ppid);
+	event->fork.ptid = bswap_32(event->fork.ptid);
+	event->fork.time = bswap_64(event->fork.time);
+
+	if (!sample_id_all)
+		return;
+
+	swap_sample_id_all(event, &event->fork + 1);
+}
+
+/*
+ * Swap a READ record and, when @sample_id_all is set, the words
+ * following the fixed part.
+ */
+static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
+{
+	event->read.pid		 = bswap_32(event->read.pid);
+	event->read.tid		 = bswap_32(event->read.tid);
+	event->read.value	 = bswap_64(event->read.value);
+	event->read.time_enabled = bswap_64(event->read.time_enabled);
+	event->read.time_running = bswap_64(event->read.time_running);
+	event->read.id		 = bswap_64(event->read.id);
+
+	if (!sample_id_all)
+		return;
+
+	swap_sample_id_all(event, &event->read + 1);
+}
+
+/*
+ * Swap an AUX record and, when @sample_id_all is set, the words
+ * following the fixed part.
+ */
+static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
+{
+	event->aux.aux_offset = bswap_64(event->aux.aux_offset);
+	event->aux.aux_size   = bswap_64(event->aux.aux_size);
+	event->aux.flags      = bswap_64(event->aux.flags);
+
+	if (!sample_id_all)
+		return;
+
+	swap_sample_id_all(event, &event->aux + 1);
+}
+
+/*
+ * Swap an ITRACE_START record and, when @sample_id_all is set, the
+ * words following the fixed part.
+ */
+static void perf_event__itrace_start_swap(union perf_event *event,
+					  bool sample_id_all)
+{
+	event->itrace_start.pid	 = bswap_32(event->itrace_start.pid);
+	event->itrace_start.tid	 = bswap_32(event->itrace_start.tid);
+
+	if (!sample_id_all)
+		return;
+
+	swap_sample_id_all(event, &event->itrace_start + 1);
+}
+
+/*
+ * Swap a SWITCH / SWITCH_CPU_WIDE record. The next_prev pid/tid pair is
+ * only swapped for the CPU-wide variant; the trailing words are swapped
+ * when @sample_id_all is set.
+ */
+static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
+{
+	bool cpu_wide = event->header.type == PERF_RECORD_SWITCH_CPU_WIDE;
+
+	if (cpu_wide) {
+		event->context_switch.next_prev_pid =
+				bswap_32(event->context_switch.next_prev_pid);
+		event->context_switch.next_prev_tid =
+				bswap_32(event->context_switch.next_prev_tid);
+	}
+
+	if (!sample_id_all)
+		return;
+
+	swap_sample_id_all(event, &event->context_switch + 1);
+}
+
+/*
+ * Swap a THROTTLE/UNTHROTTLE record (both are registered with this
+ * routine) and, when @sample_id_all is set, the trailing words.
+ */
+static void perf_event__throttle_swap(union perf_event *event,
+				      bool sample_id_all)
+{
+	event->throttle.time	  = bswap_64(event->throttle.time);
+	event->throttle.id	  = bswap_64(event->throttle.id);
+	event->throttle.stream_id = bswap_64(event->throttle.stream_id);
+
+	if (!sample_id_all)
+		return;
+
+	swap_sample_id_all(event, &event->throttle + 1);
+}
+
+/*
+ * Return @b with its eight bits in reverse order (bit 0 <-> bit 7, ...),
+ * by swapping nibbles, then bit pairs, then neighbouring bits.
+ */
+static u8 revbyte(u8 b)
+{
+	unsigned int v = b;
+
+	v = (v >> 4) | ((v & 0x0f) << 4);
+	v = ((v & 0xcc) >> 2) | ((v & 0x33) << 2);
+	v = ((v & 0xaa) >> 1) | ((v & 0x55) << 1);
+
+	return (u8) v;
+}
+
+/*
+ * XXX this is a hack in an attempt to carry the flags bitfield
+ * through endian village. The ABI says:
+ *
+ * Bit-fields are allocated from right to left (least to most significant)
+ * on little-endian implementations and from left to right (most to least
+ * significant) on big-endian implementations.
+ *
+ * The above seems to be byte specific, so we need to reverse the bits
+ * of each byte of the bitfield. 'Internet' also says this might be
+ * implementation specific, and we probably need a proper fix that
+ * carries the perf_event_attr bitfield flags in a separate data file
+ * FEAT_ section. Though this seems to work for now.
+ */
+static void swap_bitfield(u8 *p, unsigned len)
+{
+	u8 *end = p + len;
+
+	for (; p < end; p++)
+		*p = revbyte(*p);
+}
+
+/*
+ * Byte-swap a perf_event_attr in place; exported for swapping attributes
+ * in the file header.
+ *
+ * type and size are always swapped. Every other field is swapped only if
+ * it starts inside the (already swapped) attr->size, so attrs written by
+ * an older perf with a shorter struct are handled.
+ */
+void perf_event__attr_swap(struct perf_event_attr *attr)
+{
+	attr->type		= bswap_32(attr->type);
+	attr->size		= bswap_32(attr->size);
+
+/* True if attr->size reaches past the first @n elements of field @f. */
+#define bswap_safe(f, n)					\
+	(attr->size > (offsetof(struct perf_event_attr, f) +	\
+		       sizeof(attr->f) * (n)))
+#define bswap_field(f, sz)			\
+do {						\
+	if (bswap_safe(f, 0))			\
+		attr->f = bswap_##sz(attr->f);	\
+} while(0)
+#define bswap_field_16(f) bswap_field(f, 16)
+#define bswap_field_32(f) bswap_field(f, 32)
+#define bswap_field_64(f) bswap_field(f, 64)
+
+	bswap_field_64(config);
+	bswap_field_64(sample_period);
+	bswap_field_64(sample_type);
+	bswap_field_64(read_format);
+	bswap_field_32(wakeup_events);
+	bswap_field_32(bp_type);
+	bswap_field_64(bp_addr);
+	bswap_field_64(bp_len);
+	bswap_field_64(branch_sample_type);
+	bswap_field_64(sample_regs_user);
+	bswap_field_32(sample_stack_user);
+	/* clockid and sample_regs_intr are multi-byte too and need swapping. */
+	bswap_field_32(clockid);
+	bswap_field_64(sample_regs_intr);
+	bswap_field_32(aux_watermark);
+	bswap_field_16(sample_max_stack);
+
+	/*
+	 * After read_format are bitfields. Check read_format because
+	 * we are unable to use offsetof on bitfield.
+	 */
+	if (bswap_safe(read_format, 1))
+		swap_bitfield((u8 *) (&attr->read_format + 1),
+			      sizeof(u64));
+#undef bswap_field_64
+#undef bswap_field_32
+#undef bswap_field_16
+#undef bswap_field
+#undef bswap_safe
+}
+
+/*
+ * Swap a HEADER_ATTR record: the embedded perf_event_attr first, then
+ * the 64-bit words from event->attr.id to the end of the record.
+ */
+static void perf_event__hdr_attr_swap(union perf_event *event,
+				      bool sample_id_all __maybe_unused)
+{
+	size_t ids_bytes;
+
+	perf_event__attr_swap(&event->attr.attr);
+
+	ids_bytes  = event->header.size;
+	ids_bytes -= (void *)&event->attr.id - (void *)event;
+
+	mem_bswap_64(event->attr.id, ids_bytes);
+}
+
+/*
+ * Swap the type and id fields of an EVENT_UPDATE record. Nothing else
+ * in the record is swapped here.
+ */
+static void perf_event__event_update_swap(union perf_event *event,
+					  bool sample_id_all __maybe_unused)
+{
+	event->event_update.type = bswap_64(event->event_update.type);
+	event->event_update.id   = bswap_64(event->event_update.id);
+}
+
+/* Swap the event_id of a HEADER_EVENT_TYPE record. */
+static void perf_event__event_type_swap(union perf_event *event,
+					bool sample_id_all __maybe_unused)
+{
+	event->event_type.event_type.event_id =
+		bswap_64(event->event_type.event_type.event_id);
+}
+
+/* Swap the size field of a HEADER_TRACING_DATA record. */
+static void perf_event__tracing_data_swap(union perf_event *event,
+					  bool sample_id_all __maybe_unused)
+{
+	event->tracing_data.size = bswap_32(event->tracing_data.size);
+}
+
+/*
+ * Swap an AUXTRACE_INFO record: the 32-bit type, then everything from
+ * the priv array to the end of the record as 64-bit words.
+ */
+static void perf_event__auxtrace_info_swap(union perf_event *event,
+					   bool sample_id_all __maybe_unused)
+{
+	size_t size;
+
+	event->auxtrace_info.type = bswap_32(event->auxtrace_info.type);
+
+	/* Bytes between the start of priv[] and the end of the record. */
+	size = event->header.size;
+	size -= (void *)&event->auxtrace_info.priv - (void *)event;
+	mem_bswap_64(event->auxtrace_info.priv, size);
+}
+
+/* Swap the fixed fields of an AUXTRACE record, field by field. */
+static void perf_event__auxtrace_swap(union perf_event *event,
+				      bool sample_id_all __maybe_unused)
+{
+	event->auxtrace.size      = bswap_64(event->auxtrace.size);
+	event->auxtrace.offset    = bswap_64(event->auxtrace.offset);
+	event->auxtrace.reference = bswap_64(event->auxtrace.reference);
+	event->auxtrace.idx       = bswap_32(event->auxtrace.idx);
+	event->auxtrace.tid       = bswap_32(event->auxtrace.tid);
+	event->auxtrace.cpu       = bswap_32(event->auxtrace.cpu);
+}
+
+/* Swap the numeric fields of an AUXTRACE_ERROR record, field by field. */
+static void perf_event__auxtrace_error_swap(union perf_event *event,
+					    bool sample_id_all __maybe_unused)
+{
+	event->auxtrace_error.type = bswap_32(event->auxtrace_error.type);
+	event->auxtrace_error.code = bswap_32(event->auxtrace_error.code);
+	event->auxtrace_error.cpu  = bswap_32(event->auxtrace_error.cpu);
+	event->auxtrace_error.pid  = bswap_32(event->auxtrace_error.pid);
+	event->auxtrace_error.tid  = bswap_32(event->auxtrace_error.tid);
+	event->auxtrace_error.ip   = bswap_64(event->auxtrace_error.ip);
+}
+
+/*
+ * Swap a THREAD_MAP record: the entry count first, then the pid of each
+ * of the nr entries.
+ */
+static void perf_event__thread_map_swap(union perf_event *event,
+					bool sample_id_all __maybe_unused)
+{
+	unsigned i;
+
+	/* nr must be in native order before it can bound the loop below. */
+	event->thread_map.nr = bswap_64(event->thread_map.nr);
+
+	for (i = 0; i < event->thread_map.nr; i++)
+		event->thread_map.entries[i].pid = bswap_64(event->thread_map.entries[i].pid);
+}
+
+/*
+ * Swap a CPU_MAP record.
+ *
+ * The 16-bit type selects the payload layout:
+ *  - PERF_CPU_MAP__CPUS: a 16-bit count followed by that many 16-bit
+ *    cpu numbers;
+ *  - PERF_CPU_MAP__MASK: 16-bit nr and long_size, followed by nr mask
+ *    words of long_size bytes each.
+ * Any other type is left untouched.
+ */
+static void perf_event__cpu_map_swap(union perf_event *event,
+				     bool sample_id_all __maybe_unused)
+{
+	struct cpu_map_data *data = &event->cpu_map.data;
+	struct cpu_map_entries *cpus;
+	struct cpu_map_mask *mask;
+	unsigned i;
+
+	/*
+	 * type is a 16-bit field: swapping it as a 64-bit value and
+	 * truncating would always yield 0.
+	 */
+	data->type = bswap_16(data->type);
+
+	switch (data->type) {
+	case PERF_CPU_MAP__CPUS:
+		cpus = (struct cpu_map_entries *)data->data;
+
+		cpus->nr = bswap_16(cpus->nr);
+
+		for (i = 0; i < cpus->nr; i++)
+			cpus->cpu[i] = bswap_16(cpus->cpu[i]);
+		break;
+	case PERF_CPU_MAP__MASK:
+		mask = (struct cpu_map_mask *) data->data;
+
+		mask->nr = bswap_16(mask->nr);
+		mask->long_size = bswap_16(mask->long_size);
+
+		/*
+		 * mem_bswap_*() take a size in bytes, while nr counts
+		 * mask words, hence the multiplication.
+		 */
+		switch (mask->long_size) {
+		case 4:
+			mem_bswap_32(&mask->mask, mask->nr * 4);
+			break;
+		case 8:
+			mem_bswap_64(&mask->mask, mask->nr * 8);
+			break;
+		default:
+			pr_err("cpu_map swap: unsupported long size\n");
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Swap a STAT_CONFIG record: the 64-bit nr followed by nr data entries,
+ * all treated as a run of 64-bit words.
+ */
+static void perf_event__stat_config_swap(union perf_event *event,
+					 bool sample_id_all __maybe_unused)
+{
+	u64 size;
+
+	/*
+	 * nr is still in the foreign byte order at this point, so it has
+	 * to be swapped before it can be used to size the payload.
+	 */
+	size  = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]);
+	size += sizeof(event->stat_config.nr); /* nr item itself */
+	mem_bswap_64(&event->stat_config.nr, size);
+}
+
+/* Swap the fields of a STAT record, field by field. */
+static void perf_event__stat_swap(union perf_event *event,
+				  bool sample_id_all __maybe_unused)
+{
+	event->stat.id     = bswap_64(event->stat.id);
+	event->stat.thread = bswap_32(event->stat.thread);
+	event->stat.cpu    = bswap_32(event->stat.cpu);
+	event->stat.val    = bswap_64(event->stat.val);
+	event->stat.ena    = bswap_64(event->stat.ena);
+	event->stat.run    = bswap_64(event->stat.run);
+}
+
+/* Swap the type and time fields of a STAT_ROUND record. */
+static void perf_event__stat_round_swap(union perf_event *event,
+					bool sample_id_all __maybe_unused)
+{
+	event->stat_round.type = bswap_64(event->stat_round.type);
+	event->stat_round.time = bswap_64(event->stat_round.time);
+}
+
+/*
+ * Signature of a per-record byte-swap routine: swaps @event in place;
+ * @sample_id_all tells it whether trailing sample-id words are present
+ * and must be swapped as well.
+ */
+typedef void (*perf_event__swap_op)(union perf_event *event,
+				    bool sample_id_all);
+
+/*
+ * Swap routine per PERF_RECORD_* type. A NULL entry means no routine is
+ * registered for that type; types without an initializer here are
+ * implicitly NULL as well.
+ */
+static perf_event__swap_op perf_event__swap_ops[] = {
+	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
+	[PERF_RECORD_MMAP2]		  = perf_event__mmap2_swap,
+	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
+	[PERF_RECORD_FORK]		  = perf_event__task_swap,
+	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
+	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
+	[PERF_RECORD_READ]		  = perf_event__read_swap,
+	[PERF_RECORD_THROTTLE]		  = perf_event__throttle_swap,
+	[PERF_RECORD_UNTHROTTLE]	  = perf_event__throttle_swap,
+	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
+	[PERF_RECORD_AUX]		  = perf_event__aux_swap,
+	[PERF_RECORD_ITRACE_START]	  = perf_event__itrace_start_swap,
+	[PERF_RECORD_LOST_SAMPLES]	  = perf_event__all64_swap,
+	[PERF_RECORD_SWITCH]		  = perf_event__switch_swap,
+	[PERF_RECORD_SWITCH_CPU_WIDE]	  = perf_event__switch_swap,
+	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
+	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
+	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
+	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
+	[PERF_RECORD_ID_INDEX]		  = perf_event__all64_swap,
+	[PERF_RECORD_AUXTRACE_INFO]	  = perf_event__auxtrace_info_swap,
+	[PERF_RECORD_AUXTRACE]		  = perf_event__auxtrace_swap,
+	[PERF_RECORD_AUXTRACE_ERROR]	  = perf_event__auxtrace_error_swap,
+	[PERF_RECORD_THREAD_MAP]	  = perf_event__thread_map_swap,
+	[PERF_RECORD_CPU_MAP]		  = perf_event__cpu_map_swap,
+	[PERF_RECORD_STAT_CONFIG]	  = perf_event__stat_config_swap,
+	[PERF_RECORD_STAT]		  = perf_event__stat_swap,
+	[PERF_RECORD_STAT_ROUND]	  = perf_event__stat_round_swap,
+	[PERF_RECORD_EVENT_UPDATE]	  = perf_event__event_update_swap,
+	[PERF_RECORD_TIME_CONV]		  = perf_event__all64_swap,
+	[PERF_RECORD_HEADER_MAX]	  = NULL,
+};
+
+/*
+ * When perf record finishes a pass over every buffer, it records this
+ * pseudo event.
+ * We record the max timestamp t found in pass n.
+ * Assuming these timestamps are monotonic across cpus, we know that if
+ * a buffer still has events with timestamps below t, they will all be
+ * available and then read in pass n + 1.
+ * Hence when we start to read pass n + 2, we can safely flush all
+ * events with timestamps below t.
+ *
+ *    ============ PASS n =================
+ *       CPU 0         |   CPU 1
+ *                     |
+ *    cnt1 timestamps  |   cnt2 timestamps
+ *          1          |         2
+ *          2          |         3
+ *          -          |         4  <--- max recorded
+ *
+ *    ============ PASS n + 1 ==============
+ *       CPU 0         |   CPU 1
+ *                     |
+ *    cnt1 timestamps  |   cnt2 timestamps
+ *          3          |         5
+ *          4          |         6
+ *          5          |         7 <---- max recorded
+ *
+ *      Flush all events below timestamp 4
+ *
+ *    ============ PASS n + 2 ==============
+ *       CPU 0         |   CPU 1
+ *                     |
+ *    cnt1 timestamps  |   cnt2 timestamps
+ *          6          |         8
+ *          7          |         9
+ *          -          |         10
+ *
+ *      Flush all events below timestamp 7
+ *      etc...
+ *
+ * Returns the result of ordered_events__flush(oe, OE_FLUSH__ROUND).
+ */
+static int process_finished_round(struct perf_tool *tool __maybe_unused,
+				  union perf_event *event __maybe_unused,
+				  struct ordered_events *oe)
+{
+	/* In dump mode, emit a newline before flushing. */
+	if (dump_trace)
+		fprintf(stdout, "\n");
+	return ordered_events__flush(oe, OE_FLUSH__ROUND);
+}
+
+/*
+ * Queue @event on the session's ordered_events with the given @timestamp
+ * and @file_offset; returns whatever ordered_events__queue() returns.
+ */
+int perf_session__queue_event(struct perf_session *s, union perf_event *event,
+			      u64 timestamp, u64 file_offset)
+{
+	return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset);
+}
+
+/*
+ * Print the callchain of @sample as reconstructed from the kernel part
+ * of sample->callchain plus the LBR branch stack.
+ *
+ * Nothing is printed unless the callchain contains a PERF_CONTEXT_USER
+ * marker and the branch stack is non-empty.
+ */
+static void callchain__lbr_callstack_printf(struct perf_sample *sample)
+{
+	struct ip_callchain *callchain = sample->callchain;
+	struct branch_stack *lbr_stack = sample->branch_stack;
+	u64 kernel_callchain_nr = callchain->nr;
+	unsigned int i;
+
+	/* Find the index of the PERF_CONTEXT_USER marker, if any. */
+	for (i = 0; i < kernel_callchain_nr; i++) {
+		if (callchain->ips[i] == PERF_CONTEXT_USER)
+			break;
+	}
+
+	if ((i != kernel_callchain_nr) && lbr_stack->nr) {
+		u64 total_nr;
+		/*
+		 * LBR callstack can only get user call chain,
+		 * i is kernel call chain number,
+		 * 1 is PERF_CONTEXT_USER.
+		 *
+		 * The user call chain is stored in LBR registers.
+		 * LBR are pair registers. The caller is stored
+		 * in "from" register, while the callee is stored
+		 * in "to" register.
+		 * For example, there is a call stack
+		 * "A"->"B"->"C"->"D".
+		 * The LBR registers will record it like
+		 * "C"->"D", "B"->"C", "A"->"B".
+		 * So only the first "to" register and all "from"
+		 * registers are needed to construct the whole stack.
+		 */
+		total_nr = i + 1 + lbr_stack->nr + 1;
+		kernel_callchain_nr = i + 1;
+
+		printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr);
+
+		/* Kernel entries, including the PERF_CONTEXT_USER marker. */
+		for (i = 0; i < kernel_callchain_nr; i++)
+			printf("..... %2d: %016" PRIx64 "\n",
+			       i, callchain->ips[i]);
+
+		/* First "to", then every "from" of the branch stack. */
+		printf("..... %2d: %016" PRIx64 "\n",
+		       (int)(kernel_callchain_nr), lbr_stack->entries[0].to);
+		for (i = 0; i < lbr_stack->nr; i++)
+			printf("..... %2d: %016" PRIx64 "\n",
+			       (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from);
+	}
+}
+
+/*
+ * Print the callchain of @sample: the LBR-based view first when @evsel
+ * has a branch callstack, then every raw entry of the "FP chain".
+ */
+static void callchain__printf(struct perf_evsel *evsel,
+			      struct perf_sample *sample)
+{
+	struct ip_callchain *chain = sample->callchain;
+	unsigned int idx;
+
+	if (perf_evsel__has_branch_callstack(evsel))
+		callchain__lbr_callstack_printf(sample);
+
+	printf("... FP chain: nr:%" PRIu64 "\n", chain->nr);
+
+	for (idx = 0; idx < chain->nr; idx++) {
+		printf("..... %2d: %016" PRIx64 "\n",
+		       idx, chain->ips[idx]);
+	}
+}
+
+/*
+ * Print every entry of the sample's branch stack: from/to addresses,
+ * cycle count, the M/P/A/T flag letters and the reserved bits.
+ */
+static void branch_stack__printf(struct perf_sample *sample)
+{
+	struct branch_stack *stack = sample->branch_stack;
+	uint64_t idx;
+
+	printf("... branch stack: nr:%" PRIu64 "\n", stack->nr);
+
+	for (idx = 0; idx < stack->nr; idx++) {
+		struct branch_entry *entry = &stack->entries[idx];
+
+		printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
+			idx, entry->from, entry->to,
+			(unsigned short)entry->flags.cycles,
+			entry->flags.mispred ? "M" : " ",
+			entry->flags.predicted ? "P" : " ",
+			entry->flags.abort ? "A" : " ",
+			entry->flags.in_tx ? "T" : " ",
+			(unsigned)entry->flags.reserved);
+	}
+}
+
+/*
+ * Print one "name value" line per bit set in @mask. Values are taken
+ * from consecutive entries of @regs, one entry per set bit.
+ */
+static void regs_dump__printf(u64 mask, u64 *regs)
+{
+	unsigned rid, i = 0;
+
+	for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
+		u64 val = regs[i++];
+
+		printf(".... %-5s 0x%" PRIx64 "\n",
+		       perf_reg_name(rid), val);
+	}
+}
+
+/* Printable names for the PERF_SAMPLE_REGS_ABI_* values, indexed by value. */
+static const char *regs_abi[] = {
+	[PERF_SAMPLE_REGS_ABI_NONE] = "none",
+	[PERF_SAMPLE_REGS_ABI_32] = "32-bit",
+	[PERF_SAMPLE_REGS_ABI_64] = "64-bit",
+};
+
+/*
+ * Return the printable ABI name of @d, or "unknown" when d->abi is
+ * beyond PERF_SAMPLE_REGS_ABI_64.
+ */
+static inline const char *regs_dump_abi(struct regs_dump *d)
+{
+	return d->abi > PERF_SAMPLE_REGS_ABI_64 ? "unknown" : regs_abi[d->abi];
+}
+
+/*
+ * Print a register dump: a header line with @type, the mask and the ABI
+ * name, followed by one line per register selected by the mask.
+ */
+static void regs__printf(const char *type, struct regs_dump *regs)
+{
+	u64 mask = regs->mask;
+
+	printf("... %s regs: mask 0x%" PRIx64 " ABI %s\n",
+	       type,
+	       mask,
+	       regs_dump_abi(regs));
+
+	regs_dump__printf(mask, regs->regs);
+}
+
+/* Print the sample's user registers, if any were captured. */
+static void regs_user__printf(struct perf_sample *sample)
+{
+	struct regs_dump *dump = &sample->user_regs;
+
+	if (!dump->regs)
+		return;
+
+	regs__printf("user", dump);
+}
+
+/* Print the sample's interrupt registers, if any were captured. */
+static void regs_intr__printf(struct perf_sample *sample)
+{
+	struct regs_dump *dump = &sample->intr_regs;
+
+	if (!dump->regs)
+		return;
+
+	regs__printf("intr", dump);
+}
+
+/* Print the size and offset of a user stack dump (not its contents). */
+static void stack_user__printf(struct stack_dump *dump)
+{
+	printf("... ustack: size %" PRIu64 ", offset 0x%x\n",
+	       dump->size, dump->offset);
+}
+
+/*
+ * Print the cpu and/or time of @sample, depending on which of
+ * PERF_SAMPLE_CPU / PERF_SAMPLE_TIME are in the evlist's combined
+ * sample_type.
+ *
+ * For non-SAMPLE events of an evlist without sample_id_all, "-1 -1 "
+ * is printed instead.
+ */
+static void perf_evlist__print_tstamp(struct perf_evlist *evlist,
+				       union perf_event *event,
+				       struct perf_sample *sample)
+{
+	u64 sample_type = __perf_evlist__combined_sample_type(evlist);
+
+	if (event->header.type != PERF_RECORD_SAMPLE &&
+	    !perf_evlist__sample_id_all(evlist)) {
+		fputs("-1 -1 ", stdout);
+		return;
+	}
+
+	if ((sample_type & PERF_SAMPLE_CPU))
+		printf("%u ", sample->cpu);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		printf("%" PRIu64 " ", sample->time);
+}
+
+/*
+ * Print the read values of @sample as selected by @read_format: the
+ * enabled/running times when requested, then either every value of the
+ * group (PERF_FORMAT_GROUP) or the single id/value pair.
+ */
+static void sample_read__printf(struct perf_sample *sample, u64 read_format)
+{
+	printf("... sample_read:\n");
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		printf("...... time enabled %016" PRIx64 "\n",
+		       sample->read.time_enabled);
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		printf("...... time running %016" PRIx64 "\n",
+		       sample->read.time_running);
+
+	if (read_format & PERF_FORMAT_GROUP) {
+		u64 i;
+
+		printf(".... group nr %" PRIu64 "\n", sample->read.group.nr);
+
+		for (i = 0; i < sample->read.group.nr; i++) {
+			struct sample_read_value *value;
+
+			value = &sample->read.group.values[i];
+			printf("..... id %016" PRIx64
+			       ", value %016" PRIx64 "\n",
+			       value->id, value->value);
+		}
+	} else
+		printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n",
+			sample->read.one.id, sample->read.one.value);
+}
+
+/*
+ * In dump mode (dump_trace set), print the generic part of @event: file
+ * offset, size and type, the output of trace_event(), the timestamp
+ * columns when @sample is given, and finally the PERF_RECORD_* name.
+ *
+ * The last line is left without a trailing newline.
+ */
+static void dump_event(struct perf_evlist *evlist, union perf_event *event,
+		       u64 file_offset, struct perf_sample *sample)
+{
+	if (!dump_trace)
+		return;
+
+	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
+	       file_offset, event->header.size, event->header.type);
+
+	trace_event(event);
+
+	if (sample)
+		perf_evlist__print_tstamp(evlist, event, sample);
+
+	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
+	       event->header.size, perf_event__name(event->header.type));
+}
+
+/*
+ * When dump_trace is set, print the common fields of a PERF_RECORD_SAMPLE
+ * followed by every optional part selected by the sample_type of @evsel.
+ */
+static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
+			struct perf_sample *sample)
+{
+	u64 type;
+
+	if (!dump_trace)
+		return;
+
+	printf("(IP, 0x%x): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
+	       event->header.misc, sample->pid, sample->tid, sample->ip,
+	       sample->period, sample->addr);
+
+	type = evsel->attr.sample_type;
+
+	if (type & PERF_SAMPLE_CALLCHAIN)
+		callchain__printf(evsel, sample);
+
+	/* Branch stacks used as call stacks are not dumped as branches. */
+	if ((type & PERF_SAMPLE_BRANCH_STACK) &&
+	    !perf_evsel__has_branch_callstack(evsel))
+		branch_stack__printf(sample);
+
+	if (type & PERF_SAMPLE_REGS_USER)
+		regs_user__printf(sample);
+
+	if (type & PERF_SAMPLE_REGS_INTR)
+		regs_intr__printf(sample);
+
+	if (type & PERF_SAMPLE_STACK_USER)
+		stack_user__printf(&sample->user_stack);
+
+	if (type & PERF_SAMPLE_WEIGHT)
+		printf("... weight: %" PRIu64 "\n", sample->weight);
+
+	if (type & PERF_SAMPLE_DATA_SRC)
+		printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
+
+	if (type & PERF_SAMPLE_PHYS_ADDR)
+		printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
+
+	if (type & PERF_SAMPLE_TRANSACTION)
+		printf("... transaction: %" PRIx64 "\n", sample->transaction);
+
+	if (type & PERF_SAMPLE_READ)
+		sample_read__printf(sample, evsel->attr.read_format);
+}
+
+/*
+ * Dump a PERF_RECORD_READ event when dump_trace is set.
+ *
+ * @evsel may be NULL when the id of the event could not be resolved. The
+ * summary line is still printed (with "FAIL" as the event name), but the
+ * read_format dependent fields are skipped because the format is unknown.
+ */
+static void dump_read(struct perf_evsel *evsel, union perf_event *event)
+{
+	struct read_event *read_event = &event->read;
+	u64 read_format;
+
+	if (!dump_trace)
+		return;
+
+	printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid,
+	       evsel ? perf_evsel__name(evsel) : "FAIL",
+	       event->read.value);
+
+	/* Without an evsel there is no read_format to interpret. */
+	if (!evsel)
+		return;
+
+	read_format = evsel->attr.read_format;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		printf("... time enabled : %" PRIu64 "\n", read_event->time_enabled);
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		printf("... time running : %" PRIu64 "\n", read_event->time_running);
+
+	if (read_format & PERF_FORMAT_ID)
+		printf("... id           : %" PRIu64 "\n", read_event->id);
+}
+
+/*
+ * Pick the machine an event belongs to.
+ *
+ * Everything is attributed to the host unless perf_guest is set and the
+ * sample's cpumode is one of the guest modes. In that case the machine is
+ * looked up by pid (taken from the mmap payload for MMAP/MMAP2 events, from
+ * the sample otherwise), falling back to the default guest kernel machine.
+ */
+static struct machine *machines__find_for_cpumode(struct machines *machines,
+					       union perf_event *event,
+					       struct perf_sample *sample)
+{
+	struct machine *machine;
+	u32 pid;
+
+	if (!perf_guest)
+		return &machines->host;
+
+	if (sample->cpumode != PERF_RECORD_MISC_GUEST_KERNEL &&
+	    sample->cpumode != PERF_RECORD_MISC_GUEST_USER)
+		return &machines->host;
+
+	switch (event->header.type) {
+	case PERF_RECORD_MMAP:
+	case PERF_RECORD_MMAP2:
+		pid = event->mmap.pid;
+		break;
+	default:
+		pid = sample->pid;
+		break;
+	}
+
+	machine = machines__find(machines, pid);
+	if (machine)
+		return machine;
+
+	return machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
+}
+
+/*
+ * Deliver one counter value read as part of a PERF_SAMPLE_READ sample.
+ *
+ * The sample is rewritten to carry the id of @v and, as period, the delta
+ * since the value previously seen for that id. Values whose id is unknown,
+ * or has no evsel attached, are counted in nr_unknown_id and dropped.
+ *
+ * Returns 0 when dropped, otherwise the return value of tool->sample().
+ */
+static int deliver_sample_value(struct perf_evlist *evlist,
+				struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct sample_read_value *v,
+				struct machine *machine)
+{
+	struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id);
+
+	if (sid == NULL) {
+		++evlist->stats.nr_unknown_id;
+		return 0;
+	}
+
+	sample->id     = v->id;
+	sample->period = v->value - sid->period;
+	sid->period    = v->value;
+
+	if (sid->evsel == NULL) {
+		++evlist->stats.nr_unknown_id;
+		return 0;
+	}
+
+	return tool->sample(tool, event, sample, sid->evsel, machine);
+}
+
+/*
+ * Deliver every value of a group read, stopping at the first non-zero
+ * return. An empty group yields -EINVAL.
+ */
+static int deliver_sample_group(struct perf_evlist *evlist,
+				struct perf_tool *tool,
+				union  perf_event *event,
+				struct perf_sample *sample,
+				struct machine *machine)
+{
+	int ret = -EINVAL;
+	u64 idx = 0;
+
+	while (idx < sample->read.group.nr) {
+		struct sample_read_value *v = &sample->read.group.values[idx];
+
+		ret = deliver_sample_value(evlist, tool, event, sample, v,
+					   machine);
+		if (ret)
+			return ret;
+		idx++;
+	}
+
+	return ret;
+}
+
+/*
+ * Hand a sample to tool->sample(), splitting PERF_SAMPLE_READ samples into
+ * one delivery per counter value. The caller guarantees @evsel != NULL.
+ */
+static int
+perf_evlist__deliver_sample(struct perf_evlist *evlist,
+			    struct perf_tool *tool,
+			    union  perf_event *event,
+			    struct perf_sample *sample,
+			    struct perf_evsel *evsel,
+			    struct machine *machine)
+{
+	bool has_read = evsel->attr.sample_type & PERF_SAMPLE_READ;
+	bool is_group = evsel->attr.read_format & PERF_FORMAT_GROUP;
+
+	/* Standard sample delivery. */
+	if (!has_read)
+		return tool->sample(tool, event, sample, evsel, machine);
+
+	/* PERF_SAMPLE_READ: a single value unless the group format is set. */
+	if (!is_group)
+		return deliver_sample_value(evlist, tool, event, sample,
+					    &sample->read.one, machine);
+
+	return deliver_sample_group(evlist, tool, event, sample, machine);
+}
+
+/*
+ * Dispatch a kernel-generated event (type below PERF_RECORD_USER_TYPE_START
+ * in the callers visible here) to the matching callback of @tool.
+ *
+ * The evsel is resolved from sample->id and the machine from the sample's
+ * cpumode before dispatching. A few event types also update evlist->stats.
+ *
+ * Returns the callback's return value, 0 for samples that are dropped
+ * (unknown id or no machine), or -1 for an unknown event type.
+ */
+static int machines__deliver_event(struct machines *machines,
+				   struct perf_evlist *evlist,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct perf_tool *tool, u64 file_offset)
+{
+	struct perf_evsel *evsel;
+	struct machine *machine;
+
+	dump_event(evlist, event, file_offset, sample);
+
+	/* May be NULL: only the SAMPLE case below rejects that. */
+	evsel = perf_evlist__id2evsel(evlist, sample->id);
+
+	machine = machines__find_for_cpumode(machines, event, sample);
+
+	switch (event->header.type) {
+	case PERF_RECORD_SAMPLE:
+		if (evsel == NULL) {
+			++evlist->stats.nr_unknown_id;
+			return 0;
+		}
+		dump_sample(evsel, event, sample);
+		if (machine == NULL) {
+			++evlist->stats.nr_unprocessable_samples;
+			return 0;
+		}
+		return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
+	case PERF_RECORD_MMAP:
+		return tool->mmap(tool, event, sample, machine);
+	case PERF_RECORD_MMAP2:
+		if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
+			++evlist->stats.nr_proc_map_timeout;
+		return tool->mmap2(tool, event, sample, machine);
+	case PERF_RECORD_COMM:
+		return tool->comm(tool, event, sample, machine);
+	case PERF_RECORD_NAMESPACES:
+		return tool->namespaces(tool, event, sample, machine);
+	case PERF_RECORD_FORK:
+		return tool->fork(tool, event, sample, machine);
+	case PERF_RECORD_EXIT:
+		return tool->exit(tool, event, sample, machine);
+	case PERF_RECORD_LOST:
+		/* Totals are only accumulated when the default handler is in use. */
+		if (tool->lost == perf_event__process_lost)
+			evlist->stats.total_lost += event->lost.lost;
+		return tool->lost(tool, event, sample, machine);
+	case PERF_RECORD_LOST_SAMPLES:
+		if (tool->lost_samples == perf_event__process_lost_samples)
+			evlist->stats.total_lost_samples += event->lost_samples.lost;
+		return tool->lost_samples(tool, event, sample, machine);
+	case PERF_RECORD_READ:
+		dump_read(evsel, event);
+		return tool->read(tool, event, sample, evsel, machine);
+	case PERF_RECORD_THROTTLE:
+		return tool->throttle(tool, event, sample, machine);
+	case PERF_RECORD_UNTHROTTLE:
+		return tool->unthrottle(tool, event, sample, machine);
+	case PERF_RECORD_AUX:
+		if (tool->aux == perf_event__process_aux) {
+			if (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
+				evlist->stats.total_aux_lost += 1;
+			if (event->aux.flags & PERF_AUX_FLAG_PARTIAL)
+				evlist->stats.total_aux_partial += 1;
+		}
+		return tool->aux(tool, event, sample, machine);
+	case PERF_RECORD_ITRACE_START:
+		return tool->itrace_start(tool, event, sample, machine);
+	case PERF_RECORD_SWITCH:
+	case PERF_RECORD_SWITCH_CPU_WIDE:
+		return tool->context_switch(tool, event, sample, machine);
+	default:
+		++evlist->stats.nr_unknown_events;
+		return -1;
+	}
+}
+
+/*
+ * Parse the sample data of @event and deliver it.
+ *
+ * The event is first offered to auxtrace__process_event(): a negative
+ * return is propagated, a positive one ends processing here with 0, and
+ * zero lets the event continue to the machines.
+ */
+static int perf_session__deliver_event(struct perf_session *session,
+				       union perf_event *event,
+				       struct perf_tool *tool,
+				       u64 file_offset)
+{
+	struct perf_sample sample;
+	int ret = perf_evlist__parse_sample(session->evlist, event, &sample);
+
+	if (ret) {
+		pr_err("Can't parse sample, err = %d\n", ret);
+		return ret;
+	}
+
+	ret = auxtrace__process_event(session, event, &sample, tool);
+	if (ret != 0)
+		return ret < 0 ? ret : 0;
+
+	return machines__deliver_event(&session->machines, session->evlist,
+				       event, &sample, tool, file_offset);
+}
+
+/*
+ * Dispatch a user-space (synthesized) event to the matching callback of
+ * session->tool. These events are never queued for ordering.
+ *
+ * Returns the callback's return value, 0 for the ignored
+ * PERF_RECORD_HEADER_EVENT_TYPE, or -EINVAL for an unhandled type.
+ */
+static s64 perf_session__process_user_event(struct perf_session *session,
+					    union perf_event *event,
+					    u64 file_offset)
+{
+	struct ordered_events *oe = &session->ordered_events;
+	struct perf_tool *tool = session->tool;
+	/* Only used for dumping: user events carry no parsed sample. */
+	struct perf_sample sample = { .time = 0, };
+	int fd = perf_data__fd(session->data);
+	int err;
+
+	dump_event(session->evlist, event, file_offset, &sample);
+
+	/* These events are processed right away */
+	switch (event->header.type) {
+	case PERF_RECORD_HEADER_ATTR:
+		err = tool->attr(tool, event, &session->evlist);
+		if (err == 0) {
+			perf_session__set_id_hdr_size(session);
+			perf_session__set_comm_exec(session);
+		}
+		return err;
+	case PERF_RECORD_EVENT_UPDATE:
+		return tool->event_update(tool, event, &session->evlist);
+	case PERF_RECORD_HEADER_EVENT_TYPE:
+		/*
+		 * Deprecated, but we need to handle it for the sake
+		 * of old data files created in pipe mode.
+		 */
+		return 0;
+	case PERF_RECORD_HEADER_TRACING_DATA:
+		/* setup for reading amidst mmap */
+		lseek(fd, file_offset, SEEK_SET);
+		return tool->tracing_data(tool, event, session);
+	case PERF_RECORD_HEADER_BUILD_ID:
+		return tool->build_id(tool, event, session);
+	case PERF_RECORD_FINISHED_ROUND:
+		return tool->finished_round(tool, event, oe);
+	case PERF_RECORD_ID_INDEX:
+		return tool->id_index(tool, event, session);
+	case PERF_RECORD_AUXTRACE_INFO:
+		return tool->auxtrace_info(tool, event, session);
+	case PERF_RECORD_AUXTRACE:
+		/* setup for reading amidst mmap */
+		lseek(fd, file_offset + event->header.size, SEEK_SET);
+		return tool->auxtrace(tool, event, session);
+	case PERF_RECORD_AUXTRACE_ERROR:
+		perf_session__auxtrace_error_inc(session, event);
+		return tool->auxtrace_error(tool, event, session);
+	case PERF_RECORD_THREAD_MAP:
+		return tool->thread_map(tool, event, session);
+	case PERF_RECORD_CPU_MAP:
+		return tool->cpu_map(tool, event, session);
+	case PERF_RECORD_STAT_CONFIG:
+		return tool->stat_config(tool, event, session);
+	case PERF_RECORD_STAT:
+		return tool->stat(tool, event, session);
+	case PERF_RECORD_STAT_ROUND:
+		return tool->stat_round(tool, event, session);
+	case PERF_RECORD_TIME_CONV:
+		/* Keep a copy in the session before handing it to the tool. */
+		session->time_conv = event->time_conv;
+		return tool->time_conv(tool, event, session);
+	case PERF_RECORD_HEADER_FEATURE:
+		return tool->feature(tool, event, session);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Deliver an event synthesized at runtime: it is counted in the evlist
+ * stats and then routed, with file offset 0, to the user-event or the
+ * kernel-event dispatcher depending on its type.
+ */
+int perf_session__deliver_synth_event(struct perf_session *session,
+				      union perf_event *event,
+				      struct perf_sample *sample)
+{
+	struct perf_evlist *evlist = session->evlist;
+	bool is_user_event;
+
+	events_stats__inc(&evlist->stats, event->header.type);
+
+	is_user_event = event->header.type >= PERF_RECORD_USER_TYPE_START;
+	if (!is_user_event)
+		return machines__deliver_event(&session->machines, evlist,
+					       event, sample, session->tool, 0);
+
+	return perf_session__process_user_event(session, event, 0);
+}
+
+/*
+ * Byte-swap @event using the handler registered for its type in
+ * perf_event__swap_ops[]; types without a handler are left untouched.
+ */
+static void event_swap(union perf_event *event, bool sample_id_all)
+{
+	perf_event__swap_op op = perf_event__swap_ops[event->header.type];
+
+	if (!op)
+		return;
+
+	op(event, sample_id_all);
+}
+
+/*
+ * Fetch the event located at @file_offset without processing it.
+ *
+ * When the whole file is covered by a single mapping and no byte swapping
+ * is needed, *@event_ptr points straight into that mapping and @buf is not
+ * used. Otherwise the event is read from the file descriptor into @buf
+ * (of @buf_sz bytes) and byte-swapped if required; pipes are not supported.
+ *
+ * If @sample is non-NULL and the event is not a user event, its sample data
+ * is parsed into @sample.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+int perf_session__peek_event(struct perf_session *session, off_t file_offset,
+			     void *buf, size_t buf_sz,
+			     union perf_event **event_ptr,
+			     struct perf_sample *sample)
+{
+	union perf_event *event;
+	size_t hdr_sz, rest;
+	int fd;
+
+	if (session->one_mmap && !session->header.needs_swap) {
+		event = file_offset - session->one_mmap_offset +
+			session->one_mmap_addr;
+		goto out_parse_sample;
+	}
+
+	if (perf_data__is_pipe(session->data))
+		return -1;
+
+	fd = perf_data__fd(session->data);
+	hdr_sz = sizeof(struct perf_event_header);
+
+	if (buf_sz < hdr_sz)
+		return -1;
+
+	if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 ||
+	    readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz)
+		return -1;
+
+	event = (union perf_event *)buf;
+
+	if (session->header.needs_swap)
+		perf_event_header__bswap(&event->header);
+
+	if (event->header.size < hdr_sz || event->header.size > buf_sz)
+		return -1;
+
+	rest = event->header.size - hdr_sz;
+
+	/*
+	 * The payload goes right after the header already sitting at the
+	 * start of buf; reading it to buf itself would clobber that header.
+	 */
+	if (readn(fd, (char *)buf + hdr_sz, rest) != (ssize_t)rest)
+		return -1;
+
+	if (session->header.needs_swap)
+		event_swap(event, perf_evlist__sample_id_all(session->evlist));
+
+out_parse_sample:
+
+	if (sample && event->header.type < PERF_RECORD_USER_TYPE_START &&
+	    perf_evlist__parse_sample(session->evlist, event, sample))
+		return -1;
+
+	*event_ptr = event;
+
+	return 0;
+}
+
+/*
+ * Process one event read from the data file or pipe.
+ *
+ * The event header is expected to be in host byte order already (the
+ * fetch/read paths in this file swap it before calling here). User events
+ * are handled immediately; other events are queued for time ordering when
+ * the tool asks for ordered events, or delivered directly otherwise.
+ *
+ * Returns a negative error code on failure. Non-negative values come from
+ * the handlers; the callers in this file add a positive return to their
+ * read position.
+ */
+static s64 perf_session__process_event(struct perf_session *session,
+				       union perf_event *event, u64 file_offset)
+{
+	struct perf_evlist *evlist = session->evlist;
+	struct perf_tool *tool = session->tool;
+	int ret;
+
+	/*
+	 * Validate the type before event_swap(): it indexes the swap table
+	 * by header.type, so an out-of-range type must be rejected first.
+	 * NOTE(review): relies on event_swap() leaving the header alone,
+	 * which the type check that used to follow it also assumed.
+	 */
+	if (event->header.type >= PERF_RECORD_HEADER_MAX)
+		return -EINVAL;
+
+	if (session->header.needs_swap)
+		event_swap(event, perf_evlist__sample_id_all(evlist));
+
+	events_stats__inc(&evlist->stats, event->header.type);
+
+	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+		return perf_session__process_user_event(session, event, file_offset);
+
+	if (tool->ordered_events) {
+		u64 timestamp = -1ULL;
+
+		/* -1 is tolerated: the event is queued with timestamp -1ULL. */
+		ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
+		if (ret && ret != -1)
+			return ret;
+
+		/* -ETIME from the queue falls through to direct delivery. */
+		ret = perf_session__queue_event(session, event, timestamp, file_offset);
+		if (ret != -ETIME)
+			return ret;
+	}
+
+	return perf_session__deliver_event(session, event, tool, file_offset);
+}
+
+/* Byte-swap the three fields of an event header in place. */
+void perf_event_header__bswap(struct perf_event_header *hdr)
+{
+	hdr->size = bswap_16(hdr->size);
+	hdr->misc = bswap_16(hdr->misc);
+	hdr->type = bswap_32(hdr->type);
+}
+
+/* Find or create the host-machine thread for @pid (looked up with pid -1, tid @pid). */
+struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
+{
+	struct machine *host = &session->machines.host;
+
+	return machine__findnew_thread(host, -1, pid);
+}
+
+/*
+ * Make sure the host machine has a thread for pid/tid 0 named "swapper"
+ * with its namespaces set. Returns 0 on success, -1 otherwise.
+ */
+int perf_session__register_idle_thread(struct perf_session *session)
+{
+	struct machine *host = &session->machines.host;
+	struct thread *idle = machine__findnew_thread(host, 0, 0);
+	int err = 0;
+
+	if (!idle || thread__set_comm(idle, "swapper", 0)) {
+		pr_err("problem inserting idle task.\n");
+		err = -1;
+	}
+
+	if (!idle || thread__set_namespaces(idle, 0, NULL)) {
+		pr_err("problem inserting idle task.\n");
+		err = -1;
+	}
+
+	/* machine__findnew_thread() got the thread, so put it */
+	thread__put(idle);
+	return err;
+}
+
+/*
+ * Warn about events that were recorded out of order, unless any evsel of
+ * the session has write_backward set, in which case nothing is reported.
+ */
+static void
+perf_session__warn_order(const struct perf_session *session)
+{
+	const struct ordered_events *oe = &session->ordered_events;
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (evsel->attr.write_backward)
+			return;
+	}
+
+	if (oe->nr_unordered_events == 0)
+		return;
+
+	ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
+}
+
+/*
+ * Emit a UI warning for every anomaly counted in the evlist stats while
+ * the session was processed: lost events and samples, lost or partial AUX
+ * data, unknown events and ids, invalid callchains, unprocessable samples,
+ * out-of-order events, auxtrace errors and /proc map parse timeouts.
+ *
+ * The lost/lost_samples/aux checks only apply when the tool uses the
+ * default handler for that event, since only then are the totals updated.
+ */
+static void perf_session__warn_about_errors(const struct perf_session *session)
+{
+	const struct events_stats *stats = &session->evlist->stats;
+
+	if (session->tool->lost == perf_event__process_lost &&
+	    stats->nr_events[PERF_RECORD_LOST] != 0) {
+		ui__warning("Processed %d events and lost %d chunks!\n\n"
+			    "Check IO/CPU overload!\n\n",
+			    stats->nr_events[0],
+			    stats->nr_events[PERF_RECORD_LOST]);
+	}
+
+	if (session->tool->lost_samples == perf_event__process_lost_samples) {
+		double drop_rate;
+
+		/* Fraction of lost samples over seen + lost; warn above 5%. */
+		drop_rate = (double)stats->total_lost_samples /
+			    (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
+		if (drop_rate > 0.05) {
+			ui__warning("Processed %" PRIu64 " samples and lost %3.2f%%!\n\n",
+				    stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
+				    drop_rate * 100.0);
+		}
+	}
+
+	if (session->tool->aux == perf_event__process_aux &&
+	    stats->total_aux_lost != 0) {
+		ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n",
+			    stats->total_aux_lost,
+			    stats->nr_events[PERF_RECORD_AUX]);
+	}
+
+	if (session->tool->aux == perf_event__process_aux &&
+	    stats->total_aux_partial != 0) {
+		bool vmm_exclusive = false;
+
+		/* Best effort: on failure vmm_exclusive simply stays false. */
+		(void)sysfs__read_bool("module/kvm_intel/parameters/vmm_exclusive",
+		                       &vmm_exclusive);
+
+		ui__warning("AUX data had gaps in it %" PRIu64 " times out of %u!\n\n"
+		            "Are you running a KVM guest in the background?%s\n\n",
+			    stats->total_aux_partial,
+			    stats->nr_events[PERF_RECORD_AUX],
+			    vmm_exclusive ?
+			    "\nReloading kvm_intel module with vmm_exclusive=0\n"
+			    "will reduce the gaps to only guest's timeslices." :
+			    "");
+	}
+
+	if (stats->nr_unknown_events != 0) {
+		ui__warning("Found %u unknown events!\n\n"
+			    "Is this an older tool processing a perf.data "
+			    "file generated by a more recent tool?\n\n"
+			    "If that is not the case, consider "
+			    "reporting to linux-kernel@vger.kernel.org.\n\n",
+			    stats->nr_unknown_events);
+	}
+
+	if (stats->nr_unknown_id != 0) {
+		ui__warning("%u samples with id not present in the header\n",
+			    stats->nr_unknown_id);
+	}
+
+	if (stats->nr_invalid_chains != 0) {
+		ui__warning("Found invalid callchains!\n\n"
+			    "%u out of %u events were discarded for this reason.\n\n"
+			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
+			    stats->nr_invalid_chains,
+			    stats->nr_events[PERF_RECORD_SAMPLE]);
+	}
+
+	if (stats->nr_unprocessable_samples != 0) {
+		ui__warning("%u unprocessable samples recorded.\n"
+			    "Do you have a KVM guest running and not using 'perf kvm'?\n",
+			    stats->nr_unprocessable_samples);
+	}
+
+	perf_session__warn_order(session);
+
+	events_stats__auxtrace_error_warn(stats);
+
+	if (stats->nr_proc_map_timeout != 0) {
+		ui__warning("%d map information files for pre-existing threads were\n"
+			    "not processed, if there are samples for addresses they\n"
+			    "will not be resolved, you may find out which are these\n"
+			    "threads by running with -v and redirecting the output\n"
+			    "to a file.\n"
+			    "The time limit to process proc map is too short?\n"
+			    "Increase it by --proc-map-timeout\n",
+			    stats->nr_proc_map_timeout);
+	}
+}
+
+/* Per-thread callback: flush the thread stack of @thread; @p is unused. */
+static int perf_session__flush_thread_stack(struct thread *thread,
+					    void *p __maybe_unused)
+{
+	int ret = thread_stack__flush(thread);
+
+	return ret;
+}
+
+/* Flush the thread stack of every thread on every machine of @session. */
+static int perf_session__flush_thread_stacks(struct perf_session *session)
+{
+	struct machines *machines = &session->machines;
+
+	return machines__for_each_thread(machines,
+					 perf_session__flush_thread_stack, NULL);
+}
+
+/*
+ * Stop flag for the event loops in this file, which poll session_done().
+ * NOTE(review): session_done() is presumably a macro reading this variable
+ * - confirm in session.h.
+ */
+volatile int session_done;
+
+/*
+ * Read and process events from a pipe until end of stream, an error, or
+ * session_done().
+ *
+ * Events are read one at a time into a heap buffer that grows on demand;
+ * copy-on-queue is enabled because that buffer is reused for every event.
+ * Pending ordered events, auxtrace events and thread stacks are flushed at
+ * the end.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+static int __perf_session__process_pipe_events(struct perf_session *session)
+{
+	struct ordered_events *oe = &session->ordered_events;
+	struct perf_tool *tool = session->tool;
+	int fd = perf_data__fd(session->data);
+	union perf_event *event;
+	uint32_t size, cur_size = 0;
+	void *buf = NULL;
+	s64 skip = 0;
+	u64 head;
+	ssize_t err;
+	void *p;
+
+	perf_tool__fill_defaults(tool);
+
+	head = 0;
+	cur_size = sizeof(union perf_event);
+
+	buf = malloc(cur_size);
+	if (!buf)
+		return -errno;
+	ordered_events__set_copy_on_queue(oe, true);
+more:
+	event = buf;
+	err = readn(fd, event, sizeof(struct perf_event_header));
+	if (err <= 0) {
+		/* A clean end of stream between events is not an error. */
+		if (err == 0)
+			goto done;
+
+		pr_err("failed to read event header\n");
+		goto out_err;
+	}
+
+	if (session->header.needs_swap)
+		perf_event_header__bswap(&event->header);
+
+	/*
+	 * From here on err holds the positive byte count of the header read,
+	 * so every failure path must set a real error code before bailing
+	 * out, or the caller would see a positive "error".
+	 */
+	size = event->header.size;
+	if (size < sizeof(struct perf_event_header)) {
+		pr_err("bad event header size\n");
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	if (size > cur_size) {
+		void *new = realloc(buf, size);
+		if (!new) {
+			pr_err("failed to allocate memory to read event\n");
+			err = -ENOMEM;
+			goto out_err;
+		}
+		buf = new;
+		cur_size = size;
+		event = buf;
+	}
+	p = event;
+	p += sizeof(struct perf_event_header);
+
+	if (size - sizeof(struct perf_event_header)) {
+		err = readn(fd, p, size - sizeof(struct perf_event_header));
+		if (err <= 0) {
+			if (err == 0) {
+				pr_err("unexpected end of event stream\n");
+				goto done;
+			}
+
+			pr_err("failed to read event data\n");
+			goto out_err;
+		}
+	}
+
+	if ((skip = perf_session__process_event(session, event, head)) < 0) {
+		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+		       head, event->header.size, event->header.type);
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	head += size;
+
+	/* A positive return is added to the position on top of the event size. */
+	if (skip > 0)
+		head += skip;
+
+	if (!session_done())
+		goto more;
+done:
+	/* do the final flush for ordered samples */
+	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+	if (err)
+		goto out_err;
+	err = auxtrace__flush_events(session, tool);
+	if (err)
+		goto out_err;
+	err = perf_session__flush_thread_stacks(session);
+out_err:
+	free(buf);
+	if (!tool->no_warn)
+		perf_session__warn_about_errors(session);
+	ordered_events__free(&session->ordered_events);
+	auxtrace__free_events(session);
+	return err;
+}
+
+/*
+ * Return the event at offset @head of the mapping @buf, or NULL when the
+ * event does not fit entirely in the @mmap_size bytes of that mapping.
+ *
+ * With needs_swap set the header is byte-swapped in place; when the event
+ * is rejected the swap is undone so a later fetch sees the original bytes.
+ */
+static union perf_event *
+fetch_mmaped_event(struct perf_session *session,
+		   u64 head, size_t mmap_size, char *buf)
+{
+	bool swap = session->header.needs_swap;
+	union perf_event *event;
+
+	/* The header itself must fit before its size field can be read. */
+	if (head + sizeof(event->header) > mmap_size)
+		return NULL;
+
+	event = (union perf_event *)(buf + head);
+
+	if (swap)
+		perf_event_header__bswap(&event->header);
+
+	if (head + event->header.size <= mmap_size)
+		return event;
+
+	/* We're not fetching the event so swap back again */
+	if (swap)
+		perf_event_header__bswap(&event->header);
+
+	return NULL;
+}
+
+/*
+ * On 64bit we can mmap the data file in one go. No need for tiny mmap
+ * slices. On 32bit we use 32MB.
+ *
+ * NUM_MMAPS sizes the ring of mappings tracked by
+ * __perf_session__process_events(). The ring index wraps with
+ * "& (ARRAY_SIZE(mmaps) - 1)", so NUM_MMAPS must remain a power of two.
+ */
+#if BITS_PER_LONG == 64
+#define MMAP_SIZE ULLONG_MAX
+#define NUM_MMAPS 1
+#else
+#define MMAP_SIZE (32 * 1024 * 1024ULL)
+#define NUM_MMAPS 128
+#endif
+
+/*
+ * Process the events stored in a data file by walking it through mmap.
+ *
+ * @data_offset/@data_size delimit the event area and @file_size is the
+ * size of the file; processing stops at whichever end comes first. The
+ * file is mapped in windows of at most MMAP_SIZE bytes; when an event does
+ * not fit in the current window a new one is mapped starting at the page
+ * containing that event. Up to NUM_MMAPS windows are tracked in a ring and
+ * the slot about to be reused is unmapped first.
+ *
+ * Pending ordered events, auxtrace events and thread stacks are flushed at
+ * the end.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int __perf_session__process_events(struct perf_session *session,
+					  u64 data_offset, u64 data_size,
+					  u64 file_size)
+{
+	struct ordered_events *oe = &session->ordered_events;
+	struct perf_tool *tool = session->tool;
+	int fd = perf_data__fd(session->data);
+	u64 head, page_offset, file_offset, file_pos, size;
+	int err, mmap_prot, mmap_flags, map_idx = 0;
+	size_t	mmap_size;
+	char *buf, *mmaps[NUM_MMAPS];
+	union perf_event *event;
+	struct ui_progress prog;
+	s64 skip;
+
+	perf_tool__fill_defaults(tool);
+
+	/* mmap offsets must be page aligned; head is the remainder. */
+	page_offset = page_size * (data_offset / page_size);
+	file_offset = page_offset;
+	head = data_offset - page_offset;
+
+	if (data_size == 0)
+		goto out;
+
+	if (data_offset + data_size < file_size)
+		file_size = data_offset + data_size;
+
+	ui_progress__init_size(&prog, file_size, "Processing events...");
+
+	mmap_size = MMAP_SIZE;
+	if (mmap_size > file_size) {
+		mmap_size = file_size;
+		session->one_mmap = true;
+	}
+
+	memset(mmaps, 0, sizeof(mmaps));
+
+	mmap_prot  = PROT_READ;
+	mmap_flags = MAP_SHARED;
+
+	/* Swapping happens in place, so a private writable mapping is needed. */
+	if (session->header.needs_swap) {
+		mmap_prot  |= PROT_WRITE;
+		mmap_flags = MAP_PRIVATE;
+	}
+remap:
+	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, fd,
+		   file_offset);
+	if (buf == MAP_FAILED) {
+		/* Save errno first: pr_err() may overwrite it. */
+		err = -errno;
+		pr_err("failed to mmap file\n");
+		goto out_err;
+	}
+	mmaps[map_idx] = buf;
+	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
+	file_pos = file_offset + head;
+	if (session->one_mmap) {
+		session->one_mmap_addr = buf;
+		session->one_mmap_offset = file_offset;
+	}
+
+more:
+	event = fetch_mmaped_event(session, head, mmap_size, buf);
+	if (!event) {
+		/* Free the ring slot the next mapping is going to occupy. */
+		if (mmaps[map_idx]) {
+			munmap(mmaps[map_idx], mmap_size);
+			mmaps[map_idx] = NULL;
+		}
+
+		page_offset = page_size * (head / page_size);
+		file_offset += page_offset;
+		head -= page_offset;
+		goto remap;
+	}
+
+	size = event->header.size;
+
+	if (size < sizeof(struct perf_event_header) ||
+	    (skip = perf_session__process_event(session, event, file_pos)) < 0) {
+		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+		       file_offset + head, event->header.size,
+		       event->header.type);
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	/* A positive return is added to the position on top of the event size. */
+	if (skip)
+		size += skip;
+
+	head += size;
+	file_pos += size;
+
+	ui_progress__update(&prog, size);
+
+	if (session_done())
+		goto out;
+
+	if (file_pos < file_size)
+		goto more;
+
+out:
+	/* do the final flush for ordered samples */
+	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+	if (err)
+		goto out_err;
+	err = auxtrace__flush_events(session, tool);
+	if (err)
+		goto out_err;
+	err = perf_session__flush_thread_stacks(session);
+out_err:
+	ui_progress__finish();
+	if (!tool->no_warn)
+		perf_session__warn_about_errors(session);
+	/*
+	 * We may be switching perf.data output, so make ordered_events
+	 * reusable.
+	 */
+	ordered_events__reinit(&session->ordered_events);
+	auxtrace__free_events(session);
+	session->one_mmap = false;
+	return err;
+}
+
+/*
+ * Process every event of the session's input, after registering the idle
+ * thread. Pipes are read sequentially; regular files are walked via mmap
+ * over the data area described by the file header.
+ *
+ * Returns -ENOMEM if the idle thread cannot be registered, otherwise the
+ * result of the processing loop.
+ */
+int perf_session__process_events(struct perf_session *session)
+{
+	u64 size = perf_data__size(session->data);
+
+	if (perf_session__register_idle_thread(session) < 0)
+		return -ENOMEM;
+
+	if (perf_data__is_pipe(session->data))
+		return __perf_session__process_pipe_events(session);
+
+	return __perf_session__process_events(session,
+					      session->header.data_offset,
+					      session->header.data_size, size);
+}
+
+/*
+ * Tell whether the session contains at least one tracepoint event. When it
+ * does not, an error naming the 'perf @msg' command is printed.
+ */
+bool perf_session__has_traces(struct perf_session *session, const char *msg)
+{
+	struct perf_evsel *pos;
+	bool found = false;
+
+	evlist__for_each_entry(session->evlist, pos) {
+		if (pos->attr.type == PERF_TYPE_TRACEPOINT) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
+
+	return found;
+}
+
+/*
+ * Allocate a reference relocation symbol named @symbol_name (cut at the
+ * first ']', if any) at address @addr, and attach it to every entry of
+ * @maps that has a kmap. All those kmaps share the same allocation.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
+				     const char *symbol_name, u64 addr)
+{
+	struct ref_reloc_sym *ref = zalloc(sizeof(*ref));
+	char *bracket;
+	int type;
+
+	if (!ref)
+		return -ENOMEM;
+
+	ref->name = strdup(symbol_name);
+	if (!ref->name) {
+		free(ref);
+		return -ENOMEM;
+	}
+
+	bracket = strchr(ref->name, ']');
+	if (bracket != NULL)
+		*bracket = '\0';
+
+	ref->addr = addr;
+
+	for (type = 0; type < MAP__NR_TYPES; ++type) {
+		struct kmap *kmap = map__kmap(maps[type]);
+
+		if (kmap)
+			kmap->ref_reloc_sym = ref;
+	}
+
+	return 0;
+}
+
+/* Print the DSOs of the session's machines to @fp via machines__fprintf_dsos(). */
+size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp)
+{
+	struct machines *machines = &session->machines;
+
+	return machines__fprintf_dsos(machines, fp);
+}
+
+/*
+ * Print the DSO build-ids of the session's machines to @fp; @skip and
+ * @parm are forwarded unchanged to machines__fprintf_dsos_buildid().
+ */
+size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
+					  bool (skip)(struct dso *dso, int parm), int parm)
+{
+	struct machines *machines = &session->machines;
+
+	return machines__fprintf_dsos_buildid(machines, fp, skip, parm);
+}
+
+/*
+ * Print the aggregated event counts of the session to @fp. The heading
+ * carries an extra note when the file has the HEADER_AUXTRACE feature.
+ * Returns the sum of the two printing calls' return values.
+ */
+size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
+{
+	const char *note;
+	size_t printed;
+
+	if (perf_header__has_feat(&session->header, HEADER_AUXTRACE))
+		note = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)";
+	else
+		note = "";
+
+	printed = fprintf(fp, "\nAggregated stats:%s\n", note);
+	printed += events_stats__fprintf(&session->evlist->stats, fp);
+
+	return printed;
+}
+
+/* Print the host machine of the session to @fp. */
+size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
+{
+	struct machine *host = &session->machines.host;
+
+	/*
+	 * FIXME: Here we have to actually print all the machines in this
+	 * session, not just the host...
+	 */
+	return machine__fprintf(host, fp);
+}
+
+/* Return the first evsel of the session whose attr.type is @type, or NULL. */
+struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
+					      unsigned int type)
+{
+	struct perf_evsel *evsel;
+	struct perf_evsel *match = NULL;
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (evsel->attr.type == type) {
+			match = evsel;
+			break;
+		}
+	}
+
+	return match;
+}
+
+/*
+ * Set in @cpu_bitmap the bit of every CPU listed in @cpu_list.
+ *
+ * Fails if the first evsel of any event type lacks PERF_SAMPLE_CPU, if
+ * @cpu_list cannot be parsed, or if a CPU number reaches MAX_NR_CPUS. Bits
+ * set before a failure are left in place.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int perf_session__cpu_bitmap(struct perf_session *session,
+			     const char *cpu_list, unsigned long *cpu_bitmap)
+{
+	struct cpu_map *map;
+	int i, err = 0;
+
+	for (i = 0; i < PERF_TYPE_MAX; ++i) {
+		struct perf_evsel *evsel;
+
+		evsel = perf_session__find_first_evtype(session, i);
+		if (evsel && !(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
+			pr_err("File does not contain CPU events. "
+			       "Remove -C option to proceed.\n");
+			return -1;
+		}
+	}
+
+	map = cpu_map__new(cpu_list);
+	if (!map) {
+		pr_err("Invalid cpu_list\n");
+		return -1;
+	}
+
+	for (i = 0; i < map->nr; i++) {
+		int cpu = map->map[i];
+
+		if (cpu >= MAX_NR_CPUS) {
+			pr_err("Requested CPU %d too large. "
+			       "Consider raising MAX_NR_CPUS\n", cpu);
+			err = -1;
+			break;
+		}
+
+		set_bit(cpu, cpu_bitmap);
+	}
+
+	cpu_map__put(map);
+	return err;
+}
+
+/*
+ * Print the header information of @session to @fp between two "# ========"
+ * rulers. Does nothing when either pointer is NULL.
+ */
+void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
+				bool full)
+{
+	if (!session || !fp)
+		return;
+
+	fprintf(fp, "# ========\n");
+	perf_header__fprintf_info(session, fp, full);
+	fprintf(fp, "# ========\n#\n");
+}
+
+
+/*
+ * Attach the handlers listed in @assocs to the tracepoint evsels of the
+ * session, matching by name. Names not present in the session are skipped.
+ *
+ * Returns 0 on success, or -EEXIST as soon as a matching evsel already has
+ * a handler; handlers attached before that point stay attached.
+ */
+int __perf_session__set_tracepoints_handlers(struct perf_session *session,
+					     const struct perf_evsel_str_handler *assocs,
+					     size_t nr_assocs)
+{
+	size_t idx;
+
+	for (idx = 0; idx < nr_assocs; idx++) {
+		const struct perf_evsel_str_handler *assoc = &assocs[idx];
+		struct perf_evsel *evsel;
+
+		/*
+		 * Adding a handler for an event not in the session,
+		 * just ignore it.
+		 */
+		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+							     assoc->name);
+		if (!evsel)
+			continue;
+
+		if (evsel->handler)
+			return -EEXIST;
+
+		evsel->handler = assoc->handler;
+	}
+
+	return 0;
+}
+
+/*
+ * Apply a PERF_RECORD_ID_INDEX event: for every entry, copy idx, cpu and
+ * tid into the perf_sample_id registered in the evlist for that id.
+ *
+ * Returns 0 on success, -EINVAL when the event is too small to hold its
+ * fixed part or claims more entries than fit in its size, or -ENOENT when
+ * an id is not known to the evlist (earlier entries stay applied).
+ */
+int perf_event__process_id_index(struct perf_tool *tool __maybe_unused,
+				 union perf_event *event,
+				 struct perf_session *session)
+{
+	struct perf_evlist *evlist = session->evlist;
+	struct id_index_event *ie = &event->id_index;
+	size_t i, nr, max_nr;
+
+	/*
+	 * Reject truncated events up front: the subtraction below is done
+	 * in size_t and would wrap around, making max_nr useless as a bound.
+	 */
+	if (ie->header.size < sizeof(struct id_index_event))
+		return -EINVAL;
+
+	max_nr = (ie->header.size - sizeof(struct id_index_event)) /
+		 sizeof(struct id_index_entry);
+	nr = ie->nr;
+	if (nr > max_nr)
+		return -EINVAL;
+
+	if (dump_trace)
+		fprintf(stdout, " nr: %zu\n", nr);
+
+	for (i = 0; i < nr; i++) {
+		struct id_index_entry *e = &ie->entries[i];
+		struct perf_sample_id *sid;
+
+		if (dump_trace) {
+			fprintf(stdout, " ... id: %"PRIu64, e->id);
+			fprintf(stdout, "  idx: %"PRIu64, e->idx);
+			fprintf(stdout, "  cpu: %"PRId64, e->cpu);
+			fprintf(stdout, "  tid: %"PRId64"\n", e->tid);
+		}
+
+		sid = perf_evlist__id2sid(evlist, e->id);
+		if (!sid)
+			return -ENOENT;
+		sid->idx = e->idx;
+		sid->cpu = e->cpu;
+		sid->tid = e->tid;
+	}
+	return 0;
+}
+
+/*
+ * Synthesize PERF_RECORD_ID_INDEX events describing every sample id of
+ * @evlist and pass them to @process.
+ *
+ * The event size is stored in a 16-bit header field, so when there are more
+ * ids than fit in one event they are emitted in several events of at most
+ * max_nr entries each, reusing a single buffer.
+ *
+ * Returns 0 on success, -ENOMEM, -ENOENT when an id has no perf_sample_id
+ * in the evlist, or the first non-zero value returned by @process.
+ */
+int perf_event__synthesize_id_index(struct perf_tool *tool,
+				    perf_event__handler_t process,
+				    struct perf_evlist *evlist,
+				    struct machine *machine)
+{
+	union perf_event *ev;
+	struct perf_evsel *evsel;
+	size_t nr = 0, i = 0, sz, max_nr, n;
+	int err;
+
+	pr_debug2("Synthesizing id index\n");
+
+	max_nr = (UINT16_MAX - sizeof(struct id_index_event)) /
+		 sizeof(struct id_index_entry);
+
+	/* Total number of ids across all evsels. */
+	evlist__for_each_entry(evlist, evsel)
+		nr += evsel->ids;
+
+	/* n is the capacity of the buffer, in entries. */
+	n = nr > max_nr ? max_nr : nr;
+	sz = sizeof(struct id_index_event) + n * sizeof(struct id_index_entry);
+	ev = zalloc(sz);
+	if (!ev)
+		return -ENOMEM;
+
+	ev->id_index.header.type = PERF_RECORD_ID_INDEX;
+	ev->id_index.header.size = sz;
+	ev->id_index.nr = n;
+
+	evlist__for_each_entry(evlist, evsel) {
+		u32 j;
+
+		for (j = 0; j < evsel->ids; j++) {
+			struct id_index_entry *e;
+			struct perf_sample_id *sid;
+
+			/* Buffer full: emit it and start refilling from 0. */
+			if (i >= n) {
+				err = process(tool, ev, NULL, machine);
+				if (err)
+					goto out_err;
+				nr -= n;
+				i = 0;
+			}
+
+			e = &ev->id_index.entries[i++];
+
+			e->id = evsel->id[j];
+
+			sid = perf_evlist__id2sid(evlist, e->id);
+			if (!sid) {
+				free(ev);
+				return -ENOENT;
+			}
+
+			e->idx = sid->idx;
+			e->cpu = sid->cpu;
+			e->tid = sid->tid;
+		}
+	}
+
+	/* Emit the remaining nr entries, shrinking the header to match. */
+	sz = sizeof(struct id_index_event) + nr * sizeof(struct id_index_entry);
+	ev->id_index.header.size = sz;
+	ev->id_index.nr = nr;
+
+	err = process(tool, ev, NULL, machine);
+out_err:
+	free(ev);
+
+	return err;
+}
diff --git a/util/session.h b/util/session.h
new file mode 100644
index 0000000..da40b4b
--- /dev/null
+++ b/util/session.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SESSION_H
+#define __PERF_SESSION_H
+
+#include "trace-event.h"
+#include "event.h"
+#include "header.h"
+#include "machine.h"
+#include "data.h"
+#include "ordered-events.h"
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/perf_event.h>
+
+struct ip_callchain;
+struct symbol;
+struct thread;
+
+struct auxtrace;
+struct itrace_synth_opts;
+
+/*
+ * State of one perf session. Instances are obtained from
+ * perf_session__new() and released with perf_session__delete().
+ *
+ * The 'machines' member is what perf_session__find_machine() and
+ * perf_session__findnew_machine() search.
+ */
+struct perf_session {
+	struct perf_header	header;
+	struct machines		machines;
+	struct perf_evlist	*evlist;
+	struct auxtrace		*auxtrace;
+	struct itrace_synth_opts *itrace_synth_opts;
+	struct list_head	auxtrace_index;
+	struct trace_event	tevent;
+	struct time_conv_event	time_conv;
+	bool			repipe;
+	bool			one_mmap;
+	void			*one_mmap_addr;
+	u64			one_mmap_offset;
+	struct ordered_events	ordered_events;
+	struct perf_data	*data;
+	struct perf_tool	*tool;
+};
+
+struct perf_tool;
+
+struct perf_session *perf_session__new(struct perf_data *data,
+				       bool repipe, struct perf_tool *tool);
+void perf_session__delete(struct perf_session *session);
+
+void perf_event_header__bswap(struct perf_event_header *hdr);
+
+int perf_session__peek_event(struct perf_session *session, off_t file_offset,
+			     void *buf, size_t buf_sz,
+			     union perf_event **event_ptr,
+			     struct perf_sample *sample);
+
+int perf_session__process_events(struct perf_session *session);
+
+int perf_session__queue_event(struct perf_session *s, union perf_event *event,
+			      u64 timestamp, u64 file_offset);
+
+void perf_tool__fill_defaults(struct perf_tool *tool);
+
+int perf_session__resolve_callchain(struct perf_session *session,
+				    struct perf_evsel *evsel,
+				    struct thread *thread,
+				    struct ip_callchain *chain,
+				    struct symbol **parent);
+
+bool perf_session__has_traces(struct perf_session *session, const char *msg);
+
+void perf_event__attr_swap(struct perf_event_attr *attr);
+
+int perf_session__create_kernel_maps(struct perf_session *session);
+
+void perf_session__set_id_hdr_size(struct perf_session *session);
+
+/* Look up @pid in the session's machines via machines__find(). */
+static inline
+struct machine *perf_session__find_machine(struct perf_session *session, pid_t pid)
+{
+	struct machines *machines = &session->machines;
+
+	return machines__find(machines, pid);
+}
+
+/* Same lookup as above, but delegating to machines__findnew(). */
+static inline
+struct machine *perf_session__findnew_machine(struct perf_session *session, pid_t pid)
+{
+	struct machines *machines = &session->machines;
+
+	return machines__findnew(machines, pid);
+}
+
+struct thread *perf_session__findnew(struct perf_session *session, pid_t pid);
+int perf_session__register_idle_thread(struct perf_session *session);
+
+size_t perf_session__fprintf(struct perf_session *session, FILE *fp);
+
+size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp);
+
+size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
+					  bool (fn)(struct dso *dso, int parm), int parm);
+
+size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp);
+
+struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
+					    unsigned int type);
+
+int perf_session__cpu_bitmap(struct perf_session *session,
+			     const char *cpu_list, unsigned long *cpu_bitmap);
+
+void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full);
+
+struct perf_evsel_str_handler;
+
+int __perf_session__set_tracepoints_handlers(struct perf_session *session,
+					     const struct perf_evsel_str_handler *assocs,
+					     size_t nr_assocs);
+
+/*
+ * Convenience wrapper that supplies the element count itself. Because it
+ * applies ARRAY_SIZE() to @array, @array must be a real array, not a
+ * pointer.
+ */
+#define perf_session__set_tracepoints_handlers(session, array) \
+	__perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array))
+
+/*
+ * Flag defined elsewhere; readers should go through the session_done()
+ * macro, which reads the variable with READ_ONCE().
+ */
+extern volatile int session_done;
+
+#define session_done()	READ_ONCE(session_done)
+
+int perf_session__deliver_synth_event(struct perf_session *session,
+				      union perf_event *event,
+				      struct perf_sample *sample);
+
+int perf_event__process_id_index(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_session *session);
+
+int perf_event__synthesize_id_index(struct perf_tool *tool,
+				    perf_event__handler_t process,
+				    struct perf_evlist *evlist,
+				    struct machine *machine);
+
+#endif /* __PERF_SESSION_H */
diff --git a/util/setns.c b/util/setns.c
new file mode 100644
index 0000000..ce8fc29
--- /dev/null
+++ b/util/setns.c
@@ -0,0 +1,8 @@
+#include "util.h"
+#include <unistd.h>
+#include <sys/syscall.h>
+
+/* setns() implemented directly on top of the raw __NR_setns system call. */
+int setns(int fd, int nstype)
+{
+	long rc = syscall(__NR_setns, fd, nstype);
+
+	return (int)rc;
+}
diff --git a/util/setup.py b/util/setup.py
new file mode 100644
index 0000000..001be4f
--- /dev/null
+++ b/util/setup.py
@@ -0,0 +1,61 @@
+#!/usr/bin/python
+
+from os import getenv
+
+# When the compiler selected through $CC is clang, remove every
+# "-specs=<something>" option from the CFLAGS recorded in the interpreter's
+# build-time variables (presumably because clang rejects that option --
+# TODO confirm).
+cc = getenv("CC")
+if cc == "clang":
+    from _sysconfigdata import build_time_vars
+    from re import sub
+    build_time_vars["CFLAGS"] = sub("-specs=[^ ]+", "", build_time_vars["CFLAGS"])
+
+from distutils.core import setup, Extension
+
+from distutils.command.build_ext   import build_ext   as _build_ext
+from distutils.command.install_lib import install_lib as _install_lib
+
+class build_ext(_build_ext):
+    def finalize_options(self):
+        _build_ext.finalize_options(self)
+        self.build_lib  = build_lib
+        self.build_temp = build_tmp
+
+class install_lib(_install_lib):
+    def finalize_options(self):
+        _install_lib.finalize_options(self)
+        self.build_dir = build_lib
+
+
+# Compiler flags: start from $CFLAGS (empty when unset) and append the
+# warning switches below.
+cflags = getenv('CFLAGS', '').split()
+# switch off several checks (need to be at the end of cflags list)
+cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
+# This extra switch is only added when the compiler is not clang.
+if cc != "clang":
+    cflags += ['-Wno-cast-function-type' ]
+
+# Build locations and prebuilt libraries, all taken from the environment.
+# NOTE(review): getenv('srctree') returns None when the variable is unset,
+# which makes the concatenation below raise TypeError.
+src_perf  = getenv('srctree') + '/tools/perf'
+build_lib = getenv('PYTHON_EXTBUILD_LIB')
+build_tmp = getenv('PYTHON_EXTBUILD_TMP')
+libtraceevent = getenv('LIBTRACEEVENT')
+libapikfs = getenv('LIBAPI')
+
+ext_sources = [f.strip() for f in open('util/python-ext-sources')
+				if len(f.strip()) > 0 and f[0] != '#']
+
+# use full paths with source files
+ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources))
+
+# The 'perf' extension module: built from ext_sources with the cflags
+# assembled above, and linked against the two objects named by the
+# LIBTRACEEVENT and LIBAPI environment variables.
+perf = Extension('perf',
+		  sources = ext_sources,
+		  include_dirs = ['util/include'],
+		  extra_compile_args = cflags,
+		  extra_objects = [libtraceevent, libapikfs],
+                 )
+
+# Register the package and plug in the build_ext / install_lib overrides
+# defined earlier in this script.
+setup(name='perf',
+      version='0.1',
+      description='Interface with the Linux profiling infrastructure',
+      author='Arnaldo Carvalho de Melo',
+      author_email='acme@redhat.com',
+      license='GPLv2',
+      url='http://perf.wiki.kernel.org',
+      ext_modules=[perf],
+      cmdclass={'build_ext': build_ext, 'install_lib': install_lib})
diff --git a/util/smt.c b/util/smt.c
new file mode 100644
index 0000000..453f6f6
--- /dev/null
+++ b/util/smt.c
@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/bitops.h>
+#include "api/fs/fs.h"
+#include "smt.h"
+
+/*
+ * Report whether any configured CPU has more than one bit set in its sysfs
+ * topology/thread_siblings mask.
+ *
+ * The scan runs once; the answer is kept in function-local statics and
+ * returned directly on later calls. CPUs whose sysfs entry cannot be read
+ * are skipped.
+ *
+ * Returns 1 if such a CPU was found, 0 otherwise.
+ */
+int smt_on(void)
+{
+	static bool cached;
+	static int cached_result;
+	int found = 0;
+	int ncpu;
+	int cpu;
+
+	if (cached)
+		return cached_result;
+
+	ncpu = sysconf(_SC_NPROCESSORS_CONF);
+	for (cpu = 0; cpu < ncpu; cpu++) {
+		unsigned long long siblings;
+		size_t mask_len;
+		char path[256];
+		char *mask;
+
+		snprintf(path, sizeof(path),
+			 "devices/system/cpu/cpu%d/topology/thread_siblings",
+			 cpu);
+		if (sysfs__read_str(path, &mask, &mask_len) < 0)
+			continue;
+
+		/* Entry is hex, but does not have 0x, so need custom parser */
+		siblings = strtoull(mask, NULL, 16);
+		free(mask);
+
+		if (hweight64(siblings) > 1) {
+			found = 1;
+			break;
+		}
+	}
+
+	cached_result = found;
+	cached = true;
+	return cached_result;
+}
diff --git a/util/smt.h b/util/smt.h
new file mode 100644
index 0000000..b8414b7
--- /dev/null
+++ b/util/smt.h
@@ -0,0 +1,6 @@
+#ifndef SMT_H
+#define SMT_H 1
+
+int smt_on(void);
+
+#endif
diff --git a/util/sort.c b/util/sort.c
new file mode 100644
index 0000000..26a68df
--- /dev/null
+++ b/util/sort.c
@@ -0,0 +1,3046 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <regex.h>
+#include <sys/mman.h>
+#include "sort.h"
+#include "hist.h"
+#include "comm.h"
+#include "symbol.h"
+#include "thread.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "strlist.h"
+#include <traceevent/event-parse.h>
+#include "mem-events.h"
+#include <linux/kernel.h>
+
+/*
+ * File-scope sorting configuration.
+ *
+ * parent_pattern starts out pointing at default_parent_pattern. The
+ * default_*_sort_order strings are comma separated lists of sort keys;
+ * sort_order and field_order have no initial value here.
+ */
+regex_t		parent_regex;
+const char	default_parent_pattern[] = "^sys_|^do_page_fault";
+const char	*parent_pattern = default_parent_pattern;
+const char	*default_sort_order = "comm,dso,symbol";
+const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
+const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
+const char	default_top_sort_order[] = "dso,symbol";
+const char	default_diff_sort_order[] = "dso,symbol";
+const char	default_tracepoint_sort_order[] = "trace";
+const char	*sort_order;
+const char	*field_order;
+regex_t		ignore_callees_regex;
+int		have_ignore_callees = 0;
+enum sort_mode	sort__mode = SORT_MODE__NORMAL;
+
+/*
+ * snprintf() wrapper for output produced under the:
+ *
+ * -t, --field-separator
+ *
+ * option. That option uses a special separator character and doesn't pad
+ * with spaces, so every occurrence of the separator inside the formatted
+ * text (symbol names and other output) is replaced with a '.' character,
+ * which is thus the only non valid separator.
+ *
+ * Returns the vsnprintf() result, or size - 1 when the output did not fit.
+ */
+static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
+{
+	va_list args;
+	int len;
+
+	va_start(args, fmt);
+	len = vsnprintf(bf, size, fmt, args);
+	va_end(args);
+
+	if (symbol_conf.field_sep && len > 0) {
+		char *hit;
+
+		for (hit = strchr(bf, *symbol_conf.field_sep);
+		     hit != NULL;
+		     hit = strchr(hit, *symbol_conf.field_sep))
+			*hit = '.';
+	}
+
+	if (len < (int)size)
+		return len;
+	return size - 1;
+}
+
+/*
+ * Order two pointers when at least one of them may be NULL:
+ * 0 if both are NULL, -1 if only @l is NULL, 1 whenever @l is non-NULL.
+ */
+static int64_t cmp_null(const void *l, const void *r)
+{
+	if (l)
+		return 1;
+
+	return r ? -1 : 0;
+}
+
+/* --sort pid */
+
+/* Compare by thread id: right tid minus left tid. */
+static int64_t
+sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	int64_t diff = right->thread->tid - left->thread->tid;
+
+	return diff;
+}
+
+/*
+ * Print "<tid>:<comm>": the tid right-aligned in 7 columns, the command
+ * name in what is left of @width (an empty string when there is no comm).
+ */
+static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
+				       size_t size, unsigned int width)
+{
+	const char *name = thread__comm_str(he->thread);
+	unsigned int comm_width = max(7U, width) - 8;
+
+	if (!name)
+		name = "";
+
+	return repsep_snprintf(bf, size, "%7d:%-*.*s", he->thread->tid,
+			       comm_width, comm_width, name);
+}
+
+/*
+ * Thread filter callback. Returns -1 when @type is not the thread filter,
+ * 0 when @arg is NULL or matches the entry's thread, 1 otherwise.
+ */
+static int hist_entry__thread_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const struct thread *wanted = arg;
+
+	if (type != HIST_FILTER__THREAD)
+		return -1;
+
+	if (wanted == NULL)
+		return 0;
+
+	return he->thread != wanted;
+}
+
+/* Sort key descriptor for the "Pid:Command" column (thread callbacks). */
+struct sort_entry sort_thread = {
+	.se_header	= "    Pid:Command",
+	.se_cmp		= sort__thread_cmp,
+	.se_snprintf	= hist_entry__thread_snprintf,
+	.se_filter	= hist_entry__thread_filter,
+	.se_width_idx	= HISTC_THREAD,
+};
+
+/* --sort comm */
+
+/*
+ * We can't use pointer comparison in functions below,
+ * because it gives different results based on pointer
+ * values, which could break some sorting assumptions.
+ */
+/* Compare entries by command name (right string against left string). */
+static int64_t
+sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	const char *lhs = comm__str(left->comm);
+	const char *rhs = comm__str(right->comm);
+
+	return strcmp(rhs, lhs);
+}
+
+/* Collapse-stage comparison: command name strings, right against left. */
+static int64_t
+sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	const char *lhs = comm__str(left->comm);
+	const char *rhs = comm__str(right->comm);
+
+	return strcmp(rhs, lhs);
+}
+
+/* Output-stage comparison: command name strings, right against left. */
+static int64_t
+sort__comm_sort(struct hist_entry *left, struct hist_entry *right)
+{
+	const char *lhs = comm__str(left->comm);
+	const char *rhs = comm__str(right->comm);
+
+	return strcmp(rhs, lhs);
+}
+
+/* Print the command name, left-aligned and clipped to @width columns. */
+static int hist_entry__comm_snprintf(struct hist_entry *he, char *bf,
+				     size_t size, unsigned int width)
+{
+	const char *name = comm__str(he->comm);
+
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, name);
+}
+
+/*
+ * Sort key descriptor for the "Command" column. Note that it reuses the
+ * thread filter callback.
+ */
+struct sort_entry sort_comm = {
+	.se_header	= "Command",
+	.se_cmp		= sort__comm_cmp,
+	.se_collapse	= sort__comm_collapse,
+	.se_sort	= sort__comm_sort,
+	.se_snprintf	= hist_entry__comm_snprintf,
+	.se_filter	= hist_entry__thread_filter,
+	.se_width_idx	= HISTC_COMM,
+};
+
+/* --sort dso */
+
+/*
+ * Compare the DSOs behind two maps by name: long names when verbose > 0,
+ * short names otherwise. When either map or its dso is missing the result
+ * comes from cmp_null(), called with the right dso first.
+ */
+static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
+{
+	struct dso *ldso = NULL;
+	struct dso *rdso = NULL;
+
+	if (map_l)
+		ldso = map_l->dso;
+	if (map_r)
+		rdso = map_r->dso;
+
+	if (!ldso || !rdso)
+		return cmp_null(rdso, ldso);
+
+	if (verbose > 0)
+		return strcmp(ldso->long_name, rdso->long_name);
+
+	return strcmp(ldso->short_name, rdso->short_name);
+}
+
+/* Compare two entries by DSO; the right entry's map is passed first. */
+static int64_t
+sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct map *first = right->ms.map;
+	struct map *second = left->ms.map;
+
+	return _sort__dso_cmp(first, second);
+}
+
+/*
+ * Print the DSO name of @map (long name when verbose > 0, short name
+ * otherwise), or "[unknown]" when the map or its dso is missing.
+ */
+static int _hist_entry__dso_snprintf(struct map *map, char *bf,
+				     size_t size, unsigned int width)
+{
+	const char *name = "[unknown]";
+
+	if (map && map->dso) {
+		if (verbose > 0)
+			name = map->dso->long_name;
+		else
+			name = map->dso->short_name;
+	}
+
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, name);
+}
+
+/* Print the DSO column for the entry's own map. */
+static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	struct map *map = he->ms.map;
+
+	return _hist_entry__dso_snprintf(map, bf, size, width);
+}
+
+/*
+ * DSO filter callback. Returns -1 when @type is not the DSO filter, 0 when
+ * @arg is NULL or equals the entry's dso, 1 otherwise (including entries
+ * without a map).
+ */
+static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const struct dso *wanted = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	if (!wanted)
+		return 0;
+
+	if (!he->ms.map)
+		return 1;
+
+	return he->ms.map->dso != wanted;
+}
+
+/* Sort key descriptor for the "Shared Object" column. */
+struct sort_entry sort_dso = {
+	.se_header	= "Shared Object",
+	.se_cmp		= sort__dso_cmp,
+	.se_snprintf	= hist_entry__dso_snprintf,
+	.se_filter	= hist_entry__dso_filter,
+	.se_width_idx	= HISTC_DSO,
+};
+
+/* --sort symbol */
+
+/* Difference of two addresses (right minus left) as a signed value. */
+static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
+{
+	u64 delta = right_ip - left_ip;
+
+	return (int64_t)delta;
+}
+
+/*
+ * Compare two symbols. Identical pointers compare equal; a missing symbol
+ * is ordered through cmp_null(); if either symbol is inlined the names are
+ * compared; otherwise the start addresses decide, with the end addresses
+ * as tie breaker (right minus left in both cases).
+ */
+static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
+{
+	if (sym_l == sym_r)
+		return 0;
+
+	if (!sym_l || !sym_r)
+		return cmp_null(sym_l, sym_r);
+
+	if (sym_l->inlined || sym_r->inlined)
+		return strcmp(sym_l->name, sym_r->name);
+
+	if (sym_l->start == sym_r->start)
+		return (int64_t)(sym_r->end - sym_l->end);
+
+	return (int64_t)(sym_r->start - sym_l->start);
+}
+
+/*
+ * Compare two entries by symbol. Entries that both lack a symbol are
+ * ordered by raw ip; otherwise the DSOs may be compared first and the
+ * symbols break the tie.
+ */
+static int64_t
+sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->ms.sym && !right->ms.sym)
+		return _sort__addr_cmp(left->ip, right->ip);
+
+	/*
+	 * A symbol address on its own is only meaningful relative to its
+	 * dso, so the dso has to take part in the comparison as well.
+	 *
+	 * NOTE(review): when both entries share the same hists this
+	 * condition looks like it is always true -- confirm intent.
+	 */
+	if (!hists__has(left->hists, dso) || hists__has(right->hists, dso)) {
+		int64_t dso_order = sort__dso_cmp(left, right);
+
+		if (dso_order != 0)
+			return dso_order;
+	}
+
+	return _sort__sym_cmp(left->ms.sym, right->ms.sym);
+}
+
+/*
+ * Output-stage symbol comparison: by name (right against left), or through
+ * cmp_null() when either entry has no symbol.
+ */
+static int64_t
+sort__sym_sort(struct hist_entry *left, struct hist_entry *right)
+{
+	struct symbol *lsym = left->ms.sym;
+	struct symbol *rsym = right->ms.sym;
+
+	if (lsym && rsym)
+		return strcmp(rsym->name, lsym->name);
+
+	return cmp_null(lsym, rsym);
+}
+
+/*
+ * Format a symbol column into @bf.
+ *
+ * With verbose > 0 the raw @ip and the dso symtab origin character ('!'
+ * when there is no map) come first. Then "[<level>] " is printed, followed
+ * by either the symbol name (plus "+0x<offset>" for MAP__VARIABLE maps, or
+ * " (inlined)" for inlined symbols) or, without a symbol/map, the raw @ip.
+ *
+ * Every '*' width/precision argument is passed as int and every value
+ * printed with %llx as unsigned long long, as the printf family requires;
+ * the wrapper's varargs are not type checked by the compiler.
+ *
+ * Returns the accumulated length reported by repsep_snprintf().
+ */
+static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
+				     u64 ip, char level, char *bf, size_t size,
+				     unsigned int width)
+{
+	size_t ret = 0;
+
+	if (verbose > 0) {
+		char o = map ? dso__symtab_origin(map->dso) : '!';
+		ret += repsep_snprintf(bf, size, "%-#*llx %c ",
+				       (int)(BITS_PER_LONG / 4 + 2),
+				       (unsigned long long)ip, o);
+	}
+
+	ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
+	if (sym && map) {
+		if (map->type == MAP__VARIABLE) {
+			ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
+			ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
+					(unsigned long long)(ip - map->unmap_ip(map, sym->start)));
+		} else {
+			/* Clip the name to what is left of the column. */
+			ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
+					       (int)(width - ret),
+					       sym->name);
+			if (sym->inlined)
+				ret += repsep_snprintf(bf + ret, size - ret,
+						       " (inlined)");
+		}
+	} else {
+		int len = BITS_PER_LONG / 4;
+		ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
+				       len, (unsigned long long)ip);
+	}
+
+	return ret;
+}
+
+/* Print the symbol column from the entry's own map, symbol, ip and level. */
+static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	int len;
+
+	len = _hist_entry__sym_snprintf(he->ms.map, he->ms.sym, he->ip,
+					he->level, bf, size, width);
+	return len;
+}
+
+/*
+ * Symbol filter callback. Returns -1 when @type is not the symbol filter,
+ * 0 when @arg is NULL or is a substring of the entry's symbol name, and 1
+ * otherwise (including entries without a symbol).
+ */
+static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const char *needle = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	if (!needle)
+		return 0;
+
+	if (!he->ms.sym)
+		return 1;
+
+	return strstr(he->ms.sym->name, needle) == NULL;
+}
+
+/* Sort key descriptor for the "Symbol" column. */
+struct sort_entry sort_sym = {
+	.se_header	= "Symbol",
+	.se_cmp		= sort__sym_cmp,
+	.se_sort	= sort__sym_sort,
+	.se_snprintf	= hist_entry__sym_snprintf,
+	.se_filter	= hist_entry__sym_filter,
+	.se_width_idx	= HISTC_SYMBOL,
+};
+
+/* --sort srcline */
+
+/*
+ * Resolve the source line string for the entry's ip through get_srcline(),
+ * or return SRCLINE_UNKNOWN when the entry has no map.
+ */
+char *hist_entry__get_srcline(struct hist_entry *he)
+{
+	struct map *map = he->ms.map;
+
+	if (map)
+		return get_srcline(map->dso, map__rip_2objdump(map, he->ip),
+				   he->ms.sym, true, true, he->ip);
+
+	return SRCLINE_UNKNOWN;
+}
+
+/*
+ * Compare entries by source line string (right against left), resolving
+ * and caching each entry's srcline on first use.
+ */
+static int64_t
+sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (left->srcline == NULL)
+		left->srcline = hist_entry__get_srcline(left);
+
+	if (right->srcline == NULL)
+		right->srcline = hist_entry__get_srcline(right);
+
+	return strcmp(right->srcline, left->srcline);
+}
+
+/* Print the (lazily resolved) source line, clipped to @width characters. */
+static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	if (he->srcline == NULL)
+		he->srcline = hist_entry__get_srcline(he);
+
+	return repsep_snprintf(bf, size, "%-.*s", width, he->srcline);
+}
+
+/* Sort key descriptor for the "Source:Line" column. */
+struct sort_entry sort_srcline = {
+	.se_header	= "Source:Line",
+	.se_cmp		= sort__srcline_cmp,
+	.se_snprintf	= hist_entry__srcline_snprintf,
+	.se_width_idx	= HISTC_SRCLINE,
+};
+
+/* --sort srcline_from */
+
+/*
+ * Compare entries by the source line of their branch source address
+ * (right against left). Each entry's srcline_from is resolved on first
+ * use -- SRCLINE_UNKNOWN when the branch source has no map -- and cached
+ * in its branch_info.
+ */
+static int64_t
+sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct hist_entry *entries[2] = { left, right };
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct hist_entry *he = entries[i];
+		struct addr_map_symbol *from;
+		struct map *map;
+
+		if (he->branch_info->srcline_from)
+			continue;
+
+		from = &he->branch_info->from;
+		map = from->map;
+		if (!map) {
+			he->branch_info->srcline_from = SRCLINE_UNKNOWN;
+			continue;
+		}
+
+		he->branch_info->srcline_from =
+			get_srcline(map->dso,
+				    map__rip_2objdump(map, from->al_addr),
+				    from->sym, true, true, from->al_addr);
+	}
+
+	return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from);
+}
+
+/*
+ * Print the cached branch-source source line, padded/clipped to @width.
+ * The string is not resolved here; it is whatever srcline_from holds.
+ */
+static int hist_entry__srcline_from_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	const char *srcline = he->branch_info->srcline_from;
+
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, srcline);
+}
+
+/* Sort key descriptor for the "From Source:Line" column. */
+struct sort_entry sort_srcline_from = {
+	.se_header	= "From Source:Line",
+	.se_cmp		= sort__srcline_from_cmp,
+	.se_snprintf	= hist_entry__srcline_from_snprintf,
+	.se_width_idx	= HISTC_SRCLINE_FROM,
+};
+
+/* --sort srcline_to */
+
+/*
+ * Compare entries by the source line of their branch target address
+ * (right against left). Each entry's srcline_to is resolved on first use
+ * -- SRCLINE_UNKNOWN when the branch target has no map -- and cached in
+ * its branch_info.
+ *
+ * Both sides are resolved from the "to" half of the branch (map, address
+ * and symbol); the left side previously passed the "from" symbol.
+ */
+static int64_t
+sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->branch_info->srcline_to) {
+		struct map *map = left->branch_info->to.map;
+		if (!map)
+			left->branch_info->srcline_to = SRCLINE_UNKNOWN;
+		else
+			left->branch_info->srcline_to = get_srcline(map->dso,
+					   map__rip_2objdump(map,
+							     left->branch_info->to.al_addr),
+							 left->branch_info->to.sym,
+							 true, true,
+							 left->branch_info->to.al_addr);
+	}
+	if (!right->branch_info->srcline_to) {
+		struct map *map = right->branch_info->to.map;
+		if (!map)
+			right->branch_info->srcline_to = SRCLINE_UNKNOWN;
+		else
+			right->branch_info->srcline_to = get_srcline(map->dso,
+					     map__rip_2objdump(map,
+							       right->branch_info->to.al_addr),
+						     right->branch_info->to.sym,
+						     true, true,
+						     right->branch_info->to.al_addr);
+	}
+	return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to);
+}
+
+/*
+ * Print the cached branch-target source line, padded/clipped to @width.
+ * The string is not resolved here; it is whatever srcline_to holds.
+ */
+static int hist_entry__srcline_to_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	const char *srcline = he->branch_info->srcline_to;
+
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, srcline);
+}
+
+/* Sort key descriptor for the "To Source:Line" column. */
+struct sort_entry sort_srcline_to = {
+	.se_header	= "To Source:Line",
+	.se_cmp		= sort__srcline_to_cmp,
+	.se_snprintf	= hist_entry__srcline_to_snprintf,
+	.se_width_idx	= HISTC_SRCLINE_TO,
+};
+
+/* --sort srcfile */
+
+/* Shared empty string returned when no source file is known. */
+static char no_srcfile[1];
+
+/*
+ * Return the source file part of the entry's source line, i.e. the text
+ * before the first ':' of what __get_srcline() produced. The shared empty
+ * string is returned when there is no map, the line is unknown, or the
+ * line has no ':' separator / is empty.
+ */
+static char *hist_entry__get_srcfile(struct hist_entry *e)
+{
+	struct map *map = e->ms.map;
+	char *srcline;
+	char *colon;
+
+	if (!map)
+		return no_srcfile;
+
+	srcline = __get_srcline(map->dso, map__rip_2objdump(map, e->ip),
+				e->ms.sym, false, true, true, e->ip);
+	if (strcmp(srcline, SRCLINE_UNKNOWN) == 0)
+		return no_srcfile;
+
+	colon = strchr(srcline, ':');
+	if (!colon || !*srcline) {
+		free(srcline);
+		return no_srcfile;
+	}
+
+	/* Cut the string at the separator, keeping only the file name. */
+	*colon = 0;
+	return srcline;
+}
+
+/*
+ * Compare entries by source file name (right against left), resolving and
+ * caching each entry's srcfile on first use.
+ */
+static int64_t
+sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (left->srcfile == NULL)
+		left->srcfile = hist_entry__get_srcfile(left);
+
+	if (right->srcfile == NULL)
+		right->srcfile = hist_entry__get_srcfile(right);
+
+	return strcmp(right->srcfile, left->srcfile);
+}
+
+/* Print the (lazily resolved) source file, clipped to @width characters. */
+static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	if (he->srcfile == NULL)
+		he->srcfile = hist_entry__get_srcfile(he);
+
+	return repsep_snprintf(bf, size, "%-.*s", width, he->srcfile);
+}
+
+/* Sort key descriptor for the "Source File" column. */
+struct sort_entry sort_srcfile = {
+	.se_header	= "Source File",
+	.se_cmp		= sort__srcfile_cmp,
+	.se_snprintf	= hist_entry__srcfile_snprintf,
+	.se_width_idx	= HISTC_SRCFILE,
+};
+
+/* --sort parent */
+
+/*
+ * Compare entries by parent symbol name (right against left), or through
+ * cmp_null() when either entry has no parent symbol.
+ */
+static int64_t
+sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct symbol *lparent = left->parent;
+	struct symbol *rparent = right->parent;
+
+	if (lparent && rparent)
+		return strcmp(rparent->name, lparent->name);
+
+	return cmp_null(lparent, rparent);
+}
+
+/* Print the parent symbol name, or "[other]" when there is none. */
+static int hist_entry__parent_snprintf(struct hist_entry *he, char *bf,
+				       size_t size, unsigned int width)
+{
+	const char *name = "[other]";
+
+	if (he->parent)
+		name = he->parent->name;
+
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, name);
+}
+
+/* Sort key descriptor for the "Parent symbol" column. */
+struct sort_entry sort_parent = {
+	.se_header	= "Parent symbol",
+	.se_cmp		= sort__parent_cmp,
+	.se_snprintf	= hist_entry__parent_snprintf,
+	.se_width_idx	= HISTC_PARENT,
+};
+
+/* --sort cpu */
+
+/* Compare by CPU number: right cpu minus left cpu. */
+static int64_t
+sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	int64_t diff = right->cpu - left->cpu;
+
+	return diff;
+}
+
+/* Print the CPU number using @width as both field width and precision. */
+static int hist_entry__cpu_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	int len = repsep_snprintf(bf, size, "%*.*d", width, width, he->cpu);
+
+	return len;
+}
+
+/* Sort key descriptor for the "CPU" column. */
+struct sort_entry sort_cpu = {
+	.se_header      = "CPU",
+	.se_cmp	        = sort__cpu_cmp,
+	.se_snprintf    = hist_entry__cpu_snprintf,
+	.se_width_idx	= HISTC_CPU,
+};
+
+/* --sort cgroup_id */
+
+/* Difference of two device numbers (right minus left) as a signed value. */
+static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev)
+{
+	u64 delta = right_dev - left_dev;
+
+	return (int64_t)delta;
+}
+
+/* Difference of two inode numbers (right minus left) as a signed value. */
+static int64_t _sort__cgroup_inode_cmp(u64 left_ino, u64 right_ino)
+{
+	u64 delta = right_ino - left_ino;
+
+	return (int64_t)delta;
+}
+
+/*
+ * Compare entries by cgroup id: device first, inode as tie breaker. The
+ * right entry's fields are passed as the helpers' first argument.
+ */
+static int64_t
+sort__cgroup_id_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	int64_t order;
+
+	order = _sort__cgroup_dev_cmp(right->cgroup_id.dev, left->cgroup_id.dev);
+	if (order == 0)
+		order = _sort__cgroup_inode_cmp(right->cgroup_id.ino,
+						left->cgroup_id.ino);
+
+	return order;
+}
+
+/*
+ * Print the cgroup id as "<dev>/0x<inode>" (decimal device, hex inode).
+ *
+ * The values are converted to uint64_t and printed with the <inttypes.h>
+ * macros so the conversion specifiers match the argument size on 32-bit
+ * as well as 64-bit builds; the wrapper's varargs are not type checked.
+ * @width is unused.
+ */
+static int hist_entry__cgroup_id_snprintf(struct hist_entry *he,
+					  char *bf, size_t size,
+					  unsigned int width __maybe_unused)
+{
+	return repsep_snprintf(bf, size, "%" PRIu64 "/0x%" PRIx64,
+			       (uint64_t)he->cgroup_id.dev,
+			       (uint64_t)he->cgroup_id.ino);
+}
+
+/* Sort key descriptor for the "cgroup id (dev/inode)" column. */
+struct sort_entry sort_cgroup_id = {
+	.se_header      = "cgroup id (dev/inode)",
+	.se_cmp	        = sort__cgroup_id_cmp,
+	.se_snprintf    = hist_entry__cgroup_id_snprintf,
+	.se_width_idx	= HISTC_CGROUP_ID,
+};
+
+/* --sort socket */
+
+/* Compare by socket number: right socket minus left socket. */
+static int64_t
+sort__socket_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	int64_t diff = right->socket - left->socket;
+
+	return diff;
+}
+
+/*
+ * Print the socket number in a field of @width columns, with a precision
+ * of @width - 3.
+ */
+static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	unsigned int precision = width - 3;
+
+	return repsep_snprintf(bf, size, "%*.*d", width, precision, he->socket);
+}
+
+/*
+ * Socket filter callback.
+ *
+ * @arg points to an int socket number, but only when @type is
+ * HIST_FILTER__SOCKET; the sibling filters in this file treat @arg as a
+ * thread, dso or string pointer and accept NULL. It is therefore only
+ * dereferenced after the type check.
+ *
+ * Returns -1 when @type is not the socket filter, 0 when the wanted socket
+ * is negative or matches the entry, 1 otherwise.
+ */
+static int hist_entry__socket_filter(struct hist_entry *he, int type, const void *arg)
+{
+	int sk;
+
+	if (type != HIST_FILTER__SOCKET)
+		return -1;
+
+	sk = *(const int *)arg;
+
+	return sk >= 0 && he->socket != sk;
+}
+
+/* Sort key descriptor for the "Socket" column. */
+struct sort_entry sort_socket = {
+	.se_header      = "Socket",
+	.se_cmp	        = sort__socket_cmp,
+	.se_snprintf    = hist_entry__socket_snprintf,
+	.se_filter      = hist_entry__socket_filter,
+	.se_width_idx	= HISTC_SOCKET,
+};
+
+/* --sort trace */
+
+/*
+ * Render the entry's raw tracepoint data into a newly allocated buffer:
+ * as raw fields when symbol_conf.raw_trace is set, through
+ * pevent_event_info() otherwise.
+ *
+ * Returns the trace_seq buffer, trimmed to seq.len + 1 bytes when the
+ * trimming realloc() succeeds.
+ */
+static char *get_trace_output(struct hist_entry *he)
+{
+	struct trace_seq seq;
+	struct perf_evsel *evsel;
+	struct pevent_record rec = {
+		.data = he->raw_data,
+		.size = he->raw_size,
+	};
+	char *trimmed;
+
+	evsel = hists_to_evsel(he->hists);
+
+	trace_seq_init(&seq);
+	if (symbol_conf.raw_trace) {
+		pevent_print_fields(&seq, he->raw_data, he->raw_size,
+				    evsel->tp_format);
+	} else {
+		pevent_event_info(&seq, evsel->tp_format, &rec);
+	}
+	/*
+	 * Trim the buffer, it starts at 4KB and we're not going to
+	 * add anything more to this buffer.
+	 *
+	 * A failed realloc() leaves the original block untouched, so fall
+	 * back to it instead of leaking it and handing NULL to callers
+	 * that pass the result to strcmp().
+	 */
+	trimmed = realloc(seq.buffer, seq.len + 1);
+	return trimmed ? trimmed : seq.buffer;
+}
+
+/*
+ * Compare entries by rendered trace output (right against left). Entries
+ * whose evsel is not a tracepoint always compare equal. The output of each
+ * entry is rendered on first use and cached in trace_output.
+ */
+static int64_t
+sort__trace_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct perf_evsel *evsel = hists_to_evsel(left->hists);
+
+	if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+		return 0;
+
+	if (!left->trace_output)
+		left->trace_output = get_trace_output(left);
+
+	if (!right->trace_output)
+		right->trace_output = get_trace_output(right);
+
+	return strcmp(right->trace_output, left->trace_output);
+}
+
+/*
+ * Print the rendered trace output clipped to @width characters, or "N/A"
+ * when the entry's evsel is not a tracepoint.
+ */
+static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	struct perf_evsel *evsel = hists_to_evsel(he->hists);
+
+	if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+		/* Render once, reuse afterwards. */
+		if (!he->trace_output)
+			he->trace_output = get_trace_output(he);
+
+		return repsep_snprintf(bf, size, "%-.*s", width, he->trace_output);
+	}
+
+	return scnprintf(bf, size, "%-.*s", width, "N/A");
+}
+
+/* Sort key descriptor for the "Trace output" column. */
+struct sort_entry sort_trace = {
+	.se_header      = "Trace output",
+	.se_cmp	        = sort__trace_cmp,
+	.se_snprintf    = hist_entry__trace_snprintf,
+	.se_width_idx	= HISTC_TRACE,
+};
+
+/* sort keys for branch stacks */
+
+/*
+ * Compare entries by the DSO of their branch source, or through cmp_null()
+ * when either entry has no branch info.
+ */
+static int64_t
+sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (left->branch_info && right->branch_info)
+		return _sort__dso_cmp(left->branch_info->from.map,
+				      right->branch_info->from.map);
+
+	return cmp_null(left->branch_info, right->branch_info);
+}
+
+/* Print the branch source DSO, or "N/A" when there is no branch info. */
+static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	if (!he->branch_info)
+		return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+
+	return _hist_entry__dso_snprintf(he->branch_info->from.map,
+					 bf, size, width);
+}
+
+/*
+ * DSO filter on the branch source. Returns -1 when @type is not the DSO
+ * filter, 0 when @arg is NULL or equals the branch source dso, 1 otherwise
+ * (including entries without branch info or source map).
+ */
+static int hist_entry__dso_from_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const struct dso *wanted = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	if (!wanted)
+		return 0;
+
+	if (!he->branch_info || !he->branch_info->from.map)
+		return 1;
+
+	return he->branch_info->from.map->dso != wanted;
+}
+
+/*
+ * Compare entries by the DSO of their branch target, or through cmp_null()
+ * when either entry has no branch info.
+ */
+static int64_t
+sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (left->branch_info && right->branch_info)
+		return _sort__dso_cmp(left->branch_info->to.map,
+				      right->branch_info->to.map);
+
+	return cmp_null(left->branch_info, right->branch_info);
+}
+
+/* Print the branch target DSO, or "N/A" when there is no branch info. */
+static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf,
+				       size_t size, unsigned int width)
+{
+	if (!he->branch_info)
+		return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+
+	return _hist_entry__dso_snprintf(he->branch_info->to.map,
+					 bf, size, width);
+}
+
+/*
+ * DSO filter on the branch target. Returns -1 when @type is not the DSO
+ * filter, 0 when @arg is NULL or equals the branch target dso, 1 otherwise
+ * (including entries without branch info or target map).
+ */
+static int hist_entry__dso_to_filter(struct hist_entry *he, int type,
+				     const void *arg)
+{
+	const struct dso *wanted = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	if (!wanted)
+		return 0;
+
+	if (!he->branch_info || !he->branch_info->to.map)
+		return 1;
+
+	return he->branch_info->to.map->dso != wanted;
+}
+
+/*
+ * Compare entries by the symbol of their branch source.
+ *
+ * Entries without branch info are ordered through cmp_null(). When neither
+ * source has a symbol the raw source addresses are compared; otherwise the
+ * symbols are compared with _sort__sym_cmp().
+ */
+static int64_t
+sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct addr_map_symbol *from_l, *from_r;
+
+	/* branch_info must be checked before anything is derived from it. */
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	from_l = &left->branch_info->from;
+	from_r = &right->branch_info->from;
+
+	if (!from_l->sym && !from_r->sym)
+		return _sort__addr_cmp(from_l->addr, from_r->addr);
+
+	return _sort__sym_cmp(from_l->sym, from_r->sym);
+}
+
+/*
+ * Compare entries by the symbol of their branch target: cmp_null() when
+ * branch info is missing, raw target addresses when neither target has a
+ * symbol, _sort__sym_cmp() otherwise.
+ */
+static int64_t
+sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct addr_map_symbol *lto;
+	struct addr_map_symbol *rto;
+
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	lto = &left->branch_info->to;
+	rto = &right->branch_info->to;
+
+	if (lto->sym || rto->sym)
+		return _sort__sym_cmp(lto->sym, rto->sym);
+
+	return _sort__addr_cmp(lto->addr, rto->addr);
+}
+
+/* Print the branch source symbol, or "N/A" when there is no branch info. */
+static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
+					 size_t size, unsigned int width)
+{
+	struct addr_map_symbol *src;
+
+	if (!he->branch_info)
+		return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+
+	src = &he->branch_info->from;
+	return _hist_entry__sym_snprintf(src->map, src->sym, src->addr,
+					 he->level, bf, size, width);
+}
+
+/* Print the branch target symbol, or "N/A" when there is no branch info. */
+static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
+				       size_t size, unsigned int width)
+{
+	struct addr_map_symbol *dst;
+
+	if (!he->branch_info)
+		return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+
+	dst = &he->branch_info->to;
+	return _hist_entry__sym_snprintf(dst->map, dst->sym, dst->addr,
+					 he->level, bf, size, width);
+}
+
+/*
+ * Symbol filter on the branch source. Returns -1 when @type is not the
+ * symbol filter, 0 when @arg is NULL or is a substring of the source
+ * symbol name, 1 otherwise (including missing branch info or symbol).
+ */
+static int hist_entry__sym_from_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const char *needle = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	if (!needle)
+		return 0;
+
+	if (!he->branch_info || !he->branch_info->from.sym)
+		return 1;
+
+	return strstr(he->branch_info->from.sym->name, needle) == NULL;
+}
+
+/*
+ * Symbol filter on the branch target. Returns -1 when @type is not the
+ * symbol filter, 0 when @arg is NULL or is a substring of the target
+ * symbol name, 1 otherwise (including missing branch info or symbol).
+ */
+static int hist_entry__sym_to_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const char *needle = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	if (!needle)
+		return 0;
+
+	if (!he->branch_info || !he->branch_info->to.sym)
+		return 1;
+
+	return strstr(he->branch_info->to.sym->name, needle) == NULL;
+}
+
+/* Sort key descriptor for the "Source Shared Object" column. */
+struct sort_entry sort_dso_from = {
+	.se_header	= "Source Shared Object",
+	.se_cmp		= sort__dso_from_cmp,
+	.se_snprintf	= hist_entry__dso_from_snprintf,
+	.se_filter	= hist_entry__dso_from_filter,
+	.se_width_idx	= HISTC_DSO_FROM,
+};
+
+/* Sort key descriptor for the "Target Shared Object" column. */
+struct sort_entry sort_dso_to = {
+	.se_header	= "Target Shared Object",
+	.se_cmp		= sort__dso_to_cmp,
+	.se_snprintf	= hist_entry__dso_to_snprintf,
+	.se_filter	= hist_entry__dso_to_filter,
+	.se_width_idx	= HISTC_DSO_TO,
+};
+
+/* Sort key descriptor for the "Source Symbol" column. */
+struct sort_entry sort_sym_from = {
+	.se_header	= "Source Symbol",
+	.se_cmp		= sort__sym_from_cmp,
+	.se_snprintf	= hist_entry__sym_from_snprintf,
+	.se_filter	= hist_entry__sym_from_filter,
+	.se_width_idx	= HISTC_SYMBOL_FROM,
+};
+
+/* Sort key descriptor for the "Target Symbol" column. */
+struct sort_entry sort_sym_to = {
+	.se_header	= "Target Symbol",
+	.se_cmp		= sort__sym_to_cmp,
+	.se_snprintf	= hist_entry__sym_to_snprintf,
+	.se_filter	= hist_entry__sym_to_filter,
+	.se_width_idx	= HISTC_SYMBOL_TO,
+};
+
+/*
+ * Compare entries by branch prediction flags. Returns the cmp_null()
+ * ordering when branch info is missing; otherwise 0 when both the mispred
+ * and predicted flags agree and 1 when either differs.
+ */
+static int64_t
+sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (!left->branch_info || !right->branch_info)
+		return cmp_null(left->branch_info, right->branch_info);
+
+	if (left->branch_info->flags.mispred != right->branch_info->flags.mispred)
+		return 1;
+
+	if (left->branch_info->flags.predicted != right->branch_info->flags.predicted)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Print the branch prediction column: "N" when the branch was predicted,
+ * "Y" when it was mispredicted, "N/A" when there is no branch info or
+ * neither flag is set.
+ *
+ * The label pointer is an ordinary automatic variable so every call starts
+ * from "N/A"; a static pointer would carry the "N"/"Y" chosen for an earlier
+ * entry over to later entries that have nothing to report.
+ */
+static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	const char *out = "N/A";
+
+	if (he->branch_info) {
+		if (he->branch_info->flags.predicted)
+			out = "N";
+		else if (he->branch_info->flags.mispred)
+			out = "Y";
+	}
+
+	return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
+}
+
+/*
+ * Compare two entries by the cycles value of their branch flags (left minus
+ * right).  Entries lacking branch info are ordered through cmp_null().
+ */
+static int64_t
+sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (left->branch_info && right->branch_info)
+		return left->branch_info->flags.cycles -
+			right->branch_info->flags.cycles;
+
+	return cmp_null(left->branch_info, right->branch_info);
+}
+
+/*
+ * Print the cycles column: the cycle count when it is non-zero, "-" when it
+ * is zero, and "N/A" when the entry carries no branch info.
+ */
+static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	if (he->branch_info) {
+		if (he->branch_info->flags.cycles != 0)
+			return repsep_snprintf(bf, size, "%-*hd", width,
+					       he->branch_info->flags.cycles);
+
+		return repsep_snprintf(bf, size, "%-*s", width, "-");
+	}
+
+	return scnprintf(bf, size, "%-.*s", width, "N/A");
+}
+
+struct sort_entry sort_cycles = {	/* registered as "cycles" in bstack_sort_dimensions */
+	.se_header	= "Basic Block Cycles",
+	.se_cmp		= sort__cycles_cmp,
+	.se_snprintf	= hist_entry__cycles_snprintf,
+	.se_width_idx	= HISTC_CYCLES,
+};
+
+/* --sort daddr_sym */
+/*
+ * Compare two entries by data address (right minus left).  An entry without
+ * mem_info is treated as address 0.
+ */
+int64_t
+sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t lhs = left->mem_info ? left->mem_info->daddr.addr : 0;
+	uint64_t rhs = right->mem_info ? right->mem_info->daddr.addr : 0;
+
+	return (int64_t)(rhs - lhs);
+}
+
+/*
+ * Print the data symbol column from the entry's daddr map/symbol/address.
+ * Without mem_info the shared symbol printer is called with a NULL map, a
+ * NULL symbol and address 0.
+ */
+static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	if (!he->mem_info)
+		return _hist_entry__sym_snprintf(NULL, NULL, 0, he->level,
+						 bf, size, width);
+
+	return _hist_entry__sym_snprintf(he->mem_info->daddr.map,
+					 he->mem_info->daddr.sym,
+					 he->mem_info->daddr.addr,
+					 he->level, bf, size, width);
+}
+
+/*
+ * Compare two entries by instruction address (right minus left).  An entry
+ * without mem_info is treated as address 0.
+ */
+int64_t
+sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t lhs = left->mem_info ? left->mem_info->iaddr.addr : 0;
+	uint64_t rhs = right->mem_info ? right->mem_info->iaddr.addr : 0;
+
+	return (int64_t)(rhs - lhs);
+}
+
+/*
+ * Print the code symbol column from the entry's iaddr map/symbol/address.
+ * Without mem_info the shared symbol printer is called with a NULL map, a
+ * NULL symbol and address 0.
+ */
+static int hist_entry__iaddr_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	if (!he->mem_info)
+		return _hist_entry__sym_snprintf(NULL, NULL, 0, he->level,
+						 bf, size, width);
+
+	return _hist_entry__sym_snprintf(he->mem_info->iaddr.map,
+					 he->mem_info->iaddr.sym,
+					 he->mem_info->iaddr.addr,
+					 he->level, bf, size, width);
+}
+
+/*
+ * Compare two entries by the DSO of their data address map, delegating to
+ * _sort__dso_cmp().  A missing mem_info is passed on as a NULL map.
+ */
+static int64_t
+sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct map *lhs = left->mem_info ? left->mem_info->daddr.map : NULL;
+	struct map *rhs = right->mem_info ? right->mem_info->daddr.map : NULL;
+
+	return _sort__dso_cmp(lhs, rhs);
+}
+
+/*
+ * Print the data object column using the data address map, or a NULL map
+ * when the entry has no mem_info.
+ */
+static int hist_entry__dso_daddr_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	struct map *daddr_map = he->mem_info ? he->mem_info->daddr.map : NULL;
+
+	return _hist_entry__dso_snprintf(daddr_map, bf, size, width);
+}
+
+/*
+ * Compare two entries by the mem_lock field of their data source (right
+ * minus left).  An entry without mem_info counts as PERF_MEM_LOCK_NA.
+ */
+static int64_t
+sort__locked_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	union perf_mem_data_src lhs;
+	union perf_mem_data_src rhs;
+
+	if (!left->mem_info)
+		lhs.mem_lock = PERF_MEM_LOCK_NA;
+	else
+		lhs = left->mem_info->data_src;
+
+	if (!right->mem_info)
+		rhs.mem_lock = PERF_MEM_LOCK_NA;
+	else
+		rhs = right->mem_info->data_src;
+
+	return (int64_t)(rhs.mem_lock - lhs.mem_lock);
+}
+
+/*
+ * Print the "Locked" column: format the lock state with
+ * perf_mem__lck_scnprintf() into a scratch buffer, then emit at most @width
+ * characters of it.
+ */
+static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	char text[10];
+
+	perf_mem__lck_scnprintf(text, sizeof(text), he->mem_info);
+
+	return repsep_snprintf(bf, size, "%.*s", width, text);
+}
+
+/*
+ * Compare two entries by the mem_dtlb field of their data source (right
+ * minus left).  An entry without mem_info counts as PERF_MEM_TLB_NA.
+ */
+static int64_t
+sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	union perf_mem_data_src lhs;
+	union perf_mem_data_src rhs;
+
+	if (!left->mem_info)
+		lhs.mem_dtlb = PERF_MEM_TLB_NA;
+	else
+		lhs = left->mem_info->data_src;
+
+	if (!right->mem_info)
+		rhs.mem_dtlb = PERF_MEM_TLB_NA;
+	else
+		rhs = right->mem_info->data_src;
+
+	return (int64_t)(rhs.mem_dtlb - lhs.mem_dtlb);
+}
+
+/*
+ * Print the "TLB access" column: format the TLB state with
+ * perf_mem__tlb_scnprintf() into a scratch buffer and emit it left-aligned
+ * in a field of @width characters.
+ */
+static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	char text[64];
+
+	perf_mem__tlb_scnprintf(text, sizeof(text), he->mem_info);
+
+	return repsep_snprintf(bf, size, "%-*s", width, text);
+}
+
+/*
+ * Compare two entries by the mem_lvl field of their data source (right
+ * minus left).  An entry without mem_info counts as PERF_MEM_LVL_NA.
+ */
+static int64_t
+sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	union perf_mem_data_src lhs;
+	union perf_mem_data_src rhs;
+
+	if (!left->mem_info)
+		lhs.mem_lvl = PERF_MEM_LVL_NA;
+	else
+		lhs = left->mem_info->data_src;
+
+	if (!right->mem_info)
+		rhs.mem_lvl = PERF_MEM_LVL_NA;
+	else
+		rhs = right->mem_info->data_src;
+
+	return (int64_t)(rhs.mem_lvl - lhs.mem_lvl);
+}
+
+/*
+ * Print the "Memory access" column: format the memory level with
+ * perf_mem__lvl_scnprintf() into a scratch buffer and emit it left-aligned
+ * in a field of @width characters.
+ */
+static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	char text[64];
+
+	perf_mem__lvl_scnprintf(text, sizeof(text), he->mem_info);
+
+	return repsep_snprintf(bf, size, "%-*s", width, text);
+}
+
+/*
+ * Compare two entries by the mem_snoop field of their data source (right
+ * minus left).  An entry without mem_info counts as PERF_MEM_SNOOP_NA.
+ */
+static int64_t
+sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	union perf_mem_data_src lhs;
+	union perf_mem_data_src rhs;
+
+	if (!left->mem_info)
+		lhs.mem_snoop = PERF_MEM_SNOOP_NA;
+	else
+		lhs = left->mem_info->data_src;
+
+	if (!right->mem_info)
+		rhs.mem_snoop = PERF_MEM_SNOOP_NA;
+	else
+		rhs = right->mem_info->data_src;
+
+	return (int64_t)(rhs.mem_snoop - lhs.mem_snoop);
+}
+
+/*
+ * Print the "Snoop" column: format the snoop state with
+ * perf_mem__snp_scnprintf() into a scratch buffer and emit it left-aligned
+ * in a field of @width characters.
+ */
+static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	char text[64];
+
+	perf_mem__snp_scnprintf(text, sizeof(text), he->mem_info);
+
+	return repsep_snprintf(bf, size, "%-*s", width, text);
+}
+
+/*
+ * Compare two entries by the cacheline of their data address.
+ *
+ * Ordering keys, in sequence: presence of mem_info, cpumode, presence of a
+ * data map, the map's maj/min/ino/ino_generation, the thread pid (only for
+ * what looks like anonymous userspace memory) and finally the cacheline
+ * address computed by cl_address().
+ *
+ * Missing mem_info is ordered through cmp_null() so that two entries which
+ * both lack it compare equal; returning -1 for that case made cmp(a, b) and
+ * cmp(b, a) both negative, which is not a consistent ordering.
+ */
+int64_t
+sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	u64 l, r;
+	struct map *l_map, *r_map;
+
+	if (!left->mem_info || !right->mem_info)
+		return cmp_null(left->mem_info, right->mem_info);
+
+	/* group event types together */
+	if (left->cpumode > right->cpumode) return -1;
+	if (left->cpumode < right->cpumode) return 1;
+
+	l_map = left->mem_info->daddr.map;
+	r_map = right->mem_info->daddr.map;
+
+	/* if both are NULL, jump to sort on al_addr instead */
+	if (!l_map && !r_map)
+		goto addr;
+
+	if (!l_map) return -1;
+	if (!r_map) return 1;
+
+	if (l_map->maj > r_map->maj) return -1;
+	if (l_map->maj < r_map->maj) return 1;
+
+	if (l_map->min > r_map->min) return -1;
+	if (l_map->min < r_map->min) return 1;
+
+	if (l_map->ino > r_map->ino) return -1;
+	if (l_map->ino < r_map->ino) return 1;
+
+	if (l_map->ino_generation > r_map->ino_generation) return -1;
+	if (l_map->ino_generation < r_map->ino_generation) return 1;
+
+	/*
+	 * Addresses with no major/minor numbers are assumed to be
+	 * anonymous in userspace.  Sort those on pid then address.
+	 *
+	 * The kernel and non-zero major/minor mapped areas are
+	 * assumed to be unity mapped.  Sort those on address.
+	 */
+
+	if ((left->cpumode != PERF_RECORD_MISC_KERNEL) &&
+	    (!(l_map->flags & MAP_SHARED)) &&
+	    !l_map->maj && !l_map->min && !l_map->ino &&
+	    !l_map->ino_generation) {
+		/* userspace anonymous */
+
+		if (left->thread->pid_ > right->thread->pid_) return -1;
+		if (left->thread->pid_ < right->thread->pid_) return 1;
+	}
+
+addr:
+	/* al_addr does all the right addr - start + offset calculations */
+	l = cl_address(left->mem_info->daddr.al_addr);
+	r = cl_address(right->mem_info->daddr.al_addr);
+
+	if (l > r) return -1;
+	if (l < r) return 1;
+
+	return 0;
+}
+
+/*
+ * Print the data cacheline column.
+ *
+ * With mem_info present the address is the cl_address() of the data
+ * al_addr, and the level character is overridden: 'X' when there is no data
+ * map, 's' for a non-kernel sample in a shared MAP__VARIABLE map that has
+ * any of maj/min/ino/ino_generation set.  Without mem_info the entry's own
+ * level is printed with a NULL map, a NULL symbol and address 0.
+ */
+static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
+					  size_t size, unsigned int width)
+{
+	char level = he->level;
+	struct map *map;
+	uint64_t addr;
+
+	if (!he->mem_info)
+		return _hist_entry__sym_snprintf(NULL, NULL, 0, level, bf,
+						 size, width);
+
+	addr = cl_address(he->mem_info->daddr.al_addr);
+	map = he->mem_info->daddr.map;
+
+	if (!map)
+		level = 'X';
+	else if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
+		 (map->type == MAP__VARIABLE) &&
+		 (map->flags & MAP_SHARED) &&
+		 (map->maj || map->min || map->ino || map->ino_generation))
+		level = 's';	/* print [s] for shared data mmaps */
+
+	return _hist_entry__sym_snprintf(map, he->mem_info->daddr.sym, addr,
+					 level, bf, size, width);
+}
+
+struct sort_entry sort_mispredict = {	/* registered as "mispredict" in bstack_sort_dimensions */
+	.se_header	= "Branch Mispredicted",
+	.se_cmp		= sort__mispredict_cmp,
+	.se_snprintf	= hist_entry__mispredict_snprintf,
+	.se_width_idx	= HISTC_MISPREDICT,
+};
+
+/*
+ * Average weight per event for @he: stat.weight divided by stat.nr_events,
+ * or 0 when no events were recorded.
+ */
+static u64 he_weight(struct hist_entry *he)
+{
+	if (!he->stat.nr_events)
+		return 0;
+
+	return he->stat.weight / he->stat.nr_events;
+}
+
+/* Compare two entries by their per-event average weight (left minus right). */
+static int64_t
+sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	u64 lhs = he_weight(left);
+	u64 rhs = he_weight(right);
+
+	return lhs - rhs;
+}
+
+/* Print the per-event average weight, left-aligned in @width columns. */
+static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	u64 average = he_weight(he);
+
+	return repsep_snprintf(bf, size, "%-*llu", width, average);
+}
+
+struct sort_entry sort_local_weight = {	/* registered as "local_weight" in common_sort_dimensions */
+	.se_header	= "Local Weight",
+	.se_cmp		= sort__local_weight_cmp,
+	.se_snprintf	= hist_entry__local_weight_snprintf,
+	.se_width_idx	= HISTC_LOCAL_WEIGHT,
+};
+
+/* Compare two entries by their accumulated stat.weight (left minus right). */
+static int64_t
+sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	int64_t delta = left->stat.weight - right->stat.weight;
+
+	return delta;
+}
+
+/* Print the accumulated stat.weight, left-aligned in @width columns. */
+static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
+					      size_t size, unsigned int width)
+{
+	int printed;
+
+	printed = repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight);
+	return printed;
+}
+
+struct sort_entry sort_global_weight = {	/* registered as "weight" in common_sort_dimensions */
+	.se_header	= "Weight",
+	.se_cmp		= sort__global_weight_cmp,
+	.se_snprintf	= hist_entry__global_weight_snprintf,
+	.se_width_idx	= HISTC_GLOBAL_WEIGHT,
+};
+
+struct sort_entry sort_mem_daddr_sym = {	/* registered as "symbol_daddr" in memory_sort_dimensions */
+	.se_header	= "Data Symbol",
+	.se_cmp		= sort__daddr_cmp,
+	.se_snprintf	= hist_entry__daddr_snprintf,
+	.se_width_idx	= HISTC_MEM_DADDR_SYMBOL,
+};
+
+struct sort_entry sort_mem_iaddr_sym = {	/* registered as "symbol_iaddr" in memory_sort_dimensions */
+	.se_header	= "Code Symbol",
+	.se_cmp		= sort__iaddr_cmp,
+	.se_snprintf	= hist_entry__iaddr_snprintf,
+	.se_width_idx	= HISTC_MEM_IADDR_SYMBOL,
+};
+
+struct sort_entry sort_mem_daddr_dso = {	/* registered as "dso_daddr" in memory_sort_dimensions */
+	.se_header	= "Data Object",
+	.se_cmp		= sort__dso_daddr_cmp,
+	.se_snprintf	= hist_entry__dso_daddr_snprintf,
+	.se_width_idx	= HISTC_MEM_DADDR_DSO,
+};
+
+struct sort_entry sort_mem_locked = {	/* registered as "locked" in memory_sort_dimensions */
+	.se_header	= "Locked",
+	.se_cmp		= sort__locked_cmp,
+	.se_snprintf	= hist_entry__locked_snprintf,
+	.se_width_idx	= HISTC_MEM_LOCKED,
+};
+
+struct sort_entry sort_mem_tlb = {	/* registered as "tlb" in memory_sort_dimensions */
+	.se_header	= "TLB access",
+	.se_cmp		= sort__tlb_cmp,
+	.se_snprintf	= hist_entry__tlb_snprintf,
+	.se_width_idx	= HISTC_MEM_TLB,
+};
+
+struct sort_entry sort_mem_lvl = {	/* registered as "mem" in memory_sort_dimensions */
+	.se_header	= "Memory access",
+	.se_cmp		= sort__lvl_cmp,
+	.se_snprintf	= hist_entry__lvl_snprintf,
+	.se_width_idx	= HISTC_MEM_LVL,
+};
+
+struct sort_entry sort_mem_snoop = {	/* registered as "snoop" in memory_sort_dimensions */
+	.se_header	= "Snoop",
+	.se_cmp		= sort__snoop_cmp,
+	.se_snprintf	= hist_entry__snoop_snprintf,
+	.se_width_idx	= HISTC_MEM_SNOOP,
+};
+
+struct sort_entry sort_mem_dcacheline = {	/* registered as "dcacheline" in memory_sort_dimensions */
+	.se_header	= "Data Cacheline",
+	.se_cmp		= sort__dcacheline_cmp,
+	.se_snprintf	= hist_entry__dcacheline_snprintf,
+	.se_width_idx	= HISTC_MEM_DCACHELINE,
+};
+
+/*
+ * Compare two entries by physical data address (right minus left).  An
+ * entry without mem_info is treated as address 0.
+ */
+static int64_t
+sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t lhs = left->mem_info ? left->mem_info->daddr.phys_addr : 0;
+	uint64_t rhs = right->mem_info ? right->mem_info->daddr.phys_addr : 0;
+
+	return (int64_t)(rhs - lhs);
+}
+
+/*
+ * Print the physical data address column as "[<level>] 0x<addr>", padded
+ * with spaces up to @width and cut off at @width.  Always returns @width.
+ *
+ * An entry without mem_info prints address 0, matching how
+ * sort__phys_daddr_cmp() treats it, instead of dereferencing a NULL pointer.
+ * The '*' width/precision arguments are passed as int, which is the type
+ * the printf family expects for them, and the padding step is skipped when
+ * the text already fills the column so "width - ret" cannot wrap around.
+ */
+static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
+					   size_t size, unsigned int width)
+{
+	uint64_t addr = 0;
+	size_t ret = 0;
+	int len = BITS_PER_LONG / 4;
+
+	if (he->mem_info)
+		addr = he->mem_info->daddr.phys_addr;
+
+	ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level);
+
+	ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len,
+			       (unsigned long long)addr);
+
+	if (ret < width)
+		ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
+				       (int)(width - ret), "");
+
+	if (ret > width)
+		bf[width] = '\0';
+
+	return width;
+}
+
+struct sort_entry sort_mem_phys_daddr = {	/* registered as "phys_daddr" in memory_sort_dimensions */
+	.se_header	= "Data Physical Address",
+	.se_cmp		= sort__phys_daddr_cmp,
+	.se_snprintf	= hist_entry__phys_daddr_snprintf,
+	.se_width_idx	= HISTC_MEM_PHYS_DADDR,
+};
+
+/*
+ * Compare two entries by the abort flag of their branch info: 0 when the
+ * flags match, 1 when they differ.  Entries lacking branch info are ordered
+ * through cmp_null().
+ */
+static int64_t
+sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (left->branch_info && right->branch_info)
+		return left->branch_info->flags.abort !=
+			right->branch_info->flags.abort;
+
+	return cmp_null(left->branch_info, right->branch_info);
+}
+
+/*
+ * Print the transaction abort column: "A" when the branch flags report an
+ * abort, "." when they do not, "N/A" when the entry has no branch info.
+ *
+ * The label pointer is an ordinary automatic variable so every call starts
+ * from "N/A"; a static pointer would make entries without branch info
+ * repeat whatever an earlier entry printed.
+ */
+static int hist_entry__abort_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	const char *out = "N/A";
+
+	if (he->branch_info) {
+		if (he->branch_info->flags.abort)
+			out = "A";
+		else
+			out = ".";
+	}
+
+	return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_abort = {	/* registered as "abort" in bstack_sort_dimensions */
+	.se_header	= "Transaction abort",
+	.se_cmp		= sort__abort_cmp,
+	.se_snprintf	= hist_entry__abort_snprintf,
+	.se_width_idx	= HISTC_ABORT,
+};
+
+/*
+ * Compare two entries by the in_tx flag of their branch info: 0 when the
+ * flags match, 1 when they differ.  Entries lacking branch info are ordered
+ * through cmp_null().
+ */
+static int64_t
+sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (left->branch_info && right->branch_info)
+		return left->branch_info->flags.in_tx !=
+			right->branch_info->flags.in_tx;
+
+	return cmp_null(left->branch_info, right->branch_info);
+}
+
+/*
+ * Print the "branch in transaction" column: "T" when the branch flags have
+ * in_tx set, "." when they do not, "N/A" when the entry has no branch info.
+ *
+ * The label pointer is an ordinary automatic variable so every call starts
+ * from "N/A"; a static pointer would make entries without branch info
+ * repeat whatever an earlier entry printed.
+ */
+static int hist_entry__in_tx_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	const char *out = "N/A";
+
+	if (he->branch_info) {
+		if (he->branch_info->flags.in_tx)
+			out = "T";
+		else
+			out = ".";
+	}
+
+	return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_in_tx = {	/* registered as "in_tx" in bstack_sort_dimensions */
+	.se_header	= "Branch in transaction",
+	.se_cmp		= sort__in_tx_cmp,
+	.se_snprintf	= hist_entry__in_tx_snprintf,
+	.se_width_idx	= HISTC_IN_TX,
+};
+
+/* Compare two entries by their raw transaction value (left minus right). */
+static int64_t
+sort__transaction_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	int64_t delta = left->transaction - right->transaction;
+
+	return delta;
+}
+
+/*
+ * Copy @str, including its terminating NUL, to @p and return a pointer to
+ * the NUL just written so the next piece can be appended there.  The caller
+ * must make sure @p has room for the whole string.
+ */
+static inline char *add_str(char *p, const char *str)
+{
+	size_t n = strlen(str);
+
+	memcpy(p, str, n + 1);
+	return p + n;
+}
+
+static struct txbit {	/* maps transaction flag bits to the labels printed for them */
+	unsigned flag;	/* bit tested against he->transaction */
+	const char *name;	/* label appended when the bit is set; NULL ends the table */
+	int skip_for_len;	/* non-zero: label is left out of hist_entry__transaction_len() */
+} txbits[] = {
+	{ PERF_TXN_ELISION,        "EL ",        0 },
+	{ PERF_TXN_TRANSACTION,    "TX ",        1 },
+	{ PERF_TXN_SYNC,           "SYNC ",      1 },
+	{ PERF_TXN_ASYNC,          "ASYNC ",     0 },
+	{ PERF_TXN_RETRY,          "RETRY ",     0 },
+	{ PERF_TXN_CONFLICT,       "CON ",       0 },
+	{ PERF_TXN_CAPACITY_WRITE, "CAP-WRITE ", 1 },
+	{ PERF_TXN_CAPACITY_READ,  "CAP-READ ",  0 },
+	{ 0, NULL, 0 }	/* sentinel */
+};
+
+/*
+ * Column width for the transaction field: the summed length of every
+ * txbits[] label not marked skip_for_len, plus 4 for the abort code suffix.
+ */
+int hist_entry__transaction_len(void)
+{
+	const struct txbit *tb;
+	int len = 4; /* :XX<space> */
+
+	for (tb = txbits; tb->name; tb++) {
+		if (tb->skip_for_len)
+			continue;
+		len += strlen(tb->name);
+	}
+
+	return len;
+}
+
+/*
+ * Print the transaction column.
+ *
+ * Builds a scratch string from the txbits[] label of every flag set in
+ * he->transaction, adds "NEITHER " when the value is non-zero but has
+ * neither PERF_TXN_SYNC nor PERF_TXN_ASYNC, and appends ":<hex>" with the
+ * abort code when any PERF_TXN_ABORT_MASK bit is set.  The result is emitted
+ * left-aligned in @width columns.
+ */
+static int hist_entry__transaction_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	u64 t = he->transaction;
+	char buf[128];
+	char *p = buf;
+	struct txbit *tb;
+
+	buf[0] = 0;
+
+	for (tb = txbits; tb->name; tb++) {
+		if (tb->flag & t)
+			p = add_str(p, tb->name);
+	}
+
+	if (t && !(t & (PERF_TXN_SYNC|PERF_TXN_ASYNC)))
+		p = add_str(p, "NEITHER ");
+
+	if (t & PERF_TXN_ABORT_MASK)
+		sprintf(p, ":%" PRIx64,
+			(t & PERF_TXN_ABORT_MASK) >>
+			PERF_TXN_ABORT_SHIFT);
+
+	return repsep_snprintf(bf, size, "%-*s", width, buf);
+}
+
+struct sort_entry sort_transaction = {	/* registered as "transaction" in common_sort_dimensions */
+	.se_header	= "Transaction                ",
+	.se_cmp		= sort__transaction_cmp,
+	.se_snprintf	= hist_entry__transaction_snprintf,
+	.se_width_idx	= HISTC_TRANSACTION,
+};
+
+/* --sort symbol_size */
+
+/*
+ * Three-way comparison of two symbols by symbol__size(); a NULL symbol
+ * counts as size 0.  Returns -1, 0 or 1.
+ */
+static int64_t _sort__sym_size_cmp(struct symbol *sym_l, struct symbol *sym_r)
+{
+	int64_t size_l = 0;
+	int64_t size_r = 0;
+
+	if (sym_l != NULL)
+		size_l = symbol__size(sym_l);
+	if (sym_r != NULL)
+		size_r = symbol__size(sym_r);
+
+	if (size_l < size_r)
+		return -1;
+	if (size_l == size_r)
+		return 0;
+	return 1;
+}
+
+/*
+ * Compare two entries by symbol size.  The operands are handed to
+ * _sort__sym_size_cmp() in swapped order (right first).
+ */
+static int64_t
+sort__sym_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct symbol *rhs = right->ms.sym;
+	struct symbol *lhs = left->ms.sym;
+
+	return _sort__sym_size_cmp(rhs, lhs);
+}
+
+/*
+ * Print a symbol's size right-aligned in @width columns, or "unknown" when
+ * @sym is NULL.
+ *
+ * The size is converted to long long and printed with %lld: the comparator
+ * for this key stores symbol__size() in an int64_t, so the value is not
+ * guaranteed to be int-sized and handing it to "%d" through varargs is a
+ * format/argument mismatch.  Output is unchanged for sizes that fit an int.
+ */
+static int _hist_entry__sym_size_snprintf(struct symbol *sym, char *bf,
+					  size_t bf_size, unsigned int width)
+{
+	if (sym)
+		return repsep_snprintf(bf, bf_size, "%*lld", width,
+				       (long long)symbol__size(sym));
+
+	return repsep_snprintf(bf, bf_size, "%*s", width, "unknown");
+}
+
+/* Print the "Symbol size" column for @he using its ms.sym symbol. */
+static int hist_entry__sym_size_snprintf(struct hist_entry *he, char *bf,
+					 size_t size, unsigned int width)
+{
+	struct symbol *sym = he->ms.sym;
+
+	return _hist_entry__sym_size_snprintf(sym, bf, size, width);
+}
+
+struct sort_entry sort_sym_size = {	/* registered as "symbol_size" in common_sort_dimensions */
+	.se_header	= "Symbol size",
+	.se_cmp		= sort__sym_size_cmp,
+	.se_snprintf	= hist_entry__sym_size_snprintf,
+	.se_width_idx	= HISTC_SYM_SIZE,
+};
+
+/* --sort dso_size */
+
+/*
+ * Three-way comparison of two maps by map__size(); a NULL map counts as
+ * size 0.  Returns -1, 0 or 1.
+ */
+static int64_t _sort__dso_size_cmp(struct map *map_l, struct map *map_r)
+{
+	int64_t size_l = 0;
+	int64_t size_r = 0;
+
+	if (map_l != NULL)
+		size_l = map__size(map_l);
+	if (map_r != NULL)
+		size_r = map__size(map_r);
+
+	if (size_l < size_r)
+		return -1;
+	if (size_l == size_r)
+		return 0;
+	return 1;
+}
+
+/*
+ * Compare two entries by the size of their map.  The operands are handed to
+ * _sort__dso_size_cmp() in swapped order (right first).
+ */
+static int64_t
+sort__dso_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	struct map *rhs = right->ms.map;
+	struct map *lhs = left->ms.map;
+
+	return _sort__dso_size_cmp(rhs, lhs);
+}
+
+/*
+ * Print a map's size right-aligned in @width columns, or "unknown" when
+ * @map is NULL or has no dso.
+ *
+ * The size is converted to long long and printed with %lld: the comparator
+ * for this key stores map__size() in an int64_t, so the value is not
+ * guaranteed to be int-sized and handing it to "%d" through varargs is a
+ * format/argument mismatch.  Output is unchanged for sizes that fit an int.
+ */
+static int _hist_entry__dso_size_snprintf(struct map *map, char *bf,
+					  size_t bf_size, unsigned int width)
+{
+	if (map && map->dso)
+		return repsep_snprintf(bf, bf_size, "%*lld", width,
+				       (long long)map__size(map));
+
+	return repsep_snprintf(bf, bf_size, "%*s", width, "unknown");
+}
+
+/* Print the "DSO size" column for @he using its ms.map map. */
+static int hist_entry__dso_size_snprintf(struct hist_entry *he, char *bf,
+					 size_t size, unsigned int width)
+{
+	struct map *map = he->ms.map;
+
+	return _hist_entry__dso_size_snprintf(map, bf, size, width);
+}
+
+struct sort_entry sort_dso_size = {	/* registered as "dso_size" in common_sort_dimensions */
+	.se_header	= "DSO size",
+	.se_cmp		= sort__dso_size_cmp,
+	.se_snprintf	= hist_entry__dso_size_snprintf,
+	.se_width_idx	= HISTC_DSO_SIZE,
+};
+
+
+struct sort_dimension {	/* one named sort key and the sort_entry implementing it */
+	const char		*name;	/* key string, filled in by the DIM() macros */
+	struct sort_entry	*entry;	/* comparison/print callbacks for this key */
+	int			taken;	/* NOTE(review): not used in this chunk; presumably marks a key already added - confirm */
+};
+
+#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension common_sort_dimensions[] = {	/* indexed directly by the SORT_* value */
+	DIM(SORT_PID, "pid", sort_thread),
+	DIM(SORT_COMM, "comm", sort_comm),
+	DIM(SORT_DSO, "dso", sort_dso),
+	DIM(SORT_SYM, "symbol", sort_sym),
+	DIM(SORT_PARENT, "parent", sort_parent),
+	DIM(SORT_CPU, "cpu", sort_cpu),
+	DIM(SORT_SOCKET, "socket", sort_socket),
+	DIM(SORT_SRCLINE, "srcline", sort_srcline),
+	DIM(SORT_SRCFILE, "srcfile", sort_srcfile),
+	DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
+	DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
+	DIM(SORT_TRANSACTION, "transaction", sort_transaction),
+	DIM(SORT_TRACE, "trace", sort_trace),
+	DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
+	DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size),
+	DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
+};
+
+#undef DIM	/* redefined below with a different index base */
+
+#define DIM(d, n, func) [d - __SORT_BRANCH_STACK] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension bstack_sort_dimensions[] = {	/* indexed by SORT_* minus __SORT_BRANCH_STACK */
+	DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
+	DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
+	DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from),
+	DIM(SORT_SYM_TO, "symbol_to", sort_sym_to),
+	DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
+	DIM(SORT_IN_TX, "in_tx", sort_in_tx),
+	DIM(SORT_ABORT, "abort", sort_abort),
+	DIM(SORT_CYCLES, "cycles", sort_cycles),
+	DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from),
+	DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to),
+};
+
+#undef DIM	/* redefined below with a different index base */
+
+#define DIM(d, n, func) [d - __SORT_MEMORY_MODE] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension memory_sort_dimensions[] = {	/* indexed by SORT_* minus __SORT_MEMORY_MODE */
+	DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym),
+	DIM(SORT_MEM_IADDR_SYMBOL, "symbol_iaddr", sort_mem_iaddr_sym),
+	DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso),
+	DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked),
+	DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb),
+	DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
+	DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
+	DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
+	DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
+};
+
+#undef DIM	/* redefined below for the hpp dimension table */
+
+struct hpp_dimension {	/* one named output column backed by a perf_hpp_fmt template */
+	const char		*name;	/* key string, filled in by DIM() */
+	struct perf_hpp_fmt	*fmt;	/* template that __hpp_dimension__alloc_hpp() duplicates */
+	int			taken;	/* NOTE(review): not used in this chunk; presumably marks a key already added - confirm */
+};
+
+#define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
+
+static struct hpp_dimension hpp_sort_dimensions[] = {	/* each entry points into perf_hpp__format[] */
+	DIM(PERF_HPP__OVERHEAD, "overhead"),
+	DIM(PERF_HPP__OVERHEAD_SYS, "overhead_sys"),
+	DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
+	DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
+	DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
+	DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
+	DIM(PERF_HPP__SAMPLES, "sample"),
+	DIM(PERF_HPP__PERIOD, "period"),
+};
+
+#undef DIM
+
+struct hpp_sort_entry {	/* perf_hpp_fmt wrapper around a sort_entry */
+	struct perf_hpp_fmt hpp;	/* embedded format; container_of() recovers the wrapper from it */
+	struct sort_entry *se;	/* sort key whose callbacks the __sort__hpp_*() functions invoke */
+};
+
+/*
+ * For a sort-entry format, feed the length of the column name to
+ * hists__new_col_len() for the entry's width index.  Formats that are not
+ * sort entries are left alone.
+ */
+void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+	if (perf_hpp__is_sort_entry(fmt)) {
+		struct hpp_sort_entry *hse;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		hists__new_col_len(hists, hse->se->se_width_idx,
+				   strlen(fmt->name));
+	}
+}
+
+/*
+ * Header callback for sort-entry columns: print the column name
+ * left-aligned, padded and truncated to the column width.  The width is the
+ * user-supplied length when set, otherwise the hists column length for this
+ * key.
+ *
+ * The width is converted to int before being used for the '*' width and
+ * precision, since the printf family reads those arguments as int.
+ */
+static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			      struct hists *hists, int line __maybe_unused,
+			      int *span __maybe_unused)
+{
+	struct hpp_sort_entry *hse;
+	size_t len = fmt->user_len;
+	int width;
+
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);
+
+	if (!len)
+		len = hists__col_len(hists, hse->se->se_width_idx);
+
+	width = (int)len;
+
+	return scnprintf(hpp->buf, hpp->size, "%-*.*s", width, width, fmt->name);
+}
+
+/*
+ * Width callback for sort-entry columns: the user-supplied length when set,
+ * otherwise the hists column length for this key.
+ */
+static int __sort__hpp_width(struct perf_hpp_fmt *fmt,
+			     struct perf_hpp *hpp __maybe_unused,
+			     struct hists *hists)
+{
+	size_t len = fmt->user_len;
+
+	if (!len) {
+		struct hpp_sort_entry *hse;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		len = hists__col_len(hists, hse->se->se_width_idx);
+	}
+
+	return len;
+}
+
+/*
+ * Entry callback for sort-entry columns: call the key's se_snprintf() on
+ * @he with the hpp buffer, using the user-supplied width when set and the
+ * hists column length otherwise.
+ */
+static int __sort__hpp_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			     struct hist_entry *he)
+{
+	struct hpp_sort_entry *hse = container_of(fmt, struct hpp_sort_entry,
+						  hpp);
+	size_t len = fmt->user_len;
+
+	if (len == 0)
+		len = hists__col_len(he->hists, hse->se->se_width_idx);
+
+	return hse->se->se_snprintf(he, hpp->buf, hpp->size, len);
+}
+
+/* cmp callback for sort-entry formats: forwards to the key's se_cmp(). */
+static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt,
+			       struct hist_entry *a, struct hist_entry *b)
+{
+	struct hpp_sort_entry *hse = container_of(fmt, struct hpp_sort_entry,
+						  hpp);
+
+	return hse->se->se_cmp(a, b);
+}
+
+/*
+ * collapse callback for sort-entry formats: uses the key's se_collapse()
+ * when it is set and falls back to se_cmp() otherwise.
+ */
+static int64_t __sort__hpp_collapse(struct perf_hpp_fmt *fmt,
+				    struct hist_entry *a, struct hist_entry *b)
+{
+	struct hpp_sort_entry *hse = container_of(fmt, struct hpp_sort_entry,
+						  hpp);
+
+	if (hse->se->se_collapse)
+		return hse->se->se_collapse(a, b);
+
+	return hse->se->se_cmp(a, b);
+}
+
+/*
+ * sort callback for sort-entry formats: uses the key's se_sort() when it is
+ * set and falls back to se_cmp() otherwise.
+ */
+static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt,
+				struct hist_entry *a, struct hist_entry *b)
+{
+	struct hpp_sort_entry *hse = container_of(fmt, struct hpp_sort_entry,
+						  hpp);
+
+	if (hse->se->se_sort)
+		return hse->se->se_sort(a, b);
+
+	return hse->se->se_cmp(a, b);
+}
+
+/*
+ * A format is a sort entry exactly when its header callback is
+ * __sort__hpp_header.
+ */
+bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
+{
+	if (format->header == __sort__hpp_header)
+		return true;
+
+	return false;
+}
+
+#define MK_SORT_ENTRY_CHK(key)					\
+bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt)	\
+{								\
+	struct hpp_sort_entry *hse;				\
+								\
+	if (!perf_hpp__is_sort_entry(fmt))			\
+		return false;					\
+								\
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);	\
+	return hse->se == &sort_ ## key ;			\
+}	/* generates perf_hpp__is_<key>_entry(): true iff fmt is a sort entry wrapping sort_<key> */
+
+MK_SORT_ENTRY_CHK(trace)	/* perf_hpp__is_trace_entry() */
+MK_SORT_ENTRY_CHK(srcline)	/* perf_hpp__is_srcline_entry() */
+MK_SORT_ENTRY_CHK(srcfile)	/* perf_hpp__is_srcfile_entry() */
+MK_SORT_ENTRY_CHK(thread)	/* perf_hpp__is_thread_entry() */
+MK_SORT_ENTRY_CHK(comm)	/* perf_hpp__is_comm_entry() */
+MK_SORT_ENTRY_CHK(dso)	/* perf_hpp__is_dso_entry() */
+MK_SORT_ENTRY_CHK(sym)	/* perf_hpp__is_sym_entry() */
+
+
+/*
+ * equal callback for sort-entry formats: two formats are equal when both
+ * are sort entries and wrap the same sort_entry.
+ */
+static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	if (perf_hpp__is_sort_entry(a) && perf_hpp__is_sort_entry(b)) {
+		struct hpp_sort_entry *lhs, *rhs;
+
+		lhs = container_of(a, struct hpp_sort_entry, hpp);
+		rhs = container_of(b, struct hpp_sort_entry, hpp);
+
+		return lhs->se == rhs->se;
+	}
+
+	return false;
+}
+
+/* free callback for sort-entry formats: releases the enclosing wrapper. */
+static void hse_free(struct perf_hpp_fmt *fmt)
+{
+	free(container_of(fmt, struct hpp_sort_entry, hpp));
+}
+
+/*
+ * Allocate a sort-entry format for dimension @sd at hierarchy @level.
+ *
+ * Every callback is pointed at the matching __sort__hpp_*() function, the
+ * list heads are initialised and the length fields are cleared.  Returns
+ * NULL, after logging an error, when the allocation fails.
+ */
+static struct hpp_sort_entry *
+__sort_dimension__alloc_hpp(struct sort_dimension *sd, int level)
+{
+	struct hpp_sort_entry *hse = malloc(sizeof(*hse));
+
+	if (!hse) {
+		pr_err("Memory allocation failed\n");
+		return NULL;
+	}
+
+	hse->se = sd->entry;
+
+	/* output side */
+	hse->hpp.name = sd->entry->se_header;
+	hse->hpp.header = __sort__hpp_header;
+	hse->hpp.width = __sort__hpp_width;
+	hse->hpp.entry = __sort__hpp_entry;
+	hse->hpp.color = NULL;
+
+	/* ordering side */
+	hse->hpp.cmp = __sort__hpp_cmp;
+	hse->hpp.collapse = __sort__hpp_collapse;
+	hse->hpp.sort = __sort__hpp_sort;
+	hse->hpp.equal = __sort__hpp_equal;
+	hse->hpp.free = hse_free;
+
+	/* bookkeeping */
+	hse->hpp.elide = false;
+	hse->hpp.len = 0;
+	hse->hpp.user_len = 0;
+	hse->hpp.level = level;
+	INIT_LIST_HEAD(&hse->hpp.list);
+	INIT_LIST_HEAD(&hse->hpp.sort_list);
+
+	return hse;
+}
+
+/* free callback for formats duplicated by __hpp_dimension__alloc_hpp(). */
+static void hpp_free(struct perf_hpp_fmt *fmt)
+{
+	void *mem = fmt;
+
+	free(mem);
+}
+
+/*
+ * Duplicate the format template of @hd, give the copy fresh list heads, the
+ * hpp_free() destructor and the requested @level.  Returns NULL when the
+ * duplication fails.
+ */
+static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd,
+						       int level)
+{
+	struct perf_hpp_fmt *copy = memdup(hd->fmt, sizeof(*copy));
+
+	if (!copy)
+		return copy;
+
+	INIT_LIST_HEAD(&copy->list);
+	INIT_LIST_HEAD(&copy->sort_list);
+	copy->free = hpp_free;
+	copy->level = level;
+
+	return copy;
+}
+
+/*
+ * Run the se_filter() callback of every sort-entry format in the entry's
+ * hpp list.
+ *
+ * Returns -1 when no callback handled this filter @type (each returned a
+ * negative value or none exists); otherwise the bitwise OR of all
+ * non-negative callback results.
+ */
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg)
+{
+	struct perf_hpp_fmt *fmt;
+	int ret = -1;
+
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		struct hpp_sort_entry *hse;
+		int r;
+
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		if (!hse->se->se_filter)
+			continue;
+
+		/*
+		 * hist entry is filtered if any of sort key in the hpp list
+		 * is applied.  But it should skip non-matched filter types.
+		 */
+		r = hse->se->se_filter(he, type, arg);
+		if (r < 0)
+			continue;
+
+		ret = (ret < 0) ? r : (ret | r);
+	}
+
+	return ret;
+}
+
+/*
+ * Allocate a sort-entry format for @sd at @level and register it as a sort
+ * field on @list.  Returns 0 on success, -1 if the allocation failed.
+ */
+static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd,
+					  struct perf_hpp_list *list,
+					  int level)
+{
+	struct hpp_sort_entry *hse;
+
+	hse = __sort_dimension__alloc_hpp(sd, level);
+	if (!hse)
+		return -1;
+
+	perf_hpp_list__register_sort_field(list, &hse->hpp);
+
+	return 0;
+}
+
+/*
+ * Allocate a sort-entry format for @sd at level 0 and register it as an
+ * output column on @list.  Returns 0 on success, -1 if the allocation
+ * failed.
+ */
+static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
+					    struct perf_hpp_list *list)
+{
+	struct hpp_sort_entry *hse;
+
+	hse = __sort_dimension__alloc_hpp(sd, 0);
+	if (!hse)
+		return -1;
+
+	perf_hpp_list__column_register(list, &hse->hpp);
+
+	return 0;
+}
+
+struct hpp_dynamic_entry {	/* perf_hpp_fmt wrapper for a tracepoint field column */
+	struct perf_hpp_fmt hpp;	/* embedded format; container_of() recovers the wrapper from it */
+	struct perf_evsel *evsel;	/* compared with hists_to_evsel() in perf_hpp__defined_dynamic_entry() */
+	struct format_field *field;	/* field whose name and value this column shows */
+	unsigned dynamic_len;	/* largest value length seen so far; feeds hde_width() */
+	bool raw_trace;	/* when set, the pretty-printed trace output is not parsed */
+};
+
+/*
+ * Column width for a dynamic entry, computed once and cached in hpp.len.
+ *
+ * The width is the largest of: the longest value seen so far (dynamic_len),
+ * the field name length, and the field size - taken as-is for string
+ * fields, or as size * 2 + 2 otherwise to leave room for a hex number.
+ */
+static int hde_width(struct hpp_dynamic_entry *hde)
+{
+	int len, namelen, fieldlen;
+
+	if (hde->hpp.len)
+		return hde->hpp.len;
+
+	len = hde->dynamic_len;
+	namelen = strlen(hde->field->name);
+	fieldlen = hde->field->size;
+
+	if (len < namelen)
+		len = namelen;
+
+	/* length for print hex numbers */
+	if (!(hde->field->flags & FIELD_IS_STRING))
+		fieldlen = hde->field->size * 2 + 2;
+
+	if (len < fieldlen)
+		len = fieldlen;
+
+	hde->hpp.len = len;
+
+	return hde->hpp.len;
+}
+
+static void update_dynamic_len(struct hpp_dynamic_entry *hde,	/* grow hde->dynamic_len to fit this entry's printed field value */
+			       struct hist_entry *he)
+{
+	char *str, *pos;
+	struct format_field *field = hde->field;
+	size_t namelen;
+	bool last = false;
+
+	if (hde->raw_trace)	/* raw mode: the pretty-printed output is not consulted */
+		return;
+
+	/* parse pretty print result and update max length */
+	if (!he->trace_output)	/* generate the trace text on first use and keep it on the entry */
+		he->trace_output = get_trace_output(he);
+
+	namelen = strlen(field->name);
+	str = he->trace_output;
+
+	while (str) {	/* walk the space-separated tokens; a NULL trace_output skips the loop */
+		pos = strchr(str, ' ');
+		if (pos == NULL) {	/* final token: it ends at the string terminator */
+			last = true;
+			pos = str + strlen(str);
+		}
+
+		if (!strncmp(str, field->name, namelen)) {	/* NOTE(review): prefix match only; the character after the name is skipped unchecked - confirm the token format */
+			size_t len;
+
+			str += namelen + 1;	/* step over the name and one separator character */
+			len = pos - str;
+
+			if (len > hde->dynamic_len)
+				hde->dynamic_len = len;
+			break;
+		}
+
+		if (last)
+			str = NULL;
+		else
+			str = pos + 1;
+	}
+}
+
+/*
+ * Header callback for dynamic-entry columns: print the field name
+ * right-aligned, padded and truncated to the column width.  The width is
+ * the user-supplied length when set, otherwise hde_width().
+ *
+ * The width is converted to int before being used for the '*' width and
+ * precision, since the printf family reads those arguments as int.
+ */
+static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			      struct hists *hists __maybe_unused,
+			      int line __maybe_unused,
+			      int *span __maybe_unused)
+{
+	struct hpp_dynamic_entry *hde;
+	size_t len = fmt->user_len;
+	int width;
+
+	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+	if (!len)
+		len = hde_width(hde);
+
+	width = (int)len;
+
+	return scnprintf(hpp->buf, hpp->size, "%*.*s", width, width,
+			 hde->field->name);
+}
+
+/*
+ * Width callback for dynamic-entry columns: the user-supplied length when
+ * set, otherwise hde_width().
+ */
+static int __sort__hde_width(struct perf_hpp_fmt *fmt,
+			     struct perf_hpp *hpp __maybe_unused,
+			     struct hists *hists __maybe_unused)
+{
+	size_t len = fmt->user_len;
+
+	if (!len) {
+		struct hpp_dynamic_entry *hde;
+
+		hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+		len = hde_width(hde);
+	}
+
+	return len;
+}
+
+/*
+ * True when the evsel recorded in the dynamic entry wrapping @fmt is the
+ * one that @hists maps to via hists_to_evsel().  @fmt is treated as a
+ * dynamic entry without being checked here.
+ */
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+	struct hpp_dynamic_entry *hde = container_of(fmt,
+						     struct hpp_dynamic_entry,
+						     hpp);
+
+	return hists_to_evsel(hists) == hde->evsel;
+}
+
+/*
+ * Entry callback for dynamic-entry columns.
+ *
+ * Unless raw_trace is set, the entry's pretty-printed trace output is
+ * scanned token by token (space separated) for one starting with the field
+ * name, and the text after the name plus one separator character is
+ * printed.  When raw_trace is set, or no token matches, the field is
+ * rendered from the raw data with pevent_print_field() instead.  The value
+ * is printed right-aligned, padded and truncated to the column width; if
+ * duplicating the token fails, "ERROR" is printed.
+ *
+ * The column width is converted to int once and used for every '*' width
+ * and precision, since the printf family reads those arguments as int.
+ *
+ * NOTE(review): the token match is a prefix match and the character after
+ * the name is skipped unchecked - confirm against the trace output format.
+ */
+static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+			     struct hist_entry *he)
+{
+	struct hpp_dynamic_entry *hde;
+	size_t len = fmt->user_len;
+	char *str, *pos;
+	struct format_field *field;
+	size_t namelen;
+	bool last = false;
+	int width;
+	int ret;
+
+	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+	if (!len)
+		len = hde_width(hde);
+
+	width = (int)len;
+
+	if (hde->raw_trace)
+		goto raw_field;
+
+	if (!he->trace_output)
+		he->trace_output = get_trace_output(he);
+
+	field = hde->field;
+	namelen = strlen(field->name);
+	str = he->trace_output;
+
+	while (str) {
+		pos = strchr(str, ' ');
+		if (pos == NULL) {
+			last = true;
+			pos = str + strlen(str);
+		}
+
+		if (!strncmp(str, field->name, namelen)) {
+			/* skip the name and one separator, copy the value */
+			str += namelen + 1;
+			str = strndup(str, pos - str);
+
+			if (str == NULL)
+				return scnprintf(hpp->buf, hpp->size,
+						 "%*.*s", width, width, "ERROR");
+			break;
+		}
+
+		if (last)
+			str = NULL;
+		else
+			str = pos + 1;
+	}
+
+	if (str == NULL) {
+		struct trace_seq seq;
+raw_field:
+		trace_seq_init(&seq);
+		pevent_print_field(&seq, he->raw_data, hde->field);
+		str = seq.buffer;
+	}
+
+	ret = scnprintf(hpp->buf, hpp->size, "%*.*s", width, width, str);
+	/* str is either the strndup() copy or the trace_seq buffer */
+	free(str);
+	return ret;
+}
+
+/*
+ * cmp/collapse/sort callback for dynamic entries.
+ *
+ * With @b == NULL nothing is compared: the call only updates the recorded
+ * maximum value length for @a and returns 0.  Otherwise the raw bytes of
+ * the field are compared with memcmp().  For FIELD_IS_DYNAMIC fields the
+ * offset (low 16 bits) and size (next 16 bits) are decoded from the number
+ * read out of @a's raw data, and the size also updates dynamic_len; for
+ * other fields the static offset and size are used.
+ */
+static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
+			       struct hist_entry *a, struct hist_entry *b)
+{
+	struct hpp_dynamic_entry *hde = container_of(fmt,
+						     struct hpp_dynamic_entry,
+						     hpp);
+	struct format_field *field;
+	unsigned offset, size;
+
+	if (!b) {
+		update_dynamic_len(hde, a);
+		return 0;
+	}
+
+	field = hde->field;
+	if (!(field->flags & FIELD_IS_DYNAMIC)) {
+		offset = field->offset;
+		size = field->size;
+	} else {
+		unsigned long long dyn;
+
+		pevent_read_number_field(field, a->raw_data, &dyn);
+		offset = dyn & 0xffff;
+		size = (dyn >> 16) & 0xffff;
+
+		/* record max width for output */
+		if (hde->dynamic_len < size)
+			hde->dynamic_len = size;
+	}
+
+	return memcmp(a->raw_data + offset, b->raw_data + offset, size);
+}
+
+/*
+ * A format is a dynamic entry exactly when its cmp callback is
+ * __sort__hde_cmp.
+ */
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt)
+{
+	if (fmt->cmp == __sort__hde_cmp)
+		return true;
+
+	return false;
+}
+
+/*
+ * equal callback for dynamic entries: two formats are equal when both are
+ * dynamic entries and refer to the same format_field.
+ */
+static bool __sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	if (perf_hpp__is_dynamic_entry(a) && perf_hpp__is_dynamic_entry(b)) {
+		struct hpp_dynamic_entry *lhs, *rhs;
+
+		lhs = container_of(a, struct hpp_dynamic_entry, hpp);
+		rhs = container_of(b, struct hpp_dynamic_entry, hpp);
+
+		return lhs->field == rhs->field;
+	}
+
+	return false;
+}
+
+/* ->free callback: release the hpp_dynamic_entry that embeds @fmt. */
+static void hde_free(struct perf_hpp_fmt *fmt)
+{
+	free(container_of(fmt, struct hpp_dynamic_entry, hpp));
+}
+
+/*
+ * Allocate a dynamic sort entry describing tracepoint @field of @evsel and
+ * wire up its perf_hpp_fmt callbacks.
+ *
+ * The entry comes from malloc(), so only the members assigned below are
+ * initialised; in particular ->raw_trace is left for the caller to set.
+ *
+ * Returns the new entry, or NULL if the allocation fails.
+ */
+static struct hpp_dynamic_entry *
+__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field,
+		      int level)
+{
+	struct hpp_dynamic_entry *hde = malloc(sizeof(*hde));
+	struct perf_hpp_fmt *fmt;
+
+	if (!hde) {
+		pr_debug("Memory allocation failed\n");
+		return NULL;
+	}
+	fmt = &hde->hpp;
+
+	/* what this entry describes */
+	hde->evsel = evsel;
+	hde->field = field;
+	hde->dynamic_len = 0;
+
+	/* output callbacks */
+	fmt->name = field->name;
+	fmt->header = __sort__hde_header;
+	fmt->width  = __sort__hde_width;
+	fmt->entry  = __sort__hde_entry;
+	fmt->color  = NULL;
+
+	/* the same comparison serves cmp, collapse and sort */
+	fmt->cmp = __sort__hde_cmp;
+	fmt->collapse = __sort__hde_cmp;
+	fmt->sort = __sort__hde_cmp;
+	fmt->equal = __sort__hde_equal;
+	fmt->free = hde_free;
+
+	INIT_LIST_HEAD(&fmt->list);
+	INIT_LIST_HEAD(&fmt->sort_list);
+	fmt->elide = false;
+	fmt->len = 0;
+	fmt->user_len = 0;
+	fmt->level = level;
+
+	return hde;
+}
+
+/*
+ * Duplicate @fmt.
+ *
+ * Sort entries and dynamic entries embed the perf_hpp_fmt in a larger
+ * object, so the whole containing hpp_sort_entry / hpp_dynamic_entry is
+ * copied; any other format is copied as a bare perf_hpp_fmt.  The list heads
+ * of the copy are re-initialised so that it is not linked anywhere.
+ *
+ * Returns the embedded perf_hpp_fmt of the copy, or NULL if memdup() fails.
+ */
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt)
+{
+	struct perf_hpp_fmt *new_fmt = NULL;
+
+	if (perf_hpp__is_sort_entry(fmt)) {
+		struct hpp_sort_entry *hse, *new_hse;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		new_hse = memdup(hse, sizeof(*hse));
+		if (new_hse)
+			new_fmt = &new_hse->hpp;
+	} else if (perf_hpp__is_dynamic_entry(fmt)) {
+		struct hpp_dynamic_entry *hde, *new_hde;
+
+		hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+		new_hde = memdup(hde, sizeof(*hde));
+		if (new_hde)
+			new_fmt = &new_hde->hpp;
+	} else {
+		new_fmt = memdup(fmt, sizeof(*fmt));
+	}
+
+	/* allocation failed: there is no copy whose list heads could be set up */
+	if (new_fmt == NULL)
+		return NULL;
+
+	INIT_LIST_HEAD(&new_fmt->list);
+	INIT_LIST_HEAD(&new_fmt->sort_list);
+
+	return new_fmt;
+}
+
+/*
+ * Split "[event.]field[/opt]" in place.
+ *
+ * The first '.' (if any) ends the event name and the first '/' after the
+ * field name (if any) starts the option; both separators are overwritten
+ * with NUL.  @event and @opt are set to NULL when their part is absent.
+ *
+ * Always returns 0.
+ */
+static int parse_field_name(char *str, char **event, char **field, char **opt)
+{
+	char *ev = NULL;
+	char *fld = str;
+	char *option;
+	char *dot = strchr(str, '.');
+
+	if (dot != NULL) {
+		*dot = '\0';
+		ev = str;
+		fld = dot + 1;
+	}
+
+	option = strchr(fld, '/');
+	if (option != NULL) {
+		*option = '\0';
+		option++;
+	}
+
+	*event = ev;
+	*field = fld;
+	*opt   = option;
+
+	return 0;
+}
+
+/* find match evsel using a given event name.  The event name can be:
+ *   1. '%' + event index (e.g. '%1' for first event)
+ *   2. full event name (e.g. sched:sched_switch)
+ *   3. partial event name (should not contain ':')
+ *
+ * Returns NULL when nothing matches, when the index is larger than the
+ * number of entries, or when a partial name matches more than one event.
+ */
+static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_name)
+{
+	struct perf_evsel *match = NULL;
+	struct perf_evsel *pos;
+
+	/* case 1 */
+	if (*event_name == '%') {
+		int idx = strtol(event_name + 1, NULL, 0);
+
+		if (idx > evlist->nr_entries)
+			return NULL;
+
+		/* walk idx - 1 steps from the first entry */
+		for (match = perf_evlist__first(evlist); --idx > 0; )
+			match = perf_evsel__next(match);
+
+		return match;
+	}
+
+	/* case 2: a name containing ':' must match exactly */
+	if (strchr(event_name, ':') != NULL) {
+		evlist__for_each_entry(evlist, pos) {
+			if (strcmp(pos->name, event_name) == 0)
+				return pos;
+		}
+		return NULL;
+	}
+
+	/* case 3: substring match, which has to be unique */
+	evlist__for_each_entry(evlist, pos) {
+		if (strstr(pos->name, event_name) == NULL)
+			continue;
+
+		if (match != NULL) {
+			pr_debug("'%s' event is ambiguous: it can be %s or %s\n",
+				 event_name, match->name, pos->name);
+			return NULL;
+		}
+		match = pos;
+	}
+
+	return match;
+}
+
+/*
+ * Create a dynamic entry for @field of @evsel, record @raw_trace on it and
+ * register it as a sort field.
+ *
+ * Returns 0 on success or -ENOMEM if the entry cannot be allocated.
+ */
+static int __dynamic_dimension__add(struct perf_evsel *evsel,
+				    struct format_field *field,
+				    bool raw_trace, int level)
+{
+	struct hpp_dynamic_entry *hde = __alloc_dynamic_entry(evsel, field, level);
+
+	if (!hde)
+		return -ENOMEM;
+
+	hde->raw_trace = raw_trace;
+	perf_hpp__register_sort_field(&hde->hpp);
+
+	return 0;
+}
+
+/*
+ * Add a dynamic sort entry for every field in @evsel's tracepoint format.
+ *
+ * Stops at, and returns, the first negative result; returns 0 otherwise.
+ */
+static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level)
+{
+	struct format_field *field;
+
+	for (field = evsel->tp_format->format.fields; field; field = field->next) {
+		int ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
+
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Add dynamic sort entries for all fields of every tracepoint event in
+ * @evlist; events of any other type are skipped.
+ *
+ * Returns 0, or the first negative result from add_evsel_fields().
+ */
+static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace,
+				  int level)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		int ret;
+
+		if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+			ret = add_evsel_fields(evsel, raw_trace, level);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Add a dynamic sort entry for @field_name on every tracepoint event in
+ * @evlist that has a field of that name.
+ *
+ * Returns -ESRCH if no event has such a field, the negative error of the
+ * first failed addition, or otherwise the result of the last addition.
+ */
+static int add_all_matching_fields(struct perf_evlist *evlist,
+				   char *field_name, bool raw_trace, int level)
+{
+	struct perf_evsel *evsel;
+	int ret = -ESRCH;
+
+	evlist__for_each_entry(evlist, evsel) {
+		struct format_field *field = NULL;
+
+		if (evsel->attr.type == PERF_TYPE_TRACEPOINT)
+			field = pevent_find_any_field(evsel->tp_format, field_name);
+
+		if (!field)
+			continue;
+
+		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
+}
+
+/*
+ * Add dynamic (tracepoint field) sort entries described by @tok.
+ *
+ * @tok has the form "[event.]field[/raw]" and is parsed on a private copy:
+ *   - the only accepted option is "raw", which forces raw_trace on;
+ *   - field "trace_fields" adds every field of every tracepoint event;
+ *   - without an event part, the field is added for every tracepoint event
+ *     that has it;
+ *   - with an event part, the event is looked up with find_evsel() and must
+ *     be a tracepoint; field "*" adds all of its fields.
+ *
+ * Returns 0 on success, -ENOENT if @evlist is NULL or the event/field is
+ * not found, -ENOMEM on allocation failure, -EINVAL for an unsupported
+ * option or a non-tracepoint event, or the error of the helper that failed.
+ */
+static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok,
+			     int level)
+{
+	char *str, *event_name, *field_name, *opt_name;
+	struct perf_evsel *evsel;
+	struct format_field *field;
+	bool raw_trace = symbol_conf.raw_trace;
+	int ret = 0;
+
+	if (evlist == NULL)
+		return -ENOENT;
+
+	/* parse_field_name() writes NULs into the string, so work on a copy */
+	str = strdup(tok);
+	if (str == NULL)
+		return -ENOMEM;
+
+	if (parse_field_name(str, &event_name, &field_name, &opt_name) < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (opt_name) {
+		if (strcmp(opt_name, "raw")) {
+			pr_debug("unsupported field option %s\n", opt_name);
+			ret = -EINVAL;
+			goto out;
+		}
+		raw_trace = true;
+	}
+
+	if (!strcmp(field_name, "trace_fields")) {
+		ret = add_all_dynamic_fields(evlist, raw_trace, level);
+		goto out;
+	}
+
+	if (event_name == NULL) {
+		ret = add_all_matching_fields(evlist, field_name, raw_trace, level);
+		goto out;
+	}
+
+	evsel = find_evsel(evlist, event_name);
+	if (evsel == NULL) {
+		pr_debug("Cannot find event: %s\n", event_name);
+		ret = -ENOENT;
+		goto out;
+	}
+
+	if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+		pr_debug("%s is not a tracepoint event\n", event_name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!strcmp(field_name, "*")) {
+		ret = add_evsel_fields(evsel, raw_trace, level);
+	} else {
+		field = pevent_find_any_field(evsel->tp_format, field_name);
+		if (field == NULL) {
+			pr_debug("Cannot find event field for %s.%s\n",
+				 event_name, field_name);
+			/* leave through 'out' so the copy of @tok is freed */
+			ret = -ENOENT;
+			goto out;
+		}
+
+		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
+	}
+
+out:
+	free(str);
+	return ret;
+}
+
+/*
+ * Register sort dimension @sd on @list as a sort key, unless it has been
+ * taken already.  Sets list->need_collapse when the entry provides a
+ * se_collapse callback.
+ *
+ * Returns 0 on success (or when already taken), -1 on failure.
+ */
+static int __sort_dimension__add(struct sort_dimension *sd,
+				 struct perf_hpp_list *list,
+				 int level)
+{
+	if (!sd->taken) {
+		if (__sort_dimension__add_hpp_sort(sd, list, level) < 0)
+			return -1;
+
+		if (sd->entry->se_collapse)
+			list->need_collapse = 1;
+
+		sd->taken = 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Register hpp dimension @hd on @list as a sort key, unless it has been
+ * taken already.
+ *
+ * Returns 0 on success (or when already taken), -1 if no format could be
+ * allocated.
+ */
+static int __hpp_dimension__add(struct hpp_dimension *hd,
+				struct perf_hpp_list *list,
+				int level)
+{
+	if (!hd->taken) {
+		struct perf_hpp_fmt *fmt = __hpp_dimension__alloc_hpp(hd, level);
+
+		if (fmt == NULL)
+			return -1;
+
+		hd->taken = 1;
+		perf_hpp_list__register_sort_field(list, fmt);
+	}
+
+	return 0;
+}
+
+/*
+ * Add sort dimension @sd to @list as an output column, unless it has been
+ * taken already.
+ *
+ * Returns 0 on success (or when already taken), -1 on failure.
+ */
+static int __sort_dimension__add_output(struct perf_hpp_list *list,
+					struct sort_dimension *sd)
+{
+	if (!sd->taken) {
+		if (__sort_dimension__add_hpp_output(sd, list) < 0)
+			return -1;
+
+		sd->taken = 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Add hpp dimension @hd to @list as an output column (allocated at level 0),
+ * unless it has been taken already.
+ *
+ * Returns 0 on success (or when already taken), -1 if no format could be
+ * allocated.
+ */
+static int __hpp_dimension__add_output(struct perf_hpp_list *list,
+				       struct hpp_dimension *hd)
+{
+	if (!hd->taken) {
+		struct perf_hpp_fmt *fmt = __hpp_dimension__alloc_hpp(hd, 0);
+
+		if (fmt == NULL)
+			return -1;
+
+		hd->taken = 1;
+		perf_hpp_list__column_register(list, fmt);
+	}
+
+	return 0;
+}
+
+/*
+ * Add the hpp dimension with index @col to the global perf_hpp_list as an
+ * output column.  @col must be below PERF_HPP__MAX_INDEX (BUG_ON otherwise).
+ */
+int hpp_dimension__add_output(unsigned col)
+{
+	struct hpp_dimension *hd;
+
+	BUG_ON(col >= PERF_HPP__MAX_INDEX);
+
+	hd = &hpp_sort_dimensions[col];
+	return __hpp_dimension__add_output(&perf_hpp_list, hd);
+}
+
+/*
+ * Add the sort key named by @tok to @list at hierarchy @level.
+ *
+ * @tok is compared case-insensitively over strlen(tok) characters only, so
+ * any prefix of a dimension name selects it.  The tables are searched in
+ * this order and the first hit wins: common, hpp, branch-stack, memory.
+ * If none matches, @tok is tried as a dynamic tracepoint field of @evlist.
+ *
+ * Returns 0 on success, -EINVAL for an invalid parent regex, for a branch /
+ * memory key used outside its sort mode, or for "dcacheline" with an unknown
+ * cacheline size, -ESRCH when nothing matches (any failure of the dynamic
+ * lookup is reported as -ESRCH too), or the result of the helper that added
+ * the key.
+ */
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+			struct perf_evlist *evlist,
+			int level)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+		struct sort_dimension *sd = &common_sort_dimensions[i];
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		/* some keys also flag their presence on the list */
+		if (sd->entry == &sort_parent) {
+			int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
+			if (ret) {
+				char err[BUFSIZ];
+
+				regerror(ret, &parent_regex, err, sizeof(err));
+				pr_err("Invalid regex: %s\n%s", parent_pattern, err);
+				return -EINVAL;
+			}
+			list->parent = 1;
+		} else if (sd->entry == &sort_sym) {
+			list->sym = 1;
+			/*
+			 * perf diff displays the performance difference amongst
+			 * two or more perf.data files. Those files could come
+			 * from different binaries. So we should not compare
+			 * their ips, but the name of symbol.
+			 */
+			if (sort__mode == SORT_MODE__DIFF)
+				sd->entry->se_collapse = sort__sym_sort;
+
+		} else if (sd->entry == &sort_dso) {
+			list->dso = 1;
+		} else if (sd->entry == &sort_socket) {
+			list->socket = 1;
+		} else if (sd->entry == &sort_thread) {
+			list->thread = 1;
+		} else if (sd->entry == &sort_comm) {
+			list->comm = 1;
+		}
+
+		return __sort_dimension__add(sd, list, level);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+		struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+
+		if (strncasecmp(tok, hd->name, strlen(tok)))
+			continue;
+
+		return __hpp_dimension__add(hd, list, level);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
+		struct sort_dimension *sd = &bstack_sort_dimensions[i];
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		/* branch-stack keys are only valid in branch mode */
+		if (sort__mode != SORT_MODE__BRANCH)
+			return -EINVAL;
+
+		if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
+			list->sym = 1;
+
+		/* NOTE(review): the return value is ignored here; 0 is returned even on failure */
+		__sort_dimension__add(sd, list, level);
+		return 0;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
+		struct sort_dimension *sd = &memory_sort_dimensions[i];
+
+		if (strncasecmp(tok, sd->name, strlen(tok)))
+			continue;
+
+		/* memory keys are only valid in memory mode */
+		if (sort__mode != SORT_MODE__MEMORY)
+			return -EINVAL;
+
+		if (sd->entry == &sort_mem_dcacheline && cacheline_size == 0)
+			return -EINVAL;
+
+		if (sd->entry == &sort_mem_daddr_sym)
+			list->sym = 1;
+
+		/* NOTE(review): the return value is ignored here; 0 is returned even on failure */
+		__sort_dimension__add(sd, list, level);
+		return 0;
+	}
+
+	/* not a built-in key: try it as a tracepoint field */
+	if (!add_dynamic_entry(evlist, tok, level))
+		return 0;
+
+	return -ESRCH;
+}
+
+/*
+ * Split @str (modified in place) into sort keys and add each to @list.
+ *
+ * Keys are separated by any of '{', '}', ',' or ' '; empty tokens are
+ * skipped.  Each key is added at the current hierarchy level.  When a
+ * delimiter is found, the level for the following key stays the same if a
+ * '{' group was open before that delimiter and is incremented otherwise;
+ * only afterwards is the group state updated for '{' / '}'.
+ *
+ * Processing stops with an error message at the first key for which
+ * sort_dimension__add() returns -EINVAL or -ESRCH.
+ *
+ * Returns the result of the last sort_dimension__add() call (0 if no key
+ * was present).
+ */
+static int setup_sort_list(struct perf_hpp_list *list, char *str,
+			   struct perf_evlist *evlist)
+{
+	char *tmp, *tok;
+	int ret = 0;
+	int level = 0;
+	int next_level = 1;
+	bool in_group = false;
+
+	do {
+		tok = str;
+		tmp = strpbrk(str, "{}, ");
+		if (tmp) {
+			/* decide the next level using the group state before this delimiter */
+			if (in_group)
+				next_level = level;
+			else
+				next_level = level + 1;
+
+			if (*tmp == '{')
+				in_group = true;
+			else if (*tmp == '}')
+				in_group = false;
+
+			/* terminate the token and continue after the delimiter */
+			*tmp = '\0';
+			str = tmp + 1;
+		}
+
+		if (*tok) {
+			ret = sort_dimension__add(list, tok, evlist, level);
+			if (ret == -EINVAL) {
+				if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok)))
+					pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
+				else
+					pr_err("Invalid --sort key: `%s'", tok);
+				break;
+			} else if (ret == -ESRCH) {
+				pr_err("Unknown --sort key: `%s'", tok);
+				break;
+			}
+		}
+
+		level = next_level;
+	} while (tmp);
+
+	return ret;
+}
+
+/*
+ * Pick the default sort order string for the current sort__mode.
+ *
+ * If @evlist is non-empty and consists solely of tracepoint events,
+ * sort__mode is switched to SORT_MODE__TRACEPOINT first; in that case
+ * "trace_fields" is returned when symbol_conf.raw_trace is set.
+ */
+static const char *get_default_sort_order(struct perf_evlist *evlist)
+{
+	/* indexed by sort__mode */
+	const char *default_sort_orders[] = {
+		default_sort_order,
+		default_branch_sort_order,
+		default_mem_sort_order,
+		default_top_sort_order,
+		default_diff_sort_order,
+		default_tracepoint_sort_order,
+	};
+	struct perf_evsel *evsel;
+
+	BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders));
+
+	if (evlist != NULL && !perf_evlist__empty(evlist)) {
+		bool use_trace = true;
+
+		/* a single non-tracepoint event rules out tracepoint mode */
+		evlist__for_each_entry(evlist, evsel) {
+			if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+				use_trace = false;
+				break;
+			}
+		}
+
+		if (use_trace) {
+			sort__mode = SORT_MODE__TRACEPOINT;
+			if (symbol_conf.raw_trace)
+				return "trace_fields";
+		}
+	}
+
+	return default_sort_orders[sort__mode];
+}
+
+/*
+ * Expand a '+'-prefixed sort_order into "<default order>,<user keys>".
+ *
+ * Nothing is done when sort_order is unset or does not start with '+'.
+ *
+ * Returns 0 on success, -EINVAL for a bare "+", -ENOMEM if the new string
+ * cannot be allocated.
+ */
+static int setup_sort_order(struct perf_evlist *evlist)
+{
+	const char *default_order;
+	char *new_sort_order;
+
+	if (sort_order == NULL || is_strict_order(sort_order))
+		return 0;
+
+	if (sort_order[1] == '\0') {
+		pr_err("Invalid --sort key: `+'");
+		return -EINVAL;
+	}
+
+	default_order = get_default_sort_order(evlist);
+
+	/*
+	 * We allocate new sort_order string, but we never free it,
+	 * because it's checked over the rest of the code.
+	 */
+	if (asprintf(&new_sort_order, "%s,%s",
+		     default_order, sort_order + 1) < 0) {
+		pr_err("Not enough memory to set up --sort");
+		return -ENOMEM;
+	}
+
+	sort_order = new_sort_order;
+	return 0;
+}
+
+/*
+ * Adds 'pre,' prefix into 'str' if 'pre' is
+ * not already part of 'str'.
+ *
+ * 'str' must be heap-allocated (or NULL).  When a prefix is needed, 'str'
+ * is consumed: it is freed whether or not building the new string succeeds,
+ * so the caller must only use the returned pointer afterwards.
+ *
+ * Returns 'str' itself when it is NULL or already contains 'pre', the newly
+ * allocated "pre,str" string otherwise, or NULL if that allocation fails.
+ */
+static char *prefix_if_not_in(const char *pre, char *str)
+{
+	char *n;
+
+	if (!str || strstr(str, pre))
+		return str;
+
+	/* on failure still fall through to free(str): the caller drops it */
+	if (asprintf(&n, "%s,%s", pre, str) < 0)
+		n = NULL;
+
+	free(str);
+	return n;
+}
+
+/*
+ * Prepend the overhead keys to @keys: "overhead" always, and
+ * "overhead_children" too when callchains are cumulated.  In diff mode
+ * @keys is returned untouched.
+ *
+ * Returns whatever prefix_if_not_in() last returned.
+ */
+static char *setup_overhead(char *keys)
+{
+	if (sort__mode != SORT_MODE__DIFF) {
+		keys = prefix_if_not_in("overhead", keys);
+
+		if (symbol_conf.cumulate_callchain)
+			keys = prefix_if_not_in("overhead_children", keys);
+	}
+
+	return keys;
+}
+
+/*
+ * Build the sort key list of the global perf_hpp_list.
+ *
+ * The keys come from sort_order (after '+' expansion) or, if that is unset,
+ * from the default order for the current mode.  With a strict field_order
+ * and no sort_order nothing is added.  Unless field_order is strict, the
+ * overhead keys are prepended.
+ *
+ * Returns 0 on success or a negative error.
+ */
+static int __setup_sorting(struct perf_evlist *evlist)
+{
+	const char *sort_keys;
+	char *keys;
+	int ret;
+
+	ret = setup_sort_order(evlist);
+	if (ret)
+		return ret;
+
+	sort_keys = sort_order;
+	if (!sort_keys) {
+		/*
+		 * If user specified field order but no sort order,
+		 * we'll honor it and not add default sort orders.
+		 */
+		if (is_strict_order(field_order))
+			return 0;
+
+		sort_keys = get_default_sort_order(evlist);
+	}
+
+	/* setup_sort_list() edits the string, so hand it a private copy */
+	keys = strdup(sort_keys);
+	if (!keys) {
+		pr_err("Not enough memory to setup sort keys");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Prepend overhead fields for backward compatibility.
+	 */
+	if (!is_strict_order(field_order)) {
+		keys = setup_overhead(keys);
+		if (!keys) {
+			pr_err("Not enough memory to setup overhead keys");
+			return -ENOMEM;
+		}
+	}
+
+	ret = setup_sort_list(&perf_hpp_list, keys, evlist);
+	free(keys);
+
+	return ret;
+}
+
+/*
+ * Set the elide flag of the first sort-entry format in the global
+ * perf_hpp_list whose se_width_idx equals @idx.
+ */
+void perf_hpp__set_elide(int idx, bool elide)
+{
+	struct perf_hpp_fmt *fmt;
+
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+		struct hpp_sort_entry *hse;
+
+		if (perf_hpp__is_sort_entry(fmt)) {
+			hse = container_of(fmt, struct hpp_sort_entry, hpp);
+			if (hse->se->se_width_idx == idx) {
+				fmt->elide = elide;
+				return;
+			}
+		}
+	}
+}
+
+/*
+ * A column can be elided when its filter @list holds exactly one entry.
+ * In that case the single value is also printed to @fp (if non-NULL) as a
+ * "# <list_name>: <value>" line.
+ */
+static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp)
+{
+	if (list == NULL || strlist__nr_entries(list) != 1)
+		return false;
+
+	if (fp)
+		fprintf(fp, "# %s: %s\n", list_name,
+			strlist__entry(list, 0)->s);
+
+	return true;
+}
+
+/*
+ * Decide whether the column with width index @idx may be elided, based on
+ * the matching symbol_conf filter list.  The from/to keys are considered
+ * in branch mode only.  Any other index is never elided.
+ */
+static bool get_elide(int idx, FILE *output)
+{
+	/* keys checked in every sort mode */
+	if (idx == HISTC_SYMBOL)
+		return __get_elide(symbol_conf.sym_list, "symbol", output);
+	if (idx == HISTC_DSO)
+		return __get_elide(symbol_conf.dso_list, "dso", output);
+	if (idx == HISTC_COMM)
+		return __get_elide(symbol_conf.comm_list, "comm", output);
+
+	/* branch-stack keys */
+	if (sort__mode == SORT_MODE__BRANCH) {
+		if (idx == HISTC_SYMBOL_FROM)
+			return __get_elide(symbol_conf.sym_from_list, "sym_from", output);
+		if (idx == HISTC_SYMBOL_TO)
+			return __get_elide(symbol_conf.sym_to_list, "sym_to", output);
+		if (idx == HISTC_DSO_FROM)
+			return __get_elide(symbol_conf.dso_from_list, "dso_from", output);
+		if (idx == HISTC_DSO_TO)
+			return __get_elide(symbol_conf.dso_to_list, "dso_to", output);
+	}
+
+	return false;
+}
+
+/*
+ * Compute the elide flag of every sort-entry format in the global
+ * perf_hpp_list.  If that would elide all of them, the flags are cleared
+ * again so that the sort columns stay visible.
+ */
+void sort__setup_elide(FILE *output)
+{
+	struct perf_hpp_fmt *fmt;
+
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+		struct hpp_sort_entry *hse;
+
+		if (perf_hpp__is_sort_entry(fmt)) {
+			hse = container_of(fmt, struct hpp_sort_entry, hpp);
+			fmt->elide = get_elide(hse->se->se_width_idx, output);
+		}
+	}
+
+	/*
+	 * It makes no sense to elide all of sort entries.
+	 * Just revert them to show up again.
+	 */
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+		/* one visible sort entry is enough to keep the result */
+		if (perf_hpp__is_sort_entry(fmt) && !fmt->elide)
+			return;
+	}
+
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+		if (perf_hpp__is_sort_entry(fmt))
+			fmt->elide = false;
+	}
+}
+
+/*
+ * Add the output column named by @tok to @list.
+ *
+ * @tok is compared case-insensitively over strlen(tok) characters only, so
+ * any prefix of a dimension name selects it.  The tables are searched in
+ * this order and the first hit wins: common, hpp, branch-stack, memory.
+ *
+ * Returns the result of the helper that added the column, or -ESRCH when
+ * no dimension matches.
+ */
+int output_field_add(struct perf_hpp_list *list, char *tok)
+{
+	const size_t len = strlen(tok);
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+		struct sort_dimension *sd = &common_sort_dimensions[i];
+
+		if (!strncasecmp(tok, sd->name, len))
+			return __sort_dimension__add_output(list, sd);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+		struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+
+		if (!strncasecmp(tok, hd->name, len))
+			return __hpp_dimension__add_output(list, hd);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
+		struct sort_dimension *sd = &bstack_sort_dimensions[i];
+
+		if (!strncasecmp(tok, sd->name, len))
+			return __sort_dimension__add_output(list, sd);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
+		struct sort_dimension *sd = &memory_sort_dimensions[i];
+
+		if (!strncasecmp(tok, sd->name, len))
+			return __sort_dimension__add_output(list, sd);
+	}
+
+	return -ESRCH;
+}
+
+/*
+ * Split @str (modified in place) on ',' and ' ' and add every token to
+ * @list as an output column.
+ *
+ * Stops with an error message at the first token for which
+ * output_field_add() returns -EINVAL or -ESRCH.
+ *
+ * Returns the result of the last output_field_add() call (0 if there was
+ * no token).
+ */
+static int setup_output_list(struct perf_hpp_list *list, char *str)
+{
+	char *save;
+	char *tok = strtok_r(str, ", ", &save);
+	int ret = 0;
+
+	while (tok != NULL) {
+		ret = output_field_add(list, tok);
+
+		if (ret == -EINVAL) {
+			ui__error("Invalid --fields key: `%s'", tok);
+			break;
+		}
+		if (ret == -ESRCH) {
+			ui__error("Unknown --fields key: `%s'", tok);
+			break;
+		}
+
+		tok = strtok_r(NULL, ", ", &save);
+	}
+
+	return ret;
+}
+
+/* Clear the 'taken' flag of every entry in all four dimension tables. */
+void reset_dimensions(void)
+{
+	unsigned int i;
+
+	i = 0;
+	while (i < ARRAY_SIZE(common_sort_dimensions))
+		common_sort_dimensions[i++].taken = 0;
+
+	i = 0;
+	while (i < ARRAY_SIZE(hpp_sort_dimensions))
+		hpp_sort_dimensions[i++].taken = 0;
+
+	i = 0;
+	while (i < ARRAY_SIZE(bstack_sort_dimensions))
+		bstack_sort_dimensions[i++].taken = 0;
+
+	i = 0;
+	while (i < ARRAY_SIZE(memory_sort_dimensions))
+		memory_sort_dimensions[i++].taken = 0;
+}
+
+/*
+ * An order string is "strict" when it is set and does not begin with '+'.
+ * NULL is not strict; an empty string is.
+ */
+bool is_strict_order(const char *order)
+{
+	if (order == NULL)
+		return false;
+
+	return order[0] != '+';
+}
+
+/*
+ * Add the output columns requested through field_order to the global
+ * perf_hpp_list.  A leading '+' is skipped before the keys are parsed.
+ *
+ * Returns 0 when field_order is unset, -ENOMEM if it cannot be copied,
+ * -EINVAL when no key is left to parse, or the result of
+ * setup_output_list().
+ */
+static int __setup_output_field(void)
+{
+	char *copy, *keys;
+	int ret;
+
+	if (!field_order)
+		return 0;
+
+	/* setup_output_list() tokenizes in place, so work on a copy */
+	copy = strdup(field_order);
+	if (!copy) {
+		pr_err("Not enough memory to setup output fields");
+		return -ENOMEM;
+	}
+
+	keys = is_strict_order(field_order) ? copy : copy + 1;
+
+	if (*keys == '\0') {
+		pr_err("Invalid --fields key: `+'");
+		ret = -EINVAL;
+	} else {
+		ret = setup_output_list(&perf_hpp_list, keys);
+	}
+
+	free(copy);
+	return ret;
+}
+
+/*
+ * Set up the sort keys and output fields of the global perf_hpp_list.
+ *
+ * The steps run in a fixed order: sort keys first, then (if a non-default
+ * parent pattern is in use) the "parent" key, then the dimension 'taken'
+ * flags are reset so the same dimensions can be used again for output
+ * fields, and finally the output fields are built and cross-copied with the
+ * sort keys.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+int setup_sorting(struct perf_evlist *evlist)
+{
+	int err;
+
+	err = __setup_sorting(evlist);
+	if (err < 0)
+		return err;
+
+	/* the pointer differs from the default only when a pattern was supplied */
+	if (parent_pattern != default_parent_pattern) {
+		err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1);
+		if (err < 0)
+			return err;
+	}
+
+	reset_dimensions();
+
+	/*
+	 * perf diff doesn't use default hpp output fields.
+	 */
+	if (sort__mode != SORT_MODE__DIFF)
+		perf_hpp__init();
+
+	err = __setup_output_field();
+	if (err < 0)
+		return err;
+
+	/* copy sort keys to output fields */
+	perf_hpp__setup_output_field(&perf_hpp_list);
+	/* and then copy output fields to sort keys */
+	perf_hpp__append_sort_keys(&perf_hpp_list);
+
+	/* setup hists-specific output fields */
+	if (perf_hpp__setup_hists_formats(&perf_hpp_list, evlist) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Forget the current sort/output configuration: clear the need_collapse,
+ * parent, sym and dso flags of the global perf_hpp_list, drop field_order
+ * and sort_order, and reset the dimension tables and the output fields.
+ */
+void reset_output_field(void)
+{
+	struct perf_hpp_list *list = &perf_hpp_list;
+
+	list->need_collapse = 0;
+	list->parent = 0;
+	list->sym = 0;
+	list->dso = 0;
+
+	sort_order = NULL;
+	field_order = NULL;
+
+	reset_dimensions();
+	perf_hpp__reset_output_field(list);
+}
diff --git a/util/sort.h b/util/sort.h
new file mode 100644
index 0000000..035b62e
--- /dev/null
+++ b/util/sort.h
@@ -0,0 +1,296 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SORT_H
+#define __PERF_SORT_H
+#include "../builtin.h"
+
+#include <regex.h>
+
+#include "color.h"
+#include <linux/list.h>
+#include "cache.h"
+#include <linux/rbtree.h>
+#include "symbol.h"
+#include "string.h"
+#include "callchain.h"
+#include "values.h"
+
+#include "../perf.h"
+#include "debug.h"
+#include "header.h"
+
+#include <subcmd/parse-options.h>
+#include "parse-events.h"
+#include "hist.h"
+#include "srcline.h"
+
+struct thread;
+
+extern regex_t parent_regex;
+extern const char *sort_order;
+extern const char *field_order;
+extern const char default_parent_pattern[];
+extern const char *parent_pattern;
+extern const char *default_sort_order;
+extern regex_t ignore_callees_regex;
+extern int have_ignore_callees;
+extern enum sort_mode sort__mode;
+extern struct sort_entry sort_comm;
+extern struct sort_entry sort_dso;
+extern struct sort_entry sort_sym;
+extern struct sort_entry sort_parent;
+extern struct sort_entry sort_dso_from;
+extern struct sort_entry sort_dso_to;
+extern struct sort_entry sort_sym_from;
+extern struct sort_entry sort_sym_to;
+extern struct sort_entry sort_srcline;
+extern enum sort_type sort__first_dimension;
+extern const char default_mem_sort_order[];
+
+/*
+ * Numeric counters kept for a hist entry: embedded as hist_entry.stat and
+ * pointed to by hist_entry.stat_acc.
+ * NOTE(review): the meaning of the individual period_* splits is inferred
+ * from the field names only -- confirm against the code that fills them in.
+ */
+struct he_stat {
+	u64			period;
+	u64			period_sys;
+	u64			period_us;
+	u64			period_guest_sys;
+	u64			period_guest_us;
+	u64			weight;
+	u32			nr_events;
+};
+
+/*
+ * A (dev, ino) pair; used as the type of hist_entry.cgroup_id.
+ * NOTE(review): presumably the device and inode number identifying a
+ * namespace -- confirm against the code that sets it.
+ */
+struct namespace_id {
+	u64			dev;
+	u64			ino;
+};
+
+/*
+ * Per-entry result storage for perf diff.  Only one of the union members is
+ * held at a time; the comments name the computation each belongs to.  This
+ * struct shares an anonymous union in struct hist_entry with the TUI-only
+ * fields.
+ */
+struct hist_entry_diff {
+	bool	computed;
+	union {
+		/* PERF_HPP__DELTA */
+		double	period_ratio_delta;
+
+		/* PERF_HPP__RATIO */
+		double	period_ratio;
+
+		/* HISTC_WEIGHTED_DIFF */
+		s64	wdiff;
+	};
+};
+
+/*
+ * Allocation hooks for hist entries; hist_entry.ops points to one of these.
+ * NOTE(review): presumably new() returns storage of at least @size bytes
+ * and free() releases what new() returned -- confirm with the users.
+ */
+struct hist_entry_ops {
+	void	*(*new)(size_t size);
+	void	(*free)(void *ptr);
+};
+
+/**
+ * struct hist_entry - histogram entry
+ *
+ * @row_offset - offset from the first callchain expanded to appear on screen
+ * @nr_rows - rows expanded in callchain, recalculated on folding/unfolding
+ */
+struct hist_entry {
+	struct rb_node		rb_node_in;
+	struct rb_node		rb_node;
+	/*
+	 * Pair list linkage: hist_entry__add_pair() links a pair's ->node
+	 * onto the owning entry's ->head.
+	 */
+	union {
+		struct list_head node;
+		struct list_head head;
+	} pairs;
+	struct he_stat		stat;
+	/* read instead of ->stat when symbol_conf.cumulate_callchain is set */
+	struct he_stat		*stat_acc;
+	struct map_symbol	ms;
+	struct thread		*thread;
+	struct comm		*comm;
+	struct namespace_id	cgroup_id;
+	u64			ip;
+	u64			transaction;
+	s32			socket;
+	s32			cpu;
+	u8			cpumode;
+	u8			depth;
+
+	/* We are added by hists__add_dummy_entry. */
+	bool			dummy;
+	bool			leaf;
+
+	char			level;
+	u8			filtered;
+	union {
+		/*
+		 * Since perf diff only supports the stdio output, TUI
+		 * fields are only accessed from perf report (or perf
+		 * top).  So make it a union to reduce memory usage.
+		 */
+		struct hist_entry_diff	diff;
+		struct /* for TUI */ {
+			u16	row_offset;
+			u16	nr_rows;
+			bool	init_have_children;
+			bool	unfolded;
+			bool	has_children;
+			bool	has_no_entry;
+		};
+	};
+	char			*srcline;
+	char			*srcfile;
+	struct symbol		*parent;
+	struct branch_info	*branch_info;
+	struct hists		*hists;
+	struct mem_info		*mem_info;
+	/* raw sample bytes; the dynamic tracepoint sort keys compare these */
+	void			*raw_data;
+	u32			raw_size;
+	/* filled in on first use by the dynamic entry output code */
+	void			*trace_output;
+	struct perf_hpp_list	*hpp_list;
+	struct hist_entry	*parent_he;
+	struct hist_entry_ops	*ops;
+	union {
+		/* this is for hierarchical entry structure */
+		struct {
+			struct rb_root	hroot_in;
+			struct rb_root  hroot_out;
+		};				/* non-leaf entries */
+		struct rb_root	sorted_chain;	/* leaf entry has callchains */
+	};
+	struct callchain_root	callchain[0]; /* must be last member */
+};
+
+/* True when @he's pair list is not empty. */
+static inline bool hist_entry__has_pairs(struct hist_entry *he)
+{
+	if (list_empty(&he->pairs.node))
+		return false;
+
+	return true;
+}
+
+/* Return the entry linked right after @he in its pair list, or NULL if the list is empty. */
+static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he)
+{
+	if (!hist_entry__has_pairs(he))
+		return NULL;
+
+	return list_entry(he->pairs.node.next, struct hist_entry, pairs.node);
+}
+
+/* Append @pair to the tail of @he's pair list. */
+static inline void hist_entry__add_pair(struct hist_entry *pair,
+					struct hist_entry *he)
+{
+	struct list_head *head = &he->pairs.head;
+	struct list_head *node = &pair->pairs.node;
+
+	list_add_tail(node, head);
+}
+
+/*
+ * Return @he's period as a percentage of the total period of its hists.
+ * The accumulated period (stat_acc) is used when callchains are cumulated.
+ * Returns 0 when the total period is zero.
+ */
+static inline float hist_entry__get_percent_limit(struct hist_entry *he)
+{
+	u64 total_period = hists__total_period(he->hists);
+	u64 period;
+
+	if (unlikely(total_period == 0))
+		return 0;
+
+	period = symbol_conf.cumulate_callchain ? he->stat_acc->period
+						: he->stat.period;
+
+	return period * 100.0 / total_period;
+}
+
+/*
+ * Clear the bits of @address selected by (cacheline_size - 1).
+ * NOTE(review): this yields the start of the cacheline only if
+ * cacheline_size is a power of two -- confirm where it is set.
+ */
+static inline u64 cl_address(u64 address)
+{
+	/* return the cacheline of the address */
+	return (address & ~(cacheline_size - 1));
+}
+
+static inline u64 cl_offset(u64 address)
+{
+	/* return the offset of the address within its cacheline */
+	return (address & (cacheline_size - 1));
+}
+
+/*
+ * Sorting mode held in sort__mode.  The values index the table of default
+ * sort order strings in get_default_sort_order(), so the order of the
+ * enumerators must stay in sync with that table.
+ */
+enum sort_mode {
+	SORT_MODE__NORMAL,
+	SORT_MODE__BRANCH,
+	SORT_MODE__MEMORY,
+	SORT_MODE__TOP,
+	SORT_MODE__DIFF,
+	SORT_MODE__TRACEPOINT,
+};
+
+/*
+ * Identifiers for the sort keys, in three consecutive groups: common keys,
+ * branch-stack keys and memory-mode keys.  __SORT_BRANCH_STACK and
+ * __SORT_MEMORY_MODE are aliases for the first key of their group.
+ */
+enum sort_type {
+	/* common sort keys */
+	SORT_PID,
+	SORT_COMM,
+	SORT_DSO,
+	SORT_SYM,
+	SORT_PARENT,
+	SORT_CPU,
+	SORT_SOCKET,
+	SORT_SRCLINE,
+	SORT_SRCFILE,
+	SORT_LOCAL_WEIGHT,
+	SORT_GLOBAL_WEIGHT,
+	SORT_TRANSACTION,
+	SORT_TRACE,
+	SORT_SYM_SIZE,
+	SORT_DSO_SIZE,
+	SORT_CGROUP_ID,
+
+	/* branch stack specific sort keys */
+	__SORT_BRANCH_STACK,
+	SORT_DSO_FROM = __SORT_BRANCH_STACK,
+	SORT_DSO_TO,
+	SORT_SYM_FROM,
+	SORT_SYM_TO,
+	SORT_MISPREDICT,
+	SORT_ABORT,
+	SORT_IN_TX,
+	SORT_CYCLES,
+	SORT_SRCLINE_FROM,
+	SORT_SRCLINE_TO,
+
+	/* memory mode specific sort keys */
+	__SORT_MEMORY_MODE,
+	SORT_MEM_DADDR_SYMBOL = __SORT_MEMORY_MODE,
+	SORT_MEM_DADDR_DSO,
+	SORT_MEM_LOCKED,
+	SORT_MEM_TLB,
+	SORT_MEM_LVL,
+	SORT_MEM_SNOOP,
+	SORT_MEM_DCACHELINE,
+	SORT_MEM_IADDR_SYMBOL,
+	SORT_MEM_PHYS_DADDR,
+};
+
+/*
+ * configurable sorting bits
+ *
+ * One sort_entry describes a sort key: its column header, its comparison
+ * callbacks and how it is printed and filtered.
+ */
+
+struct sort_entry {
+	const char *se_header;
+
+	int64_t (*se_cmp)(struct hist_entry *, struct hist_entry *);
+	/* when set, adding this key marks the hpp list as needing collapse */
+	int64_t (*se_collapse)(struct hist_entry *, struct hist_entry *);
+	int64_t	(*se_sort)(struct hist_entry *, struct hist_entry *);
+	int	(*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
+			       unsigned int width);
+	int	(*se_filter)(struct hist_entry *he, int type, const void *arg);
+	/* HISTC_* column index; the elide code matches columns on it */
+	u8	se_width_idx;
+};
+
+extern struct sort_entry sort_thread;
+extern struct list_head hist_entry__sort_list;
+
+struct perf_evlist;
+struct pevent;
+int setup_sorting(struct perf_evlist *evlist);
+int setup_output_field(void);
+void reset_output_field(void);
+void sort__setup_elide(FILE *fp);
+void perf_hpp__set_elide(int idx, bool elide);
+
+int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
+
+bool is_strict_order(const char *order);
+
+int hpp_dimension__add_output(unsigned col);
+void reset_dimensions(void);
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+			struct perf_evlist *evlist,
+			int level);
+int output_field_add(struct perf_hpp_list *list, char *tok);
+int64_t
+sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
+char *hist_entry__get_srcline(struct hist_entry *he);
+#endif	/* __PERF_SORT_H */
diff --git a/util/srcline.c b/util/srcline.c
new file mode 100644
index 0000000..3c21fd0
--- /dev/null
+++ b/util/srcline.c
@@ -0,0 +1,702 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/kernel.h>
+
+#include "util/dso.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/callchain.h"
+#include "srcline.h"
+#include "string2.h"
+#include "symbol.h"
+
+bool srcline_full_filename;
+
+/*
+ * Return the file name to use for @dso: symsrc_filename when set,
+ * long_name otherwise.  Returns NULL for names starting with '[' or with
+ * "/tmp/perf-".
+ */
+static const char *dso__name(struct dso *dso)
+{
+	const char *dso_name = dso->symsrc_filename ? dso->symsrc_filename
+						    : dso->long_name;
+
+	if (dso_name[0] == '[' || !strncmp(dso_name, "/tmp/perf-", 10))
+		return NULL;
+
+	return dso_name;
+}
+
+/*
+ * Allocate an inline_list entry holding @symbol and @srcline and link it
+ * into @node->val: at the tail for ORDER_CALLEE callchains, at the head
+ * otherwise.
+ *
+ * Returns 0 on success, -1 if the entry cannot be allocated.
+ */
+static int inline_list__append(struct symbol *symbol, char *srcline,
+			       struct inline_node *node)
+{
+	struct inline_list *ilist = zalloc(sizeof(*ilist));
+
+	if (!ilist)
+		return -1;
+
+	ilist->symbol = symbol;
+	ilist->srcline = srcline;
+
+	if (callchain_param.order != ORDER_CALLEE)
+		list_add(&ilist->list, &node->val);
+	else
+		list_add_tail(&ilist->list, &node->val);
+
+	return 0;
+}
+
+/*
+ * basename version that takes a const input string: returns the part of
+ * @path after its last '/', or @path itself when it contains no '/'.
+ */
+static const char *gnu_basename(const char *path)
+{
+	const char *slash = strrchr(path, '/');
+
+	if (slash == NULL)
+		return path;
+
+	return slash + 1;
+}
+
+/*
+ * Build a newly allocated "file:line" string.  Unless srcline_full_filename
+ * is set, only the basename of @file is used.
+ *
+ * Returns NULL when @file is NULL or the allocation fails.
+ */
+static char *srcline_from_fileline(const char *file, unsigned int line)
+{
+	const char *name;
+	char *srcline;
+
+	if (file == NULL)
+		return NULL;
+
+	name = srcline_full_filename ? file : gnu_basename(file);
+
+	if (asprintf(&srcline, "%s:%u", name, line) < 0)
+		return NULL;
+
+	return srcline;
+}
+
+/*
+ * Return the symbol to use for an inline frame named @funcname.
+ *
+ * The name is demangled first when @dso is given.  If it then equals the
+ * name of @base_sym, @base_sym itself is returned; otherwise a new symbol
+ * is created with @base_sym's start, end and binding (or zeros when
+ * @base_sym is NULL) and flagged as inlined.
+ *
+ * May return NULL if the new symbol cannot be created.
+ */
+static struct symbol *new_inline_sym(struct dso *dso,
+				     struct symbol *base_sym,
+				     const char *funcname)
+{
+	struct symbol *inline_sym;
+	char *demangled = dso ? dso__demangle_sym(dso, 0, funcname) : NULL;
+	bool reuse_base;
+
+	if (demangled)
+		funcname = demangled;
+
+	reuse_base = base_sym && strcmp(funcname, base_sym->name) == 0;
+
+	if (!reuse_base) {
+		/* create a fake symbol for the inline frame */
+		inline_sym = symbol__new(base_sym ? base_sym->start : 0,
+					 base_sym ? base_sym->end : 0,
+					 base_sym ? base_sym->binding : 0,
+					 funcname);
+		if (inline_sym)
+			inline_sym->inlined = 1;
+	} else {
+		/* reuse the real, existing symbol */
+		inline_sym = base_sym;
+		/* ensure that we don't alias an inlined symbol, which could
+		 * lead to double frees in inline_node__delete
+		 */
+		assert(!base_sym->inlined);
+	}
+
+	free(demangled);
+
+	return inline_sym;
+}
+
+#ifdef HAVE_LIBBFD_SUPPORT
+
+/*
+ * Implement addr2line using libbfd.
+ */
+#define PACKAGE "perf"
+#include <bfd.h>
+
+/*
+ * State of one libbfd-based address lookup, kept in dso->a2l.
+ *
+ * input, abfd and syms are set up by addr2line_init() / slurp_symtab();
+ * addr is the query; found, filename, funcname and line are the results
+ * written by find_address_in_section().
+ */
+struct a2l_data {
+	const char 	*input;
+	u64	 	addr;
+
+	bool 		found;
+	const char 	*filename;
+	const char 	*funcname;
+	unsigned 	line;
+
+	bfd 		*abfd;
+	asymbol 	**syms;
+};
+
+/*
+ * Log the current libbfd error message at debug level, prefixed with
+ * "@string: " when @string is given.  stdout is flushed first.
+ *
+ * Always returns -1 so callers can 'return bfd_error(...)'.
+ */
+static int bfd_error(const char *string)
+{
+	const char *errmsg = bfd_errmsg(bfd_get_error());
+
+	fflush(stdout);
+
+	if (!string)
+		pr_debug("%s\n", errmsg);
+	else
+		pr_debug("%s: %s\n", string, errmsg);
+
+	return -1;
+}
+
+/*
+ * Read the symbol table of @abfd into a newly allocated array stored in
+ * @a2l->syms.  The dynamic symbol table is used when the regular one
+ * reports an upper bound of 0.
+ *
+ * Returns 0 on success, -1 if the file has no symbols, libbfd reports an
+ * error, or the array cannot be allocated.  Nothing is stored on failure.
+ */
+static int slurp_symtab(bfd *abfd, struct a2l_data *a2l)
+{
+	long storage;
+	long symcount;
+	asymbol **syms;
+	bfd_boolean dynamic = FALSE;
+
+	if ((bfd_get_file_flags(abfd) & HAS_SYMS) == 0)
+		return bfd_error(bfd_get_filename(abfd));
+
+	storage = bfd_get_symtab_upper_bound(abfd);
+	if (storage == 0L) {
+		storage = bfd_get_dynamic_symtab_upper_bound(abfd);
+		dynamic = TRUE;
+	}
+	if (storage < 0L)
+		return bfd_error(bfd_get_filename(abfd));
+
+	syms = malloc(storage);
+	/* libbfd writes into this array, so never hand it a NULL pointer */
+	if (syms == NULL)
+		return -1;
+
+	if (dynamic)
+		symcount = bfd_canonicalize_dynamic_symtab(abfd, syms);
+	else
+		symcount = bfd_canonicalize_symtab(abfd, syms);
+
+	if (symcount < 0) {
+		free(syms);
+		return bfd_error(bfd_get_filename(abfd));
+	}
+
+	a2l->syms = syms;
+	return 0;
+}
+
+/*
+ * bfd_map_over_sections() callback: if a2l->addr lies inside this
+ * SEC_ALLOC section, look up its nearest source line and store the result
+ * in the a2l_data passed as @data.  Does nothing once a match was found.
+ * An empty file name is turned into NULL.
+ */
+static void find_address_in_section(bfd *abfd, asection *section, void *data)
+{
+	struct a2l_data *a2l = data;
+	bfd_vma pc, vma;
+	bfd_size_type size;
+
+	if (a2l->found)
+		return;
+
+	if (!(bfd_get_section_flags(abfd, section) & SEC_ALLOC))
+		return;
+
+	pc = a2l->addr;
+	vma = bfd_get_section_vma(abfd, section);
+	size = bfd_get_section_size(section);
+
+	if (pc >= vma && pc < vma + size) {
+		a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms,
+						   pc - vma,
+						   &a2l->filename, &a2l->funcname,
+						   &a2l->line);
+
+		if (a2l->filename && a2l->filename[0] == '\0')
+			a2l->filename = NULL;
+	}
+}
+
+/*
+ * Open @path with libbfd and load its symbol table.
+ *
+ * Returns a newly allocated a2l_data owning the bfd handle, or NULL when the
+ * file cannot be opened, is not an object file, or an allocation fails.
+ */
+static struct a2l_data *addr2line_init(const char *path)
+{
+	struct a2l_data *a2l;
+	bfd *abfd = bfd_openr(path, NULL);
+
+	if (!abfd)
+		return NULL;
+
+	if (!bfd_check_format(abfd, bfd_object))
+		goto err_close;
+
+	a2l = zalloc(sizeof(*a2l));
+	if (!a2l)
+		goto err_close;
+
+	a2l->abfd = abfd;
+	a2l->input = strdup(path);
+	if (!a2l->input)
+		goto err_free;
+
+	if (slurp_symtab(abfd, a2l) != 0)
+		goto err_free;
+
+	return a2l;
+
+err_free:
+	zfree((char **)&a2l->input);
+	free(a2l);
+err_close:
+	bfd_close(abfd);
+	return NULL;
+}
+
+/* Close the bfd handle and release everything owned by @a2l, including @a2l. */
+static void addr2line_cleanup(struct a2l_data *a2l)
+{
+	bfd *abfd = a2l->abfd;
+
+	if (abfd != NULL)
+		bfd_close(abfd);
+
+	zfree((char **)&a2l->input);
+	zfree(&a2l->syms);
+
+	free(a2l);
+}
+
+#define MAX_INLINE_NEST 1024
+
+/*
+ * Append the frame currently described by dso->a2l (function name, file and
+ * line) to @node.  Returns whatever inline_list__append() returns.
+ */
+static int inline_list__append_dso_a2l(struct dso *dso,
+				       struct inline_node *node,
+				       struct symbol *sym)
+{
+	struct a2l_data *a2l = dso->a2l;
+	struct symbol *frame_sym;
+	char *frame_srcline;
+
+	frame_sym = new_inline_sym(dso, sym, a2l->funcname);
+
+	/* no srcline when libbfd did not report a file name */
+	frame_srcline = a2l->filename ?
+		srcline_from_fileline(a2l->filename, a2l->line) : NULL;
+
+	return inline_list__append(frame_sym, frame_srcline, node);
+}
+
+/*
+ * libbfd flavour of addr2line: resolve @addr inside the DSO at @dso_name.
+ *
+ * The a2l_data is created lazily on first use and cached in dso->a2l.
+ * When @unwind_inlines is set, the inline chain is walked (bounded by
+ * MAX_INLINE_NEST) and, if @node is non-NULL, every frame is appended to it.
+ * When @file / @line are non-NULL they receive a strdup()ed file name (or
+ * NULL) and the line number of the last frame libbfd reported.
+ *
+ * Returns 0 when nothing was resolved or appending a frame failed.  If @file
+ * is given, the result is 1 only when a file name could be duplicated;
+ * otherwise 1 means at least one inlined frame was appended to @node.
+ */
+static int addr2line(const char *dso_name, u64 addr,
+		     char **file, unsigned int *line, struct dso *dso,
+		     bool unwind_inlines, struct inline_node *node,
+		     struct symbol *sym)
+{
+	int ret = 0;
+	struct a2l_data *a2l = dso->a2l;
+
+	/* first lookup for this dso: open it and cache the handle */
+	if (!a2l) {
+		dso->a2l = addr2line_init(dso_name);
+		a2l = dso->a2l;
+	}
+
+	if (a2l == NULL) {
+		pr_warning("addr2line_init failed for %s\n", dso_name);
+		return 0;
+	}
+
+	a2l->addr = addr;
+	a2l->found = false;
+
+	/* find_address_in_section() fills a2l->found/filename/funcname/line */
+	bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l);
+
+	if (!a2l->found)
+		return 0;
+
+	if (unwind_inlines) {
+		int cnt = 0;
+
+		/* the frame found by bfd_find_nearest_line() comes first */
+		if (node && inline_list__append_dso_a2l(dso, node, sym))
+			return 0;
+
+		/* each call overwrites a2l->filename/funcname/line with the next frame */
+		while (bfd_find_inliner_info(a2l->abfd, &a2l->filename,
+					     &a2l->funcname, &a2l->line) &&
+		       cnt++ < MAX_INLINE_NEST) {
+
+			if (a2l->filename && !strlen(a2l->filename))
+				a2l->filename = NULL;
+
+			if (node != NULL) {
+				if (inline_list__append_dso_a2l(dso, node, sym))
+					return 0;
+				// found at least one inline frame
+				ret = 1;
+			}
+		}
+	}
+
+	/* note: this overrides the "found inline frame" result set above */
+	if (file) {
+		*file = a2l->filename ? strdup(a2l->filename) : NULL;
+		ret = *file ? 1 : 0;
+	}
+
+	if (line)
+		*line = a2l->line;
+
+	return ret;
+}
+
+/* Drop the cached libbfd addr2line state of @dso, if there is any. */
+void dso__free_a2l(struct dso *dso)
+{
+	if (dso->a2l) {
+		addr2line_cleanup(dso->a2l);
+		dso->a2l = NULL;
+	}
+}
+
+/*
+ * Build the inline node for @addr using the libbfd backed addr2line().
+ * Returns NULL only when the node itself cannot be allocated; the list may
+ * be empty when nothing was resolved.
+ */
+static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
+					struct dso *dso, struct symbol *sym)
+{
+	struct inline_node *node = zalloc(sizeof(*node));
+
+	if (!node) {
+		perror("not enough memory for the inline node");
+		return NULL;
+	}
+
+	node->addr = addr;
+	INIT_LIST_HEAD(&node->val);
+
+	/* the return value is intentionally ignored: frames land in node */
+	addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym);
+
+	return node;
+}
+
+#else /* HAVE_LIBBFD_SUPPORT */
+
+/*
+ * Split one "file:line" output line of addr2line in place.
+ *
+ * A trailing newline is cut off, then the string is terminated at the first
+ * ':' and the number after it is stored in @line_nr.
+ *
+ * Returns 1 on success, 0 for the "??:0" placeholder or when there is no ':'
+ * (@line_nr is left untouched in both cases).
+ */
+static int filename_split(char *filename, unsigned int *line_nr)
+{
+	char *nl = strchr(filename, '\n');
+	char *colon;
+
+	if (nl != NULL)
+		*nl = '\0';
+
+	if (strcmp(filename, "??:0") == 0)
+		return 0;
+
+	colon = strchr(filename, ':');
+	if (colon == NULL)
+		return 0;
+
+	*colon = '\0';
+	*line_nr = strtoul(colon + 1, NULL, 0);
+	return 1;
+}
+
+/*
+ * Fallback addr2line: run the external addr2line tool through popen() and
+ * parse its first output line.
+ *
+ * On success returns 1, stores the heap-allocated file name in @file (the
+ * caller frees it) and the line number in @line_nr.  Returns 0 otherwise.
+ */
+static int addr2line(const char *dso_name, u64 addr,
+		     char **file, unsigned int *line_nr,
+		     struct dso *dso __maybe_unused,
+		     bool unwind_inlines __maybe_unused,
+		     struct inline_node *node __maybe_unused,
+		     struct symbol *sym __maybe_unused)
+{
+	FILE *fp;
+	char cmd[PATH_MAX];
+	char *filename = NULL;
+	size_t len;
+	int ret = 0;
+
+	/*
+	 * NOTE(review): dso_name is pasted unquoted into a shell command;
+	 * a path containing shell metacharacters will be misinterpreted.
+	 */
+	scnprintf(cmd, sizeof(cmd), "addr2line -e %s %016"PRIx64,
+		  dso_name, addr);
+
+	fp = popen(cmd, "r");
+	if (fp == NULL) {
+		pr_warning("popen failed for %s\n", dso_name);
+		return 0;
+	}
+
+	if (getline(&filename, &len, fp) < 0 || !len) {
+		pr_warning("addr2line has no output for %s\n", dso_name);
+		/* getline() may have allocated the buffer even though it failed */
+		free(filename);
+		goto out;
+	}
+
+	ret = filename_split(filename, line_nr);
+	if (ret != 1) {
+		free(filename);
+		goto out;
+	}
+
+	/* ownership of the getline() buffer moves to the caller */
+	*file = filename;
+
+out:
+	pclose(fp);
+	return ret;
+}
+
+/* Nothing is cached per dso when addr2line is run as an external command. */
+void dso__free_a2l(struct dso *dso __maybe_unused)
+{
+}
+
+/*
+ * Fallback inline unwinder: run "addr2line -i -f" through popen() and turn
+ * every (function, file:line) pair of output lines into an inline_list entry.
+ *
+ * Returns the (possibly empty or partially filled) node, or NULL when popen()
+ * or the node allocation fails.
+ */
+static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
+					struct dso *dso __maybe_unused,
+					struct symbol *sym)
+{
+	FILE *fp;
+	char cmd[PATH_MAX];
+	struct inline_node *node;
+	/* getline() buffers, reused across iterations and freed at "out" */
+	char *filename = NULL;
+	char *funcname = NULL;
+	size_t filelen, funclen;
+	unsigned int line_nr = 0;
+
+	scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i -f %016"PRIx64,
+		  dso_name, addr);
+
+	fp = popen(cmd, "r");
+	if (fp == NULL) {
+		pr_err("popen failed for %s\n", dso_name);
+		return NULL;
+	}
+
+	node = zalloc(sizeof(*node));
+	if (node == NULL) {
+		perror("not enough memory for the inline node");
+		/* node is NULL here, so "out" cleans up and returns NULL */
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&node->val);
+	node->addr = addr;
+
+	/* addr2line -f generates two lines for each inlined functions */
+	while (getline(&funcname, &funclen, fp) != -1) {
+		char *srcline;
+		struct symbol *inline_sym;
+
+		rtrim(funcname);
+
+		/* second line of the pair: "file:line" */
+		if (getline(&filename, &filelen, fp) == -1)
+			goto out;
+
+		if (filename_split(filename, &line_nr) != 1)
+			goto out;
+
+		srcline = srcline_from_fileline(filename, line_nr);
+		inline_sym = new_inline_sym(dso, sym, funcname);
+
+		if (inline_list__append(inline_sym, srcline, node) != 0) {
+			/* the list did not take ownership, release both here */
+			free(srcline);
+			if (inline_sym && inline_sym->inlined)
+				symbol__delete(inline_sym);
+			goto out;
+		}
+	}
+
+out:
+	pclose(fp);
+	free(filename);
+	free(funcname);
+
+	return node;
+}
+
+#endif /* HAVE_LIBBFD_SUPPORT */
+
+/*
+ * Number of addr2line failures (without success) before disabling it for that
+ * dso.
+ */
+#define A2L_FAIL_LIMIT 123
+
+/*
+ * Return a "file:line" style string for @addr in @dso.
+ *
+ * When the lookup fails the result depends on the flags:
+ *  - !@show_addr: a copy of the symbol name if @show_sym and @sym are set,
+ *    NULL otherwise;
+ *  - @show_addr with @sym: "<name>+<ip - sym->start>" (name omitted unless
+ *    @show_sym);
+ *  - @show_addr without @sym: "<dso short name>[<addr>]".
+ * SRCLINE_UNKNOWN is returned when formatting the fallback fails.
+ *
+ * Release the result with free_srcline().
+ */
+char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+		  bool show_sym, bool show_addr, bool unwind_inlines,
+		  u64 ip)
+{
+	char *file = NULL;
+	unsigned line = 0;
+	char *srcline;
+	const char *dso_name;
+
+	if (!dso->has_srcline)
+		goto out;
+
+	dso_name = dso__name(dso);
+	if (dso_name == NULL)
+		goto out;
+
+	if (!addr2line(dso_name, addr, &file, &line, dso,
+		       unwind_inlines, NULL, sym))
+		goto out;
+
+	srcline = srcline_from_fileline(file, line);
+	free(file);
+
+	if (!srcline)
+		goto out;
+
+	/* a success stops the failure counting below for this dso */
+	dso->a2l_fails = 0;
+
+	return srcline;
+
+out:
+	/* only counts while a2l_fails is non-zero, i.e. before any success */
+	if (dso->a2l_fails && ++dso->a2l_fails > A2L_FAIL_LIMIT) {
+		dso->has_srcline = 0;
+		dso__free_a2l(dso);
+	}
+
+	if (!show_addr)
+		return (show_sym && sym) ?
+			    strndup(sym->name, sym->namelen) : NULL;
+
+	if (sym) {
+		if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "",
+					ip - sym->start) < 0)
+			return SRCLINE_UNKNOWN;
+	} else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0)
+		return SRCLINE_UNKNOWN;
+	return srcline;
+}
+
+/*
+ * Release a string returned by get_srcline().  NULL and strings equal to
+ * SRCLINE_UNKNOWN are left alone.
+ */
+void free_srcline(char *srcline)
+{
+	if (!srcline)
+		return;
+
+	if (!strcmp(srcline, SRCLINE_UNKNOWN))
+		return;
+
+	free(srcline);
+}
+
+/* Same as __get_srcline() with inline unwinding switched off. */
+char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+		  bool show_sym, bool show_addr, u64 ip)
+{
+	const bool unwind_inlines = false;
+
+	return __get_srcline(dso, addr, sym, show_sym, show_addr,
+			     unwind_inlines, ip);
+}
+
+/* One cached srcline string, stored in an rbtree keyed by address. */
+struct srcline_node {
+	/* lookup key */
+	u64			addr;
+	/* owned by the node, released with free_srcline() on tree deletion */
+	char			*srcline;
+	struct rb_node		rb_node;
+};
+
+/*
+ * Insert @srcline for @addr into @tree.  Equal addresses are placed to the
+ * right of existing entries.  On allocation failure an error is printed and
+ * nothing is inserted.
+ */
+void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline)
+{
+	struct rb_node **link = &tree->rb_node;
+	struct rb_node *parent = NULL;
+	struct srcline_node *node = zalloc(sizeof(*node));
+
+	if (node == NULL) {
+		perror("not enough memory for the srcline node");
+		return;
+	}
+
+	node->addr = addr;
+	node->srcline = srcline;
+
+	/* walk down to the leaf slot where the new node belongs */
+	while (*link) {
+		struct srcline_node *cur;
+
+		parent = *link;
+		cur = rb_entry(parent, struct srcline_node, rb_node);
+		link = (addr < cur->addr) ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&node->rb_node, parent, link);
+	rb_insert_color(&node->rb_node, tree);
+}
+
+/* Return the srcline stored for exactly @addr in @tree, or NULL. */
+char *srcline__tree_find(struct rb_root *tree, u64 addr)
+{
+	struct rb_node *cursor;
+
+	for (cursor = tree->rb_node; cursor != NULL; ) {
+		struct srcline_node *entry;
+
+		entry = rb_entry(cursor, struct srcline_node, rb_node);
+		if (addr == entry->addr)
+			return entry->srcline;
+
+		cursor = (addr < entry->addr) ? cursor->rb_left
+					      : cursor->rb_right;
+	}
+
+	return NULL;
+}
+
+/* Remove every node from @tree, freeing the nodes and their srclines. */
+void srcline__tree_delete(struct rb_root *tree)
+{
+	struct rb_node *nd = rb_first(tree);
+
+	while (nd != NULL) {
+		struct srcline_node *victim;
+
+		victim = rb_entry(nd, struct srcline_node, rb_node);
+		/* fetch the successor before the node is unlinked */
+		nd = rb_next(nd);
+
+		rb_erase(&victim->rb_node, tree);
+		free_srcline(victim->srcline);
+		free(victim);
+	}
+}
+
+/*
+ * Resolve the inlined frames at @addr in @dso.  Returns NULL when the dso
+ * has no usable name or addr2inlines() fails.
+ */
+struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
+					    struct symbol *sym)
+{
+	const char *dso_name = dso__name(dso);
+
+	if (!dso_name)
+		return NULL;
+
+	return addr2inlines(dso_name, addr, dso, sym);
+}
+
+/* Free @node together with every inline_list entry hanging off it. */
+void inline_node__delete(struct inline_node *node)
+{
+	struct inline_list *entry, *next;
+
+	list_for_each_entry_safe(entry, next, &node->val, list) {
+		struct symbol *s = entry->symbol;
+
+		list_del_init(&entry->list);
+		free_srcline(entry->srcline);
+
+		/* only the inlined symbols are owned by the list */
+		if (s != NULL && s->inlined)
+			symbol__delete(s);
+
+		free(entry);
+	}
+
+	free(node);
+}
+
+/*
+ * Link @inlines into @tree, ordered by inlines->addr.  Equal addresses go to
+ * the right of existing entries.
+ */
+void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines)
+{
+	const u64 key = inlines->addr;
+	struct rb_node **link = &tree->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		struct inline_node *cur;
+
+		parent = *link;
+		cur = rb_entry(parent, struct inline_node, rb_node);
+		link = (key < cur->addr) ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&inlines->rb_node, parent, link);
+	rb_insert_color(&inlines->rb_node, tree);
+}
+
+/* Return the inline node stored for exactly @addr in @tree, or NULL. */
+struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr)
+{
+	struct rb_node *cursor;
+
+	for (cursor = tree->rb_node; cursor != NULL; ) {
+		struct inline_node *entry;
+
+		entry = rb_entry(cursor, struct inline_node, rb_node);
+		if (addr == entry->addr)
+			return entry;
+
+		cursor = (addr < entry->addr) ? cursor->rb_left
+					      : cursor->rb_right;
+	}
+
+	return NULL;
+}
+
+/* Remove and free every inline node stored in @tree. */
+void inlines__tree_delete(struct rb_root *tree)
+{
+	struct rb_node *nd = rb_first(tree);
+
+	while (nd != NULL) {
+		struct inline_node *victim;
+
+		victim = rb_entry(nd, struct inline_node, rb_node);
+		/* fetch the successor before the node is unlinked */
+		nd = rb_next(nd);
+
+		rb_erase(&victim->rb_node, tree);
+		inline_node__delete(victim);
+	}
+}
diff --git a/util/srcline.h b/util/srcline.h
new file mode 100644
index 0000000..b2bb550
--- /dev/null
+++ b/util/srcline.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_SRCLINE_H
+#define PERF_SRCLINE_H
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct dso;
+struct symbol;
+
+extern bool srcline_full_filename;
+char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+		  bool show_sym, bool show_addr, u64 ip);
+char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+		  bool show_sym, bool show_addr, bool unwind_inlines,
+		  u64 ip);
+void free_srcline(char *srcline);
+
+/* insert the srcline into the DSO, which will take ownership */
+void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline);
+/* find previously inserted srcline */
+char *srcline__tree_find(struct rb_root *tree, u64 addr);
+/* delete all srclines within the tree */
+void srcline__tree_delete(struct rb_root *tree);
+
+#define SRCLINE_UNKNOWN  ((char *) "??:0")
+
+/* One frame of an inline call chain. */
+struct inline_list {
+	/* deleted with the entry only when symbol->inlined is set */
+	struct symbol		*symbol;
+	/* released with free_srcline() when the entry is deleted */
+	char			*srcline;
+	/* link in inline_node::val */
+	struct list_head	list;
+};
+
+/* All inline frames resolved for one address. */
+struct inline_node {
+	/* address the frames belong to; key in the inlines rbtree */
+	u64			addr;
+	/* list of struct inline_list entries */
+	struct list_head	val;
+	struct rb_node		rb_node;
+};
+
+/* parse inlined frames for the given address */
+struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
+					    struct symbol *sym);
+/* free resources associated to the inline node list */
+void inline_node__delete(struct inline_node *node);
+
+/* insert the inline node list into the DSO, which will take ownership */
+void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines);
+/* find previously inserted inline node list */
+struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr);
+/* delete all nodes within the tree of inline_node s */
+void inlines__tree_delete(struct rb_root *tree);
+
+#endif /* PERF_SRCLINE_H */
diff --git a/util/stat-shadow.c b/util/stat-shadow.c
new file mode 100644
index 0000000..594d14a
--- /dev/null
+++ b/util/stat-shadow.c
@@ -0,0 +1,999 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "evsel.h"
+#include "stat.h"
+#include "color.h"
+#include "pmu.h"
+#include "rblist.h"
+#include "evlist.h"
+#include "expr.h"
+#include "metricgroup.h"
+
+/*
+ * AGGR_GLOBAL: Use CPU 0
+ * AGGR_SOCKET: Use first CPU of socket
+ * AGGR_CORE: Use first CPU of core
+ * AGGR_NONE: Use matching CPU
+ * AGGR_THREAD: Not supported?
+ */
+static bool have_frontend_stalled;
+
+struct runtime_stat rt_stat;
+struct stats walltime_nsecs_stats;
+
+/*
+ * One entry of runtime_stat::value_list.  Entries are keyed either by
+ * cpu/type/ctx/stat (shadow stats, evsel == NULL) or by evsel/cpu (generic
+ * metrics); see saved_value_cmp().
+ */
+struct saved_value {
+	struct rb_node rb_node;
+	/* set for generic metric entries, NULL for shadow stats */
+	struct perf_evsel *evsel;
+	enum stat_type type;
+	int ctx;
+	int cpu;
+	struct runtime_stat *stat;
+	/* accumulated values for this key */
+	struct stats stats;
+};
+
+/*
+ * rblist comparison callback for saved_value entries.
+ *
+ * Ordering is by cpu, then type, then ctx, and finally by the address of
+ * either the runtime_stat (when neither side has an evsel) or the evsel.
+ */
+static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
+{
+	const struct saved_value *b = entry;
+	struct saved_value *a = container_of(rb_node,
+					     struct saved_value,
+					     rb_node);
+	const char *key_a, *key_b;
+
+	if (a->cpu != b->cpu)
+		return a->cpu - b->cpu;
+
+	/*
+	 * Previously the rbtree was used to link generic metrics.
+	 * The keys were evsel/cpu. Now the rbtree is extended to support
+	 * per-thread shadow stats. For shadow stats case, the keys
+	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
+	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
+	 */
+	if (a->type != b->type)
+		return a->type - b->type;
+
+	if (a->ctx != b->ctx)
+		return a->ctx - b->ctx;
+
+	/* pick the pointer pair that acts as the final tie breaker */
+	if (!a->evsel && !b->evsel) {
+		key_a = (char *)a->stat;
+		key_b = (char *)b->stat;
+	} else {
+		key_a = (char *)a->evsel;
+		key_b = (char *)b->evsel;
+	}
+
+	if (key_a == key_b)
+		return 0;
+
+	return key_a < key_b ? -1 : 1;
+}
+
+/*
+ * rblist allocation callback: duplicate the lookup key @entry into a new
+ * saved_value.  Returns its rb_node, or NULL when out of memory.
+ */
+static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
+				     const void *entry)
+{
+	struct saved_value *copy = malloc(sizeof(*copy));
+
+	if (copy == NULL)
+		return NULL;
+
+	memcpy(copy, entry, sizeof(*copy));
+
+	return &copy->rb_node;
+}
+
+/* rblist deletion callback: free the saved_value embedding @rb_node. */
+static void saved_value_delete(struct rblist *rblist __maybe_unused,
+			       struct rb_node *rb_node)
+{
+	BUG_ON(!rb_node);
+
+	free(container_of(rb_node, struct saved_value, rb_node));
+}
+
+/*
+ * Look up the saved_value matching evsel/cpu/type/ctx in @st->value_list.
+ * When @create is set and no entry exists, one is added first.
+ *
+ * Returns the entry, or NULL if it does not exist and was not (or could not
+ * be) created.
+ */
+static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
+					      int cpu,
+					      bool create,
+					      enum stat_type type,
+					      int ctx,
+					      struct runtime_stat *st)
+{
+	struct saved_value dm = {
+		.cpu = cpu,
+		.evsel = evsel,
+		.type = type,
+		.ctx = ctx,
+		.stat = st,
+	};
+	struct rblist *rblist = &st->value_list;
+	struct rb_node *nd = rblist__find(rblist, &dm);
+
+	if (!nd && create) {
+		/* the add may fail; the second find tells us whether it did */
+		rblist__add_node(rblist, &dm);
+		nd = rblist__find(rblist, &dm);
+	}
+
+	return nd ? container_of(nd, struct saved_value, rb_node) : NULL;
+}
+
+/* Initialise @st's value list and hook up the saved_value callbacks. */
+void runtime_stat__init(struct runtime_stat *st)
+{
+	struct rblist *values = &st->value_list;
+
+	rblist__init(values);
+
+	values->node_new = saved_value_new;
+	values->node_cmp = saved_value_cmp;
+	values->node_delete = saved_value_delete;
+}
+
+/* Tear down the value list of @st. */
+void runtime_stat__exit(struct runtime_stat *st)
+{
+	struct rblist *values = &st->value_list;
+
+	rblist__exit(values);
+}
+
+/*
+ * Set up the global shadow stat state: probe for the frontend stall event
+ * and initialise the global runtime_stat.
+ */
+void perf_stat__init_shadow_stats(void)
+{
+	bool has_fe_stalls = pmu_have_event("cpu", "stalled-cycles-frontend");
+
+	have_frontend_stalled = has_fe_stalls;
+	runtime_stat__init(&rt_stat);
+}
+
+/*
+ * Fold the exclude_* attribute bits of @evsel into a CTX_BIT_* mask, used
+ * as the context part of the shadow stat key.
+ */
+static int evsel_context(struct perf_evsel *evsel)
+{
+	return (evsel->attr.exclude_kernel ? CTX_BIT_KERNEL : 0) |
+	       (evsel->attr.exclude_user   ? CTX_BIT_USER   : 0) |
+	       (evsel->attr.exclude_hv     ? CTX_BIT_HV     : 0) |
+	       (evsel->attr.exclude_host   ? CTX_BIT_HOST   : 0) |
+	       (evsel->attr.exclude_idle   ? CTX_BIT_IDLE   : 0);
+}
+
+/* Zero the accumulated stats of every entry in @st, keeping the entries. */
+static void reset_stat(struct runtime_stat *st)
+{
+	struct rblist *values = &st->value_list;
+	struct rb_node *nd;
+
+	for (nd = rb_first(&values->entries); nd; nd = rb_next(nd)) {
+		struct saved_value *v;
+
+		v = container_of(nd, struct saved_value, rb_node);
+		memset(&v->stats, 0, sizeof(v->stats));
+	}
+}
+
+/* Reset the global shadow stats and the wall clock time statistics. */
+void perf_stat__reset_shadow_stats(void)
+{
+	struct stats *walltime = &walltime_nsecs_stats;
+
+	reset_stat(&rt_stat);
+	memset(walltime, 0, sizeof(*walltime));
+}
+
+/* Reset the shadow stats accumulated in one specific runtime_stat. */
+void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
+{
+	reset_stat(st);
+}
+
+/*
+ * Add @count to the shadow stat identified by type/ctx/cpu in @st, creating
+ * the entry on first use.  Silently does nothing if it cannot be created.
+ */
+static void update_runtime_stat(struct runtime_stat *st,
+				enum stat_type type,
+				int ctx, int cpu, u64 count)
+{
+	struct saved_value *entry;
+
+	entry = saved_value_lookup(NULL, cpu, true, type, ctx, st);
+	if (!entry)
+		return;
+
+	update_stats(&entry->stats, count);
+}
+
+/*
+ * Update various tracking values we maintain to print
+ * more semantic information such as miss/hit ratios,
+ * instruction rates, etc.
+ *
+ * @count is scaled by counter->scale and accumulated in @st under the stat
+ * type matching @counter.  Counters flagged with collect_stat are also
+ * recorded per evsel so metric expressions can use them.
+ */
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
+				    int cpu, struct runtime_stat *st)
+{
+	int ctx = evsel_context(counter);
+
+	count *= counter->scale;
+
+	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
+	    perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
+		update_runtime_stat(st, STAT_NSECS, 0, cpu, count);
+	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
+		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
+		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, ELISION_START))
+		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
+		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
+				    ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
+		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
+				    ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
+		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
+				    ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
+		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
+				    ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
+		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
+				    ctx, cpu, count);
+	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
+				    ctx, cpu, count);
+	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
+				    ctx, cpu, count);
+	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
+	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
+		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
+		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
+		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
+		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
+		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, SMI_NUM))
+		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
+	else if (perf_stat_evsel__is(counter, APERF))
+		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
+
+	if (counter->collect_stat) {
+		struct saved_value *v = saved_value_lookup(counter, cpu, true,
+							   STAT_NONE, 0, st);
+
+		/* the lookup returns NULL when the entry cannot be allocated */
+		if (v)
+			update_stats(&v->stats, count);
+	}
+}
+
+/*
+ * Row selector for the threshold table in get_ratio_color().
+ * GRC_MAX_NR is the number of rows, not a valid selector.
+ */
+enum grc_type {
+	GRC_STALLED_CYCLES_FE,
+	GRC_STALLED_CYCLES_BE,
+	GRC_CACHE_MISSES,
+	GRC_MAX_NR
+};
+
+/*
+ * Map a percentage @ratio to a highlight color, using the three descending
+ * thresholds (red, magenta, yellow) configured for @type.  Ratios at or
+ * below the lowest threshold get the normal color.
+ */
+static const char *get_ratio_color(enum grc_type type, double ratio)
+{
+	static const double grc_table[GRC_MAX_NR][3] = {
+		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
+		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
+		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
+	};
+	const double *limits = grc_table[type];
+
+	if (ratio > limits[0])
+		return PERF_COLOR_RED;
+	if (ratio > limits[1])
+		return PERF_COLOR_MAGENTA;
+	if (ratio > limits[2])
+		return PERF_COLOR_YELLOW;
+
+	return PERF_COLOR_NORMAL;
+}
+
+/*
+ * Return the first event in @evsel_list whose name matches @name, ignoring
+ * case, or NULL if there is none.
+ */
+static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
+						const char *name)
+{
+	struct perf_evsel *pos;
+
+	evlist__for_each_entry (evsel_list, pos) {
+		if (strcasecmp(pos->name, name) == 0)
+			return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * Mark MetricExpr target events and link events using them to them.
+ *
+ * For every counter with a metric expression, the other event names used by
+ * the expression are resolved (first inside the counter's group, then in the
+ * whole list) and stored as a NULL terminated array in
+ * counter->metric_events; the resolved events get collect_stat set.
+ * If any name cannot be resolved, a hint is printed and the metric is
+ * dropped from the counter.
+ */
+void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
+{
+	struct perf_evsel *counter, *leader, **metric_events, *oc;
+	bool found;
+	const char **metric_names;
+	int i;
+	int num_metric_names;
+
+	evlist__for_each_entry(evsel_list, counter) {
+		bool invalid = false;
+
+		leader = counter->leader;
+		if (!counter->metric_expr)
+			continue;
+		/*
+		 * Already linked: the names are only parsed below, so there
+		 * is nothing valid to iterate over for this counter.
+		 */
+		if (counter->metric_events)
+			continue;
+
+		if (expr__find_other(counter->metric_expr, counter->name,
+					&metric_names, &num_metric_names) < 0)
+			continue;
+
+		metric_events = calloc(num_metric_names + 1,
+				       sizeof(struct perf_evsel *));
+		if (!metric_events) {
+			free(metric_names);
+			return;
+		}
+		counter->metric_events = metric_events;
+
+		for (i = 0; i < num_metric_names; i++) {
+			found = false;
+			if (leader) {
+				/* Search in group */
+				for_each_group_member (oc, leader) {
+					if (!strcasecmp(oc->name, metric_names[i])) {
+						found = true;
+						break;
+					}
+				}
+			}
+			if (!found) {
+				/* Search ignoring groups */
+				oc = perf_stat__find_event(evsel_list, metric_names[i]);
+			}
+			if (!oc) {
+				/* Deduping one is good enough to handle duplicated PMUs. */
+				static char *printed;
+
+				/*
+				 * Adding events automatically would be difficult, because
+				 * it would risk creating groups that are not schedulable.
+				 * perf stat doesn't understand all the scheduling constraints
+				 * of events. So we ask the user instead to add the missing
+				 * events.
+				 */
+				if (!printed || strcasecmp(printed, metric_names[i])) {
+					fprintf(stderr,
+						"Add %s event to groups to get metric expression for %s\n",
+						metric_names[i],
+						counter->name);
+					/* drop the previously remembered name */
+					free(printed);
+					printed = strdup(metric_names[i]);
+				}
+				invalid = true;
+				continue;
+			}
+			metric_events[i] = oc;
+			oc->collect_stat = true;
+		}
+		metric_events[i] = NULL;
+		free(metric_names);
+		if (invalid) {
+			free(metric_events);
+			counter->metric_events = NULL;
+			counter->metric_expr = NULL;
+		}
+	}
+}
+
+/*
+ * Average of the shadow stat type/ctx/cpu in @st, or 0.0 when nothing has
+ * been recorded for that key.
+ */
+static double runtime_stat_avg(struct runtime_stat *st,
+			       enum stat_type type, int ctx, int cpu)
+{
+	struct saved_value *entry;
+
+	entry = saved_value_lookup(NULL, cpu, false, type, ctx, st);
+
+	return entry ? avg_stats(&entry->stats) : 0.0;
+}
+
+/*
+ * Number of samples recorded for the shadow stat type/ctx/cpu in @st, or
+ * 0.0 when the key has no entry.
+ */
+static double runtime_stat_n(struct runtime_stat *st,
+			     enum stat_type type, int ctx, int cpu)
+{
+	struct saved_value *entry;
+
+	entry = saved_value_lookup(NULL, cpu, false, type, ctx, st);
+
+	return entry ? entry->stats.n : 0.0;
+}
+
+/*
+ * Print frontend stalled cycles as a percentage of all cycles.  An empty
+ * metric is emitted when the percentage is zero.
+ */
+static void print_stalled_cycles_frontend(int cpu,
+					  struct perf_evsel *evsel, double avg,
+					  struct perf_stat_output_ctx *out,
+					  struct runtime_stat *st)
+{
+	int ctx = evsel_context(evsel);
+	double total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+	double ratio = total ? avg / total * 100.0 : 0.0;
+	const char *color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
+
+	if (!ratio) {
+		out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
+		return;
+	}
+
+	out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
+			  ratio);
+}
+
+/* Print backend stalled cycles as a percentage of all cycles. */
+static void print_stalled_cycles_backend(int cpu,
+					 struct perf_evsel *evsel, double avg,
+					 struct perf_stat_output_ctx *out,
+					 struct runtime_stat *st)
+{
+	int ctx = evsel_context(evsel);
+	double total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+	double ratio = total ? avg / total * 100.0 : 0.0;
+	const char *color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
+
+	out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
+}
+
+/* Print branch misses as a percentage of all branches. */
+static void print_branch_misses(int cpu,
+				struct perf_evsel *evsel,
+				double avg,
+				struct perf_stat_output_ctx *out,
+				struct runtime_stat *st)
+{
+	int ctx = evsel_context(evsel);
+	double total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);
+	double ratio = total ? avg / total * 100.0 : 0.0;
+	const char *color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
+}
+
+/* Print L1 data cache misses as a percentage of the L1-dcache count. */
+static void print_l1_dcache_misses(int cpu,
+				   struct perf_evsel *evsel,
+				   double avg,
+				   struct perf_stat_output_ctx *out,
+				   struct runtime_stat *st)
+{
+	int ctx = evsel_context(evsel);
+	double total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);
+	double ratio = total ? avg / total * 100.0 : 0.0;
+	const char *color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
+}
+
+/* Print L1 instruction cache misses as a percentage of the L1-icache count. */
+static void print_l1_icache_misses(int cpu,
+				   struct perf_evsel *evsel,
+				   double avg,
+				   struct perf_stat_output_ctx *out,
+				   struct runtime_stat *st)
+{
+	int ctx = evsel_context(evsel);
+	double total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);
+	double ratio = total ? avg / total * 100.0 : 0.0;
+	const char *color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
+}
+
+/* Print dTLB misses as a percentage of the dTLB cache count. */
+static void print_dtlb_cache_misses(int cpu,
+				    struct perf_evsel *evsel,
+				    double avg,
+				    struct perf_stat_output_ctx *out,
+				    struct runtime_stat *st)
+{
+	int ctx = evsel_context(evsel);
+	double total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);
+	double ratio = total ? avg / total * 100.0 : 0.0;
+	const char *color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
+}
+
+/* Print iTLB misses as a percentage of the iTLB cache count. */
+static void print_itlb_cache_misses(int cpu,
+				    struct perf_evsel *evsel,
+				    double avg,
+				    struct perf_stat_output_ctx *out,
+				    struct runtime_stat *st)
+{
+	int ctx = evsel_context(evsel);
+	double total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);
+	double ratio = total ? avg / total * 100.0 : 0.0;
+	const char *color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
+}
+
+/* Print last level cache misses as a percentage of the LL-cache count. */
+static void print_ll_cache_misses(int cpu,
+				  struct perf_evsel *evsel,
+				  double avg,
+				  struct perf_stat_output_ctx *out,
+				  struct runtime_stat *st)
+{
+	int ctx = evsel_context(evsel);
+	double total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);
+	double ratio = total ? avg / total * 100.0 : 0.0;
+	const char *color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
+}
+
+/*
+ * High-level "TopDown" CPU core pipeline bottleneck breakdown.
+ *
+ * Basic concept following
+ * Yasin, A Top Down Method for Performance analysis and Counter architecture
+ * ISPASS14
+ *
+ * The CPU pipeline is divided into 4 areas that can be bottlenecks:
+ *
+ * Frontend -> Backend -> Retiring
+ * BadSpeculation in addition means out of order execution that is thrown away
+ * (for example branch mispredictions)
+ * Frontend is instruction decoding.
+ * Backend is execution, like computation and accessing data in memory
+ * Retiring is good execution that is not directly bottlenecked
+ *
+ * The formulas are computed in slots.
+ * A slot is one entry in the pipeline; each cycle provides as many slots as
+ * the pipeline is wide (for example a 4-wide pipeline has 4 slots per cycle)
+ *
+ * Formulas:
+ * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
+ *			TotalSlots
+ * Retiring = SlotsRetired / TotalSlots
+ * FrontendBound = FetchBubbles / TotalSlots
+ * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
+ *
+ * The kernel provides the mapping to the low level CPU events and any scaling
+ * needed for the CPU pipeline width, for example:
+ *
+ * TotalSlots = Cycles * 4
+ *
+ * The scaling factor is communicated in the sysfs unit.
+ *
+ * In some cases the CPU may not be able to measure all the formulas due to
+ * missing events. In this case multiple formulas are combined, as possible.
+ *
+ * Full TopDown supports more levels to sub-divide each area: for example
+ * BackendBound into computing bound and memory bound. For now we only
+ * support Level 1 TopDown.
+ */
+
+/*
+ * Clamp slightly negative values (down to and including -0.02) to zero;
+ * everything else is returned unchanged.
+ */
+static double sanitize_val(double x)
+{
+	return (x < 0 && x >= -0.02) ? 0.0 : x;
+}
+
+/* Average TopDown total slots recorded for ctx/cpu in @st (0.0 if none). */
+static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
+{
+	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
+}
+
+/*
+ * TopDown bad speculation:
+ *   ((SlotsIssued - SlotsRetired) + RecoveryBubbles) / TotalSlots
+ * Yields 0 when no total slots were recorded; small negative results are
+ * clamped by sanitize_val().
+ */
+static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
+{
+	double issued = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu);
+	double retired = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu);
+	double recovery = runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);
+	double wasted = issued - retired + recovery;
+	double slots = td_total_slots(ctx, cpu, st);
+
+	return sanitize_val(slots ? wasted / slots : 0);
+}
+
+/* TopDown retiring: SlotsRetired / TotalSlots, 0 without total slots. */
+static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
+{
+	double slots = td_total_slots(ctx, cpu, st);
+	double retired = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
+					  ctx, cpu);
+
+	if (!slots)
+		return 0;
+
+	return retired / slots;
+}
+
+/* TopDown frontend bound: FetchBubbles / TotalSlots, 0 without total slots. */
+static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
+{
+	double slots = td_total_slots(ctx, cpu, st);
+	double bubbles = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
+					  ctx, cpu);
+
+	if (!slots)
+		return 0;
+
+	return bubbles / slots;
+}
+
+/*
+ * TopDown backend bound: whatever is left after frontend bound, bad
+ * speculation and retiring.  Yields 0 when all three are zero.
+ */
+static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
+{
+	double fe = td_fe_bound(ctx, cpu, st);
+	double bad = td_bad_spec(ctx, cpu, st);
+	double ret = td_retiring(ctx, cpu, st);
+	double sum = fe + bad + ret;
+
+	return sum == 0 ? 0 : sanitize_val(1.0 - sum);
+}
+
+/*
+ * Print the share of aperf cycles not covered by the cycles count
+ * ("SMI cycles%") and the SMI count.  Nothing is printed when either the
+ * cycles or the aperf average is zero.
+ */
+static void print_smi_cost(int cpu, struct perf_evsel *evsel,
+			   struct perf_stat_output_ctx *out,
+			   struct runtime_stat *st)
+{
+	int ctx = evsel_context(evsel);
+	double smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
+	double aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
+	double cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+	double cost = 0.0;
+	const char *color;
+
+	if (cycles == 0 || aperf == 0)
+		return;
+
+	if (smi_num)
+		cost = (aperf - cycles) / aperf * 100.00;
+
+	/* highlight costs above 10 percent */
+	color = (cost > 10) ? PERF_COLOR_RED : NULL;
+
+	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+}
+/* Evaluate metric_expr over the averages of name and metric_events[], then print the result. */
+static void generic_metric(const char *metric_expr,
+			   struct perf_evsel **metric_events,
+			   char *name,
+			   const char *metric_name,
+			   double avg,
+			   int cpu,
+			   struct perf_stat_output_ctx *out,
+			   struct runtime_stat *st)
+{
+	print_metric_t print_metric = out->print_metric;
+	struct parse_ctx pctx;
+	double ratio;
+	int i;
+	void *ctxp = out->ctx;
+
+	expr__ctx_init(&pctx);
+	expr__add_id(&pctx, name, avg);
+	for (i = 0; metric_events[i]; i++) {	/* array ends at the first NULL entry */
+		struct saved_value *v;
+		struct stats *stats;
+		double scale;
+
+		if (!strcmp(metric_events[i]->name, "duration_time")) {
+			stats = &walltime_nsecs_stats;
+			scale = 1e-9;	/* presumably nsecs -> seconds, going by the name */
+		} else {
+			v = saved_value_lookup(metric_events[i], cpu, false,
+					       STAT_NONE, 0, st);
+			if (!v)	/* lookup failed: stop early, detected after the loop */
+				break;
+			stats = &v->stats;
+			scale = 1.0;
+		}
+		expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
+	}
+	if (!metric_events[i]) {	/* true only when the loop ran to the terminator */
+		const char *p = metric_expr;
+
+		if (expr__parse(&ratio, &pctx, &p) == 0)
+			print_metric(ctxp, NULL, "%8.1f",
+				metric_name ?
+				metric_name :
+				out->force_header ?  name : "",
+				ratio);
+		else
+			print_metric(ctxp, NULL, NULL,
+				     out->force_header ?
+				     (metric_name ? metric_name : name) : "", 0);
+	} else
+		print_metric(ctxp, NULL, NULL, "", 0);
+}
+/* Print the derived metric(s) for evsel, computed from avg and the values saved in st. */
+void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+				   double avg, int cpu,
+				   struct perf_stat_output_ctx *out,
+				   struct rblist *metric_events,
+				   struct runtime_stat *st)
+{
+	void *ctxp = out->ctx;
+	print_metric_t print_metric = out->print_metric;
+	double total, ratio = 0.0, total2;
+	const char *color = NULL;
+	int ctx = evsel_context(evsel);
+	struct metric_event *me;
+	int num = 1;	/* cleared below when no built-in branch applies */
+
+	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
+		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+
+		if (total) {
+			ratio = avg / total;
+			print_metric(ctxp, NULL, "%7.2f ",
+					"insn per cycle", ratio);
+		} else {
+			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
+		}
+
+		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
+					 ctx, cpu);
+
+		total = max(total, runtime_stat_avg(st,
+						    STAT_STALLED_CYCLES_BACK,
+						    ctx, cpu));
+
+		if (total && avg) {
+			out->new_line(ctxp);
+			ratio = total / avg;
+			print_metric(ctxp, NULL, "%7.2f ",
+					"stalled cycles per insn",
+					ratio);
+		} else if (have_frontend_stalled) {
+			print_metric(ctxp, NULL, NULL,
+				     "stalled cycles per insn", 0);
+		}
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
+		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
+			print_branch_misses(cpu, evsel, avg, out, st);
+		else
+			print_metric(ctxp, NULL, NULL, "of all branches", 0);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
+			print_l1_dcache_misses(cpu, evsel, avg, out, st);
+		else
+			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
+			print_l1_icache_misses(cpu, evsel, avg, out, st);
+		else
+			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
+			print_dtlb_cache_misses(cpu, evsel, avg, out, st);
+		else
+			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
+			print_itlb_cache_misses(cpu, evsel, avg, out, st);
+		else
+			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
+			print_ll_cache_misses(cpu, evsel, avg, out, st);
+		else
+			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
+		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
+
+		if (total)
+			ratio = avg * 100 / total;
+
+		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
+			print_metric(ctxp, NULL, "%8.3f %%",
+				     "of all cache refs", ratio);
+		else
+			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+		print_stalled_cycles_frontend(cpu, evsel, avg, out, st);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
+		print_stalled_cycles_backend(cpu, evsel, avg, out, st);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
+		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
+
+		if (total) {
+			ratio = avg / total;
+			print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
+		} else {
+			print_metric(ctxp, NULL, NULL, "GHz", 0);	/* same label as the data path */
+		}
+	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
+		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+
+		if (total)
+			print_metric(ctxp, NULL,
+					"%7.2f%%", "transactional cycles",
+					100.0 * (avg / total));
+		else
+			print_metric(ctxp, NULL, NULL, "transactional cycles",
+				     0);
+	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
+		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);
+
+		if (total2 < avg)	/* clamp so the difference below is never negative */
+			total2 = avg;
+		if (total)
+			print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
+				100.0 * ((total2-avg) / total));
+		else
+			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
+	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
+		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
+					 ctx, cpu);
+
+		if (avg)
+			ratio = total / avg;
+
+		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
+			print_metric(ctxp, NULL, "%8.0f",
+				     "cycles / transaction", ratio);
+		else
+			print_metric(ctxp, NULL, NULL, "cycles / transaction",
+				      0);
+	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
+		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
+					 ctx, cpu);
+
+		if (avg)
+			ratio = total / avg;
+
+		print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
+	} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) ||
+		   perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) {
+		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
+			print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
+				     avg / ratio);
+		else
+			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
+		double fe_bound = td_fe_bound(ctx, cpu, st);
+
+		if (fe_bound > 0.2)
+			color = PERF_COLOR_RED;
+		print_metric(ctxp, color, "%8.1f%%", "frontend bound",
+				fe_bound * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
+		double retiring = td_retiring(ctx, cpu, st);
+
+		if (retiring > 0.7)
+			color = PERF_COLOR_GREEN;
+		print_metric(ctxp, color, "%8.1f%%", "retiring",
+				retiring * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
+		double bad_spec = td_bad_spec(ctx, cpu, st);
+
+		if (bad_spec > 0.1)
+			color = PERF_COLOR_RED;
+		print_metric(ctxp, color, "%8.1f%%", "bad speculation",
+				bad_spec * 100.);
+	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
+		double be_bound = td_be_bound(ctx, cpu, st);
+		const char *name = "backend bound";
+		static int have_recovery_bubbles = -1;	/* -1: not probed yet */
+
+		/* In case the CPU does not support topdown-recovery-bubbles */
+		if (have_recovery_bubbles < 0)
+			have_recovery_bubbles = pmu_have_event("cpu",
+					"topdown-recovery-bubbles");
+		if (!have_recovery_bubbles)
+			name = "backend bound/bad spec";
+
+		if (be_bound > 0.2)
+			color = PERF_COLOR_RED;
+		if (td_total_slots(ctx, cpu, st) > 0)
+			print_metric(ctxp, color, "%8.1f%%", name,
+					be_bound * 100.);
+		else
+			print_metric(ctxp, NULL, NULL, name, 0);
+	} else if (evsel->metric_expr) {
+		generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
+				evsel->metric_name, avg, cpu, out, st);
+	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
+		char unit = 'M';
+		char unit_buf[10];
+
+		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
+
+		if (total)
+			ratio = 1000.0 * avg / total;
+		if (ratio < 0.001) {	/* too small for the 'M' unit: rescale to 'K' */
+			ratio *= 1000;
+			unit = 'K';
+		}
+		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
+		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
+	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
+		print_smi_cost(cpu, evsel, out, st);
+	} else {
+		num = 0;
+	}
+
+	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
+		struct metric_expr *mexp;
+
+		list_for_each_entry (mexp, &me->head, nd) {
+			if (num++ > 0)	/* something was already printed on this line */
+				out->new_line(ctxp);
+			generic_metric(mexp->metric_expr, mexp->metric_events,
+					evsel->name, mexp->metric_name,
+					avg, cpu, out, st);
+		}
+	}
+	if (num == 0)	/* no metric at all: emit an empty one */
+		print_metric(ctxp, NULL, NULL, NULL, 0);
+}
diff --git a/util/stat.c b/util/stat.c
new file mode 100644
index 0000000..a0061e0
--- /dev/null
+++ b/util/stat.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <math.h>
+#include "stat.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+/* Fold one sample into the running count, mean and M2 (Welford's update) and track min/max. */
+void update_stats(struct stats *stats, u64 val)
+{
+	double delta;
+
+	stats->n++;
+	delta = val - stats->mean;	/* distance from the mean before this sample */
+	stats->mean += delta / stats->n;
+	stats->M2 += delta*(val - stats->mean);	/* second factor uses the updated mean */
+
+	if (val > stats->max)
+		stats->max = val;
+
+	if (val < stats->min)
+		stats->min = val;
+}
+/* Return the running mean maintained by update_stats(). */
+double avg_stats(struct stats *stats)
+{
+	return stats->mean;
+}
+
+/*
+ * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+ *
+ *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
+ * s^2 = -------------------------------
+ *                  n - 1
+ *
+ * http://en.wikipedia.org/wiki/Stddev
+ *
+ * The std dev of the mean is related to the std dev by:
+ *
+ *             s
+ * s_mean = -------
+ *          sqrt(n)
+ *
+ */
+double stddev_stats(struct stats *stats)
+{
+	double samples = stats->n;
+	double sample_var;
+
+	/* Fewer than two samples: report no deviation. */
+	if (samples < 2)
+		return 0.0;
+
+	sample_var = stats->M2 / (samples - 1);
+	return sqrt(sample_var / samples);
+}
+/* Express stddev as a percentage of avg; a zero avg yields 0 instead of dividing. */
+double rel_stddev_stats(double stddev, double avg)
+{
+	/* Without a mean there is nothing to be relative to. */
+	if (!avg)
+		return 0.0;
+
+	/* Evaluated left to right: (100.0 * stddev) / avg. */
+	return 100.0 * stddev / avg;
+}
+/* Tell whether the id stored in evsel->stats equals id. */
+bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+			   enum perf_stat_evsel_id id)
+{
+	const struct perf_stat_evsel *priv = evsel->stats;
+
+	return id == priv->id;
+}
+/* Event name for each perf_stat_evsel_id, indexed by the enum value. */
+#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
+static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
+	ID(NONE,		x),
+	ID(CYCLES_IN_TX,	cpu/cycles-t/),
+	ID(TRANSACTION_START,	cpu/tx-start/),
+	ID(ELISION_START,	cpu/el-start/),
+	ID(CYCLES_IN_TX_CP,	cpu/cycles-ct/),
+	ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
+	ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
+	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
+	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
+	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+	ID(SMI_NUM, msr/smi/),
+	ID(APERF, msr/aperf/),
+};
+#undef ID
+/* Give evsel the stat id whose id_str[] entry equals its name, if there is one. */
+static void perf_stat_evsel_id_init(struct perf_evsel *evsel)
+{
+	struct perf_stat_evsel *ps = evsel->stats;
+	int id = 0;
+
+	/* A zeroed ps->id already means PERF_STAT_EVSEL_ID__NONE. */
+	while (id < PERF_STAT_EVSEL_ID__MAX &&
+	       strcmp(perf_evsel__name(evsel), id_str[id]) != 0)
+		id++;
+
+	/* No match leaves ps->id as it was. */
+	if (id < PERF_STAT_EVSEL_ID__MAX)
+		ps->id = id;
+}
+/* Reinitialize every res_stats slot and redo the name-based id lookup. */
+static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
+{
+	int i;
+	struct perf_stat_evsel *ps = evsel->stats;
+
+	for (i = 0; i < 3; i++)	/* 3 == size of res_stats[] */
+		init_stats(&ps->res_stats[i]);
+
+	perf_stat_evsel_id_init(evsel);
+}
+/* Allocate and initialize evsel->stats; returns 0, or -ENOMEM if the allocation fails. */
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
+{
+	evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
+	if (evsel->stats == NULL)
+		return -ENOMEM;
+	perf_evsel__reset_stat_priv(evsel);
+	return 0;
+}
+/* Free evsel->stats together with its group_data; a NULL evsel->stats is tolerated. */
+static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
+{
+	struct perf_stat_evsel *ps = evsel->stats;
+
+	if (ps)
+		free(ps->group_data);
+	zfree(&evsel->stats);	/* presumably frees and NULLs the pointer - confirm */
+}
+/* Allocate evsel->prev_raw_counts; the field is left untouched when allocation fails. */
+static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
+					     int ncpus, int nthreads)
+{
+	struct perf_counts *fresh = perf_counts__new(ncpus, nthreads);
+
+	if (!fresh)
+		return -ENOMEM;
+
+	evsel->prev_raw_counts = fresh;
+	return 0;
+}
+/* Delete evsel->prev_raw_counts and clear the pointer. */
+static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
+{
+	perf_counts__delete(evsel->prev_raw_counts);
+	evsel->prev_raw_counts = NULL;
+}
+/* Allocate stat priv, counts and (if alloc_raw) prev_raw_counts; nothing is unwound here. */
+static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
+{
+	int ncpus = perf_evsel__nr_cpus(evsel);
+	int nthreads = thread_map__nr(evsel->threads);
+
+	if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
+	    perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
+	    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
+		return -ENOMEM;	/* any failure is reported as -ENOMEM */
+
+	return 0;
+}
+/* Allocate stats for every evsel in evlist; on failure free them all and return -1. */
+int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (perf_evsel__alloc_stats(evsel, alloc_raw))
+			goto out_free;
+	}
+
+	return 0;
+
+out_free:
+	perf_evlist__free_stats(evlist);	/* walks every evsel, allocated or not */
+	return -1;
+}
+/* Free stat priv, counts and prev_raw_counts of every evsel in evlist. */
+void perf_evlist__free_stats(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		perf_evsel__free_stat_priv(evsel);
+		perf_evsel__free_counts(evsel);
+		perf_evsel__free_prev_raw_counts(evsel);
+	}
+}
+/* Reset stat priv and counts of every evsel in evlist; prev_raw_counts is not touched here. */
+void perf_evlist__reset_stats(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		perf_evsel__reset_stat_priv(evsel);
+		perf_evsel__reset_counts(evsel);
+	}
+}
+/* Clear the per-package mask, if one was allocated (MAX_NR_CPUS bytes, as in check_per_pkg()). */
+static void zero_per_pkg(struct perf_evsel *counter)
+{
+	if (counter->per_pkg_mask)
+		memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
+}
+/* Set *skip when a per-package counter's socket was already marked in per_pkg_mask. */
+static int check_per_pkg(struct perf_evsel *counter,
+			 struct perf_counts_values *vals, int cpu, bool *skip)
+{
+	unsigned long *mask = counter->per_pkg_mask;
+	struct cpu_map *cpus = perf_evsel__cpus(counter);
+	int s;
+
+	*skip = false;
+
+	if (!counter->per_pkg)
+		return 0;
+
+	if (cpu_map__empty(cpus))
+		return 0;
+
+	if (!mask) {	/* allocated lazily on first use */
+		mask = zalloc(MAX_NR_CPUS);
+		if (!mask)
+			return -ENOMEM;
+
+		counter->per_pkg_mask = mask;
+	}
+
+	/*
+	 * we do not consider an event that has not run as a good
+	 * instance to mark a package as used (skip=1). Otherwise
+	 * we may run into a situation where the first CPU in a package
+	 * is not running anything, yet the second is, and this function
+	 * would mark the package as used after the first CPU and would
+	 * not read the values from the second CPU.
+	 */
+	if (!(vals->run && vals->ena))
+		return 0;
+
+	s = cpu_map__get_socket(cpus, cpu, NULL);
+	if (s < 0)
+		return -1;
+
+	*skip = test_and_set_bit(s, mask) == 1;	/* bit already set: socket seen before */
+	return 0;
+}
+/* Apply the per-package check, then fold one (cpu, thread) reading per config->aggr_mode. */
+static int
+process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel,
+		       int cpu, int thread,
+		       struct perf_counts_values *count)
+{
+	struct perf_counts_values *aggr = &evsel->counts->aggr;
+	static struct perf_counts_values zero;
+	bool skip = false;
+
+	if (check_per_pkg(evsel, count, cpu, &skip)) {
+		pr_err("failed to read per-pkg counter\n");
+		return -1;
+	}
+
+	if (skip)	/* re-zero each time: the callees below take count as non-const */
+		count = memset(&zero, 0, sizeof(zero));
+
+	switch (config->aggr_mode) {
+	case AGGR_THREAD:
+	case AGGR_CORE:
+	case AGGR_SOCKET:
+	case AGGR_NONE:
+		if (!evsel->snapshot)
+			perf_evsel__compute_deltas(evsel, cpu, thread, count);
+		perf_counts_values__scale(count, config->scale, NULL);
+		if (config->aggr_mode == AGGR_NONE)
+			perf_stat__update_shadow_stats(evsel, count->val, cpu,
+						       &rt_stat);
+		if (config->aggr_mode == AGGR_THREAD) {
+			if (config->stats)	/* per-thread runtime stats, when provided */
+				perf_stat__update_shadow_stats(evsel,
+					count->val, 0, &config->stats[thread]);
+			else
+				perf_stat__update_shadow_stats(evsel,
+					count->val, 0, &rt_stat);
+		}
+		break;
+	case AGGR_GLOBAL:
+		aggr->val += count->val;
+		if (config->scale) {
+			aggr->ena += count->ena;
+			aggr->run += count->run;
+		}	/* no break: falls into the break below */
+	case AGGR_UNSET:
+	default:
+		break;
+	}
+
+	return 0;
+}
+/* Run process_counter_values() for every (thread, cpu) pair of counter; -1 on first failure. */
+static int process_counter_maps(struct perf_stat_config *config,
+				struct perf_evsel *counter)
+{
+	int nthreads = thread_map__nr(counter->threads);
+	int ncpus = perf_evsel__nr_cpus(counter);
+	int cpu, thread;
+
+	if (counter->system_wide)	/* system-wide counters use a single thread slot */
+		nthreads = 1;
+
+	for (thread = 0; thread < nthreads; thread++) {
+		for (cpu = 0; cpu < ncpus; cpu++) {
+			if (process_counter_values(config, counter, cpu, thread,
+						   perf_counts(counter->counts, cpu, thread)))
+				return -1;
+		}
+	}
+
+	return 0;
+}
+/* Process all readings of counter; in AGGR_GLOBAL mode also update res_stats and rt_stat. */
+int perf_stat_process_counter(struct perf_stat_config *config,
+			      struct perf_evsel *counter)
+{
+	struct perf_counts_values *aggr = &counter->counts->aggr;
+	struct perf_stat_evsel *ps = counter->stats;
+	u64 *count = counter->counts->aggr.values;
+	int i, ret;
+
+	aggr->val = aggr->ena = aggr->run = 0;
+
+	/*
+	 * We calculate counter's data every interval, and the display code
+	 * shows the ps->res_stats avg values. Zero all of them in interval
+	 * mode, otherwise running averages would be shown for each interval.
+	 */
+	if (config->interval) {
+		for (i = 0; i < 3; i++)
+			init_stats(&ps->res_stats[i]);
+	}
+
+	if (counter->per_pkg)
+		zero_per_pkg(counter);
+
+	ret = process_counter_maps(config, counter);
+	if (ret)
+		return ret;
+
+	if (config->aggr_mode != AGGR_GLOBAL)	/* the rest only applies to global aggregation */
+		return 0;
+
+	if (!counter->snapshot)
+		perf_evsel__compute_deltas(counter, -1, -1, aggr);
+	perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
+
+	for (i = 0; i < 3; i++)	/* one res_stats slot per aggr value */
+		update_stats(&ps->res_stats[i], count[i]);
+
+	if (verbose > 0) {
+		fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+			perf_evsel__name(counter), count[0], count[1], count[2]);
+	}
+
+	/*
+	 * Save the full runtime - to allow normalization during printout:
+	 */
+	perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);
+
+	return 0;
+}
+/* Store one stat event's val/ena/run in the counts slot of the evsel that st->id resolves to. */
+int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused,
+				   union perf_event *event,
+				   struct perf_session *session)
+{
+	struct perf_counts_values count;
+	struct stat_event *st = &event->stat;
+	struct perf_evsel *counter;
+
+	count.val = st->val;
+	count.ena = st->ena;
+	count.run = st->run;
+
+	counter = perf_evlist__id2evsel(session->evlist, st->id);
+	if (!counter) {
+		pr_err("Failed to resolve counter for stat event.\n");
+		return -EINVAL;
+	}
+
+	/* NOTE(review): st->cpu and st->thread are not range-checked here - confirm a caller does. */
+	*perf_counts(counter->counts, st->cpu, st->thread) = count;
+	counter->supported = true;
+	return 0;
+}
+/* Dump a stat event (id, cpu, thread, value, enabled, running) to fp; returns the fprintf sum. */
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
+{
+	struct stat_event *st = (struct stat_event *) event;
+	size_t ret;
+
+	ret  = fprintf(fp, "\n... id %" PRIu64 ", cpu %d, thread %d\n",
+		       st->id, st->cpu, st->thread);
+	ret += fprintf(fp, "... value %" PRIu64 ", enabled %" PRIu64 ", running %" PRIu64 "\n",
+		       st->val, st->ena, st->run);
+
+	return ret;
+}
+/* Dump a stat round event's time and type to fp; returns the fprintf result. */
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
+{
+	struct stat_round_event *rd = (struct stat_round_event *)event;
+	size_t ret;
+
+	/* Any type other than PERF_STAT_ROUND_TYPE__FINAL is shown as "INTERVAL". */
+	ret = fprintf(fp, "\n... time %" PRIu64 ", type %s\n", rd->time,
+		      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");
+
+	return ret;
+}
+/* Decode a stat config event and dump aggr_mode, scale and interval to fp. */
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
+{
+	struct perf_stat_config sc;
+	size_t ret;
+
+	/* NOTE(review): sc starts uninitialized; assumes the call below sets all three fields. */
+	perf_event__read_stat_config(&sc, &event->stat_config);
+
+	ret  = fprintf(fp, "\n");
+	ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
+	ret += fprintf(fp, "... scale     %d\n", sc.scale);
+	ret += fprintf(fp, "... interval  %u\n", sc.interval);
+
+	return ret;
+}
diff --git a/util/stat.h b/util/stat.h
new file mode 100644
index 0000000..8f56ba4
--- /dev/null
+++ b/util/stat.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STATS_H
+#define __PERF_STATS_H
+
+#include <linux/types.h>
+#include <stdio.h>
+#include "xyarray.h"
+#include "rblist.h"
+/* Running statistics over u64 samples; see update_stats() and init_stats(). */
+struct stats
+{
+	double n, mean, M2;	/* sample count, running mean, sum of squared deviations */
+	u64 max, min;		/* largest and smallest sample seen */
+};
+/* Ids tested with perf_stat_evsel__is(); NONE is 0 and MAX counts the entries. */
+enum perf_stat_evsel_id {
+	PERF_STAT_EVSEL_ID__NONE = 0,
+	PERF_STAT_EVSEL_ID__CYCLES_IN_TX,
+	PERF_STAT_EVSEL_ID__TRANSACTION_START,
+	PERF_STAT_EVSEL_ID__ELISION_START,
+	PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
+	PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
+	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
+	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
+	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
+	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+	PERF_STAT_EVSEL_ID__SMI_NUM,
+	PERF_STAT_EVSEL_ID__APERF,
+	PERF_STAT_EVSEL_ID__MAX,
+};
+/* Per-evsel stat state, reached through evsel->stats. */
+struct perf_stat_evsel {
+	struct stats		 res_stats[3];	/* one slot per aggregated counts value */
+	enum perf_stat_evsel_id	 id;
+	u64			*group_data;	/* freed together with this struct */
+};
+/* Aggregation mode stored in perf_stat_config.aggr_mode. */
+enum aggr_mode {
+	AGGR_NONE,
+	AGGR_GLOBAL,
+	AGGR_SOCKET,
+	AGGR_CORE,
+	AGGR_THREAD,
+	AGGR_UNSET,
+};
+/* Context bit flags; CTX_BIT_MAX is one past the largest combination of the five flags. */
+enum {
+	CTX_BIT_USER	= 1 << 0,
+	CTX_BIT_KERNEL	= 1 << 1,
+	CTX_BIT_HV	= 1 << 2,
+	CTX_BIT_HOST	= 1 << 3,
+	CTX_BIT_IDLE	= 1 << 4,
+	CTX_BIT_MAX	= 1 << 5,
+};
+/* Number of distinct flag combinations (32). */
+#define NUM_CTX CTX_BIT_MAX
+/* Kinds of saved runtime values, as passed to runtime_stat_avg() and runtime_stat_n(). */
+enum stat_type {
+	STAT_NONE = 0,
+	STAT_NSECS,
+	STAT_CYCLES,
+	STAT_STALLED_CYCLES_FRONT,
+	STAT_STALLED_CYCLES_BACK,
+	STAT_BRANCHES,
+	STAT_CACHEREFS,
+	STAT_L1_DCACHE,
+	STAT_L1_ICACHE,
+	STAT_LL_CACHE,
+	STAT_ITLB_CACHE,
+	STAT_DTLB_CACHE,
+	STAT_CYCLES_IN_TX,
+	STAT_TRANSACTION,
+	STAT_ELISION,
+	STAT_TOPDOWN_TOTAL_SLOTS,
+	STAT_TOPDOWN_SLOTS_ISSUED,
+	STAT_TOPDOWN_SLOTS_RETIRED,
+	STAT_TOPDOWN_FETCH_BUBBLES,
+	STAT_TOPDOWN_RECOVERY_BUBBLES,
+	STAT_SMI_NUM,
+	STAT_APERF,
+	STAT_MAX
+};
+
+struct runtime_stat {
+	struct rblist value_list;
+};
+/* Settings consulted while processing counters in stat.c. */
+struct perf_stat_config {
+	enum aggr_mode	aggr_mode;
+	bool		scale;
+	FILE		*output;	/* verbose dump in perf_stat_process_counter() goes here */
+	unsigned int	interval;	/* non-zero makes res_stats reset on every processing pass */
+	unsigned int	timeout;
+	int		times;
+	struct runtime_stat *stats;	/* indexed by thread in AGGR_THREAD mode; may be NULL */
+	int		stats_num;
+};
+
+void update_stats(struct stats *stats, u64 val);
+double avg_stats(struct stats *stats);
+double stddev_stats(struct stats *stats);
+double rel_stddev_stats(double stddev, double avg);
+/* Reset all accumulators; min starts at the largest u64 so any smaller sample replaces it. */
+static inline void init_stats(struct stats *stats)
+{
+	/* Members not named here (n, mean, M2) start out as zero. */
+	*stats = (struct stats) {
+		.min = (u64) -1,
+		.max = 0,
+	};
+}
+
+struct perf_evsel;
+struct perf_evlist;
+
+struct perf_aggr_thread_value {
+	struct perf_evsel *counter;
+	int id;
+	double uval;
+	u64 val;
+	u64 run;
+	u64 ena;
+};
+
+bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+			   enum perf_stat_evsel_id id);
+
+#define perf_stat_evsel__is(evsel, id) \
+	__perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
+
+extern struct runtime_stat rt_stat;
+extern struct stats walltime_nsecs_stats;
+
+typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit,
+			       const char *fmt, double val);
+typedef void (*new_line_t )(void *ctx);
+
+void runtime_stat__init(struct runtime_stat *st);
+void runtime_stat__exit(struct runtime_stat *st);
+void perf_stat__init_shadow_stats(void);
+void perf_stat__reset_shadow_stats(void);
+void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
+				    int cpu, struct runtime_stat *st);
+struct perf_stat_output_ctx {
+	void *ctx;	/* passed back as first argument to both callbacks */
+	print_metric_t print_metric;
+	new_line_t new_line;
+	bool force_header;	/* when set, generic metrics print a name even without metric_name */
+};
+
+void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+				   double avg, int cpu,
+				   struct perf_stat_output_ctx *out,
+				   struct rblist *metric_events,
+				   struct runtime_stat *st);
+void perf_stat__collect_metric_expr(struct perf_evlist *);
+
+int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
+void perf_evlist__free_stats(struct perf_evlist *evlist);
+void perf_evlist__reset_stats(struct perf_evlist *evlist);
+
+int perf_stat_process_counter(struct perf_stat_config *config,
+			      struct perf_evsel *counter);
+struct perf_tool;
+union perf_event;
+struct perf_session;
+int perf_event__process_stat_event(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_session *session);
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
+#endif
diff --git a/util/strbuf.c b/util/strbuf.c
new file mode 100644
index 0000000..3d1cf5b
--- /dev/null
+++ b/util/strbuf.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "debug.h"
+#include "util.h"
+#include <linux/kernel.h>
+#include <errno.h>
+
+/*
+ * Used as the default ->buf value, so that people can always assume
+ * buf is non NULL and ->buf is NUL terminated even for a freshly
+ * initialized strbuf.
+ */
+char strbuf_slopbuf[1];
+/* Put sb in the empty state backed by strbuf_slopbuf, then pre-grow by hint if non-zero. */
+int strbuf_init(struct strbuf *sb, ssize_t hint)
+{
+	sb->alloc = sb->len = 0;
+	sb->buf = strbuf_slopbuf;
+	if (hint)
+		return strbuf_grow(sb, hint);	/* may fail; sb stays in the empty state then */
+	return 0;
+}
+/* Free an allocated buffer and put sb back in its initial state. */
+void strbuf_release(struct strbuf *sb)
+{
+	if (!sb->alloc)
+		return;	/* nothing was allocated, nothing to free */
+	zfree(&sb->buf);
+	strbuf_init(sb, 0);
+}
+/* Hand the allocated buffer to the caller (NULL if none was allocated) and reset sb. */
+char *strbuf_detach(struct strbuf *sb, size_t *sz)
+{
+	char *res = sb->alloc ? sb->buf : NULL;
+	if (sz)	/* the length output is optional */
+		*sz = sb->len;
+	strbuf_init(sb, 0);
+	return res;
+}
+/* Ensure room for extra more bytes plus a NUL; returns 0, -E2BIG or -ENOMEM. */
+int strbuf_grow(struct strbuf *sb, size_t extra)
+{
+	char *buf;
+	size_t nr = sb->len + extra + 1;
+
+	if (nr < sb->alloc)
+		return 0;
+
+	if (nr <= sb->len)	/* the size_t sum wrapped around */
+		return -E2BIG;
+
+	if (alloc_nr(sb->alloc) > nr)
+		nr = alloc_nr(sb->alloc);
+
+	/*
+	 * Note that sb->buf == strbuf_slopbuf if sb->alloc == 0, and it is
+	 * a static variable. Thus we have to avoid passing it to realloc.
+	 */
+	buf = realloc(sb->alloc ? sb->buf : NULL, nr * sizeof(*buf));
+	if (!buf)	/* the old buffer is still valid and still owned by sb */
+		return -ENOMEM;
+
+	sb->buf = buf;
+	sb->alloc = nr;
+	return 0;
+}
+/* Append the single character c and re-terminate; returns 0 or strbuf_grow()'s error. */
+int strbuf_addch(struct strbuf *sb, int c)
+{
+	int ret = strbuf_grow(sb, 1);
+	if (ret)
+		return ret;
+
+	sb->buf[sb->len++] = c;
+	sb->buf[sb->len] = '\0';
+	return 0;
+}
+/* Append len bytes from data; returns 0 or the error from strbuf_grow()/strbuf_setlen(). */
+int strbuf_add(struct strbuf *sb, const void *data, size_t len)
+{
+	int ret = strbuf_grow(sb, len);
+	if (ret)
+		return ret;
+
+	memcpy(sb->buf + sb->len, data, len);
+	return strbuf_setlen(sb, sb->len + len);	/* also writes the trailing NUL */
+}
+/* Append a vsnprintf()-formatted string, growing the buffer if the first try does not fit. */
+static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
+{
+	int len, ret;
+	va_list ap_saved;
+
+	if (!strbuf_avail(sb)) {
+		ret = strbuf_grow(sb, 64);
+		if (ret)
+			return ret;
+	}
+
+	/* vsnprintf() consumes ap, so keep a copy for the retry after growing. */
+	va_copy(ap_saved, ap);
+	len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+	ret = len < 0 ? len : 0;
+	if (!ret && len > strbuf_avail(sb)) {
+		ret = strbuf_grow(sb, len);
+		if (!ret)
+			len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved);
+		if (!ret && len > strbuf_avail(sb)) {
+			pr_debug("this should not happen, your vsnprintf is broken");
+			ret = -EINVAL;
+		}
+	}
+	/* Every va_copy() needs a matching va_end(), on the error paths too. */
+	va_end(ap_saved);
+	return ret ? ret : strbuf_setlen(sb, sb->len + len);
+}
+/* printf-style append; a thin varargs wrapper around strbuf_addv(). */
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, fmt);
+	ret = strbuf_addv(sb, fmt, ap);
+	va_end(ap);
+	return ret;
+}
+/* Read fd until EOF into sb; returns the bytes appended, or a negative value on failure. */
+ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint)
+{
+	size_t oldlen = sb->len;
+	size_t oldalloc = sb->alloc;
+	int ret;
+
+	ret = strbuf_grow(sb, hint ? hint : 8192);
+	if (ret)
+		return ret;
+
+	for (;;) {
+		ssize_t cnt;
+
+		cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1);
+		if (cnt < 0) {	/* undo the partial read before reporting the failure */
+			if (oldalloc == 0)
+				strbuf_release(sb);
+			else
+				strbuf_setlen(sb, oldlen);
+			return cnt;
+		}
+		if (!cnt)
+			break;
+		sb->len += cnt;
+		sb->buf[sb->len] = '\0';	/* stay NUL-terminated even if the grow below fails */
+		ret = strbuf_grow(sb, 8192);
+		if (ret)
+			return ret;
+	}
+	sb->buf[sb->len] = '\0';	/* covers the case where nothing was read */
+	return sb->len - oldlen;
+}
diff --git a/util/strbuf.h b/util/strbuf.h
new file mode 100644
index 0000000..ea94d86
--- /dev/null
+++ b/util/strbuf.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STRBUF_H
+#define __PERF_STRBUF_H
+
+/*
+ * Strbufs can be used in many ways: as a byte array, or to store arbitrarily
+ * long, overflow safe strings.
+ *
+ * Strbufs have some invariants that are very important to keep in mind:
+ *
+ * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to
+ *    build complex strings/buffers whose final size isn't easily known.
+ *
+ *    It is NOT legal to copy the ->buf pointer away.
+ *    `strbuf_detach' is the operation that detaches a buffer from its shell
+ *    while keeping the shell valid wrt its invariants.
+ *
+ * 2. the ->buf member is a byte array that has at least ->len + 1 bytes
+ *    allocated. The extra byte is used to store a '\0', allowing the ->buf
+ *    member to be a valid C-string. Every strbuf function ensures this
+ *    invariant is preserved.
+ *
+ *    Note that it is OK to "play" with the buffer directly if you work it
+ *    that way:
+ *
+ *    strbuf_grow(sb, SOME_SIZE);
+ *       ... Here, the memory array starting at sb->buf, and of length
+ *       ... strbuf_avail(sb) is all yours, and you are sure that
+ *       ... strbuf_avail(sb) is at least SOME_SIZE.
+ *    strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE);
+ *
+ *    Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb).
+ *
+ *    Doing so is safe, though if it has to be done in many places, adding the
+ *    missing API to the strbuf module is the way to go.
+ *
+ *    XXX: do _not_ assume that the area that is yours is of size ->alloc - 1
+ *         even if it's true in the current implementation. Alloc is somehow a
+ *         "private" member that should not be messed with.
+ */
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <string.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+
+extern char strbuf_slopbuf[];
+struct strbuf {
+	size_t alloc;	/* bytes allocated for buf; 0 while buf is strbuf_slopbuf */
+	size_t len;	/* bytes in use, not counting the trailing NUL */
+	char *buf;
+};
+
+#define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
+
+/*----- strbuf life cycle -----*/
+int strbuf_init(struct strbuf *buf, ssize_t hint);
+void strbuf_release(struct strbuf *buf);
+char *strbuf_detach(struct strbuf *buf, size_t *);
+
+/*----- strbuf size related -----*/
+static inline ssize_t strbuf_avail(const struct strbuf *sb) {
+	return sb->alloc ? sb->alloc - sb->len - 1 : 0;	/* the -1 keeps room for the NUL */
+}
+
+int strbuf_grow(struct strbuf *buf, size_t);
+/* Set the used length to len and write the terminating NUL there. */
+static inline int strbuf_setlen(struct strbuf *sb, size_t len) {
+	if (!sb->alloc) {	/* allocate first so the NUL is never written to strbuf_slopbuf */
+		int ret = strbuf_grow(sb, 0);
+		if (ret)
+			return ret;
+	}
+	assert(len < sb->alloc);
+	sb->len = len;
+	sb->buf[len] = '\0';
+	return 0;
+}
+
+/*----- add data in your buffer -----*/
+int strbuf_addch(struct strbuf *sb, int c);
+
+int strbuf_add(struct strbuf *buf, const void *, size_t);
+static inline int strbuf_addstr(struct strbuf *sb, const char *s) {
+	return strbuf_add(sb, s, strlen(s));	/* s must be a NUL-terminated string */
+}
+
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...) __printf(2, 3);
+
+/* XXX: if read fails, any partial read is undone */
+ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
+
+#endif /* __PERF_STRBUF_H */
diff --git a/util/strfilter.c b/util/strfilter.c
new file mode 100644
index 0000000..7f3253d
--- /dev/null
+++ b/util/strfilter.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include "string2.h"
+#include "strfilter.h"
+
+#include <errno.h>
+#include "sane_ctype.h"
+
+/* Operators */
+static const char *OP_and	= "&";	/* Logical AND */
+static const char *OP_or	= "|";	/* Logical OR */
+static const char *OP_not	= "!";	/* Logical NOT */
+
+#define is_operator(c)	((c) == '|' || (c) == '&' || (c) == '!')
+#define is_separator(c)	(is_operator(c) || (c) == '(' || (c) == ')')
+
+/* Recursively free @node and both of its subtrees; NULL is accepted. */
+static void strfilter_node__delete(struct strfilter_node *node)
+{
+	if (!node)
+		return;
+
+	/* Operator nodes point at the shared OP_* strings; only rules are freed */
+	if (node->p && !is_operator(*node->p))
+		zfree((char **)&node->p);
+
+	strfilter_node__delete(node->l);
+	strfilter_node__delete(node->r);
+	free(node);
+}
+
+/* Free @filter together with its whole expression tree; NULL is accepted. */
+void strfilter__delete(struct strfilter *filter)
+{
+	if (!filter)
+		return;
+
+	strfilter_node__delete(filter->root);
+	free(filter);
+}
+
+/*
+ * Find the next token in @s.
+ *
+ * Leading whitespace is skipped. A separator character (operator or
+ * parenthesis) is a token on its own; anything else is a glob rule that
+ * runs until the next unescaped separator or whitespace.
+ *
+ * Returns the start of the token and stores its end (one past the last
+ * character) in *@e. At end of input both point at the terminating NUL.
+ */
+static const char *get_token(const char *s, const char **e)
+{
+	const char *p;
+
+	while (isspace(*s))	/* Skip spaces */
+		s++;
+
+	if (*s == '\0') {
+		p = s;
+		goto end;
+	}
+
+	p = s + 1;
+	if (!is_separator(*s)) {
+		/* End search */
+retry:
+		while (*p && !is_separator(*p) && !isspace(*p))
+			p++;
+		/*
+		 * Escape and special case: '!' is also used in glob pattern.
+		 * Never step over the terminating NUL: a rule that ends in a
+		 * backslash has nothing left to escape.
+		 */
+		if (*p && (*(p - 1) == '\\' || (*p == '!' && *(p - 1) == '['))) {
+			p++;
+			goto retry;
+		}
+	}
+end:
+	*e = p;
+	return s;
+}
+
+/*
+ * Allocate a tree node with ->p = @op, ->l = @l and ->r = @r.
+ * Returns NULL if zalloc() fails.
+ */
+static struct strfilter_node *strfilter_node__alloc(const char *op,
+						    struct strfilter_node *l,
+						    struct strfilter_node *r)
+{
+	struct strfilter_node *node = zalloc(sizeof(*node));
+
+	if (!node)
+		return NULL;
+
+	node->l = l;
+	node->r = r;
+	node->p = op;
+	return node;
+}
+
+/*
+ * Parse the rule string @s into an expression tree.
+ *
+ * Tokens come from get_token(). A stack-local dummy node 'root' anchors the
+ * tree in root.r; 'cur' is the node whose right branch receives the next
+ * operand and 'last_op' is the most recent '&'/'|' node (or the dummy root).
+ * Parsing stops at end of string or at ')'.
+ *
+ * Returns the tree and stores the stop position in *@ep. On failure returns
+ * NULL with *@ep set to the position where parsing gave up, or to NULL when
+ * an allocation failed. Returns NULL without touching *@ep when @s is NULL.
+ */
+static struct strfilter_node *strfilter_node__new(const char *s,
+						  const char **ep)
+{
+	struct strfilter_node root, *cur, *last_op;
+	const char *e;
+
+	if (!s)
+		return NULL;
+
+	memset(&root, 0, sizeof(root));
+	last_op = cur = &root;
+
+	s = get_token(s, &e);
+	while (*s != '\0' && *s != ')') {
+		switch (*s) {
+		case '&':	/* Exchg last OP->r with AND */
+			if (!cur->r || !last_op->r)
+				goto error;
+			cur = strfilter_node__alloc(OP_and, last_op->r, NULL);
+			if (!cur)
+				goto nomem;
+			last_op->r = cur;
+			last_op = cur;
+			break;
+		case '|':	/* Exchg the root with OR */
+			if (!cur->r || !root.r)
+				goto error;
+			cur = strfilter_node__alloc(OP_or, root.r, NULL);
+			if (!cur)
+				goto nomem;
+			root.r = cur;
+			last_op = cur;
+			break;
+		case '!':	/* Add NOT as a leaf node */
+			if (cur->r)
+				goto error;
+			cur->r = strfilter_node__alloc(OP_not, NULL, NULL);
+			if (!cur->r)
+				goto nomem;
+			cur = cur->r;
+			break;
+		case '(':	/* Recursively parses inside the parenthesis */
+			if (cur->r)
+				goto error;
+			cur->r = strfilter_node__new(s + 1, &s);
+			/* s now holds where the sub-parse stopped; NULL means it ran out of memory */
+			if (!s)
+				goto nomem;
+			if (!cur->r || *s != ')')
+				goto error;
+			e = s + 1;
+			break;
+		default:
+			/* Plain rule: the node keeps its own copy of the token text */
+			if (cur->r)
+				goto error;
+			cur->r = strfilter_node__alloc(NULL, NULL, NULL);
+			if (!cur->r)
+				goto nomem;
+			cur->r->p = strndup(s, e - s);
+			if (!cur->r->p)
+				goto nomem;
+		}
+		s = get_token(e, &e);
+	}
+	/* An operator that never received its right operand is an error */
+	if (!cur->r)
+		goto error;
+	*ep = s;
+	return root.r;
+nomem:
+	s = NULL;
+error:
+	*ep = s;
+	strfilter_node__delete(root.r);
+	return NULL;
+}
+
+/*
+ * Parse @rules and build a new strfilter.
+ * Returns NULL on failure; when @err is given, *@err then points at the
+ * place in @rules where parsing stopped, or is NULL if nothing was parsed
+ * (memory allocation failure or NULL @rules).
+ */
+struct strfilter *strfilter__new(const char *rules, const char **err)
+{
+	const char *ep = NULL;
+	struct strfilter *filter = zalloc(sizeof(*filter));
+
+	if (filter) {
+		filter->root = strfilter_node__new(rules, &ep);
+		/* Success needs a tree and the whole string consumed */
+		if (filter->root && *ep == '\0')
+			return filter;
+	}
+
+	if (err)
+		*err = ep;
+	strfilter__delete(filter);
+	return NULL;
+}
+
+/*
+ * Parse @rules and make it the right operand of a new root node (OR when
+ * @_or is true, AND otherwise) whose left operand is the current root.
+ *
+ * Returns 0 on success, -EINVAL for NULL arguments or a parse error
+ * (*@err, if given, is set by the parse-failure path) and -ENOMEM when an
+ * allocation failed.
+ */
+static int strfilter__append(struct strfilter *filter, bool _or,
+			     const char *rules, const char **err)
+{
+	struct strfilter_node *right, *root;
+	const char *ep = NULL;
+
+	if (!filter || !rules)
+		return -EINVAL;
+
+	right = strfilter_node__new(rules, &ep);
+	if (right && *ep == '\0') {
+		root = strfilter_node__alloc(_or ? OP_or : OP_and,
+					     filter->root, right);
+		if (root) {
+			filter->root = root;
+			return 0;
+		}
+		ep = NULL;	/* report the failed allocation as -ENOMEM */
+	} else if (err) {
+		*err = ep;
+	}
+
+	strfilter_node__delete(right);
+	return ep ? -EINVAL : -ENOMEM;
+}
+
+/* Join @rules to @filter with a logical OR; see strfilter__append(). */
+int strfilter__or(struct strfilter *filter, const char *rules, const char **err)
+{
+	const bool join_with_or = true;
+
+	return strfilter__append(filter, join_with_or, rules, err);
+}
+
+/* Join @rules to @filter with a logical AND; see strfilter__append(). */
+int strfilter__and(struct strfilter *filter, const char *rules,
+		   const char **err)
+{
+	const bool join_with_or = false;
+
+	return strfilter__append(filter, join_with_or, rules, err);
+}
+
+/*
+ * Evaluate the subtree at @node against @str. Operator nodes combine their
+ * branches (with short-circuit evaluation); any other node is a glob rule
+ * handed to strglobmatch(). A NULL node or a node without ->p is false.
+ */
+static bool strfilter_node__compare(struct strfilter_node *node,
+				    const char *str)
+{
+	bool lhs;
+
+	if (!node || !node->p)
+		return false;
+
+	if (*node->p == '!')	/* NOT */
+		return !strfilter_node__compare(node->r, str);
+
+	if (*node->p != '|' && *node->p != '&')
+		return strglobmatch(str, node->p);
+
+	lhs = strfilter_node__compare(node->l, str);
+	if (*node->p == '|')	/* OR */
+		return lhs || strfilter_node__compare(node->r, str);
+
+	/* AND */
+	return lhs && strfilter_node__compare(node->r, str);
+}
+
+/* Return true if @str matches the filter rules; a NULL @filter matches nothing */
+bool strfilter__compare(struct strfilter *filter, const char *str)
+{
+	return filter ? strfilter_node__compare(filter->root, str) : false;
+}
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf);
+
+/*
+ * sprint node in parenthesis if needed
+ *
+ * A node with a right branch (an operator node) is wrapped in "(...)";
+ * any other node is printed bare. With @buf == NULL only the length is
+ * computed. Returns the number of characters used, or a negative error code
+ * from strfilter_node__sprint().
+ */
+static int strfilter_node__sprint_pt(struct strfilter_node *node, char *buf)
+{
+	int len;
+	/* A NULL node is rejected by strfilter_node__sprint(); don't crash before that */
+	int pt = (node && node->r) ? 2 : 0;	/* don't need to check node->l */
+
+	if (buf && pt)
+		*buf++ = '(';
+	len = strfilter_node__sprint(node, buf);
+	if (len < 0)
+		return len;
+	if (buf && pt)
+		*(buf + len) = ')';
+	return len + pt;
+}
+
+/*
+ * Write the rule string for the subtree at @node into @buf, or only measure
+ * it when @buf is NULL.
+ *
+ * Returns the number of characters the subtree takes (terminator not
+ * counted), or -EINVAL for a NULL node or a node without ->p.
+ * Only the leaf case (strcpy) writes a NUL; a ')' stored afterwards by
+ * strfilter_node__sprint_pt() is not followed by one.
+ */
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf)
+{
+	int len = 0, rlen;
+
+	if (!node || !node->p)
+		return -EINVAL;
+
+	switch (*node->p) {
+	case '|':
+	case '&':
+		/* Binary operator: left operand first, then share the '!' path */
+		len = strfilter_node__sprint_pt(node->l, buf);
+		if (len < 0)
+			return len;
+		__fallthrough;
+	case '!':
+		if (buf) {
+			*(buf + len++) = *node->p;
+			/* skip everything written so far, operator included */
+			buf += len;
+		} else
+			len++;
+		rlen = strfilter_node__sprint_pt(node->r, buf);
+		if (rlen < 0)
+			return rlen;
+		len += rlen;
+		break;
+	default:
+		/* Leaf: the glob rule itself */
+		len = strlen(node->p);
+		if (buf)
+			strcpy(buf, node->p);
+	}
+
+	return len;
+}
+
+/*
+ * Reconstruct the rule string of @filter.
+ *
+ * The tree is printed twice: once with a NULL buffer to get the length and
+ * once into the allocated buffer. Returns a malloc()ed, NUL-terminated
+ * string the caller must free(), or NULL if @filter is NULL, the tree
+ * cannot be printed or the allocation fails.
+ */
+char *strfilter__string(struct strfilter *filter)
+{
+	int len;
+	char *ret = NULL;
+
+	if (!filter)
+		return NULL;
+
+	len = strfilter_node__sprint(filter->root, NULL);
+	if (len < 0)
+		return NULL;
+
+	ret = malloc(len + 1);
+	if (ret) {
+		strfilter_node__sprint(filter->root, ret);
+		/*
+		 * The printer only terminates after a leaf; an expression
+		 * ending in ')' would otherwise be left unterminated.
+		 */
+		ret[len] = '\0';
+	}
+
+	return ret;
+}
diff --git a/util/strfilter.h b/util/strfilter.h
new file mode 100644
index 0000000..e0c25a4
--- /dev/null
+++ b/util/strfilter.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STRFILTER_H
+#define __PERF_STRFILTER_H
+/* General purpose glob matching filter */
+
+#include <linux/list.h>
+#include <stdbool.h>
+
+/* A node of the string filter expression tree */
+struct strfilter_node {
+	struct strfilter_node *l;	/* Tree left branch (for &,|) */
+	struct strfilter_node *r;	/* Tree right branch (for !,&,|) */
+	const char *p;		/* Operator or rule */
+};
+
+/* String filter: owns the expression tree rooted at @root */
+struct strfilter {
+	struct strfilter_node *root;	/* top node of the parsed rule expression */
+};
+
+/**
+ * strfilter__new - Create a new string filter
+ * @rules: Filter rule, which is a combination of glob expressions.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and return a new strfilter. Return NULL if an error is detected.
+ * In that case, *@err indicates where it was detected, and *@err is NULL
+ * if a memory allocation failed.
+ */
+struct strfilter *strfilter__new(const char *rules, const char **err);
+
+/**
+ * strfilter__or - Append an additional rule by logical-or
+ * @filter: Original string filter
+ * @rules: Filter rule to be appended at the right of the root of
+ *         @filter by using logical-or.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and join it to the @filter by using logical-or.
+ * Return 0 if success, or return the error code.
+ */
+int strfilter__or(struct strfilter *filter,
+		  const char *rules, const char **err);
+
+/**
+ * strfilter__and - Append an additional rule by logical-and
+ * @filter: Original string filter
+ * @rules: Filter rule to be appended at the right of the root of
+ *         @filter by using logical-and.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and join it to the @filter by using logical-and.
+ * Return 0 if success, or return the error code.
+ */
+int strfilter__and(struct strfilter *filter,
+		   const char *rules, const char **err);
+
+/**
+ * strfilter__compare - compare given string and a string filter
+ * @filter: String filter
+ * @str: target string
+ *
+ * Compare @str and @filter. Return true if @str matches the rules.
+ */
+bool strfilter__compare(struct strfilter *filter, const char *str);
+
+/**
+ * strfilter__delete - delete a string filter
+ * @filter: String filter to delete
+ *
+ * Delete @filter.
+ */
+void strfilter__delete(struct strfilter *filter);
+
+/**
+ * strfilter__string - Reconstruct a rule string from filter
+ * @filter: String filter to reconstruct
+ *
+ * Reconstruct a rule string from @filter. This is useful for
+ * debug messages. Note that the returned string must be freed afterward.
+ */
+char *strfilter__string(struct strfilter *filter);
+
+#endif
diff --git a/util/string.c b/util/string.c
new file mode 100644
index 0000000..d8bfd0c
--- /dev/null
+++ b/util/string.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "string2.h"
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <stdlib.h>
+
+#include "sane_ctype.h"
+
+#define K 1024LL
+/*
+ * perf_atoll()
+ * Parse (\d+)(b|B|kb|KB|mb|MB|gb|GB|tb|TB) (e.g. "256MB")
+ * and return its numeric value
+ *
+ * A bare number is accepted too. The unit letter and the trailing 'b' must
+ * have the same case. Returns -1 for anything else.
+ */
+s64 perf_atoll(const char *str)
+{
+	unsigned int shift;
+	s64 length;
+	char *p;
+	char c;
+
+	if (!isdigit(str[0]))
+		return -1;
+
+	length = strtoll(str, &p, 10);
+	c = *p++;
+
+	if (c == '\0')
+		return length;
+	if (c == 'b' || c == 'B')
+		return *p ? -1 : length;
+
+	/* two-letter suffixes */
+	if (c == 'k' || c == 'K')
+		shift = 10;
+	else if (c == 'm' || c == 'M')
+		shift = 20;
+	else if (c == 'g' || c == 'G')
+		shift = 30;
+	else if (c == 't' || c == 'T')
+		shift = 40;
+	else
+		return -1;
+
+	length <<= shift;
+
+	/* we want the cases to match */
+	if (strcmp(p, islower(c) ? "b" : "B") != 0)
+		return -1;
+
+	return length;
+}
+
+/*
+ * Helper function for splitting a string into an argv-like array.
+ * originally copied from lib/argv_split.c
+ */
+/* Return the first non-whitespace position at or after @cp. */
+static const char *skip_sep(const char *cp)
+{
+	for (; *cp && isspace(*cp); cp++)
+		;
+
+	return cp;
+}
+
+/* Return the first whitespace position (or the terminator) at or after @cp. */
+static const char *skip_arg(const char *cp)
+{
+	for (; *cp && !isspace(*cp); cp++)
+		;
+
+	return cp;
+}
+
+/* Count the whitespace-separated words in @str. */
+static int count_argc(const char *str)
+{
+	int count = 0;
+
+	for (str = skip_sep(str); *str; str = skip_sep(skip_arg(str)))
+		count++;
+
+	return count;
+}
+
+/**
+ * argv_free - free an argv
+ * @argv: the argument vector to be freed (NULL is accepted and ignored)
+ *
+ * Frees an argv and the strings it points to. The vector must be
+ * NULL-terminated, as returned by argv_split().
+ */
+void argv_free(char **argv)
+{
+	char **p;
+
+	/* Behave like free(): nothing to do for NULL */
+	if (!argv)
+		return;
+
+	for (p = argv; *p; p++) {
+		free(*p);
+		*p = NULL;
+	}
+
+	free(argv);
+}
+
+/**
+ * argv_split - split a string at whitespace, returning an argv
+ * @str: the string to be split
+ * @argcp: returned argument count (may be NULL)
+ *
+ * Returns a NULL-terminated array of newly allocated strings, one per
+ * whitespace-separated word of @str. No quote processing is performed and
+ * runs of whitespace count as a single separator. Returns NULL on memory
+ * allocation failure. Release the result with argv_free().
+ */
+char **argv_split(const char *str, int *argcp)
+{
+	int argc = count_argc(str);
+	char **argv = calloc(argc + 1, sizeof(*argv));
+	char **slot = argv;
+
+	if (argv == NULL)
+		return NULL;
+
+	if (argcp)
+		*argcp = argc;
+
+	for (str = skip_sep(str); *str; str = skip_sep(str)) {
+		const char *start = str;
+
+		str = skip_arg(str);
+		*slot = strndup(start, str - start);
+		if (*slot == NULL) {
+			/* calloc() left the remaining slots NULL, so this is safe */
+			argv_free(argv);
+			return NULL;
+		}
+		slot++;
+	}
+	*slot = NULL;
+
+	return argv;
+}
+
+/*
+ * Character class matching
+ *
+ * @pat points just after the opening '['. A leading '!' complements the
+ * class, the first class character is always taken literally, and "a-z"
+ * denotes a range. On a successful parse *@npat is set just past the
+ * closing ']' and the (possibly complemented) match result is returned.
+ *
+ * A malformed class (no closing ']', reversed range, or the pattern ending
+ * inside the class) returns false and leaves *@npat untouched.
+ */
+static bool __match_charclass(const char *pat, char c, const char **npat)
+{
+	bool complement = false, ret = true;
+
+	if (*pat == '!') {
+		complement = true;
+		pat++;
+	}
+	if (!*pat)		/* Pattern ends right after '[' or '[!' */
+		goto error;
+	if (*pat++ == c)	/* First character is special */
+		goto end;
+
+	while (*pat && *pat != ']') {	/* Matching */
+		if (*pat == '-' && *(pat + 1) != ']') {	/* Range */
+			if (!*(pat + 1))	/* Range cut off by end of pattern */
+				goto error;
+			if (*(pat - 1) <= c && c <= *(pat + 1))
+				goto end;
+			if (*(pat - 1) > *(pat + 1))
+				goto error;
+			pat += 2;
+		} else if (*pat++ == c)
+			goto end;
+	}
+	if (!*pat)
+		goto error;
+	ret = false;
+
+end:
+	while (*pat && *pat != ']')	/* Searching closing */
+		pat++;
+	if (!*pat)
+		goto error;
+	*npat = pat + 1;
+	return complement ? !ret : ret;
+
+error:
+	return false;
+}
+
+/*
+ * Glob/lazy pattern matching
+ *
+ * Match @str against @pat. '?' matches any single character, '[...]' is
+ * handled by __match_charclass(), '\' makes the next pattern character
+ * literal and '*' matches any run of characters (tried recursively).
+ * With @ignore_space, whitespace in both @str and @pat is skipped; with
+ * @case_ins, literal characters are compared via tolower().
+ *
+ * Returns true when both strings are fully consumed or a trailing '*'
+ * swallows the rest of @str.
+ */
+static bool __match_glob(const char *str, const char *pat, bool ignore_space,
+			bool case_ins)
+{
+	while (*str && *pat && *pat != '*') {
+		if (ignore_space) {
+			/* Ignore spaces for lazy matching */
+			if (isspace(*str)) {
+				str++;
+				continue;
+			}
+			if (isspace(*pat)) {
+				pat++;
+				continue;
+			}
+		}
+		if (*pat == '?') {	/* Matches any single character */
+			str++;
+			pat++;
+			continue;
+		} else if (*pat == '[')	/* Character classes/Ranges */
+			if (__match_charclass(pat + 1, *str, &pat)) {
+				str++;
+				continue;
+			} else
+				return false;
+		else if (*pat == '\\') /* Escaped char match as normal char */
+			pat++;
+		/* Literal comparison of the current characters */
+		if (case_ins) {
+			if (tolower(*str) != tolower(*pat))
+				return false;
+		} else if (*str != *pat)
+			return false;
+		str++;
+		pat++;
+	}
+	/* Check wild card */
+	if (*pat == '*') {
+		while (*pat == '*')
+			pat++;
+		if (!*pat)	/* Tail wild card matches all */
+			return true;
+		/* Try the rest of the pattern at every remaining position of str */
+		while (*str)
+			if (__match_glob(str++, pat, ignore_space, case_ins))
+				return true;
+	}
+	return !*str && !*pat;
+}
+
+/**
+ * strglobmatch - glob expression pattern matching
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * Returns true if @str matches @pat. @pat may contain the wildcards '*' and
+ * '?' and character classes ([CHARS], with complementation and ranges).
+ * A backslash makes the following special character an ordinary one.
+ * Matching is case sensitive and whitespace is significant.
+ *
+ * Note: if @pat syntax is broken, this always returns false.
+ */
+bool strglobmatch(const char *str, const char *pat)
+{
+	const bool ignore_space = false, case_ins = false;
+
+	return __match_glob(str, pat, ignore_space, case_ins);
+}
+
+/* Same as strglobmatch(), but literal characters are compared ignoring case. */
+bool strglobmatch_nocase(const char *str, const char *pat)
+{
+	const bool ignore_space = false, case_ins = true;
+
+	return __match_glob(str, pat, ignore_space, case_ins);
+}
+
+/**
+ * strlazymatch - matching pattern strings lazily with glob pattern
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This is similar to strglobmatch, except that whitespace is skipped while
+ * matching, both in the target string and in the pattern.
+ */
+bool strlazymatch(const char *str, const char *pat)
+{
+	const bool ignore_space = true, case_ins = false;
+
+	return __match_glob(str, pat, ignore_space, case_ins);
+}
+
+/**
+ * strtailcmp - Compare the tail of two strings
+ * @s1: 1st string to be compared
+ * @s2: 2nd string to be compared
+ *
+ * Compares the strings backwards from their ends over the length of the
+ * shorter one. Returns 0 if the whole of either string equals the tail of
+ * the other, otherwise the difference of the first mismatching characters.
+ */
+int strtailcmp(const char *s1, const char *s2)
+{
+	int i1 = strlen(s1);
+	int i2 = strlen(s2);
+	int left = i1 < i2 ? i1 : i2;
+
+	while (left-- > 0) {
+		i1--;
+		i2--;
+		if (s1[i1] != s2[i2])
+			return s1[i1] - s2[i2];
+	}
+	return 0;
+}
+
+/**
+ * strxfrchar - Locate and replace character in @s
+ * @s:    The string to be searched/changed.
+ * @from: Source character to be replaced.
+ * @to:   Destination character.
+ *
+ * Every occurrence of @from is replaced in place. A @from of '\0' is a
+ * no-op, since replacing the terminator would run off the string.
+ *
+ * Return pointer to the changed string.
+ */
+char *strxfrchar(char *s, char from, char to)
+{
+	char *p = s;
+
+	/* strchr() would find the terminating NUL itself */
+	if (from == '\0')
+		return s;
+
+	while ((p = strchr(p, from)) != NULL)
+		*p++ = to;
+
+	return s;
+}
+
+/**
+ * ltrim - Removes leading whitespace from @s.
+ * @s: The string to be stripped.
+ *
+ * @s itself is not modified. Return pointer to the first non-whitespace
+ * character in @s.
+ */
+char *ltrim(char *s)
+{
+	for (; isspace(*s); s++)
+		;
+
+	return s;
+}
+
+/**
+ * rtrim - Removes trailing whitespace from @s.
+ * @s: The string to be stripped.
+ *
+ * The string is shortened in place: a %NUL-terminator is stored right after
+ * the last non-whitespace character. An empty string is left untouched.
+ * Returns @s.
+ */
+char *rtrim(char *s)
+{
+	size_t n = strlen(s);
+
+	if (!n)
+		return s;
+
+	while (n > 0 && isspace(s[n - 1]))
+		n--;
+	s[n] = '\0';
+
+	return s;
+}
+
+/*
+ * Build "<var> == <int> || <var> == <int> ..." for the @nints values in
+ * @ints, or "<var> != <int> && ..." when @in is false.
+ *
+ * Returns a malloc()ed string the caller must free() (an empty string when
+ * @nints is 0), or NULL if the allocation fails.
+ */
+char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
+{
+	/*
+	 * Worst case per entry: " || " (4) + var + " == " (4) + "%d".
+	 * sizeof(int) * 3 + 1 bounds the decimal digits plus sign of an int,
+	 * so the buffer can never be too small and nothing gets truncated.
+	 */
+	const size_t entry_len = strlen(var) + 8 + sizeof(int) * 3 + 1;
+	size_t size = nints * entry_len + 1; /* \0 */
+	size_t i, printed = 0;
+	char *expr = malloc(size);
+
+	if (expr) {
+		const char *or_and = "||", *eq_neq = "==";
+
+		if (!in) {
+			or_and = "&&";
+			eq_neq = "!=";
+		}
+
+		expr[0] = '\0';	/* nints == 0 yields an empty expression */
+
+		for (i = 0; i < nints; ++i) {
+			if (i > 0)
+				printed += scnprintf(expr + printed, size - printed,
+						     " %s ", or_and);
+			printed += scnprintf(expr + printed, size - printed,
+					     "%s %s %d", var, eq_neq, ints[i]);
+		}
+	}
+
+	return expr;
+}
+
+/*
+ * Like strpbrk(), but not break if it is right after a backslash (escaped)
+ *
+ * Returns the first character of @str that is in @stopset and is not
+ * escaped, or NULL if there is none. A stop character is escaped when the
+ * character before it is a backslash, unless that backslash is itself
+ * preceded by a backslash within the part of the string being searched.
+ */
+char *strpbrk_esc(char *str, const char *stopset)
+{
+	char *ptr;
+
+	for (;;) {
+		ptr = strpbrk(str, stopset);
+		/* Nothing found, or nothing in front of it that could escape it */
+		if (!ptr || ptr == str || *(ptr - 1) != '\\')
+			break;
+		/*
+		 * Preceded by a backslash. Only look one further back when
+		 * that is still inside the searched string.
+		 */
+		if (ptr - str >= 2 && *(ptr - 2) == '\\')
+			break;
+		str = ptr + 1;
+	}
+
+	return ptr;
+}
+
+/*
+ * Like strdup, but do not copy a single backslash
+ *
+ * Returns a malloc()ed copy of @str (NULL if strdup() fails) in which each
+ * escaping backslash is dropped and the character following it is kept
+ * verbatim, so a doubled backslash yields one backslash. A lone backslash
+ * at the very end is dropped.
+ */
+char *strdup_esc(const char *str)
+{
+	char *s, *d, *p, *ret = strdup(str);
+
+	if (!ret)
+		return NULL;
+
+	d = strchr(ret, '\\');
+	if (!d)
+		return ret;
+
+	/* d: write cursor, at the backslash being dropped; s: read cursor after it */
+	s = d + 1;
+	do {
+		if (*s == '\0') {
+			*d = '\0';
+			break;
+		}
+		/* Search from s + 1 so that the escaped character at *s is always kept */
+		p = strchr(s + 1, '\\');
+		if (p) {
+			memmove(d, s, p - s);
+			d += p - s;
+			s = p + 1;
+		} else
+			memmove(d, s, strlen(s) + 1);	/* tail, terminator included */
+	} while (p);
+
+	return ret;
+}
diff --git a/util/string2.h b/util/string2.h
new file mode 100644
index 0000000..4c68a09
--- /dev/null
+++ b/util/string2.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_STRING_H
+#define PERF_STRING_H
+
+#include <linux/types.h>
+#include <stddef.h>
+#include <string.h>
+
+s64 perf_atoll(const char *str);
+char **argv_split(const char *str, int *argcp);
+void argv_free(char **argv);
+bool strglobmatch(const char *str, const char *pat);
+bool strglobmatch_nocase(const char *str, const char *pat);
+bool strlazymatch(const char *str, const char *pat);
+/* True if @str contains any of the glob metacharacters '*', '?' or '['. */
+static inline bool strisglob(const char *str)
+{
+	const char *meta = strpbrk(str, "*?[");
+
+	return meta != NULL;
+}
+int strtailcmp(const char *s1, const char *s2);
+char *strxfrchar(char *s, char from, char to);
+
+char *ltrim(char *s);
+char *rtrim(char *s);
+
+/* Strip trailing whitespace in place (rtrim), then skip leading whitespace (ltrim). */
+static inline char *trim(char *s)
+{
+	char *stripped = rtrim(s);
+
+	return ltrim(stripped);
+}
+
+char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints);
+
+/* asprintf_expr_inout_ints() with @in == true. */
+static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints)
+{
+	const bool in = true;
+
+	return asprintf_expr_inout_ints(var, in, nints, ints);
+}
+
+/* asprintf_expr_inout_ints() with @in == false. */
+static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints)
+{
+	const bool in = false;
+
+	return asprintf_expr_inout_ints(var, in, nints, ints);
+}
+
+char *strpbrk_esc(char *str, const char *stopset);
+char *strdup_esc(const char *str);
+
+#endif /* PERF_STRING_H */
diff --git a/util/strlist.c b/util/strlist.c
new file mode 100644
index 0000000..9de5434
--- /dev/null
+++ b/util/strlist.c
@@ -0,0 +1,209 @@
+/*
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Licensed under the GPLv2.
+ */
+
+#include "strlist.h"
+#include "util.h"
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * rblist node_new callback: allocate a str_node for the string @entry.
+ * The string is strdup()ed when the list has dupstr set, otherwise the
+ * pointer is stored as is. Returns NULL if an allocation fails.
+ */
+static
+struct rb_node *strlist__node_new(struct rblist *rblist, const void *entry)
+{
+	struct strlist *strlist = container_of(rblist, struct strlist, rblist);
+	struct str_node *snode = malloc(sizeof(*snode));
+	const char *s = entry;
+
+	if (snode == NULL)
+		return NULL;
+
+	if (strlist->dupstr) {
+		s = strdup(s);
+		if (s == NULL) {
+			free(snode);
+			return NULL;
+		}
+	}
+
+	snode->s = s;
+	return &snode->rb_node;
+}
+
+/* Free @snode; its string is released as well when @dupstr is set. */
+static void str_node__delete(struct str_node *snode, bool dupstr)
+{
+	if (dupstr) {
+		zfree((char **)&snode->s);
+	}
+
+	free(snode);
+}
+
+/* rblist node_delete callback: free the str_node that embeds @rb_node. */
+static
+void strlist__node_delete(struct rblist *rblist, struct rb_node *rb_node)
+{
+	struct str_node *snode = container_of(rb_node, struct str_node, rb_node);
+	struct strlist *slist = container_of(rblist, struct strlist, rblist);
+
+	str_node__delete(snode, slist->dupstr);
+}
+
+/* rblist node_cmp callback: strcmp() of the node's string against the key @entry. */
+static int strlist__node_cmp(struct rb_node *rb_node, const void *entry)
+{
+	struct str_node *snode = container_of(rb_node, struct str_node, rb_node);
+	const char *key = entry;
+
+	return strcmp(snode->s, key);
+}
+
+/* Add @new_entry to @slist; returns the result of rblist__add_node(). */
+int strlist__add(struct strlist *slist, const char *new_entry)
+{
+	struct rblist *rblist = &slist->rblist;
+
+	return rblist__add_node(rblist, new_entry);
+}
+
+/*
+ * Add every line of the file @filename to @slist.
+ *
+ * Lines are read in chunks of at most sizeof(entry) - 1 characters; the
+ * trailing newline, if present, is stripped. Returns 0 on success, -errno
+ * if the file cannot be opened, or the first error from strlist__add().
+ */
+int strlist__load(struct strlist *slist, const char *filename)
+{
+	char entry[1024];
+	int err = 0;
+	FILE *fp = fopen(filename, "r");
+
+	if (fp == NULL)
+		return -errno;
+
+	while (fgets(entry, sizeof(entry), fp) != NULL) {
+		const size_t len = strlen(entry);
+
+		if (len == 0)
+			continue;
+		/*
+		 * Only strip a real newline: the last line of the file or an
+		 * over-long line has none, and must keep its last character.
+		 */
+		if (entry[len - 1] == '\n')
+			entry[len - 1] = '\0';
+
+		err = strlist__add(slist, entry);
+		if (err != 0)
+			break;
+	}
+
+	fclose(fp);
+	return err;
+}
+
+/* Remove @snode from @slist through rblist__remove_node(). */
+void strlist__remove(struct strlist *slist, struct str_node *snode)
+{
+	struct rb_node *rb_node = &snode->rb_node;
+
+	rblist__remove_node(&slist->rblist, rb_node);
+}
+
+/* Look up @entry in @slist; returns its str_node or NULL if rblist__find() finds nothing. */
+struct str_node *strlist__find(struct strlist *slist, const char *entry)
+{
+	struct rb_node *rb_node = rblist__find(&slist->rblist, entry);
+
+	if (!rb_node)
+		return NULL;
+
+	return container_of(rb_node, struct str_node, rb_node);
+}
+
+/*
+ * Handle one list element @s:
+ *  - "file://PATH" loads the entries from PATH;
+ *  - with @subst_dir, an existing "@subst_dir/@s" is loaded as a file, and a
+ *    missing one is -ENOENT for file_only lists;
+ *  - otherwise @s itself is added.
+ * Returns 0 or a negative error code.
+ */
+static int strlist__parse_list_entry(struct strlist *slist, const char *s,
+				     const char *subst_dir)
+{
+	char *subst = NULL;
+	int err;
+
+	if (strncmp(s, "file://", 7) == 0)
+		return strlist__load(slist, s + 7);
+
+	if (!subst_dir) {
+		err = strlist__add(slist, s);
+	} else if (asprintf(&subst, "%s/%s", subst_dir, s) < 0) {
+		err = -ENOMEM;
+	} else if (access(subst, F_OK) == 0) {
+		err = strlist__load(slist, subst);
+	} else if (slist->file_only) {
+		err = -ENOENT;
+	} else {
+		err = strlist__add(slist, s);
+	}
+
+	free(subst);
+	return err;
+}
+
+/*
+ * Split @s at commas and feed each piece to strlist__parse_list_entry(),
+ * stopping at the first error. A non-empty tail after the last comma is
+ * parsed too; empty pieces between commas are still passed on.
+ *
+ * NOTE(review): although @s is const, every ',' is temporarily overwritten
+ * with '\0' through the pointer returned by strchr() and restored afterwards,
+ * so @s must live in writable memory - TODO confirm no caller passes a
+ * string literal.
+ */
+static int strlist__parse_list(struct strlist *slist, const char *s, const char *subst_dir)
+{
+	char *sep;
+	int err;
+
+	while ((sep = strchr(s, ',')) != NULL) {
+		*sep = '\0';	/* terminate the current piece in place */
+		err = strlist__parse_list_entry(slist, s, subst_dir);
+		*sep = ',';	/* put the caller's string back as it was */
+		if (err != 0)
+			return err;
+		s = sep + 1;
+	}
+
+	return *s ? strlist__parse_list_entry(slist, s, subst_dir) : 0;
+}
+
+/*
+ * Create a string list, optionally filled from the comma separated @list.
+ *
+ * @config may be NULL, in which case strings are duplicated, no directory
+ * substitution is done and entries need not be files. Returns the new list,
+ * or NULL if the allocation or the parsing of @list fails.
+ */
+struct strlist *strlist__new(const char *list, const struct strlist_config *config)
+{
+	struct strlist *slist = malloc(sizeof(*slist));
+
+	if (slist != NULL) {
+		bool dupstr = true;
+		bool file_only = false;
+		const char *dirname = NULL;
+
+		if (config) {
+			dupstr = !config->dont_dupstr;
+			dirname = config->dirname;
+			file_only = config->file_only;
+		}
+
+		rblist__init(&slist->rblist);
+		slist->rblist.node_cmp    = strlist__node_cmp;
+		slist->rblist.node_new    = strlist__node_new;
+		slist->rblist.node_delete = strlist__node_delete;
+
+		slist->dupstr	 = dupstr;
+		slist->file_only = file_only;
+
+		if (list && strlist__parse_list(slist, list, dirname) != 0)
+			goto out_error;
+	}
+
+	return slist;
+out_error:
+	/*
+	 * Entries parsed before the failure are already in the list; a bare
+	 * free(slist) would leak them. Use the regular destructor, which
+	 * hands the embedded rblist to rblist__delete() just like normal
+	 * teardown does.
+	 */
+	strlist__delete(slist);
+	return NULL;
+}
+
+/* Destroy @slist by passing its embedded rblist to rblist__delete(); NULL is ignored. */
+void strlist__delete(struct strlist *slist)
+{
+	if (slist == NULL)
+		return;
+
+	rblist__delete(&slist->rblist);
+}
+
+/* Return the str_node that rblist__entry() yields for @idx, or NULL if there is none. */
+struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx)
+{
+	struct rb_node *rb_node = rblist__entry(&slist->rblist, idx);
+
+	if (!rb_node)
+		return NULL;
+
+	return container_of(rb_node, struct str_node, rb_node);
+}
diff --git a/util/strlist.h b/util/strlist.h
new file mode 100644
index 0000000..d58f1e0
--- /dev/null
+++ b/util/strlist.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STRLIST_H
+#define __PERF_STRLIST_H
+
+#include <linux/rbtree.h>
+#include <stdbool.h>
+
+#include "rblist.h"
+
+/* One entry of a strlist: a string hooked into the list's rb-tree */
+struct str_node {
+	struct rb_node rb_node;	/* linkage used by the rblist code */
+	const char     *s;	/* the string; a strdup()ed copy when the list has dupstr set */
+};
+
+/* List of strings built on top of rblist */
+struct strlist {
+	struct rblist rblist;	/* first member; strlist__delete() passes its address to rblist__delete() */
+	bool	      dupstr;	/* entries are strdup()ed on insertion and freed on removal */
+	bool	      file_only;	/* see struct strlist_config */
+};
+
+/*
+ * Optional settings for strlist__new().
+ *
+ * @dont_dupstr: Store the caller's string pointers instead of strdup()ed
+ *               copies.
+ * @file_only: When dirname is present, only consider entries as filenames,
+ *             that should not be added to the list if dirname/entry is not
+ *             found
+ * @dirname: Directory in which each entry is looked up as a file to load
+ *           entries from; NULL disables the lookup.
+ */
+struct strlist_config {
+	bool dont_dupstr;
+	bool file_only;
+	const char *dirname;
+};
+
+struct strlist *strlist__new(const char *slist, const struct strlist_config *config);
+void strlist__delete(struct strlist *slist);
+
+void strlist__remove(struct strlist *slist, struct str_node *sn);
+int strlist__load(struct strlist *slist, const char *filename);
+int strlist__add(struct strlist *slist, const char *str);
+
+struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx);
+struct str_node *strlist__find(struct strlist *slist, const char *entry);
+
+/* True if strlist__find() locates @entry in @slist. */
+static inline bool strlist__has_entry(struct strlist *slist, const char *entry)
+{
+	const struct str_node *found = strlist__find(slist, entry);
+
+	return found != NULL;
+}
+
+/* Returns rblist__empty() for the list's embedded rblist. */
+static inline bool strlist__empty(const struct strlist *slist)
+{
+	const struct rblist *rblist = &slist->rblist;
+
+	return rblist__empty(rblist);
+}
+
+/* Returns rblist__nr_entries() for the list's embedded rblist. */
+static inline unsigned int strlist__nr_entries(const struct strlist *slist)
+{
+	const struct rblist *rblist = &slist->rblist;
+
+	return rblist__nr_entries(rblist);
+}
+
+/* For strlist iteration */
+static inline struct str_node *strlist__first(struct strlist *slist)
+{
+	struct rb_node *rn = rb_first(&slist->rblist.entries);
+
+	/* rb_first() found no node: nothing to iterate over */
+	if (!rn)
+		return NULL;
+	return rb_entry(rn, struct str_node, rb_node);
+}
+/* Entry following @sn per rb_next(); NULL when @sn is NULL or the last entry. */
+static inline struct str_node *strlist__next(struct str_node *sn)
+{
+	struct rb_node *rn = sn ? rb_next(&sn->rb_node) : NULL;
+
+	if (!rn)
+		return NULL;
+	return rb_entry(rn, struct str_node, rb_node);
+}
+
+/**
+ * strlist__for_each_entry - iterate over a strlist
+ * @pos:	the &struct str_node to use as a loop cursor.
+ * @slist:	the &struct strlist for loop.
+ */
+#define strlist__for_each_entry(pos, slist)	\
+	for (pos = strlist__first(slist); pos; pos = strlist__next(pos))
+
+/**
+ * strlist__for_each_entry_safe - iterate over a strlist safe against removal
+ *                                of str_node
+ * @pos:	the &struct str_node to use as a loop cursor.
+ * @n:		another &struct str_node to use as temporary storage; it always
+ *		holds the successor of @pos, fetched before the loop body runs.
+ * @slist:	the &struct strlist for loop.
+ */
+#define strlist__for_each_entry_safe(pos, n, slist)	\
+	for (pos = strlist__first(slist), n = strlist__next(pos); pos;\
+	     pos = n, n = strlist__next(n))
+#endif /* __PERF_STRLIST_H */
diff --git a/util/svghelper.c b/util/svghelper.c
new file mode 100644
index 0000000..1cbada2
--- /dev/null
+++ b/util/svghelper.c
@@ -0,0 +1,809 @@
+/*
+ * svghelper.c - helper functions for outputting svg
+ *
+ * (C) Copyright 2009 Intel Corporation
+ *
+ * Authors:
+ *     Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/bitmap.h>
+#include <linux/time64.h>
+
+#include "perf.h"
+#include "svghelper.h"
+#include "util.h"
+#include "cpumap.h"
+
+static u64 first_time, last_time;
+static u64 turbo_frequency, max_freq;
+
+
+#define SLOT_MULT 30.0
+#define SLOT_HEIGHT 25.0
+#define SLOT_HALF (SLOT_HEIGHT / 2)
+
+int svg_page_width = 1000;
+u64 svg_highlight;
+const char *svg_highlight_name;
+
+#define MIN_TEXT_SIZE 0.01
+
+static u64 total_height;
+static FILE *svgfile;
+
+static double cpu2slot(int cpu)
+{
+	return 2 * cpu + 1;
+}
+
+static int *topology_map;
+
+static double cpu2y(int cpu)
+{
+	if (topology_map)
+		return cpu2slot(topology_map[cpu]) * SLOT_MULT;
+	else
+		return cpu2slot(cpu) * SLOT_MULT;
+}
+
+static double time2pixels(u64 __time)
+{
+	double X;	/* resulting horizontal position */
+
+	X = 1.0 * svg_page_width * (__time - first_time) / (last_time - first_time);	/* linear map: first_time -> 0, last_time -> svg_page_width; the leading 1.0 forces floating-point arithmetic */
+	return X;
+}
+
+/*
+ * Round text sizes so that the svg viewer only needs a discrete
+ * number of renderings of the font
+ */
+static double round_text_size(double size)
+{
+	double step = 10.0;
+	int tries;
+
+	/* sizes of 10 and up pass through; smaller ones snap down to 10 / 2^k */
+	if (size >= 10.0)
+		return size;
+	for (tries = 0; tries < 100; tries++, step /= 2.0) {
+		if (size >= step)
+			return step;
+	}
+	return size;	/* nothing matched within 100 halvings */
+}
+
+void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end)
+{
+	int new_width;
+
+	svgfile = fopen(filename, "w");
+	if (!svgfile) {
+		fprintf(stderr, "Cannot open %s for output\n", filename);
+		return;
+	}
+	first_time = start;
+	first_time = first_time / 100000000 * 100000000;
+	last_time = end;
+
+	/*
+	 * if the recording is short, we default to a width of 1000, but
+	 * for longer recordings we want at least 200 units of width per second
+	 */
+	new_width = (last_time - first_time) / 5000000;
+
+	if (new_width > svg_page_width)
+		svg_page_width = new_width;
+
+	total_height = (1 + rows + cpu2slot(cpus)) * SLOT_MULT;
+	fprintf(svgfile, "<?xml version=\"1.0\" standalone=\"no\"?> \n");
+	fprintf(svgfile, "<!DOCTYPE svg SYSTEM \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n");
+	fprintf(svgfile, "<svg width=\"%i\" height=\"%" PRIu64 "\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n", svg_page_width, total_height);
+
+	fprintf(svgfile, "<defs>\n  <style type=\"text/css\">\n    <![CDATA[\n");
+
+	fprintf(svgfile, "      rect          { stroke-width: 1; }\n");
+	fprintf(svgfile, "      rect.process  { fill:rgb(180,180,180); fill-opacity:0.9; stroke-width:1;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.process2 { fill:rgb(180,180,180); fill-opacity:0.9; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.process3 { fill:rgb(180,180,180); fill-opacity:0.5; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.sample   { fill:rgb(  0,  0,255); fill-opacity:0.8; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.sample_hi{ fill:rgb(255,128,  0); fill-opacity:0.8; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.error    { fill:rgb(255,  0,  0); fill-opacity:0.5; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.net      { fill:rgb(  0,128,  0); fill-opacity:0.5; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.disk     { fill:rgb(  0,  0,255); fill-opacity:0.5; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.sync     { fill:rgb(128,128,  0); fill-opacity:0.5; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.poll     { fill:rgb(  0,128,128); fill-opacity:0.2; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.blocked  { fill:rgb(255,  0,  0); fill-opacity:0.5; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.waiting  { fill:rgb(224,214,  0); fill-opacity:0.8; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.WAITING  { fill:rgb(255,214, 48); fill-opacity:0.6; stroke-width:0;   stroke:rgb(  0,  0,  0); } \n");
+	fprintf(svgfile, "      rect.cpu      { fill:rgb(192,192,192); fill-opacity:0.2; stroke-width:0.5; stroke:rgb(128,128,128); } \n");
+	fprintf(svgfile, "      rect.pstate   { fill:rgb(128,128,128); fill-opacity:0.8; stroke-width:0; } \n");
+	fprintf(svgfile, "      rect.c1       { fill:rgb(255,214,214); fill-opacity:0.5; stroke-width:0; } \n");
+	fprintf(svgfile, "      rect.c2       { fill:rgb(255,172,172); fill-opacity:0.5; stroke-width:0; } \n");
+	fprintf(svgfile, "      rect.c3       { fill:rgb(255,130,130); fill-opacity:0.5; stroke-width:0; } \n");
+	fprintf(svgfile, "      rect.c4       { fill:rgb(255, 88, 88); fill-opacity:0.5; stroke-width:0; } \n");
+	fprintf(svgfile, "      rect.c5       { fill:rgb(255, 44, 44); fill-opacity:0.5; stroke-width:0; } \n");
+	fprintf(svgfile, "      rect.c6       { fill:rgb(255,  0,  0); fill-opacity:0.5; stroke-width:0; } \n");
+	fprintf(svgfile, "      line.pstate   { stroke:rgb(255,255,  0); stroke-opacity:0.8; stroke-width:2; } \n");
+
+	fprintf(svgfile, "    ]]>\n   </style>\n</defs>\n");
+}
+
+/* Snap a fractional height up to the next quarter step: 0.25, 0.50, 0.75 or 1.00. */
+static double normalize_height(double height)
+{
+	if (height < 0.25)
+		return 0.25;
+	if (height < 0.50)
+		return 0.50;
+	if (height < 0.75)
+		return 0.75;
+	return 1.00;	/* was 0.100, which made the top bucket shorter than the bottom one */
+}
+
+void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges)
+{
+	double w = time2pixels(end) - time2pixels(start);
+	height = normalize_height(height);
+
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+	fprintf(svgfile, "<title>fd=%d error=%d merges=%d</title>\n", fd, err, merges);
+	fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(start),
+		w,
+		Yslot * SLOT_MULT,
+		SLOT_HALF * height,
+		type);
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges)
+{
+	double w = time2pixels(end) - time2pixels(start);
+	height = normalize_height(height);
+
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+	fprintf(svgfile, "<title>fd=%d error=%d merges=%d</title>\n", fd, err, merges);
+	fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(start),
+		w,
+		Yslot * SLOT_MULT + SLOT_HEIGHT - SLOT_HALF * height,
+		SLOT_HALF * height,
+		type);
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges)
+{
+	double w = time2pixels(end) - time2pixels(start);
+	height = normalize_height(height);
+
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+	fprintf(svgfile, "<title>fd=%d error=%d merges=%d</title>\n", fd, err, merges);
+	fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(start),
+		w,
+		Yslot * SLOT_MULT + SLOT_HEIGHT - SLOT_HEIGHT * height,
+		SLOT_HEIGHT * height,
+		type);
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_box(int Yslot, u64 start, u64 end, const char *type)
+{
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(start), time2pixels(end)-time2pixels(start), Yslot * SLOT_MULT, SLOT_HEIGHT, type);
+}
+
+static char *time_to_string(u64 duration);
+void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace)
+{
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+	fprintf(svgfile, "<title>#%d blocked %s</title>\n", cpu,
+		time_to_string(end - start));
+	if (backtrace)
+		fprintf(svgfile, "<desc>Blocked on:\n%s</desc>\n", backtrace);
+	svg_box(Yslot, start, end, "blocked");
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace)
+{
+	double text_size;
+	const char *type;
+
+	if (!svgfile)
+		return;
+
+	if (svg_highlight && end - start > svg_highlight)
+		type = "sample_hi";
+	else
+		type = "sample";
+	fprintf(svgfile, "<g>\n");
+
+	fprintf(svgfile, "<title>#%d running %s</title>\n",
+		cpu, time_to_string(end - start));
+	if (backtrace)
+		fprintf(svgfile, "<desc>Switched because:\n%s</desc>\n", backtrace);
+	fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(start), time2pixels(end)-time2pixels(start), Yslot * SLOT_MULT, SLOT_HEIGHT,
+		type);
+
+	text_size = (time2pixels(end)-time2pixels(start));
+	if (cpu > 9)
+		text_size = text_size/2;
+	if (text_size > 1.25)
+		text_size = 1.25;
+	text_size = round_text_size(text_size);
+
+	if (text_size > MIN_TEXT_SIZE)
+		fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\" font-size=\"%.8fpt\">%i</text>\n",
+			time2pixels(start), Yslot *  SLOT_MULT + SLOT_HEIGHT - 1, text_size,  cpu + 1);
+
+	fprintf(svgfile, "</g>\n");
+}
+
+static char *time_to_string(u64 duration)
+{
+	static char text[80];	/* static storage: the returned pointer is overwritten by the next call */
+
+	text[0] = 0;
+
+	if (duration < NSEC_PER_USEC) /* less than 1 usec */
+		return text;	/* such durations yield an empty string */
+
+	if (duration < NSEC_PER_MSEC) { /* less than 1 msec */
+		sprintf(text, "%.1f us", duration / (double)NSEC_PER_USEC);
+		return text;
+	}
+	sprintf(text, "%.1f ms", duration / (double)NSEC_PER_MSEC);	/* everything else is printed in milliseconds */
+
+	return text;
+}
+
+void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace)
+{
+	char *text;
+	const char *style;
+	double font_size;
+
+	if (!svgfile)
+		return;
+
+	style = "waiting";
+
+	if (end-start > 10 * NSEC_PER_MSEC) /* 10 msec */
+		style = "WAITING";
+
+	text = time_to_string(end-start);
+
+	font_size = 1.0 * (time2pixels(end)-time2pixels(start));
+
+	if (font_size > 3)
+		font_size = 3;
+
+	font_size = round_text_size(font_size);
+
+	fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\">\n", time2pixels(start), Yslot * SLOT_MULT);
+	fprintf(svgfile, "<title>#%d waiting %s</title>\n", cpu, time_to_string(end - start));
+	if (backtrace)
+		fprintf(svgfile, "<desc>Waiting on:\n%s</desc>\n", backtrace);
+	fprintf(svgfile, "<rect x=\"0\" width=\"%.8f\" y=\"0\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(end)-time2pixels(start), SLOT_HEIGHT, style);
+	if (font_size > MIN_TEXT_SIZE)
+		fprintf(svgfile, "<text transform=\"rotate(90)\" font-size=\"%.8fpt\"> %s</text>\n",
+			font_size, text);
+	fprintf(svgfile, "</g>\n");
+}
+
+/* Return the "model name" text from /proc/cpuinfo ("" if none); also raises max_freq from cpufreq. */
+static char *cpu_model(void)
+{
+	static char cpu_m[255];
+	char buf[256];
+	FILE *file;
+
+	cpu_m[0] = 0;
+	/* CPU type: text from offset 13 on (presumably past a "model name\t: " prefix), newline kept */
+	file = fopen("/proc/cpuinfo", "r");
+	if (file) {
+		while (fgets(buf, sizeof(buf), file)) {
+			if (strstr(buf, "model name") && strlen(buf) > 13) {
+				snprintf(cpu_m, sizeof(cpu_m), "%s", &buf[13]);
+				break;
+			}
+		}
+		fclose(file);
+	}
+
+	/* CPU frequency: raise max_freq to the largest leading number found on any line */
+	file = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies", "r");
+	if (file) {
+		while (fgets(buf, sizeof(buf), file)) {
+			u64 freq = strtoull(buf, NULL, 10);
+			if (freq > max_freq)
+				max_freq = freq;
+		}
+		fclose(file);
+	}
+	return cpu_m;
+}
+
+void svg_cpu_box(int cpu, u64 __max_freq, u64 __turbo_freq)
+{
+	char cpu_string[80];
+	if (!svgfile)
+		return;
+
+	max_freq = __max_freq;
+	turbo_frequency = __turbo_freq;
+
+	fprintf(svgfile, "<g>\n");
+
+	fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"cpu\"/>\n",
+		time2pixels(first_time),
+		time2pixels(last_time)-time2pixels(first_time),
+		cpu2y(cpu), SLOT_MULT+SLOT_HEIGHT);
+
+	sprintf(cpu_string, "CPU %i", (int)cpu);
+	fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\">%s</text>\n",
+		10+time2pixels(first_time), cpu2y(cpu) + SLOT_HEIGHT/2, cpu_string);
+
+	fprintf(svgfile, "<text transform=\"translate(%.8f,%.8f)\" font-size=\"1.25pt\">%s</text>\n",
+		10+time2pixels(first_time), cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - 4, cpu_model());
+
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace)
+{
+	double width;
+	const char *type;
+
+	if (!svgfile)
+		return;
+
+	if (svg_highlight && end - start >= svg_highlight)
+		type = "sample_hi";
+	else if (svg_highlight_name && strstr(name, svg_highlight_name))
+		type = "sample_hi";
+	else
+		type = "sample";
+
+	fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\">\n", time2pixels(start), cpu2y(cpu));
+	fprintf(svgfile, "<title>%d %s running %s</title>\n", pid, name, time_to_string(end - start));
+	if (backtrace)
+		fprintf(svgfile, "<desc>Switched because:\n%s</desc>\n", backtrace);
+	fprintf(svgfile, "<rect x=\"0\" width=\"%.8f\" y=\"0\" height=\"%.1f\" class=\"%s\"/>\n",
+		time2pixels(end)-time2pixels(start), SLOT_MULT+SLOT_HEIGHT, type);
+	width = time2pixels(end)-time2pixels(start);
+	if (width > 6)
+		width = 6;
+
+	width = round_text_size(width);
+
+	if (width > MIN_TEXT_SIZE)
+		fprintf(svgfile, "<text transform=\"rotate(90)\" font-size=\"%.8fpt\">%s</text>\n",
+			width, name);
+
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_cstate(int cpu, u64 start, u64 end, int type)
+{
+	double width;
+	char style[128];
+
+	if (!svgfile)
+		return;
+
+
+	fprintf(svgfile, "<g>\n");
+
+	if (type > 6)
+		type = 6;
+	sprintf(style, "c%i", type);
+
+	fprintf(svgfile, "<rect class=\"%s\" x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\"/>\n",
+		style,
+		time2pixels(start), time2pixels(end)-time2pixels(start),
+		cpu2y(cpu), SLOT_MULT+SLOT_HEIGHT);
+
+	width = (time2pixels(end)-time2pixels(start))/2.0;
+	if (width > 6)
+		width = 6;
+
+	width = round_text_size(width);
+
+	if (width > MIN_TEXT_SIZE)
+		fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\" font-size=\"%.8fpt\">C%i</text>\n",
+			time2pixels(start), cpu2y(cpu)+width, width, type);
+
+	fprintf(svgfile, "</g>\n");
+}
+
+static char *HzToHuman(unsigned long hz)
+{
+	static char buffer[1024];
+	unsigned long long Hz;
+
+	memset(buffer, 0, 1024);
+
+	Hz = hz;
+
+	/* default: just put the Number in */
+	sprintf(buffer, "%9lli", Hz);
+
+	if (Hz > 1000)
+		sprintf(buffer, " %6lli Mhz", (Hz+500)/1000);
+
+	if (Hz > 1500000)
+		sprintf(buffer, " %6.2f Ghz", (Hz+5000.0)/1000000);
+
+	if (Hz == turbo_frequency)
+		sprintf(buffer, "Turbo");
+
+	return buffer;
+}
+
+void svg_pstate(int cpu, u64 start, u64 end, u64 freq)
+{
+	double height = 0;
+
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+
+	if (max_freq)
+		height = freq * 1.0 / max_freq * (SLOT_HEIGHT + SLOT_MULT);
+	height = 1 + cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - height;
+	fprintf(svgfile, "<line x1=\"%.8f\" x2=\"%.8f\" y1=\"%.1f\" y2=\"%.1f\" class=\"pstate\"/>\n",
+		time2pixels(start), time2pixels(end), height, height);
+	fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\" font-size=\"0.25pt\">%s</text>\n",
+		time2pixels(start), height+0.9, HzToHuman(freq));
+
+	fprintf(svgfile, "</g>\n");
+}
+
+
+void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace)
+{
+	double height;
+
+	if (!svgfile)
+		return;
+
+
+	fprintf(svgfile, "<g>\n");
+
+	fprintf(svgfile, "<title>%s wakes up %s</title>\n",
+		desc1 ? desc1 : "?",
+		desc2 ? desc2 : "?");
+
+	if (backtrace)
+		fprintf(svgfile, "<desc>%s</desc>\n", backtrace);
+
+	if (row1 < row2) {
+		if (row1) {
+			fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+				time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT,  time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/32);
+			if (desc2)
+				fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &gt;</text></g>\n",
+					time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT + SLOT_HEIGHT/48, desc2);
+		}
+		if (row2) {
+			fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+				time2pixels(start), row2 * SLOT_MULT - SLOT_MULT/32,  time2pixels(start), row2 * SLOT_MULT);
+			if (desc1)
+				fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &gt;</text></g>\n",
+					time2pixels(start), row2 * SLOT_MULT - SLOT_MULT/32, desc1);
+		}
+	} else {
+		if (row2) {
+			fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+				time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT,  time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/32);
+			if (desc1)
+				fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &lt;</text></g>\n",
+					time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/48, desc1);
+		}
+		if (row1) {
+			fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+				time2pixels(start), row1 * SLOT_MULT - SLOT_MULT/32,  time2pixels(start), row1 * SLOT_MULT);
+			if (desc2)
+				fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &lt;</text></g>\n",
+					time2pixels(start), row1 * SLOT_MULT - SLOT_HEIGHT/32, desc2);
+		}
+	}
+	height = row1 * SLOT_MULT;
+	if (row2 > row1)
+		height += SLOT_HEIGHT;
+	if (row1)
+		fprintf(svgfile, "<circle  cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\"  style=\"fill:rgb(32,255,32)\"/>\n",
+			time2pixels(start), height);
+
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_wakeline(u64 start, int row1, int row2, const char *backtrace)
+{
+	double height;
+
+	if (!svgfile)
+		return;
+
+
+	fprintf(svgfile, "<g>\n");
+
+	if (backtrace)
+		fprintf(svgfile, "<desc>%s</desc>\n", backtrace);
+
+	if (row1 < row2)
+		fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+			time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT,  time2pixels(start), row2 * SLOT_MULT);
+	else
+		fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+			time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT,  time2pixels(start), row1 * SLOT_MULT);
+
+	height = row1 * SLOT_MULT;
+	if (row2 > row1)
+		height += SLOT_HEIGHT;
+	fprintf(svgfile, "<circle  cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\"  style=\"fill:rgb(32,255,32)\"/>\n",
+			time2pixels(start), height);
+
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_interrupt(u64 start, int row, const char *backtrace)
+{
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+
+	fprintf(svgfile, "<title>Wakeup from interrupt</title>\n");
+
+	if (backtrace)
+		fprintf(svgfile, "<desc>%s</desc>\n", backtrace);
+
+	fprintf(svgfile, "<circle  cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\"  style=\"fill:rgb(255,128,128)\"/>\n",
+			time2pixels(start), row * SLOT_MULT);
+	fprintf(svgfile, "<circle  cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\"  style=\"fill:rgb(255,128,128)\"/>\n",
+			time2pixels(start), row * SLOT_MULT + SLOT_HEIGHT);
+
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_text(int Yslot, u64 start, const char *text)
+{
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\">%s</text>\n",
+		time2pixels(start), Yslot * SLOT_MULT+SLOT_HEIGHT/2, text);
+}
+
+static void svg_legenda_box(int X, const char *text, const char *style)
+{
+	double boxsize;
+	boxsize = SLOT_HEIGHT / 2;
+
+	fprintf(svgfile, "<rect x=\"%i\" width=\"%.8f\" y=\"0\" height=\"%.1f\" class=\"%s\"/>\n",
+		X, boxsize, boxsize, style);
+	fprintf(svgfile, "<text transform=\"translate(%.8f, %.8f)\" font-size=\"%.8fpt\">%s</text>\n",
+		X + boxsize + 5, boxsize, 0.8 * boxsize, text);
+}
+
+void svg_io_legenda(void)
+{
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+	svg_legenda_box(0,	"Disk", "disk");
+	svg_legenda_box(100,	"Network", "net");
+	svg_legenda_box(200,	"Sync", "sync");
+	svg_legenda_box(300,	"Poll", "poll");
+	svg_legenda_box(400,	"Error", "error");
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_legenda(void)
+{
+	if (!svgfile)
+		return;
+
+	fprintf(svgfile, "<g>\n");
+	svg_legenda_box(0,	"Running", "sample");
+	svg_legenda_box(100,	"Idle","c1");
+	svg_legenda_box(200,	"Deeper Idle", "c3");
+	svg_legenda_box(350,	"Deepest Idle", "c6");
+	svg_legenda_box(550,	"Sleeping", "process2");
+	svg_legenda_box(650,	"Waiting for cpu", "waiting");
+	svg_legenda_box(800,	"Blocked on IO", "blocked");
+	fprintf(svgfile, "</g>\n");
+}
+
+void svg_time_grid(double min_thickness)
+{
+	u64 i;
+
+	if (!svgfile)
+		return;
+
+	i = first_time;
+	while (i < last_time) {
+		int color = 220;
+		double thickness = 0.075;
+		if ((i % 100000000) == 0) {
+			thickness = 0.5;
+			color = 192;
+		}
+		if ((i % 1000000000) == 0) {
+			thickness = 2.0;
+			color = 128;
+		}
+
+		if (thickness >= min_thickness)
+			fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%" PRIu64 "\" style=\"stroke:rgb(%i,%i,%i);stroke-width:%.3f\"/>\n",
+				time2pixels(i), SLOT_MULT/2, time2pixels(i),
+				total_height, color, color, color, thickness);
+
+		i += 10000000;
+	}
+}
+
+void svg_close(void)
+{
+	if (svgfile) {
+		fprintf(svgfile, "</svg>\n");
+		fclose(svgfile);
+		svgfile = NULL;
+	}
+}
+
+#define cpumask_bits(maskp) ((maskp)->bits)
+typedef struct { DECLARE_BITMAP(bits, MAX_NR_CPUS); } cpumask_t;
+
+struct topology {
+	cpumask_t *sib_core;
+	int sib_core_nr;
+	cpumask_t *sib_thr;
+	int sib_thr_nr;
+};
+
+static void scan_thread_topology(int *map, struct topology *t, int cpu, int *pos)
+{
+	int i;
+	int thr;
+
+	for (i = 0; i < t->sib_thr_nr; i++) {
+		if (!test_bit(cpu, cpumask_bits(&t->sib_thr[i])))
+			continue;
+
+		for_each_set_bit(thr,
+				 cpumask_bits(&t->sib_thr[i]),
+				 MAX_NR_CPUS)
+			if (map[thr] == -1)
+				map[thr] = (*pos)++;
+	}
+}
+
+static void scan_core_topology(int *map, struct topology *t)
+{
+	int pos = 0;
+	int i;
+	int cpu;
+
+	for (i = 0; i < t->sib_core_nr; i++)
+		for_each_set_bit(cpu,
+				 cpumask_bits(&t->sib_core[i]),
+				 MAX_NR_CPUS)
+			scan_thread_topology(map, t, cpu, &pos);
+}
+
+/* Build a cpu_map from @s and set one bit in @b per CPU it lists; 0 on success, -1 on failure. */
+static int str_to_bitmap(char *s, cpumask_t *b)
+{
+	int i, c;
+	int ret = 0;
+	struct cpu_map *m;
+
+	m = cpu_map__new(s);
+	if (!m)
+		return -1;
+
+	for (i = 0; i < m->nr; i++) {
+		c = m->map[i];
+		if (c < 0 || c >= MAX_NR_CPUS) {	/* never index outside the MAX_NR_CPUS-bit mask */
+			ret = -1;
+			break;
+		}
+
+		set_bit(c, cpumask_bits(b));
+	}
+
+	cpu_map__put(m);
+
+	return ret;
+}
+
+/* Fill topology_map from NUL-separated sibling list strings; returns 0 on success, -1 on error. */
+int svg_build_topology_map(char *sib_core, int sib_core_nr,
+			   char *sib_thr, int sib_thr_nr)
+{
+	int i, ret = -1;
+	struct topology t;
+
+	t.sib_core_nr = sib_core_nr;
+	t.sib_thr_nr = sib_thr_nr;
+	t.sib_core = calloc(sib_core_nr, sizeof(cpumask_t));
+	t.sib_thr = calloc(sib_thr_nr, sizeof(cpumask_t));
+
+	if (!t.sib_core || !t.sib_thr) {
+		fprintf(stderr, "topology: no memory\n");
+		goto exit;
+	}
+
+	for (i = 0; i < sib_core_nr; i++) {
+		if (str_to_bitmap(sib_core, &t.sib_core[i])) {
+			fprintf(stderr, "topology: can't parse siblings map\n");
+			goto exit;
+		}
+
+		sib_core += strlen(sib_core) + 1;
+	}
+
+	for (i = 0; i < sib_thr_nr; i++) {
+		if (str_to_bitmap(sib_thr, &t.sib_thr[i])) {
+			fprintf(stderr, "topology: can't parse siblings map\n");
+			goto exit;
+		}
+
+		sib_thr += strlen(sib_thr) + 1;
+	}
+
+	topology_map = malloc(sizeof(int) * MAX_NR_CPUS);
+	if (!topology_map) {
+		fprintf(stderr, "topology: no memory\n");
+		goto exit;
+	}
+
+	for (i = 0; i < MAX_NR_CPUS; i++)
+		topology_map[i] = -1;
+
+	scan_core_topology(topology_map, &t);
+	/* the sibling masks are only read during the scan: fall through so they are freed on success too */
+	ret = 0;
+exit:
+	zfree(&t.sib_core);
+	zfree(&t.sib_thr);
+
+	return ret;
+}
diff --git a/util/svghelper.h b/util/svghelper.h
new file mode 100644
index 0000000..e55338d
--- /dev/null
+++ b/util/svghelper.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SVGHELPER_H
+#define __PERF_SVGHELPER_H
+
+#include <linux/types.h>
+
+void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end);
+void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_box(int Yslot, u64 start, u64 end, const char *type);
+void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency);
+
+
+void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace);
+void svg_cstate(int cpu, u64 start, u64 end, int type);
+void svg_pstate(int cpu, u64 start, u64 end, u64 freq);
+
+
+void svg_time_grid(double min_thickness);
+void svg_io_legenda(void);
+void svg_legenda(void);
+void svg_wakeline(u64 start, int row1, int row2, const char *backtrace);
+void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace);
+void svg_interrupt(u64 start, int row, const char *backtrace);
+void svg_text(int Yslot, u64 start, const char *text);
+void svg_close(void);
+int svg_build_topology_map(char *sib_core, int sib_core_nr, char *sib_thr, int sib_thr_nr);
+
+extern int svg_page_width;
+extern u64 svg_highlight;
+extern const char *svg_highlight_name;
+
+#endif /* __PERF_SVGHELPER_H */
diff --git a/util/symbol-elf.c b/util/symbol-elf.c
new file mode 100644
index 0000000..2de7705
--- /dev/null
+++ b/util/symbol-elf.c
@@ -0,0 +1,2083 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include "symbol.h"
+#include "demangle-java.h"
+#include "demangle-rust.h"
+#include "machine.h"
+#include "vdso.h"
+#include "debug.h"
+#include "sane_ctype.h"
+#include <symbol/kallsyms.h>
+
+#ifndef EM_AARCH64
+#define EM_AARCH64	183  /* ARM 64 bit */
+#endif
+
+typedef Elf64_Nhdr GElf_Nhdr;
+
+#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
+extern char *cplus_demangle(const char *, int);
+
+static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i)
+{
+	return cplus_demangle(c, i);
+}
+#else
+#ifdef NO_DEMANGLE
+static inline char *bfd_demangle(void __maybe_unused *v,
+				 const char __maybe_unused *c,
+				 int __maybe_unused i)
+{
+	return NULL;
+}
+#else
+#define PACKAGE 'perf'
+#include <bfd.h>
+#endif
+#endif
+
+#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT
+static int elf_getphdrnum(Elf *elf, size_t *dst)
+{
+	GElf_Ehdr gehdr;
+	GElf_Ehdr *ehdr;
+
+	ehdr = gelf_getehdr(elf, &gehdr);
+	if (!ehdr)
+		return -1;
+
+	*dst = ehdr->e_phnum;
+
+	return 0;
+}
+#endif
+
+#ifndef HAVE_ELF_GETSHDRSTRNDX_SUPPORT
+static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe_unused)
+{
+	pr_err("%s: update your libelf to > 0.140, this one lacks elf_getshdrstrndx().\n", __func__);
+	return -1;
+}
+#endif
+
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
+
+/**
+ * elf_symtab__for_each_symbol - iterate thru all the symbols
+ *
+ * @syms: struct elf_symtab instance to iterate
+ * @idx: uint32_t idx
+ * @sym: GElf_Sym iterator
+ */
+#define elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) \
+	for (idx = 0, gelf_getsym(syms, idx, &sym);\
+	     idx < nr_syms; \
+	     idx++, gelf_getsym(syms, idx, &sym))
+
+static inline uint8_t elf_sym__type(const GElf_Sym *sym)
+{
+	return GELF_ST_TYPE(sym->st_info);
+}
+
+#ifndef STT_GNU_IFUNC
+#define STT_GNU_IFUNC 10
+#endif
+
+static inline int elf_sym__is_function(const GElf_Sym *sym)
+{
+	return (elf_sym__type(sym) == STT_FUNC ||
+		elf_sym__type(sym) == STT_GNU_IFUNC) &&
+	       sym->st_name != 0 &&
+	       sym->st_shndx != SHN_UNDEF;
+}
+
+static inline bool elf_sym__is_object(const GElf_Sym *sym)
+{
+	return elf_sym__type(sym) == STT_OBJECT &&
+		sym->st_name != 0 &&
+		sym->st_shndx != SHN_UNDEF;
+}
+
+static inline int elf_sym__is_label(const GElf_Sym *sym)
+{
+	return elf_sym__type(sym) == STT_NOTYPE &&
+		sym->st_name != 0 &&
+		sym->st_shndx != SHN_UNDEF &&
+		sym->st_shndx != SHN_ABS;
+}
+
+static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type)
+{
+	switch (type) {
+	case MAP__FUNCTION:
+		return elf_sym__is_function(sym);
+	case MAP__VARIABLE:
+		return elf_sym__is_object(sym);
+	default:
+		return false;
+	}
+}
+
+static inline const char *elf_sym__name(const GElf_Sym *sym,
+					const Elf_Data *symstrs)
+{
+	return symstrs->d_buf + sym->st_name;
+}
+
+static inline const char *elf_sec__name(const GElf_Shdr *shdr,
+					const Elf_Data *secstrs)
+{
+	return secstrs->d_buf + shdr->sh_name;
+}
+
+static inline int elf_sec__is_text(const GElf_Shdr *shdr,
+					const Elf_Data *secstrs)
+{
+	return strstr(elf_sec__name(shdr, secstrs), "text") != NULL;
+}
+
+static inline bool elf_sec__is_data(const GElf_Shdr *shdr,
+				    const Elf_Data *secstrs)
+{
+	return strstr(elf_sec__name(shdr, secstrs), "data") != NULL;
+}
+
+static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs,
+			  enum map_type type)
+{
+	switch (type) {
+	case MAP__FUNCTION:
+		return elf_sec__is_text(shdr, secstrs);
+	case MAP__VARIABLE:
+		return elf_sec__is_data(shdr, secstrs);
+	default:
+		return false;
+	}
+}
+
+static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
+{
+	Elf_Scn *sec = NULL;
+	GElf_Shdr shdr;
+	size_t cnt = 1;	/* starts at 1 and advances once per section visited */
+
+	while ((sec = elf_nextscn(elf, sec)) != NULL) {
+		gelf_getshdr(sec, &shdr);	/* NOTE(review): return value is not checked */
+
+		if ((addr >= shdr.sh_addr) &&
+		    (addr < (shdr.sh_addr + shdr.sh_size)))
+			return cnt;	/* addr lies in [sh_addr, sh_addr + sh_size) */
+
+		++cnt;
+	}
+
+	return -1;	/* no section contains addr; the value converts to (size_t)-1 */
+}
+
+Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+			     GElf_Shdr *shp, const char *name, size_t *idx)
+{
+	Elf_Scn *sec = NULL;
+	size_t cnt = 1;
+
+	/* Elf is corrupted/truncated, avoid calling elf_strptr. */
+	if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL))
+		return NULL;
+
+	while ((sec = elf_nextscn(elf, sec)) != NULL) {
+		char *str;
+
+		gelf_getshdr(sec, shp);
+		str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name);
+		if (str && !strcmp(name, str)) {
+			if (idx)
+				*idx = cnt;
+			return sec;
+		}
+		++cnt;
+	}
+
+	return NULL;
+}
+
+static bool want_demangle(bool is_kernel_sym)
+{
+	return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+}
+
+static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
+{
+	int demangle_flags = verbose > 0 ? (DMGL_PARAMS | DMGL_ANSI) : DMGL_NO_OPTS;
+	char *demangled = NULL;
+
+	/*
+	 * We need to figure out if the object was created from C++ sources
+	 * DWARF DW_compile_unit has this, but we don't always have access
+	 * to it...
+	 */
+	if (!want_demangle(dso->kernel || kmodule))
+	    return demangled;
+
+	demangled = bfd_demangle(NULL, elf_name, demangle_flags);
+	if (demangled == NULL)
+		demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
+	else if (rust_is_mangled(demangled))
+		/*
+		    * Input to Rust demangling is the BFD-demangled
+		    * name which it Rust-demangles in place.
+		    */
+		rust_demangle_sym(demangled);
+
+	return demangled;
+}
+
+#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \
+	for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \
+	     idx < nr_entries; \
+	     ++idx, pos = gelf_getrel(reldata, idx, &pos_mem))
+
+#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \
+	for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \
+	     idx < nr_entries; \
+	     ++idx, pos = gelf_getrela(reldata, idx, &pos_mem))
+
+/*
+ * Synthesize one "<name>@plt" symbol covering [plt_offset, plt_offset +
+ * plt_entry_size) for the .dynsym entry @symidx and insert it into the
+ * symbols of @dso for @map's type.
+ *
+ * Returns 0 on success, -1 if the symbol cannot be read or allocated.
+ */
+static int plt_symbol__insert(struct dso *dso, struct map *map,
+			      Elf_Data *syms, Elf_Data *symstrs, int symidx,
+			      u64 plt_offset, u64 plt_entry_size)
+{
+	char sympltname[1024];
+	const char *elf_name;
+	char *demangled;
+	struct symbol *f;
+	GElf_Sym sym;
+
+	/* Without a valid symbol there is no name to look up. */
+	if (gelf_getsym(syms, symidx, &sym) == NULL)
+		return -1;
+
+	elf_name = elf_sym__name(&sym, symstrs);
+	demangled = demangle_sym(dso, 0, elf_name);
+	if (demangled != NULL)
+		elf_name = demangled;
+	snprintf(sympltname, sizeof(sympltname), "%s@plt", elf_name);
+	free(demangled);
+
+	f = symbol__new(plt_offset, plt_entry_size, STB_GLOBAL, sympltname);
+	if (!f)
+		return -1;
+
+	symbols__insert(&dso->symbols[map->type], f);
+	return 0;
+}
+
+/*
+ * We need to check if we have a .dynsym, so that we can handle the
+ * .plt, synthesizing its symbols, that aren't on the symtabs (be it
+ * .dynsym or .symtab).
+ * And always look at the original dso, not at debuginfo packages, that
+ * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
+ *
+ * Returns the number of synthesized symbols.  0 is returned when there is
+ * no .dynsym or no .rela.plt/.rel.plt section, and also on any later
+ * failure, in which case a debug message is emitted.
+ */
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map)
+{
+	uint32_t nr_rel_entries, idx;
+	u64 plt_offset, plt_header_size, plt_entry_size;
+	GElf_Shdr shdr_plt;
+	GElf_Shdr shdr_rel_plt, shdr_dynsym;
+	Elf_Data *reldata, *syms, *symstrs;
+	Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym;
+	size_t dynsym_idx;
+	GElf_Ehdr ehdr;
+	Elf *elf;
+	int nr = 0, err = 0;
+
+	if (!ss->dynsym)
+		return 0;
+
+	elf = ss->elf;
+	ehdr = ss->ehdr;
+
+	scn_dynsym = ss->dynsym;
+	shdr_dynsym = ss->dynshdr;
+	dynsym_idx = ss->dynsym_idx;
+
+	scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
+					  ".rela.plt", NULL);
+	if (scn_plt_rel == NULL) {
+		scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
+						  ".rel.plt", NULL);
+		/* No PLT relocations: nothing to synthesize, not an error. */
+		if (scn_plt_rel == NULL)
+			goto out_elf_end;
+	}
+
+	/* From here on every early exit is reported as a problem. */
+	err = -1;
+
+	if (shdr_rel_plt.sh_link != dynsym_idx)
+		goto out_elf_end;
+
+	/* A zero entry size would make the entry count division trap. */
+	if (shdr_rel_plt.sh_entsize == 0)
+		goto out_elf_end;
+
+	if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL)
+		goto out_elf_end;
+
+	/*
+	 * Fetch the relocation section to find the idxes to the GOT
+	 * and the symbols in the .dynsym they refer to.
+	 */
+	reldata = elf_getdata(scn_plt_rel, NULL);
+	if (reldata == NULL)
+		goto out_elf_end;
+
+	syms = elf_getdata(scn_dynsym, NULL);
+	if (syms == NULL)
+		goto out_elf_end;
+
+	scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link);
+	if (scn_symstrs == NULL)
+		goto out_elf_end;
+
+	symstrs = elf_getdata(scn_symstrs, NULL);
+	if (symstrs == NULL)
+		goto out_elf_end;
+
+	if (symstrs->d_size == 0)
+		goto out_elf_end;
+
+	nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
+	plt_offset = shdr_plt.sh_offset;
+	switch (ehdr.e_machine) {
+		case EM_ARM:
+			plt_header_size = 20;
+			plt_entry_size = 12;
+			break;
+
+		case EM_AARCH64:
+			plt_header_size = 32;
+			plt_entry_size = 16;
+			break;
+
+		default: /* FIXME: s390/alpha/mips/parisc/powerpc/sh/sparc/xtensa need to be checked */
+			plt_header_size = shdr_plt.sh_entsize;
+			plt_entry_size = shdr_plt.sh_entsize;
+			break;
+	}
+	/* The first PLT slot is the header, symbols start right after it. */
+	plt_offset += plt_header_size;
+
+	if (shdr_rel_plt.sh_type == SHT_RELA) {
+		GElf_Rela pos_mem, *pos;
+
+		elf_section__for_each_rela(reldata, pos, pos_mem, idx,
+					   nr_rel_entries) {
+			/* Entry missing from the section data: corrupted. */
+			if (pos == NULL)
+				goto out_elf_end;
+
+			if (plt_symbol__insert(dso, map, syms, symstrs,
+					       GELF_R_SYM(pos->r_info),
+					       plt_offset, plt_entry_size))
+				goto out_elf_end;
+
+			plt_offset += plt_entry_size;
+			++nr;
+		}
+	} else if (shdr_rel_plt.sh_type == SHT_REL) {
+		GElf_Rel pos_mem, *pos;
+
+		elf_section__for_each_rel(reldata, pos, pos_mem, idx,
+					  nr_rel_entries) {
+			/* Entry missing from the section data: corrupted. */
+			if (pos == NULL)
+				goto out_elf_end;
+
+			if (plt_symbol__insert(dso, map, syms, symstrs,
+					       GELF_R_SYM(pos->r_info),
+					       plt_offset, plt_entry_size))
+				goto out_elf_end;
+
+			plt_offset += plt_entry_size;
+			++nr;
+		}
+	}
+
+	err = 0;
+out_elf_end:
+	if (err == 0)
+		return nr;
+	pr_debug("%s: problems reading %s PLT info.\n",
+		 __func__, dso->long_name);
+	return 0;
+}
+
+/* Exported wrapper around the file-local demangle_sym(). */
+char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
+{
+	char *demangled = demangle_sym(dso, kmodule, elf_name);
+
+	return demangled;
+}
+
+/*
+ * Round an offset up to the next multiple of 4 bytes, as needed for note
+ * name and descriptor data.
+ */
+#define NOTE_ALIGN(n) (((n) + 3) & ~3U)
+
+/*
+ * Look for a GNU build-id note in @elf and copy it into @bf.
+ *
+ * @bf must be at least BUILD_ID_SIZE bytes (@size); bytes of @bf beyond the
+ * copied descriptor are zeroed.
+ *
+ * Returns the 4-byte aligned descriptor size of the note on success, -1 if
+ * @size is too small, @elf is not an ELF object, or no build-id note is
+ * found.
+ */
+static int elf_read_build_id(Elf *elf, void *bf, size_t size)
+{
+	int err = -1;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *data;
+	Elf_Scn *sec;
+	Elf_Kind ek;
+	void *ptr, *end;
+
+	if (size < BUILD_ID_SIZE)
+		goto out;
+
+	ek = elf_kind(elf);
+	if (ek != ELF_K_ELF)
+		goto out;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		pr_err("%s: cannot get elf header.\n", __func__);
+		goto out;
+	}
+
+	/*
+	 * Check following sections for notes:
+	 *   '.note.gnu.build-id'
+	 *   '.notes'
+	 *   '.note' (VDSO specific)
+	 */
+	sec = elf_section_by_name(elf, &ehdr, &shdr,
+				  ".note.gnu.build-id", NULL);
+	if (!sec)
+		sec = elf_section_by_name(elf, &ehdr, &shdr,
+					  ".notes", NULL);
+	if (!sec)
+		sec = elf_section_by_name(elf, &ehdr, &shdr,
+					  ".note", NULL);
+	if (!sec)
+		goto out;
+
+	data = elf_getdata(sec, NULL);
+	if (data == NULL)
+		goto out;
+
+	ptr = data->d_buf;
+	end = data->d_buf + data->d_size;
+	while (ptr < end) {
+		GElf_Nhdr *nhdr = ptr;
+		size_t left = end - ptr;
+		size_t namesz, descsz;
+		const char *name;
+
+		/* A partial note header means the section is truncated. */
+		if (left < sizeof(*nhdr))
+			break;
+
+		namesz = NOTE_ALIGN(nhdr->n_namesz);
+		descsz = NOTE_ALIGN(nhdr->n_descsz);
+
+		ptr += sizeof(*nhdr);
+		left -= sizeof(*nhdr);
+
+		/* The name must lie entirely within the section data. */
+		if (namesz > left)
+			break;
+
+		name = ptr;
+		ptr += namesz;
+		left -= namesz;
+
+		if (nhdr->n_type == NT_GNU_BUILD_ID &&
+		    nhdr->n_namesz == sizeof("GNU")) {
+			if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
+				size_t sz = min(size, descsz);
+
+				/* Never copy from beyond the section data. */
+				if (sz > left)
+					sz = left;
+				memcpy(bf, ptr, sz);
+				memset(bf + sz, 0, size - sz);
+				err = descsz;
+				break;
+			}
+		}
+
+		/* A descriptor running past the end terminates the walk. */
+		if (descsz > left)
+			break;
+		ptr += descsz;
+	}
+
+out:
+	return err;
+}
+
+/*
+ * Open @filename as an ELF object and read its build-id into @bf, which
+ * must be at least BUILD_ID_SIZE bytes (@size).
+ *
+ * Returns what elf_read_build_id() returns, or -1 if @size is too small or
+ * the file cannot be opened or read as ELF.
+ */
+int filename__read_build_id(const char *filename, void *bf, size_t size)
+{
+	int ret = -1;
+	Elf *elf;
+	int fd;
+
+	if (size < BUILD_ID_SIZE)
+		return ret;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		return ret;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf != NULL) {
+		ret = elf_read_build_id(elf, bf, size);
+		elf_end(elf);
+	} else {
+		pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
+	}
+
+	close(fd);
+	return ret;
+}
+
+/*
+ * Read and discard @len bytes from @fd, in chunks that fit a local buffer.
+ *
+ * Returns 0 on success, -1 if the bytes could not all be read.
+ */
+static int sysfs__skip_note_bytes(int fd, size_t len)
+{
+	char bf[BUFSIZ];
+
+	while (len) {
+		size_t n = len < sizeof(bf) ? len : sizeof(bf);
+
+		if (read(fd, bf, n) != (ssize_t)n)
+			return -1;
+		len -= n;
+	}
+
+	return 0;
+}
+
+/*
+ * Scan the raw sequence of ELF notes in @filename for a GNU build-id note
+ * and copy its descriptor into @build_id, which must be at least
+ * BUILD_ID_SIZE bytes (@size).  Bytes of @build_id beyond the descriptor
+ * are zeroed.
+ *
+ * Notes that are not the build-id are skipped in full, whatever their size,
+ * so that the following note header is read from the right position.
+ *
+ * Returns 0 if a build-id was read, -1 otherwise.
+ */
+int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
+{
+	int fd, err = -1;
+
+	if (size < BUILD_ID_SIZE)
+		goto out;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		goto out;
+
+	while (1) {
+		char bf[sizeof("GNU")];
+		GElf_Nhdr nhdr;
+		size_t namesz, descsz;
+
+		if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr))
+			break;
+
+		namesz = NOTE_ALIGN(nhdr.n_namesz);
+		descsz = NOTE_ALIGN(nhdr.n_descsz);
+		if (nhdr.n_type == NT_GNU_BUILD_ID &&
+		    nhdr.n_namesz == sizeof("GNU")) {
+			/* n_namesz == sizeof("GNU"), so namesz fits in bf. */
+			if (read(fd, bf, namesz) != (ssize_t)namesz)
+				break;
+			if (memcmp(bf, "GNU", sizeof("GNU")) == 0) {
+				size_t sz = min(descsz, size);
+				if (read(fd, build_id, sz) == (ssize_t)sz) {
+					memset(build_id + sz, 0, size - sz);
+					err = 0;
+					break;
+				}
+			} else if (sysfs__skip_note_bytes(fd, descsz))
+				break;
+		} else {
+			/*
+			 * Skip name and descriptor separately so that the
+			 * two sizes are never added together.
+			 */
+			if (sysfs__skip_note_bytes(fd, namesz) ||
+			    sysfs__skip_note_bytes(fd, descsz))
+				break;
+		}
+	}
+	close(fd);
+out:
+	return err;
+}
+
+/*
+ * Read the file name stored at the start of the .gnu_debuglink section of
+ * @filename into @debuglink, a buffer of @size bytes.
+ *
+ * The result is always NUL-terminated and truncated to fit; the remainder
+ * of @debuglink is zero-filled.  No more than the section's own data is
+ * ever read.
+ *
+ * Returns 0 on success, -1 if the file cannot be read as ELF, has no
+ * usable .gnu_debuglink section, or @size is 0.
+ */
+int filename__read_debuglink(const char *filename, char *debuglink,
+			     size_t size)
+{
+	int fd, err = -1;
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *data;
+	Elf_Scn *sec;
+	Elf_Kind ek;
+	size_t max_len, len;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		goto out;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
+		goto out_close;
+	}
+
+	ek = elf_kind(elf);
+	if (ek != ELF_K_ELF)
+		goto out_elf_end;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		pr_err("%s: cannot get elf header.\n", __func__);
+		goto out_elf_end;
+	}
+
+	sec = elf_section_by_name(elf, &ehdr, &shdr,
+				  ".gnu_debuglink", NULL);
+	if (sec == NULL)
+		goto out_elf_end;
+
+	data = elf_getdata(sec, NULL);
+	if (data == NULL)
+		goto out_elf_end;
+
+	/* No room for even the terminator, or no data to copy from. */
+	if (size == 0 || data->d_buf == NULL)
+		goto out_elf_end;
+
+	/*
+	 * The start of this section is a zero-terminated string, but do not
+	 * trust that: bound the scan by both the section data and the room
+	 * left in @debuglink once the terminator is accounted for.
+	 */
+	max_len = size - 1;
+	if (data->d_size < max_len)
+		max_len = data->d_size;
+
+	len = strnlen(data->d_buf, max_len);
+	memcpy(debuglink, data->d_buf, len);
+	memset(debuglink + len, 0, size - len);
+
+	err = 0;
+
+out_elf_end:
+	elf_end(elf);
+out_close:
+	close(fd);
+out:
+	return err;
+}
+
+/*
+ * Record in dso->needs_swap whether data read from the DSO has to be byte
+ * swapped, by comparing the ELF data encoding @eidata with the endianness
+ * of the running host.
+ *
+ * Returns 0 on success, -EINVAL for an unknown data encoding.
+ */
+static int dso__swap_init(struct dso *dso, unsigned char eidata)
+{
+	static unsigned int const endian = 1;
+	/* The first byte of the value 1 is 1 only on a little endian host. */
+	bool host_is_little = *(unsigned char const *)&endian == 1;
+
+	dso->needs_swap = DSO_SWAP__NO;
+
+	if (eidata == ELFDATA2LSB) {
+		/* We are big endian, DSO is little endian. */
+		if (!host_is_little)
+			dso->needs_swap = DSO_SWAP__YES;
+		return 0;
+	}
+
+	if (eidata == ELFDATA2MSB) {
+		/* We are little endian, DSO is big endian. */
+		if (host_is_little)
+			dso->needs_swap = DSO_SWAP__YES;
+		return 0;
+	}
+
+	pr_err("unrecognized DSO data encoding %d\n", eidata);
+	return -EINVAL;
+}
+
+/* True if the symbol source has a .dynsym or an .opd section. */
+bool symsrc__possibly_runtime(struct symsrc *ss)
+{
+	if (ss->dynsym)
+		return true;
+
+	return ss->opdsec ? true : false;
+}
+
+/* True if a .symtab section was found for this symbol source. */
+bool symsrc__has_symtab(struct symsrc *ss)
+{
+	return ss->symtab ? true : false;
+}
+
+/*
+ * Release what symsrc__init() stored in @ss: the ELF handle, the file
+ * descriptor and the duplicated name.
+ */
+void symsrc__destroy(struct symsrc *ss)
+{
+	elf_end(ss->elf);
+	close(ss->fd);
+	zfree(&ss->name);
+}
+
+/*
+ * Default (weak) policy: symbols need adjusting for executables and for
+ * relocatable objects.
+ */
+bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+	switch (ehdr.e_type) {
+	case ET_EXEC:
+	case ET_REL:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Open @name as an ELF symbol source for @dso and fill in @ss.
+ *
+ * The file is opened directly, or through dso__decompress_kmodule_fd()
+ * when the dso needs decompressing (in which case dso->symtab_type is used
+ * instead of @type).  If the dso has a build-id, and build-id checking is
+ * not disabled, the file is rejected unless its build-id matches.
+ *
+ * On success @ss holds the file descriptor, the ELF handle, a copy of
+ * @name and the .symtab/.dynsym/.opd sections that were found with the
+ * expected section type; symsrc__destroy() releases them.
+ *
+ * Returns 0 on success, -1 on failure, with the reason stored in
+ * dso->load_errno by this function for the failures it detects itself.
+ */
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
+		 enum dso_binary_type type)
+{
+	int err = -1;
+	GElf_Ehdr ehdr;
+	Elf *elf;
+	int fd;
+
+	if (dso__needs_decompress(dso)) {
+		fd = dso__decompress_kmodule_fd(dso, name);
+		if (fd < 0)
+			return -1;
+
+		type = dso->symtab_type;
+	} else {
+		fd = open(name, O_RDONLY);
+		if (fd < 0) {
+			dso->load_errno = errno;
+			return -1;
+		}
+	}
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		pr_debug("%s: cannot read %s ELF file.\n", __func__, name);
+		dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF;
+		goto out_close;
+	}
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF;
+		pr_debug("%s: cannot get elf header.\n", __func__);
+		goto out_elf_end;
+	}
+
+	if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) {
+		dso->load_errno = DSO_LOAD_ERRNO__INTERNAL_ERROR;
+		goto out_elf_end;
+	}
+
+	/* Always reject images with a mismatched build-id: */
+	if (dso->has_build_id && !symbol_conf.ignore_vmlinux_buildid) {
+		u8 build_id[BUILD_ID_SIZE];
+
+		if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) {
+			dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID;
+			goto out_elf_end;
+		}
+
+		if (!dso__build_id_equal(dso, build_id)) {
+			pr_debug("%s: build id mismatch for %s.\n", __func__, name);
+			dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID;
+			goto out_elf_end;
+		}
+	}
+
+	ss->is_64_bit = (gelf_getclass(elf) == ELFCLASS64);
+
+	/*
+	 * When a lookup fails the section header may never have been
+	 * written, or may describe some other section, so only look at its
+	 * type when a section was actually found.
+	 */
+	ss->symtab = elf_section_by_name(elf, &ehdr, &ss->symshdr, ".symtab",
+			NULL);
+	if (ss->symtab && ss->symshdr.sh_type != SHT_SYMTAB)
+		ss->symtab = NULL;
+
+	ss->dynsym_idx = 0;
+	ss->dynsym = elf_section_by_name(elf, &ehdr, &ss->dynshdr, ".dynsym",
+			&ss->dynsym_idx);
+	if (ss->dynsym && ss->dynshdr.sh_type != SHT_DYNSYM)
+		ss->dynsym = NULL;
+
+	ss->opdidx = 0;
+	ss->opdsec = elf_section_by_name(elf, &ehdr, &ss->opdshdr, ".opd",
+			&ss->opdidx);
+	if (ss->opdsec && ss->opdshdr.sh_type != SHT_PROGBITS)
+		ss->opdsec = NULL;
+
+	if (dso->kernel == DSO_TYPE_USER)
+		ss->adjust_symbols = true;
+	else
+		ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
+
+	ss->name   = strdup(name);
+	if (!ss->name) {
+		dso->load_errno = errno;
+		goto out_elf_end;
+	}
+
+	/* From here on @ss owns the descriptor and the ELF handle. */
+	ss->elf    = elf;
+	ss->fd     = fd;
+	ss->ehdr   = ehdr;
+	ss->type   = type;
+
+	return 0;
+
+out_elf_end:
+	elf_end(elf);
+out_close:
+	close(fd);
+	return err;
+}
+
+/**
+ * ref_reloc_sym_not_found - has kernel relocation symbol been found.
+ * @kmap: kernel maps and relocation reference symbol
+ *
+ * Returns %true when @kmap carries a named relocation reference symbol
+ * whose unrelocated address is still unset, i.e. the symbol has not been
+ * found yet.  Returns %false in every other case, including a NULL @kmap.
+ */
+static bool ref_reloc_sym_not_found(struct kmap *kmap)
+{
+	if (!kmap || !kmap->ref_reloc_sym)
+		return false;
+
+	if (!kmap->ref_reloc_sym->name)
+		return false;
+
+	return !kmap->ref_reloc_sym->unrelocated_addr;
+}
+
+/**
+ * ref_reloc - kernel relocation offset.
+ * @kmap: kernel maps and relocation reference symbol
+ *
+ * Returns the difference between the address and the unrelocated address
+ * of the relocation reference symbol.  Zero is returned when there is no
+ * @kmap, no reference symbol, or its unrelocated address is not known.
+ */
+static u64 ref_reloc(struct kmap *kmap)
+{
+	if (!kmap || !kmap->ref_reloc_sym)
+		return 0;
+
+	if (!kmap->ref_reloc_sym->unrelocated_addr)
+		return 0;
+
+	return kmap->ref_reloc_sym->addr -
+	       kmap->ref_reloc_sym->unrelocated_addr;
+}
+
+/* Weak default hook called for each new symbol; does nothing. */
+void __weak arch__sym_update(struct symbol *s __maybe_unused,
+			     GElf_Sym *sym __maybe_unused)
+{
+}
+
+/*
+ * Load the symbols of type map->type from the symbol table of @syms_ss
+ * into @dso, using @runtime_ss for section headers, section names, the
+ * .text offset and the .opd data.
+ *
+ * If @syms_ss has no .symtab, .dynsym is used instead, except for kernel
+ * DSOs where this is treated as a failure.  For kernel and module DSOs,
+ * symbols living in sections other than .text may be put into separate,
+ * newly created maps/DSOs named "<dso short name><section name>".
+ *
+ * Returns the number of symbols inserted, or -1 on failure.
+ */
+int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+		  struct symsrc *runtime_ss, int kmodule)
+{
+	struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL;
+	struct map_groups *kmaps = kmap ? map__kmaps(map) : NULL;
+	struct map *curr_map = map;
+	struct dso *curr_dso = dso;
+	Elf_Data *symstrs, *secstrs;
+	uint32_t nr_syms;
+	int err = -1;
+	uint32_t idx;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	GElf_Shdr tshdr;
+	Elf_Data *syms, *opddata = NULL;
+	GElf_Sym sym;
+	Elf_Scn *sec, *sec_strndx;
+	Elf *elf;
+	int nr = 0;
+	bool remap_kernel = false, adjust_kernel_syms = false;
+
+	if (kmap && !kmaps)
+		return -1;
+
+	dso->symtab_type = syms_ss->type;
+	dso->is_64_bit = syms_ss->is_64_bit;
+	dso->rel = syms_ss->ehdr.e_type == ET_REL;
+
+	/*
+	 * Modules may already have symbols from kallsyms, but those symbols
+	 * have the wrong values for the dso maps, so remove them.
+	 */
+	if (kmodule && syms_ss->symtab)
+		symbols__delete(&dso->symbols[map->type]);
+
+	if (!syms_ss->symtab) {
+		/*
+		 * If the vmlinux is stripped, fail so we will fall back
+		 * to using kallsyms. The vmlinux runtime symbols aren't
+		 * of much use.
+		 */
+		if (dso->kernel)
+			goto out_elf_end;
+
+		/* No .symtab: fall back to the dynamic symbol table. */
+		syms_ss->symtab  = syms_ss->dynsym;
+		syms_ss->symshdr = syms_ss->dynshdr;
+	}
+
+	elf = syms_ss->elf;
+	ehdr = syms_ss->ehdr;
+	sec = syms_ss->symtab;
+	shdr = syms_ss->symshdr;
+
+	if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr,
+				".text", NULL))
+		dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
+
+	if (runtime_ss->opdsec)
+		opddata = elf_rawdata(runtime_ss->opdsec, NULL);
+
+	syms = elf_getdata(sec, NULL);
+	if (syms == NULL)
+		goto out_elf_end;
+
+	/* sh_link of the symbol table is its string table section. */
+	sec = elf_getscn(elf, shdr.sh_link);
+	if (sec == NULL)
+		goto out_elf_end;
+
+	symstrs = elf_getdata(sec, NULL);
+	if (symstrs == NULL)
+		goto out_elf_end;
+
+	sec_strndx = elf_getscn(runtime_ss->elf, runtime_ss->ehdr.e_shstrndx);
+	if (sec_strndx == NULL)
+		goto out_elf_end;
+
+	secstrs = elf_getdata(sec_strndx, NULL);
+	if (secstrs == NULL)
+		goto out_elf_end;
+
+	nr_syms = shdr.sh_size / shdr.sh_entsize;
+
+	memset(&sym, 0, sizeof(sym));
+
+	/*
+	 * The kernel relocation symbol is needed in advance in order to adjust
+	 * kernel maps correctly.
+	 */
+	if (ref_reloc_sym_not_found(kmap)) {
+		elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
+			const char *elf_name = elf_sym__name(&sym, symstrs);
+
+			if (strcmp(elf_name, kmap->ref_reloc_sym->name))
+				continue;
+			kmap->ref_reloc_sym->unrelocated_addr = sym.st_value;
+			map->reloc = kmap->ref_reloc_sym->addr -
+				     kmap->ref_reloc_sym->unrelocated_addr;
+			break;
+		}
+	}
+
+	/*
+	 * Handle any relocation of vdso necessary because older kernels
+	 * attempted to prelink vdso to its virtual address.
+	 */
+	if (dso__is_vdso(dso))
+		map->reloc = map->start - dso->text_offset;
+
+	dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
+	/*
+	 * Initial kernel and module mappings do not map to the dso.  For
+	 * function mappings, flag the fixups.
+	 */
+	if (map->type == MAP__FUNCTION && (dso->kernel || kmodule)) {
+		remap_kernel = true;
+		adjust_kernel_syms = dso->adjust_symbols;
+	}
+	elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
+		struct symbol *f;
+		const char *elf_name = elf_sym__name(&sym, symstrs);
+		char *demangled = NULL;
+		int is_label = elf_sym__is_label(&sym);
+		const char *section_name;
+		bool used_opd = false;
+
+		if (!is_label && !elf_sym__is_a(&sym, map->type))
+			continue;
+
+		/* Reject ARM ELF "mapping symbols": these aren't unique and
+		 * don't identify functions, so will confuse the profile
+		 * output: */
+		if (ehdr.e_machine == EM_ARM || ehdr.e_machine == EM_AARCH64) {
+			if (elf_name[0] == '$' && strchr("adtx", elf_name[1])
+			    && (elf_name[2] == '\0' || elf_name[2] == '.'))
+				continue;
+		}
+
+		/*
+		 * Symbol lives in the .opd section: replace its value with
+		 * the 64-bit word stored at its offset within .opd and
+		 * recompute the section index from that value.
+		 */
+		if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) {
+			u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr;
+			u64 *opd = opddata->d_buf + offset;
+			sym.st_value = DSO__SWAP(dso, u64, *opd);
+			sym.st_shndx = elf_addr_to_index(runtime_ss->elf,
+					sym.st_value);
+			used_opd = true;
+		}
+		/*
+		 * When loading symbols in a data mapping, ABS symbols (which
+		 * has a value of SHN_ABS in its st_shndx) failed at
+		 * elf_getscn().  And it marks the loading as a failure so
+		 * already loaded symbols cannot be fixed up.
+		 *
+		 * I'm not sure what should be done. Just ignore them for now.
+		 * - Namhyung Kim
+		 */
+		if (sym.st_shndx == SHN_ABS)
+			continue;
+
+		sec = elf_getscn(runtime_ss->elf, sym.st_shndx);
+		if (!sec)
+			goto out_elf_end;
+
+		/* From here on shdr describes the section of this symbol. */
+		gelf_getshdr(sec, &shdr);
+
+		if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type))
+			continue;
+
+		section_name = elf_sec__name(&shdr, secstrs);
+
+		/* On ARM, symbols for thumb functions have 1 added to
+		 * the symbol address as a flag - remove it */
+		if ((ehdr.e_machine == EM_ARM) &&
+		    (map->type == MAP__FUNCTION) &&
+		    (sym.st_value & 1))
+			--sym.st_value;
+
+		if (dso->kernel || kmodule) {
+			char dso_name[PATH_MAX];
+
+			/* Adjust symbol to map to file offset */
+			if (adjust_kernel_syms)
+				sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+
+			/*
+			 * Same section as the current per-section dso (whose
+			 * short name ends with the section name): reuse it.
+			 */
+			if (strcmp(section_name,
+				   (curr_dso->short_name +
+				    dso->short_name_len)) == 0)
+				goto new_symbol;
+
+			if (strcmp(section_name, ".text") == 0) {
+				/*
+				 * The initial kernel mapping is based on
+				 * kallsyms and identity maps.  Overwrite it to
+				 * map to the kernel dso.
+				 */
+				if (remap_kernel && dso->kernel) {
+					remap_kernel = false;
+					map->start = shdr.sh_addr +
+						     ref_reloc(kmap);
+					map->end = map->start + shdr.sh_size;
+					map->pgoff = shdr.sh_offset;
+					map->map_ip = map__map_ip;
+					map->unmap_ip = map__unmap_ip;
+					/* Ensure maps are correctly ordered */
+					if (kmaps) {
+						map__get(map);
+						map_groups__remove(kmaps, map);
+						map_groups__insert(kmaps, map);
+						map__put(map);
+					}
+				}
+
+				/*
+				 * The initial module mapping is based on
+				 * /proc/modules mapped to offset zero.
+				 * Overwrite it to map to the module dso.
+				 */
+				if (remap_kernel && kmodule) {
+					remap_kernel = false;
+					map->pgoff = shdr.sh_offset;
+				}
+
+				curr_map = map;
+				curr_dso = dso;
+				goto new_symbol;
+			}
+
+			if (!kmap)
+				goto new_symbol;
+
+			snprintf(dso_name, sizeof(dso_name),
+				 "%s%s", dso->short_name, section_name);
+
+			/* Find, or create below, the map for this section. */
+			curr_map = map_groups__find_by_name(kmaps, map->type, dso_name);
+			if (curr_map == NULL) {
+				u64 start = sym.st_value;
+
+				if (kmodule)
+					start += map->start + shdr.sh_offset;
+
+				curr_dso = dso__new(dso_name);
+				if (curr_dso == NULL)
+					goto out_elf_end;
+				curr_dso->kernel = dso->kernel;
+				curr_dso->long_name = dso->long_name;
+				curr_dso->long_name_len = dso->long_name_len;
+				curr_map = map__new2(start, curr_dso,
+						     map->type);
+				dso__put(curr_dso);
+				if (curr_map == NULL) {
+					goto out_elf_end;
+				}
+				if (adjust_kernel_syms) {
+					curr_map->start = shdr.sh_addr +
+							  ref_reloc(kmap);
+					curr_map->end = curr_map->start +
+							shdr.sh_size;
+					curr_map->pgoff = shdr.sh_offset;
+				} else {
+					curr_map->map_ip = identity__map_ip;
+					curr_map->unmap_ip = identity__map_ip;
+				}
+				curr_dso->symtab_type = dso->symtab_type;
+				map_groups__insert(kmaps, curr_map);
+				/*
+				 * Add it before we drop the reference to curr_map,
+				 * i.e. while we still are sure to have a reference
+				 * to this DSO via curr_map->dso.
+				 */
+				dsos__add(&map->groups->machine->dsos, curr_dso);
+				/* kmaps already got it */
+				map__put(curr_map);
+				dso__set_loaded(curr_dso, map->type);
+			} else
+				curr_dso = curr_map->dso;
+
+			goto new_symbol;
+		}
+
+		/*
+		 * User space symbol: convert the address to a file offset,
+		 * using the adjust policy of whichever source provided the
+		 * symbol value (.opd of the runtime file, or the symtab).
+		 */
+		if ((used_opd && runtime_ss->adjust_symbols)
+				|| (!used_opd && syms_ss->adjust_symbols)) {
+			pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+				  "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
+				  (u64)sym.st_value, (u64)shdr.sh_addr,
+				  (u64)shdr.sh_offset);
+			sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+		}
+new_symbol:
+		demangled = demangle_sym(dso, kmodule, elf_name);
+		if (demangled != NULL)
+			elf_name = demangled;
+
+		f = symbol__new(sym.st_value, sym.st_size,
+				GELF_ST_BIND(sym.st_info), elf_name);
+		/* The name was passed to symbol__new(); drop our copy. */
+		free(demangled);
+		if (!f)
+			goto out_elf_end;
+
+		arch__sym_update(f, &sym);
+
+		__symbols__insert(&curr_dso->symbols[curr_map->type], f, dso->kernel);
+		nr++;
+	}
+
+	/*
+	 * For misannotated, zeroed, ASM function sizes.
+	 */
+	if (nr > 0) {
+		symbols__fixup_end(&dso->symbols[map->type]);
+		symbols__fixup_duplicate(&dso->symbols[map->type]);
+		if (kmap) {
+			/*
+			 * We need to fixup this here too because we create new
+			 * maps here, for things like vsyscall sections.
+			 */
+			__map_groups__fixup_end(kmaps, map->type);
+		}
+	}
+	err = nr;
+out_elf_end:
+	return err;
+}
+
+/*
+ * Call @mapfn(vaddr, size, file offset, @data) for every PT_LOAD program
+ * header of @elf that is executable (@exe true) or readable (@exe false)
+ * and has a non-zero size, the size being the smaller of p_memsz and
+ * p_filesz.
+ *
+ * Returns 0 on success, -1 if the program headers cannot be read, or the
+ * first non-zero value returned by @mapfn.
+ */
+static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data)
+{
+	size_t nr_phdrs, n;
+	GElf_Phdr phdr;
+
+	if (elf_getphdrnum(elf, &nr_phdrs))
+		return -1;
+
+	for (n = 0; n < nr_phdrs; n++) {
+		u64 size;
+		int ret;
+
+		if (gelf_getphdr(elf, n, &phdr) == NULL)
+			return -1;
+
+		if (phdr.p_type != PT_LOAD)
+			continue;
+
+		/* Select segments by the permission the caller asked for. */
+		if (exe && !(phdr.p_flags & PF_X))
+			continue;
+		if (!exe && !(phdr.p_flags & PF_R))
+			continue;
+
+		size = min(phdr.p_memsz, phdr.p_filesz);
+		if (size == 0)
+			continue;
+
+		ret = mapfn(phdr.p_vaddr, size, phdr.p_offset, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Read the loadable segments of the ELF file open on @fd through
+ * elf_read_maps(), and optionally report through @is_64_bit whether the
+ * file is of class ELFCLASS64.
+ *
+ * Returns -1 if @fd cannot be read as ELF, else the elf_read_maps() result.
+ */
+int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+		    bool *is_64_bit)
+{
+	Elf *elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	int ret;
+
+	if (elf == NULL)
+		return -1;
+
+	if (is_64_bit != NULL)
+		*is_64_bit = (gelf_getclass(elf) == ELFCLASS64);
+
+	ret = elf_read_maps(elf, exe, mapfn, data);
+	elf_end(elf);
+
+	return ret;
+}
+
+/*
+ * Classify the ELF file open on @fd: 64-bit for ELFCLASS64, otherwise
+ * x32 when the machine is EM_X86_64 and 32-bit for any other machine.
+ * DSO__TYPE_UNKNOWN is returned when the file is not readable as ELF.
+ */
+enum dso_type dso__type_fd(int fd)
+{
+	enum dso_type type = DSO__TYPE_UNKNOWN;
+	GElf_Ehdr ehdr;
+	Elf *elf;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL)
+		return type;
+
+	if (elf_kind(elf) == ELF_K_ELF) {
+		if (gelf_getclass(elf) == ELFCLASS64) {
+			type = DSO__TYPE_64BIT;
+		} else if (gelf_getehdr(elf, &ehdr) != NULL) {
+			if (ehdr.e_machine == EM_X86_64)
+				type = DSO__TYPE_X32BIT;
+			else
+				type = DSO__TYPE_32BIT;
+		}
+	}
+
+	elf_end(elf);
+	return type;
+}
+
+/*
+ * Copy up to @len bytes from offset @from_offs of descriptor @from to
+ * offset @to_offs of descriptor @to, one page at a time.
+ *
+ * Reaching end of file on @from before @len bytes were copied stops the
+ * copy but is not reported as an error.
+ *
+ * Returns 0 on success, -1 on allocation, seek, read or write failure
+ * (a short write counts as a failure).
+ */
+static int copy_bytes(int from, off_t from_offs, int to, off_t to_offs, u64 len)
+{
+	char *page = malloc(page_size);
+	int ret = -1;
+
+	if (page == NULL)
+		return -1;
+
+	if (lseek(to, to_offs, SEEK_SET) != to_offs ||
+	    lseek(from, from_offs, SEEK_SET) != from_offs)
+		goto out_free;
+
+	while (len) {
+		ssize_t nr_read, nr_written;
+		size_t chunk = page_size;
+
+		if (len < chunk)
+			chunk = len;
+
+		/* Use read because mmap won't work on proc files */
+		nr_read = read(from, page, chunk);
+		if (nr_read < 0)
+			goto out_free;
+		if (nr_read == 0)
+			break;
+
+		nr_written = write(to, page, nr_read);
+		if (nr_written != nr_read)
+			goto out_free;
+
+		len -= nr_read;
+	}
+
+	ret = 0;
+out_free:
+	free(page);
+	return ret;
+}
+
+/* State of one kcore-style ELF file being read or written. */
+struct kcore {
+	int fd;		/* file descriptor the ELF handle was opened on */
+	int elfclass;	/* ELF class, read from the file or given at creation */
+	Elf *elf;	/* libelf handle for fd */
+	GElf_Ehdr ehdr;	/* ELF header: read from the file, or zeroed at creation */
+};
+
+/*
+ * Open @filename read-only as an ELF file and fill in @kcore with its
+ * descriptor, ELF handle, class and header.
+ *
+ * Returns 0 on success; on failure returns -1 with everything that was
+ * opened closed again.
+ */
+static int kcore__open(struct kcore *kcore, const char *filename)
+{
+	kcore->fd = open(filename, O_RDONLY);
+	if (kcore->fd == -1)
+		return -1;
+
+	kcore->elf = elf_begin(kcore->fd, ELF_C_READ, NULL);
+	if (kcore->elf) {
+		kcore->elfclass = gelf_getclass(kcore->elf);
+
+		if (kcore->elfclass != ELFCLASSNONE &&
+		    gelf_getehdr(kcore->elf, &kcore->ehdr) != NULL)
+			return 0;
+
+		elf_end(kcore->elf);
+	}
+
+	close(kcore->fd);
+	return -1;
+}
+
+/*
+ * Create a new ELF file of class @elfclass for writing and fill in @kcore.
+ *
+ * With @temp set, @filename is a mkstemp() template that is updated in
+ * place; otherwise @filename must not exist yet and is created with mode
+ * 0400.  The in-memory ELF header of @kcore is zeroed.
+ *
+ * Returns 0 on success; on failure after the file was created, the file is
+ * closed and unlinked and -1 is returned.
+ */
+static int kcore__init(struct kcore *kcore, char *filename, int elfclass,
+		       bool temp)
+{
+	kcore->elfclass = elfclass;
+
+	kcore->fd = temp ? mkstemp(filename)
+			 : open(filename, O_WRONLY | O_CREAT | O_EXCL, 0400);
+	if (kcore->fd == -1)
+		return -1;
+
+	kcore->elf = elf_begin(kcore->fd, ELF_C_WRITE, NULL);
+	if (kcore->elf) {
+		if (gelf_newehdr(kcore->elf, elfclass)) {
+			memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr));
+			return 0;
+		}
+		elf_end(kcore->elf);
+	}
+
+	close(kcore->fd);
+	unlink(filename);
+	return -1;
+}
+
+/* Release the ELF handle and then the file descriptor held by @kcore. */
+static void kcore__close(struct kcore *kcore)
+{
+	Elf *elf = kcore->elf;
+	int fd = kcore->fd;
+
+	elf_end(elf);
+	close(fd);
+}
+
+/*
+ * Set up the ELF header of @to from that of @from: identification, type,
+ * machine, version and flags are copied, the section related fields and the
+ * entry point are zeroed, and room is made for @count program headers placed
+ * right after the ELF header.
+ *
+ * Returns 0 on success, -1 if libelf rejects the header or cannot create the
+ * program header table.
+ */
+static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count)
+{
+	GElf_Ehdr *src = &from->ehdr;
+	GElf_Ehdr *dst = &to->ehdr;
+	bool is_32bit = from->elfclass == ELFCLASS32;
+
+	/* Fields taken over from the source file. */
+	memcpy(dst->e_ident, src->e_ident, EI_NIDENT);
+	dst->e_type    = src->e_type;
+	dst->e_machine = src->e_machine;
+	dst->e_version = src->e_version;
+	dst->e_flags   = src->e_flags;
+
+	/* No entry point and no sections in the copy. */
+	dst->e_entry     = 0;
+	dst->e_shoff     = 0;
+	dst->e_shentsize = 0;
+	dst->e_shnum     = 0;
+	dst->e_shstrndx  = 0;
+
+	/* Program headers immediately follow the ELF header. */
+	dst->e_phnum = count;
+	if (is_32bit) {
+		dst->e_ehsize    = sizeof(Elf32_Ehdr);
+		dst->e_phoff     = sizeof(Elf32_Ehdr);
+		dst->e_phentsize = sizeof(Elf32_Phdr);
+	} else {
+		dst->e_ehsize    = sizeof(Elf64_Ehdr);
+		dst->e_phoff     = sizeof(Elf64_Ehdr);
+		dst->e_phentsize = sizeof(Elf64_Phdr);
+	}
+
+	if (!gelf_update_ehdr(to->elf, dst))
+		return -1;
+
+	return gelf_newphdr(to->elf, count) ? 0 : -1;
+}
+
+/*
+ * Fill program header @idx of @kcore with a readable, writable and
+ * executable PT_LOAD segment of @len bytes at virtual address @addr,
+ * located at file offset @offset and aligned to the page size.
+ *
+ * Returns 0 on success, -1 if libelf cannot update the program header.
+ */
+static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset,
+			   u64 addr, u64 len)
+{
+	GElf_Phdr segment;
+
+	memset(&segment, 0, sizeof(segment));
+	segment.p_type   = PT_LOAD;
+	segment.p_flags  = PF_R | PF_W | PF_X;
+	segment.p_offset = offset;
+	segment.p_vaddr  = addr;
+	segment.p_paddr  = 0;
+	segment.p_filesz = len;
+	segment.p_memsz  = len;
+	segment.p_align  = page_size;
+
+	return gelf_update_phdr(kcore->elf, idx, &segment) ? 0 : -1;
+}
+
+/* Write out the ELF file of @kcore; returns the elf_update() result. */
+static off_t kcore__write(struct kcore *kcore)
+{
+	off_t ret = elf_update(kcore->elf, ELF_C_WRITE);
+
+	return ret;
+}
+
+/* One region to copy out of kcore, as computed by kcore_copy__map(). */
+struct phdr_data {
+	off_t offset;	/* file offset of the region within the source segment */
+	u64 addr;	/* start address of the region; 0 while not yet set */
+	u64 len;	/* length of the region in bytes */
+};
+
+/* Addresses gathered from kallsyms and modules, and the resulting maps. */
+struct kcore_copy_info {
+	u64 stext;		/* address of _stext, later rounded down to a page */
+	u64 etext;		/* address of _etext, later rounded up to a page */
+	u64 first_symbol;	/* lowest non-module function symbol address */
+	u64 last_symbol;	/* highest non-module function symbol address */
+	u64 first_module;	/* lowest module start address */
+	u64 last_module_symbol;	/* highest module function symbol address */
+	struct phdr_data kernel_map;	/* region covering stext..etext */
+	struct phdr_data modules_map;	/* region covering first_module..last_module_symbol */
+};
+
+/*
+ * kallsyms__parse() callback: track, in the struct kcore_copy_info passed
+ * as @arg, the address range of function symbols.
+ *
+ * Symbols whose name contains '[' only update last_module_symbol.  All
+ * others update first_symbol/last_symbol, and _stext/_etext are recorded
+ * when seen.  Always returns 0.
+ */
+static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
+					u64 start)
+{
+	struct kcore_copy_info *info = arg;
+
+	if (!symbol_type__is_a(type, MAP__FUNCTION))
+		return 0;
+
+	/* Names containing '[' are handled as module symbols. */
+	if (strchr(name, '[') != NULL) {
+		if (info->last_module_symbol < start)
+			info->last_module_symbol = start;
+		return 0;
+	}
+
+	if (info->first_symbol == 0 || start < info->first_symbol)
+		info->first_symbol = start;
+
+	if (info->last_symbol == 0 || start > info->last_symbol)
+		info->last_symbol = start;
+
+	if (strcmp(name, "_stext") == 0)
+		info->stext = start;
+	else if (strcmp(name, "_etext") == 0)
+		info->etext = start;
+
+	return 0;
+}
+
+/*
+ * Parse "<dir>/kallsyms" into @kci.
+ *
+ * Returns 0 on success, -1 if the file is restricted or cannot be parsed.
+ */
+static int kcore_copy__parse_kallsyms(struct kcore_copy_info *kci,
+				      const char *dir)
+{
+	char path[PATH_MAX];
+	int ret;
+
+	scnprintf(path, sizeof(path), "%s/kallsyms", dir);
+
+	if (symbol__restricted_filename(path, "/proc/kallsyms"))
+		return -1;
+
+	ret = kallsyms__parse(path, kci, kcore_copy__process_kallsyms);
+
+	return ret < 0 ? -1 : 0;
+}
+
+/*
+ * modules__parse() callback: remember the lowest module start address in
+ * the struct kcore_copy_info passed as @arg.  Always returns 0.
+ */
+static int kcore_copy__process_modules(void *arg,
+				       const char *name __maybe_unused,
+				       u64 start, u64 size __maybe_unused)
+{
+	struct kcore_copy_info *info = arg;
+	bool unset = info->first_module == 0;
+
+	if (unset || start < info->first_module)
+		info->first_module = start;
+
+	return 0;
+}
+
+/*
+ * Parse "<dir>/modules" into @kci.
+ *
+ * Returns 0 on success, -1 if the file is restricted or cannot be parsed.
+ */
+static int kcore_copy__parse_modules(struct kcore_copy_info *kci,
+				     const char *dir)
+{
+	char path[PATH_MAX];
+	int ret;
+
+	scnprintf(path, sizeof(path), "%s/modules", dir);
+
+	if (symbol__restricted_filename(path, "/proc/modules"))
+		return -1;
+
+	ret = modules__parse(path, kci, kcore_copy__process_modules);
+
+	return ret < 0 ? -1 : 0;
+}
+
+/*
+ * If @p is still unset and address @s falls inside the segment
+ * [@start, @end) that sits at file offset @pgoff, record in @p the region
+ * starting at @s and running to @e, clipped to the end of the segment.
+ */
+static void kcore_copy__map(struct phdr_data *p, u64 start, u64 end, u64 pgoff,
+			    u64 s, u64 e)
+{
+	u64 stop;
+
+	/* Already filled in by an earlier segment. */
+	if (p->addr)
+		return;
+
+	/* @s is not within this segment. */
+	if (s < start || s >= end)
+		return;
+
+	if (e < end)
+		stop = e;
+	else
+		stop = end;
+
+	p->addr = s;
+	p->offset = (s - start) + pgoff;
+	p->len = stop - s;
+}
+
+/*
+ * elf_read_maps() callback: try to place both the kernel region and the
+ * modules region of the struct kcore_copy_info in @data within the segment
+ * [@start, @start + @len) at file offset @pgoff.  Always returns 0.
+ */
+static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data)
+{
+	struct kcore_copy_info *info = data;
+	u64 seg_end = start + len;
+
+	kcore_copy__map(&info->kernel_map, start, seg_end, pgoff,
+			info->stext, info->etext);
+
+	kcore_copy__map(&info->modules_map, start, seg_end, pgoff,
+			info->first_module, info->last_module_symbol);
+
+	return 0;
+}
+
+/*
+ * Walk the executable segments of @elf to fill in the maps of @kci.
+ *
+ * Returns 0 on success, -1 if elf_read_maps() reports an error.
+ */
+static int kcore_copy__read_maps(struct kcore_copy_info *kci, Elf *elf)
+{
+	int ret = elf_read_maps(elf, true, kcore_copy__read_map, kci);
+
+	return ret < 0 ? -1 : 0;
+}
+
+/*
+ * Work out the kernel and modules regions to copy from @elf, based on the
+ * kallsyms and modules files found in @dir.
+ *
+ * Start addresses are rounded down to a page and end addresses rounded up.
+ * When _stext/_etext are missing, the lowest/highest function symbol is
+ * used instead, with an extra page added past the highest symbol; the same
+ * extra page is added past the highest module symbol.
+ *
+ * Returns 0 on success, -1 if the files cannot be parsed, no kernel text
+ * range could be determined, or modules exist but no module symbol was
+ * seen.
+ */
+static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
+				 Elf *elf)
+{
+	if (kcore_copy__parse_kallsyms(kci, dir) ||
+	    kcore_copy__parse_modules(kci, dir))
+		return -1;
+
+	/* Kernel text start: _stext, or else the lowest function symbol. */
+	if (!kci->stext)
+		kci->stext = kci->first_symbol;
+	kci->stext = round_down(kci->stext, page_size);
+
+	/* Kernel text end: _etext, or else one page past the last symbol. */
+	if (kci->etext)
+		kci->etext = round_up(kci->etext, page_size);
+	else if (kci->last_symbol)
+		kci->etext = round_up(kci->last_symbol, page_size) + page_size;
+
+	kci->first_module = round_down(kci->first_module, page_size);
+
+	if (kci->last_module_symbol)
+		kci->last_module_symbol = round_up(kci->last_module_symbol,
+						   page_size) + page_size;
+
+	if (!kci->stext || !kci->etext)
+		return -1;
+
+	if (kci->first_module && !kci->last_module_symbol)
+		return -1;
+
+	return kcore_copy__read_maps(kci, elf);
+}
+
+/*
+ * Copy "<from_dir>/<name>" to "<to_dir>/<name>" with mode 0400; returns the
+ * copyfile_mode() result.
+ */
+static int kcore_copy__copy_file(const char *from_dir, const char *to_dir,
+				 const char *name)
+{
+	char src[PATH_MAX];
+	char dst[PATH_MAX];
+
+	scnprintf(src, sizeof(src), "%s/%s", from_dir, name);
+	scnprintf(dst, sizeof(dst), "%s/%s", to_dir, name);
+
+	return copyfile_mode(src, dst, 0400);
+}
+
+/* Remove "<dir>/<name>"; returns the unlink() result. */
+static int kcore_copy__unlink(const char *dir, const char *name)
+{
+	char path[PATH_MAX];
+
+	scnprintf(path, sizeof(path), "%s/%s", dir, name);
+
+	return unlink(path);
+}
+
+/*
+ * Compare the data readable from @from with the data readable from @to,
+ * a page at a time, until @from reaches end of file.
+ *
+ * Returns 0 if everything read from @from was matched by @to, -1 on any
+ * difference, read error or allocation failure.
+ */
+static int kcore_copy__compare_fds(int from, int to)
+{
+	char *page_from = malloc(page_size);
+	char *page_to = malloc(page_size);
+	int ret = -1;
+
+	if (page_from == NULL || page_to == NULL)
+		goto out_free;
+
+	for (;;) {
+		ssize_t nr;
+		size_t chunk;
+
+		/* Use read because mmap won't work on proc files */
+		nr = read(from, page_from, page_size);
+		if (nr < 0)
+			goto out_free;
+
+		/* End of @from: everything matched. */
+		if (nr == 0)
+			break;
+
+		chunk = nr;
+
+		if (readn(to, page_to, chunk) != (int)chunk)
+			goto out_free;
+
+		if (memcmp(page_from, page_to, chunk) != 0)
+			goto out_free;
+	}
+
+	ret = 0;
+out_free:
+	free(page_to);
+	free(page_from);
+	return ret;
+}
+
+/*
+ * Open both files read-only and compare their contents.
+ * Returns 0 if they match, -1 if they differ or either cannot be opened.
+ */
+static int kcore_copy__compare_files(const char *from_filename,
+				     const char *to_filename)
+{
+	int err = -1;
+	int to;
+	int from = open(from_filename, O_RDONLY);
+
+	if (from < 0)
+		return -1;
+
+	to = open(to_filename, O_RDONLY);
+	if (to >= 0) {
+		err = kcore_copy__compare_fds(from, to);
+		close(to);
+	}
+
+	close(from);
+	return err;
+}
+
+/*
+ * Compare @from_dir/@name with @to_dir/@name.
+ * Returns the result of kcore_copy__compare_files().
+ */
+static int kcore_copy__compare_file(const char *from_dir, const char *to_dir,
+				    const char *name)
+{
+	char src_path[PATH_MAX];
+	char dst_path[PATH_MAX];
+
+	scnprintf(src_path, sizeof(src_path), "%s/%s", from_dir, name);
+	scnprintf(dst_path, sizeof(dst_path), "%s/%s", to_dir, name);
+
+	return kcore_copy__compare_files(src_path, dst_path);
+}
+
+/**
+ * kcore_copy - copy kallsyms, modules and kcore from one directory to another.
+ * @from_dir: from directory
+ * @to_dir: to directory
+ *
+ * This function copies kallsyms, modules and kcore files from one directory to
+ * another.  kallsyms and modules are copied entirely.  Only code segments are
+ * copied from kcore.  It is assumed that two segments suffice: one for the
+ * kernel proper and one for all the modules.  The code segments are determined
+ * from kallsyms and modules files.  The kernel map starts at _stext or the
+ * lowest function symbol, and ends at _etext or the highest function symbol.
+ * The module map starts at the lowest module address and ends at the highest
+ * module symbol.  Start addresses are rounded down to the nearest page.  End
+ * addresses are rounded up to the nearest page.  An extra page is added to the
+ * highest kernel symbol and highest module symbol to, hopefully, encompass that
+ * symbol too.  Because it contains only code sections, the resulting kcore is
+ * unusual.  One significant peculiarity is that the mapping (start -> pgoff)
+ * is not the same for the kernel map and the modules map.  That happens because
+ * the data is copied adjacently whereas the original kcore has gaps.  Finally,
+ * kallsyms and modules files are compared with their copies to check that
+ * modules have not been loaded or unloaded while the copies were taking place.
+ *
+ * Return: %0 on success, %-1 on failure.
+ */
+int kcore_copy(const char *from_dir, const char *to_dir)
+{
+	struct kcore kcore;
+	struct kcore extract;
+	size_t count = 2;	/* program headers: kernel map + modules map */
+	int idx = 0, err = -1;
+	/* Segment data starts one page into the output file */
+	off_t offset = page_size, sz, modules_offset = 0;
+	struct kcore_copy_info kci = { .stext = 0, };
+	char kcore_filename[PATH_MAX];
+	char extract_filename[PATH_MAX];
+
+	/* Copy kallsyms and modules first; they are re-checked at the end */
+	if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms"))
+		return -1;
+
+	if (kcore_copy__copy_file(from_dir, to_dir, "modules"))
+		goto out_unlink_kallsyms;
+
+	scnprintf(kcore_filename, PATH_MAX, "%s/kcore", from_dir);
+	scnprintf(extract_filename, PATH_MAX, "%s/kcore", to_dir);
+
+	if (kcore__open(&kcore, kcore_filename))
+		goto out_unlink_modules;
+
+	if (kcore_copy__calc_maps(&kci, from_dir, kcore.elf))
+		goto out_kcore_close;
+
+	if (kcore__init(&extract, extract_filename, kcore.elfclass, false))
+		goto out_kcore_close;
+
+	/* No modules map found: only the kernel program header is needed */
+	if (!kci.modules_map.addr)
+		count -= 1;
+
+	if (kcore__copy_hdr(&kcore, &extract, count))
+		goto out_extract_close;
+
+	if (kcore__add_phdr(&extract, idx++, offset, kci.kernel_map.addr,
+			    kci.kernel_map.len))
+		goto out_extract_close;
+
+	if (kci.modules_map.addr) {
+		/* Module data is placed directly after the kernel data */
+		modules_offset = offset + kci.kernel_map.len;
+		if (kcore__add_phdr(&extract, idx, modules_offset,
+				    kci.modules_map.addr, kci.modules_map.len))
+			goto out_extract_close;
+	}
+
+	/* Whatever kcore__write() emitted must fit before the first segment */
+	sz = kcore__write(&extract);
+	if (sz < 0 || sz > offset)
+		goto out_extract_close;
+
+	if (copy_bytes(kcore.fd, kci.kernel_map.offset, extract.fd, offset,
+		       kci.kernel_map.len))
+		goto out_extract_close;
+
+	if (modules_offset && copy_bytes(kcore.fd, kci.modules_map.offset,
+					 extract.fd, modules_offset,
+					 kci.modules_map.len))
+		goto out_extract_close;
+
+	/* Fail if modules or kallsyms changed while the copy was in progress */
+	if (kcore_copy__compare_file(from_dir, to_dir, "modules"))
+		goto out_extract_close;
+
+	if (kcore_copy__compare_file(from_dir, to_dir, "kallsyms"))
+		goto out_extract_close;
+
+	err = 0;
+
+	/* Cleanup ladder: on failure every file created so far is removed */
+out_extract_close:
+	kcore__close(&extract);
+	if (err)
+		unlink(extract_filename);
+out_kcore_close:
+	kcore__close(&kcore);
+out_unlink_modules:
+	if (err)
+		kcore_copy__unlink(to_dir, "modules");
+out_unlink_kallsyms:
+	if (err)
+		kcore_copy__unlink(to_dir, "kallsyms");
+
+	return err;
+}
+
+/*
+ * Extract one address range from a kcore file into a new ELF file.
+ *
+ * Reads kce->kcore_filename and writes a file containing a single program
+ * header that covers kce->len bytes at address kce->addr, with the data taken
+ * from file offset kce->offs of the source.  The output name is built from
+ * PERF_KCORE_EXTRACT and stored in kce->extract_filename.
+ *
+ * Returns 0 on success, -1 on failure (the output file is unlinked).
+ */
+int kcore_extract__create(struct kcore_extract *kce)
+{
+	struct kcore kcore;
+	struct kcore extract;
+	size_t count = 1;	/* a single program header */
+	int idx = 0, err = -1;
+	/* Segment data starts one page into the output file */
+	off_t offset = page_size, sz;
+
+	if (kcore__open(&kcore, kce->kcore_filename))
+		return -1;
+
+	/*
+	 * NOTE(review): the final 'true' argument presumably asks kcore__init()
+	 * to treat the name as a temporary-file template - confirm.
+	 */
+	strcpy(kce->extract_filename, PERF_KCORE_EXTRACT);
+	if (kcore__init(&extract, kce->extract_filename, kcore.elfclass, true))
+		goto out_kcore_close;
+
+	if (kcore__copy_hdr(&kcore, &extract, count))
+		goto out_extract_close;
+
+	if (kcore__add_phdr(&extract, idx, offset, kce->addr, kce->len))
+		goto out_extract_close;
+
+	/* Whatever kcore__write() emitted must fit before the segment data */
+	sz = kcore__write(&extract);
+	if (sz < 0 || sz > offset)
+		goto out_extract_close;
+
+	if (copy_bytes(kcore.fd, kce->offs, extract.fd, offset, kce->len))
+		goto out_extract_close;
+
+	err = 0;
+
+out_extract_close:
+	kcore__close(&extract);
+	if (err)
+		unlink(kce->extract_filename);
+out_kcore_close:
+	kcore__close(&kcore);
+
+	return err;
+}
+
+/* Remove the file named by kce->extract_filename; the unlink() result is ignored. */
+void kcore_extract__delete(struct kcore_extract *kce)
+{
+	unlink(kce->extract_filename);
+}
+
+#ifdef HAVE_GELF_GETNOTE_SUPPORT
+/**
+ * populate_sdt_note : Parse raw data and identify SDT note
+ * @elf: pointer to the elf handle of the opened file
+ * @data: raw data of a section with description offset applied
+ * @len: note description size
+ * @sdt_notes: List to add the SDT note
+ *
+ * Responsible for parsing the @data in section .note.stapsdt in @elf and
+ * appending the resulting SDT note to the @sdt_notes list.
+ *
+ * Return: 0 on success, -ENOMEM on allocation failure, -EBADF if the ELF
+ * header cannot be read, -EINVAL if the description is too short, cannot be
+ * translated or its provider string is not terminated.
+ */
+static int populate_sdt_note(Elf **elf, const char *data, size_t len,
+			     struct list_head *sdt_notes)
+{
+	const char *provider, *name, *args;
+	struct sdt_note *tmp = NULL;
+	GElf_Ehdr ehdr;
+	GElf_Addr base_off = 0;
+	GElf_Shdr shdr;
+	int ret = -EINVAL;
+
+	/* Holds the NR_ADDR addresses that lead the description, in memory format */
+	union {
+		Elf64_Addr a64[NR_ADDR];
+		Elf32_Addr a32[NR_ADDR];
+	} buf;
+
+	Elf_Data dst = {
+		.d_buf = &buf, .d_type = ELF_T_ADDR, .d_version = EV_CURRENT,
+		.d_size = gelf_fsize((*elf), ELF_T_ADDR, NR_ADDR, EV_CURRENT),
+		.d_off = 0, .d_align = 0
+	};
+	Elf_Data src = {
+		.d_buf = (void *) data, .d_type = ELF_T_ADDR,
+		.d_version = EV_CURRENT, .d_size = dst.d_size, .d_off = 0,
+		.d_align = 0
+	};
+
+	tmp = (struct sdt_note *)calloc(1, sizeof(struct sdt_note));
+	if (!tmp) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	INIT_LIST_HEAD(&tmp->note_list);
+
+	/* The addresses must be followed by at least three more bytes */
+	if (len < dst.d_size + 3)
+		goto out_free_note;
+
+	/* Translation from file representation to memory representation */
+	if (gelf_xlatetom(*elf, &dst, &src,
+			  elf_getident(*elf, NULL)[EI_DATA]) == NULL) {
+		pr_err("gelf_xlatetom : %s\n", elf_errmsg(-1));
+		goto out_free_note;
+	}
+
+	/* Populate the fields of sdt_note */
+	provider = data + dst.d_size;
+
+	/* The name string starts right after the provider's terminating NUL */
+	name = (const char *)memchr(provider, '\0', data + len - provider);
+	if (name++ == NULL)
+		goto out_free_note;
+
+	tmp->provider = strdup(provider);
+	if (!tmp->provider) {
+		ret = -ENOMEM;
+		goto out_free_note;
+	}
+	/*
+	 * NOTE(review): name is duplicated before the memchr() below checks
+	 * that it is terminated inside the note.
+	 */
+	tmp->name = strdup(name);
+	if (!tmp->name) {
+		ret = -ENOMEM;
+		goto out_free_prov;
+	}
+
+	args = memchr(name, '\0', data + len - name);
+
+	/*
+	 * There is no argument if:
+	 * - We reached the end of the note;
+	 * - There is not enough room to hold a potential string;
+	 * - The argument string is empty or just contains ':'.
+	 */
+	if (args == NULL || data + len - args < 2 ||
+		args[1] == ':' || args[1] == '\0')
+		tmp->args = NULL;
+	else {
+		tmp->args = strdup(++args);
+		if (!tmp->args) {
+			ret = -ENOMEM;
+			goto out_free_name;
+		}
+	}
+
+	/* Keep the first three translated addresses, in the width of the ELF class */
+	if (gelf_getclass(*elf) == ELFCLASS32) {
+		memcpy(&tmp->addr, &buf, 3 * sizeof(Elf32_Addr));
+		tmp->bit32 = true;
+	} else {
+		memcpy(&tmp->addr, &buf, 3 * sizeof(Elf64_Addr));
+		tmp->bit32 = false;
+	}
+
+	if (!gelf_getehdr(*elf, &ehdr)) {
+		pr_debug("%s : cannot get elf header.\n", __func__);
+		ret = -EBADF;
+		goto out_free_args;
+	}
+
+	/* Adjust the prelink effect :
+	 * Find out the .stapsdt.base section.
+	 * This scn will help us to handle prelinking (if present).
+	 * Compare the retrieved file offset of the base section with the
+	 * base address in the description of the SDT note. If its different,
+	 * then accordingly, adjust the note location.
+	 */
+	if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL)) {
+		base_off = shdr.sh_offset;
+		if (base_off) {
+			if (tmp->bit32)
+				tmp->addr.a32[0] = tmp->addr.a32[0] + base_off -
+					tmp->addr.a32[1];
+			else
+				tmp->addr.a64[0] = tmp->addr.a64[0] + base_off -
+					tmp->addr.a64[1];
+		}
+	}
+
+	list_add_tail(&tmp->note_list, sdt_notes);
+	return 0;
+
+out_free_args:
+	free(tmp->args);
+out_free_name:
+	free(tmp->name);
+out_free_prov:
+	free(tmp->provider);
+out_free_note:
+	free(tmp);
+out_err:
+	return ret;
+}
+
+/**
+ * construct_sdt_notes_list : constructs a list of SDT notes
+ * @elf : elf to look into
+ * @sdt_notes : empty list_head
+ *
+ * Scans the sections in 'elf' for the section
+ * .note.stapsdt. It then calls populate_sdt_note for each SDT note found
+ * and populates 'sdt_notes'.
+ *
+ * Return: 0 on success, -EBADF if the ELF header or the section header
+ * string index cannot be read, -ENOENT if the section is missing, has the
+ * wrong type/flags or yields no notes, or the error from populate_sdt_note.
+ */
+static int construct_sdt_notes_list(Elf *elf, struct list_head *sdt_notes)
+{
+	GElf_Ehdr ehdr;
+	Elf_Scn *scn = NULL;
+	Elf_Data *data;
+	GElf_Shdr shdr;
+	size_t shstrndx, next;
+	GElf_Nhdr nhdr;
+	size_t name_off, desc_off, offset;
+	int ret = 0;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		ret = -EBADF;
+		goto out_ret;
+	}
+	if (elf_getshdrstrndx(elf, &shstrndx) != 0) {
+		ret = -EBADF;
+		goto out_ret;
+	}
+
+	/* Look for the required section */
+	scn = elf_section_by_name(elf, &ehdr, &shdr, SDT_NOTE_SCN, NULL);
+	if (!scn) {
+		ret = -ENOENT;
+		goto out_ret;
+	}
+
+	/* Only a note section without SHF_ALLOC is accepted */
+	if ((shdr.sh_type != SHT_NOTE) || (shdr.sh_flags & SHF_ALLOC)) {
+		ret = -ENOENT;
+		goto out_ret;
+	}
+
+	/* NOTE(review): the elf_getdata() result is not checked for NULL here */
+	data = elf_getdata(scn, NULL);
+
+	/* Get the SDT notes */
+	for (offset = 0; (next = gelf_getnote(data, offset, &nhdr, &name_off,
+					      &desc_off)) > 0; offset = next) {
+		if (nhdr.n_namesz == sizeof(SDT_NOTE_NAME) &&
+		    !memcmp(data->d_buf + name_off, SDT_NOTE_NAME,
+			    sizeof(SDT_NOTE_NAME))) {
+			/*
+			 * Check the type of the note.  An unexpected type stops
+			 * the scan with ret still 0 and skips the list_empty()
+			 * check below.
+			 */
+			if (nhdr.n_type != SDT_NOTE_TYPE)
+				goto out_ret;
+
+			ret = populate_sdt_note(&elf, ((data->d_buf) + desc_off),
+						nhdr.n_descsz, sdt_notes);
+			if (ret < 0)
+				goto out_ret;
+		}
+	}
+	if (list_empty(sdt_notes))
+		ret = -ENOENT;
+
+out_ret:
+	return ret;
+}
+
+/**
+ * get_sdt_note_list : Wrapper to construct a list of sdt notes
+ * @head : empty list_head
+ * @target : file to find SDT notes from
+ *
+ * Opens @target read-only, starts an ELF session on it and lets
+ * construct_sdt_notes_list fill @head.
+ *
+ * Return: -EBADF if the file cannot be opened or is not usable as ELF,
+ * otherwise the result of construct_sdt_notes_list.
+ */
+int get_sdt_note_list(struct list_head *head, const char *target)
+{
+	int ret = -EBADF;
+	Elf *elf;
+	int fd = open(target, O_RDONLY);
+
+	if (fd < 0)
+		return ret;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf != NULL) {
+		ret = construct_sdt_notes_list(elf, head);
+		elf_end(elf);
+	}
+
+	close(fd);
+	return ret;
+}
+
+/**
+ * cleanup_sdt_note_list : free the sdt notes' list
+ * @sdt_notes: sdt notes' list
+ *
+ * Unlinks every SDT note in @sdt_notes and frees it together with the
+ * strings it owns (args, name and provider).
+ * Returns the number of SDT notes free'd.
+ */
+int cleanup_sdt_note_list(struct list_head *sdt_notes)
+{
+	struct sdt_note *tmp, *pos;
+	int nr_free = 0;
+
+	list_for_each_entry_safe(pos, tmp, sdt_notes, note_list) {
+		list_del(&pos->note_list);
+		/* args is strdup()ed by populate_sdt_note(); NULL when absent */
+		free(pos->args);
+		free(pos->name);
+		free(pos->provider);
+		free(pos);
+		nr_free++;
+	}
+	return nr_free;
+}
+
+/**
+ * sdt_notes__get_count: Counts the number of sdt events
+ * @start: list_head to sdt_notes list
+ *
+ * Walks the whole list once.
+ * Returns the number of SDT notes in the list.
+ */
+int sdt_notes__get_count(struct list_head *start)
+{
+	struct sdt_note *sdt_ptr;
+	int count = 0;
+
+	list_for_each_entry(sdt_ptr, start, note_list)
+		count++;
+	return count;
+}
+#endif
+
+/* Tell libelf which ELF version this code uses (EV_CURRENT). */
+void symbol__elf_init(void)
+{
+	elf_version(EV_CURRENT);
+}
diff --git a/util/symbol-minimal.c b/util/symbol-minimal.c
new file mode 100644
index 0000000..ff48d0d
--- /dev/null
+++ b/util/symbol-minimal.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "symbol.h"
+#include "util.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <byteswap.h>
+#include <sys/stat.h>
+
+
+/*
+ * Return true when the byte order of the host differs from @file_endian
+ * (an ELFDATA2LSB/ELFDATA2MSB value), i.e. multi-byte fields read from the
+ * file have to be byte-swapped.
+ */
+static bool check_need_swap(int file_endian)
+{
+	const int probe = 1;
+	/* The lowest-addressed byte holds the 1 only on a little-endian host */
+	const bool host_is_le = *(const u8 *)&probe == 1;
+
+	return file_endian != (host_is_le ? ELFDATA2LSB : ELFDATA2MSB);
+}
+
+#define NOTE_ALIGN(sz) (((sz) + 3) & ~3)
+
+#define NT_GNU_BUILD_ID	3
+
+/*
+ * Search a buffer of ELF notes for the GNU build-id note.
+ *
+ * @note_data: start of the note buffer (headers are byte-swapped in place
+ *             when @need_swap is set)
+ * @note_len:  number of valid bytes in @note_data
+ * @bf:        destination for the build-id
+ * @size:      size of @bf; unused trailing bytes are zeroed
+ * @need_swap: note header fields need byte-swapping
+ *
+ * Every header, name and descriptor is checked against @note_len before it
+ * is touched, so a truncated or corrupt buffer is never read past its end.
+ *
+ * Returns 0 when a build-id was copied into @bf, -1 otherwise.
+ */
+static int read_build_id(void *note_data, size_t note_len, void *bf,
+			 size_t size, bool need_swap)
+{
+	struct {
+		u32 n_namesz;
+		u32 n_descsz;
+		u32 n_type;
+	} *nhdr;
+	void *ptr = note_data;
+	void *end = note_data + note_len;
+
+	while (ptr < end) {
+		const char *name;
+		size_t namesz, descsz, avail;
+
+		/* Not enough room left for a complete note header */
+		if ((size_t)(end - ptr) < sizeof(*nhdr))
+			break;
+
+		nhdr = ptr;
+		if (need_swap) {
+			nhdr->n_namesz = bswap_32(nhdr->n_namesz);
+			nhdr->n_descsz = bswap_32(nhdr->n_descsz);
+			nhdr->n_type = bswap_32(nhdr->n_type);
+		}
+
+		namesz = NOTE_ALIGN(nhdr->n_namesz);
+		descsz = NOTE_ALIGN(nhdr->n_descsz);
+
+		ptr += sizeof(*nhdr);
+		/* The (padded) name must lie inside the buffer */
+		if (namesz > (size_t)(end - ptr))
+			break;
+		name = ptr;
+		ptr += namesz;
+		avail = end - ptr;
+		if (nhdr->n_type == NT_GNU_BUILD_ID &&
+		    nhdr->n_namesz == sizeof("GNU")) {
+			if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
+				size_t sz = min(size, descsz);
+
+				/* The descriptor itself must be fully present */
+				if (nhdr->n_descsz > avail)
+					break;
+				/* Trailing alignment padding may be absent */
+				if (sz > avail)
+					sz = avail;
+				memcpy(bf, ptr, sz);
+				memset(bf + sz, 0, size - sz);
+				return 0;
+			}
+		}
+		/* Skip this note; stop if its descriptor overruns the buffer */
+		if (descsz > avail)
+			break;
+		ptr += descsz;
+	}
+
+	return -1;
+}
+
+/* Stub in this minimal implementation: always fails with -1. */
+int filename__read_debuglink(const char *filename __maybe_unused,
+			     char *debuglink __maybe_unused,
+			     size_t size __maybe_unused)
+{
+	return -1;
+}
+
+/*
+ * Read the GNU build-id of the ELF file @filename into @bf (@size bytes).
+ *
+ * Only the program headers are consulted: the first PT_NOTE segment is read
+ * and searched with read_build_id(); later PT_NOTE segments and section
+ * headers are never looked at.
+ *
+ * Returns @size on success, -1 on any failure.
+ */
+int filename__read_build_id(const char *filename, void *bf, size_t size)
+{
+	FILE *fp;
+	int ret = -1;
+	bool need_swap = false;
+	u8 e_ident[EI_NIDENT];
+	size_t buf_size;
+	void *buf;
+	int i;
+
+	fp = fopen(filename, "r");
+	if (fp == NULL)
+		return -1;
+
+	if (fread(e_ident, sizeof(e_ident), 1, fp) != 1)
+		goto out;
+
+	if (memcmp(e_ident, ELFMAG, SELFMAG) ||
+	    e_ident[EI_VERSION] != EV_CURRENT)
+		goto out;
+
+	need_swap = check_need_swap(e_ident[EI_DATA]);
+
+	/* for simplicity: re-read the whole ELF header from the start */
+	fseek(fp, 0, SEEK_SET);
+
+	if (e_ident[EI_CLASS] == ELFCLASS32) {
+		Elf32_Ehdr ehdr;
+		Elf32_Phdr *phdr;
+
+		if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
+			goto out;
+
+		if (need_swap) {
+			ehdr.e_phoff = bswap_32(ehdr.e_phoff);
+			ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
+			ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+		}
+
+		/* Load the whole program header table */
+		buf_size = ehdr.e_phentsize * ehdr.e_phnum;
+		buf = malloc(buf_size);
+		if (buf == NULL)
+			goto out;
+
+		fseek(fp, ehdr.e_phoff, SEEK_SET);
+		if (fread(buf, buf_size, 1, fp) != 1)
+			goto out_free;
+
+		/*
+		 * NOTE(review): entries are stepped by sizeof(*phdr), not by
+		 * e_phentsize - assumes the two are equal.
+		 */
+		for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
+			void *tmp;
+			long offset;
+
+			if (need_swap) {
+				phdr->p_type = bswap_32(phdr->p_type);
+				phdr->p_offset = bswap_32(phdr->p_offset);
+				phdr->p_filesz = bswap_32(phdr->p_filesz);
+			}
+
+			if (phdr->p_type != PT_NOTE)
+				continue;
+
+			/*
+			 * Reuse buf for the note data.  This invalidates phdr,
+			 * which is fine because the loop always breaks below.
+			 */
+			buf_size = phdr->p_filesz;
+			offset = phdr->p_offset;
+			tmp = realloc(buf, buf_size);
+			if (tmp == NULL)
+				goto out_free;
+
+			buf = tmp;
+			fseek(fp, offset, SEEK_SET);
+			if (fread(buf, buf_size, 1, fp) != 1)
+				goto out_free;
+
+			ret = read_build_id(buf, buf_size, bf, size, need_swap);
+			if (ret == 0)
+				ret = size;
+			break;
+		}
+	} else {
+		/* Same procedure as above with the 64-bit structures */
+		Elf64_Ehdr ehdr;
+		Elf64_Phdr *phdr;
+
+		if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
+			goto out;
+
+		if (need_swap) {
+			ehdr.e_phoff = bswap_64(ehdr.e_phoff);
+			ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
+			ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+		}
+
+		buf_size = ehdr.e_phentsize * ehdr.e_phnum;
+		buf = malloc(buf_size);
+		if (buf == NULL)
+			goto out;
+
+		fseek(fp, ehdr.e_phoff, SEEK_SET);
+		if (fread(buf, buf_size, 1, fp) != 1)
+			goto out_free;
+
+		for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
+			void *tmp;
+			long offset;
+
+			if (need_swap) {
+				phdr->p_type = bswap_32(phdr->p_type);
+				phdr->p_offset = bswap_64(phdr->p_offset);
+				phdr->p_filesz = bswap_64(phdr->p_filesz);
+			}
+
+			if (phdr->p_type != PT_NOTE)
+				continue;
+
+			/* buf is reused for the note data; phdr is dead after this */
+			buf_size = phdr->p_filesz;
+			offset = phdr->p_offset;
+			tmp = realloc(buf, buf_size);
+			if (tmp == NULL)
+				goto out_free;
+
+			buf = tmp;
+			fseek(fp, offset, SEEK_SET);
+			if (fread(buf, buf_size, 1, fp) != 1)
+				goto out_free;
+
+			ret = read_build_id(buf, buf_size, bf, size, need_swap);
+			if (ret == 0)
+				ret = size;
+			break;
+		}
+	}
+out_free:
+	free(buf);
+out:
+	/* 'out' is only jumped to while buf is unallocated, hence no free here */
+	fclose(fp);
+	return ret;
+}
+
+/*
+ * Read a build-id from a file that consists of raw ELF notes.
+ *
+ * The whole file (its fstat() size) is loaded in a single read() and handed
+ * to read_build_id() without byte-swapping.
+ *
+ * Returns 0 when a build-id was stored in @build_id, -1 otherwise.
+ */
+int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
+{
+	struct stat stbuf;
+	int ret = -1;
+	int fd = open(filename, O_RDONLY);
+
+	if (fd < 0)
+		return -1;
+
+	if (fstat(fd, &stbuf) >= 0) {
+		const size_t buf_size = stbuf.st_size;
+		void *buf = malloc(buf_size);
+
+		if (buf != NULL) {
+			/* A short read is treated as a failure */
+			if (read(fd, buf, buf_size) == (ssize_t) buf_size)
+				ret = read_build_id(buf, buf_size, build_id,
+						    size, false);
+			free(buf);
+		}
+	}
+
+	close(fd);
+	return ret;
+}
+
+/*
+ * Initialise @ss as a symbol source for the file @name.
+ *
+ * Opens the file read-only and stores the descriptor, a private copy of
+ * @name and @type in @ss.
+ *
+ * Returns 0 on success.  On failure returns -1 and records the current
+ * errno in dso->load_errno.
+ */
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
+	         enum dso_binary_type type)
+{
+	int fd = open(name, O_RDONLY);
+	if (fd < 0)
+		goto out_errno;
+
+	ss->name = strdup(name);
+	if (!ss->name)
+		goto out_close;
+
+	ss->fd = fd;
+	ss->type = type;
+
+	return 0;
+out_close:
+	close(fd);
+out_errno:
+	/* errno is read after close() on the strdup() failure path */
+	dso->load_errno = errno;
+	return -1;
+}
+
+/* This implementation cannot tell, so it always answers true. */
+bool symsrc__possibly_runtime(struct symsrc *ss __maybe_unused)
+{
+	/* Assume all sym sources could be a runtime image. */
+	return true;
+}
+
+/* Always reports that no symbol table is available. */
+bool symsrc__has_symtab(struct symsrc *ss __maybe_unused)
+{
+	return false;
+}
+
+/* Release what symsrc__init() acquired: the name copy and the descriptor. */
+void symsrc__destroy(struct symsrc *ss)
+{
+	zfree(&ss->name);
+	close(ss->fd);
+}
+
+/* Stub: does nothing and returns 0. */
+int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused,
+				struct symsrc *ss __maybe_unused,
+				struct map *map __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Classify the ELF file open on @fd by its identification bytes.
+ *
+ * Rewinds @fd and reads e_ident, leaving the file offset just past it.
+ *
+ * Returns 1 for ELFCLASS64, 0 for any other class, -1 if the file cannot be
+ * read or does not carry a valid ELF magic/version.
+ */
+static int fd__is_64_bit(int fd)
+{
+	u8 e_ident[EI_NIDENT];
+
+	if (lseek(fd, 0, SEEK_SET) != 0 ||
+	    readn(fd, e_ident, sizeof(e_ident)) != sizeof(e_ident))
+		return -1;
+
+	if (memcmp(e_ident, ELFMAG, SELFMAG) != 0 ||
+	    e_ident[EI_VERSION] != EV_CURRENT)
+		return -1;
+
+	return e_ident[EI_CLASS] == ELFCLASS64;
+}
+
+/*
+ * Determine the DSO type of the ELF file open on @fd.
+ *
+ * Returns DSO__TYPE_64BIT for a 64-bit class file, DSO__TYPE_X32BIT for a
+ * non-64-bit class file whose machine is EM_X86_64, DSO__TYPE_32BIT for any
+ * other non-64-bit file and DSO__TYPE_UNKNOWN if the header cannot be read.
+ */
+enum dso_type dso__type_fd(int fd)
+{
+	/* e_machine sits at the same offset in the 32- and 64-bit headers */
+	Elf32_Ehdr ehdr;
+	int ret;
+
+	ret = fd__is_64_bit(fd);
+	if (ret < 0)
+		return DSO__TYPE_UNKNOWN;
+
+	if (ret)
+		return DSO__TYPE_64BIT;
+
+	/*
+	 * fd__is_64_bit() leaves the offset just past e_ident, so rewind
+	 * before reading the header; otherwise e_machine would be taken from
+	 * the wrong bytes of the file.
+	 */
+	if (lseek(fd, 0, SEEK_SET))
+		return DSO__TYPE_UNKNOWN;
+
+	if (readn(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr))
+		return DSO__TYPE_UNKNOWN;
+
+	if (ehdr.e_machine == EM_X86_64)
+		return DSO__TYPE_X32BIT;
+
+	return DSO__TYPE_32BIT;
+}
+
+/*
+ * Minimal symbol "loader": loads no symbols at all.
+ *
+ * It only records whether the file behind @ss is 64-bit (when that can be
+ * determined) and sets the build-id of @dso when one can be read from
+ * ss->name.  Always returns 0.
+ */
+int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
+		  struct symsrc *ss,
+		  struct symsrc *runtime_ss __maybe_unused,
+		  int kmodule __maybe_unused)
+{
+	unsigned char build_id[BUILD_ID_SIZE];
+	const int is_64_bit = fd__is_64_bit(ss->fd);
+
+	if (is_64_bit >= 0)
+		dso->is_64_bit = is_64_bit;
+
+	if (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0)
+		dso__set_build_id(dso, build_id);
+
+	return 0;
+}
+
+/* Stub in this minimal implementation: always fails with -1. */
+int file__read_maps(int fd __maybe_unused, bool exe __maybe_unused,
+		    mapfn_t mapfn __maybe_unused, void *data __maybe_unused,
+		    bool *is_64_bit __maybe_unused)
+{
+	return -1;
+}
+
+/* Stub in this minimal implementation: always fails with -1. */
+int kcore_extract__create(struct kcore_extract *kce __maybe_unused)
+{
+	return -1;
+}
+
+/* Stub: nothing is ever created here, so there is nothing to delete. */
+void kcore_extract__delete(struct kcore_extract *kce __maybe_unused)
+{
+}
+
+/* Stub in this minimal implementation: always fails with -1. */
+int kcore_copy(const char *from_dir __maybe_unused,
+	       const char *to_dir __maybe_unused)
+{
+	return -1;
+}
+
+/* Stub: this minimal implementation needs no initialisation. */
+void symbol__elf_init(void)
+{
+}
+
+/* Stub in this minimal implementation: always returns NULL. */
+char *dso__demangle_sym(struct dso *dso __maybe_unused,
+			int kmodule __maybe_unused,
+			const char *elf_name __maybe_unused)
+{
+	return NULL;
+}
diff --git a/util/symbol.c b/util/symbol.c
new file mode 100644
index 0000000..1466814
--- /dev/null
+++ b/util/symbol.c
@@ -0,0 +1,2243 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include "annotate.h"
+#include "build-id.h"
+#include "util.h"
+#include "debug.h"
+#include "machine.h"
+#include "symbol.h"
+#include "strlist.h"
+#include "intlist.h"
+#include "namespaces.h"
+#include "header.h"
+#include "path.h"
+#include "sane_ctype.h"
+
+#include <elf.h>
+#include <limits.h>
+#include <symbol/kallsyms.h>
+#include <sys/utsname.h>
+
+static int dso__load_kernel_sym(struct dso *dso, struct map *map);
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map);
+static bool symbol__is_idle(const char *name);
+
+int vmlinux_path__nr_entries;
+char **vmlinux_path;
+
+/*
+ * Global symbol-handling configuration with its built-in defaults.
+ * Members not listed here start out zero-initialised.
+ */
+struct symbol_conf symbol_conf = {
+	.use_modules		= true,
+	.try_vmlinux_path	= true,
+	.annotate_src		= true,
+	.demangle		= true,
+	.demangle_kernel	= false,
+	.cumulate_callchain	= true,
+	.show_hist_headers	= true,
+	.symfs			= "",
+	.event_group		= true,
+	.inline_name		= true,
+};
+
+/*
+ * Binary types that can act as a symbol table source, terminated by
+ * DSO_BINARY_TYPE__NOT_FOUND.
+ * NOTE(review): presumably tried in this order when loading a DSO's symbols;
+ * confirm against the users of this table.
+ */
+static enum dso_binary_type binary_type_symtab[] = {
+	DSO_BINARY_TYPE__KALLSYMS,
+	DSO_BINARY_TYPE__GUEST_KALLSYMS,
+	DSO_BINARY_TYPE__JAVA_JIT,
+	DSO_BINARY_TYPE__DEBUGLINK,
+	DSO_BINARY_TYPE__BUILD_ID_CACHE,
+	DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
+	DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+	DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+	DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+	DSO_BINARY_TYPE__GUEST_KMODULE,
+	DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
+	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
+	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+	DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+/* Number of entries in binary_type_symtab, terminator included */
+#define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab)
+
+/*
+ * Tell whether a symbol type letter belongs to @map_type.  The letter is
+ * compared case-insensitively: 'T' and 'W' are functions, 'D' is a variable;
+ * any other map type never matches.
+ */
+bool symbol_type__is_a(char symbol_type, enum map_type map_type)
+{
+	const char type = toupper(symbol_type);
+
+	if (map_type == MAP__FUNCTION)
+		return type == 'T' || type == 'W';
+
+	if (map_type == MAP__VARIABLE)
+		return type == 'D';
+
+	return false;
+}
+
+/* Count the '_' characters at the very start of @str. */
+static int prefix_underscores_count(const char *str)
+{
+	int count = 0;
+
+	while (str[count] == '_')
+		count++;
+
+	return count;
+}
+
+/* Weak default: the name is returned unchanged. */
+const char * __weak arch__normalize_symbol_name(const char *name)
+{
+	return name;
+}
+
+/* Weak default: plain strcmp() of the two names. */
+int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+	return strcmp(namea, nameb);
+}
+
+/* Weak default: strncmp() of the first @n characters of the two names. */
+int __weak arch__compare_symbol_names_n(const char *namea, const char *nameb,
+					unsigned int n)
+{
+	return strncmp(namea, nameb, n);
+}
+
+/*
+ * Weak default tie-breaker between two symbols: pick SYMBOL_B when the name
+ * of @syma starts with "SyS" or "compat_SyS", SYMBOL_A otherwise.
+ */
+int __weak arch__choose_best_symbol(struct symbol *syma,
+				    struct symbol *symb __maybe_unused)
+{
+	const char *name = syma->name;
+
+	/* Avoid "SyS" kernel syscall aliases */
+	if (!strncmp(name, "SyS", 3) || !strncmp(name, "compat_SyS", 10))
+		return SYMBOL_B;
+
+	return SYMBOL_A;
+}
+
+/*
+ * Decide which of two symbols to prefer; returns SYMBOL_A or SYMBOL_B.
+ *
+ * The criteria are applied in order and the first one that tells the symbols
+ * apart wins: non-zero length, non-weak binding, global binding, fewer
+ * leading underscores, longer name, and finally the arch-specific choice.
+ */
+static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
+{
+	s64 a;
+	s64 b;
+	size_t na, nb;
+
+	/* Prefer a symbol with non zero length */
+	a = syma->end - syma->start;
+	b = symb->end - symb->start;
+	if ((b == 0) && (a > 0))
+		return SYMBOL_A;
+	else if ((a == 0) && (b > 0))
+		return SYMBOL_B;
+
+	/* Prefer a non weak symbol over a weak one */
+	a = syma->binding == STB_WEAK;
+	b = symb->binding == STB_WEAK;
+	if (b && !a)
+		return SYMBOL_A;
+	if (a && !b)
+		return SYMBOL_B;
+
+	/* Prefer a global symbol over a non global one */
+	a = syma->binding == STB_GLOBAL;
+	b = symb->binding == STB_GLOBAL;
+	if (a && !b)
+		return SYMBOL_A;
+	if (b && !a)
+		return SYMBOL_B;
+
+	/* Prefer a symbol with fewer leading underscores */
+	a = prefix_underscores_count(syma->name);
+	b = prefix_underscores_count(symb->name);
+	if (b > a)
+		return SYMBOL_A;
+	else if (a > b)
+		return SYMBOL_B;
+
+	/* Choose the symbol with the longest name */
+	na = strlen(syma->name);
+	nb = strlen(symb->name);
+	if (na > nb)
+		return SYMBOL_A;
+	else if (na < nb)
+		return SYMBOL_B;
+
+	return arch__choose_best_symbol(syma, symb);
+}
+
+/*
+ * Remove symbols that share a start address with their neighbour.
+ *
+ * Walks @symbols in order; whenever two adjacent symbols start at the same
+ * address, the one rejected by choose_best_symbol() is erased from the tree
+ * and deleted.  Nothing is done when symbol_conf.allow_aliases is set.
+ */
+void symbols__fixup_duplicate(struct rb_root *symbols)
+{
+	struct rb_node *nd;
+	struct symbol *curr, *next;
+
+	if (symbol_conf.allow_aliases)
+		return;
+
+	nd = rb_first(symbols);
+
+	while (nd) {
+		curr = rb_entry(nd, struct symbol, rb_node);
+again:
+		nd = rb_next(&curr->rb_node);
+		/* next is only used after the NULL check on nd below */
+		next = rb_entry(nd, struct symbol, rb_node);
+
+		if (!nd)
+			break;
+
+		if (curr->start != next->start)
+			continue;
+
+		if (choose_best_symbol(curr, next) == SYMBOL_A) {
+			/* Drop next and compare curr with its new successor */
+			rb_erase(&next->rb_node, symbols);
+			symbol__delete(next);
+			goto again;
+		} else {
+			/* Drop curr; the loop resumes from its successor */
+			nd = rb_next(&curr->rb_node);
+			rb_erase(&curr->rb_node, symbols);
+			symbol__delete(curr);
+		}
+	}
+}
+
+/*
+ * Give an end address to the zero-length symbols in @symbols.
+ *
+ * A symbol whose end equals its start is extended up to the start of the
+ * following symbol (when that differs).  The last symbol, if still
+ * zero-length, ends 4096 bytes past its start rounded up to a multiple of
+ * 4096.
+ */
+void symbols__fixup_end(struct rb_root *symbols)
+{
+	struct rb_node *nd, *prevnd = rb_first(symbols);
+	struct symbol *curr, *prev;
+
+	if (prevnd == NULL)
+		return;
+
+	curr = rb_entry(prevnd, struct symbol, rb_node);
+
+	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
+		prev = curr;
+		curr = rb_entry(nd, struct symbol, rb_node);
+
+		if (prev->end == prev->start && prev->end != curr->start)
+			prev->end = curr->start;
+	}
+
+	/* Last entry */
+	if (curr->end == curr->start)
+		curr->end = roundup(curr->start, 4096) + 4096;
+}
+
+/*
+ * Fill in missing end addresses for the maps of @type in @mg.
+ *
+ * With maps->lock held for writing, every map whose end is 0 gets the start
+ * of the following map as its end; the last map, if its end is still 0, is
+ * extended to ~0ULL.
+ */
+void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
+{
+	struct maps *maps = &mg->maps[type];
+	struct map *next, *curr;
+
+	down_write(&maps->lock);
+
+	curr = maps__first(maps);
+	if (curr == NULL)
+		goto out_unlock;
+
+	for (next = map__next(curr); next; next = map__next(curr)) {
+		if (!curr->end)
+			curr->end = next->start;
+		curr = next;
+	}
+
+	/*
+	 * We still haven't the actual symbols, so guess the
+	 * last map final address.
+	 */
+	if (!curr->end)
+		curr->end = ~0ULL;
+
+out_unlock:
+	up_write(&maps->lock);
+}
+
+/*
+ * Allocate and initialise a symbol.
+ *
+ * A single zeroed allocation holds symbol_conf.priv_size bytes of private
+ * data, then the struct symbol, then a copy of @name.  The returned pointer
+ * refers to the struct symbol, i.e. it lies priv_size bytes into the
+ * allocation, which is why symbol__delete() must be used to free it.
+ *
+ * The symbol spans [@start, @start + @len); sym->namelen excludes the
+ * terminating NUL.  Returns NULL on allocation failure.
+ */
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
+{
+	size_t namelen = strlen(name) + 1;
+	struct symbol *sym = calloc(1, (symbol_conf.priv_size +
+					sizeof(*sym) + namelen));
+	if (sym == NULL)
+		return NULL;
+
+	if (symbol_conf.priv_size) {
+		if (symbol_conf.init_annotation) {
+			/* The private area begins with a struct annotation */
+			struct annotation *notes = (void *)sym;
+			pthread_mutex_init(&notes->lock, NULL);
+		}
+		/* Step over the private area to the symbol proper */
+		sym = ((void *)sym) + symbol_conf.priv_size;
+	}
+
+	sym->start   = start;
+	sym->end     = len ? start + len : start;
+	sym->binding = binding;
+	sym->namelen = namelen - 1;
+
+	pr_debug4("%s: %s %#" PRIx64 "-%#" PRIx64 "\n",
+		  __func__, name, start, sym->end);
+	memcpy(sym->name, name, namelen);
+
+	return sym;
+}
+
+/*
+ * Free a symbol: the allocation starts symbol_conf.priv_size bytes before
+ * @sym, so that is the address handed to free().
+ */
+void symbol__delete(struct symbol *sym)
+{
+	free(((void *)sym) - symbol_conf.priv_size);
+}
+
+/* Erase every symbol from the tree @symbols and delete it. */
+void symbols__delete(struct rb_root *symbols)
+{
+	struct symbol *pos;
+	struct rb_node *next = rb_first(symbols);
+
+	while (next) {
+		pos = rb_entry(next, struct symbol, rb_node);
+		/* Fetch the successor before pos is erased and freed */
+		next = rb_next(&pos->rb_node);
+		rb_erase(&pos->rb_node, symbols);
+		symbol__delete(pos);
+	}
+}
+
+/*
+ * Insert @sym into the tree @symbols, which is ordered by start address.
+ * A symbol whose start equals that of an existing one goes to its right.
+ * For kernel symbols (@kernel set) sym->idle is also computed from the name.
+ */
+void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel)
+{
+	struct rb_node **p = &symbols->rb_node;
+	struct rb_node *parent = NULL;
+	const u64 ip = sym->start;
+	struct symbol *s;
+
+	if (kernel) {
+		const char *name = sym->name;
+		/*
+		 * ppc64 uses function descriptors and prefixes the symbol name
+		 * with a '.'.  Skip that dot before the idle check.
+		 */
+		if (name[0] == '.')
+			name++;
+		sym->idle = symbol__is_idle(name);
+	}
+
+	while (*p != NULL) {
+		parent = *p;
+		s = rb_entry(parent, struct symbol, rb_node);
+		if (ip < s->start)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&sym->rb_node, parent, p);
+	rb_insert_color(&sym->rb_node, symbols);
+}
+
+/* Insert @sym into @symbols as a non-kernel symbol (no idle check). */
+void symbols__insert(struct rb_root *symbols, struct symbol *sym)
+{
+	__symbols__insert(symbols, sym, false);
+}
+
+/*
+ * Look up the symbol that contains address @ip.
+ *
+ * A symbol matches when start <= ip < end, or when it is zero-length and
+ * ip equals its start.  Returns NULL if @symbols is NULL or nothing matches.
+ */
+static struct symbol *symbols__find(struct rb_root *symbols, u64 ip)
+{
+	struct rb_node *n;
+
+	if (symbols == NULL)
+		return NULL;
+
+	n = symbols->rb_node;
+
+	while (n) {
+		struct symbol *s = rb_entry(n, struct symbol, rb_node);
+
+		if (ip < s->start)
+			n = n->rb_left;
+		/* end is exclusive, except for a zero-length symbol */
+		else if (ip > s->end || (ip == s->end && ip != s->start))
+			n = n->rb_right;
+		else
+			return s;
+	}
+
+	return NULL;
+}
+
+/* Return the first symbol of the tree in order, or NULL if it is empty. */
+static struct symbol *symbols__first(struct rb_root *symbols)
+{
+	struct rb_node *first = rb_first(symbols);
+
+	return first ? rb_entry(first, struct symbol, rb_node) : NULL;
+}
+
+/* Return the last symbol of the tree in order, or NULL if it is empty. */
+static struct symbol *symbols__last(struct rb_root *symbols)
+{
+	struct rb_node *last = rb_last(symbols);
+
+	return last ? rb_entry(last, struct symbol, rb_node) : NULL;
+}
+
+/* Return the symbol that follows @sym in its tree, or NULL at the end. */
+static struct symbol *symbols__next(struct symbol *sym)
+{
+	struct rb_node *succ = rb_next(&sym->rb_node);
+
+	return succ ? rb_entry(succ, struct symbol, rb_node) : NULL;
+}
+
+/*
+ * Insert @sym into the name-ordered tree @symbols.
+ *
+ * @sym must be embedded in a struct symbol_name_rb_node, whose rb_node is
+ * the one linked into the tree.  Names are ordered with strcmp(); equal
+ * names go to the right of existing ones.
+ */
+static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym)
+{
+	struct rb_node **p = &symbols->rb_node;
+	struct rb_node *parent = NULL;
+	struct symbol_name_rb_node *symn, *s;
+
+	symn = container_of(sym, struct symbol_name_rb_node, sym);
+
+	while (*p != NULL) {
+		parent = *p;
+		s = rb_entry(parent, struct symbol_name_rb_node, rb_node);
+		if (strcmp(sym->name, s->sym.name) < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&symn->rb_node, parent, p);
+	rb_insert_color(&symn->rb_node, symbols);
+}
+
+/*
+ * Build the name-ordered tree @symbols by inserting every symbol found in
+ * the address-ordered tree @source.
+ */
+static void symbols__sort_by_name(struct rb_root *symbols,
+				  struct rb_root *source)
+{
+	struct rb_node *nd;
+
+	for (nd = rb_first(source); nd; nd = rb_next(nd)) {
+		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+		symbols__insert_by_name(symbols, pos);
+	}
+}
+
+/*
+ * Compare the symbol name @name with the search string @str.
+ *
+ * With SYMBOL_TAG_INCLUDE__DEFAULT_ONLY and a @name that contains "@@", only
+ * a prefix is compared: as many characters as the longer of @str and the
+ * part of @name before the "@@".  In all other cases the full names are
+ * compared.  Returns the result of the arch comparison hook (0 on a match).
+ */
+int symbol__match_symbol_name(const char *name, const char *str,
+			      enum symbol_tag_include includes)
+{
+	const char *versioning;
+
+	if (includes == SYMBOL_TAG_INCLUDE__DEFAULT_ONLY &&
+	    (versioning = strstr(name, "@@"))) {
+		int len = strlen(str);
+
+		/* Compare at least up to the "@@" marker */
+		if (len < versioning - name)
+			len = versioning - name;
+
+		return arch__compare_symbol_names_n(name, str, len);
+	} else
+		return arch__compare_symbol_names(name, str);
+}
+
+/*
+ * Find a symbol called @name in the name-ordered tree @symbols.
+ *
+ * The tree is searched with symbol__match_symbol_name().  Unless @includes
+ * is SYMBOL_TAG_INCLUDE__DEFAULT_ONLY, the search then steps back to the
+ * first entry carrying the same name.
+ *
+ * Returns the symbol, or NULL if @symbols is NULL or nothing matches.
+ */
+static struct symbol *symbols__find_by_name(struct rb_root *symbols,
+					    const char *name,
+					    enum symbol_tag_include includes)
+{
+	struct rb_node *n;
+	struct symbol_name_rb_node *s = NULL;
+
+	if (symbols == NULL)
+		return NULL;
+
+	n = symbols->rb_node;
+
+	while (n) {
+		int cmp;
+
+		s = rb_entry(n, struct symbol_name_rb_node, rb_node);
+		cmp = symbol__match_symbol_name(s->sym.name, name, includes);
+
+		if (cmp > 0)
+			n = n->rb_left;
+		else if (cmp < 0)
+			n = n->rb_right;
+		else
+			break;
+	}
+
+	if (n == NULL)
+		return NULL;
+
+	if (includes != SYMBOL_TAG_INCLUDE__DEFAULT_ONLY)
+		/* return first symbol that has same name (if any) */
+		for (n = rb_prev(n); n; n = rb_prev(n)) {
+			struct symbol_name_rb_node *tmp;
+
+			tmp = rb_entry(n, struct symbol_name_rb_node, rb_node);
+			if (arch__compare_symbol_names(tmp->sym.name, s->sym.name))
+				break;
+
+			s = tmp;
+		}
+
+	return &s->sym;
+}
+
+/*
+ * Clear the cached result of the last dso__find_symbol() lookup for the map
+ * types MAP__FUNCTION through MAP__VARIABLE.
+ */
+void dso__reset_find_symbol_cache(struct dso *dso)
+{
+	enum map_type type;
+
+	for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) {
+		dso->last_find_result[type].addr   = 0;
+		dso->last_find_result[type].symbol = NULL;
+	}
+}
+
+/*
+ * Insert @sym into the @type symbol tree of @dso and keep the lookup cache
+ * consistent: if the cached address falls inside the new symbol (or the new
+ * symbol is zero-length and starts at or below it), the cache now points to
+ * @sym.
+ */
+void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym)
+{
+	__symbols__insert(&dso->symbols[type], sym, dso->kernel);
+
+	/* update the symbol cache if necessary */
+	if (dso->last_find_result[type].addr >= sym->start &&
+	    (dso->last_find_result[type].addr < sym->end ||
+	    sym->start == sym->end)) {
+		dso->last_find_result[type].symbol = sym;
+	}
+}
+
+/*
+ * Return the symbol of @dso/@type found for @addr by symbols__find().
+ *
+ * The previous lookup is remembered per map type; it is reused only when it
+ * was for the same address and produced a non-NULL symbol.
+ */
+struct symbol *dso__find_symbol(struct dso *dso,
+				enum map_type type, u64 addr)
+{
+	bool cached = dso->last_find_result[type].addr == addr &&
+		      dso->last_find_result[type].symbol != NULL;
+
+	if (cached)
+		return dso->last_find_result[type].symbol;
+
+	dso->last_find_result[type].addr   = addr;
+	dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr);
+
+	return dso->last_find_result[type].symbol;
+}
+
+/* Return symbols__first() of the @type symbol tree of @dso. */
+struct symbol *dso__first_symbol(struct dso *dso, enum map_type type)
+{
+	struct rb_root *root = &dso->symbols[type];
+
+	return symbols__first(root);
+}
+
+/* Return symbols__last() of the @type symbol tree of @dso. */
+struct symbol *dso__last_symbol(struct dso *dso, enum map_type type)
+{
+	struct rb_root *root = &dso->symbols[type];
+
+	return symbols__last(root);
+}
+
+/* Return the symbol that symbols__next() reports as following @sym. */
+struct symbol *dso__next_symbol(struct symbol *sym)
+{
+	struct symbol *next = symbols__next(sym);
+
+	return next;
+}
+
+/*
+ * Return the symbol following @sym in the name-keyed tree, or NULL when
+ * @sym is the last one.  @sym must be embedded in a symbol_name_rb_node.
+ */
+struct symbol *symbol__next_by_name(struct symbol *sym)
+{
+	struct symbol_name_rb_node *entry;
+	struct rb_node *next;
+
+	entry = container_of(sym, struct symbol_name_rb_node, sym);
+	next = rb_next(&entry->rb_node);
+	if (next == NULL)
+		return NULL;
+
+	entry = rb_entry(next, struct symbol_name_rb_node, rb_node);
+	return &entry->sym;
+}
+
+/*
+ * Returns the first symbol that matches @name.
+ *
+ * A lookup with SYMBOL_TAG_INCLUDE__NONE is tried first; only when that
+ * finds nothing is the lookup repeated with SYMBOL_TAG_INCLUDE__DEFAULT_ONLY.
+ */
+struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
+					const char *name)
+{
+	struct rb_root *names = &dso->symbol_names[type];
+	struct symbol *sym;
+
+	sym = symbols__find_by_name(names, name, SYMBOL_TAG_INCLUDE__NONE);
+	if (sym != NULL)
+		return sym;
+
+	return symbols__find_by_name(names, name,
+				     SYMBOL_TAG_INCLUDE__DEFAULT_ONLY);
+}
+
+/*
+ * Flag @dso as sorted by name for @type, then populate its name-keyed tree
+ * from the address-keyed one.
+ */
+void dso__sort_by_name(struct dso *dso, enum map_type type)
+{
+	struct rb_root *by_name = &dso->symbol_names[type];
+	struct rb_root *by_addr = &dso->symbols[type];
+
+	dso__set_sorted_by_name(dso, type);
+	symbols__sort_by_name(by_name, by_addr);
+}
+
+/*
+ * Parse @filename line by line and call @process_module for each entry.
+ *
+ * For every line the start address is taken from the hex digits following
+ * the last 'x', the module name is the text before the first space (passed
+ * on wrapped in brackets, "[name]"), and the size is the number that follows
+ * that space.  Lines that do not have this shape are skipped.
+ *
+ * @arg is passed through to @process_module untouched.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened or read, or the
+ * first non-zero value returned by @process_module (which stops parsing).
+ */
+int modules__parse(const char *filename, void *arg,
+		   int (*process_module)(void *arg, const char *name,
+					 u64 start, u64 size))
+{
+	char *line = NULL;
+	size_t n = 0;
+	FILE *file;
+	int err = 0;
+
+	file = fopen(filename, "r");
+	if (file == NULL)
+		return -1;
+
+	while (1) {
+		char name[PATH_MAX];
+		u64 start, size;
+		char *sep, *endptr;
+		ssize_t line_len;
+
+		line_len = getline(&line, &n, file);
+		if (line_len < 0) {
+			if (feof(file))
+				break;
+			err = -1;
+			goto out;
+		}
+
+		if (!line) {
+			err = -1;
+			goto out;
+		}
+
+		/*
+		 * Only drop the trailing newline when there is one: the last
+		 * line of a file may be unterminated, and blindly chopping
+		 * its final character would corrupt the address field.
+		 */
+		if (line_len > 0 && line[line_len - 1] == '\n')
+			line[--line_len] = '\0';
+
+		/* the address is the last "0x..." token on the line */
+		sep = strrchr(line, 'x');
+		if (sep == NULL)
+			continue;
+
+		hex2u64(sep + 1, &start);
+
+		/* the module name ends at the first space */
+		sep = strchr(line, ' ');
+		if (sep == NULL)
+			continue;
+
+		*sep = '\0';
+
+		scnprintf(name, sizeof(name), "[%s]", line);
+
+		/* the size must be followed by more whitespace-separated fields */
+		size = strtoul(sep + 1, &endptr, 0);
+		if (*endptr != ' ' && *endptr != '\t')
+			continue;
+
+		err = process_module(arg, name, start, size);
+		if (err)
+			break;
+	}
+out:
+	free(line);
+	fclose(file);
+	return err;
+}
+
+/* Context handed to map__process_kallsym_symbol() through its void *arg. */
+struct process_kallsyms_args {
+	struct map *map;	/* its ->type selects which symbols are kept */
+	struct dso *dso;	/* DSO whose symbol tree receives the symbols */
+};
+
+/*
+ * Tell whether @name is one of the known idle-loop function names.
+ *
+ * These are symbols in the kernel image, so make sure that
+ * sym is from a kernel DSO.
+ */
+static bool symbol__is_idle(const char *name)
+{
+	const char * const idle_symbols[] = {
+		"cpu_idle",
+		"cpu_startup_entry",
+		"intel_idle",
+		"default_idle",
+		"native_safe_halt",
+		"enter_idle",
+		"exit_idle",
+		"mwait_idle",
+		"mwait_idle_with_hints",
+		"poll_idle",
+		"ppc64_runlatch_off",
+		"pseries_dedicated_idle_sleep",
+		NULL
+	};
+	const char * const *entry;
+
+	/* the table is NULL-terminated */
+	for (entry = idle_symbols; *entry != NULL; entry++) {
+		if (strcmp(*entry, name) == 0)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * kallsyms__parse() callback: create a symbol for one kallsyms entry and
+ * add it to the DSO named in @arg (a struct process_kallsyms_args).
+ *
+ * Entries whose @type does not belong to the map's type are ignored.
+ * Returns 0 on success or when ignored, -ENOMEM if the symbol cannot be
+ * allocated.
+ */
+static int map__process_kallsym_symbol(void *arg, const char *name,
+				       char type, u64 start)
+{
+	struct process_kallsyms_args *args = arg;
+	struct rb_root *root;
+	struct symbol *sym;
+
+	if (!symbol_type__is_a(type, args->map->type))
+		return 0;
+
+	/*
+	 * module symbols are not sorted so we add all
+	 * symbols, setting length to 0, and rely on
+	 * symbols__fixup_end() to fix it up.
+	 */
+	sym = symbol__new(start, 0, kallsyms2elf_binding(type), name);
+	if (!sym)
+		return -ENOMEM;
+
+	/*
+	 * We will pass the symbols to the filter later, in
+	 * map__split_kallsyms, when we have split the maps per module
+	 */
+	root = &args->dso->symbols[args->map->type];
+	__symbols__insert(root, sym, strchr(name, '[') == NULL);
+
+	return 0;
+}
+
+/*
+ * Loads the function entries in /proc/kallsyms into kernel_map->dso,
+ * so that we can in the next step set the symbol ->end address and then
+ * call kernel_maps__split_kallsyms.
+ *
+ * Returns the result of kallsyms__parse().
+ */
+static int dso__load_all_kallsyms(struct dso *dso, const char *filename,
+				  struct map *map)
+{
+	struct process_kallsyms_args args;
+
+	args.map = map;
+	args.dso = dso;
+
+	return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
+}
+
+/*
+ * Redistribute the symbols loaded into @dso over the maps found in the
+ * map_groups of @map.
+ *
+ * Every symbol is taken out of the DSO's tree, any "\t<module>" suffix in
+ * its name is cut off, and the map containing its start address is looked
+ * up.  Symbols without a map are deleted; the others have their addresses
+ * rebased by (map start - map pgoff) and are inserted into the symbol tree
+ * of that map's DSO.
+ *
+ * Returns the number of symbols re-inserted, or -1 if @map has no
+ * map_groups.
+ */
+static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
+{
+	struct map_groups *kmaps = map__kmaps(map);
+	struct map *curr_map;
+	struct symbol *pos;
+	int count = 0;
+	/* copy of the tree root: symbols are erased from this copy */
+	struct rb_root old_root = dso->symbols[map->type];
+	struct rb_root *root = &dso->symbols[map->type];
+	struct rb_node *next = rb_first(root);
+
+	if (!kmaps)
+		return -1;
+
+	/* start from an empty tree; symbols get re-inserted below */
+	*root = RB_ROOT;
+
+	while (next) {
+		char *module;
+
+		pos = rb_entry(next, struct symbol, rb_node);
+		/* fetch the successor before pos is unlinked */
+		next = rb_next(&pos->rb_node);
+
+		rb_erase_init(&pos->rb_node, &old_root);
+
+		/* drop the module suffix that follows a tab in the name */
+		module = strchr(pos->name, '\t');
+		if (module)
+			*module = '\0';
+
+		curr_map = map_groups__find(kmaps, map->type, pos->start);
+
+		if (!curr_map) {
+			symbol__delete(pos);
+			continue;
+		}
+
+		pos->start -= curr_map->start - curr_map->pgoff;
+		/* an end of 0 is left untouched */
+		if (pos->end)
+			pos->end -= curr_map->start - curr_map->pgoff;
+		symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+		++count;
+	}
+
+	/* Symbols have been adjusted */
+	dso->adjust_symbols = 1;
+
+	return count;
+}
+
+/*
+ * Split the symbols into maps, making sure there are no overlaps, i.e. the
+ * kernel range is broken in several maps, named [kernel].N, as we don't have
+ * the original ELF section names vmlinux have.
+ *
+ * @delta is subtracted from the addresses of symbols that carry no module
+ * suffix.
+ *
+ * Returns the number of symbols left in @map plus the number moved to other
+ * maps, or -1 when @map has no map_groups or a new dso/map cannot be
+ * created.
+ */
+static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
+{
+	struct map_groups *kmaps = map__kmaps(map);
+	struct machine *machine;
+	struct map *curr_map = map;
+	struct symbol *pos;
+	int count = 0, moved = 0;
+	struct rb_root *root = &dso->symbols[map->type];
+	struct rb_node *next = rb_first(root);
+	int kernel_range = 0;
+
+	if (!kmaps)
+		return -1;
+
+	machine = kmaps->machine;
+
+	while (next) {
+		char *module;
+
+		pos = rb_entry(next, struct symbol, rb_node);
+		/* fetch the successor first: pos may be erased below */
+		next = rb_next(&pos->rb_node);
+
+		/* a tab in the name separates the symbol from its module */
+		module = strchr(pos->name, '\t');
+		if (module) {
+			if (!symbol_conf.use_modules)
+				goto discard_symbol;
+
+			*module++ = '\0';
+
+			/* switch maps only when the module name changes */
+			if (strcmp(curr_map->dso->short_name, module)) {
+				if (curr_map != map &&
+				    dso->kernel == DSO_TYPE_GUEST_KERNEL &&
+				    machine__is_default_guest(machine)) {
+					/*
+					 * We assume all symbols of a module are
+					 * continuous in * kallsyms, so curr_map
+					 * points to a module and all its
+					 * symbols are in its kmap. Mark it as
+					 * loaded.
+					 */
+					dso__set_loaded(curr_map->dso,
+							curr_map->type);
+				}
+
+				curr_map = map_groups__find_by_name(kmaps,
+							map->type, module);
+				if (curr_map == NULL) {
+					pr_debug("%s/proc/{kallsyms,modules} "
+					         "inconsistency while looking "
+						 "for \"%s\" module!\n",
+						 machine->root_dir, module);
+					/* fall back to the kernel map */
+					curr_map = map;
+					goto discard_symbol;
+				}
+
+				if (curr_map->dso->loaded &&
+				    !machine__is_default_guest(machine))
+					goto discard_symbol;
+			}
+			/*
+			 * So that we look just like we get from .ko files,
+			 * i.e. not prelinked, relative to map->start.
+			 */
+			pos->start = curr_map->map_ip(curr_map, pos->start);
+			pos->end   = curr_map->map_ip(curr_map, pos->end);
+		} else if (curr_map != map) {
+			/* symbol without module suffix seen after a module map */
+			char dso_name[PATH_MAX];
+			struct dso *ndso;
+
+			if (delta) {
+				/* Kernel was relocated at boot time */
+				pos->start -= delta;
+				pos->end -= delta;
+			}
+
+			/* nothing kept in @map yet: keep using it */
+			if (count == 0) {
+				curr_map = map;
+				goto add_symbol;
+			}
+
+			/*
+			 * NOTE(review): kernel_range is post-incremented here
+			 * and incremented again after map_groups__insert()
+			 * below, so generated names use every other index -
+			 * confirm whether that is intended.
+			 */
+			if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+				snprintf(dso_name, sizeof(dso_name),
+					"[guest.kernel].%d",
+					kernel_range++);
+			else
+				snprintf(dso_name, sizeof(dso_name),
+					"[kernel].%d",
+					kernel_range++);
+
+			ndso = dso__new(dso_name);
+			if (ndso == NULL)
+				return -1;
+
+			ndso->kernel = dso->kernel;
+
+			curr_map = map__new2(pos->start, ndso, map->type);
+			if (curr_map == NULL) {
+				dso__put(ndso);
+				return -1;
+			}
+
+			curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
+			map_groups__insert(kmaps, curr_map);
+			++kernel_range;
+		} else if (delta) {
+			/* Kernel was relocated at boot time */
+			pos->start -= delta;
+			pos->end -= delta;
+		}
+add_symbol:
+		/* move the symbol to the other map's DSO, or count it as kept */
+		if (curr_map != map) {
+			rb_erase(&pos->rb_node, root);
+			symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+			++moved;
+		} else
+			++count;
+
+		continue;
+discard_symbol:
+		rb_erase(&pos->rb_node, root);
+		symbol__delete(pos);
+	}
+
+	/* same "mark as loaded" step as in the loop, for the final map */
+	if (curr_map != map &&
+	    dso->kernel == DSO_TYPE_GUEST_KERNEL &&
+	    machine__is_default_guest(kmaps->machine)) {
+		dso__set_loaded(curr_map->dso, curr_map->type);
+	}
+
+	return count + moved;
+}
+
+/*
+ * Report whether @filename resolves (via realpath()) to
+ * @restricted_filename while symbol_conf.kptr_restrict is set.
+ *
+ * Returns false when kptr_restrict is off or the path cannot be resolved.
+ */
+bool symbol__restricted_filename(const char *filename,
+				 const char *restricted_filename)
+{
+	char *resolved;
+	bool restricted;
+
+	if (!symbol_conf.kptr_restrict)
+		return false;
+
+	resolved = realpath(filename, NULL);
+	if (resolved == NULL)
+		return false;
+
+	restricted = strcmp(resolved, restricted_filename) == 0;
+	free(resolved);
+
+	return restricted;
+}
+
+/* One parsed module entry, kept in an rb-tree ordered by name by add_module(). */
+struct module_info {
+	struct rb_node rb_node;	/* linkage in the modules tree */
+	char *name;		/* strdup()ed name, released by delete_modules() */
+	u64 start;		/* start address reported for the module */
+};
+
+/*
+ * Link @mi into the @modules tree, ordered by strcmp() on the name.  An
+ * entry whose name equals an existing one is placed to its right.
+ */
+static void add_module(struct module_info *mi, struct rb_root *modules)
+{
+	struct rb_node **link = &modules->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		struct module_info *cur;
+
+		parent = *link;
+		cur = rb_entry(parent, struct module_info, rb_node);
+		link = strcmp(mi->name, cur->name) < 0 ? &parent->rb_left
+							: &parent->rb_right;
+	}
+
+	rb_link_node(&mi->rb_node, parent, link);
+	rb_insert_color(&mi->rb_node, modules);
+}
+
+/* Unlink and free every module_info (and its name) held in @modules. */
+static void delete_modules(struct rb_root *modules)
+{
+	struct rb_node *node, *next;
+
+	for (node = rb_first(modules); node != NULL; node = next) {
+		struct module_info *mi;
+
+		mi = rb_entry(node, struct module_info, rb_node);
+		/* grab the successor before the node goes away */
+		next = rb_next(&mi->rb_node);
+
+		rb_erase(&mi->rb_node, modules);
+		zfree(&mi->name);
+		free(mi);
+	}
+}
+
+/*
+ * Look up the module called @name in the name-ordered @modules tree.
+ * Returns the matching entry, or NULL if there is none.
+ */
+static struct module_info *find_module(const char *name,
+				       struct rb_root *modules)
+{
+	struct rb_node *node = modules->rb_node;
+
+	while (node != NULL) {
+		struct module_info *m = rb_entry(node, struct module_info,
+						 rb_node);
+		int cmp = strcmp(name, m->name);
+
+		if (cmp == 0)
+			return m;
+
+		node = cmp < 0 ? node->rb_left : node->rb_right;
+	}
+
+	return NULL;
+}
+
+/*
+ * modules__parse() callback: record one module (@name, @start) in the
+ * rb-tree passed as @arg.  The size is not used.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure.
+ */
+static int __read_proc_modules(void *arg, const char *name, u64 start,
+			       u64 size __maybe_unused)
+{
+	struct rb_root *modules = arg;
+	struct module_info *mi = zalloc(sizeof(struct module_info));
+
+	if (mi == NULL)
+		return -ENOMEM;
+
+	mi->start = start;
+	mi->name = strdup(name);
+	if (mi->name == NULL) {
+		free(mi);
+		return -ENOMEM;
+	}
+
+	add_module(mi, modules);
+
+	return 0;
+}
+
+/*
+ * Fill @modules from @filename.
+ *
+ * Returns 0 on success.  Returns -1 when @filename is a restricted
+ * "/proc/modules" or when parsing fails; in the latter case anything
+ * already added to @modules is freed again.
+ */
+static int read_proc_modules(const char *filename, struct rb_root *modules)
+{
+	int err;
+
+	if (symbol__restricted_filename(filename, "/proc/modules"))
+		return -1;
+
+	err = modules__parse(filename, modules, __read_proc_modules);
+	if (err == 0)
+		return 0;
+
+	delete_modules(modules);
+	return -1;
+}
+
+/*
+ * Compare the module lists read from the files @from and @to.
+ *
+ * Returns 0 when both files list the same module names at the same start
+ * addresses, -1 when they differ or either file cannot be read.
+ */
+int compare_proc_modules(const char *from, const char *to)
+{
+	struct rb_root from_modules = RB_ROOT;
+	struct rb_root to_modules = RB_ROOT;
+	struct rb_node *from_node, *to_node;
+	int ret = -1;
+
+	if (read_proc_modules(from, &from_modules))
+		return -1;
+
+	if (read_proc_modules(to, &to_modules))
+		goto out_delete_from;
+
+	/* walk both name-ordered trees in lockstep */
+	from_node = rb_first(&from_modules);
+	to_node = rb_first(&to_modules);
+	while (from_node != NULL && to_node != NULL) {
+		struct module_info *from_m, *to_m;
+
+		from_m = rb_entry(from_node, struct module_info, rb_node);
+		to_m = rb_entry(to_node, struct module_info, rb_node);
+
+		if (from_m->start != to_m->start)
+			break;
+		if (strcmp(from_m->name, to_m->name) != 0)
+			break;
+
+		from_node = rb_next(from_node);
+		to_node = rb_next(to_node);
+	}
+
+	/* equal only if both walks ran off the end together */
+	if (from_node == NULL && to_node == NULL)
+		ret = 0;
+
+	delete_modules(&to_modules);
+out_delete_from:
+	delete_modules(&from_modules);
+
+	return ret;
+}
+
+/*
+ * Check that every map in @kmaps of the same type as @map - other than
+ * @map itself or one starting at the same address - names a module that
+ * the modules file @filename lists at the map's start address.
+ *
+ * Returns 0 if all match, -EINVAL on the first mismatch, or the error from
+ * read_proc_modules().
+ */
+static int do_validate_kcore_modules(const char *filename, struct map *map,
+				  struct map_groups *kmaps)
+{
+	struct rb_root modules = RB_ROOT;
+	struct map *cur;
+	int err = read_proc_modules(filename, &modules);
+
+	if (err)
+		return err;
+
+	for (cur = map_groups__first(kmaps, map->type); cur != NULL;
+	     cur = map_groups__next(cur)) {
+		struct module_info *mi;
+
+		/* The kernel map */
+		if (cur == map || cur->start == map->start)
+			continue;
+
+		/* Module must be in memory at the same address */
+		mi = find_module(cur->dso->short_name, &modules);
+		if (mi == NULL || mi->start != cur->start) {
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	delete_modules(&modules);
+	return err;
+}
+
+/*
+ * If kallsyms is referenced by name then we look for filename in the same
+ * directory.
+ *
+ * @kallsyms_filename is copied into @filename; when its last path component
+ * is exactly "kallsyms" that component is overwritten with @base_name and
+ * true is returned.  Otherwise false is returned and @filename keeps the
+ * plain copy.  No bounds are checked: @filename must be large enough for
+ * both strings.
+ */
+static bool filename_from_kallsyms_filename(char *filename,
+					    const char *base_name,
+					    const char *kallsyms_filename)
+{
+	char *last;
+
+	strcpy(filename, kallsyms_filename);
+
+	last = strrchr(filename, '/');
+	if (last == NULL)
+		return false;
+
+	/* step past the slash to the final component */
+	last++;
+	if (strcmp(last, "kallsyms") != 0)
+		return false;
+
+	strcpy(last, base_name);
+	return true;
+}
+
+/*
+ * Validate the module maps of @map against the "modules" file that sits
+ * next to @kallsyms_filename.
+ *
+ * Returns 0 when they agree, -EINVAL otherwise (including when @map has no
+ * map_groups or the modules path cannot be derived).
+ */
+static int validate_kcore_modules(const char *kallsyms_filename,
+				  struct map *map)
+{
+	char modules_filename[PATH_MAX];
+	struct map_groups *kmaps = map__kmaps(map);
+	bool have_name;
+
+	if (kmaps == NULL)
+		return -EINVAL;
+
+	have_name = filename_from_kallsyms_filename(modules_filename, "modules",
+						    kallsyms_filename);
+	if (!have_name)
+		return -EINVAL;
+
+	return do_validate_kcore_modules(modules_filename, map, kmaps) ?
+		-EINVAL : 0;
+}
+
+/*
+ * Check that @kallsyms_filename describes the same layout as @map.
+ *
+ * When the kmap has a named relocation reference symbol, its address in
+ * the kallsyms file must equal the recorded one (-ENOENT if it cannot be
+ * looked up, -EINVAL if it differs).  The module maps are then validated
+ * with validate_kcore_modules(), whose result is returned.
+ */
+static int validate_kcore_addresses(const char *kallsyms_filename,
+				    struct map *map)
+{
+	struct kmap *kmap = map__kmap(map);
+	u64 start;
+
+	if (kmap == NULL)
+		return -EINVAL;
+
+	/* no usable reference symbol: only the modules can be checked */
+	if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name)
+		return validate_kcore_modules(kallsyms_filename, map);
+
+	if (kallsyms__get_function_start(kallsyms_filename,
+					 kmap->ref_reloc_sym->name, &start))
+		return -ENOENT;
+
+	if (start != kmap->ref_reloc_sym->addr)
+		return -EINVAL;
+
+	return validate_kcore_modules(kallsyms_filename, map);
+}
+
+/* State shared with kcore_mapfn() while new maps are being collected. */
+struct kcore_mapfn_data {
+	struct dso *dso;	/* DSO passed to map__new2() for each map */
+	enum map_type type;	/* map type passed to map__new2() */
+	struct list_head maps;	/* list the created maps are added to */
+};
+
+/*
+ * Callback used while reading kcore: create a map for the range
+ * [@start, @start + @len) with page offset @pgoff and add it to the list
+ * in @data (a struct kcore_mapfn_data).
+ *
+ * Returns 0 on success, -ENOMEM if the map cannot be allocated.
+ */
+static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
+{
+	struct kcore_mapfn_data *md = data;
+	struct map *map = map__new2(start, md->dso, md->type);
+
+	if (!map)
+		return -ENOMEM;
+
+	map->pgoff = pgoff;
+	map->end = map->start + len;
+
+	list_add(&map->node, &md->maps);
+
+	return 0;
+}
+
+/*
+ * Replace the kernel maps of @map's map_groups with maps read from the
+ * "kcore" file that sits next to @kallsyms_filename.
+ *
+ * @map must be the machine's vmlinux map for its type, and the kallsyms
+ * file must pass validate_kcore_addresses().  On success the old maps
+ * (except @map) are removed, @map takes over the range of the new map that
+ * contains the DSO's first symbol (or the first new map in the list), the
+ * remaining new maps are inserted, and the DSO's binary type and long name
+ * are pointed at kcore.
+ *
+ * Returns 0 on success, -EINVAL on any failure.
+ */
+static int dso__load_kcore(struct dso *dso, struct map *map,
+			   const char *kallsyms_filename)
+{
+	struct map_groups *kmaps = map__kmaps(map);
+	struct machine *machine;
+	struct kcore_mapfn_data md;
+	struct map *old_map, *new_map, *replacement_map = NULL;
+	bool is_64_bit;
+	int err, fd;
+	char kcore_filename[PATH_MAX];
+	struct symbol *sym;
+
+	if (!kmaps)
+		return -EINVAL;
+
+	machine = kmaps->machine;
+
+	/* This function requires that the map is the kernel map */
+	if (map != machine->vmlinux_maps[map->type])
+		return -EINVAL;
+
+	if (!filename_from_kallsyms_filename(kcore_filename, "kcore",
+					     kallsyms_filename))
+		return -EINVAL;
+
+	/* Modules and kernel must be present at their original addresses */
+	if (validate_kcore_addresses(kallsyms_filename, map))
+		return -EINVAL;
+
+	md.dso = dso;
+	md.type = map->type;
+	INIT_LIST_HEAD(&md.maps);
+
+	fd = open(kcore_filename, O_RDONLY);
+	if (fd < 0) {
+		pr_debug("Failed to open %s. Note /proc/kcore requires CAP_SYS_RAWIO capability to access.\n",
+			 kcore_filename);
+		return -EINVAL;
+	}
+
+	/* Read new maps into temporary lists */
+	err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md,
+			      &is_64_bit);
+	if (err)
+		goto out_err;
+	dso->is_64_bit = is_64_bit;
+
+	if (list_empty(&md.maps)) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	/* Remove old maps */
+	old_map = map_groups__first(kmaps, map->type);
+	while (old_map) {
+		/* fetch the successor before old_map may be removed */
+		struct map *next = map_groups__next(old_map);
+
+		if (old_map != map)
+			map_groups__remove(kmaps, old_map);
+		old_map = next;
+	}
+
+	/* Find the kernel map using the first symbol */
+	sym = dso__first_symbol(dso, map->type);
+	list_for_each_entry(new_map, &md.maps, node) {
+		if (sym && sym->start >= new_map->start &&
+		    sym->start < new_map->end) {
+			replacement_map = new_map;
+			break;
+		}
+	}
+
+	/* no map contains the first symbol: fall back to the list head */
+	if (!replacement_map)
+		replacement_map = list_entry(md.maps.next, struct map, node);
+
+	/* Add new maps */
+	while (!list_empty(&md.maps)) {
+		new_map = list_entry(md.maps.next, struct map, node);
+		list_del_init(&new_map->node);
+		if (new_map == replacement_map) {
+			/* @map is kept; it only adopts the new map's geometry */
+			map->start	= new_map->start;
+			map->end	= new_map->end;
+			map->pgoff	= new_map->pgoff;
+			map->map_ip	= new_map->map_ip;
+			map->unmap_ip	= new_map->unmap_ip;
+			/* Ensure maps are correctly ordered */
+			map__get(map);
+			map_groups__remove(kmaps, map);
+			map_groups__insert(kmaps, map);
+			map__put(map);
+		} else {
+			map_groups__insert(kmaps, new_map);
+		}
+
+		/* drop the reference held while the map sat on the temporary list */
+		map__put(new_map);
+	}
+
+	/*
+	 * Set the data type and long name so that kcore can be read via
+	 * dso__data_read_addr().
+	 */
+	if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+		dso->binary_type = DSO_BINARY_TYPE__GUEST_KCORE;
+	else
+		dso->binary_type = DSO_BINARY_TYPE__KCORE;
+	/* NOTE(review): the strdup() result is passed on unchecked */
+	dso__set_long_name(dso, strdup(kcore_filename), true);
+
+	close(fd);
+
+	if (map->type == MAP__FUNCTION)
+		pr_debug("Using %s for kernel object code\n", kcore_filename);
+	else
+		pr_debug("Using %s for kernel data\n", kcore_filename);
+
+	return 0;
+
+out_err:
+	/* release every map collected so far; @map is reused as a cursor */
+	while (!list_empty(&md.maps)) {
+		map = list_entry(md.maps.next, struct map, node);
+		list_del_init(&map->node);
+		map__put(map);
+	}
+	close(fd);
+	/* the value stored in err is not propagated */
+	return -EINVAL;
+}
+
+/*
+ * If the kernel is relocated at boot time, kallsyms won't match.  Compute the
+ * delta based on the relocation reference symbol.
+ *
+ * *@delta is written only when a named reference symbol exists and its
+ * address can be read from @filename.  Returns 0 on success (including the
+ * "no reference symbol" case, which leaves *@delta untouched), -1 when
+ * @map has no kmap or the symbol lookup fails.
+ */
+static int kallsyms__delta(struct map *map, const char *filename, u64 *delta)
+{
+	struct kmap *kmap = map__kmap(map);
+	u64 addr;
+
+	if (kmap == NULL)
+		return -1;
+
+	if (kmap->ref_reloc_sym == NULL || kmap->ref_reloc_sym->name == NULL)
+		return 0;
+
+	if (kallsyms__get_function_start(filename, kmap->ref_reloc_sym->name,
+					 &addr) != 0)
+		return -1;
+
+	*delta = addr - kmap->ref_reloc_sym->addr;
+
+	return 0;
+}
+
+/*
+ * Load the symbols of @dso from the kallsyms file @filename.
+ *
+ * After parsing, symbol ends and duplicates are fixed up and the symtab
+ * type is recorded.  Unless @no_kcore is set, kcore is tried first; if it
+ * cannot be used the symbols are split with dso__split_kallsyms().
+ *
+ * Returns the value of the split function used, or -1 when the file is
+ * restricted, cannot be parsed, or the relocation delta cannot be computed.
+ */
+int __dso__load_kallsyms(struct dso *dso, const char *filename,
+			 struct map *map, bool no_kcore)
+{
+	u64 delta = 0;
+
+	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+		return -1;
+
+	if (dso__load_all_kallsyms(dso, filename, map) < 0)
+		return -1;
+
+	if (kallsyms__delta(map, filename, &delta) != 0)
+		return -1;
+
+	symbols__fixup_end(&dso->symbols[map->type]);
+	symbols__fixup_duplicate(&dso->symbols[map->type]);
+
+	dso->symtab_type = dso->kernel == DSO_TYPE_GUEST_KERNEL ?
+				DSO_BINARY_TYPE__GUEST_KALLSYMS :
+				DSO_BINARY_TYPE__KALLSYMS;
+
+	/* kcore not wanted, or not usable: plain kallsyms split */
+	if (no_kcore || dso__load_kcore(dso, map, filename) != 0)
+		return dso__split_kallsyms(dso, map, delta);
+
+	return dso__split_kallsyms_for_kcore(dso, map);
+}
+
+/* Same as __dso__load_kallsyms() with kcore usage allowed. */
+int dso__load_kallsyms(struct dso *dso, const char *filename,
+		       struct map *map)
+{
+	const bool no_kcore = false;
+
+	return __dso__load_kallsyms(dso, filename, map, no_kcore);
+}
+
+/*
+ * Load symbols for @dso from the perf map file @map_path.
+ *
+ * Each line is parsed as "<hex start> <hex size> <name>"; lines too short
+ * to hold all three fields are skipped.  Every parsed entry becomes a
+ * STB_GLOBAL symbol inserted into the @map->type symbol tree of @dso.
+ *
+ * Returns the number of symbols added, or -1 if the file cannot be opened
+ * or a symbol cannot be allocated.  The file and the line buffer are
+ * released on every path.
+ */
+static int dso__load_perf_map(const char *map_path, struct dso *dso,
+			      struct map *map)
+{
+	char *line = NULL;
+	size_t n = 0;
+	FILE *file;
+	int nr_syms = 0;
+
+	file = fopen(map_path, "r");
+	if (file == NULL)
+		return -1;
+
+	while (!feof(file)) {
+		u64 start, size;
+		struct symbol *sym;
+		int line_len, len;
+
+		line_len = getline(&line, &n, file);
+		if (line_len < 0)
+			break;
+
+		if (!line)
+			goto out_failure;
+
+		/*
+		 * Strip the newline only when present, so that an
+		 * unterminated last line does not lose the final character
+		 * of its symbol name.
+		 */
+		if (line_len > 0 && line[line_len - 1] == '\n')
+			line[--line_len] = '\0';
+
+		len = hex2u64(line, &start);
+
+		/* skip the separator after the start address */
+		len++;
+		if (len + 2 >= line_len)
+			continue;
+
+		len += hex2u64(line + len, &size);
+
+		/* skip the separator after the size */
+		len++;
+		if (len + 2 >= line_len)
+			continue;
+
+		sym = symbol__new(start, size, STB_GLOBAL, line + len);
+
+		if (sym == NULL)
+			goto out_failure;
+
+		symbols__insert(&dso->symbols[map->type], sym);
+		nr_syms++;
+	}
+
+	free(line);
+	fclose(file);
+
+	return nr_syms;
+
+out_failure:
+	/* the stream was opened successfully, so it must be closed here too */
+	free(line);
+	fclose(file);
+	return -1;
+}
+
+/*
+ * Decide whether a symbol source of binary @type may be used for @dso.
+ * @kmod tells whether the DSO is being treated as a kernel module.
+ */
+static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
+					   enum dso_binary_type type)
+{
+	bool compatible = false;
+
+	switch (type) {
+	case DSO_BINARY_TYPE__JAVA_JIT:
+	case DSO_BINARY_TYPE__DEBUGLINK:
+	case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+	case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+	case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+	case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+	case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+		/* user-space sources: non-module user DSOs only */
+		compatible = !kmod && dso->kernel == DSO_TYPE_USER;
+		break;
+
+	case DSO_BINARY_TYPE__KALLSYMS:
+	case DSO_BINARY_TYPE__VMLINUX:
+	case DSO_BINARY_TYPE__KCORE:
+		compatible = dso->kernel == DSO_TYPE_KERNEL;
+		break;
+
+	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+	case DSO_BINARY_TYPE__GUEST_VMLINUX:
+	case DSO_BINARY_TYPE__GUEST_KCORE:
+		compatible = dso->kernel == DSO_TYPE_GUEST_KERNEL;
+		break;
+
+	case DSO_BINARY_TYPE__GUEST_KMODULE:
+	case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+		/*
+		 * kernel modules know their symtab type - it's set when
+		 * creating a module dso in machine__findnew_module_map().
+		 */
+		compatible = kmod && dso->symtab_type == type;
+		break;
+
+	case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+	case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+		compatible = true;
+		break;
+
+	case DSO_BINARY_TYPE__NOT_FOUND:
+	default:
+		break;
+	}
+
+	return compatible;
+}
+
+/*
+ * Checks for the existence of the perf-<pid>.map file in two different
+ * locations.  First, if the process is in a separate mount namespace, check
+ * in that namespace using the pid of the innermost pid namespace.  If it's
+ * not in a namespace, or the file can't be found there, try in the mount
+ * namespace of the tracing process using our view of its pid.
+ *
+ * The chosen path is written to @filebuf (at most @bufsz bytes).  In the
+ * fallback case *@nsip is replaced by a copy with need_setns cleared and
+ * the original nsinfo is put.  Returns 0 on success, -1 if the copy cannot
+ * be made.
+ */
+static int dso__find_perf_map(char *filebuf, size_t bufsz,
+			      struct nsinfo **nsip)
+{
+	struct nsinfo *nsi = *nsip;
+	struct nsinfo *copy;
+	struct nscookie nsc;
+
+	if (nsi->need_setns) {
+		int rc;
+
+		snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsi->nstgid);
+		nsinfo__mountns_enter(nsi, &nsc);
+		rc = access(filebuf, R_OK);
+		nsinfo__mountns_exit(&nsc);
+		if (rc == 0)
+			return 0;
+	}
+
+	copy = nsinfo__copy(nsi);
+	if (copy == NULL)
+		return -1;
+
+	nsinfo__put(nsi);
+
+	copy->need_setns = false;
+	snprintf(filebuf, bufsz, "/tmp/perf-%d.map", copy->tgid);
+	*nsip = copy;
+
+	return 0;
+}
+
+/*
+ * Load the symbols of @dso for the type of @map.
+ *
+ * Kernel DSOs are delegated to dso__load_kernel_sym() or
+ * dso__load_guest_kernel_sym().  DSOs whose name starts with "/tmp/perf-"
+ * are read as perf map files.  For everything else the candidate binary
+ * types in binary_type_symtab[] are probed for a symbol table source and a
+ * runtime source, which are then fed to dso__load_sym().
+ *
+ * The work is done inside the DSO's mount namespace and under dso->lock.
+ * The DSO is marked as loaded for @map->type on every exit path.
+ *
+ * Returns 1 if the DSO was already loaded, otherwise the result of the
+ * loader used; a negative result is turned into 0 for DSOs whose name
+ * contains " (deleted)" once the candidate search has been entered.
+ */
+int dso__load(struct dso *dso, struct map *map)
+{
+	char *name;
+	int ret = -1;
+	u_int i;
+	struct machine *machine;
+	char *root_dir = (char *) "";
+	int ss_pos = 0;
+	/* at most two sources are kept: one for symbols, one for runtime */
+	struct symsrc ss_[2];
+	struct symsrc *syms_ss = NULL, *runtime_ss = NULL;
+	bool kmod;
+	bool perfmap;
+	unsigned char build_id[BUILD_ID_SIZE];
+	struct nscookie nsc;
+	char newmapname[PATH_MAX];
+	const char *map_path = dso->long_name;
+
+	perfmap = strncmp(dso->name, "/tmp/perf-", 10) == 0;
+	if (perfmap) {
+		/* may replace dso->nsinfo and pick a different map path */
+		if (dso->nsinfo && (dso__find_perf_map(newmapname,
+		    sizeof(newmapname), &dso->nsinfo) == 0)) {
+			map_path = newmapname;
+		}
+	}
+
+	nsinfo__mountns_enter(dso->nsinfo, &nsc);
+	pthread_mutex_lock(&dso->lock);
+
+	/* check again under the dso->lock */
+	if (dso__loaded(dso, map->type)) {
+		ret = 1;
+		goto out;
+	}
+
+	if (dso->kernel) {
+		if (dso->kernel == DSO_TYPE_KERNEL)
+			ret = dso__load_kernel_sym(dso, map);
+		else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+			ret = dso__load_guest_kernel_sym(dso, map);
+
+		goto out;
+	}
+
+	if (map->groups && map->groups->machine)
+		machine = map->groups->machine;
+	else
+		machine = NULL;
+
+	dso->adjust_symbols = 0;
+
+	if (perfmap) {
+		struct stat st;
+
+		if (lstat(map_path, &st) < 0)
+			goto out;
+
+		/* only trust map files owned by root or the effective user */
+		if (!symbol_conf.force && st.st_uid && (st.st_uid != geteuid())) {
+			pr_warning("File %s not owned by current user or root, "
+				   "ignoring it (use -f to override).\n", map_path);
+			goto out;
+		}
+
+		ret = dso__load_perf_map(map_path, dso, map);
+		dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
+					     DSO_BINARY_TYPE__NOT_FOUND;
+		goto out;
+	}
+
+	if (machine)
+		root_dir = machine->root_dir;
+
+	/* scratch buffer for candidate file names, freed at out_free */
+	name = malloc(PATH_MAX);
+	if (!name)
+		goto out;
+
+	kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+		dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
+		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE ||
+		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
+
+
+	/*
+	 * Read the build id if possible. This is required for
+	 * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
+	 */
+	if (!dso->has_build_id &&
+	    is_regular_file(dso->long_name)) {
+	    __symbol__join_symfs(name, PATH_MAX, dso->long_name);
+	    if (filename__read_build_id(name, build_id, BUILD_ID_SIZE) > 0)
+		dso__set_build_id(dso, build_id);
+	}
+
+	/*
+	 * Iterate over candidate debug images.
+	 * Keep track of "interesting" ones (those which have a symtab, dynsym,
+	 * and/or opd section) for processing.
+	 */
+	for (i = 0; i < DSO_BINARY_TYPE__SYMTAB_CNT; i++) {
+		struct symsrc *ss = &ss_[ss_pos];
+		bool next_slot = false;
+		bool is_reg;
+		bool nsexit;
+		int sirc = -1;
+
+		enum dso_binary_type symtab_type = binary_type_symtab[i];
+
+		/* build-id cache files are probed outside the DSO's namespace */
+		nsexit = (symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE ||
+		    symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO);
+
+		if (!dso__is_compatible_symtab_type(dso, kmod, symtab_type))
+			continue;
+
+		if (dso__read_binary_type_filename(dso, symtab_type,
+						   root_dir, name, PATH_MAX))
+			continue;
+
+		if (nsexit)
+			nsinfo__mountns_exit(&nsc);
+
+		is_reg = is_regular_file(name);
+		if (is_reg)
+			sirc = symsrc__init(ss, dso, name, symtab_type);
+
+		if (nsexit)
+			nsinfo__mountns_enter(dso->nsinfo, &nsc);
+
+		if (!is_reg || sirc < 0)
+			continue;
+
+		/* first source with a symtab becomes the symbol source */
+		if (!syms_ss && symsrc__has_symtab(ss)) {
+			syms_ss = ss;
+			next_slot = true;
+			if (!dso->symsrc_filename)
+				dso->symsrc_filename = strdup(name);
+		}
+
+		/* first possibly-runtime source becomes the runtime source */
+		if (!runtime_ss && symsrc__possibly_runtime(ss)) {
+			runtime_ss = ss;
+			next_slot = true;
+		}
+
+		if (next_slot) {
+			/* the slot is in use: move on to the next one */
+			ss_pos++;
+
+			if (syms_ss && runtime_ss)
+				break;
+		} else {
+			/* not interesting: release it so the slot can be reused */
+			symsrc__destroy(ss);
+		}
+
+	}
+
+	if (!runtime_ss && !syms_ss)
+		goto out_free;
+
+	if (runtime_ss && !syms_ss) {
+		syms_ss = runtime_ss;
+	}
+
+	/* We'll have to hope for the best */
+	if (!runtime_ss && syms_ss)
+		runtime_ss = syms_ss;
+
+	if (syms_ss)
+		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod);
+	else
+		ret = -1;
+
+	if (ret > 0) {
+		int nr_plt;
+
+		nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map);
+		if (nr_plt > 0)
+			ret += nr_plt;
+	}
+
+	/* destroy every source slot that was kept */
+	for (; ss_pos > 0; ss_pos--)
+		symsrc__destroy(&ss_[ss_pos - 1]);
+out_free:
+	free(name);
+	/* a failed load of a " (deleted)" DSO is reported as 0 */
+	if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
+		ret = 0;
+out:
+	/* marked as loaded whatever the outcome */
+	dso__set_loaded(dso, map->type);
+	pthread_mutex_unlock(&dso->lock);
+	nsinfo__mountns_exit(&nsc);
+
+	return ret;
+}
+
+/*
+ * Find the first map of @type in @mg whose DSO short name equals @name.
+ * The maps are walked with the maps lock held for reading.
+ *
+ * Returns the map, or NULL when none matches.
+ */
+struct map *map_groups__find_by_name(struct map_groups *mg,
+				     enum map_type type, const char *name)
+{
+	struct maps *maps = &mg->maps[type];
+	struct map *pos, *found = NULL;
+
+	down_read(&maps->lock);
+
+	for (pos = maps__first(maps); pos != NULL; pos = map__next(pos)) {
+		if (pos->dso && strcmp(pos->dso->short_name, name) == 0) {
+			found = pos;
+			break;
+		}
+	}
+
+	up_read(&maps->lock);
+
+	return found;
+}
+
+/*
+ * Load the symbols of @dso from the vmlinux image @vmlinux.
+ *
+ * An absolute path is used as given; any other path goes through
+ * symbol__join_symfs().  When symbols were loaded (result > 0) the DSO's
+ * binary type and long name are updated and it is marked as loaded for
+ * @map->type; @vmlinux_allocated is passed on to dso__set_long_name().
+ *
+ * Returns the result of dso__load_sym(), or -1 if the symbol source cannot
+ * be initialised.
+ */
+int dso__load_vmlinux(struct dso *dso, struct map *map,
+		      const char *vmlinux, bool vmlinux_allocated)
+{
+	char symfs_vmlinux[PATH_MAX];
+	enum dso_binary_type symtab_type;
+	struct symsrc ss;
+	int err;
+
+	if (vmlinux[0] == '/')
+		snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s", vmlinux);
+	else
+		symbol__join_symfs(symfs_vmlinux, vmlinux);
+
+	symtab_type = dso->kernel == DSO_TYPE_GUEST_KERNEL ?
+			DSO_BINARY_TYPE__GUEST_VMLINUX :
+			DSO_BINARY_TYPE__VMLINUX;
+
+	if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type))
+		return -1;
+
+	err = dso__load_sym(dso, map, &ss, &ss, 0);
+	symsrc__destroy(&ss);
+
+	if (err <= 0)
+		return err;
+
+	dso->binary_type = dso->kernel == DSO_TYPE_GUEST_KERNEL ?
+				DSO_BINARY_TYPE__GUEST_VMLINUX :
+				DSO_BINARY_TYPE__VMLINUX;
+	dso__set_long_name(dso, vmlinux, vmlinux_allocated);
+	dso__set_loaded(dso, map->type);
+	pr_debug("Using %s for symbols\n", symfs_vmlinux);
+
+	return err;
+}
+
+/*
+ * Try each entry of vmlinux_path[] in turn, then - unless
+ * symbol_conf.ignore_vmlinux_buildid is set - the file name obtained from
+ * dso__build_id_filename().
+ *
+ * Returns the first positive dso__load_vmlinux() result, otherwise the
+ * result of the last attempt (0 if nothing was tried).  The build-id file
+ * name is handed over to the DSO on success and freed otherwise.
+ */
+int dso__load_vmlinux_path(struct dso *dso, struct map *map)
+{
+	char *filename;
+	int err = 0;
+	int i;
+
+	pr_debug("Looking at the vmlinux_path (%d entries long)\n",
+		 vmlinux_path__nr_entries + 1);
+
+	for (i = 0; i < vmlinux_path__nr_entries; ++i) {
+		err = dso__load_vmlinux(dso, map, vmlinux_path[i], false);
+		if (err > 0)
+			return err;
+	}
+
+	if (symbol_conf.ignore_vmlinux_buildid)
+		return err;
+
+	filename = dso__build_id_filename(dso, NULL, 0, false);
+	if (filename == NULL)
+		return err;
+
+	err = dso__load_vmlinux(dso, map, filename, true);
+	if (err <= 0)
+		free(filename);
+
+	return err;
+}
+
+/*
+ * lsdir() filter: accept only entries of type DT_DIR that also pass
+ * lsdir_no_dot_filter().
+ */
+static bool visible_dir_filter(const char *name, struct dirent *d)
+{
+	return d->d_type == DT_DIR && lsdir_no_dot_filter(name, d);
+}
+
+/*
+ * Search the sub-directories of @dir for a "kallsyms" file that passes
+ * validate_kcore_addresses() for @map.
+ *
+ * On success the path of that kallsyms file is copied into @dir (at most
+ * @dir_sz bytes) and 0 is returned; otherwise -1 is returned.
+ */
+static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz)
+{
+	char kallsyms_filename[PATH_MAX];
+	struct strlist *dirs = lsdir(dir, visible_dir_filter);
+	struct str_node *nd;
+	int ret = -1;
+
+	if (dirs == NULL)
+		return -1;
+
+	strlist__for_each_entry(nd, dirs) {
+		scnprintf(kallsyms_filename, sizeof(kallsyms_filename),
+			  "%s/%s/kallsyms", dir, nd->s);
+		if (validate_kcore_addresses(kallsyms_filename, map) != 0)
+			continue;
+
+		strlcpy(dir, kallsyms_filename, dir_sz);
+		ret = 0;
+		break;
+	}
+
+	strlist__delete(dirs);
+
+	return ret;
+}
+
+/*
+ * Use open(O_RDONLY) to check readability directly instead of access(R_OK)
+ * since access(R_OK) only checks with real UID/GID but open() use effective
+ * UID/GID and actual capabilities (e.g. /proc/kcore requires CAP_SYS_RAWIO).
+ *
+ * Returns true when @file could be opened for reading.
+ */
+static bool filename__readable(const char *file)
+{
+	int fd = open(file, O_RDONLY);
+	bool readable = fd >= 0;
+
+	if (readable)
+		close(fd);
+
+	return readable;
+}
+
+/*
+ * Pick the kallsyms file to use for @dso.
+ *
+ * Candidates, in order: "/proc/kallsyms" when the DSO has no build-id or
+ * when it matches the running kernel and /proc/kcore is usable; a kallsyms
+ * file stored next to a matching kcore in the build-id cache;
+ * "/proc/kallsyms" for the running kernel; finally the path reported by
+ * build_id_cache__kallsyms_path().
+ *
+ * Returns a strdup()ed path that the caller owns, or NULL when the final
+ * lookup yields nothing (strdup() itself may also return NULL).
+ */
+static char *dso__find_kallsyms(struct dso *dso, struct map *map)
+{
+	u8 host_build_id[BUILD_ID_SIZE];
+	char sbuild_id[SBUILD_ID_SIZE];
+	bool is_host = false;
+	char path[PATH_MAX];
+
+	if (!dso->has_build_id) {
+		/*
+		 * Last resort, if we don't have a build-id and couldn't find
+		 * any vmlinux file, try the running kernel kallsyms table.
+		 */
+		goto proc_kallsyms;
+	}
+
+	/* the DSO is the host kernel when its build-id matches /sys/kernel/notes */
+	if (sysfs__read_build_id("/sys/kernel/notes", host_build_id,
+				 sizeof(host_build_id)) == 0)
+		is_host = dso__build_id_equal(dso, host_build_id);
+
+	/* Try a fast path for /proc/kallsyms if possible */
+	if (is_host) {
+		/*
+		 * Do not check the build-id cache, unless we know we cannot use
+		 * /proc/kcore or module maps don't match to /proc/kallsyms.
+		 * To check readability of /proc/kcore, do not use access(R_OK)
+		 * since /proc/kcore requires CAP_SYS_RAWIO to read and access
+		 * can't check it.
+		 */
+		if (filename__readable("/proc/kcore") &&
+		    !validate_kcore_addresses("/proc/kallsyms", map))
+			goto proc_kallsyms;
+	}
+
+	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+
+	/* Find kallsyms in build-id cache with kcore */
+	scnprintf(path, sizeof(path), "%s/%s/%s",
+		  buildid_dir, DSO__NAME_KCORE, sbuild_id);
+
+	/* on success, path has been rewritten to the matching kallsyms file */
+	if (!find_matching_kcore(map, path, sizeof(path)))
+		return strdup(path);
+
+	/* Use current /proc/kallsyms if possible */
+	if (is_host) {
+		/* the gotos above jump here regardless of is_host */
+proc_kallsyms:
+		return strdup("/proc/kallsyms");
+	}
+
+	/* Finally, find a cache of kallsyms */
+	if (!build_id_cache__kallsyms_path(sbuild_id, path, sizeof(path))) {
+		pr_err("No kallsyms or vmlinux with build-id %s was found\n",
+		       sbuild_id);
+		return NULL;
+	}
+
+	return strdup(path);
+}
+
+static int dso__load_kernel_sym(struct dso *dso, struct map *map)
+{
+	const char *filename = symbol_conf.kallsyms_name;
+	char *allocated = NULL;	/* set only when we own the path string */
+	int err;
+
+	/*
+	 * Step 1: if the user specified a kallsyms or vmlinux filename, use
+	 * it and only it, reporting errors to the user if it cannot be used.
+	 *
+	 * For instance, try to analyse an ARM perf.data file _without_ a
+	 * build-id, or if the user specifies the wrong path to the right
+	 * vmlinux file, obviously we can't fallback to another vmlinux (a
+	 * x86_64 one, on the machine where analysis is being performed, say),
+	 * or worse, /proc/kallsyms.
+	 *
+	 * If the specified file _has_ a build-id and there is a build-id
+	 * section in the perf.data file, we will still do the expected
+	 * validation in dso__load_vmlinux and will bail out if they don't
+	 * match.
+	 */
+	if (filename == NULL) {
+		if (!symbol_conf.ignore_vmlinux) {
+			if (symbol_conf.vmlinux_name != NULL)
+				return dso__load_vmlinux(dso, map,
+							 symbol_conf.vmlinux_name,
+							 false);
+			/* no explicit file: go through vmlinux_path, if set up */
+			if (vmlinux_path != NULL) {
+				err = dso__load_vmlinux_path(dso, map);
+				if (err > 0)
+					return err;
+			}
+		}
+
+		/* do not try local files if a symfs was given */
+		if (symbol_conf.symfs[0] != 0)
+			return -1;
+
+		/* last option: a kallsyms file picked by dso__find_kallsyms() */
+		allocated = dso__find_kallsyms(dso, map);
+		if (allocated == NULL)
+			return -1;
+		filename = allocated;
+	}
+
+	err = dso__load_kallsyms(dso, filename, map);
+	if (err > 0)
+		pr_debug("Using %s for symbols\n", filename);
+	free(allocated);
+
+	if (err <= 0 || dso__is_kcore(dso))
+		return err;
+
+	dso->binary_type = DSO_BINARY_TYPE__KALLSYMS;
+	dso__set_long_name(dso, DSO__NAME_KALLSYMS, false);
+	map__fixup_start(map);
+	map__fixup_end(map);
+	return err;
+}
+
+/* Load the symbols of a guest kernel from a vmlinux or a kallsyms file */
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map)
+{
+	int err;
+	const char *kallsyms_filename = NULL;
+	struct machine *machine;
+	char path[PATH_MAX];
+
+	if (!map->groups) {
+		pr_debug("Guest kernel map hasn't the point to groups\n");
+		return -1;
+	}
+	machine = map->groups->machine;
+
+	if (machine__is_default_guest(machine)) {
+		/*
+		 * A vmlinux given by the user is used exclusively (errors
+		 * included), else fall back to the user supplied guest kallsyms
+		 */
+		if (symbol_conf.default_guest_vmlinux_name != NULL)
+			return dso__load_vmlinux(dso, map,
+						 symbol_conf.default_guest_vmlinux_name,
+						 false);
+
+		kallsyms_filename = symbol_conf.default_guest_kallsyms;
+		if (!kallsyms_filename)
+			return -1;
+	} else {
+		/* root_dir has no known length limit: never overrun path[] */
+		scnprintf(path, sizeof(path), "%s/proc/kallsyms",
+			  machine->root_dir);
+		kallsyms_filename = path;
+	}
+
+	err = dso__load_kallsyms(dso, kallsyms_filename, map);
+	if (err > 0)
+		pr_debug("Using %s for symbols\n", kallsyms_filename);
+	if (err > 0 && !dso__is_kcore(dso)) {
+		dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
+		dso__set_long_name(dso, machine->mmap_name, false);
+		map__fixup_start(map);
+		map__fixup_end(map);
+	}
+
+	return err;
+}
+
+/* Release every vmlinux_path[] entry, then the array itself. */
+static void vmlinux_path__exit(void)
+{
+	for (; vmlinux_path__nr_entries > 0; --vmlinux_path__nr_entries)
+		zfree(&vmlinux_path[vmlinux_path__nr_entries - 1]);
+	vmlinux_path__nr_entries = 0;
+	zfree(&vmlinux_path);
+}
+
+static const char * const vmlinux_paths[] = {	/* added to vmlinux_path as is */
+	"vmlinux",
+	"/boot/vmlinux"
+};
+
+static const char * const vmlinux_paths_upd[] = { /* %s: kernel release string */
+	"/boot/vmlinux-%s",
+	"/usr/lib/debug/boot/vmlinux-%s",
+	"/lib/modules/%s/build/vmlinux",
+	"/usr/lib/debug/lib/modules/%s/vmlinux",
+	"/usr/lib/debug/boot/vmlinux-%s.debug"
+};
+
+static int vmlinux_path__add(const char *new_entry)
+{
+	char *copy = strdup(new_entry);	/* the list owns its strings */
+
+	vmlinux_path[vmlinux_path__nr_entries] = copy;
+	if (copy)
+		vmlinux_path__nr_entries++;
+	return copy ? 0 : -1;
+}
+
+/*
+ * Fill vmlinux_path[] with the places to look for a vmlinux: the fixed names
+ * and, unless a symfs is in use, the ones that embed the kernel release (from
+ * @env if given, else from uname()).  Returns 0 on success, -1 on failure.
+ */
+static int vmlinux_path__init(struct perf_env *env)
+{
+	const char *release;
+	struct utsname uts;
+	char bf[PATH_MAX];
+	unsigned int i;
+
+	vmlinux_path = malloc(sizeof(char *) * (ARRAY_SIZE(vmlinux_paths) +
+			      ARRAY_SIZE(vmlinux_paths_upd)));
+	if (vmlinux_path == NULL)
+		return -1;
+
+	for (i = 0; i < ARRAY_SIZE(vmlinux_paths); i++)
+		if (vmlinux_path__add(vmlinux_paths[i]) < 0)
+			goto out_fail;
+
+	/* only try kernel version if no symfs was given */
+	if (symbol_conf.symfs[0] != 0)
+		return 0;
+
+	if (!env && uname(&uts) < 0)
+		goto out_fail;
+	release = env ? env->os_release : uts.release;
+
+	for (i = 0; i < ARRAY_SIZE(vmlinux_paths_upd); i++) {
+		snprintf(bf, sizeof(bf), vmlinux_paths_upd[i], release);
+		if (vmlinux_path__add(bf) < 0)
+			goto out_fail;
+	}
+
+	return 0;
+
+out_fail:
+	vmlinux_path__exit();
+	return -1;
+}
+
+/* Parse @list_str (NULL: nothing to do) into a new strlist stored in *@list */
+int setup_list(struct strlist **list, const char *list_str,
+		      const char *list_name)
+{
+	if (!list_str)
+		return 0;
+
+	*list = strlist__new(list_str, NULL);
+	if (*list) {
+		symbol_conf.has_filter = true;
+		return 0;
+	}
+	pr_err("problems parsing %s list\n", list_name);
+	return -1;
+}
+
+/* Like setup_list() for integer lists, except that has_filter is left alone */
+int setup_intlist(struct intlist **list, const char *list_str,
+		  const char *list_name)
+{
+	if (!list_str)
+		return 0;
+
+	*list = intlist__new(list_str);
+	if (*list)
+		return 0;
+	pr_err("problems parsing %s list\n", list_name);
+	return -1;
+}
+
+/* Read the kptr_restrict sysctl and tell whether it restricts this process */
+static bool symbol__read_kptr_restrict(void)
+{
+	FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r");
+	bool is_root = geteuid() == 0 && getuid() == 0;
+	bool restricted = false;
+	char line[8];
+
+	if (fp == NULL)
+		return false;
+
+	/* root is only restricted by the value 2, others by any non-zero one */
+	if (fgets(line, sizeof(line), fp) != NULL)
+		restricted = is_root ? atoi(line) == 2 : atoi(line) != 0;
+
+	fclose(fp);
+	return restricted;
+}
+
+/* One-shot: grow symbol_conf.priv_size to make room for a struct annotation */
+int symbol__annotation_init(void)
+{
+	if (symbol_conf.init_annotation)
+		return 0;
+	if (!symbol_conf.initialized) {
+		symbol_conf.priv_size += sizeof(struct annotation);
+		symbol_conf.init_annotation = true;
+		return 0;
+	}
+
+	pr_err("Annotation needs to be init before symbol__init()\n");
+	return -1;
+}
+
+/* Returns 0 (also when already initialized) or -1 after undoing its setup */
+int symbol__init(struct perf_env *env)
+{
+	const char *symfs;
+
+	if (symbol_conf.initialized)
+		return 0;
+
+	symbol_conf.priv_size = PERF_ALIGN(symbol_conf.priv_size, sizeof(u64));
+	symbol__elf_init();
+	if (symbol_conf.sort_by_name)
+		symbol_conf.priv_size += (sizeof(struct symbol_name_rb_node) -
+					  sizeof(struct symbol));
+
+	if (symbol_conf.try_vmlinux_path && vmlinux_path__init(env) < 0)
+		return -1;
+
+	if (symbol_conf.field_sep && *symbol_conf.field_sep == '.') {
+		pr_err("'.' is the only non valid --field-separator argument\n");
+		goto out_free_vmlinux_path;
+	}
+
+	if (setup_list(&symbol_conf.dso_list,
+		       symbol_conf.dso_list_str, "dso") < 0)
+		goto out_free_vmlinux_path;
+
+	if (setup_list(&symbol_conf.comm_list,
+		       symbol_conf.comm_list_str, "comm") < 0)
+		goto out_free_dso_list;
+
+	if (setup_intlist(&symbol_conf.pid_list,
+		       symbol_conf.pid_list_str, "pid") < 0)
+		goto out_free_comm_list;
+
+	if (setup_intlist(&symbol_conf.tid_list,
+		       symbol_conf.tid_list_str, "tid") < 0)
+		goto out_free_pid_list;
+
+	if (setup_list(&symbol_conf.sym_list,
+		       symbol_conf.sym_list_str, "symbol") < 0)
+		goto out_free_tid_list;
+
+	if (setup_list(&symbol_conf.bt_stop_list,
+		       symbol_conf.bt_stop_list_str, "symbol") < 0)
+		goto out_free_sym_list;
+
+	/*
+	 * A path to symbols of "/" is identical to ""
+	 * reset here for simplicity.
+	 */
+	symfs = realpath(symbol_conf.symfs, NULL);
+	if (symfs == NULL)
+		symfs = symbol_conf.symfs;
+	if (strcmp(symfs, "/") == 0)
+		symbol_conf.symfs = "";
+	if (symfs != symbol_conf.symfs)
+		free((void *)symfs);
+
+	symbol_conf.kptr_restrict = symbol__read_kptr_restrict();
+	symbol_conf.initialized = true;
+	return 0;
+
+out_free_sym_list:
+	strlist__delete(symbol_conf.sym_list);
+out_free_tid_list:
+	intlist__delete(symbol_conf.tid_list);
+out_free_pid_list:
+	intlist__delete(symbol_conf.pid_list);
+out_free_comm_list:
+	strlist__delete(symbol_conf.comm_list);
+out_free_dso_list:
+	strlist__delete(symbol_conf.dso_list);
+out_free_vmlinux_path:
+	vmlinux_path__exit();
+	return -1;
+}
+
+void symbol__exit(void)
+{
+	if (!symbol_conf.initialized)
+		return;	/* nothing was set up, nothing to undo */
+	strlist__delete(symbol_conf.bt_stop_list);
+	strlist__delete(symbol_conf.sym_list);
+	strlist__delete(symbol_conf.dso_list);
+	strlist__delete(symbol_conf.comm_list);
+	intlist__delete(symbol_conf.tid_list);
+	intlist__delete(symbol_conf.pid_list);
+	vmlinux_path__exit();
+	symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = symbol_conf.bt_stop_list = NULL;
+	symbol_conf.tid_list = symbol_conf.pid_list = NULL;	/* don't leave them dangling */
+	symbol_conf.initialized = false;
+}
+
+/*
+ * Make @dir the symfs root and, so that the locally configured cache is
+ * skipped, point the build-id directory at "<dir>/.debug".
+ * Returns 0 on success, -ENOMEM if either string cannot be allocated.
+ */
+int symbol__config_symfs(const struct option *opt __maybe_unused,
+			 const char *dir, int unset __maybe_unused)
+{
+	char *buildid_path = NULL;
+
+	symbol_conf.symfs = strdup(dir);
+	if (!symbol_conf.symfs)
+		return -ENOMEM;
+
+	if (asprintf(&buildid_path, "%s/%s", dir, ".debug") < 0)
+		return -ENOMEM;
+
+	set_buildid_dir(buildid_path);
+	free(buildid_path);
+
+	return 0;
+}
+
+struct mem_info *mem_info__get(struct mem_info *mi)
+{
+	if (mi)	/* NULL is tolerated and simply handed back */
+		refcount_inc(&mi->refcnt);
+	return mi;
+}
+
+void mem_info__put(struct mem_info *mi)
+{
+	if (mi && refcount_dec_and_test(&mi->refcnt))
+		free(mi);	/* the count dropped to zero: last user is gone */
+}
+
+struct mem_info *mem_info__new(void)
+{
+	struct mem_info *mi = zalloc(sizeof(*mi));
+
+	if (mi)
+		refcount_set(&mi->refcnt, 1);	/* starts with one reference */
+	return mi;	/* NULL when the allocation failed */
+}
diff --git a/util/symbol.h b/util/symbol.h
new file mode 100644
index 0000000..70c1674
--- /dev/null
+++ b/util/symbol.h
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYMBOL
+#define __PERF_SYMBOL 1
+
+#include <linux/types.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include "map.h"
+#include "../perf.h"
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <stdio.h>
+#include <byteswap.h>
+#include <libgen.h>
+#include "build-id.h"
+#include "event.h"
+#include "path.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+#include <libelf.h>
+#include <gelf.h>
+#endif
+#include <elf.h>
+
+#include "dso.h"
+
+/*
+ * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
+ * for newer versions we can use mmap to reduce memory usage:
+ */
+#ifdef HAVE_LIBELF_MMAP_SUPPORT
+# define PERF_ELF_C_READ_MMAP ELF_C_READ_MMAP
+#else
+# define PERF_ELF_C_READ_MMAP ELF_C_READ
+#endif
+
+#ifdef HAVE_LIBELF_SUPPORT
+Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+			     GElf_Shdr *shp, const char *name, size_t *idx);
+#endif
+
+#ifndef DMGL_PARAMS
+#define DMGL_NO_OPTS     0              /* For readability... */
+#define DMGL_PARAMS      (1 << 0)       /* Include function args */
+#define DMGL_ANSI        (1 << 1)       /* Include const, volatile, etc */
+#endif
+
+#define DSO__NAME_KALLSYMS	"[kernel.kallsyms]"
+#define DSO__NAME_KCORE		"[kernel.kcore]"
+
+/** struct symbol - symtab entry
+ *
+ * @ignore - resolvable but tools ignore it (e.g. idle routines)
+ */
+struct symbol {
+	struct rb_node	rb_node;
+	u64		start;
+	u64		end;		/* symbol__size() is end - start */
+	u16		namelen;
+	u8		binding;	/* compared with ELF STB_* by symbol__fprintf() */
+	u8		idle:1;
+	u8		ignore:1;
+	u8		inlined:1;
+	u8		arch_sym;
+	char		name[0];	/* the name is stored right behind the struct */
+};
+
+void symbol__delete(struct symbol *sym);
+void symbols__delete(struct rb_root *symbols);
+
+/* symbols__for_each_entry - iterate over symbols (rb_root)
+ *
+ * @symbols: the rb_root of symbols
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @nd: the 'struct rb_node *' to use as a temporary storage
+ */
+#define symbols__for_each_entry(symbols, pos, nd)			\
+	for (nd = rb_first(symbols);					\
+	     nd && (pos = rb_entry(nd, struct symbol, rb_node));	\
+	     nd = rb_next(nd))
+
+static inline size_t symbol__size(const struct symbol *sym)
+{
+	return sym->end - sym->start;	/* u64 difference, returned as size_t */
+}
+
+struct strlist;
+struct intlist;
+
+struct symbol_conf {
+	unsigned short	priv_size;	/* bytes in front of a symbol, see symbol__priv() */
+	unsigned short	nr_events;
+	bool		try_vmlinux_path,	/* symbol__init() builds vmlinux_path[] */
+			init_annotation,	/* set by symbol__annotation_init() */
+			force,
+			ignore_vmlinux,
+			ignore_vmlinux_buildid,
+			show_kernel_path,
+			use_modules,
+			allow_aliases,
+			sort_by_name,
+			show_nr_samples,
+			show_total_period,
+			use_callchain,
+			cumulate_callchain,
+			show_branchflag_count,
+			exclude_other,
+			show_cpu_utilization,
+			initialized,		/* symbol__init() done, symbol__exit() not yet */
+			kptr_restrict,		/* result of symbol__read_kptr_restrict() */
+			annotate_asm_raw,
+			annotate_src,
+			event_group,
+			demangle,
+			demangle_kernel,
+			filter_relative,
+			show_hist_headers,
+			branch_callstack,
+			has_filter,		/* set once setup_list() parsed a list */
+			show_ref_callgraph,
+			hide_unresolved,
+			raw_trace,
+			report_hierarchy,
+			inline_name;
+	const char	*vmlinux_name,
+			*kallsyms_name,
+			*source_prefix,
+			*field_sep;
+	const char	*default_guest_vmlinux_name,
+			*default_guest_kallsyms,
+			*default_guest_modules;
+	const char	*guestmount;
+	const char	*dso_list_str,
+			*comm_list_str,
+			*pid_list_str,
+			*tid_list_str,
+			*sym_list_str,
+			*col_width_list_str,
+			*bt_stop_list_str;
+       struct strlist	*dso_list,
+			*comm_list,
+			*sym_list,
+			*dso_from_list,
+			*dso_to_list,
+			*sym_from_list,
+			*sym_to_list,
+			*bt_stop_list;
+	struct intlist	*pid_list,
+			*tid_list;
+	const char	*symfs;		/* symbol__init() turns a "/" symfs into "" */
+};
+
+extern struct symbol_conf symbol_conf;
+
+struct symbol_name_rb_node {
+	struct rb_node	rb_node;
+	struct symbol	sym;
+};
+
+static inline int __symbol__join_symfs(char *bf, size_t size, const char *path)
+{
+	return path__join(bf, size, symbol_conf.symfs, path);	/* symfs + @path */
+}
+
+#define symbol__join_symfs(bf, path) __symbol__join_symfs(bf, sizeof(bf), path)
+
+extern int vmlinux_path__nr_entries;
+extern char **vmlinux_path;
+
+static inline void *symbol__priv(struct symbol *sym)
+{
+	return (char *)sym - symbol_conf.priv_size;	/* priv area precedes @sym */
+}
+
+struct ref_reloc_sym {
+	const char	*name;
+	u64		addr;
+	u64		unrelocated_addr;
+};
+
+struct map_symbol {
+	struct map    *map;
+	struct symbol *sym;
+};
+
+struct addr_map_symbol {
+	struct map    *map;
+	struct symbol *sym;
+	u64	      addr;
+	u64	      al_addr;
+	u64	      phys_addr;
+};
+
+struct branch_info {
+	struct addr_map_symbol from;
+	struct addr_map_symbol to;
+	struct branch_flags flags;
+	char			*srcline_from;
+	char			*srcline_to;
+};
+
+struct mem_info {
+	struct addr_map_symbol	iaddr;
+	struct addr_map_symbol	daddr;
+	union perf_mem_data_src	data_src;
+	refcount_t		refcnt;
+};
+
+struct addr_location {
+	struct machine *machine;
+	struct thread *thread;
+	struct map    *map;
+	struct symbol *sym;
+	const char    *srcline;
+	u64	      addr;
+	char	      level;
+	u8	      filtered;
+	u8	      cpumode;
+	s32	      cpu;
+	s32	      socket;
+};
+
+struct symsrc {
+	char *name;
+	int fd;
+	enum dso_binary_type type;
+
+#ifdef HAVE_LIBELF_SUPPORT
+	Elf *elf;
+	GElf_Ehdr ehdr;
+
+	Elf_Scn *opdsec;
+	size_t opdidx;
+	GElf_Shdr opdshdr;
+
+	Elf_Scn *symtab;
+	GElf_Shdr symshdr;
+
+	Elf_Scn *dynsym;
+	size_t dynsym_idx;
+	GElf_Shdr dynshdr;
+
+	bool adjust_symbols;
+	bool is_64_bit;
+#endif
+};
+
+void symsrc__destroy(struct symsrc *ss);
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
+		 enum dso_binary_type type);
+bool symsrc__has_symtab(struct symsrc *ss);
+bool symsrc__possibly_runtime(struct symsrc *ss);
+
+int dso__load(struct dso *dso, struct map *map);
+int dso__load_vmlinux(struct dso *dso, struct map *map,
+		      const char *vmlinux, bool vmlinux_allocated);
+int dso__load_vmlinux_path(struct dso *dso, struct map *map);
+int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
+			 bool no_kcore);
+int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map);
+
+void dso__insert_symbol(struct dso *dso, enum map_type type,
+			struct symbol *sym);
+
+struct symbol *dso__find_symbol(struct dso *dso, enum map_type type,
+				u64 addr);
+struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
+					const char *name);
+struct symbol *symbol__next_by_name(struct symbol *sym);
+
+struct symbol *dso__first_symbol(struct dso *dso, enum map_type type);
+struct symbol *dso__last_symbol(struct dso *dso, enum map_type type);
+struct symbol *dso__next_symbol(struct symbol *sym);
+
+enum dso_type dso__type_fd(int fd);
+
+int filename__read_build_id(const char *filename, void *bf, size_t size);
+int sysfs__read_build_id(const char *filename, void *bf, size_t size);
+int modules__parse(const char *filename, void *arg,
+		   int (*process_module)(void *arg, const char *name,
+					 u64 start, u64 size));
+int filename__read_debuglink(const char *filename, char *debuglink,
+			     size_t size);
+
+struct perf_env;
+int symbol__init(struct perf_env *env);
+void symbol__exit(void);
+void symbol__elf_init(void);
+int symbol__annotation_init(void);
+
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+				      const struct addr_location *al,
+				      bool unknown_as_addr,
+				      bool print_offsets, FILE *fp);
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+				    const struct addr_location *al, FILE *fp);
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+				 const struct addr_location *al,
+				 bool unknown_as_addr, FILE *fp);
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
+size_t symbol__fprintf(struct symbol *sym, FILE *fp);
+bool symbol_type__is_a(char symbol_type, enum map_type map_type);
+bool symbol__restricted_filename(const char *filename,
+				 const char *restricted_filename);
+int symbol__config_symfs(const struct option *opt __maybe_unused,
+			 const char *dir, int unset __maybe_unused);
+
+int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+		  struct symsrc *runtime_ss, int kmodule);
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss,
+				struct map *map);
+
+char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
+
+void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel);
+void symbols__insert(struct rb_root *symbols, struct symbol *sym);
+void symbols__fixup_duplicate(struct rb_root *symbols);
+void symbols__fixup_end(struct rb_root *symbols);
+void __map_groups__fixup_end(struct map_groups *mg, enum map_type type);
+
+typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
+int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+		    bool *is_64_bit);
+
+#define PERF_KCORE_EXTRACT "/tmp/perf-kcore-XXXXXX"
+
+struct kcore_extract {
+	char *kcore_filename;
+	u64 addr;
+	u64 offs;
+	u64 len;
+	char extract_filename[sizeof(PERF_KCORE_EXTRACT)]; /* fits that template */
+	int fd;
+};
+
+int kcore_extract__create(struct kcore_extract *kce);
+void kcore_extract__delete(struct kcore_extract *kce);
+
+int kcore_copy(const char *from_dir, const char *to_dir);
+int compare_proc_modules(const char *from, const char *to);
+
+int setup_list(struct strlist **list, const char *list_str,
+	       const char *list_name);
+int setup_intlist(struct intlist **list, const char *list_str,
+		  const char *list_name);
+
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
+void arch__sym_update(struct symbol *s, GElf_Sym *sym);
+#endif
+
+const char *arch__normalize_symbol_name(const char *name);
+#define SYMBOL_A 0
+#define SYMBOL_B 1
+
+int arch__compare_symbol_names(const char *namea, const char *nameb);
+int arch__compare_symbol_names_n(const char *namea, const char *nameb,
+				 unsigned int n);
+int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb);
+
+enum symbol_tag_include {
+	SYMBOL_TAG_INCLUDE__NONE = 0,
+	SYMBOL_TAG_INCLUDE__DEFAULT_ONLY
+};
+
+int symbol__match_symbol_name(const char *namea, const char *nameb,
+			      enum symbol_tag_include includes);
+
+/* structure containing an SDT note's info */
+struct sdt_note {
+	char *name;			/* name of the note*/
+	char *provider;			/* provider name */
+	char *args;
+	bool bit32;			/* whether the location is 32 bits? */
+	union {				/* location, base and semaphore addrs */
+		Elf64_Addr a64[3];
+		Elf32_Addr a32[3];
+	} addr;
+	struct list_head note_list;	/* SDT notes' list */
+};
+
+int get_sdt_note_list(struct list_head *head, const char *target);
+int cleanup_sdt_note_list(struct list_head *sdt_notes);
+int sdt_notes__get_count(struct list_head *start);
+
+#define SDT_BASE_SCN ".stapsdt.base"
+#define SDT_NOTE_SCN  ".note.stapsdt"
+#define SDT_NOTE_TYPE 3
+#define SDT_NOTE_NAME "stapsdt"
+#define NR_ADDR 3
+
+struct mem_info *mem_info__new(void);
+struct mem_info *mem_info__get(struct mem_info *mi);
+void   mem_info__put(struct mem_info *mi);
+
+static inline void __mem_info__zput(struct mem_info **mi)
+{
+	mem_info__put(*mi);	/* drop our reference ... */
+	*mi = NULL;		/* ... and forget the pointer */
+}
+
+#define mem_info__zput(mi) __mem_info__zput(&(mi))
+
+#endif /* __PERF_SYMBOL */
diff --git a/util/symbol_fprintf.c b/util/symbol_fprintf.c
new file mode 100644
index 0000000..6dd2cb8
--- /dev/null
+++ b/util/symbol_fprintf.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elf.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "symbol.h"
+
+size_t symbol__fprintf(struct symbol *sym, FILE *fp)
+{
+	const char binding = sym->binding == STB_GLOBAL ? 'g' :
+			     sym->binding == STB_LOCAL  ? 'l' : 'w'; /* ELF binding */
+
+	return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
+		       sym->start, sym->end, binding, sym->name);
+}
+
+/* Print @sym's name (optionally "+0x<offset>"), else "[<addr>]" or "[unknown]" */
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+				      const struct addr_location *al,
+				      bool unknown_as_addr,
+				      bool print_offsets, FILE *fp)
+{
+	u64 offset;	/* addresses are u64: don't truncate on 32-bit hosts */
+	size_t length;
+
+	if (!sym) {
+		if (al && unknown_as_addr)
+			return fprintf(fp, "[%#" PRIx64 "]", al->addr);
+		return fprintf(fp, "[unknown]");
+	}
+	length = fprintf(fp, "%s", sym->name);
+	if (al && print_offsets) {
+		offset = al->addr - sym->start;
+		if (al->addr >= sym->end)
+			offset -= al->map->start;
+		length += fprintf(fp, "+0x%" PRIx64, offset);
+	}
+	return length;
+}
+
+/* Name plus "+0x<offset>"; a missing symbol is printed as "[unknown]" */
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+				    const struct addr_location *al, FILE *fp)
+{
+	return __symbol__fprintf_symname_offs(sym, al, false, true, fp);
+}
+
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+				 const struct addr_location *al,
+				 bool unknown_as_addr, FILE *fp)
+{	/* name only: offsets are never printed through this entry point */
+	return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, false, fp);
+}
+
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
+{	/* no addr_location: prints the name, or "[unknown]" for a NULL @sym */
+	return __symbol__fprintf_symname_offs(sym, NULL, false, false, fp);
+}
+
+/* Print each name in @dso's by-name tree for @type; returns the bytes printed */
+size_t dso__fprintf_symbols_by_name(struct dso *dso,
+				    enum map_type type, FILE *fp)
+{
+	size_t ret = 0;
+	struct rb_node *nd;
+	struct symbol_name_rb_node *pos;
+
+	for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
+		ret += fprintf(fp, "%s\n", pos->sym.name);
+	}
+	return ret;
+}
diff --git a/util/syscalltbl.c b/util/syscalltbl.c
new file mode 100644
index 0000000..0ee7f56
--- /dev/null
+++ b/util/syscalltbl.c
@@ -0,0 +1,179 @@
+/*
+ * System call table mapper
+ *
+ * (C) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "syscalltbl.h"
+#include <stdlib.h>
+#include <linux/compiler.h>
+
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
+#include <string.h>
+#include "string2.h"
+#include "util.h"
+
+#if defined(__x86_64__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_x86_64;
+#elif defined(__s390x__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_s390_64;
+#elif defined(__powerpc64__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_powerpc_64;
+#elif defined(__powerpc__)
+#include <asm/syscalls_32.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_powerpc_32;
+#endif
+
+struct syscall {
+	int id;
+	const char *name;
+};
+
+static int syscallcmpname(const void *vkey, const void *ventry)
+{
+	const char *key = vkey;	/* bsearch() key: the name being looked up */
+	const struct syscall *entry = ventry;
+
+	return strcmp(key, entry->name);
+}
+
+static int syscallcmp(const void *va, const void *vb)
+{
+	const struct syscall *a = va, *b = vb;	/* qsort() elements */
+
+	return strcmp(a->name, b->name);	/* order the table by name */
+}
+
+/* Collect the named native syscalls into @tbl, sorted by name; -1 on failure */
+static int syscalltbl__init_native(struct syscalltbl *tbl)
+{
+	struct syscall *entries;
+	int nr_entries = 0, id, n;
+
+	for (id = 0; id <= syscalltbl_native_max_id; ++id)
+		if (syscalltbl_native[id])
+			++nr_entries;
+
+	entries = tbl->syscalls.entries = malloc(sizeof(*entries) * nr_entries);
+	if (entries == NULL)
+		return -1;
+
+	for (id = 0, n = 0; id <= syscalltbl_native_max_id; ++id) {
+		if (syscalltbl_native[id] == NULL)
+			continue;
+		entries[n].name = syscalltbl_native[id];
+		entries[n++].id = id;
+	}
+
+	qsort(entries, nr_entries, sizeof(*entries), syscallcmp);
+	tbl->syscalls.nr_entries = nr_entries;
+	return 0;
+}
+
+/* Allocate a table and fill it in; NULL if either step fails */
+struct syscalltbl *syscalltbl__new(void)
+{
+	struct syscalltbl *tbl = malloc(sizeof(*tbl));
+
+	if (tbl && syscalltbl__init_native(tbl)) {
+		free(tbl);
+		tbl = NULL;
+	}
+	return tbl;
+}
+
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+	zfree(&tbl->syscalls.entries);	/* NOTE(review): @tbl must not be NULL */
+	free(tbl);
+}
+
+const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id)
+{	/* ids outside [0, max_id], negative ones included, have no name */
+	return id >= 0 && id <= syscalltbl_native_max_id ? syscalltbl_native[id] : NULL;
+}
+
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+	const struct syscall *hit;
+
+	hit = bsearch(name, tbl->syscalls.entries, tbl->syscalls.nr_entries,
+		      sizeof(*hit), syscallcmpname);
+	return hit ? hit->id : -1;	/* -1: no syscall of that name */
+}
+
+/* Resume after *@idx: id of the next entry matching @syscall_glob, else -1 */
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+	const struct syscall *syscalls = tbl->syscalls.entries;
+	int pos = *idx;
+
+	while (++pos < tbl->syscalls.nr_entries) {
+		if (!strglobmatch(syscalls[pos].name, syscall_glob))
+			continue;
+		*idx = pos;
+		return syscalls[pos].id;
+	}
+	return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+	*idx = -1;	/* makes the _next() scan start at entry 0 */
+	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
+
+#else /* HAVE_SYSCALL_TABLE_SUPPORT */
+
+#include <libaudit.h>
+
+struct syscalltbl *syscalltbl__new(void)
+{
+	struct syscalltbl *tbl = malloc(sizeof(*tbl));
+	if (tbl)
+		tbl->audit_machine = audit_detect_machine();
+	return tbl;	/* NULL if malloc() failed */
+}
+
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+	free(tbl);
+}
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id)
+{	/* libaudit lookup for the machine detected in syscalltbl__new() */
+	return audit_syscall_to_name(id, tbl->audit_machine);
+}
+
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{	/* libaudit lookup for the machine detected in syscalltbl__new() */
+	return audit_name_to_syscall(name, tbl->audit_machine);
+}
+
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
+				  const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
+{
+	return -1;	/* this build never reports a glob match */
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{	/* unlike the native variant, *idx is left untouched here */
+	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
+#endif /* HAVE_SYSCALL_TABLE_SUPPORT */
diff --git a/util/syscalltbl.h b/util/syscalltbl.h
new file mode 100644
index 0000000..c8e7e9c
--- /dev/null
+++ b/util/syscalltbl.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYSCALLTBL_H
+#define __PERF_SYSCALLTBL_H
+
+struct syscalltbl {
+	union {
+		int audit_machine;	/* libaudit build: audit_detect_machine() */
+		struct {
+			int nr_entries;
+			void *entries;	/* struct syscall array, sorted by name */
+		} syscalls;
+	};
+};
+
+struct syscalltbl *syscalltbl__new(void);
+void syscalltbl__delete(struct syscalltbl *tbl);
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
+int syscalltbl__id(struct syscalltbl *tbl, const char *name);
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+
+#endif /* __PERF_SYSCALLTBL_H */
diff --git a/util/target.c b/util/target.c
new file mode 100644
index 0000000..21c4d9b
--- /dev/null
+++ b/util/target.c
@@ -0,0 +1,152 @@
+/*
+ * Helper functions for handling target threads/cpus
+ *
+ * Copyright (C) 2012, LG Electronics, Namhyung Kim <namhyung.kim@lge.com>
+ *
+ * Released under the GPL v2.
+ */
+
+#include "target.h"
+#include "util.h"
+#include "debug.h"
+
+#include <pwd.h>
+#include <string.h>
+
+
+/* Drop the losing option of each conflicting pair; report the first conflict */
+enum target_errno target__validate(struct target *target)
+{
+	enum target_errno first = TARGET_ERRNO__SUCCESS;
+
+	if (target->pid)
+		target->tid = target->pid;
+
+	/* CPU and PID are mutually exclusive */
+	if (target->tid && target->cpu_list) {
+		target->cpu_list = NULL;
+		first = TARGET_ERRNO__PID_OVERRIDE_CPU;
+	}
+
+	/* UID and PID are mutually exclusive */
+	if (target->tid && target->uid_str) {
+		target->uid_str = NULL;
+		if (first == TARGET_ERRNO__SUCCESS)
+			first = TARGET_ERRNO__PID_OVERRIDE_UID;
+	}
+
+	/* UID and CPU are mutually exclusive */
+	if (target->uid_str && target->cpu_list) {
+		target->cpu_list = NULL;
+		if (first == TARGET_ERRNO__SUCCESS)
+			first = TARGET_ERRNO__UID_OVERRIDE_CPU;
+	}
+
+	/* PID and SYSTEM are mutually exclusive */
+	if (target->tid && target->system_wide) {
+		target->system_wide = false;
+		if (first == TARGET_ERRNO__SUCCESS)
+			first = TARGET_ERRNO__PID_OVERRIDE_SYSTEM;
+	}
+
+	/* UID and SYSTEM are mutually exclusive */
+	if (target->uid_str && target->system_wide) {
+		target->system_wide = false;
+		if (first == TARGET_ERRNO__SUCCESS)
+			first = TARGET_ERRNO__UID_OVERRIDE_SYSTEM;
+	}
+
+	/* THREAD and SYSTEM/CPU are mutually exclusive */
+	if (target->per_thread && (target->system_wide || target->cpu_list)) {
+		target->per_thread = false;
+		if (first == TARGET_ERRNO__SUCCESS)
+			first = TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD;
+	}
+
+	return first;
+}
+
+/* Resolve target->uid_str (user name or decimal UID) into target->uid */
+enum target_errno target__parse_uid(struct target *target)
+{
+	struct passwd pwd, *result;
+	char buf[1024];
+	const char *str = target->uid_str;
+
+	target->uid = UINT_MAX;
+	if (str == NULL)
+		return TARGET_ERRNO__SUCCESS;
+
+	/* Try user name first */
+	getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
+
+	if (result == NULL) {
+		/* The user name was not found. Maybe it's a UID number. */
+		char *endptr;
+		int uid = strtol(str, &endptr, 10);
+
+		/* no digits at all ("" included) must not end up meaning UID 0 */
+		if (endptr == str || *endptr != '\0')
+			return TARGET_ERRNO__INVALID_UID;
+
+		getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
+
+		if (result == NULL)
+			return TARGET_ERRNO__USER_NOT_FOUND;
+	}
+
+	target->uid = result->pw_uid;
+	return TARGET_ERRNO__SUCCESS;
+}
+
+/*
+ * Indexed by errnum - __TARGET_ERRNO__START: keep in enum target_errno order.
+ */
+static const char *target__error_str[] = {
+	"PID/TID switch overriding CPU",
+	"PID/TID switch overriding UID",
+	"UID switch overriding CPU",
+	"PID/TID switch overriding SYSTEM",
+	"UID switch overriding SYSTEM",
+	"SYSTEM/CPU switch overriding PER-THREAD",
+	"Invalid User: %s",
+	"Problems obtaining information for user %s",
+};
+
+/*
+ * Write a description of @errnum into @buf (at most @buflen bytes).
+ *
+ * Non-negative values are taken to be regular errno numbers and are handed
+ * to str_error_r(); TARGET_ERRNO__* values are looked up in
+ * target__error_str[].  Returns 0, or -1 for any other negative value.
+ */
+int target__strerror(struct target *target, int errnum,
+			  char *buf, size_t buflen)
+{
+	const char *msg;
+	int idx;
+
+	BUG_ON(buflen == 0);
+
+	if (errnum >= 0) {
+		str_error_r(errnum, buf, buflen);
+		return 0;
+	}
+
+	if (errnum <  __TARGET_ERRNO__START || errnum >= __TARGET_ERRNO__END)
+		return -1;
+
+	/* the table is laid out in enum order, from __TARGET_ERRNO__START on */
+	idx = errnum - __TARGET_ERRNO__START;
+	msg = target__error_str[idx];
+
+	if (errnum == TARGET_ERRNO__INVALID_UID ||
+	    errnum == TARGET_ERRNO__USER_NOT_FOUND) {
+		/* only these two messages carry a %s, for the user string */
+		snprintf(buf, buflen, msg, target->uid_str);
+	} else {
+		snprintf(buf, buflen, "%s", msg);
+	}
+
+	return 0;
+}
diff --git a/util/target.h b/util/target.h
new file mode 100644
index 0000000..6ef01a8
--- /dev/null
+++ b/util/target.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_TARGET_H
+#define _PERF_TARGET_H
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+struct target {
+	const char   *pid;	/* pid, tid or uid_str set: target__has_task() */
+	const char   *tid;
+	const char   *cpu_list;	/* cpu_list or system_wide: target__has_cpu() */
+	const char   *uid_str;	/* user name or number for target__parse_uid() */
+	uid_t	     uid;	/* set by target__parse_uid(); UINT_MAX if unset */
+	bool	     system_wide;
+	bool	     uses_mmap;	/* read by target__uses_dummy_map() */
+	bool	     default_per_cpu;	/* read by target__uses_dummy_map() */
+	bool	     per_thread;	/* with system_wide: target__has_per_thread() */
+};
+
+enum target_errno {
+	TARGET_ERRNO__SUCCESS		= 0,
+
+	/*
+	 * Choose an arbitrary negative big number not to clash with standard
+	 * errno since SUS requires the errno has distinct positive values.
+	 * See 'Issue 6' in the link below.
+	 *
+	 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+	 */
+	__TARGET_ERRNO__START		= -10000,
+
+	/* for target__validate() */
+	TARGET_ERRNO__PID_OVERRIDE_CPU	= __TARGET_ERRNO__START,
+	TARGET_ERRNO__PID_OVERRIDE_UID,
+	TARGET_ERRNO__UID_OVERRIDE_CPU,
+	TARGET_ERRNO__PID_OVERRIDE_SYSTEM,
+	TARGET_ERRNO__UID_OVERRIDE_SYSTEM,
+	TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD,
+
+	/* for target__parse_uid() */
+	TARGET_ERRNO__INVALID_UID,
+	TARGET_ERRNO__USER_NOT_FOUND,
+
+	__TARGET_ERRNO__END,
+};
+
+enum target_errno target__validate(struct target *target);
+enum target_errno target__parse_uid(struct target *target);
+
+int target__strerror(struct target *target, int errnum, char *buf, size_t buflen);
+
+static inline bool target__has_task(struct target *target)
+{
+	return target->tid || target->pid || target->uid_str;
+}
+
+static inline bool target__has_cpu(struct target *target)
+{
+	return target->system_wide || target->cpu_list;
+}
+
+static inline bool target__none(struct target *target)
+{
+	return !target__has_task(target) && !target__has_cpu(target);
+}
+
+static inline bool target__has_per_thread(struct target *target)
+{
+	return target->system_wide && target->per_thread;
+}
+
+static inline bool target__uses_dummy_map(struct target *target)
+{
+	/* With default_per_cpu set, per_thread alone decides */
+	if (target->default_per_cpu)
+		return target->per_thread;
+
+	/* A task target, or neither a CPU target nor mmap use */
+	if (target__has_task(target))
+		return true;
+	if (!target__has_cpu(target) && !target->uses_mmap)
+		return true;
+
+	return target__has_per_thread(target);
+}
+
+#endif /* _PERF_TARGET_H */
diff --git a/util/term.c b/util/term.c
new file mode 100644
index 0000000..e7aa82c
--- /dev/null
+++ b/util/term.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "term.h"
+#include <stdlib.h>
+#include <termios.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+void get_term_dimensions(struct winsize *ws)
+{
+	const char *lines = getenv("LINES"), *columns;
+	/* $LINES and $COLUMNS win when both yield a non-zero size */
+	if (lines) {
+		ws->ws_row = atoi(lines);
+		columns = getenv("COLUMNS");
+		if (columns) {
+			ws->ws_col = atoi(columns);
+			if (ws->ws_row != 0 && ws->ws_col != 0)
+				return;
+		}
+	}
+#ifdef TIOCGWINSZ
+	/* Otherwise ask the terminal behind file descriptor 1 */
+	if (!ioctl(1, TIOCGWINSZ, ws) && ws->ws_row != 0 && ws->ws_col != 0)
+		return;
+#endif
+	ws->ws_row = 25;	/* last resort: 25 rows by 80 columns */
+	ws->ws_col = 80;
+}
+
+void set_term_quiet_input(struct termios *old)
+{
+	struct termios tc;
+	/* Without the current settings there is nothing valid to modify */
+	if (tcgetattr(0, old) != 0)
+		return;
+	tc = *old;
+	tc.c_lflag &= ~(ICANON | ECHO);		/* no line buffering, no echo */
+	tc.c_cc[VMIN] = tc.c_cc[VTIME] = 0;	/* reads return immediately */
+	tcsetattr(0, TCSANOW, &tc);
+}
diff --git a/util/term.h b/util/term.h
new file mode 100644
index 0000000..607b170
--- /dev/null
+++ b/util/term.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TERM_H
+#define __PERF_TERM_H
+
+struct termios;
+struct winsize;
+
+void get_term_dimensions(struct winsize *ws);
+void set_term_quiet_input(struct termios *old);
+
+#endif /* __PERF_TERM_H */
diff --git a/util/thread-stack.c b/util/thread-stack.c
new file mode 100644
index 0000000..dd17d6a
--- /dev/null
+++ b/util/thread-stack.c
@@ -0,0 +1,626 @@
+/*
+ * thread-stack.c: Synthesize a thread's stack using call / return events
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <errno.h>
+#include "thread.h"
+#include "event.h"
+#include "machine.h"
+#include "util.h"
+#include "debug.h"
+#include "symbol.h"
+#include "comm.h"
+#include "call-path.h"
+#include "thread-stack.h"
+
+#define STACK_GROWTH 2048
+
+/**
+ * struct thread_stack_entry - thread stack entry.
+ * @ret_addr: return address
+ * @timestamp: timestamp (if known)
+ * @ref: external reference (e.g. db_id of sample)
+ * @branch_count: the branch count when the entry was created
+ * @cp: call path
+ * @no_call: a 'call' was not seen
+ */
+struct thread_stack_entry {
+	u64 ret_addr;
+	u64 timestamp;
+	u64 ref;
+	u64 branch_count;
+	struct call_path *cp;
+	bool no_call;
+};
+
+/**
+ * struct thread_stack - thread stack constructed from 'call' and 'return'
+ *                       branch samples.
+ * @stack: array that holds the stack
+ * @cnt: number of entries in the stack
+ * @sz: current maximum stack size
+ * @trace_nr: current trace number
+ * @branch_count: running branch count
+ * @kernel_start: kernel start address
+ * @last_time: last timestamp
+ * @crp: call/return processor
+ * @comm: current comm
+ */
+struct thread_stack {
+	struct thread_stack_entry *stack;
+	size_t cnt;
+	size_t sz;
+	u64 trace_nr;
+	u64 branch_count;
+	u64 kernel_start;
+	u64 last_time;
+	struct call_return_processor *crp;
+	struct comm *comm;
+};
+
+static int thread_stack__grow(struct thread_stack *ts)
+{
+	size_t nr_entries = ts->sz + STACK_GROWTH;
+	struct thread_stack_entry *entries;
+
+	/*
+	 * Extend the array by STACK_GROWTH entries.  On failure the old
+	 * array and size are left untouched.
+	 */
+	entries = realloc(ts->stack, nr_entries * sizeof(*entries));
+	if (entries == NULL)
+		return -ENOMEM;
+
+	ts->stack = entries;
+	ts->sz = nr_entries;
+	return 0;
+}
+
+static struct thread_stack *thread_stack__new(struct thread *thread,
+					      struct call_return_processor *crp)
+{
+	struct thread_stack *ts;
+
+	ts = zalloc(sizeof(struct thread_stack));
+	if (!ts)
+		return NULL;
+	/* Allocate the first STACK_GROWTH entries */
+	if (thread_stack__grow(ts)) {
+		free(ts);
+		return NULL;
+	}
+	/* Without a known machine, fall back to 1ULL << 63 as kernel start */
+	if (thread->mg && thread->mg->machine)
+		ts->kernel_start = machine__kernel_start(thread->mg->machine);
+	else
+		ts->kernel_start = 1ULL << 63;
+	ts->crp = crp;	/* NULL when created by thread_stack__event() */
+
+	return ts;
+}
+
+static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
+{
+	int err = 0;
+
+	if (ts->cnt == ts->sz) {
+		err = thread_stack__grow(ts);
+		if (err) {
+			pr_warning("Out of memory: discarding thread stack\n");
+			ts->cnt = 0;	/* the old array is kept and reused */
+		}
+	}
+	/* After a failed grow the new entry restarts the stack at index 0 */
+	ts->stack[ts->cnt++].ret_addr = ret_addr;
+
+	return err;
+}
+
+static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
+{
+	size_t i;
+
+	/*
+	 * In some cases there may be functions which are not seen to return.
+	 * For example when setjmp / longjmp has been used.  Or the perf context
+	 * switch in the kernel which doesn't stop and start tracing in exactly
+	 * the same code path.  When that happens the return address will be
+	 * further down the stack.  If the return address is not found at all,
+	 * we assume the opposite (i.e. this is a return for a call that wasn't
+	 * seen for some reason) and leave the stack alone.
+	 */
+	for (i = ts->cnt; i; ) {
+		if (ts->stack[--i].ret_addr == ret_addr) {
+			ts->cnt = i;
+			return;
+		}
+	}
+}
+
+static bool thread_stack__in_kernel(struct thread_stack *ts)
+{
+	if (!ts->cnt)
+		return false;
+
+	return ts->stack[ts->cnt - 1].cp->in_kernel;
+}
+
+static int thread_stack__call_return(struct thread *thread,
+				     struct thread_stack *ts, size_t idx,
+				     u64 timestamp, u64 ref, bool no_return)
+{
+	struct call_return_processor *crp = ts->crp;
+	struct thread_stack_entry *tse;
+	struct call_return cr = {
+		.thread = thread,
+		.comm = ts->comm,
+		.db_id = 0,
+	};
+	/* Describe stack entry idx as a call/return pair for the callback */
+	tse = &ts->stack[idx];
+	cr.cp = tse->cp;
+	cr.call_time = tse->timestamp;
+	cr.return_time = timestamp;
+	cr.branch_count = ts->branch_count - tse->branch_count;
+	cr.call_ref = tse->ref;
+	cr.return_ref = ref;
+	if (tse->no_call)
+		cr.flags |= CALL_RETURN_NO_CALL;
+	if (no_return)
+		cr.flags |= CALL_RETURN_NO_RETURN;
+	/* The callback's return value is passed straight back */
+	return crp->process(&cr, crp->data);
+}
+
+static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
+{
+	struct call_return_processor *crp = ts->crp;
+	int err;
+	/* No processor to report to: just empty the stack */
+	if (!crp) {
+		ts->cnt = 0;
+		return 0;
+	}
+	/* Report every entry, top first, as a call that did not return */
+	while (ts->cnt) {
+		err = thread_stack__call_return(thread, ts, --ts->cnt,
+						ts->last_time, 0, true);
+		if (err) {
+			pr_err("Error flushing thread stack!\n");
+			ts->cnt = 0;
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int thread_stack__flush(struct thread *thread)
+{
+	if (thread->ts)
+		return __thread_stack__flush(thread, thread->ts);
+
+	return 0;
+}
+
+int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+			u64 to_ip, u16 insn_len, u64 trace_nr)
+{
+	if (!thread)
+		return -EINVAL;
+
+	if (!thread->ts) {
+		thread->ts = thread_stack__new(thread, NULL);
+		if (!thread->ts) {
+			pr_warning("Out of memory: no thread stack\n");
+			return -ENOMEM;
+		}
+		thread->ts->trace_nr = trace_nr;
+	}
+
+	/*
+	 * When the trace is discontinuous, the trace_nr changes.  In that case
+	 * the stack might be completely invalid.  Better to report nothing than
+	 * to report something misleading, so flush the stack.
+	 */
+	if (trace_nr != thread->ts->trace_nr) {
+		if (thread->ts->trace_nr)
+			__thread_stack__flush(thread, thread->ts);
+		thread->ts->trace_nr = trace_nr;
+	}
+
+	/* Stop here if thread_stack__process() is in use */
+	if (thread->ts->crp)
+		return 0;
+
+	if (flags & PERF_IP_FLAG_CALL) {
+		u64 ret_addr;
+
+		if (!to_ip)
+			return 0;
+		ret_addr = from_ip + insn_len;
+		if (ret_addr == to_ip)
+			return 0; /* Zero-length calls are excluded */
+		return thread_stack__push(thread->ts, ret_addr);
+	} else if (flags & PERF_IP_FLAG_RETURN) {
+		if (!from_ip)
+			return 0;
+		thread_stack__pop(thread->ts, to_ip);
+	}
+
+	return 0;
+}
+
+void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
+{
+	if (!thread || !thread->ts)
+		return;
+
+	if (trace_nr != thread->ts->trace_nr) {
+		if (thread->ts->trace_nr)
+			__thread_stack__flush(thread, thread->ts);
+		thread->ts->trace_nr = trace_nr;
+	}
+}
+
+void thread_stack__free(struct thread *thread)
+{
+	if (thread->ts) {
+		__thread_stack__flush(thread, thread->ts);
+		zfree(&thread->ts->stack);
+		zfree(&thread->ts);
+	}
+}
+
+void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+			  size_t sz, u64 ip)
+{
+	struct thread_stack *ts = thread ? thread->ts : NULL;
+	size_t i, nr = ts ? ts->cnt + 1 : 1;
+
+	/* sz bounds the entries written; with sz == 0 not even ips[0] fits */
+	chain->nr = min(sz, nr);
+	if (!chain->nr)
+		return;
+
+	chain->ips[0] = ip;	/* given ip first, then the newest stack entry */
+	for (i = 1; i < chain->nr; i++)
+		chain->ips[i] = ts->stack[ts->cnt - i].ret_addr;
+}
+
+struct call_return_processor *
+call_return_processor__new(int (*process)(struct call_return *cr, void *data),
+			   void *data)
+{
+	struct call_return_processor *crp = zalloc(sizeof(*crp));
+
+	if (crp == NULL)
+		return NULL;
+
+	/* Freed again by call_return_processor__free() */
+	crp->cpr = call_path_root__new();
+	if (crp->cpr == NULL) {
+		free(crp);
+		return NULL;
+	}
+
+	crp->process = process;
+	crp->data = data;
+	return crp;
+}
+
+void call_return_processor__free(struct call_return_processor *crp)
+{
+	if (crp) {
+		call_path_root__free(crp->cpr);
+		free(crp);
+	}
+}
+
+static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
+				 u64 timestamp, u64 ref, struct call_path *cp,
+				 bool no_call)
+{
+	struct thread_stack_entry *tse;
+	int err;
+
+	if (ts->cnt == ts->sz) {
+		err = thread_stack__grow(ts);
+		if (err)
+			return err;
+	}
+
+	tse = &ts->stack[ts->cnt++];
+	tse->ret_addr = ret_addr;
+	tse->timestamp = timestamp;
+	tse->ref = ref;
+	tse->branch_count = ts->branch_count;
+	tse->cp = cp;
+	tse->no_call = no_call;
+
+	return 0;
+}
+
+static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
+				u64 ret_addr, u64 timestamp, u64 ref,
+				struct symbol *sym)
+{
+	int err;
+	/* A return value of 1 means that no stack entry matched */
+	if (!ts->cnt)
+		return 1;
+	/* A lone entry is popped if its symbol matches, whatever ret_addr is */
+	if (ts->cnt == 1) {
+		struct thread_stack_entry *tse = &ts->stack[0];
+
+		if (tse->cp->sym == sym)
+			return thread_stack__call_return(thread, ts, --ts->cnt,
+							 timestamp, ref, false);
+	}
+	/* Otherwise match on the return address, top of stack first */
+	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
+		return thread_stack__call_return(thread, ts, --ts->cnt,
+						 timestamp, ref, false);
+	} else {
+		size_t i = ts->cnt - 1;
+		/* Entries above a deeper match are reported as not returning */
+		while (i--) {
+			if (ts->stack[i].ret_addr != ret_addr)
+				continue;
+			i += 1;
+			while (ts->cnt > i) {
+				err = thread_stack__call_return(thread, ts,
+								--ts->cnt,
+								timestamp, ref,
+								true);
+				if (err)
+					return err;
+			}
+			return thread_stack__call_return(thread, ts, --ts->cnt,
+							 timestamp, ref, false);
+		}
+	}
+
+	return 1;
+}
+
+static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
+				struct perf_sample *sample,
+				struct addr_location *from_al,
+				struct addr_location *to_al, u64 ref)
+{
+	struct call_path_root *cpr = ts->crp->cpr;
+	struct call_path *cp;
+	struct symbol *sym;
+	u64 ip;
+
+	if (sample->ip) {
+		ip = sample->ip;
+		sym = from_al->sym;
+	} else if (sample->addr) {
+		ip = sample->addr;
+		sym = to_al->sym;
+	} else {
+		return 0;
+	}
+
+	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
+				ts->kernel_start);
+	if (!cp)
+		return -ENOMEM;
+
+	return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
+				     true);
+}
+
+static int thread_stack__no_call_return(struct thread *thread,
+					struct thread_stack *ts,
+					struct perf_sample *sample,
+					struct addr_location *from_al,
+					struct addr_location *to_al, u64 ref)
+{
+	struct call_path_root *cpr = ts->crp->cpr;
+	struct call_path *cp, *parent;
+	u64 ks = ts->kernel_start;
+	int err;
+
+	if (sample->ip >= ks && sample->addr < ks) {
+		/* Return to userspace, so pop all kernel addresses */
+		while (thread_stack__in_kernel(ts)) {
+			err = thread_stack__call_return(thread, ts, --ts->cnt,
+							sample->time, ref,
+							true);
+			if (err)
+				return err;
+		}
+
+		/* If the stack is empty, push the userspace address */
+		if (!ts->cnt) {
+			cp = call_path__findnew(cpr, &cpr->call_path,
+						to_al->sym, sample->addr,
+						ts->kernel_start);
+			if (!cp)
+				return -ENOMEM;
+			return thread_stack__push_cp(ts, 0, sample->time, ref,
+						     cp, true);
+		}
+	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
+		/* Return to userspace, so pop all kernel addresses */
+		while (thread_stack__in_kernel(ts)) {
+			err = thread_stack__call_return(thread, ts, --ts->cnt,
+							sample->time, ref,
+							true);
+			if (err)
+				return err;
+		}
+	}
+
+	if (ts->cnt)
+		parent = ts->stack[ts->cnt - 1].cp;
+	else
+		parent = &cpr->call_path;
+
+	/* This 'return' had no 'call', so push and pop top of stack */
+	cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
+				ts->kernel_start);
+	if (!cp)
+		return -ENOMEM;
+
+	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
+				    true);
+	if (err)
+		return err;
+
+	return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
+				    to_al->sym);
+}
+
+static int thread_stack__trace_begin(struct thread *thread,
+				     struct thread_stack *ts, u64 timestamp,
+				     u64 ref)
+{
+	struct thread_stack_entry *tse;
+	int err;
+
+	if (!ts->cnt)
+		return 0;
+
+	/* Pop trace end */
+	tse = &ts->stack[ts->cnt - 1];
+	if (tse->cp->sym == NULL && tse->cp->ip == 0) {
+		err = thread_stack__call_return(thread, ts, --ts->cnt,
+						timestamp, ref, false);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int thread_stack__trace_end(struct thread_stack *ts,
+				   struct perf_sample *sample, u64 ref)
+{
+	struct call_path_root *cpr = ts->crp->cpr;
+	struct call_path *cp;
+	u64 ret_addr;
+
+	/* No point having 'trace end' on the bottom of the stack */
+	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
+		return 0;
+
+	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
+				ts->kernel_start);
+	if (!cp)
+		return -ENOMEM;
+
+	ret_addr = sample->ip + sample->insn_len;
+
+	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
+				     false);
+}
+
+int thread_stack__process(struct thread *thread, struct comm *comm,
+			  struct perf_sample *sample,
+			  struct addr_location *from_al,
+			  struct addr_location *to_al, u64 ref,
+			  struct call_return_processor *crp)
+{
+	struct thread_stack *ts = thread->ts;
+	int err = 0;
+	/* Make sure the thread has a stack that was created with a processor */
+	if (ts) {
+		if (!ts->crp) {
+			/* Supersede thread_stack__event() */
+			thread_stack__free(thread);
+			thread->ts = thread_stack__new(thread, crp);
+			if (!thread->ts)
+				return -ENOMEM;
+			ts = thread->ts;
+			ts->comm = comm;
+		}
+	} else {
+		thread->ts = thread_stack__new(thread, crp);
+		if (!thread->ts)
+			return -ENOMEM;
+		ts = thread->ts;
+		ts->comm = comm;
+	}
+
+	/* Flush stack on exec */
+	if (ts->comm != comm && thread->pid_ == thread->tid) {
+		err = __thread_stack__flush(thread, ts);
+		if (err)
+			return err;
+		ts->comm = comm;
+	}
+
+	/* If the stack is empty, put the current symbol on the stack */
+	if (!ts->cnt) {
+		err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
+					   ref);
+		if (err)
+			return err;
+	}
+	/* Every sample that gets this far counts as one branch */
+	ts->branch_count += 1;
+	ts->last_time = sample->time;
+
+	if (sample->flags & PERF_IP_FLAG_CALL) {
+		struct call_path_root *cpr = ts->crp->cpr;
+		struct call_path *cp;
+		u64 ret_addr;
+
+		if (!sample->ip || !sample->addr)
+			return 0;
+
+		ret_addr = sample->ip + sample->insn_len;
+		if (ret_addr == sample->addr)
+			return 0; /* Zero-length calls are excluded */
+
+		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
+					to_al->sym, sample->addr,
+					ts->kernel_start);
+		if (!cp)
+			return -ENOMEM;
+		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
+					    cp, false);
+	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
+		if (!sample->ip || !sample->addr)
+			return 0;
+
+		err = thread_stack__pop_cp(thread, ts, sample->addr,
+					   sample->time, ref, from_al->sym);
+		if (err) {	/* > 0: no matching entry was on the stack */
+			if (err < 0)
+				return err;
+			err = thread_stack__no_call_return(thread, ts, sample,
+							   from_al, to_al, ref);
+		}
+	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
+		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
+	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
+		err = thread_stack__trace_end(ts, sample, ref);
+	}
+
+	return err;
+}
+
+size_t thread_stack__depth(struct thread *thread)
+{
+	if (!thread->ts)
+		return 0;
+	return thread->ts->cnt;
+}
diff --git a/util/thread-stack.h b/util/thread-stack.h
new file mode 100644
index 0000000..b7e41c4
--- /dev/null
+++ b/util/thread-stack.h
@@ -0,0 +1,102 @@
+/*
+ * thread-stack.h: Synthesize a thread's stack using call / return events
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_THREAD_STACK_H
+#define __PERF_THREAD_STACK_H
+
+#include <sys/types.h>
+
+#include <linux/types.h>
+
+struct thread;
+struct comm;
+struct ip_callchain;
+struct symbol;
+struct dso;
+struct comm;
+struct perf_sample;
+struct addr_location;
+struct call_path;
+
+/*
+ * Call/Return flags.
+ *
+ * CALL_RETURN_NO_CALL: 'return' but no matching 'call'
+ * CALL_RETURN_NO_RETURN: 'call' but no matching 'return'
+ */
+enum {
+	CALL_RETURN_NO_CALL	= 1 << 0,
+	CALL_RETURN_NO_RETURN	= 1 << 1,
+};
+
+/**
+ * struct call_return - paired call/return information.
+ * @thread: thread in which call/return occurred
+ * @comm: comm in which call/return occurred
+ * @cp: call path
+ * @call_time: timestamp of call (if known)
+ * @return_time: timestamp of return (if known)
+ * @branch_count: number of branches seen between call and return
+ * @call_ref: external reference to 'call' sample (e.g. db_id)
+ * @return_ref:  external reference to 'return' sample (e.g. db_id)
+ * @db_id: id used for db-export
+ * @flags: Call/Return flags
+ */
+struct call_return {
+	struct thread *thread;
+	struct comm *comm;
+	struct call_path *cp;
+	u64 call_time;
+	u64 return_time;
+	u64 branch_count;
+	u64 call_ref;
+	u64 return_ref;
+	u64 db_id;
+	u32 flags;
+};
+
+/**
+ * struct call_return_processor - provides a call-back to consume call-return
+ *                                information.
+ * @cpr: call path root
+ * @process: call-back that accepts call/return information
+ * @data: anonymous data for call-back
+ */
+struct call_return_processor {
+	struct call_path_root *cpr;
+	int (*process)(struct call_return *cr, void *data);
+	void *data;
+};
+
+int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+			u64 to_ip, u16 insn_len, u64 trace_nr);
+void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
+void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+			  size_t sz, u64 ip);
+int thread_stack__flush(struct thread *thread);
+void thread_stack__free(struct thread *thread);
+size_t thread_stack__depth(struct thread *thread);
+
+struct call_return_processor *
+call_return_processor__new(int (*process)(struct call_return *cr, void *data),
+			   void *data);
+void call_return_processor__free(struct call_return_processor *crp);
+int thread_stack__process(struct thread *thread, struct comm *comm,
+			  struct perf_sample *sample,
+			  struct addr_location *from_al,
+			  struct addr_location *to_al, u64 ref,
+			  struct call_return_processor *crp);
+
+#endif
diff --git a/util/thread.c b/util/thread.c
new file mode 100644
index 0000000..68b65b1
--- /dev/null
+++ b/util/thread.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../perf.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include "session.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "util.h"
+#include "debug.h"
+#include "namespaces.h"
+#include "comm.h"
+#include "unwind.h"
+
+#include <api/fs/fs.h>
+
+int thread__init_map_groups(struct thread *thread, struct machine *machine)
+{
+	pid_t pid = thread->pid_;
+	/* New map groups if pid == tid or pid == -1, else share the leader's */
+	if (pid == thread->tid || pid == -1) {
+		thread->mg = map_groups__new(machine);
+	} else {
+		struct thread *leader = __machine__findnew_thread(machine, pid, pid);
+		if (leader) {
+			thread->mg = map_groups__get(leader->mg);
+			thread__put(leader);
+		}
+	}
+	/* Fails with -1 whenever no map groups could be attached */
+	return thread->mg ? 0 : -1;
+}
+
+struct thread *thread__new(pid_t pid, pid_t tid)
+{
+	struct thread *thread = zalloc(sizeof(*thread));
+	struct comm *comm;
+	char *tid_str;
+
+	if (thread == NULL)
+		return NULL;
+
+	thread->pid_ = pid;
+	thread->tid = tid;
+	thread->ppid = -1;
+	thread->cpu = -1;
+	INIT_LIST_HEAD(&thread->namespaces_list);
+	INIT_LIST_HEAD(&thread->comm_list);
+	init_rwsem(&thread->namespaces_lock);
+	init_rwsem(&thread->comm_lock);
+
+	/* Until a real comm is set the thread is named ":<tid>" */
+	tid_str = malloc(32);
+	if (tid_str == NULL)
+		goto err_thread;
+	snprintf(tid_str, 32, ":%d", tid);
+	comm = comm__new(tid_str, 0, false);
+	free(tid_str);
+	if (comm == NULL)
+		goto err_thread;
+
+	list_add(&comm->list, &thread->comm_list);
+	refcount_set(&thread->refcnt, 1);
+	RB_CLEAR_NODE(&thread->rb_node);
+	/* Thread holds first ref to nsdata. */
+	thread->nsinfo = nsinfo__new(pid);
+	return thread;
+
+err_thread:
+	free(thread);
+	return NULL;
+}
+
+void thread__delete(struct thread *thread)
+{
+	struct namespaces *namespaces, *tmp_namespaces;
+	struct comm *comm, *tmp_comm;
+
+	BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
+
+	thread_stack__free(thread);
+
+	if (thread->mg) {
+		map_groups__put(thread->mg);
+		thread->mg = NULL;
+	}
+	down_write(&thread->namespaces_lock);
+	list_for_each_entry_safe(namespaces, tmp_namespaces,
+				 &thread->namespaces_list, list) {
+		list_del(&namespaces->list);
+		namespaces__free(namespaces);
+	}
+	up_write(&thread->namespaces_lock);
+
+	down_write(&thread->comm_lock);
+	list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) {
+		list_del(&comm->list);
+		comm__free(comm);
+	}
+	up_write(&thread->comm_lock);
+
+	unwind__finish_access(thread);
+	nsinfo__zput(thread->nsinfo);
+
+	exit_rwsem(&thread->namespaces_lock);
+	exit_rwsem(&thread->comm_lock);
+	free(thread);
+}
+
+struct thread *thread__get(struct thread *thread)
+{
+	if (thread)
+		refcount_inc(&thread->refcnt);
+	return thread;
+}
+
+void thread__put(struct thread *thread)
+{
+	if (thread && refcount_dec_and_test(&thread->refcnt)) {
+		/*
+		 * Remove it from the dead_threads list, as last reference
+		 * is gone.
+		 */
+		list_del_init(&thread->node);
+		thread__delete(thread);
+	}
+}
+
+struct namespaces *thread__namespaces(const struct thread *thread)
+{
+	if (list_empty(&thread->namespaces_list))
+		return NULL;
+
+	return list_first_entry(&thread->namespaces_list, struct namespaces, list);
+}
+
+static int __thread__set_namespaces(struct thread *thread, u64 timestamp,
+				    struct namespaces_event *event)
+{
+	struct namespaces *new, *curr = thread__namespaces(thread);
+
+	new = namespaces__new(event);
+	if (!new)
+		return -ENOMEM;
+
+	list_add(&new->list, &thread->namespaces_list);
+
+	if (timestamp && curr) {
+		/*
+		 * setns syscall must have changed few or all the namespaces
+		 * of this thread. Update end time for the namespaces
+		 * previously used.
+		 */
+		curr = list_next_entry(new, list);
+		curr->end_time = timestamp;
+	}
+
+	return 0;
+}
+
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+			   struct namespaces_event *event)
+{
+	int ret;
+
+	down_write(&thread->namespaces_lock);
+	ret = __thread__set_namespaces(thread, timestamp, event);
+	up_write(&thread->namespaces_lock);
+	return ret;
+}
+
+struct comm *thread__comm(const struct thread *thread)
+{
+	if (list_empty(&thread->comm_list))
+		return NULL;
+
+	return list_first_entry(&thread->comm_list, struct comm, list);
+}
+
+struct comm *thread__exec_comm(const struct thread *thread)
+{
+	struct comm *comm, *last = NULL;
+
+	list_for_each_entry(comm, &thread->comm_list, list) {
+		if (comm->exec)
+			return comm;
+		last = comm;
+	}
+
+	return last;
+}
+
+static int ____thread__set_comm(struct thread *thread, const char *str,
+				u64 timestamp, bool exec)
+{
+	struct comm *new, *curr = thread__comm(thread);
+
+	/* Override the default :tid entry */
+	if (!thread->comm_set) {
+		int err = comm__override(curr, str, timestamp, exec);
+		if (err)
+			return err;
+	} else {
+		new = comm__new(str, timestamp, exec);
+		if (!new)
+			return -ENOMEM;
+		list_add(&new->list, &thread->comm_list);
+
+		if (exec)
+			unwind__flush_access(thread);
+	}
+
+	thread->comm_set = true;
+
+	return 0;
+}
+
+int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
+		       bool exec)
+{
+	int ret;
+
+	down_write(&thread->comm_lock);
+	ret = ____thread__set_comm(thread, str, timestamp, exec);
+	up_write(&thread->comm_lock);
+	return ret;
+}
+
+int thread__set_comm_from_proc(struct thread *thread)
+{
+	char path[64], *comm = NULL;
+	size_t sz;
+	int err = -1;
+
+	/* Fails (-1) on a truncated path, a read error or an empty file */
+	if (snprintf(path, sizeof(path), "%d/task/%d/comm",
+		     thread->pid_, thread->tid) < (int)sizeof(path) &&
+	    procfs__read_str(path, &comm, &sz) == 0 && sz) {
+		comm[sz - 1] = '\0';	/* drop the last byte read */
+		err = thread__set_comm(thread, comm, 0);
+	}
+	free(comm);	/* the comm code keeps its own copy, cf. thread__new() */
+	return err;
+}
+
+static const char *__thread__comm_str(const struct thread *thread)
+{
+	const struct comm *comm = thread__comm(thread);
+
+	if (!comm)
+		return NULL;
+
+	return comm__str(comm);
+}
+
+const char *thread__comm_str(const struct thread *thread)
+{
+	const char *str;
+
+	down_read((struct rw_semaphore *)&thread->comm_lock);
+	str = __thread__comm_str(thread);
+	up_read((struct rw_semaphore *)&thread->comm_lock);
+
+	return str;
+}
+
+/* CHECKME: it should probably better return the max comm len from its comm list */
+int thread__comm_len(struct thread *thread)
+{
+	if (!thread->comm_len) {
+		const char *comm = thread__comm_str(thread);
+		if (!comm)
+			return 0;
+		thread->comm_len = strlen(comm);
+	}
+
+	return thread->comm_len;
+}
+
+size_t thread__fprintf(struct thread *thread, FILE *fp)
+{
+	return fprintf(fp, "Thread %d %s\n", thread->tid, thread__comm_str(thread)) +
+	       map_groups__fprintf(thread->mg, fp);
+}
+
+int thread__insert_map(struct thread *thread, struct map *map)
+{
+	int ret;
+
+	ret = unwind__prepare_access(thread, map, NULL);
+	if (ret)
+		return ret;
+
+	map_groups__fixup_overlappings(thread->mg, map, stderr);
+	map_groups__insert(thread->mg, map);
+
+	return 0;
+}
+
+static int __thread__prepare_access(struct thread *thread)
+{
+	bool initialized = false;
+	int i, err = 0;
+
+	/* Stop at the first error so that a later map type cannot hide it */
+	for (i = 0; i < MAP__NR_TYPES && !err; ++i) {
+		struct maps *maps = &thread->mg->maps[i];
+		struct map *map;
+
+		down_read(&maps->lock);
+
+		for (map = maps__first(maps); map; map = map__next(map)) {
+			err = unwind__prepare_access(thread, map, &initialized);
+			if (err || initialized)
+				break;
+		}
+		up_read(&maps->lock);
+	}
+
+	return err;
+}
+
+static int thread__prepare_access(struct thread *thread)
+{
+	int err = 0;
+
+	if (symbol_conf.use_callchain)
+		err = __thread__prepare_access(thread);
+
+	return err;
+}
+
+static int thread__clone_map_groups(struct thread *thread,
+				    struct thread *parent)
+{
+	int i;
+
+	/* This is new thread, we share map groups for process. */
+	if (thread->pid_ == parent->pid_)
+		return thread__prepare_access(thread);
+
+	if (thread->mg == parent->mg) {
+		pr_debug("broken map groups on thread %d/%d parent %d/%d\n",
+			 thread->pid_, thread->tid, parent->pid_, parent->tid);
+		return 0;
+	}
+
+	/* But this one is new process, copy maps. */
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		if (map_groups__clone(thread, parent->mg, i) < 0)
+			return -ENOMEM;
+
+	return 0;
+}
+
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
+{
+	if (parent->comm_set) {	/* inherit the parent's current comm */
+		const char *comm = thread__comm_str(parent);
+		int err;
+		if (!comm)
+			return -ENOMEM;
+		err = thread__set_comm(thread, comm, timestamp);
+		if (err)
+			return err;
+	}
+	/* Record the parent's tid, then set up the map groups */
+	thread->ppid = parent->tid;
+	return thread__clone_map_groups(thread, parent);
+}
+
+void thread__find_cpumode_addr_location(struct thread *thread,
+					enum map_type type, u64 addr,
+					struct addr_location *al)
+{
+	size_t i;
+	const u8 cpumodes[] = {
+		PERF_RECORD_MISC_USER,
+		PERF_RECORD_MISC_KERNEL,
+		PERF_RECORD_MISC_GUEST_USER,
+		PERF_RECORD_MISC_GUEST_KERNEL
+	};
+
+	for (i = 0; i < ARRAY_SIZE(cpumodes); i++) {
+		thread__find_addr_location(thread, cpumodes[i], type, addr, al);
+		if (al->map)
+			break;
+	}
+}
+
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread)
+{
+	if (thread->pid_ == thread->tid)
+		return thread__get(thread);
+
+	if (thread->pid_ == -1)
+		return NULL;
+
+	return machine__find_thread(machine, thread->pid_, thread->pid_);
+}
diff --git a/util/thread.h b/util/thread.h
new file mode 100644
index 0000000..14d44c3
--- /dev/null
+++ b/util/thread.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_THREAD_H
+#define __PERF_THREAD_H
+
+#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "symbol.h"
+#include <strlist.h>
+#include <intlist.h>
+#include "rwsem.h"
+
+struct thread_stack;
+struct unwind_libunwind_ops;
+
+struct thread {
+	union {
+		struct rb_node	 rb_node;
+		struct list_head node;
+	};
+	struct map_groups	*mg;
+	pid_t			pid_; /* Not all tools update this */
+	pid_t			tid;
+	pid_t			ppid; /* thread__fork() stores the parent's tid */
+	int			cpu;
+	refcount_t		refcnt;
+	bool			comm_set; /* thread__fork() copies comm if set */
+	int			comm_len;
+	bool			dead; /* if set thread has exited */
+	struct list_head	namespaces_list;
+	struct rw_semaphore	namespaces_lock;
+	struct list_head	comm_list;
+	struct rw_semaphore	comm_lock;
+	u64			db_id;
+
+	void			*priv; /* thread__priv()/thread__set_priv() */
+	struct thread_stack	*ts;
+	struct nsinfo		*nsinfo;
+#ifdef HAVE_LIBUNWIND_SUPPORT
+	void				*addr_space;
+	struct unwind_libunwind_ops	*unwind_libunwind_ops;
+#endif
+};
+
+struct machine;
+struct namespaces;
+struct comm;
+
+struct thread *thread__new(pid_t pid, pid_t tid);
+int thread__init_map_groups(struct thread *thread, struct machine *machine);
+void thread__delete(struct thread *thread);
+
+struct thread *thread__get(struct thread *thread);
+void thread__put(struct thread *thread);
+
+static inline void __thread__zput(struct thread **thread)
+{
+	thread__put(*thread);
+	*thread = NULL;	/* the caller's pointer is cleared after the put */
+}
+
+#define thread__zput(thread) __thread__zput(&thread)
+
+static inline void thread__exited(struct thread *thread)
+{
+	thread->dead = true;
+}
+
+struct namespaces *thread__namespaces(const struct thread *thread);
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+			   struct namespaces_event *event);
+
+int __thread__set_comm(struct thread *thread, const char *comm, u64 timestamp,
+		       bool exec);
+static inline int thread__set_comm(struct thread *thread, const char *comm,
+				   u64 timestamp)
+{
+	return __thread__set_comm(thread, comm, timestamp, false);
+}
+
+int thread__set_comm_from_proc(struct thread *thread);
+
+int thread__comm_len(struct thread *thread);
+struct comm *thread__comm(const struct thread *thread);
+struct comm *thread__exec_comm(const struct thread *thread);
+const char *thread__comm_str(const struct thread *thread);
+int thread__insert_map(struct thread *thread, struct map *map);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
+size_t thread__fprintf(struct thread *thread, FILE *fp);
+
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
+
+void thread__find_addr_map(struct thread *thread,
+			   u8 cpumode, enum map_type type, u64 addr,
+			   struct addr_location *al);
+
+void thread__find_addr_location(struct thread *thread,
+				u8 cpumode, enum map_type type, u64 addr,
+				struct addr_location *al);
+
+void thread__find_cpumode_addr_location(struct thread *thread,
+					enum map_type type, u64 addr,
+					struct addr_location *al);
+
+static inline void *thread__priv(struct thread *thread)
+{
+	return thread->priv;
+}
+
+static inline void thread__set_priv(struct thread *thread, void *p)
+{
+	thread->priv = p;
+}
+
+/*
+ * Report whether @thread is rejected by the symbol_conf comm/pid/tid lists.
+ * A list that is not set filters nothing; a set list rejects every thread
+ * whose comm string, pid_ or tid (respectively) is not one of its entries.
+ */
+static inline bool thread__is_filtered(struct thread *thread)
+{
+	/* Checked in the order comm, pid, tid; the first miss decides. */
+	if (symbol_conf.comm_list &&
+	    !strlist__has_entry(symbol_conf.comm_list, thread__comm_str(thread)))
+		return true;
+
+	if (symbol_conf.pid_list &&
+	    !intlist__has_entry(symbol_conf.pid_list, thread->pid_))
+		return true;
+
+	return symbol_conf.tid_list &&
+	       !intlist__has_entry(symbol_conf.tid_list, thread->tid);
+}
+
+#endif	/* __PERF_THREAD_H */
diff --git a/util/thread_map.c b/util/thread_map.c
new file mode 100644
index 0000000..5d467d8
--- /dev/null
+++ b/util/thread_map.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "string2.h"
+#include "strlist.h"
+#include <string.h>
+#include <api/fs/fs.h>
+#include "asm/bug.h"
+#include "thread_map.h"
+#include "util.h"
+#include "debug.h"
+#include "event.h"
+
+/* scandir() filter: skip names beginning with '.' (covers "." and ".."). */
+static int filter(const struct dirent *dir)
+{
+	/*
+	 * Non-zero keeps the entry, zero drops it (scandir() convention).
+	 */
+	return dir->d_name[0] != '.';
+}
+
+/* Zero map entries [start, nr) and set err_thread back to -1. */
+static void thread_map__reset(struct thread_map *map, int start, int nr)
+{
+	memset(map->map + start, 0, (nr - start) * sizeof(map->map[0]));
+
+	map->err_thread = -1;
+}
+
+/*
+ * Resize @map to @nr entries (a NULL @map allocates) and reset the entries
+ * from the old map->nr onwards.  NULL means realloc() failed; the old map
+ * is then still valid.
+static struct thread_map *thread_map__realloc(struct thread_map *map, int nr)
+{
+	int first_new = map ? map->nr : 0;
+
+	map = realloc(map, sizeof(*map) + nr * sizeof(map->map[0]));
+	if (map)
+		thread_map__reset(map, first_new, nr);
+
+	return map;
+}
+
+#define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr)
+
+/* One entry per task in /proc/<pid>/task; NULL if the scan or malloc fails. */
+struct thread_map *thread_map__new_by_pid(pid_t pid)
+{
+	struct dirent **tasks = NULL;
+	struct thread_map *threads;
+	char path[256];
+	int nr_tasks, i;
+
+	sprintf(path, "/proc/%d/task", pid);
+	nr_tasks = scandir(path, &tasks, filter, NULL);
+	if (nr_tasks <= 0)
+		return NULL;
+
+	threads = thread_map__alloc(nr_tasks);
+	if (threads) {
+		threads->nr = nr_tasks;
+		refcount_set(&threads->refcnt, 1);
+	}
+	/* Record each tid (if the map exists) and free the scandir() entry. */
+	for (i = 0; i < nr_tasks; i++) {
+		if (threads)
+			thread_map__set_pid(threads, i, atoi(tasks[i]->d_name));
+		zfree(&tasks[i]);
+	}
+	free(tasks);
+	return threads;
+}
+
+/* One-entry map for @tid with refcount 1, or NULL if allocation fails. */
+struct thread_map *thread_map__new_by_tid(pid_t tid)
+{
+	struct thread_map *threads = thread_map__alloc(1);
+
+	if (!threads)
+		return NULL;
+	thread_map__set_pid(threads, 0, tid);
+	threads->nr = 1;
+	refcount_set(&threads->refcnt, 1);
+	return threads;
+}
+
+/*
+ * Map every task under /proc/<pid>/task for each numeric /proc entry, limited
+ * to processes owned by @uid unless it is UINT_MAX.  A pid whose task dir
+ * cannot be scanned is skipped.  NULL if /proc won't open or memory runs out.
+ */
+static struct thread_map *__thread_map__new_all_cpus(uid_t uid)
+{
+	DIR *proc;
+	int max_threads = 32, items, i;
+	char path[NAME_MAX + 1 + 6];
+	struct dirent *dirent, **namelist = NULL;
+	struct thread_map *threads = thread_map__alloc(max_threads);
+
+	if (threads == NULL)
+		goto out;
+
+	proc = opendir("/proc");
+	if (proc == NULL)
+		goto out_free_threads;
+
+	threads->nr = 0;
+	refcount_set(&threads->refcnt, 1);
+
+	while ((dirent = readdir(proc)) != NULL) {
+		char *end;
+		bool grow = false;
+		pid_t pid = strtol(dirent->d_name, &end, 10);
+
+		if (*end) /* only interested in proper numerical dirents */
+			continue;
+
+		snprintf(path, sizeof(path), "/proc/%s", dirent->d_name);
+		if (uid != UINT_MAX) {
+			struct stat st;
+
+			if (stat(path, &st) != 0 || st.st_uid != uid)
+				continue;
+		}
+
+		/* A process may exit before we scan it: skip it, don't fail. */
+		snprintf(path, sizeof(path), "/proc/%d/task", pid);
+		items = scandir(path, &namelist, filter, NULL);
+		if (items <= 0) {
+			pr_debug("skipping pid %d: no tasks found\n", pid);
+			continue;
+		}
+
+		/* Double the capacity until the new tasks fit. */
+		while (threads->nr + items >= max_threads) {
+			max_threads *= 2;
+			grow = true;
+		}
+		if (grow) {
+			struct thread_map *tmp;
+
+			tmp = thread_map__realloc(threads, max_threads);
+			if (tmp == NULL)
+				goto out_free_namelist;
+			threads = tmp;
+		}
+		for (i = 0; i < items; i++) {
+			thread_map__set_pid(threads, threads->nr + i,
+					    atoi(namelist[i]->d_name));
+			zfree(&namelist[i]);
+		}
+		free(namelist);
+		threads->nr += items;
+	}
+
+out_closedir:
+	closedir(proc);
+out:
+	return threads;
+
+out_free_threads:
+	free(threads);
+	return NULL;
+
+out_free_namelist:
+	for (i = 0; i < items; i++)
+		zfree(&namelist[i]);
+	free(namelist);
+	zfree(&threads);
+	goto out_closedir;
+}
+
+struct thread_map *thread_map__new_all_cpus(void)
+{
+	return __thread_map__new_all_cpus(UINT_MAX); /* UINT_MAX: any owner */
+}
+
+struct thread_map *thread_map__new_by_uid(uid_t uid)
+{
+	return __thread_map__new_all_cpus(uid); /* only tasks owned by @uid */
+}
+
+/* @pid (!= -1) wins; else @uid if given and @tid is -1; else the one @tid. */
+struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
+{
+	const bool by_uid = tid == -1 && uid != UINT_MAX;
+
+	if (pid != -1)
+		return thread_map__new_by_pid(pid);
+	return by_uid ? thread_map__new_by_uid(uid) :
+			thread_map__new_by_tid(tid);
+}
+
+/* Map every task of each pid in the comma-separated @pid_str; NULL on error. */
+static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
+{
+	struct thread_map *threads = NULL, *nt;
+	char name[256];
+	int items, total_tasks = 0;
+	struct dirent **namelist = NULL;
+	int i, j = 0;
+	pid_t pid, prev_pid = INT_MAX;
+	char *end_ptr;
+	struct str_node *pos;
+	struct strlist_config slist_config = { .dont_dupstr = true, };
+	struct strlist *slist = strlist__new(pid_str, &slist_config);
+
+	if (!slist)
+		return NULL;
+
+	strlist__for_each_entry(pos, slist) {
+		pid = strtol(pos->s, &end_ptr, 10);
+		if (pid == INT_MIN || pid == INT_MAX ||
+		    (*end_ptr != '\0' && *end_ptr != ','))
+			goto out_free_threads;
+
+		/* Skip a pid equal to the one handled just before it. */
+		if (pid == prev_pid)
+			continue;
+		prev_pid = pid;
+
+		snprintf(name, sizeof(name), "/proc/%d/task", pid);
+		items = scandir(name, &namelist, filter, NULL);
+		if (items <= 0)
+			goto out_free_threads;
+
+		total_tasks += items;
+		nt = thread_map__realloc(threads, total_tasks);
+		if (nt == NULL)
+			goto out_free_namelist;
+		threads = nt;
+		for (i = 0; i < items; i++) {
+			thread_map__set_pid(threads, j++, atoi(namelist[i]->d_name));
+			zfree(&namelist[i]);
+		}
+		threads->nr = total_tasks;
+		free(namelist);
+	}
+
+out:
+	strlist__delete(slist);
+	if (threads)
+		refcount_set(&threads->refcnt, 1);
+	return threads;
+
+out_free_namelist:
+	for (i = 0; i < items; i++)
+		zfree(&namelist[i]);
+	free(namelist);
+
+out_free_threads:
+	zfree(&threads);
+	goto out;
+}
+
+/*
+ * Map with a single placeholder entry whose pid is -1 (comm_init() later
+ * names such an entry "dummy").  Identical to what thread_map__new_by_tid(-1)
+ * builds: one entry, nr = 1, refcount 1, or NULL if allocation fails.
+ */
+struct thread_map *thread_map__new_dummy(void)
+{
+	const pid_t no_pid = -1;
+
+	return thread_map__new_by_tid(no_pid);
+}
+
+/* Map the comma-separated tids in @tid_str (NULL: dummy map); NULL on error. */
+struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
+{
+	struct thread_map *threads = NULL, *nt;
+	int ntasks = 0;
+	pid_t tid, prev_tid = INT_MAX;
+	char *end_ptr;
+	struct str_node *pos;
+	struct strlist_config slist_config = { .dont_dupstr = true, };
+	struct strlist *slist;
+
+	/* perf-stat expects threads to be generated even if tid not given */
+	if (!tid_str)
+		return thread_map__new_dummy();
+
+	slist = strlist__new(tid_str, &slist_config);
+	if (!slist)
+		return NULL;
+
+	strlist__for_each_entry(pos, slist) {
+		tid = strtol(pos->s, &end_ptr, 10);
+		if (tid == INT_MIN || tid == INT_MAX ||
+		    (*end_ptr != '\0' && *end_ptr != ','))
+			goto out_free_threads;
+
+		if (tid == prev_tid)
+			continue;
+		prev_tid = tid;	/* so an immediate repeat is skipped */
+
+		ntasks++;
+		nt = thread_map__realloc(threads, ntasks);
+		if (nt == NULL)
+			goto out_free_threads;
+
+		threads = nt;
+		thread_map__set_pid(threads, ntasks - 1, tid);
+		threads->nr = ntasks;
+	}
+out:
+	strlist__delete(slist);	/* on success too: it was leaked before */
+	if (threads)
+		refcount_set(&threads->refcnt, 1);
+	return threads;
+
+out_free_threads:
+	zfree(&threads);
+	goto out;
+}
+
+/* Selector precedence: pid list, uid (if no tid), all threads, tid list. */
+struct thread_map *thread_map__new_str(const char *pid, const char *tid,
+				       uid_t uid, bool all_threads)
+{
+	if (pid)
+		return thread_map__new_by_pid_str(pid);
+	if (!tid && uid != UINT_MAX)
+		return thread_map__new_by_uid(uid);
+	/* No pid or uid selector: all threads if asked, else the tids. */
+	if (!all_threads)
+		return thread_map__new_by_tid_str(tid);
+
+	return thread_map__new_all_cpus();
+}
+
+static void thread_map__delete(struct thread_map *threads)
+{
+	int idx;
+
+	if (!threads)	/* a NULL map is a no-op */
+		return;
+	WARN_ONCE(refcount_read(&threads->refcnt) != 0,
+		  "thread map refcnt unbalanced\n");
+	for (idx = 0; idx < threads->nr; idx++)
+		free(threads->map[idx].comm); /* free(NULL) is harmless */
+	free(threads);
+}
+
+struct thread_map *thread_map__get(struct thread_map *map)
+{
+	if (map)	/* a NULL map is passed straight through */
+		refcount_inc(&map->refcnt);
+	return map;
+}
+
+void thread_map__put(struct thread_map *map)
+{
+	if (map && refcount_dec_and_test(&map->refcnt))
+		thread_map__delete(map);	/* dec_and_test returned true */
+}
+
+/* Write "<nr> threads: pid, pid, ...\n" to @fp; sums the fprintf() results. */
+size_t thread_map__fprintf(struct thread_map *threads, FILE *fp)
+{
+	int nr = threads->nr, i;
+	size_t total = fprintf(fp, "%d thread%s: ", nr, nr > 1 ? "s" : "");
+
+	for (i = 0; i < nr; ++i)
+		total += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i].pid);
+	return total + fprintf(fp, "\n");
+}
+
+/* Read <procfs>/<pid>/comm into *@comm, NUL-terminated, then rtrim() it. */
+static int get_comm(char **comm, pid_t pid)
+{
+	char *path;
+	size_t len;
+	int err;
+
+	if (asprintf(&path, "%s/%d/comm", procfs__mountpoint(), pid) == -1)
+		return -ENOMEM;
+
+	err = filename__read_str(path, comm, &len);
+	free(path);
+	if (err)
+		return err;
+
+	/*
+	 * We're reading 16 bytes, while filename__read_str allocates data
+	 * per BUFSIZ bytes, so index len is inside the buffer.
+	 */
+	(*comm)[len] = 0;
+	rtrim(*comm);
+	return 0;
+}
+
+/* Fill in the comm of entry @i: "dummy" for pid -1, else read from procfs. */
+static void comm_init(struct thread_map *map, int i)
+{
+	struct thread_map_data *entry = &map->map[i];
+	char *comm = NULL;
+
+	if (entry->pid == -1) {
+		/* The placeholder entry has no process to ask. */
+		comm = strdup("dummy");
+	} else if (get_comm(&comm, entry->pid)) {
+		/*
+		 * The comm is only a nicety, so a failed lookup is
+		 * reported but otherwise ignored.
+		 */
+		pr_warning("Couldn't resolve comm name for pid %d\n",
+			   entry->pid);
+	}
+
+	entry->comm = comm;
+}
+
+void thread_map__read_comms(struct thread_map *threads)
+{
+	int i;
+
+	for (i = 0; i < threads->nr; ++i)
+		comm_init(threads, i);	/* sets map[i].comm, maybe to NULL */
+}
+
+/* Fill @threads from @event: nr, each pid, a copy of each comm, refcount 1. */
+static void thread_map__copy_event(struct thread_map *threads,
+				   struct thread_map_event *event)
+{
+	unsigned i = 0;
+
+	threads->nr = (int) event->nr;
+	refcount_set(&threads->refcnt, 1);
+
+	for (; i < event->nr; i++) {
+		threads->map[i].pid = (pid_t) event->entries[i].pid;
+		threads->map[i].comm = strndup(event->entries[i].comm, 16);
+	}
+}
+
+/* Allocate a map sized by event->nr and fill it from @event; NULL if OOM. */
+struct thread_map *thread_map__new_event(struct thread_map_event *event)
+{
+	struct thread_map *threads = thread_map__alloc(event->nr);
+
+	if (!threads)
+		return NULL;
+	thread_map__copy_event(threads, event);
+	return threads;
+}
+
+/* Linear search: true when some entry of @threads has pid == @pid. */
+bool thread_map__has(struct thread_map *threads, pid_t pid)
+{
+	int i = 0;
+
+	while (i < threads->nr && threads->map[i].pid != pid)
+		i++;
+
+	/* The scan stops short of nr only when it found a match. */
+	return i < threads->nr;
+}
+
+/*
+ * Remove entry @idx, freeing its comm and moving the later entries down one
+ * slot.  Returns 0, or -EINVAL if the map is empty or @idx is out of range.
+ */
+int thread_map__remove(struct thread_map *threads, int idx)
+{
+	int i;
+
+	if (threads->nr < 1)
+		return -EINVAL;
+
+	/* A negative index would free and overwrite memory before map[0]. */
+	if (idx < 0 || idx >= threads->nr)
+		return -EINVAL;
+
+	free(threads->map[idx].comm);
+	for (i = idx; i < threads->nr - 1; i++)
+		threads->map[i] = threads->map[i + 1];
+	threads->nr--;
+	return 0;
+}
diff --git a/util/thread_map.h b/util/thread_map.h
new file mode 100644
index 0000000..2f689c9
--- /dev/null
+++ b/util/thread_map.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_THREAD_MAP_H
+#define __PERF_THREAD_MAP_H
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <linux/refcount.h>
+
+struct thread_map_data {
+	pid_t    pid;
+	char	*comm;
+};
+
+struct thread_map {
+	refcount_t refcnt;
+	int nr;			/* entries of map[] in use */
+	int err_thread;		/* thread_map__reset() sets this to -1 */
+	struct thread_map_data map[];	/* flexible array, one per thread */
+};
+
+struct thread_map_event;
+
+struct thread_map *thread_map__new_dummy(void);
+struct thread_map *thread_map__new_by_pid(pid_t pid);
+struct thread_map *thread_map__new_by_tid(pid_t tid);
+struct thread_map *thread_map__new_by_uid(uid_t uid);
+struct thread_map *thread_map__new_all_cpus(void);
+struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+struct thread_map *thread_map__new_event(struct thread_map_event *event);
+
+struct thread_map *thread_map__get(struct thread_map *map);
+void thread_map__put(struct thread_map *map);
+
+struct thread_map *thread_map__new_str(const char *pid,
+		const char *tid, uid_t uid, bool all_threads);
+
+struct thread_map *thread_map__new_by_tid_str(const char *tid_str);
+
+size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
+
+static inline int thread_map__nr(struct thread_map *threads)
+{
+	return threads ? threads->nr : 1;	/* a NULL map counts as 1 */
+}
+
+static inline pid_t thread_map__pid(struct thread_map *map, int thread)
+{
+	return map->map[thread].pid;
+}
+
+static inline void
+thread_map__set_pid(struct thread_map *map, int thread, pid_t pid)
+{
+	map->map[thread].pid = pid;
+}
+
+static inline char *thread_map__comm(struct thread_map *map, int thread)
+{
+	return map->map[thread].comm;
+}
+
+void thread_map__read_comms(struct thread_map *threads);
+bool thread_map__has(struct thread_map *threads, pid_t pid);
+int thread_map__remove(struct thread_map *threads, int idx);
+#endif	/* __PERF_THREAD_MAP_H */
diff --git a/util/time-utils.c b/util/time-utils.c
new file mode 100644
index 0000000..6193b46
--- /dev/null
+++ b/util/time-utils.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <linux/time64.h>
+#include <time.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <math.h>
+
+#include "perf.h"
+#include "debug.h"
+#include "time-utils.h"
+
+/*
+ * Parse "<sec>[.<frac>]" (at most 9 fraction digits) into nanoseconds.
+ * Returns 0 and stores the result in *ptime, or -1 on malformed input.
+ */
+int parse_nsec_time(const char *str, u64 *ptime)
+{
+	u64 time_sec, time_nsec = 0;
+	char *end;
+	int digits = 0;
+
+	time_sec = strtoull(str, &end, 10);
+	if (*end != '.' && *end != '\0')
+		return -1;
+
+	if (*end == '.') {
+		/*
+		 * Accept digits only: strtoul() on the fraction would also
+		 * take a sign or leading blanks, e.g. "1.-5".
+		 */
+		for (++end; *end != '\0'; end++, digits++) {
+			if (*end < '0' || *end > '9' || digits == 9)
+				return -1;
+			time_nsec = time_nsec * 10 + (*end - '0');
+		}
+		/* make it nsec precision */
+		for (; digits < 9; digits++)
+			time_nsec *= 10;
+	}
+
+	*ptime = time_sec * NSEC_PER_SEC + time_nsec;
+	return 0;
+}
+
+/* Parse whichever of the two strings is non-empty into @ptime; -1 on error. */
+static int parse_timestr_sec_nsec(struct perf_time_interval *ptime,
+				  char *start_str, char *end_str)
+{
+	const bool has_start = start_str && *start_str != '\0';
+	const bool has_end = end_str && *end_str != '\0';
+
+	if (has_start && parse_nsec_time(start_str, &ptime->start))
+		return -1;
+	/* An absent or empty bound leaves the field as the caller set it. */
+	if (has_end && parse_nsec_time(end_str, &ptime->end))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Split a copy of @ostr at the first @ch.  *start receives the copy (the
+ * caller frees it); *end points just past the separator inside that copy, or
+ * is NULL when @ch does not occur.  A NULL or empty @ostr returns 0 and
+ * leaves both outputs untouched.  Returns -ENOMEM if the copy fails.
+ */
+static int split_start_end(char **start, char **end, const char *ostr, char ch)
+{
+	char *copy, *sep;
+
+	if (!ostr || !*ostr)
+		return 0;
+
+	/* Work on a duplicate: the separator gets overwritten with a NUL. */
+	copy = strdup(ostr);
+	if (!copy)
+		return -ENOMEM;
+
+	sep = strchr(copy, ch);
+	if (sep)
+		*sep++ = '\0';
+
+	*start = copy;
+	*end = sep;
+	return 0;
+}
+
+/*
+ * Parse "<start>,<end>" into @ptime; an omitted side stays 0 (no bound).
+ */
+int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
+{
+	char *start_str = NULL, *end_str;
+	int rc = split_start_end(&start_str, &end_str, ostr, ',');
+
+	if (rc || !start_str)
+		return rc;
+
+	ptime->start = ptime->end = 0;
+	rc = parse_timestr_sec_nsec(ptime, start_str, end_str);
+	/* end_str points into start_str's buffer: one free() covers both. */
+	free(start_str);
+
+	/* make sure end time is after start time if it was given */
+	if (!rc && ptime->end && ptime->end < ptime->start)
+		return -EINVAL;
+
+	pr_debug("start time %" PRIu64 ", ", ptime->start);
+	pr_debug("end time %" PRIu64 "\n", ptime->end);
+
+	return rc;
+}
+
+/* "<number>%" -> fraction (50% -> 0.5); cuts @str at '%'; -1 if malformed. */
+static int parse_percent(double *pcnt, char *str)
+{
+	char *c, *endptr;
+	double d;
+
+	c = strchr(str, '%');
+	if (!c)
+		return -1;
+	*c = '\0';
+
+	d = strtod(str, &endptr);
+	/* "nan" would slip through the callers' range checks: reject it. */
+	if (*endptr != '\0' || !isfinite(d))
+		return -1;
+	*pcnt = d / 100.0;
+	return 0;
+}
+
+static int percent_slash_split(char *str, struct perf_time_interval *ptime,
+			       u64 start, u64 end)
+{
+	char *p, *end_str;
+	double pcnt, start_pcnt, end_pcnt;
+	u64 total = end - start;
+	int i;
+
+	/*
+	 * "<p>%/<n>" selects the n-th slice of width p% of [start, end].
+	 * Example: 10%/2 is the second 10% slice, i.e. 10% to 20%.
+	 */
+
+	/* We can modify this string since the original one is copied */
+	p = strchr(str, '/');
+	if (!p)
+		return -1;
+
+	*p = '\0';
+	if (parse_percent(&pcnt, str) < 0)
+		return -1;
+
+	p++;
+	i = (int)strtol(p, &end_str, 10);
+	if (*end_str)
+		return -1;
+	/* A slice width of zero or less cannot select anything. */
+	if (pcnt <= 0.0)
+		return -1;
+
+	start_pcnt = pcnt * (i - 1);
+	end_pcnt = pcnt * i;
+	/* n < 1, or a slice ending beyond 100%, lands outside 0..1 here. */
+	if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
+	    end_pcnt < 0.0 || end_pcnt > 1.0) {
+		return -1;
+	}
+	/* Scale both fractions to the span and offset them from @start. */
+	ptime->start = start + round(start_pcnt * total);
+	ptime->end = start + round(end_pcnt * total);
+
+	return 0;
+}
+
+/* Parse one "<a>%-<b>%" range (e.g. 0%-10%) into @ptime; -1 if malformed. */
+static int percent_dash_split(char *str, struct perf_time_interval *ptime,
+			      u64 start, u64 end)
+{
+	char *start_str = NULL, *end_str = NULL;
+	double start_pcnt, end_pcnt;
+	u64 total = end - start;
+	int ret;
+
+	ret = split_start_end(&start_str, &end_str, str, '-');
+	if (ret)
+		return ret;
+
+	/*
+	 * No text, or no '-' (end_str NULL): not a range, and parse_percent()
+	 * would dereference the NULL end_str.
+	 */
+	if (!start_str || !end_str ||
+	    (parse_percent(&start_pcnt, start_str) != 0) ||
+	    (parse_percent(&end_pcnt, end_str) != 0)) {
+		free(start_str);
+		return -1;
+	}
+	free(start_str);
+
+	if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
+	    end_pcnt < 0.0 || end_pcnt > 1.0 ||
+	    start_pcnt > end_pcnt) {
+		return -1;
+	}
+
+	ptime->start = start + round(start_pcnt * total);
+	ptime->end = start + round(end_pcnt * total);
+	return 0;
+}
+
+typedef int (*time_pecent_split)(char *, struct perf_time_interval *,
+				 u64 start, u64 end);
+
+static int percent_comma_split(struct perf_time_interval *ptime_buf, int num,
+			       const char *ostr, u64 start, u64 end,
+			       time_pecent_split func)
+{
+	char *str, *p1, *p2;
+	int len, ret, i = 0;
+	/* Tokenise a private copy: each ',' found is overwritten with NUL. */
+	str = strdup(ostr);
+	if (str == NULL)
+		return -ENOMEM;
+
+	len = strlen(str);
+	p1 = str;
+	/* p1 walks the comma-separated pieces; i counts the ranges filled. */
+	while (p1 < str + len) {
+		if (i >= num) {	/* more pieces than ptime_buf has slots */
+			free(str);
+			return -1;
+		}
+
+		p2 = strchr(p1, ',');
+		if (p2)
+			*p2 = '\0';
+
+		ret = (func)(p1, &ptime_buf[i], start, end);
+		if (ret < 0) {	/* any negative helper result becomes -1 */
+			free(str);
+			return -1;
+		}
+
+		pr_debug("start time %d: %" PRIu64 ", ", i, ptime_buf[i].start);
+		pr_debug("end time %d: %" PRIu64 "\n", i, ptime_buf[i].end);
+
+		i++;
+
+		if (p2)
+			p1 = p2 + 1;
+		else
+			break;
+	}
+	/* On success the return value is the number of ranges parsed. */
+	free(str);
+	return i;
+}
+
+/*
+ * Handle a lone "xx%" by parsing it as "xx%/1", the first slice of that
+ * width.  @c is the '%' found in @ostr.  Returns 1 (one range stored), -1
+ * if malformed, or -ENOMEM.
+ */
+static int one_percent_convert(struct perf_time_interval *ptime_buf,
+			       const char *ostr, u64 start, u64 end, char *c)
+{
+	static const char suffix[] = "/1";
+	const size_t len = strlen(ostr);
+	char *slice;
+	int ret;
+
+	/* '%' must be the final character of the string. */
+	if (c != ostr + len - 1)
+		return -1;
+
+	slice = malloc(len + sizeof(suffix));
+	if (!slice)
+		return -ENOMEM;
+
+	memcpy(slice, ostr, len);
+	memcpy(slice + len, suffix, sizeof(suffix));
+
+	ret = percent_slash_split(slice, ptime_buf, start, end);
+	free(slice);
+
+	/* percent_slash_split() returns 0 on success; report one range. */
+	return ret == 0 ? 1 : ret;
+}
+
+/*
+ * Parse a percent-based time selection into at most @num ranges of
+ * @ptime_buf, scaled to [start, end].  Accepted forms:
+ *   10%/2,10%/3      the second and the third 10% slice
+ *   0%-10%,30%-40%   explicit percent ranges
+ *   50%              a single slice, handled as 50%/1
+ * Returns the number of ranges stored, or a negative value on error.
+ */
+int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
+				 const char *ostr, u64 start, u64 end)
+{
+	time_pecent_split split = NULL;
+	char *c;
+
+	memset(ptime_buf, 0, sizeof(*ptime_buf) * num);
+
+	/* A '/' anywhere selects the slice form, else a '-' the range form. */
+	if (strchr(ostr, '/'))
+		split = percent_slash_split;
+	else if (strchr(ostr, '-'))
+		split = percent_dash_split;
+
+	if (split)
+		return percent_comma_split(ptime_buf, num, ostr, start, end,
+					   split);
+
+	c = strchr(ostr, '%');
+	if (!c)
+		return -1;
+
+	return one_percent_convert(ptime_buf, ostr, start, end, c);
+}
+
+/*
+ * Allocate a zeroed interval array with one slot per comma-separated item
+ * of @ostr (commas + 1), or a single slot when @ostr is NULL.  *size is set
+ * to the slot count even if calloc() fails and NULL is returned.
+ */
+struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size)
+{
+	struct perf_time_interval *ranges;
+	const char *comma;
+	int nr_ranges = 1;
+
+	/*
+	 * One range is always allocated; each ',' in @ostr adds another.
+	 */
+	if (ostr) {
+		comma = strchr(ostr, ',');
+		while (comma) {
+			nr_ranges++;
+			comma = strchr(comma + 1, ',');
+		}
+	}
+
+	*size = nr_ranges;
+	ranges = calloc(nr_ranges, sizeof(*ranges));
+
+	return ranges;
+}
+
+/*
+ * True if @timestamp lies outside [start, end].  A bound of 0 is not applied,
+ * and a sample without a timestamp (0) is never skipped.
+ */
+bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp)
+{
+	bool too_early, too_late;
+
+	if (!timestamp)
+		return false;
+	too_early = ptime->start && timestamp < ptime->start;
+	too_late = ptime->end && timestamp > ptime->end;
+	return too_early || too_late;
+}
+
+/*
+ * True if @timestamp falls in none of the @num ranges.  With several ranges
+ * each is half-open, [start, end), except the last which includes its end.
+ */
+bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
+				   int num, u64 timestamp)
+{
+	int i;
+
+	if (timestamp == 0 || num == 0)
+		return false;
+	if (num == 1)
+		return perf_time__skip_sample(&ptime_buf[0], timestamp);
+
+	/* start/end of multiple time ranges must be valid. */
+	for (i = 0; i < num; i++) {
+		const struct perf_time_interval *r = &ptime_buf[i];
+
+		if (timestamp < r->start || timestamp > r->end)
+			continue;
+		/* Only the last range admits a timestamp equal to its end. */
+		if (i == num - 1 || timestamp != r->end)
+			return false;
+	}
+
+	return i == num;
+}
+
+int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz)
+{
+	u64  sec = timestamp / NSEC_PER_SEC;
+	u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC;
+	/* "<sec>.<usec>" with the microseconds zero-padded to six digits. */
+	return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec);
+}
+
+/* Write local time as YYYYMMDDhhmmss plus two digits of 1/100 s into @buf. */
+int fetch_current_timestamp(char *buf, size_t sz)
+{
+	struct timeval now;
+	struct tm local;
+	char stamp[32];
+	unsigned centis;
+
+	if (gettimeofday(&now, NULL) || !localtime_r(&now.tv_sec, &local) ||
+	    !strftime(stamp, sizeof(stamp), "%Y%m%d%H%M%S", &local))
+		return -1;
+
+	centis = (unsigned)now.tv_usec / 10000;
+	scnprintf(buf, sz, "%s%02u", stamp, centis);
+	return 0;
+}
diff --git a/util/time-utils.h b/util/time-utils.h
new file mode 100644
index 0000000..70b177d
--- /dev/null
+++ b/util/time-utils.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TIME_UTILS_H_
+#define _TIME_UTILS_H_
+
+#include <stddef.h>
+#include <linux/types.h>
+
+struct perf_time_interval {
+	u64 start, end;
+};
+
+int parse_nsec_time(const char *str, u64 *ptime);
+
+int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr);
+
+int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
+				 const char *ostr, u64 start, u64 end);
+
+struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size);
+
+bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp);
+
+bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
+				   int num, u64 timestamp);
+
+int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz);
+
+int fetch_current_timestamp(char *buf, size_t sz);
+
+#endif
diff --git a/util/tool.h b/util/tool.h
new file mode 100644
index 0000000..183c914
--- /dev/null
+++ b/util/tool.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TOOL_H
+#define __PERF_TOOL_H
+
+#include <stdbool.h>
+
+#include <linux/types.h>
+
+struct perf_session;
+union perf_event;
+struct perf_evlist;
+struct perf_evsel;
+struct perf_sample;
+struct perf_tool;
+struct machine;
+struct ordered_events;
+
+typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event,
+			    struct perf_sample *sample,
+			    struct perf_evsel *evsel, struct machine *machine);
+
+typedef int (*event_op)(struct perf_tool *tool, union perf_event *event,
+			struct perf_sample *sample, struct machine *machine);
+
+typedef int (*event_attr_op)(struct perf_tool *tool,
+			     union perf_event *event,
+			     struct perf_evlist **pevlist);
+
+typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
+			 struct perf_session *session);
+
+typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
+			struct ordered_events *oe);
+
+typedef s64 (*event_op3)(struct perf_tool *tool, union perf_event *event,
+			 struct perf_session *session);
+
+enum show_feature_header {
+	SHOW_FEAT_NO_HEADER = 0,
+	SHOW_FEAT_HEADER,
+	SHOW_FEAT_HEADER_FULL_INFO,
+};
+
+struct perf_tool {
+	event_sample	sample,
+			read;
+	event_op	mmap,
+			mmap2,
+			comm,
+			namespaces,
+			fork,
+			exit,
+			lost,
+			lost_samples,
+			aux,
+			itrace_start,
+			context_switch,
+			throttle,
+			unthrottle;
+	event_attr_op	attr;
+	event_attr_op	event_update;
+	event_op2	tracing_data;
+	event_oe	finished_round;
+	event_op2	build_id,
+			id_index,
+			auxtrace_info,
+			auxtrace_error,
+			time_conv,
+			thread_map,
+			cpu_map,
+			stat_config,
+			stat,
+			stat_round,
+			feature;
+	event_op3	auxtrace;
+	bool		ordered_events;
+	bool		ordering_requires_timestamps;
+	bool		namespace_events;
+	bool		no_warn;
+	enum show_feature_header show_feat_hdr;
+};
+
+#endif /* __PERF_TOOL_H */
diff --git a/util/top.c b/util/top.c
new file mode 100644
index 0000000..8e517de
--- /dev/null
+++ b/util/top.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Refactored from builtin-top.c, see that files for further copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "cpumap.h"
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "parse-events.h"
+#include "symbol.h"
+#include "top.h"
+#include <inttypes.h>
+
+#define SNPRINTF(buf, size, fmt, args...) \
+({ \
+	size_t r = snprintf(buf, size, fmt, ## args); \
+	r > size ?  size : r; /* cap the result at size when truncated */ \
+})
+
+size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
+{
+	float samples_per_sec;
+	float ksamples_per_sec;
+	float esamples_percent;
+	struct record_opts *opts = &top->record_opts;
+	struct target *target = &opts->target;
+	size_t ret = 0;
+
+	if (top->samples) {
+		samples_per_sec = top->samples / top->delay_secs;
+		ksamples_per_sec = top->kernel_samples / top->delay_secs;
+		esamples_percent = (100.0 * top->exact_samples) / top->samples;
+	} else {
+		samples_per_sec = ksamples_per_sec = esamples_percent = 0.0;
+	}
+	/* NOTE(review): the guest branch divides by samples_per_sec unguarded. */
+	if (!perf_guest) {
+		float ksamples_percent = 0.0;
+
+		if (samples_per_sec)
+			ksamples_percent = (100.0 * ksamples_per_sec) /
+							samples_per_sec;
+		ret = SNPRINTF(bf, size,
+			       "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%%"
+			       "  exact: %4.1f%% [", samples_per_sec,
+			       ksamples_percent, esamples_percent);
+	} else {
+		float us_samples_per_sec = top->us_samples / top->delay_secs;
+		float guest_kernel_samples_per_sec = top->guest_kernel_samples / top->delay_secs;
+		float guest_us_samples_per_sec = top->guest_us_samples / top->delay_secs;
+
+		ret = SNPRINTF(bf, size,
+			       "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% us:%4.1f%%"
+			       " guest kernel:%4.1f%% guest us:%4.1f%%"
+			       " exact: %4.1f%% [", samples_per_sec,
+			       100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
+						 samples_per_sec)),
+			       100.0 - (100.0 * ((samples_per_sec - us_samples_per_sec) /
+						 samples_per_sec)),
+			       100.0 - (100.0 * ((samples_per_sec -
+						  guest_kernel_samples_per_sec) /
+						 samples_per_sec)),
+			       100.0 - (100.0 * ((samples_per_sec -
+						  guest_us_samples_per_sec) /
+						 samples_per_sec)),
+			       esamples_percent);
+	}
+	/* With a single event, show its sample period ("Hz" in freq mode). */
+	if (top->evlist->nr_entries == 1) {
+		struct perf_evsel *first = perf_evlist__first(top->evlist);
+		ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ",
+				(uint64_t)first->attr.sample_period,
+				opts->freq ? "Hz" : "");
+	}
+
+	ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel));
+
+	ret += SNPRINTF(bf + ret, size - ret, "], ");
+	/* Target description: pid, tid, uid, or "all", in that precedence. */
+	if (target->pid)
+		ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s",
+				target->pid);
+	else if (target->tid)
+		ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
+				target->tid);
+	else if (target->uid_str != NULL)
+		ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
+				target->uid_str);
+	else
+		ret += SNPRINTF(bf + ret, size - ret, " (all");
+	/* CPU part: the explicit list if given, else a count (none for tid). */
+	if (target->cpu_list)
+		ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
+				top->evlist->cpus->nr > 1 ? "s" : "",
+				target->cpu_list);
+	else {
+		if (target->tid)
+			ret += SNPRINTF(bf + ret, size - ret, ")");
+		else
+			ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
+					top->evlist->cpus->nr,
+					top->evlist->cpus->nr > 1 ? "s" : "");
+	}
+
+	return ret;
+}
+
+void perf_top__reset_sample_counters(struct perf_top *top)
+{
+	top->samples = top->us_samples = top->kernel_samples =
+	top->exact_samples = top->guest_kernel_samples =
+	top->guest_us_samples = 0;	/* one chained store zeroes all six */
+}
diff --git a/util/top.h b/util/top.h
new file mode 100644
index 0000000..9892323
--- /dev/null
+++ b/util/top.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TOP_H
+#define __PERF_TOP_H 1
+
+#include "tool.h"
+#include <linux/types.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ioctl.h>
+
+struct perf_evlist;
+struct perf_evsel;
+struct perf_session;
+
+struct perf_top {
+	struct perf_tool   tool;
+	struct perf_evlist *evlist;
+	struct record_opts record_opts;
+	/*
+	 * Symbols will be added here in perf_event__process_sample and will
+	 * get out after decayed.
+	 */
+	u64		   samples;	/* the six u64 counters here are zeroed together */
+	u64		   kernel_samples, us_samples;
+	u64		   exact_samples;
+	u64		   guest_us_samples, guest_kernel_samples; /* by perf_top__reset_sample_counters() */
+	int		   print_entries, count_filter, delay_secs;
+	int		   max_stack;
+	bool		   hide_kernel_symbols, hide_user_symbols, zero;
+	bool		   use_tui, use_stdio;
+	bool		   vmlinux_warned;
+	bool		   dump_symtab;
+	struct hist_entry  *sym_filter_entry;
+	struct perf_evsel  *sym_evsel;	/* its name is printed by perf_top__header_snprintf() */
+	struct perf_session *session;
+	struct winsize	   winsize;
+	int		   realtime_prio;
+	int		   sym_pcnt_filter;
+	const char	   *sym_filter;
+	float		   min_percent;
+	unsigned int	   nr_threads_synthesize;
+};
+
+#define CONSOLE_CLEAR "[H[2J"
+
+size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
+void perf_top__reset_sample_counters(struct perf_top *top);
+#endif /* __PERF_TOP_H */
diff --git a/util/trace-event-info.c b/util/trace-event-info.c
new file mode 100644
index 0000000..d7f2113
--- /dev/null
+++ b/util/trace-event-info.c
@@ -0,0 +1,617 @@
+/*
+ * Copyright (C) 2008,2009, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include "util.h"
+#include <dirent.h>
+#include <mntent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+
+#include "../perf.h"
+#include "trace-event.h"
+#include <api/fs/tracing_path.h>
+#include "evsel.h"
+#include "debug.h"
+
+#define VERSION "0.6"
+
+static int output_fd;
+
+
+int bigendian(void)
+{
+	unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0};
+	unsigned int *ptr;
+
+	ptr = (unsigned int *)(void *)str;	/* reinterpret the leading bytes as an integer */
+	return *ptr == 0x01020304;	/* equal only if the first byte is the most significant */
+}
+
+/* unfortunately, you can not stat debugfs or proc files for size */
+static int record_file(const char *file, ssize_t hdr_sz)
+{
+	unsigned long long size = 0;	/* running byte count of the copied payload */
+	char buf[BUFSIZ], *sizep;
+	off_t hdr_pos = lseek(output_fd, 0, SEEK_CUR);	/* where the size header goes */
+	int r, fd;
+	int err = -EIO;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0) {
+		pr_debug("Can't read '%s'", file);
+		return -errno;	/* NOTE(review): pr_debug() may have changed errno - confirm */
+	}
+
+	/* put in zeros for file size, then fill true size later */
+	if (hdr_sz) {
+		if (write(output_fd, &size, hdr_sz) != hdr_sz)
+			goto out;
+	}
+
+	do {
+		r = read(fd, buf, BUFSIZ);
+		if (r > 0) {
+			size += r;
+			if (write(output_fd, buf, r) != r)
+				goto out;
+		}
+	} while (r > 0);	/* NOTE(review): r < 0 (read error) ends the copy like EOF */
+
+	/* ugh, handle big-endian hdr_size == 4 */
+	sizep = (char*)&size;
+	if (bigendian())
+		sizep += sizeof(u64) - hdr_sz;	/* skip to the last hdr_sz bytes of size */
+
+	if (hdr_sz && pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0) {
+		pr_debug("writing file size failed\n");
+		goto out;
+	}
+
+	err = 0;
+out:
+	close(fd);
+	return err;
+}
+
+static int record_header_files(void)
+{
+	char *path;
+	struct stat st;
+	int err = -EIO;	/* default for every failure except the second path lookup */
+
+	path = get_tracing_file("events/header_page");
+	if (!path) {
+		pr_debug("can't get tracing/events/header_page");
+		return -ENOMEM;
+	}
+
+	if (stat(path, &st) < 0) {
+		pr_debug("can't read '%s'", path);
+		goto out;
+	}
+
+	if (write(output_fd, "header_page", 12) != 12) {	/* 11 characters + NUL */
+		pr_debug("can't write header_page\n");
+		goto out;
+	}
+
+	if (record_file(path, 8) < 0) {	/* contents behind an 8-byte size header */
+		pr_debug("can't record header_page file\n");
+		goto out;
+	}
+
+	put_tracing_file(path);
+
+	path = get_tracing_file("events/header_event");
+	if (!path) {
+		pr_debug("can't get tracing/events/header_event");
+		err = -ENOMEM;
+		goto out;	/* path is NULL when it reaches put_tracing_file() below */
+	}
+
+	if (stat(path, &st) < 0) {
+		pr_debug("can't read '%s'", path);
+		goto out;
+	}
+
+	if (write(output_fd, "header_event", 13) != 13) {	/* 12 characters + NUL */
+		pr_debug("can't write header_event\n");
+		goto out;
+	}
+
+	if (record_file(path, 8) < 0) {
+		pr_debug("can't record header_event file\n");
+		goto out;
+	}
+
+	err = 0;
+out:
+	put_tracing_file(path);
+	return err;
+}
+
+static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
+{
+	while (tps) {
+		if (!strcmp(sys, tps->name))	/* compared with ->name, despite the "sys" parameter name */
+			return true;
+		tps = tps->next;
+	}
+
+	return false;
+}
+
+#define for_each_event(dir, dent, tps)	/* tps is not used in the expansion */ \
+	while ((dent = readdir(dir)))	/* body runs for directories other than "." and ".." */ \
+		if (dent->d_type == DT_DIR &&			\
+		    (strcmp(dent->d_name, ".")) &&		\
+		    (strcmp(dent->d_name, "..")))		\
+
+static int copy_event_system(const char *sys, struct tracepoint_path *tps)
+{
+	struct dirent *dent;
+	struct stat st;
+	char *format;
+	DIR *dir;
+	int count = 0;
+	int ret;
+	int err;
+
+	dir = opendir(sys);
+	if (!dir) {
+		pr_debug("can't read directory '%s'", sys);
+		return -errno;	/* NOTE(review): pr_debug() may have changed errno - confirm */
+	}
+
+	for_each_event(dir, dent, tps) {	/* pass 1: count listed events with a format file */
+		if (!name_in_tp_list(dent->d_name, tps))
+			continue;
+
+		if (asprintf(&format, "%s/%s/format", sys, dent->d_name) < 0) {
+			err = -ENOMEM;
+			goto out;
+		}
+		ret = stat(format, &st);
+		free(format);
+		if (ret < 0)
+			continue;
+		count++;
+	}
+
+	if (write(output_fd, &count, 4) != 4) {	/* count goes out as 4 raw bytes */
+		err = -EIO;
+		pr_debug("can't write count\n");
+		goto out;
+	}
+
+	rewinddir(dir);
+	for_each_event(dir, dent, tps) {	/* pass 2: record each of those format files */
+		if (!name_in_tp_list(dent->d_name, tps))
+			continue;
+
+		if (asprintf(&format, "%s/%s/format", sys, dent->d_name) < 0) {
+			err = -ENOMEM;
+			goto out;
+		}
+		ret = stat(format, &st);
+
+		if (ret >= 0) {
+			err = record_file(format, 8);	/* 8-byte size header per format file */
+			if (err) {
+				free(format);
+				goto out;
+			}
+		}
+		free(format);
+	}
+	err = 0;
+out:
+	closedir(dir);
+	return err;
+}
+
+static int record_ftrace_files(struct tracepoint_path *tps)
+{
+	char *path;
+	int ret;
+
+	path = get_tracing_file("events/ftrace");
+	if (!path) {
+		pr_debug("can't get tracing/events/ftrace");
+		return -ENOMEM;
+	}
+
+	ret = copy_event_system(path, tps);	/* the "ftrace" system is recorded without a name string */
+
+	put_tracing_file(path);
+
+	return ret;
+}
+
+static bool system_in_tp_list(char *sys, struct tracepoint_path *tps)
+{
+	while (tps) {
+		if (!strcmp(sys, tps->system))	/* true as soon as one entry belongs to this system */
+			return true;
+		tps = tps->next;
+	}
+
+	return false;
+}
+
+/* Write the number of listed non-ftrace event systems, then each one's name and format files. */
+static int record_event_files(struct tracepoint_path *tps)
+{
+	struct dirent *dent;
+	struct stat st;
+	char *path;
+	char *sys;
+	DIR *dir;
+	int count = 0;
+	int ret, err;
+
+	path = get_tracing_file("events");
+	if (!path) {
+		pr_debug("can't get tracing/events");
+		return -ENOMEM;
+	}
+
+	dir = opendir(path);
+	if (!dir) {
+		err = -errno;
+		pr_debug("can't read directory '%s'", path);
+		goto out_put;	/* no stream to close: never hand NULL to closedir() */
+	}
+
+	for_each_event(dir, dent, tps) {	/* pass 1: count the systems to record */
+		if (strcmp(dent->d_name, "ftrace") == 0 ||
+		    !system_in_tp_list(dent->d_name, tps))
+			continue;
+
+		count++;
+	}
+
+	if (write(output_fd, &count, 4) != 4) {
+		err = -EIO;
+		pr_debug("can't write count\n");
+		goto out;
+	}
+
+	rewinddir(dir);
+	for_each_event(dir, dent, tps) {	/* pass 2: name (with NUL) + events of each system */
+		if (strcmp(dent->d_name, "ftrace") == 0 ||
+		    !system_in_tp_list(dent->d_name, tps))
+			continue;
+
+		if (asprintf(&sys, "%s/%s", path, dent->d_name) < 0) {
+			err = -ENOMEM;
+			goto out;
+		}
+		ret = stat(sys, &st);
+		if (ret >= 0) {
+			ssize_t size = strlen(dent->d_name) + 1;
+
+			if (write(output_fd, dent->d_name, size) != size ||
+			    copy_event_system(sys, tps) < 0) {
+				err = -EIO;
+				free(sys);
+				goto out;
+			}
+		}
+		free(sys);
+	}
+	err = 0;
+out:
+	closedir(dir);
+out_put:
+	put_tracing_file(path);
+	return err;
+}
+
+static int record_proc_kallsyms(void)
+{
+	unsigned long long size = 0;
+	/*
+	 * Just to keep older perf.data file parsers happy, record a zero
+	 * sized kallsyms file, i.e. do the same thing that was done when
+	 * /proc/kallsyms (or something specified via --kallsyms, in a
+	 * different path) couldn't be read.
+	 */
+	return write(output_fd, &size, 4) != 4 ? -EIO : 0;	/* emits four zero bytes */
+}
+
+static int record_ftrace_printk(void)
+{
+	unsigned int size;
+	char *path;
+	struct stat st;
+	int ret, err = 0;
+
+	path = get_tracing_file("printk_formats");
+	if (!path) {
+		pr_debug("can't get tracing/printk_formats");
+		return -ENOMEM;
+	}
+
+	ret = stat(path, &st);
+	if (ret < 0) {
+		/* not found */
+		size = 0;
+		if (write(output_fd, &size, 4) != 4)	/* a zero 4-byte length stands for "no file" */
+			err = -EIO;
+		goto out;
+	}
+	err = record_file(path, 4);	/* contents behind a 4-byte size header */
+
+out:
+	put_tracing_file(path);
+	return err;
+}
+
+/* Record tracing/saved_cmdlines behind an 8-byte length; the length is zero if the file is absent. */
+static int record_saved_cmdline(void)
+{
+	unsigned long long size;	/* 8 bytes wide: write(..., 8) below reads all of it */
+	char *path;
+	struct stat st;
+	int err = 0;
+
+	path = get_tracing_file("saved_cmdlines");
+	if (!path) {
+		pr_debug("can't get tracing/saved_cmdline");
+		return -ENOMEM;
+	}
+
+	if (stat(path, &st) < 0) {
+		/* not found */
+		size = 0;
+		if (write(output_fd, &size, 8) != 8)
+			err = -EIO;
+		goto out;
+	}
+	err = record_file(path, 8);
+
+out:
+	put_tracing_file(path);
+	return err;
+}
+
+static void
+put_tracepoints_path(struct tracepoint_path *tps)
+{
+	while (tps) {
+		struct tracepoint_path *t = tps;
+
+		tps = tps->next;	/* advance first: t is freed below */
+		zfree(&t->name);
+		zfree(&t->system);
+		free(t);	/* every node, including the first, is passed to free() */
+	}
+}
+
+static struct tracepoint_path *
+get_tracepoints_path(struct list_head *pattrs)
+{
+	struct tracepoint_path path, *ppath = &path;	/* path is only a stack-resident list head */
+	struct perf_evsel *pos;
+	int nr_tracepoints = 0;
+
+	list_for_each_entry(pos, pattrs, node) {
+		if (pos->attr.type != PERF_TYPE_TRACEPOINT)
+			continue;
+		++nr_tracepoints;
+
+		if (pos->name) {
+			ppath->next = tracepoint_name_to_path(pos->name);
+			if (ppath->next)
+				goto next;
+
+			if (strchr(pos->name, ':') == NULL)	/* not "system:event": fall back to the id */
+				goto try_id;
+
+			goto error;
+		}
+
+try_id:
+		ppath->next = tracepoint_id_to_path(pos->attr.config);
+		if (!ppath->next) {
+error:
+			pr_debug("No memory to alloc tracepoints list\n");
+			put_tracepoints_path(path.next);	/* free only the heap nodes, never &path */
+			return NULL;
+		}
+next:
+		ppath = ppath->next;
+	}
+
+	return nr_tracepoints > 0 ? path.next : NULL;
+}
+
+bool have_tracepoints(struct list_head *pattrs)
+{
+	struct perf_evsel *pos;
+
+	list_for_each_entry(pos, pattrs, node)
+		if (pos->attr.type == PERF_TYPE_TRACEPOINT)
+			return true;	/* one tracepoint event is enough */
+
+	return false;
+}
+
+static int tracing_data_header(void)
+{
+	char buf[20];
+	ssize_t size;
+
+	/* just guessing this is someone's birthday.. ;) */
+	buf[0] = 23;
+	buf[1] = 8;
+	buf[2] = 68;
+	memcpy(buf + 3, "tracing", 7);	/* no NUL copied: 3 magic bytes + 7 characters */
+
+	if (write(output_fd, buf, 10) != 10)
+		return -1;
+
+	size = strlen(VERSION) + 1;	/* version string goes out with its NUL */
+	if (write(output_fd, VERSION, size) != size)
+		return -1;
+
+	/* save endian */
+	if (bigendian())
+		buf[0] = 1;
+	else
+		buf[0] = 0;
+
+	if (write(output_fd, buf, 1) != 1)
+		return -1;
+
+	/* save size of long */
+	buf[0] = sizeof(long);
+	if (write(output_fd, buf, 1) != 1)
+		return -1;
+
+	/* save page_size */
+	if (write(output_fd, &page_size, 4) != 4)	/* first 4 bytes of page_size, raw */
+		return -1;
+
+	return 0;
+}
+
+/* Write tracing metadata for @pattrs to @fd, or to a temp file if @temp; NULL on failure or no tracepoints. */
+struct tracing_data *tracing_data_get(struct list_head *pattrs,
+				      int fd, bool temp)
+{
+	struct tracepoint_path *tps;
+	struct tracing_data *tdata;
+	int err, temp_fd;
+
+	output_fd = fd;
+
+	tps = get_tracepoints_path(pattrs);
+	if (!tps)
+		return NULL;
+
+	tdata = malloc(sizeof(*tdata));
+	if (!tdata)
+		goto out_put;
+
+	tdata->temp = temp;
+	tdata->size = 0;
+
+	if (temp) {
+		snprintf(tdata->temp_file, sizeof(tdata->temp_file),
+			 "/tmp/perf-XXXXXX");
+		/* mkstemp() returns an fd open for reading and writing, or -1 */
+		temp_fd = mkstemp(tdata->temp_file);
+		if (temp_fd < 0) {
+			pr_debug("Can't make temp file");
+			zfree(&tdata);
+			goto out_put;
+		}
+
+		/*
+		 * Set the temp file the default output, so all the
+		 * tracing data are stored into it.
+		 */
+		output_fd = temp_fd;
+	}
+
+	err = tracing_data_header();
+	if (err)
+		goto out;
+	err = record_header_files();
+	if (err)
+		goto out;
+	err = record_ftrace_files(tps);
+	if (err)
+		goto out;
+	err = record_event_files(tps);
+	if (err)
+		goto out;
+	err = record_proc_kallsyms();
+	if (err)
+		goto out;
+	err = record_ftrace_printk();
+	if (err)
+		goto out;
+	err = record_saved_cmdline();
+
+out:
+	/*
+	 * All tracing data are stored by now, we can restore
+	 * the default output file in case we used temp file.
+	 */
+	if (temp) {
+		tdata->size = lseek(output_fd, 0, SEEK_CUR);
+		close(output_fd);
+		output_fd = fd;
+	}
+
+	if (err) {
+		if (temp)	/* nobody can reach tracing_data_put() to remove it */
+			unlink(tdata->temp_file);
+		zfree(&tdata);
+	}
+
+out_put:
+	put_tracepoints_path(tps);
+	return tdata;
+}
+
+int tracing_data_put(struct tracing_data *tdata)
+{
+	int err = 0;
+
+	if (tdata->temp) {
+		err = record_file(tdata->temp_file, 0);	/* hdr_sz 0: copy the contents, no size header */
+		unlink(tdata->temp_file);
+	}
+
+	free(tdata);	/* tdata is consumed whether or not the copy succeeded */
+	return err;
+}
+
+int read_tracing_data(int fd, struct list_head *pattrs)
+{
+	int err;
+	struct tracing_data *tdata;
+
+	/*
+	 * We work over the real file, so we can write data
+	 * directly, no temp file is needed.
+	 */
+	tdata = tracing_data_get(pattrs, fd, false);
+	if (!tdata)
+		return -ENOMEM;	/* every NULL return is reported as -ENOMEM, whatever its cause */
+
+	err = tracing_data_put(tdata);
+	return err;
+}
diff --git a/util/trace-event-parse.c b/util/trace-event-parse.c
new file mode 100644
index 0000000..e0a6e9a
--- /dev/null
+++ b/util/trace-event-parse.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "../perf.h"
+#include "debug.h"
+#include "trace-event.h"
+
+#include "sane_ctype.h"
+
+static int get_common_field(struct scripting_context *context,
+			    int *offset, int *size, const char *type)
+{
+	struct pevent *pevent = context->pevent;
+	struct event_format *event;
+	struct format_field *field;
+
+	if (!*size) {	/* field location not cached by the caller yet */
+		if (!pevent->events)
+			return 0;
+
+		event = pevent->events[0];	/* the lookup uses the first event only */
+		field = pevent_find_common_field(event, type);
+		if (!field)
+			return 0;
+		*offset = field->offset;
+		*size = field->size;
+	}
+
+	return pevent_read_number(pevent, context->event_data + *offset, *size);
+}
+
+/* Read the "common_lock_depth" field of the current event; the statics cache where it lives. */
+int common_lock_depth(struct scripting_context *context)
+{
+	static int offset, size;
+	int ret;
+
+	ret = get_common_field(context, &offset, &size,
+			       "common_lock_depth");
+	if (ret < 0)
+		return -1;
+
+	return ret;
+}
+
+/* Read the "common_flags" field of the current event; the statics cache where it lives. */
+int common_flags(struct scripting_context *context)
+{
+	static int offset, size;
+	int ret;
+
+	ret = get_common_field(context, &offset, &size,
+			       "common_flags");
+	if (ret < 0)
+		return -1;
+
+	return ret;
+}
+
+/* Read the "common_preempt_count" field of the current event; the statics cache where it lives. */
+int common_pc(struct scripting_context *context)
+{
+	static int offset, size;
+	int ret;
+
+	ret = get_common_field(context, &offset, &size,
+			       "common_preempt_count");
+	if (ret < 0)
+		return -1;
+
+	return ret;
+}
+
+unsigned long long
+raw_field_value(struct event_format *event, const char *name, void *data)
+{
+	struct format_field *field;
+	unsigned long long val;
+
+	field = pevent_find_any_field(event, name);
+	if (!field)
+		return 0ULL;	/* an unknown field reads as 0 */
+
+	pevent_read_number_field(field, data, &val);	/* NOTE(review): result unchecked; val has no initialiser */
+
+	return val;
+}
+
+unsigned long long read_size(struct event_format *event, void *ptr, int size)
+{
+	return pevent_read_number(event->pevent, ptr, size);	/* thin wrapper using the event's own pevent */
+}
+
+void event_format__fprintf(struct event_format *event,
+			   int cpu, void *data, int size, FILE *fp)
+{
+	struct pevent_record record;
+	struct trace_seq s;
+
+	memset(&record, 0, sizeof(record));	/* fields not set below stay zero */
+	record.cpu = cpu;
+	record.size = size;
+	record.data = data;
+
+	trace_seq_init(&s);
+	pevent_event_info(&s, event, &record);	/* format the record into s ... */
+	trace_seq_do_fprintf(&s, fp);		/* ... then print s to fp */
+	trace_seq_destroy(&s);
+}
+
+void event_format__print(struct event_format *event,
+			 int cpu, void *data, int size)
+{
+	return event_format__fprintf(event, cpu, data, size, stdout);	/* stdout variant of the above */
+}
+
+void parse_ftrace_printk(struct pevent *pevent,
+			 char *file, unsigned int size __maybe_unused)
+{
+	unsigned long long addr;
+	char *printk, *line;
+	char *next = NULL;
+	char *addr_str;
+	char *fmt = NULL;
+
+	line = strtok_r(file, "\n", &next);	/* one "address: format" entry per line */
+	while (line) {
+		addr_str = strtok_r(line, ":", &fmt);
+		if (!addr_str) {
+			pr_warning("printk format with empty entry");
+			break;
+		}
+		addr = strtoull(addr_str, NULL, 16);
+		/* fmt still has a space, skip it - unless nothing follows the ':' */
+		printk = strdup(*fmt ? fmt + 1 : fmt);
+		line = strtok_r(NULL, "\n", &next);
+		if (printk)	/* strdup() can fail: do not register NULL */
+			pevent_register_print_string(pevent, printk, addr);
+	}
+}
+
+void parse_saved_cmdline(struct pevent *pevent,
+			 char *file, unsigned int size __maybe_unused)
+{
+	char *comm, *line, *next = NULL;
+	int pid;
+
+	line = strtok_r(file, "\n", &next);
+	while (line) {
+		/* comm is allocated by %ms only on a full match; skip malformed lines */
+		if (sscanf(line, "%d %ms", &pid, &comm) == 2) {
+			pevent_register_comm(pevent, comm, pid);
+			free(comm);
+		}
+		line = strtok_r(NULL, "\n", &next);
+	}
+}
+
+int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size)
+{
+	return pevent_parse_event(pevent, buf, size, "ftrace");	/* system name fixed to "ftrace" */
+}
+
+int parse_event_file(struct pevent *pevent,
+		     char *buf, unsigned long size, char *sys)
+{
+	return pevent_parse_event(pevent, buf, size, sys);	/* same as above, caller-supplied system */
+}
+
+struct event_format *trace_find_next_event(struct pevent *pevent,
+					   struct event_format *event)
+{
+	static int idx;	/* position kept between calls; shared by all callers */
+
+	if (!pevent || !pevent->events)
+		return NULL;
+
+	if (!event) {	/* NULL event restarts the walk */
+		idx = 0;
+		return pevent->events[0];
+	}
+
+	if (idx < pevent->nr_events && event == pevent->events[idx]) {	/* fast path: event is where we stopped */
+		idx++;
+		if (idx == pevent->nr_events)
+			return NULL;
+		return pevent->events[idx];
+	}
+
+	for (idx = 1; idx < pevent->nr_events; idx++) {	/* otherwise search for event's successor */
+		if (event == pevent->events[idx - 1])
+			return pevent->events[idx];
+	}
+	return NULL;
+}
+
+struct flag {
+	const char *name;	/* symbolic name compared by eval_flag() */
+	unsigned long long value;	/* number returned for that name */
+};
+
+static const struct flag flags[] = {
+	{ "HI_SOFTIRQ", 0 },	/* *_SOFTIRQ names: values 0..9 */
+	{ "TIMER_SOFTIRQ", 1 },
+	{ "NET_TX_SOFTIRQ", 2 },
+	{ "NET_RX_SOFTIRQ", 3 },
+	{ "BLOCK_SOFTIRQ", 4 },
+	{ "IRQ_POLL_SOFTIRQ", 5 },
+	{ "TASKLET_SOFTIRQ", 6 },
+	{ "SCHED_SOFTIRQ", 7 },
+	{ "HRTIMER_SOFTIRQ", 8 },
+	{ "RCU_SOFTIRQ", 9 },
+
+	{ "HRTIMER_NORESTART", 0 },	/* second group restarts at 0 */
+	{ "HRTIMER_RESTART", 1 },
+};
+
+unsigned long long eval_flag(const char *flag)
+{
+	int i;
+
+	/*
+	 * Some flags in the format files do not get converted.
+	 * If the flag is not numeric, see if it is something that
+	 * we already know about.
+	 */
+	if (isdigit(flag[0]))
+		return strtoull(flag, NULL, 0);	/* base 0: the prefix selects the base */
+
+	for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++)
+		if (strcmp(flags[i].name, flag) == 0)
+			return flags[i].value;
+
+	return 0;	/* unknown names evaluate to 0 */
+}
diff --git a/util/trace-event-read.c b/util/trace-event-read.c
new file mode 100644
index 0000000..40b4259
--- /dev/null
+++ b/util/trace-event-read.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "../perf.h"
+#include "util.h"
+#include "trace-event.h"
+#include "debug.h"
+
+static int input_fd;
+
+static ssize_t trace_data_size;
+static bool repipe;
+
+static int __do_read(int fd, void *buf, int size)
+{
+	int rsize = size;	/* requested length, returned on success */
+
+	while (size) {	/* loop until the whole request is satisfied */
+		int ret = read(fd, buf, size);
+
+		if (ret <= 0)
+			return -1;	/* EOF before size bytes counts as failure too */
+
+		if (repipe) {
+			int retw = write(STDOUT_FILENO, buf, ret);	/* echo what was read to stdout */
+
+			if (retw <= 0 || retw != ret) {
+				pr_debug("repiping input file");
+				return -1;
+			}
+		}
+
+		size -= ret;
+		buf += ret;
+	}
+
+	return rsize;
+}
+
+static int do_read(void *data, int size)
+{
+	int r;
+
+	r = __do_read(input_fd, data, size);
+	if (r <= 0) {	/* includes r == 0, i.e. a zero-length request */
+		pr_debug("reading input file (size expected=%d received=%d)",
+			 size, r);
+		return -1;
+	}
+
+	trace_data_size += r;	/* running total of bytes consumed */
+
+	return r;
+}
+
+/* If it fails, the next read will report it */
+static void skip(int size)
+{
+	char buf[BUFSIZ];
+	int r;
+
+	while (size) {
+		r = size > BUFSIZ ? BUFSIZ : size;	/* at most one buffer per round */
+		do_read(buf, r);	/* result deliberately ignored */
+		size -= r;
+	};
+}
+
+static unsigned int read4(struct pevent *pevent)
+{
+	unsigned int data;
+
+	if (do_read(&data, 4) < 0)
+		return 0;	/* a failed read cannot be told apart from a stored 0 */
+	return __data2host4(pevent, data);
+}
+
+static unsigned long long read8(struct pevent *pevent)
+{
+	unsigned long long data;
+
+	if (do_read(&data, 8) < 0)
+		return 0;	/* a failed read cannot be told apart from a stored 0 */
+	return __data2host8(pevent, data);
+}
+
+static char *read_string(void)
+{
+	char buf[BUFSIZ], c;
+	char *str = NULL;
+	int size = 0;
+	off_t r;
+
+	for (;;) {	/* one byte at a time, up to and including the NUL */
+		if (size == (int)sizeof(buf))	/* no NUL within BUFSIZ bytes: would overrun buf */
+			goto out;
+		r = read(input_fd, &c, 1);
+		if (r < 0) {
+			pr_debug("reading input file");
+			goto out;
+		}
+
+		if (!r) {
+			pr_debug("no data");
+			goto out;
+		}
+
+		if (repipe) {
+			int retw = write(STDOUT_FILENO, &c, 1);
+
+			if (retw <= 0 || retw != r) {
+				pr_debug("repiping input file string");
+				goto out;
+			}
+		}
+
+		buf[size++] = c;
+		if (!c)
+			break;
+	}
+
+	trace_data_size += size;
+
+	str = malloc(size);	/* size includes the terminator; NULL is returned on failure */
+	if (str)
+		memcpy(str, buf, size);
+out:
+	return str;
+}
+
+static int read_proc_kallsyms(struct pevent *pevent)
+{
+	unsigned int size;
+
+	size = read4(pevent);
+	if (!size)
+		return 0;
+	/*
+	 * Just skip it, now that we configure libtraceevent to use the
+	 * tools/perf/ symbol resolver.
+	 *
+	 * We need to skip it so that we can continue parsing old perf.data
+	 * files, that contains this /proc/kallsyms payload.
+	 *
+	 * Newer perf.data files will have just the 4-bytes zeros "kallsyms
+	 * payload", so that older tools can continue reading it and interpret
+	 * it as "no kallsyms payload is present".
+	 */
+	lseek(input_fd, size, SEEK_CUR);	/* NOTE(review): result unchecked; payload is not repiped */
+	trace_data_size += size;
+	return 0;	/* always succeeds */
+}
+
+static int read_ftrace_printk(struct pevent *pevent)
+{
+	unsigned int size;
+	char *buf;
+
+	/* it can have 0 size */
+	size = read4(pevent);
+	if (!size)
+		return 0;
+
+	buf = malloc(size + 1);	/* one extra byte for the terminator set below */
+	if (buf == NULL)
+		return -1;
+
+	if (do_read(buf, size) < 0) {
+		free(buf);
+		return -1;
+	}
+
+	buf[size] = '\0';
+
+	parse_ftrace_printk(pevent, buf, size);
+
+	free(buf);
+	return 0;
+}
+
+static int read_header_files(struct pevent *pevent)
+{
+	unsigned long long size;
+	char *header_page;
+	char buf[BUFSIZ];
+	int ret = 0;	/* never changed: the success path returns 0 */
+
+	if (do_read(buf, 12) < 0)
+		return -1;
+
+	if (memcmp(buf, "header_page", 12) != 0) {	/* tag including its NUL */
+		pr_debug("did not read header page");
+		return -1;
+	}
+
+	size = read8(pevent);
+
+	header_page = malloc(size);
+	if (header_page == NULL)
+		return -1;
+
+	if (do_read(header_page, size) < 0) {	/* NOTE(review): size is narrowed to int here */
+		pr_debug("did not read header page");
+		free(header_page);
+		return -1;
+	}
+
+	if (!pevent_parse_header_page(pevent, header_page, size,
+				      pevent_get_long_size(pevent))) {
+		/*
+		 * The commit field in the page is of type long,
+		 * use that instead, since it represents the kernel.
+		 */
+		pevent_set_long_size(pevent, pevent->header_page_size_size);
+	}
+	free(header_page);
+
+	if (do_read(buf, 13) < 0)
+		return -1;
+
+	if (memcmp(buf, "header_event", 13) != 0) {	/* tag including its NUL */
+		pr_debug("did not read header event");
+		return -1;
+	}
+
+	size = read8(pevent);
+	skip(size);	/* header_event payload is consumed but not parsed */
+
+	return ret;
+}
+
+static int read_ftrace_file(struct pevent *pevent, unsigned long long size)
+{
+	int ret;
+	char *buf;
+
+	buf = malloc(size);
+	if (buf == NULL) {
+		pr_debug("memory allocation failure\n");
+		return -1;
+	}
+
+	ret = do_read(buf, size);
+	if (ret < 0) {
+		pr_debug("error reading ftrace file.\n");
+		goto out;
+	}
+
+	ret = parse_ftrace_file(pevent, buf, size);
+	if (ret < 0)
+		pr_debug("error parsing ftrace file.\n");
+out:
+	free(buf);	/* single release point for buf */
+	return ret;	/* result of the parse, or -1 from the failed read */
+}
+
+static int read_event_file(struct pevent *pevent, char *sys,
+			    unsigned long long size)
+{
+	int ret;
+	char *buf;
+
+	buf = malloc(size);
+	if (buf == NULL) {
+		pr_debug("memory allocation failure\n");
+		return -1;
+	}
+
+	ret = do_read(buf, size);
+	if (ret < 0) {
+		/* buf is released at 'out'; freeing it here as well was a double free */
+		goto out;
+	}
+
+	ret = parse_event_file(pevent, buf, size, sys);
+	if (ret < 0)
+		pr_debug("error parsing event file.\n");
+out:
+	free(buf);
+	return ret;
+}
+
+static int read_ftrace_files(struct pevent *pevent)
+{
+	unsigned long long size;
+	int count;
+	int i;
+	int ret;
+
+	count = read4(pevent);	/* number of format files that follow */
+
+	for (i = 0; i < count; i++) {
+		size = read8(pevent);	/* each file is preceded by its 8-byte length */
+		ret = read_ftrace_file(pevent, size);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static int read_event_files(struct pevent *pevent)
+{
+	unsigned long long size;
+	char *sys;
+	int systems;
+	int count;
+	int i,x;
+	int ret;
+
+	systems = read4(pevent);	/* number of event systems that follow */
+
+	for (i = 0; i < systems; i++) {
+		sys = read_string();	/* NOTE(review): malloc'ed and never freed in this function - confirm ownership */
+		if (sys == NULL)
+			return -1;
+
+		count = read4(pevent);	/* number of events in this system */
+
+		for (x=0; x < count; x++) {
+			size = read8(pevent);
+			ret = read_event_file(pevent, sys, size);
+			if (ret)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+static int read_saved_cmdline(struct pevent *pevent)
+{
+	unsigned long long size;
+	char *buf;
+	int ret;
+
+	/* it can have 0 size */
+	size = read8(pevent);
+	if (!size)
+		return 0;
+
+	buf = malloc(size + 1);	/* one extra byte for the terminator set below */
+	if (buf == NULL) {
+		pr_debug("memory allocation failure\n");
+		return -1;
+	}
+
+	ret = do_read(buf, size);
+	if (ret < 0) {
+		pr_debug("error reading saved cmdlines\n");
+		goto out;
+	}
+	buf[size] = '\0';	/* parse_saved_cmdline() tokenises with strtok_r() */
+	parse_saved_cmdline(pevent, buf, size);
+	ret = 0;
+out:
+	free(buf);
+	return ret;
+}
+
+ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe)
+{
+	char buf[BUFSIZ];
+	char test[] = { 23, 8, 68 };	/* the three magic bytes tracing_data_header() writes */
+	char *version;
+	int show_version = 0;	/* the three show_* flags are never set in this function */
+	int show_funcs = 0;
+	int show_printk = 0;
+	ssize_t size = -1;	/* returned as-is on every failure after this point */
+	int file_bigendian;
+	int host_bigendian;
+	int file_long_size;
+	int file_page_size;
+	struct pevent *pevent = NULL;
+	int err;
+
+	repipe = __repipe;
+	input_fd = fd;
+
+	if (do_read(buf, 3) < 0)
+		return -1;
+	if (memcmp(buf, test, 3) != 0) {
+		pr_debug("no trace data in the file");
+		return -1;
+	}
+
+	if (do_read(buf, 7) < 0)
+		return -1;
+	if (memcmp(buf, "tracing", 7) != 0) {
+		pr_debug("not a trace file (missing 'tracing' tag)");
+		return -1;
+	}
+
+	version = read_string();
+	if (version == NULL)
+		return -1;
+	if (show_version)
+		printf("version = %s\n", version);
+
+	if (do_read(buf, 1) < 0) {
+		free(version);
+		return -1;
+	}
+	file_bigendian = buf[0];
+	host_bigendian = bigendian();
+
+	if (trace_event__init(tevent)) {
+		pr_debug("trace_event__init failed");
+		goto out;
+	}
+
+	pevent = tevent->pevent;	/* non-NULL from here on: 'out' cleans tevent up */
+
+	pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
+	pevent_set_file_bigendian(pevent, file_bigendian);
+	pevent_set_host_bigendian(pevent, host_bigendian);
+
+	if (do_read(buf, 1) < 0)
+		goto out;
+	file_long_size = buf[0];
+
+	file_page_size = read4(pevent);
+	if (!file_page_size)	/* read4() also returns 0 when the read fails */
+		goto out;
+
+	pevent_set_long_size(pevent, file_long_size);
+	pevent_set_page_size(pevent, file_page_size);
+
+	err = read_header_files(pevent);	/* sections are read in the order they were recorded */
+	if (err)
+		goto out;
+	err = read_ftrace_files(pevent);
+	if (err)
+		goto out;
+	err = read_event_files(pevent);
+	if (err)
+		goto out;
+	err = read_proc_kallsyms(pevent);
+	if (err)
+		goto out;
+	err = read_ftrace_printk(pevent);
+	if (err)
+		goto out;
+	if (atof(version) >= 0.6) {	/* saved cmdlines are only read for versions >= 0.6 */
+		err = read_saved_cmdline(pevent);
+		if (err)
+			goto out;
+	}
+
+	size = trace_data_size;
+	repipe = false;
+
+	if (show_funcs) {
+		pevent_print_funcs(pevent);
+	} else if (show_printk) {
+		pevent_print_printk(pevent);
+	}
+
+	pevent = NULL;	/* success: skip the cleanup below, tevent keeps its pevent */
+
+out:
+	if (pevent)
+		trace_event__cleanup(tevent);
+	free(version);
+	return size;
+}
diff --git a/util/trace-event-scripting.c b/util/trace-event-scripting.c
new file mode 100644
index 0000000..b1e5c3a
--- /dev/null
+++ b/util/trace-event-scripting.c
@@ -0,0 +1,176 @@
+/*
+ * trace-event-scripting.  Scripting engine common and initialization code.
+ *
+ * Copyright (C) 2009-2010 Tom Zanussi <tzanussi@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "../perf.h"
+#include "debug.h"
+#include "util.h"
+#include "trace-event.h"
+
+struct scripting_context *scripting_context;
+
+/* No-op ->flush_script hook shared by the "unsupported" ops tables; always returns 0. */
+static int flush_script_unsupported(void)
+{
+	return 0;
+}
+
+/* No-op ->stop_script hook shared by the "unsupported" ops tables; always returns 0. */
+static int stop_script_unsupported(void)
+{
+	return 0;
+}
+
+/* No-op ->process_event hook shared by the "unsupported" ops tables; all arguments are ignored. */
+static void process_event_unsupported(union perf_event *event __maybe_unused,
+				      struct perf_sample *sample __maybe_unused,
+				      struct perf_evsel *evsel __maybe_unused,
+				      struct addr_location *al __maybe_unused)
+{
+}
+
+/* Tell the user, on stderr, that this perf lacks Python scripting and how to enable it. */
+static void print_python_unsupported_msg(void)
+{
+	static const char msg[] =
+		"Python scripting not supported."
+		"  Install libpython and rebuild perf to enable it.\n"
+		"For example:\n  # apt-get install python-dev (ubuntu)"
+		"\n  # yum install python-devel (Fedora)"
+		"\n  etc.\n";
+
+	/* The text contains no conversion specifiers, so it is written verbatim. */
+	fputs(msg, stderr);
+}
+
+/* ->start_script stub: print the "Python not supported" hint and fail with -1. */
+static int python_start_script_unsupported(const char *script __maybe_unused,
+					   int argc __maybe_unused,
+					   const char **argv __maybe_unused)
+{
+	print_python_unsupported_msg();
+
+	return -1;
+}
+
+/* ->generate_script stub: print the "Python not supported" hint and fail with -1. */
+static int python_generate_script_unsupported(struct pevent *pevent
+					      __maybe_unused,
+					      const char *outfile
+					      __maybe_unused)
+{
+	print_python_unsupported_msg();
+
+	return -1;
+}
+
+/*
+ * Ops table registered for Python when perf is built without
+ * HAVE_LIBPYTHON_SUPPORT.  The hooks not listed here (process_stat,
+ * process_stat_interval) are left NULL.
+ */
+struct scripting_ops python_scripting_unsupported_ops = {
+	.name = "Python",
+	.start_script = python_start_script_unsupported,
+	.flush_script = flush_script_unsupported,
+	.stop_script = stop_script_unsupported,
+	.process_event = process_event_unsupported,
+	.generate_script = python_generate_script_unsupported,
+};
+
+/*
+ * Allocate the shared scripting_context on first use and register
+ * 'scripting_ops' under the "Python" and "py" script specs.  If the
+ * allocation or either registration fails, log an error and release the
+ * context through zfree().
+ */
+static void register_python_scripting(struct scripting_ops *scripting_ops)
+{
+	if (!scripting_context)
+		scripting_context = malloc(sizeof(*scripting_context));
+
+	/* Success path: context present and both specs registered. */
+	if (scripting_context &&
+	    !script_spec_register("Python", scripting_ops) &&
+	    !script_spec_register("py", scripting_ops))
+		return;
+
+	pr_err("Error registering Python script extension: disabling it\n");
+	zfree(&scripting_context);
+}
+
+/*
+ * Register Python scripting: the stub ops when perf is built without
+ * HAVE_LIBPYTHON_SUPPORT, otherwise python_scripting_ops, which is defined
+ * outside this file.
+ */
+#ifndef HAVE_LIBPYTHON_SUPPORT
+void setup_python_scripting(void)
+{
+	register_python_scripting(&python_scripting_unsupported_ops);
+}
+#else
+extern struct scripting_ops python_scripting_ops;
+
+void setup_python_scripting(void)
+{
+	register_python_scripting(&python_scripting_ops);
+}
+#endif
+
+/* Tell the user, on stderr, that this perf lacks Perl scripting and how to enable it. */
+static void print_perl_unsupported_msg(void)
+{
+	static const char msg[] =
+		"Perl scripting not supported."
+		"  Install libperl and rebuild perf to enable it.\n"
+		"For example:\n  # apt-get install libperl-dev (ubuntu)"
+		"\n  # yum install 'perl(ExtUtils::Embed)' (Fedora)"
+		"\n  etc.\n";
+
+	/* The text contains no conversion specifiers, so it is written verbatim. */
+	fputs(msg, stderr);
+}
+
+/* ->start_script stub: print the "Perl not supported" hint and fail with -1. */
+static int perl_start_script_unsupported(const char *script __maybe_unused,
+					 int argc __maybe_unused,
+					 const char **argv __maybe_unused)
+{
+	print_perl_unsupported_msg();
+
+	return -1;
+}
+
+/* ->generate_script stub: print the "Perl not supported" hint and fail with -1. */
+static int perl_generate_script_unsupported(struct pevent *pevent
+					    __maybe_unused,
+					    const char *outfile __maybe_unused)
+{
+	print_perl_unsupported_msg();
+
+	return -1;
+}
+
+/*
+ * Ops table registered for Perl when perf is built without
+ * HAVE_LIBPERL_SUPPORT.  The hooks not listed here (process_stat,
+ * process_stat_interval) are left NULL.
+ */
+struct scripting_ops perl_scripting_unsupported_ops = {
+	.name = "Perl",
+	.start_script = perl_start_script_unsupported,
+	.flush_script = flush_script_unsupported,
+	.stop_script = stop_script_unsupported,
+	.process_event = process_event_unsupported,
+	.generate_script = perl_generate_script_unsupported,
+};
+
+/*
+ * Allocate the shared scripting_context on first use and register
+ * 'scripting_ops' under the "Perl" and "pl" script specs.  If the
+ * allocation or either registration fails, log an error and release the
+ * context through zfree().
+ */
+static void register_perl_scripting(struct scripting_ops *scripting_ops)
+{
+	if (!scripting_context)
+		scripting_context = malloc(sizeof(*scripting_context));
+
+	/* Success path: context present and both specs registered. */
+	if (scripting_context &&
+	    !script_spec_register("Perl", scripting_ops) &&
+	    !script_spec_register("pl", scripting_ops))
+		return;
+
+	pr_err("Error registering Perl script extension: disabling it\n");
+	zfree(&scripting_context);
+}
+
+/*
+ * Register Perl scripting: the stub ops when perf is built without
+ * HAVE_LIBPERL_SUPPORT, otherwise perl_scripting_ops, which is defined
+ * outside this file.
+ */
+#ifndef HAVE_LIBPERL_SUPPORT
+void setup_perl_scripting(void)
+{
+	register_perl_scripting(&perl_scripting_unsupported_ops);
+}
+#else
+extern struct scripting_ops perl_scripting_ops;
+
+void setup_perl_scripting(void)
+{
+	register_perl_scripting(&perl_scripting_ops);
+}
+#endif
diff --git a/util/trace-event.c b/util/trace-event.c
new file mode 100644
index 0000000..16a7763
--- /dev/null
+++ b/util/trace-event.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <traceevent/event-parse.h>
+#include <api/fs/tracing_path.h>
+#include <api/fs/fs.h>
+#include "trace-event.h"
+#include "machine.h"
+#include "util.h"
+
+/*
+ * global trace_event object used by trace_event__tp_format
+ *
+ * TODO There's no cleanup call for this. Add some sort of
+ * __exit function support and call trace_event__cleanup
+ * there.
+ */
+static struct trace_event tevent;
+static bool tevent_initialized;
+
+/*
+ * Allocate a pevent for 't' and load the traceevent plugins for it.
+ * Returns 0 on success, -1 when pevent_alloc() fails (in which case 't' is
+ * left untouched).
+ */
+int trace_event__init(struct trace_event *t)
+{
+	struct pevent *pevent = pevent_alloc();
+
+	if (!pevent)
+		return -1;
+
+	t->plugin_list = traceevent_load_plugins(pevent);
+	t->pevent = pevent;
+	return 0;
+}
+
+/*
+ * One-time setup of the file-global 'tevent': nanosecond output, and both
+ * the file and host endianness set to the host's.  Marks the global as
+ * initialized.  Returns 0 on success, -1 when trace_event__init() fails.
+ */
+static int trace_event__init2(void)
+{
+	int host_be = traceevent_host_bigendian();
+
+	if (trace_event__init(&tevent) != 0)
+		return -1;
+
+	pevent_set_flag(tevent.pevent, PEVENT_NSEC_OUTPUT);
+	pevent_set_file_bigendian(tevent.pevent, host_be);
+	pevent_set_host_bigendian(tevent.pevent, host_be);
+	tevent_initialized = true;
+	return 0;
+}
+
+/*
+ * Install 'func' (with 'machine' as its private data) as the function
+ * resolver of the global tevent, initializing the latter on first use.
+ * Returns -1 if that initialization fails, otherwise the result of
+ * pevent_set_function_resolver().
+ */
+int trace_event__register_resolver(struct machine *machine,
+				   pevent_func_resolver_t *func)
+{
+	if (!tevent_initialized) {
+		if (trace_event__init2())
+			return -1;
+	}
+
+	return pevent_set_function_resolver(tevent.pevent, func, machine);
+}
+
+/* Undo trace_event__init(): unload the plugins, then free the pevent. */
+void trace_event__cleanup(struct trace_event *t)
+{
+	traceevent_unload_plugins(t->plugin_list, t->pevent);
+	pevent_free(t->pevent);
+}
+
+/*
+ * Read <tracing_events_path>/<sys>/<name>/format and parse it into the
+ * global tevent.
+ *
+ * Returns the parsed event, or a pointer with encoded error via the
+ * <linux/err.h> interface: the filename__read_str() error when the format
+ * file cannot be read, -EINVAL when pevent_parse_format() reports a failure
+ * or yields no event.  Never returns NULL, so callers may rely on IS_ERR().
+ */
+static struct event_format*
+tp_format(const char *sys, const char *name)
+{
+	struct pevent *pevent = tevent.pevent;
+	struct event_format *event = NULL;
+	char path[PATH_MAX];
+	size_t size;
+	char *data;
+	int err;
+
+	scnprintf(path, PATH_MAX, "%s/%s/%s/format",
+		  tracing_events_path, sys, name);
+
+	err = filename__read_str(path, &data, &size);
+	if (err)
+		return ERR_PTR(err);
+
+	/*
+	 * A failed parse must not leak out as NULL or as a pointer the parser
+	 * no longer considers valid: encode it as an error instead.
+	 */
+	if (pevent_parse_format(pevent, &event, data, size, sys) || !event)
+		event = ERR_PTR(-EINVAL);
+
+	free(data);
+	return event;
+}
+
+/*
+ * Look up the format of tracepoint sys:name, initializing the global tevent
+ * on first use.
+ *
+ * Returns pointer with encoded error via <linux/err.h> interface
+ * (-ENOMEM when that initialization fails).
+ */
+struct event_format*
+trace_event__tp_format(const char *sys, const char *name)
+{
+	if (!tevent_initialized && trace_event__init2())
+		return ERR_PTR(-ENOMEM);
+
+	return tp_format(sys, name);
+}
+
+/*
+ * Find an event by id in the global tevent, initializing it on first use.
+ * Returns ERR_PTR(-ENOMEM) when that initialization fails, otherwise
+ * whatever pevent_find_event() returns for 'id'.
+ */
+struct event_format *trace_event__tp_format_id(int id)
+{
+	if (!tevent_initialized && trace_event__init2())
+		return ERR_PTR(-ENOMEM);
+
+	return pevent_find_event(tevent.pevent, id);
+}
diff --git a/util/trace-event.h b/util/trace-event.h
new file mode 100644
index 0000000..dcbdb53
--- /dev/null
+++ b/util/trace-event.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UTIL_TRACE_EVENT_H
+#define _PERF_UTIL_TRACE_EVENT_H
+
+#include <traceevent/event-parse.h>
+#include "parse-events.h"
+
+struct machine;
+struct perf_sample;
+union perf_event;
+struct perf_tool;
+struct thread;
+struct plugin_list;
+
+/* A pevent handle paired with the plugin list that was loaded for it. */
+struct trace_event {
+	struct pevent		*pevent;
+	struct plugin_list	*plugin_list;
+};
+
+int trace_event__init(struct trace_event *t);
+void trace_event__cleanup(struct trace_event *t);
+int trace_event__register_resolver(struct machine *machine,
+				   pevent_func_resolver_t *func);
+struct event_format*
+trace_event__tp_format(const char *sys, const char *name);
+
+struct event_format *trace_event__tp_format_id(int id);
+
+int bigendian(void);
+
+void event_format__fprintf(struct event_format *event,
+			   int cpu, void *data, int size, FILE *fp);
+
+void event_format__print(struct event_format *event,
+			 int cpu, void *data, int size);
+
+int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size);
+int parse_event_file(struct pevent *pevent,
+		     char *buf, unsigned long size, char *sys);
+
+unsigned long long
+raw_field_value(struct event_format *event, const char *name, void *data);
+
+void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size);
+void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size);
+void parse_saved_cmdline(struct pevent *pevent, char *file, unsigned int size);
+
+ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe);
+
+struct event_format *trace_find_next_event(struct pevent *pevent,
+					   struct event_format *event);
+unsigned long long read_size(struct event_format *event, void *ptr, int size);
+unsigned long long eval_flag(const char *flag);
+
+int read_tracing_data(int fd, struct list_head *pattrs);
+
+/* Handle returned by tracing_data_get() and handed back to tracing_data_put(). */
+struct tracing_data {
+	/* size is only valid if temp is 'true' */
+	ssize_t size;
+	bool temp;
+	char temp_file[50];	/* presumably the temporary file's path when temp is set - confirm */
+};
+
+struct tracing_data *tracing_data_get(struct list_head *pattrs,
+				      int fd, bool temp);
+int tracing_data_put(struct tracing_data *tdata);
+
+
+struct addr_location;
+
+struct perf_session;
+struct perf_stat_config;
+
+/*
+ * Callbacks implemented by a scripting backend; a table of these is
+ * registered for a script spec through script_spec_register().
+ */
+struct scripting_ops {
+	const char *name;	/* backend name, e.g. "Python" or "Perl" */
+	int (*start_script) (const char *script, int argc, const char **argv);
+	int (*flush_script) (void);
+	int (*stop_script) (void);
+	void (*process_event) (union perf_event *event,
+			       struct perf_sample *sample,
+			       struct perf_evsel *evsel,
+			       struct addr_location *al);
+	void (*process_stat)(struct perf_stat_config *config,
+			     struct perf_evsel *evsel, u64 tstamp);
+	void (*process_stat_interval)(u64 tstamp);
+	int (*generate_script) (struct pevent *pevent, const char *outfile);
+};
+
+extern unsigned int scripting_max_stack;
+
+int script_spec_register(const char *spec, struct scripting_ops *ops);
+
+void setup_perl_scripting(void);
+void setup_python_scripting(void);
+
+/* State passed to the common_*() accessors declared below. */
+struct scripting_context {
+	struct pevent *pevent;
+	void *event_data;	/* presumably the raw data of the event being processed - confirm */
+};
+
+int common_pc(struct scripting_context *context);
+int common_flags(struct scripting_context *context);
+int common_lock_depth(struct scripting_context *context);
+
+#endif /* _PERF_UTIL_TRACE_EVENT_H */
diff --git a/util/trigger.h b/util/trigger.h
new file mode 100644
index 0000000..88223bc
--- /dev/null
+++ b/util/trigger.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRIGGER_H_
+#define __TRIGGER_H_ 1
+
+#include "util/debug.h"
+#include "asm/bug.h"
+
+/*
+ * Use trigger to model operations which need to be executed when
+ * an event (a signal, for example) is observed.
+ *
+ * States and transits:
+ *
+ *
+ *  OFF--> ON --> READY --(hit)--> HIT
+ *                 ^               |
+ *                 |            (ready)
+ *                 |               |
+ *                  \_____________/
+ *
+ * is_hit and is_ready are two key functions to query the state of
+ * a trigger. is_hit means the event has already happened; is_ready means
+ * the trigger is waiting for the event.
+ */
+
+/*
+ * A named state machine.  Negative states (ERROR, OFF) mean the trigger is
+ * not available; see trigger_is_available().
+ */
+struct trigger {
+	volatile enum {
+		TRIGGER_ERROR		= -2,
+		TRIGGER_OFF		= -1,
+		TRIGGER_ON		= 0,
+		TRIGGER_READY		= 1,
+		TRIGGER_HIT		= 2,
+	} state;
+	const char *name;	/* printed by TRIGGER_WARN_ONCE() */
+};
+
+/*
+ * Warn once when trigger 't' is not in the expected state 'exp'.  Both
+ * arguments are parenthesized so that any pointer/state expression expands
+ * correctly.
+ */
+#define TRIGGER_WARN_ONCE(t, exp) \
+	WARN_ONCE((t)->state != (exp), "trigger '%s' state transist error: %d in %s()\n", \
+		  (t)->name, (t)->state, __func__)
+
+/* True for the non-negative states (ON, READY, HIT); false for OFF and ERROR. */
+static inline bool trigger_is_available(struct trigger *t)
+{
+	return t->state >= 0;
+}
+
+/* True when the state is TRIGGER_ERROR (or anything below it). */
+static inline bool trigger_is_error(struct trigger *t)
+{
+	return t->state <= TRIGGER_ERROR;
+}
+
+/*
+ * Switch the trigger to ON.  Warns once if it was not OFF beforehand, but
+ * sets the new state regardless.
+ */
+static inline void trigger_on(struct trigger *t)
+{
+	TRIGGER_WARN_ONCE(t, TRIGGER_OFF);
+	t->state = TRIGGER_ON;
+}
+
+/* Move to READY; ignored while the trigger is unavailable (OFF or ERROR). */
+static inline void trigger_ready(struct trigger *t)
+{
+	if (!trigger_is_available(t))
+		return;
+	t->state = TRIGGER_READY;
+}
+
+/*
+ * Move to HIT; ignored while the trigger is unavailable (OFF or ERROR).
+ * Warns once if the trigger was not READY, but sets HIT regardless.
+ */
+static inline void trigger_hit(struct trigger *t)
+{
+	if (!trigger_is_available(t))
+		return;
+	TRIGGER_WARN_ONCE(t, TRIGGER_READY);
+	t->state = TRIGGER_HIT;
+}
+
+/* Move to OFF; a no-op when already OFF, and an ERROR state is preserved. */
+static inline void trigger_off(struct trigger *t)
+{
+	if (!trigger_is_available(t))
+		return;
+	t->state = TRIGGER_OFF;
+}
+
+/*
+ * Unconditionally put the trigger into ERROR; trigger_ready(), trigger_hit()
+ * and trigger_off() then leave the state alone.
+ */
+static inline void trigger_error(struct trigger *t)
+{
+	t->state = TRIGGER_ERROR;
+}
+
+/* True only while the state is exactly TRIGGER_READY. */
+static inline bool trigger_is_ready(struct trigger *t)
+{
+	return t->state == TRIGGER_READY;
+}
+
+/* True only while the state is exactly TRIGGER_HIT. */
+static inline bool trigger_is_hit(struct trigger *t)
+{
+	return t->state == TRIGGER_HIT;
+}
+
+/* Define trigger 'n', initially OFF and named after its own identifier. */
+#define DEFINE_TRIGGER(n) \
+struct trigger n = {.state = TRIGGER_OFF, .name = #n}
+#endif
diff --git a/util/tsc.c b/util/tsc.c
new file mode 100644
index 0000000..bfa7824
--- /dev/null
+++ b/util/tsc.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "tsc.h"
+
+/*
+ * Convert perf time 'ns' to TSC cycles with the parameters in 'tc':
+ * ((ns - time_zero) << time_shift) / time_mult, computed separately on the
+ * quotient and remainder of the division by time_mult.
+ */
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
+{
+	u64 delta = ns - tc->time_zero;
+	u64 whole = delta / tc->time_mult;
+	u64 part = delta % tc->time_mult;
+
+	whole <<= tc->time_shift;
+	part = (part << tc->time_shift) / tc->time_mult;
+	return whole + part;
+}
+
+/*
+ * Convert TSC cycles 'cyc' to perf time with the parameters in 'tc':
+ * time_zero + (cyc * time_mult) >> time_shift, computed separately on the
+ * high part of 'cyc' and on its low time_shift bits.
+ */
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
+{
+	u64 low_mask = ((u64)1 << tc->time_shift) - 1;
+	u64 ns = tc->time_zero;
+
+	ns += (cyc >> tc->time_shift) * tc->time_mult;
+	ns += ((cyc & low_mask) * tc->time_mult) >> tc->time_shift;
+	return ns;
+}
+
+/* Weak default that returns 0; a non-weak rdtsc() elsewhere takes precedence. */
+u64 __weak rdtsc(void)
+{
+	return 0;
+}
diff --git a/util/tsc.h b/util/tsc.h
new file mode 100644
index 0000000..e0c3af3
--- /dev/null
+++ b/util/tsc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TSC_H
+#define __PERF_TSC_H
+
+#include <linux/types.h>
+
+#include "event.h"
+
+/*
+ * Parameters of the TSC <-> perf time conversion implemented by
+ * perf_time_to_tsc() and tsc_to_perf_time():
+ *	time = time_zero + ((cyc * time_mult) >> time_shift)
+ */
+struct perf_tsc_conversion {
+	u16 time_shift;
+	u32 time_mult;
+	u64 time_zero;
+};
+struct perf_event_mmap_page;
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+			     struct perf_tsc_conversion *tc);
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
+u64 rdtsc(void);
+
+struct perf_event_mmap_page;
+struct perf_tool;
+struct machine;
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+				struct perf_tool *tool,
+				perf_event__handler_t process,
+				struct machine *machine);
+
+#endif
diff --git a/util/units.c b/util/units.c
new file mode 100644
index 0000000..a46762a
--- /dev/null
+++ b/util/units.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "units.h"
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+
+/*
+ * Parse "<decimal number><tag>" from 'str'.  'tags' is an array terminated
+ * by an entry whose tag is 0.  The first tag character found anywhere in
+ * 'str' decides: if the number ends exactly at that character and the
+ * multiplication does not overflow, number * mult is returned.
+ *
+ * Returns (unsigned long) -1 when no tag matches, when the number is not
+ * immediately followed by the tag, or on overflow.
+ */
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
+{
+	const unsigned long invalid = (unsigned long) -1;
+	struct parse_tag *tag;
+
+	for (tag = tags; tag->tag; tag++) {
+		char *hit = strchr(str, tag->tag);
+		unsigned long value;
+		char *num_end;
+
+		if (!hit)
+			continue;
+
+		value = strtoul(str, &num_end, 10);
+		if (hit != num_end || value > ULONG_MAX / tag->mult)
+			return invalid;
+
+		return value * tag->mult;
+	}
+
+	return invalid;
+}
+
+/*
+ * Scale 'value' down by factors of 1000 (at most three times) while it is
+ * strictly greater than 1000.  '*unit' receives the matching prefix: ' '
+ * when unscaled, then 'K', 'M' or 'G'.  Returns the scaled value.
+ */
+unsigned long convert_unit(unsigned long value, char *unit)
+{
+	static const char prefixes[] = { 'K', 'M', 'G' };
+	unsigned int step;
+
+	*unit = ' ';
+	for (step = 0; step < sizeof(prefixes) && value > 1000; step++) {
+		value /= 1000;
+		*unit = prefixes[step];
+	}
+
+	return value;
+}
+
+/*
+ * Format 'n' into 'buf' as a number followed by one of B, K, M or G,
+ * dividing by 1024 (at most three times) while the quotient stays above 1.
+ * Returns the scnprintf() result.
+ */
+int unit_number__scnprintf(char *buf, size_t size, u64 n)
+{
+	static const char suffixes[] = { 'B', 'K', 'M', 'G' };
+	int idx;
+
+	for (idx = 0; idx < 3 && (n / 1024) > 1; idx++)
+		n /= 1024;
+
+	return scnprintf(buf, size, "%" PRIu64 "%c", n, suffixes[idx]);
+}
diff --git a/util/units.h b/util/units.h
new file mode 100644
index 0000000..99263b6
--- /dev/null
+++ b/util/units.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UNIT_H
+#define PERF_UNIT_H
+
+#include <stddef.h>
+#include <linux/types.h>
+
+/*
+ * One suffix recognised by parse_tag_value(): the tag character and the
+ * multiplier applied to the number in front of it.  Arrays of these end
+ * with an entry whose tag is 0.
+ */
+struct parse_tag {
+	char tag;
+	int  mult;
+};
+
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
+
+unsigned long convert_unit(unsigned long value, char *unit);
+int unit_number__scnprintf(char *buf, size_t size, u64 n);
+
+#endif /* PERF_UNIT_H */
diff --git a/util/unwind-libdw.c b/util/unwind-libdw.c
new file mode 100644
index 0000000..7bdd239
--- /dev/null
+++ b/util/unwind-libdw.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include "debug.h"
+#include "unwind.h"
+#include "unwind-libdw.h"
+#include "machine.h"
+#include "thread.h"
+#include <linux/types.h>
+#include "event.h"
+#include "perf_regs.h"
+#include "callchain.h"
+#include "util.h"
+
+static char *debuginfo_path;
+
+/* Callbacks handed to dwfl_begin() in unwind__get_entries(). */
+static const Dwfl_Callbacks offline_callbacks = {
+	.find_debuginfo		= dwfl_standard_find_debuginfo,
+	.debuginfo_path		= &debuginfo_path,
+	.section_address	= dwfl_offline_section_address,
+};
+
+/*
+ * Make sure libdwfl has a module for the ELF object that maps 'ip'.
+ *
+ * 'ip' is resolved into 'al' first.  When no dso is found for it there is
+ * nothing to report and 0 is returned.  Otherwise an already known dwfl
+ * module is reused if it starts at the map's start address, else the ELF
+ * file is reported to dwfl.
+ *
+ * Returns 0 when 'ip' resolves to that module afterwards, -1 otherwise.
+ */
+static int __report_module(struct addr_location *al, u64 ip,
+			    struct unwind_info *ui)
+{
+	Dwfl_Module *mod;
+	struct dso *dso = NULL;
+
+	thread__find_addr_location(ui->thread,
+				   PERF_RECORD_MISC_USER,
+				   MAP__FUNCTION, ip, al);
+
+	if (al->map)
+		dso = al->map->dso;
+
+	if (!dso)
+		return 0;
+
+	mod = dwfl_addrmodule(ui->dwfl, ip);
+	if (mod) {
+		Dwarf_Addr s;
+
+		/* Only trust a known module if it starts where the map does. */
+		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
+		if (s != al->map->start)
+			mod = 0;
+	}
+
+	/* Prefer the separate symbol source file over the dso's long name. */
+	if (!mod)
+		mod = dwfl_report_elf(ui->dwfl, dso->short_name,
+				      (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start,
+				      false);
+
+	return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
+}
+
+/* __report_module() for callers that do not need the resolved location. */
+static int report_module(u64 ip, struct unwind_info *ui)
+{
+	struct addr_location al;
+
+	return __report_module(&al, ip, ui);
+}
+
+/*
+ * Store all entries within entries array,
+ * we will process it after we finish unwind.
+ *
+ * Returns 0 on success, -1 when the module for 'ip' cannot be reported.
+ * Note that the slot is claimed (ui->idx advanced) before that check, so a
+ * failure leaves an unfilled slot behind.
+ */
+static int entry(u64 ip, struct unwind_info *ui)
+
+{
+	struct unwind_entry *e = &ui->entries[ui->idx++];
+	struct addr_location al;
+
+	if (__report_module(&al, ip, ui))
+		return -1;
+
+	e->ip  = al.addr;
+	e->map = al.map;
+	e->sym = al.sym;
+
+	pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
+		 al.sym ? al.sym->name : "''",
+		 ip,
+		 al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
+	return 0;
+}
+
+/*
+ * dwfl next_thread callback.  The first call (*thread_argp still NULL)
+ * stores 'arg' there and returns dwfl_pid(); every later call returns 0.
+ */
+static pid_t next_thread(Dwfl *dwfl, void *arg, void **thread_argp)
+{
+	/* We want only single thread to be processed. */
+	if (*thread_argp != NULL)
+		return 0;
+
+	*thread_argp = arg;
+	return dwfl_pid(dwfl);
+}
+
+/*
+ * Read one Dwarf_Word at 'addr' from the dso mapped there into '*data'.
+ *
+ * Returns 0 when a full word was read; non-zero when no map or dso covers
+ * 'addr' (-1) or when the read came up short.
+ */
+static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr,
+			  Dwarf_Word *data)
+{
+	struct addr_location al;
+	ssize_t size;
+
+	thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+			      MAP__FUNCTION, addr, &al);
+	if (!al.map) {
+		/*
+		 * We've seen cases (softice) where DWARF unwinder went
+		 * through non executable mmaps, which we need to lookup
+		 * in MAP__VARIABLE tree.
+		 */
+		thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+				      MAP__VARIABLE, addr, &al);
+	}
+
+	if (!al.map) {
+		pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+		return -1;
+	}
+
+	if (!al.map->dso)
+		return -1;
+
+	size = dso__data_read_addr(al.map->dso, al.map, ui->machine,
+				   addr, (u8 *) data, sizeof(*data));
+
+	/* Anything other than a complete word counts as failure. */
+	return !(size == sizeof(*data));
+}
+
+/*
+ * dwfl memory_read callback: fetch the Dwarf_Word at 'addr' into '*result'.
+ *
+ * A word lying entirely inside the sampled user stack, i.e. within
+ * [SP, SP + stack->size), is copied from the stack dump; any other address
+ * is read from the mapped dso through access_dso_mem().
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *result,
+			void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct stack_dump *stack = &ui->sample->user_stack;
+	u64 start, end;
+	int offset;
+	int ret;
+
+	/* The stack dump starts at the sampled stack pointer. */
+	ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP);
+	if (ret)
+		return false;
+
+	end = start + stack->size;
+
+	/* Check overflow. */
+	if (addr + sizeof(Dwarf_Word) < addr)
+		return false;
+
+	if (addr < start || addr + sizeof(Dwarf_Word) > end) {
+		ret = access_dso_mem(ui, addr, result);
+		if (ret) {
+			pr_debug("unwind: access_mem 0x%" PRIx64 " not inside range"
+				 " 0x%" PRIx64 "-0x%" PRIx64 "\n",
+				addr, start, end);
+			return false;
+		}
+		return true;
+	}
+
+	offset  = addr - start;
+	*result = *(Dwarf_Word *)&stack->data[offset];
+	pr_debug("unwind: access_mem addr 0x%" PRIx64 ", val %lx, offset %d\n",
+		 addr, (unsigned long)*result, offset);
+	return true;
+}
+
+/* Thread callbacks handed to dwfl_attach_state() in unwind__get_entries(). */
+static const Dwfl_Thread_Callbacks callbacks = {
+	.next_thread		= next_thread,
+	.memory_read		= memory_read,
+	.set_initial_registers	= libdw__arch_set_initial_registers,
+};
+
+/*
+ * Per-frame callback for dwfl_getthread_frames(): record the frame's pc as
+ * an unwind entry.
+ *
+ * Returns DWARF_CB_OK to continue, or DWARF_CB_ABORT when the pc cannot be
+ * read, the entry cannot be stored, or the max_stack budget is used up.
+ */
+static int
+frame_callback(Dwfl_Frame *state, void *arg)
+{
+	struct unwind_info *ui = arg;
+	Dwarf_Addr pc;
+	bool isactivation;
+
+	if (!dwfl_frame_pc(state, &pc, NULL)) {
+		pr_err("%s", dwfl_errmsg(-1));
+		return DWARF_CB_ABORT;
+	}
+
+	// report the module before we query for isactivation
+	report_module(pc, ui);
+
+	if (!dwfl_frame_pc(state, &pc, &isactivation)) {
+		pr_err("%s", dwfl_errmsg(-1));
+		return DWARF_CB_ABORT;
+	}
+
+	/* For non-activation frames, step the pc back by one. */
+	if (!isactivation)
+		--pc;
+
+	/* max_stack is only decremented for entries that were stored. */
+	return entry(pc, ui) || !(--ui->max_stack) ?
+	       DWARF_CB_ABORT : DWARF_CB_OK;
+}
+
+/*
+ * Unwind the user stack of 'thread' with libdw, using the registers and
+ * stack dump in 'data', then pass each collected entry to 'cb' together
+ * with 'arg'.
+ *
+ * Frame collection stops once 'max_stack' entries have been stored.
+ * Entries are delivered in collection order, or reversed when
+ * callchain_param.order is ORDER_CALLER.
+ *
+ * Returns -EINVAL when the sample carries no user registers and -ENOMEM
+ * when the work area cannot be allocated.  Past those checks it always
+ * returns 0; unwinding failures are only reported through pr_debug().
+ */
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+			struct thread *thread,
+			struct perf_sample *data,
+			int max_stack)
+{
+	struct unwind_info *ui, ui_buf = {
+		.sample		= data,
+		.thread		= thread,
+		.machine	= thread->mg->machine,
+		.cb		= cb,
+		.arg		= arg,
+		.max_stack	= max_stack,
+	};
+	Dwarf_Word ip;
+	int err = -EINVAL, i;
+
+	if (!data->user_regs.regs)
+		return -EINVAL;
+
+	/* Header plus room for max_stack entries, all zeroed. */
+	ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack);
+	if (!ui)
+		return -ENOMEM;
+
+	*ui = ui_buf;
+
+	ui->dwfl = dwfl_begin(&offline_callbacks);
+	if (!ui->dwfl)
+		goto out;
+
+	err = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP);
+	if (err)
+		goto out;
+
+	err = report_module(ip, ui);
+	if (err)
+		goto out;
+
+	err = !dwfl_attach_state(ui->dwfl, EM_NONE, thread->tid, &callbacks, ui);
+	if (err)
+		goto out;
+
+	err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui);
+
+	/* An error after at least one stored frame is not treated as failure. */
+	if (err && ui->max_stack != max_stack)
+		err = 0;
+
+	/*
+	 * Display what we got based on the order setup.
+	 */
+	for (i = 0; i < ui->idx && !err; i++) {
+		int j = i;
+
+		if (callchain_param.order == ORDER_CALLER)
+			j = ui->idx - i - 1;
+
+		/* Slots whose ip is still 0 are skipped. */
+		err = ui->entries[j].ip ? ui->cb(&ui->entries[j], ui->arg) : 0;
+	}
+
+ out:
+	if (err)
+		pr_debug("unwind: failed with '%s'\n", dwfl_errmsg(-1));
+
+	dwfl_end(ui->dwfl);
+	free(ui);
+	return 0;
+}
diff --git a/util/unwind-libdw.h b/util/unwind-libdw.h
new file mode 100644
index 0000000..0cbd265
--- /dev/null
+++ b/util/unwind-libdw.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_UNWIND_LIBDW_H
+#define __PERF_UNWIND_LIBDW_H
+
+#include <elfutils/libdwfl.h>
+#include "unwind.h"
+
+struct machine;
+struct perf_sample;
+struct thread;
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg);
+
+/* Per-unwind state for the libdw unwinder; 'entries' is a trailing array. */
+struct unwind_info {
+	Dwfl			*dwfl;
+	struct perf_sample      *sample;
+	struct machine          *machine;
+	struct thread           *thread;
+	unwind_entry_cb_t	cb;		/* consumer of the collected entries */
+	void			*arg;		/* opaque argument passed to cb */
+	int			max_stack;	/* remaining frame budget */
+	int			idx;		/* number of entries[] slots claimed */
+	struct unwind_entry	entries[];
+};
+
+#endif /* __PERF_UNWIND_LIBDW_H */
diff --git a/util/unwind-libunwind-local.c b/util/unwind-libunwind-local.c
new file mode 100644
index 0000000..af87304
--- /dev/null
+++ b/util/unwind-libunwind-local.c
@@ -0,0 +1,754 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps.
+ *
+ * Much of this code has been borrowed from, or heavily inspired by, parts of
+ * the libunwind 0.99 code which are (amongst other contributors I may have
+ * forgotten):
+ *
+ * Copyright (C) 2002-2007 Hewlett-Packard Co
+ *	Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * And the bugs have been added by:
+ *
+ * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com>
+ *
+ */
+
+#include <elf.h>
+#include <errno.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <linux/list.h>
+#ifndef REMOTE_UNWIND_LIBUNWIND
+#include <libunwind.h>
+#include <libunwind-ptrace.h>
+#endif
+#include "callchain.h"
+#include "thread.h"
+#include "session.h"
+#include "perf_regs.h"
+#include "unwind.h"
+#include "symbol.h"
+#include "util.h"
+#include "debug.h"
+#include "asm/bug.h"
+#include "dso.h"
+
+extern int
+UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+				    unw_word_t ip,
+				    unw_dyn_info_t *di,
+				    unw_proc_info_t *pi,
+				    int need_unwind_info, void *arg);
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+				 unw_word_t ip,
+				 unw_word_t segbase,
+				 const char *obj_name, unw_word_t start,
+				 unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+#define DW_EH_PE_FORMAT_MASK	0x0f	/* format of the encoded value */
+#define DW_EH_PE_APPL_MASK	0x70	/* how the value is to be applied */
+
+/* Pointer-encoding formats: */
+#define DW_EH_PE_omit		0xff
+#define DW_EH_PE_ptr		0x00	/* pointer-sized unsigned value */
+#define DW_EH_PE_udata4		0x03	/* unsigned 32-bit value */
+#define DW_EH_PE_udata8		0x04	/* unsigned 64-bit value */
+#define DW_EH_PE_sdata4		0x0b	/* signed 32-bit value */
+#define DW_EH_PE_sdata8		0x0c	/* signed 64-bit value */
+
+/* Pointer-encoding application: */
+#define DW_EH_PE_absptr		0x00	/* absolute value */
+#define DW_EH_PE_pcrel		0x10	/* rel. to addr. of encoded value */
+
+/*
+ * The following are not documented by LSB v1.3, yet they are used by
+ * GCC, presumably they aren't documented by LSB since they aren't
+ * used on Linux:
+ */
+#define DW_EH_PE_funcrel	0x40	/* start-of-procedure-relative */
+#define DW_EH_PE_aligned	0x50	/* aligned pointer */
+
+/* Flags intentionally not handled, since they're not needed:
+ * #define DW_EH_PE_indirect      0x80
+ * #define DW_EH_PE_uleb128       0x01
+ * #define DW_EH_PE_udata2        0x02
+ * #define DW_EH_PE_sleb128       0x09
+ * #define DW_EH_PE_sdata2        0x0a
+ * #define DW_EH_PE_textrel       0x20
+ * #define DW_EH_PE_datarel       0x30
+ */
+
+/* Context handed to the unwinder callbacks in this file through their 'arg'. */
+struct unwind_info {
+	struct perf_sample	*sample;
+	struct machine		*machine;
+	struct thread		*thread;
+};
+
+/*
+ * Read one value of 'type' at 'ptr', advance 'ptr' past it and evaluate to
+ * the value read.
+ *
+ * NOTE: when the value would extend beyond 'end', this macro executes
+ * 'return -EINVAL' in the *calling* function.
+ */
+#define dw_read(ptr, type, end) ({	\
+	type *__p = (type *) ptr;	\
+	type  __v;			\
+	if ((__p + 1) > (type *) end)	\
+		return -EINVAL;		\
+	__v = *__p++;			\
+	ptr = (typeof(ptr)) __p;	\
+	__v;				\
+	})
+
+/*
+ * Decode one DW_EH_PE_* encoded value from the buffer at '*p' (bounded by
+ * 'end') into '*val', and advance '*p' past it.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported application/format or
+ * when the buffer is too short (the latter via dw_read()).  '*p' is only
+ * updated on success.
+ */
+static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val,
+				   u8 encoding)
+{
+	u8 *cur = *p;
+	*val = 0;
+
+	switch (encoding) {
+	case DW_EH_PE_omit:
+		*val = 0;
+		goto out;
+	case DW_EH_PE_ptr:
+		*val = dw_read(cur, unsigned long, end);
+		goto out;
+	default:
+		break;
+	}
+
+	/* Base the value is relative to. */
+	switch (encoding & DW_EH_PE_APPL_MASK) {
+	case DW_EH_PE_absptr:
+		break;
+	case DW_EH_PE_pcrel:
+		/* Relative to the in-memory address of the encoded value. */
+		*val = (unsigned long) cur;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* A format with its low three bits clear is read as udata4. */
+	if ((encoding & 0x07) == 0x00)
+		encoding |= DW_EH_PE_udata4;
+
+	switch (encoding & DW_EH_PE_FORMAT_MASK) {
+	case DW_EH_PE_sdata4:
+		*val += dw_read(cur, s32, end);
+		break;
+	case DW_EH_PE_udata4:
+		*val += dw_read(cur, u32, end);
+		break;
+	case DW_EH_PE_sdata8:
+		*val += dw_read(cur, s64, end);
+		break;
+	case DW_EH_PE_udata8:
+		*val += dw_read(cur, u64, end);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+ out:
+	*p = cur;
+	return 0;
+}
+
+/*
+ * Expression wrapper around __dw_read_encoded_value(): evaluates to the
+ * decoded value and advances 'ptr'.
+ *
+ * NOTE: on a decoding error this macro executes 'return -EINVAL' in the
+ * *calling* function.
+ */
+#define dw_read_encoded_value(ptr, end, enc) ({			\
+	u64 __v;						\
+	if (__dw_read_encoded_value(&ptr, end, &__v, enc)) {	\
+		return -EINVAL;                                 \
+	}                                                       \
+	__v;                                                    \
+	})
+
+/*
+ * Return the file offset (sh_offset) of the ELF section called 'name' in
+ * the file open on 'fd'.  Returns 0 when the file cannot be opened as ELF,
+ * its header cannot be read, or the section is not found.
+ */
+static u64 elf_section_offset(int fd, const char *name)
+{
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	u64 offset = 0;
+	Elf *elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+
+	if (!elf)
+		return 0;
+
+	if (gelf_getehdr(elf, &ehdr) &&
+	    elf_section_by_name(elf, &ehdr, &shdr, name, NULL))
+		offset = shdr.sh_offset;
+
+	elf_end(elf);
+	return offset;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+/*
+ * Return 1 when the ELF file open on 'fd' has type ET_EXEC, else 0 (also
+ * when the ELF header cannot be read).  'name' is only used for the debug
+ * message, which is not printed when the file cannot be opened as ELF.
+ */
+static int elf_is_exec(int fd, const char *name)
+{
+	GElf_Ehdr ehdr;
+	int is_exec = 0;
+	Elf *elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+
+	if (!elf)
+		return 0;
+
+	if (gelf_getehdr(elf, &ehdr))
+		is_exec = (ehdr.e_type == ET_EXEC);
+
+	elf_end(elf);
+	pr_debug("unwind: elf_is_exec(%s): %d\n", name, is_exec);
+	return is_exec;
+}
+#endif
+
+/* Presumably one row of the .eh_frame_hdr binary search table - confirm against its users. */
+struct table_entry {
+	u32 start_ip_offset;
+	u32 fde_offset;
+};
+
+/*
+ * Leading part of the .eh_frame_hdr section, in the layout in which
+ * unwind_spec_ehframe() reads it from the dso.
+ */
+struct eh_frame_hdr {
+	unsigned char version;
+	unsigned char eh_frame_ptr_enc;
+	unsigned char fde_count_enc;
+	unsigned char table_enc;
+
+	/*
+	 * The rest of the header is variable-length and consists of the
+	 * following members:
+	 *
+	 *	encoded_t eh_frame_ptr;
+	 *	encoded_t fde_count;
+	 */
+
+	/* A single encoded pointer should not be more than 8 bytes. */
+	u64 enc[2];
+
+	/*
+	 * struct {
+	 *    encoded_t start_ip;
+	 *    encoded_t fde_addr;
+	 * } binary_search_table[fde_count];
+	 */
+	char data[0];
+} __packed;
+
+/*
+ * Read the .eh_frame_hdr header found at file offset 'offset' in 'dso' and
+ * extract:
+ *   *fde_count  - the decoded fde_count field,
+ *   *segbase    - 'offset' itself,
+ *   *table_data - the file offset just past the decoded fde_count field.
+ *
+ * Returns 0 on success, -EINVAL when the header cannot be read in full or
+ * one of its encoded fields cannot be decoded.
+ */
+static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
+			       u64 offset, u64 *table_data, u64 *segbase,
+			       u64 *fde_count)
+{
+	struct eh_frame_hdr hdr;
+	u8 *enc = (u8 *) &hdr.enc;
+	u8 *end = (u8 *) &hdr.data;
+	ssize_t r;
+
+	r = dso__data_read_offset(dso, machine, offset,
+				  (u8 *) &hdr, sizeof(hdr));
+	if (r != sizeof(hdr))
+		return -EINVAL;
+
+	/* We dont need eh_frame_ptr, just skip it. */
+	dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc);
+
+	/* Both dw_read_encoded_value() uses return -EINVAL from here on error. */
+	*fde_count  = dw_read_encoded_value(enc, end, hdr.fde_count_enc);
+	*segbase    = offset;
+	*table_data = (enc - (u8 *) &hdr) + offset;
+	return 0;
+}
+
+/*
+ * Locate the .eh_frame_hdr section of 'dso' and parse its header through
+ * unwind_spec_ehframe().  The section offset is looked up in the ELF file
+ * when dso->data.eh_frame_hdr_offset is 0 and stored back there.
+ *
+ * Returns 0 on success, -EINVAL when the dso's fd cannot be obtained, the
+ * section is not found, or the header cannot be parsed.
+ */
+static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
+				     u64 *table_data, u64 *segbase,
+				     u64 *fde_count)
+{
+	int ret = -EINVAL, fd;
+	u64 offset = dso->data.eh_frame_hdr_offset;
+
+	if (offset == 0) {
+		fd = dso__data_get_fd(dso, machine);
+		if (fd < 0)
+			return -EINVAL;
+
+		/* Check the .eh_frame_hdr section for unwinding info */
+		offset = elf_section_offset(fd, ".eh_frame_hdr");
+		dso->data.eh_frame_hdr_offset = offset;
+		dso__data_put_fd(dso);
+	}
+
+	if (offset)
+		ret = unwind_spec_ehframe(dso, machine, offset,
+					  table_data, segbase,
+					  fde_count);
+
+	return ret;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+/*
+ * Find the file offset of the .debug_frame section belonging to 'dso' and
+ * store it in '*offset'.  The result is also kept in
+ * dso->data.debug_frame_offset; the lookup is repeated on later calls only
+ * while that value is still 0.
+ *
+ * When the section is found through the debuglink file, that path replaces
+ * dso->symsrc_filename (the dso takes ownership of the allocated string).
+ *
+ * Returns 0 when a section was found, -EINVAL otherwise.
+ */
+static int read_unwind_spec_debug_frame(struct dso *dso,
+					struct machine *machine, u64 *offset)
+{
+	int fd;
+	u64 ofs = dso->data.debug_frame_offset;
+
+	/* debug_frame can reside in:
+	 *  - dso
+	 *  - debug pointed by symsrc_filename
+	 *  - gnu_debuglink, which doesn't necessarily
+	 *    have to be pointed to by symsrc_filename
+	 */
+	if (ofs == 0) {
+		fd = dso__data_get_fd(dso, machine);
+		if (fd >= 0) {
+			ofs = elf_section_offset(fd, ".debug_frame");
+			dso__data_put_fd(dso);
+		}
+
+		/* ofs is unsigned: "not found" is exactly 0. */
+		if (ofs == 0 && dso->symsrc_filename != NULL) {
+			fd = open(dso->symsrc_filename, O_RDONLY);
+			if (fd >= 0) {
+				ofs = elf_section_offset(fd, ".debug_frame");
+				close(fd);
+			}
+		}
+
+		if (ofs == 0) {
+			char *debuglink = malloc(PATH_MAX);
+			int ret = -1;
+
+			/* Without a path buffer the debuglink lookup is skipped. */
+			if (debuglink != NULL)
+				ret = dso__read_binary_type_filename(
+					dso, DSO_BINARY_TYPE__DEBUGLINK,
+					machine->root_dir, debuglink, PATH_MAX);
+			if (!ret) {
+				fd = open(debuglink, O_RDONLY);
+				if (fd >= 0) {
+					ofs = elf_section_offset(fd,
+							".debug_frame");
+					close(fd);
+				}
+			}
+			if (ofs > 0) {
+				if (dso->symsrc_filename != NULL) {
+					pr_warning(
+						"%s: overwrite symsrc(%s,%s)\n",
+							__func__,
+							dso->symsrc_filename,
+							debuglink);
+					free(dso->symsrc_filename);
+				}
+				dso->symsrc_filename = debuglink;
+			} else {
+				free(debuglink);
+			}
+		}
+
+		dso->data.debug_frame_offset = ofs;
+	}
+
+	*offset = ofs;
+	if (*offset)
+		return 0;
+
+	return -EINVAL;
+}
+#endif
+
+static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
+{
+	struct addr_location al;
+
+	thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+			      MAP__FUNCTION, ip, &al);
+	if (!al.map) {
+		/*
+		 * We've seen cases (softice) where DWARF unwinder went
+		 * through non executable mmaps, which we need to lookup
+		 * in MAP__VARIABLE tree.
+		 */
+		thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+				      MAP__VARIABLE, ip, &al);
+	}
+	return al.map;
+}
+
+static int
+find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
+	       int need_unwind_info, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct map *map;
+	unw_dyn_info_t di;
+	u64 table_data, segbase, fde_count;
+	int ret = -EINVAL;
+
+	map = find_map(ip, ui);
+	if (!map || !map->dso)
+		return -EINVAL;
+
+	pr_debug("unwind: find_proc_info dso %s\n", map->dso->name);
+
+	/* Check the .eh_frame section for unwinding info */
+	if (!read_unwind_spec_eh_frame(map->dso, ui->machine,
+				       &table_data, &segbase, &fde_count)) {
+		memset(&di, 0, sizeof(di));
+		di.format   = UNW_INFO_FORMAT_REMOTE_TABLE;
+		di.start_ip = map->start;
+		di.end_ip   = map->end;
+		di.u.rti.segbase    = map->start + segbase - map->pgoff;
+		di.u.rti.table_data = map->start + table_data - map->pgoff;
+		di.u.rti.table_len  = fde_count * sizeof(struct table_entry)
+				      / sizeof(unw_word_t);
+		ret = dwarf_search_unwind_table(as, ip, &di, pi,
+						need_unwind_info, arg);
+	}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+	/* Check the .debug_frame section for unwinding info */
+	if (ret < 0 &&
+	    !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
+		int fd = dso__data_get_fd(map->dso, ui->machine);
+		int is_exec = elf_is_exec(fd, map->dso->name);
+		unw_word_t base = is_exec ? 0 : map->start;
+		const char *symfile;
+
+		if (fd >= 0)
+			dso__data_put_fd(map->dso);
+
+		symfile = map->dso->symsrc_filename ?: map->dso->name;
+
+		memset(&di, 0, sizeof(di));
+		if (dwarf_find_debug_frame(0, &di, ip, base, symfile,
+					   map->start, map->end))
+			return dwarf_search_unwind_table(as, ip, &di, pi,
+							 need_unwind_info, arg);
+	}
+#endif
+
+	return ret;
+}
+
+static int access_fpreg(unw_addr_space_t __maybe_unused as,
+			unw_regnum_t __maybe_unused num,
+			unw_fpreg_t __maybe_unused *val,
+			int __maybe_unused __write,
+			void __maybe_unused *arg)
+{
+	pr_err("unwind: access_fpreg unsupported\n");
+	return -UNW_EINVAL;
+}
+
+static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as,
+				  unw_word_t __maybe_unused *dil_addr,
+				  void __maybe_unused *arg)
+{
+	return -UNW_ENOINFO;
+}
+
+static int resume(unw_addr_space_t __maybe_unused as,
+		  unw_cursor_t __maybe_unused *cu,
+		  void __maybe_unused *arg)
+{
+	pr_err("unwind: resume unsupported\n");
+	return -UNW_EINVAL;
+}
+
+static int
+get_proc_name(unw_addr_space_t __maybe_unused as,
+	      unw_word_t __maybe_unused addr,
+		char __maybe_unused *bufp, size_t __maybe_unused buf_len,
+		unw_word_t __maybe_unused *offp, void __maybe_unused *arg)
+{
+	pr_err("unwind: get_proc_name unsupported\n");
+	return -UNW_EINVAL;
+}
+
+static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
+			  unw_word_t *data)
+{
+	struct map *map;
+	ssize_t size;
+
+	map = find_map(addr, ui);
+	if (!map) {
+		pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+		return -1;
+	}
+
+	if (!map->dso)
+		return -1;
+
+	size = dso__data_read_addr(map->dso, map, ui->machine,
+				   addr, (u8 *) data, sizeof(*data));
+
+	return !(size == sizeof(*data));
+}
+
+static int access_mem(unw_addr_space_t __maybe_unused as,
+		      unw_word_t addr, unw_word_t *valp,
+		      int __write, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct stack_dump *stack = &ui->sample->user_stack;
+	u64 start, end;
+	int offset, ret;
+
+	/* Don't support write, probably not needed. */
+	if (__write || !stack || !ui->sample->user_regs.regs) {
+		*valp = 0;
+		return 0;
+	}
+
+	ret = perf_reg_value(&start, &ui->sample->user_regs,
+			     LIBUNWIND__ARCH_REG_SP);
+	if (ret)
+		return ret;
+
+	end = start + stack->size;
+
+	/* Check overflow. */
+	if (addr + sizeof(unw_word_t) < addr)
+		return -EINVAL;
+
+	/* A word ending exactly at 'end' still lies inside the stack dump. */
+	if (addr < start || addr + sizeof(unw_word_t) > end) {
+		ret = access_dso_mem(ui, addr, valp);
+		if (ret) {
+			pr_debug("unwind: access_mem %p not inside range"
+				 " 0x%" PRIx64 "-0x%" PRIx64 "\n",
+				 (void *) (uintptr_t) addr, start, end);
+			*valp = 0;
+			return ret;
+		}
+		return 0;
+	}
+
+	offset = addr - start;
+	*valp  = *(unw_word_t *)&stack->data[offset];
+	pr_debug("unwind: access_mem addr %p val %lx, offset %d\n",
+		 (void *) (uintptr_t) addr, (unsigned long)*valp, offset);
+	return 0;
+}
+
+static int access_reg(unw_addr_space_t __maybe_unused as,
+		      unw_regnum_t regnum, unw_word_t *valp,
+		      int __write, void *arg)
+{
+	struct unwind_info *ui = arg;
+	int id, ret;
+	u64 val;
+
+	/* Don't support write, I suspect we don't need it. */
+	if (__write) {
+		pr_err("unwind: access_reg w %d\n", regnum);
+		return 0;
+	}
+
+	if (!ui->sample->user_regs.regs) {
+		*valp = 0;
+		return 0;
+	}
+
+	id = LIBUNWIND__ARCH_REG_ID(regnum);
+	if (id < 0)
+		return -EINVAL;
+
+	ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+	if (ret) {
+		pr_err("unwind: can't read reg %d\n", regnum);
+		return ret;
+	}
+
+	*valp = (unw_word_t) val;
+	pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp);
+	return 0;
+}
+
+static void put_unwind_info(unw_addr_space_t __maybe_unused as,
+			    unw_proc_info_t *pi __maybe_unused,
+			    void *arg __maybe_unused)
+{
+	pr_debug("unwind: put_unwind_info called\n");
+}
+
+static int entry(u64 ip, struct thread *thread,
+		 unwind_entry_cb_t cb, void *arg)
+{
+	struct unwind_entry e;
+	struct addr_location al;
+
+	thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
+				   MAP__FUNCTION, ip, &al);
+
+	e.ip = al.addr;
+	e.map = al.map;
+	e.sym = al.sym;
+
+	pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
+		 al.sym ? al.sym->name : "''",
+		 ip,
+		 al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
+
+	return cb(&e, arg);
+}
+
+static void display_error(int err)
+{
+	switch (err) {
+	case UNW_EINVAL:
+		pr_err("unwind: Only supports local.\n");
+		break;
+	case UNW_EUNSPEC:
+		pr_err("unwind: Unspecified error.\n");
+		break;
+	case UNW_EBADREG:
+		pr_err("unwind: Register unavailable.\n");
+		break;
+	default:
+		break;
+	}
+}
+
+static unw_accessors_t accessors = {
+	.find_proc_info		= find_proc_info,
+	.put_unwind_info	= put_unwind_info,
+	.get_dyn_info_list_addr	= get_dyn_info_list_addr,
+	.access_mem		= access_mem,
+	.access_reg		= access_reg,
+	.access_fpreg		= access_fpreg,
+	.resume			= resume,
+	.get_proc_name		= get_proc_name,
+};
+
+static int _unwind__prepare_access(struct thread *thread)
+{
+	if (!dwarf_callchain_users)
+		return 0;
+	thread->addr_space = unw_create_addr_space(&accessors, 0);
+	if (!thread->addr_space) {
+		pr_err("unwind: Can't create unwind address space.\n");
+		return -ENOMEM;
+	}
+
+	unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL);
+	return 0;
+}
+
+static void _unwind__flush_access(struct thread *thread)
+{
+	if (!dwarf_callchain_users)
+		return;
+	unw_flush_cache(thread->addr_space, 0, 0);
+}
+
+static void _unwind__finish_access(struct thread *thread)
+{
+	if (!dwarf_callchain_users)
+		return;
+	unw_destroy_addr_space(thread->addr_space);
+}
+
+static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
+		       void *arg, int max_stack)
+{
+	u64 val;
+	unw_word_t ips[max_stack];
+	unw_addr_space_t addr_space;
+	unw_cursor_t c;
+	int ret, i = 0;
+
+	ret = perf_reg_value(&val, &ui->sample->user_regs,
+			     LIBUNWIND__ARCH_REG_IP);
+	if (ret)
+		return ret;
+
+	ips[i++] = (unw_word_t) val;
+
+	/*
+	 * If we need more than one entry, do the DWARF
+	 * unwind itself.
+	 */
+	if (max_stack - 1 > 0) {
+		WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
+		addr_space = ui->thread->addr_space;
+
+		if (addr_space == NULL)
+			return -1;
+
+		ret = unw_init_remote(&c, addr_space, ui);
+		if (ret)
+			display_error(ret);
+
+		/* Test the bound before stepping so the cursor is not advanced
+		 * to a frame that cannot be stored in ips[]. */
+		while (!ret && i < max_stack && (unw_step(&c) > 0)) {
+			unw_get_reg(&c, UNW_REG_IP, &ips[i]);
+
+			/*
+			 * Decrement the IP for any non-activation frames.
+			 * This is required to properly find the srcline
+			 * for caller frames.
+			 * See also the documentation for dwfl_frame_pc(),
+			 * which this code tries to replicate.
+			 */
+			if (unw_is_signal_frame(&c) <= 0)
+				--ips[i];
+
+			++i;
+		}
+
+		max_stack = i;
+	}
+
+	/* Display what we got based on the order setup. */
+	for (i = 0; i < max_stack && !ret; i++) {
+		int j = i;
+
+		if (callchain_param.order == ORDER_CALLER)
+			j = max_stack - i - 1;
+		ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0;
+	}
+
+	return ret;
+}
+
+static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+			struct thread *thread,
+			struct perf_sample *data, int max_stack)
+{
+	struct unwind_info ui = {
+		.sample       = data,
+		.thread       = thread,
+		.machine      = thread->mg->machine,
+	};
+
+	if (!data->user_regs.regs)
+		return -EINVAL;
+
+	if (max_stack <= 0)
+		return -EINVAL;
+
+	return get_entries(&ui, cb, arg, max_stack);
+}
+
+static struct unwind_libunwind_ops
+_unwind_libunwind_ops = {
+	.prepare_access = _unwind__prepare_access,
+	.flush_access   = _unwind__flush_access,
+	.finish_access  = _unwind__finish_access,
+	.get_entries    = _unwind__get_entries,
+};
+
+#ifndef REMOTE_UNWIND_LIBUNWIND
+struct unwind_libunwind_ops *
+local_unwind_libunwind_ops = &_unwind_libunwind_ops;
+#endif
diff --git a/util/unwind-libunwind.c b/util/unwind-libunwind.c
new file mode 100644
index 0000000..b029a5e
--- /dev/null
+++ b/util/unwind-libunwind.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "unwind.h"
+#include "thread.h"
+#include "session.h"
+#include "debug.h"
+#include "env.h"
+
+struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops;
+
+static void unwind__register_ops(struct thread *thread,
+			  struct unwind_libunwind_ops *ops)
+{
+	thread->unwind_libunwind_ops = ops;
+}
+
+int unwind__prepare_access(struct thread *thread, struct map *map,
+			   bool *initialized)
+{
+	const char *arch = "unknown";
+	enum dso_type dso_type;
+	struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops;
+	int err;
+
+	if (thread->addr_space) {
+		pr_debug("unwind: thread map already set, dso=%s\n",
+			 map->dso->name);
+		if (initialized)
+			*initialized = true;
+		return 0;
+	}
+
+	/* env->arch is NULL for live-mode (i.e. perf top) */
+	if (!thread->mg->machine->env || !thread->mg->machine->env->arch)
+		goto out_register;
+
+	dso_type = dso__type(map->dso, thread->mg->machine);
+	if (dso_type == DSO__TYPE_UNKNOWN)
+		return 0;
+
+	arch = perf_env__arch(thread->mg->machine->env);
+
+	if (!strcmp(arch, "x86")) {
+		if (dso_type != DSO__TYPE_64BIT)
+			ops = x86_32_unwind_libunwind_ops;
+	} else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) {
+		if (dso_type == DSO__TYPE_64BIT)
+			ops = arm64_unwind_libunwind_ops;
+	}
+
+out_register:
+	/* The __weak ops pointers are NULL unless a backend defines them. */
+	if (!ops) {
+		pr_err("unwind: target platform=%s is not supported\n", arch);
+		return 0;
+	}
+	unwind__register_ops(thread, ops);
+	err = thread->unwind_libunwind_ops->prepare_access(thread);
+	if (initialized)
+		*initialized = err ? false : true;
+	return err;
+}
+
+void unwind__flush_access(struct thread *thread)
+{
+	if (thread->unwind_libunwind_ops)
+		thread->unwind_libunwind_ops->flush_access(thread);
+}
+
+void unwind__finish_access(struct thread *thread)
+{
+	if (thread->unwind_libunwind_ops)
+		thread->unwind_libunwind_ops->finish_access(thread);
+}
+
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+			 struct thread *thread,
+			 struct perf_sample *data, int max_stack)
+{
+	if (thread->unwind_libunwind_ops)
+		return thread->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack);
+	return 0;
+}
diff --git a/util/unwind.h b/util/unwind.h
new file mode 100644
index 0000000..8a44a15
--- /dev/null
+++ b/util/unwind.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UNWIND_H
+#define __UNWIND_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+struct map;
+struct perf_sample;
+struct symbol;
+struct thread;
+
+struct unwind_entry {
+	struct map	*map;
+	struct symbol	*sym;
+	u64		ip;
+};
+
+typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg);
+
+struct unwind_libunwind_ops {
+	int (*prepare_access)(struct thread *thread);
+	void (*flush_access)(struct thread *thread);
+	void (*finish_access)(struct thread *thread);
+	int (*get_entries)(unwind_entry_cb_t cb, void *arg,
+			   struct thread *thread,
+			   struct perf_sample *data, int max_stack);
+};
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+			struct thread *thread,
+			struct perf_sample *data, int max_stack);
+/* libunwind specific */
+#ifdef HAVE_LIBUNWIND_SUPPORT
+#ifndef LIBUNWIND__ARCH_REG_ID
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arch_reg_id(regnum)
+#endif
+
+#ifndef LIBUNWIND__ARCH_REG_SP
+#define LIBUNWIND__ARCH_REG_SP PERF_REG_SP
+#endif
+
+#ifndef LIBUNWIND__ARCH_REG_IP
+#define LIBUNWIND__ARCH_REG_IP PERF_REG_IP
+#endif
+
+int LIBUNWIND__ARCH_REG_ID(int regnum);
+int unwind__prepare_access(struct thread *thread, struct map *map,
+			   bool *initialized);
+void unwind__flush_access(struct thread *thread);
+void unwind__finish_access(struct thread *thread);
+#else
+static inline int unwind__prepare_access(struct thread *thread __maybe_unused,
+					 struct map *map __maybe_unused,
+					 bool *initialized __maybe_unused)
+{
+	return 0;
+}
+
+static inline void unwind__flush_access(struct thread *thread __maybe_unused) {}
+static inline void unwind__finish_access(struct thread *thread __maybe_unused) {}
+#endif
+#else
+static inline int
+unwind__get_entries(unwind_entry_cb_t cb __maybe_unused,
+		    void *arg __maybe_unused,
+		    struct thread *thread __maybe_unused,
+		    struct perf_sample *data __maybe_unused,
+		    int max_stack __maybe_unused)
+{
+	return 0;
+}
+
+static inline int unwind__prepare_access(struct thread *thread __maybe_unused,
+					 struct map *map __maybe_unused,
+					 bool *initialized __maybe_unused)
+{
+	return 0;
+}
+
+static inline void unwind__flush_access(struct thread *thread __maybe_unused) {}
+static inline void unwind__finish_access(struct thread *thread __maybe_unused) {}
+#endif /* HAVE_DWARF_UNWIND_SUPPORT */
+#endif /* __UNWIND_H */
diff --git a/util/usage.c b/util/usage.c
new file mode 100644
index 0000000..070d25c
--- /dev/null
+++ b/util/usage.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * usage.c
+ *
+ * Various reporting routines.
+ * Originally copied from GIT source.
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+#include "util.h"
+#include "debug.h"
+
+static __noreturn void usage_builtin(const char *err)
+{
+	fprintf(stderr, "\n Usage: %s\n", err);
+	exit(129);
+}
+
+/* If we are in a dlopen()ed .so, a write to a global variable would segfault
+ * (ugh), so keep things static. */
+static void (*usage_routine)(const char *err) __noreturn = usage_builtin;
+
+void usage(const char *err)
+{
+	usage_routine(err);
+}
diff --git a/util/util-cxx.h b/util/util-cxx.h
new file mode 100644
index 0000000..80a99e4
--- /dev/null
+++ b/util/util-cxx.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support C++ source use utilities defined in util.h
+ */
+
+#ifndef PERF_UTIL_UTIL_CXX_H
+#define PERF_UTIL_UTIL_CXX_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Now 'new' is the only C++ keyword found in util.h:
+ * in tools/include/linux/rbtree.h
+ *
+ * Other keywords, like class and delete, should be
+ * redefined if necessary.
+ */
+#define new _new
+#include "util.h"
+#undef new
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/util/util.c b/util/util.c
new file mode 100644
index 0000000..1019bbc
--- /dev/null
+++ b/util/util.c
@@ -0,0 +1,476 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../perf.h"
+#include "util.h"
+#include "debug.h"
+#include <api/fs/fs.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/time64.h>
+#include <unistd.h>
+#include "strlist.h"
+
+/*
+ * XXX We need to find a better place for these things...
+ */
+
+bool perf_singlethreaded = true;
+
+void perf_set_singlethreaded(void)
+{
+	perf_singlethreaded = true;
+}
+
+void perf_set_multithreaded(void)
+{
+	perf_singlethreaded = false;
+}
+
+unsigned int page_size;
+int cacheline_size;
+
+int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
+int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK;
+
+bool test_attr__enabled;
+
+bool perf_host  = true;
+bool perf_guest = false;
+
+void event_attr_init(struct perf_event_attr *attr)
+{
+	if (!perf_host)
+		attr->exclude_host  = 1;
+	if (!perf_guest)
+		attr->exclude_guest = 1;
+	/* to capture ABI version */
+	attr->size = sizeof(*attr);
+}
+
+int mkdir_p(char *path, mode_t mode)
+{
+	struct stat st;
+	int err;
+	char *d = path;
+
+	if (*d != '/')
+		return -1;
+
+	if (stat(path, &st) == 0)
+		return 0;
+
+	while (*++d == '/');
+
+	while ((d = strchr(d, '/'))) {
+		*d = '\0';
+		err = stat(path, &st) && mkdir(path, mode);
+		*d++ = '/';
+		if (err)
+			return -1;
+		while (*d == '/')
+			++d;
+	}
+	return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
+}
+
+int rm_rf(const char *path)
+{
+	DIR *dir;
+	int ret = 0;
+	struct dirent *d;
+	char namebuf[PATH_MAX];
+
+	dir = opendir(path);
+	if (dir == NULL)
+		return 0;
+
+	while ((d = readdir(dir)) != NULL && !ret) {
+		struct stat statbuf;
+
+		if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+			continue;
+
+		scnprintf(namebuf, sizeof(namebuf), "%s/%s",
+			  path, d->d_name);
+
+		/* We have to check symbolic link itself */
+		ret = lstat(namebuf, &statbuf);
+		if (ret < 0) {
+			pr_debug("stat failed: %s\n", namebuf);
+			break;
+		}
+
+		if (S_ISDIR(statbuf.st_mode))
+			ret = rm_rf(namebuf);
+		else
+			ret = unlink(namebuf);
+	}
+	closedir(dir);
+
+	if (ret < 0)
+		return ret;
+
+	return rmdir(path);
+}
+
+/* A filter which removes dot files */
+bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d)
+{
+	return d->d_name[0] != '.';
+}
+
+/* lsdir reads a directory and stores its entries in a strlist */
+struct strlist *lsdir(const char *name,
+		      bool (*filter)(const char *, struct dirent *))
+{
+	struct strlist *list = NULL;
+	DIR *dir;
+	struct dirent *d;
+
+	dir = opendir(name);
+	if (!dir)
+		return NULL;
+
+	list = strlist__new(NULL, NULL);
+	if (!list) {
+		errno = ENOMEM;
+		goto out;
+	}
+
+	while ((d = readdir(dir)) != NULL) {
+		if (!filter || filter(name, d))
+			strlist__add(list, d->d_name);
+	}
+
+out:
+	closedir(dir);
+	return list;
+}
+
+static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi)
+{
+	int err = -1;
+	char *line = NULL;
+	size_t n = 0;
+	FILE *from_fp, *to_fp;
+	struct nscookie nsc;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	from_fp = fopen(from, "r");
+	nsinfo__mountns_exit(&nsc);
+	if (from_fp == NULL)
+		return err;
+
+	to_fp = fopen(to, "w");
+	if (to_fp == NULL)
+		goto out_fclose_from;
+
+	while (getline(&line, &n, from_fp) > 0)
+		if (fputs(line, to_fp) == EOF)
+			goto out_fclose_to;
+	err = 0;
+out_fclose_to:
+	/* fclose() flushes buffered output, so its failure is a failed copy. */
+	err = fclose(to_fp) ? -1 : err;
+	free(line);
+out_fclose_from:
+	fclose(from_fp);
+	return err;
+}
+
+static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
+{
+	void *ptr;
+	/* Mask in loff_t: an unsigned int mask would clear off_in's high bits */
+	loff_t pgoff = off_in & ~((loff_t)page_size - 1);
+
+	off_in -= pgoff;
+
+	ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff);
+	if (ptr == MAP_FAILED)
+		return -1;
+
+	while (size) {
+		ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out);
+		if (ret < 0 && errno == EINTR)
+			continue;
+		if (ret <= 0)
+			break;
+
+		size -= ret;
+		off_in += ret;
+		off_out += ret;
+	}
+	munmap(ptr, off_in + size);
+
+	return size ? -1 : 0;
+}
+
+static int copyfile_mode_ns(const char *from, const char *to, mode_t mode,
+			    struct nsinfo *nsi)
+{
+	int fromfd, tofd;
+	struct stat st;
+	int err;
+	char *tmp = NULL, *ptr = NULL;
+	struct nscookie nsc;
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	err = stat(from, &st);
+	nsinfo__mountns_exit(&nsc);
+	if (err)
+		goto out;
+	err = -1;
+
+	/* extra 'x' at the end is to reserve space for '.' */
+	if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) {
+		tmp = NULL;
+		goto out;
+	}
+	ptr = strrchr(tmp, '/');
+	if (!ptr)
+		goto out;
+	ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1);
+	*ptr = '.';
+
+	tofd = mkstemp(tmp);
+	if (tofd < 0)
+		goto out;
+
+	if (fchmod(tofd, mode))
+		goto out_close_to;
+
+	if (st.st_size == 0) { /* /proc? do it slowly... */
+		err = slow_copyfile(from, tmp, nsi);
+		goto out_close_to;
+	}
+
+	nsinfo__mountns_enter(nsi, &nsc);
+	fromfd = open(from, O_RDONLY);
+	nsinfo__mountns_exit(&nsc);
+	if (fromfd < 0)
+		goto out_close_to;
+
+	err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
+
+	close(fromfd);
+out_close_to:
+	close(tofd);
+	if (!err)
+		err = link(tmp, to);
+	unlink(tmp);
+out:
+	free(tmp);
+	return err;
+}
+
+int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi)
+{
+	return copyfile_mode_ns(from, to, 0755, nsi);
+}
+
+int copyfile_mode(const char *from, const char *to, mode_t mode)
+{
+	return copyfile_mode_ns(from, to, mode, NULL);
+}
+
+int copyfile(const char *from, const char *to)
+{
+	return copyfile_mode(from, to, 0755);
+}
+
+static ssize_t ion(bool is_read, int fd, void *buf, size_t n)
+{
+	void *buf_start = buf;
+	size_t left = n;
+
+	while (left) {
+		/* buf must be treated as const if !is_read. */
+		ssize_t ret = is_read ? read(fd, buf, left) :
+					write(fd, buf, left);
+
+		if (ret < 0 && errno == EINTR)
+			continue;
+		if (ret <= 0)
+			return ret;
+
+		left -= ret;
+		buf  += ret;
+	}
+
+	BUG_ON((size_t)(buf - buf_start) != n);
+	return n;
+}
+
+/*
+ * Read exactly 'n' bytes or return an error.
+ */
+ssize_t readn(int fd, void *buf, size_t n)
+{
+	return ion(true, fd, buf, n);
+}
+
+/*
+ * Write exactly 'n' bytes or return an error.
+ */
+ssize_t writen(int fd, const void *buf, size_t n)
+{
+	/* ion does not modify buf. */
+	return ion(false, fd, (void *)buf, n);
+}
+
+size_t hex_width(u64 v)
+{
+	size_t n = 1;
+
+	while ((v >>= 4))
+		++n;
+
+	return n;
+}
+
+/*
+ * While we find nice hex chars, build a long_val.
+ * Return number of chars processed.
+ */
+int hex2u64(const char *ptr, u64 *long_val)
+{
+	char *p;
+
+	*long_val = strtoull(ptr, &p, 16);
+
+	return p - ptr;
+}
+
+int perf_event_paranoid(void)
+{
+	int value;
+
+	if (sysctl__read_int("kernel/perf_event_paranoid", &value))
+		return INT_MAX;
+
+	return value;
+}
+static int
+fetch_ubuntu_kernel_version(unsigned int *puint)
+{
+	ssize_t len;
+	size_t line_len = 0;
+	char *ptr, *line = NULL;
+	int version, patchlevel, sublevel, err;
+	FILE *vsig;
+
+	if (!puint)
+		return 0;
+
+	vsig = fopen("/proc/version_signature", "r");
+	if (!vsig) {
+		pr_debug("Open /proc/version_signature failed: %s\n",
+			 strerror(errno));
+		return -1;
+	}
+
+	len = getline(&line, &line_len, vsig);
+	fclose(vsig);
+	err = -1;
+	if (len <= 0) {
+		pr_debug("Reading from /proc/version_signature failed: %s\n",
+			 strerror(errno));
+		goto errout;
+	}
+
+	ptr = strrchr(line, ' ');
+	if (!ptr) {
+		pr_debug("Parsing /proc/version_signature failed: %s\n", line);
+		goto errout;
+	}
+
+	err = sscanf(ptr + 1, "%d.%d.%d",
+		     &version, &patchlevel, &sublevel);
+	if (err != 3) {
+		pr_debug("Unable to get kernel version from /proc/version_signature '%s'\n",
+			 line);
+		goto errout;
+	}
+
+	*puint = (version << 16) + (patchlevel << 8) + sublevel;
+	err = 0;
+errout:
+	free(line);
+	return err;
+}
+
+int
+fetch_kernel_version(unsigned int *puint, char *str,
+		     size_t str_size)
+{
+	struct utsname utsname;
+	int version, patchlevel, sublevel, err;
+	bool int_ver_ready = false;
+
+	if (access("/proc/version_signature", R_OK) == 0)
+		if (!fetch_ubuntu_kernel_version(puint))
+			int_ver_ready = true;
+
+	if (uname(&utsname))
+		return -1;
+
+	if (str && str_size) {
+		strncpy(str, utsname.release, str_size);
+		str[str_size - 1] = '\0';
+	}
+
+	if (!puint || int_ver_ready)
+		return 0;
+
+	err = sscanf(utsname.release, "%d.%d.%d",
+		     &version, &patchlevel, &sublevel);
+
+	if (err != 3) {
+		pr_debug("Unable to get kernel version from uname '%s'\n",
+			 utsname.release);
+		return -1;
+	}
+
+	*puint = (version << 16) + (patchlevel << 8) + sublevel;
+	return 0;
+}
+
+const char *perf_tip(const char *dirpath)
+{
+	struct strlist *tips;
+	struct str_node *node;
+	char *tip = NULL;
+	struct strlist_config conf = {
+		.dirname = dirpath,
+		.file_only = true,
+	};
+
+	tips = strlist__new("tips.txt", &conf);
+	if (tips == NULL)
+		return errno == ENOENT ? NULL :
+			"Tip: check path of tips.txt or get more memory! ;-p";
+
+	if (strlist__nr_entries(tips) == 0)
+		goto out;
+
+	node = strlist__entry(tips, random() % strlist__nr_entries(tips));
+	if (asprintf(&tip, "Tip: %s", node->s) < 0)
+		tip = (char *)"Tip: get more memory! ;-)";
+
+out:
+	strlist__delete(tips);
+
+	return tip;
+}
diff --git a/util/util.h b/util/util.h
new file mode 100644
index 0000000..c9626c2
--- /dev/null
+++ b/util/util.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef GIT_COMPAT_UTIL_H
+#define GIT_COMPAT_UTIL_H
+
+#define _BSD_SOURCE 1
+/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
+#define _DEFAULT_SOURCE 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+
+/* General helper functions */
+void usage(const char *err) __noreturn;
+void die(const char *err, ...) __noreturn __printf(1, 2);
+
+static inline void *zalloc(size_t size)
+{
+	return calloc(1, size);
+}
+
+#define zfree(ptr) ({ free(*(ptr)); *(ptr) = NULL; })
+
+struct dirent;
+struct nsinfo;
+struct strlist;
+
+int mkdir_p(char *path, mode_t mode);
+int rm_rf(const char *path);
+struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *));
+bool lsdir_no_dot_filter(const char *name, struct dirent *d);
+int copyfile(const char *from, const char *to);
+int copyfile_mode(const char *from, const char *to, mode_t mode);
+int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
+
+ssize_t readn(int fd, void *buf, size_t n);
+ssize_t writen(int fd, const void *buf, size_t n);
+
+size_t hex_width(u64 v);
+int hex2u64(const char *ptr, u64 *val);
+
+extern unsigned int page_size;
+extern int cacheline_size;
+
+int fetch_kernel_version(unsigned int *puint,
+			 char *str, size_t str_sz);
+#define KVER_VERSION(x)		(((x) >> 16) & 0xff)
+#define KVER_PATCHLEVEL(x)	(((x) >> 8) & 0xff)
+#define KVER_SUBLEVEL(x)	((x) & 0xff)
+#define KVER_FMT	"%d.%d.%d"
+#define KVER_PARAM(x)	KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
+
+const char *perf_tip(const char *dirpath);
+
+#ifndef HAVE_SCHED_GETCPU_SUPPORT
+int sched_getcpu(void);
+#endif
+
+#ifndef HAVE_SETNS_SUPPORT
+int setns(int fd, int nstype);
+#endif
+
+extern bool perf_singlethreaded;
+
+void perf_set_singlethreaded(void);
+void perf_set_multithreaded(void);
+
+#ifndef O_CLOEXEC
+#ifdef __sparc__
+#define O_CLOEXEC      0x400000
+#elif defined(__alpha__) || defined(__hppa__)
+#define O_CLOEXEC      010000000
+#else
+#define O_CLOEXEC      02000000
+#endif
+#endif
+
+#endif /* GIT_COMPAT_UTIL_H */
diff --git a/util/values.c b/util/values.c
new file mode 100644
index 0000000..4b7a303
--- /dev/null
+++ b/util/values.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "util.h"
+#include "values.h"
+#include "debug.h"
+
+/*
+ * Allocate the initial (16-entry) thread and counter tables of @values.
+ * Returns 0 on success or -ENOMEM; on failure everything is released and the
+ * struct is left in a state that perf_read_values_destroy() treats as empty.
+ */
+int perf_read_values_init(struct perf_read_values *values)
+{
+	values->threads = values->counters = 0;
+	values->counterrawid = NULL;
+	values->countername = NULL;
+	values->threads_max = 16;
+	values->pid = malloc(values->threads_max * sizeof(*values->pid));
+	values->tid = malloc(values->threads_max * sizeof(*values->tid));
+	values->value = zalloc(values->threads_max * sizeof(*values->value));
+	if (!values->pid || !values->tid || !values->value) {
+		pr_debug("failed to allocate read_values threads arrays");
+		goto out_free;
+	}
+
+	values->counters_max = 16;
+	values->counterrawid = malloc(values->counters_max * sizeof(*values->counterrawid));
+	values->countername = malloc(values->counters_max * sizeof(*values->countername));
+	if (values->counterrawid && values->countername)
+		return 0;
+	pr_debug("failed to allocate read_values counters arrays");
+out_free:
+	zfree(&values->counterrawid);
+	zfree(&values->countername);
+	zfree(&values->pid);
+	zfree(&values->tid);
+	zfree(&values->value);
+	values->threads_max = values->counters_max = 0; /* makes destroy() return early */
+	return -ENOMEM;
+}
+
+/* Free all arrays owned by @values; does nothing if either capacity is 0. */
+void perf_read_values_destroy(struct perf_read_values *values)
+{
+	int row, col;
+
+	if (values->threads_max == 0 || values->counters_max == 0)
+		return;
+	for (row = 0; row < values->threads; row++)
+		zfree(&values->value[row]);
+	zfree(&values->value);
+	zfree(&values->pid);
+	zfree(&values->tid);
+	zfree(&values->counterrawid);
+	for (col = 0; col < values->counters; col++)
+		zfree(&values->countername[col]);
+	zfree(&values->countername);
+}
+
+/*
+ * Double the per-thread arrays.  Each successful realloc() is committed at
+ * once: the old block may be gone, so it must not be kept on partial failure.
+ */
+static int perf_read_values__enlarge_threads(struct perf_read_values *values)
+{
+	int nthreads_max = values->threads_max * 2;
+	void *npid = realloc(values->pid, nthreads_max * sizeof(*values->pid)),
+	     *ntid = realloc(values->tid, nthreads_max * sizeof(*values->tid)),
+	     *nvalue = realloc(values->value, nthreads_max * sizeof(*values->value));
+
+	/* A failed realloc() leaves its old block intact, so keep that pointer. */
+	values->pid = npid ? npid : values->pid;
+	values->tid = ntid ? ntid : values->tid;
+	values->value = nvalue ? nvalue : values->value;
+	if (!npid || !ntid || !nvalue) {
+		pr_debug("failed to enlarge read_values threads arrays");
+		return -ENOMEM;	/* threads_max untouched; grown arrays stay valid */
+	}
+	values->threads_max = nthreads_max;
+	return 0;
+}
+
+/* Return the row index of (@pid, @tid), appending a zero-filled row if absent. */
+static int perf_read_values__findnew_thread(struct perf_read_values *values,
+					    u32 pid, u32 tid)
+{
+	int slot, err;
+
+	for (slot = 0; slot < values->threads; slot++) {
+		if (values->pid[slot] == pid && values->tid[slot] == tid)
+			return slot;
+	}
+
+	if (values->threads == values->threads_max) {
+		err = perf_read_values__enlarge_threads(values);
+		if (err < 0)
+			return err;
+	}
+	slot = values->threads;	/* first unused row */
+	values->value[slot] = zalloc(values->counters_max * sizeof(**values->value));
+	if (values->value[slot] == NULL) {
+		pr_debug("failed to allocate read_values counters array");
+		return -ENOMEM;
+	}
+	values->pid[slot] = pid;
+	values->tid[slot] = tid;
+	values->threads = slot + 1;
+
+	return slot;
+}
+
+/*
+ * Double the capacity of the counter id/name tables and of every thread's
+ * value row, zero-filling the new value slots.  Each successful realloc() is
+ * committed immediately because the old block may already be gone; freeing
+ * the new block on a later failure (as this used to do) left @values with
+ * dangling pointers.  counters_max only grows once everything succeeded.
+ * Returns 0 on success, -ENOMEM otherwise.
+ */
+static int perf_read_values__enlarge_counters(struct perf_read_values *values)
+{
+	char **countername;
+	u64 *counterrawid;
+	int i, j, counters_max = values->counters_max * 2;
+
+	counterrawid = realloc(values->counterrawid,
+			       counters_max * sizeof(*values->counterrawid));
+	if (!counterrawid) {
+		pr_debug("failed to enlarge read_values rawid array");
+		return -ENOMEM;
+	}
+	values->counterrawid = counterrawid;
+
+	countername = realloc(values->countername,
+			      counters_max * sizeof(*values->countername));
+	if (!countername) {
+		pr_debug("failed to enlarge read_values name array");
+		return -ENOMEM;
+	}
+	values->countername = countername;
+
+	for (i = 0; i < values->threads; i++) {
+		u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value));
+
+		if (!value) {
+			pr_debug("failed to enlarge read_values ->values array");
+			return -ENOMEM;
+		}
+		for (j = values->counters_max; j < counters_max; j++)
+			value[j] = 0;
+		values->value[i] = value;
+	}
+	values->counters_max = counters_max;
+	return 0;
+}
+
+/* Column index of @rawid, added with a copy of @name if new; or -ENOMEM. */
+static int perf_read_values__findnew_counter(struct perf_read_values *values,
+					     u64 rawid, const char *name)
+{
+	char *copy;
+	int i;
+
+	for (i = 0; i < values->counters; i++)
+		if (values->counterrawid[i] == rawid)
+			return i;
+	if (values->counters == values->counters_max &&
+	    (i = perf_read_values__enlarge_counters(values)) != 0)
+		return i;
+	copy = strdup(name);	/* a NULL name would crash strlen() in the display code */
+	if (!copy)
+		return -ENOMEM;
+	i = values->counters++;
+	values->counterrawid[i] = rawid;
+	values->countername[i] = copy;
+	return i;
+}
+
+/*
+ * Add @value to cell (@pid/@tid, @rawid); returns 0 or a negative error.
+ */
+int perf_read_values_add_value(struct perf_read_values *values,
+				u32 pid, u32 tid,
+				u64 rawid, const char *name, u64 value)
+{
+	int col, row = perf_read_values__findnew_thread(values, pid, tid);
+
+	if (row < 0)
+		return row;
+	col = perf_read_values__findnew_counter(values, rawid, name);
+	if (col >= 0)
+		values->value[row][col] += value;
+	return col < 0 ? col : 0;
+}
+
+/*
+ * Print @values as a table: one row per thread, one column per counter, each
+ * column as wide as its widest entry (header included).
+ */
+static void perf_read_values__display_pretty(FILE *fp,
+					     struct perf_read_values *values)
+{
+	int t, c, w;
+	int pidwidth = 3, tidwidth = 3;	/* lengths of "PID" and "TID" */
+	int *colwidth = malloc(values->counters * sizeof(*colwidth));
+
+	if (colwidth == NULL) {
+		fprintf(fp, "INTERNAL ERROR: Failed to allocate counterwidth array\n");
+		return;
+	}
+
+	/* Pass 1: measure every column. */
+	for (c = 0; c < values->counters; c++)
+		colwidth[c] = strlen(values->countername[c]);
+	for (t = 0; t < values->threads; t++) {
+		w = snprintf(NULL, 0, "%d", values->pid[t]);
+		if (pidwidth < w)
+			pidwidth = w;
+		w = snprintf(NULL, 0, "%d", values->tid[t]);
+		if (tidwidth < w)
+			tidwidth = w;
+		for (c = 0; c < values->counters; c++) {
+			w = snprintf(NULL, 0, "%" PRIu64, values->value[t][c]);
+			if (colwidth[c] < w)
+				colwidth[c] = w;
+		}
+	}
+
+	/* Pass 2: header line, then one line per thread. */
+	fprintf(fp, "# %*s  %*s", pidwidth, "PID", tidwidth, "TID");
+	for (c = 0; c < values->counters; c++)
+		fprintf(fp, "  %*s", colwidth[c], values->countername[c]);
+	fprintf(fp, "\n");
+
+	for (t = 0; t < values->threads; t++) {
+		fprintf(fp, "  %*d  %*d", pidwidth, values->pid[t], tidwidth, values->tid[t]);
+		for (c = 0; c < values->counters; c++)
+			fprintf(fp, "  %*" PRIu64, colwidth[c], values->value[t][c]);
+		fprintf(fp, "\n");
+	}
+	free(colwidth);
+}
+
+/* Print a header line, then one "pid tid name raw-id count" record per (thread, counter). */
+static void perf_read_values__display_raw(FILE *fp,
+					  struct perf_read_values *values)
+{
+	int width, pidwidth, tidwidth, namewidth, rawwidth, countwidth;
+	int i, j;
+
+	tidwidth = 3; /* TID */
+	pidwidth = 3; /* PID */
+	namewidth = 4; /* "Name" */
+	rawwidth = 3; /* "Raw" */
+	countwidth = 5; /* "Count" */
+
+	for (i = 0; i < values->threads; i++) {
+		width = snprintf(NULL, 0, "%d", values->pid[i]);
+		if (width > pidwidth)
+			pidwidth = width;
+		width = snprintf(NULL, 0, "%d", values->tid[i]);
+		if (width > tidwidth)
+			tidwidth = width;
+	}
+	for (j = 0; j < values->counters; j++) {
+		width = strlen(values->countername[j]);
+		if (width > namewidth)
+			namewidth = width;
+		width = snprintf(NULL, 0, "%" PRIx64, values->counterrawid[j]);
+		if (width > rawwidth)
+			rawwidth = width;
+	}
+	for (i = 0; i < values->threads; i++) {
+		for (j = 0; j < values->counters; j++) {
+			width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
+			if (width > countwidth)
+				countwidth = width;
+		}
+	}
+
+	fprintf(fp, "# %*s  %*s  %*s  %*s  %*s\n", pidwidth, "PID", tidwidth, "TID",
+		namewidth, "Name", rawwidth, "Raw", countwidth, "Count");
+	/* Each record ends in a newline; rows used to run together on one line. */
+	for (i = 0; i < values->threads; i++)
+		for (j = 0; j < values->counters; j++)
+			fprintf(fp, "  %*d  %*d  %*s  %*" PRIx64 "  %*" PRIu64 "\n",
+				pidwidth, values->pid[i],
+				tidwidth, values->tid[i],
+				namewidth, values->countername[j],
+				rawwidth, values->counterrawid[j],
+				countwidth, values->value[i][j]);
+}
+
+/* Dump @values to @fp: record list if @raw is non-zero, aligned table otherwise. */
+void perf_read_values_display(FILE *fp, struct perf_read_values *values, int raw)
+{
+	void (*show)(FILE *, struct perf_read_values *) = raw ?
+		perf_read_values__display_raw : perf_read_values__display_pretty;
+	show(fp, values);
+}
diff --git a/util/values.h b/util/values.h
new file mode 100644
index 0000000..8c41f22
--- /dev/null
+++ b/util/values.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_VALUES_H
+#define __PERF_VALUES_H
+
+#include <linux/types.h>
+
+struct perf_read_values {
+	int threads;		/* thread rows in use */
+	int threads_max;	/* allocated length of pid[], tid[] and value[] */
+	u32 *pid, *tid;		/* per-row process id and thread id */
+	int counters;		/* counter columns in use */
+	int counters_max;	/* allocated length of counterrawid[], countername[] and each value[] row */
+	u64 *counterrawid;	/* per-column raw counter id, the lookup key */
+	char **countername;	/* per-column name string */
+	u64 **value;		/* value[row][column]: accumulated count */
+};
+
+int perf_read_values_init(struct perf_read_values *values);
+void perf_read_values_destroy(struct perf_read_values *values);
+
+int perf_read_values_add_value(struct perf_read_values *values,
+				u32 pid, u32 tid,
+				u64 rawid, const char *name, u64 value);
+
+void perf_read_values_display(FILE *fp, struct perf_read_values *values,
+			      int raw);
+
+#endif /* __PERF_VALUES_H */
diff --git a/util/vdso.c b/util/vdso.c
new file mode 100644
index 0000000..0acb1ec
--- /dev/null
+++ b/util/vdso.c
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <linux/kernel.h>
+
+#include "vdso.h"
+#include "util.h"
+#include "symbol.h"
+#include "machine.h"
+#include "thread.h"
+#include "linux/string.h"
+#include "debug.h"
+
+/*
+ * Include definition of find_vdso_map() also used in perf-read-vdso.c for
+ * building perf-read-vdso32 and perf-read-vdsox32.
+ */
+#include "find-vdso-map.c"
+
+#define VDSO__TEMP_FILE_NAME "/tmp/perf-vdso.so-XXXXXX"
+
+struct vdso_file {
+	bool found;		/* temp file was created and filled successfully */
+	bool error;		/* creating it failed; further attempts are skipped */
+	char temp_file_name[sizeof(VDSO__TEMP_FILE_NAME)];	/* mkstemp() template, rewritten in place */
+	const char *dso_name;	/* short name the dso is looked up / registered under */
+	const char *read_prog;	/* program run via popen() to produce the compat image */
+};
+
+struct vdso_info {
+	struct vdso_file vdso;		/* image copied from the map found by find_vdso_map() */
+#if BITS_PER_LONG == 64
+	struct vdso_file vdso32;	/* image produced by the "perf-read-vdso32" program */
+	struct vdso_file vdsox32;	/* image produced by the "perf-read-vdsox32" program */
+#endif
+};
+
+static struct vdso_info *vdso_info__new(void)
+{
+	static const struct vdso_info vdso_info_init = {
+		.vdso    = {
+			.temp_file_name = VDSO__TEMP_FILE_NAME,
+			.dso_name = DSO__NAME_VDSO,
+		},
+#if BITS_PER_LONG == 64
+		.vdso32  = {
+			.temp_file_name = VDSO__TEMP_FILE_NAME,
+			.dso_name = DSO__NAME_VDSO32,
+			.read_prog = "perf-read-vdso32",
+		},
+		.vdsox32  = {
+			.temp_file_name = VDSO__TEMP_FILE_NAME,
+			.dso_name = DSO__NAME_VDSOX32,
+			.read_prog = "perf-read-vdsox32",
+		},
+#endif
+	};
+	/* Return a writable copy of the template; machine__exit_vdso() zfree()s it. */
+	return memdup(&vdso_info_init, sizeof(vdso_info_init));
+}
+
+/*
+ * Copy the memory range reported by find_vdso_map() into a temp file and
+ * return its name (NULL on failure); the outcome is cached in @vdso_file.
+ */
+static char *get_file(struct vdso_file *vdso_file)
+{
+	char *vdso = NULL, *buf;
+	void *start, *end;
+	size_t size;
+	int fd;
+
+	if (vdso_file->found)
+		return vdso_file->temp_file_name;
+
+	if (vdso_file->error || find_vdso_map(&start, &end))
+		return NULL;
+
+	size = end - start;
+	buf = memdup(start, size);
+	if (!buf)
+		return NULL;
+
+	fd = mkstemp(vdso_file->temp_file_name);
+	if (fd >= 0) {
+		if (size == (size_t) write(fd, buf, size))
+			vdso = vdso_file->temp_file_name;
+		close(fd);
+		if (!vdso)	/* nobody unlinks a !found file later: do it now */
+			unlink(vdso_file->temp_file_name);
+	}
+	free(buf);
+
+	vdso_file->found = (vdso != NULL);
+	vdso_file->error = !vdso_file->found;
+	return vdso;
+}
+
+/* Unlink the temp files that were created for @machine and free its vdso_info. */
+void machine__exit_vdso(struct machine *machine)
+{
+	struct vdso_info *info = machine->vdso_info;
+
+	if (info == NULL)
+		return;
+	if (info->vdso.found)
+		unlink(info->vdso.temp_file_name);
+#if BITS_PER_LONG == 64
+	if (info->vdso32.found)
+		unlink(info->vdso32.temp_file_name);
+	if (info->vdsox32.found)
+		unlink(info->vdsox32.temp_file_name);
+#endif
+
+	zfree(&machine->vdso_info);
+}
+
+/* Create a dso called @short_name, add it to @machine and set @long_name on it. */
+static struct dso *__machine__addnew_vdso(struct machine *machine, const char *short_name,
+					  const char *long_name)
+{
+	struct dso *dso = dso__new(short_name);
+
+	if (dso == NULL)
+		return NULL;
+
+	__dsos__add(&machine->dsos, dso);
+	dso__set_long_name(dso, long_name, false);
+	return dso;
+}
+
+/* Type of the first '/'-named dso of @thread with a known type, else UNKNOWN. */
+static enum dso_type machine__thread_dso_type(struct machine *machine,
+					      struct thread *thread)
+{
+	struct map *map = map_groups__first(thread->mg, MAP__FUNCTION);
+
+	while (map != NULL) {
+		struct dso *dso = map->dso;
+
+		if (dso != NULL && dso->long_name[0] == '/') {
+			enum dso_type type = dso__type(dso, machine);
+
+			if (type != DSO__TYPE_UNKNOWN)
+				return type;
+		}
+		map = map_groups__next(map);
+	}
+	return DSO__TYPE_UNKNOWN;
+}
+
+#if BITS_PER_LONG == 64
+
+/* Copy everything readable from @f to @fd; returns 0 or a negative errno. */
+static int vdso__do_copy_compat(FILE *f, int fd)
+{
+	char buf[4096];
+	size_t count;
+
+	do {
+		count = fread(buf, 1, sizeof(buf), f);
+		if (ferror(f))
+			return -errno;
+		/* Write before testing EOF: the last short chunk arrives with it. */
+		if (count && writen(fd, buf, count) != (ssize_t)count)
+			return -errno;
+	} while (!feof(f));
+
+	return 0;
+}
+
+/* Run @prog through popen() and copy its standard output to @fd. */
+static int vdso__copy_compat(const char *prog, int fd)
+{
+	int err;
+	FILE *pipe = popen(prog, "r");
+
+	if (pipe == NULL)
+		return -errno;
+
+	err = vdso__do_copy_compat(pipe, fd);
+	/* A failing pclose() overrides whatever the copy returned. */
+	if (pclose(pipe) == -1)
+		err = -errno;
+
+	return err;
+}
+
+static int vdso__create_compat_file(const char *prog, char *temp_name)
+{
+	int fd, err;
+
+	fd = mkstemp(temp_name);
+	if (fd < 0)
+		return -errno;
+
+	err = vdso__copy_compat(prog, fd);
+
+	if (close(fd) == -1)
+		return -errno;
+
+	return err;
+}
+
+/* Name of the compat image temp file, created on first use; NULL if that failed. */
+static const char *vdso__get_compat_file(struct vdso_file *vdso_file)
+{
+	int err;
+
+	if (!vdso_file->found) {
+		if (vdso_file->error)
+			return NULL;
+
+		err = vdso__create_compat_file(vdso_file->read_prog,
+					       vdso_file->temp_file_name);
+		if (err != 0) {
+			pr_err("%s failed, error %d\n", vdso_file->read_prog, err);
+			vdso_file->error = true;
+			return NULL;
+		}
+		/* Remember the success so the file is only created once. */
+		vdso_file->found = true;
+	}
+
+	return vdso_file->temp_file_name;
+}
+
+/*
+ * Find the dso named by @vdso_file in @machine, or create it from the temp file.
+ */
+static struct dso *__machine__findnew_compat(struct machine *machine,
+					     struct vdso_file *vdso_file)
+{
+	const char *file_name;
+	struct dso *dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true);
+
+	if (dso != NULL)
+		return dso;
+
+	file_name = vdso__get_compat_file(vdso_file);
+	if (file_name == NULL)
+		return NULL;
+
+	return __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
+}
+
+static int __machine__findnew_vdso_compat(struct machine *machine,
+					  struct thread *thread,
+					  struct vdso_info *vdso_info,
+					  struct dso **dso)
+{
+	enum dso_type dso_type;
+
+	dso_type = machine__thread_dso_type(machine, thread);
+	/* Return 0 ("not handled") when the matching HAVE_PERF_READ_VDSO* macro is unset. */
+#ifndef HAVE_PERF_READ_VDSO32
+	if (dso_type == DSO__TYPE_32BIT)
+		return 0;
+#endif
+#ifndef HAVE_PERF_READ_VDSOX32
+	if (dso_type == DSO__TYPE_X32BIT)
+		return 0;
+#endif
+
+	switch (dso_type) {
+	case DSO__TYPE_32BIT:
+		*dso = __machine__findnew_compat(machine, &vdso_info->vdso32);
+		return 1;	/* handled, even if *dso came back NULL */
+	case DSO__TYPE_X32BIT:
+		*dso = __machine__findnew_compat(machine, &vdso_info->vdsox32);
+		return 1;	/* handled, even if *dso came back NULL */
+	case DSO__TYPE_UNKNOWN:
+	case DSO__TYPE_64BIT:
+	default:
+		return 0;	/* *dso is left untouched */
+	}
+}
+
+#endif
+
+/*
+ * Look up an already registered vDSO dso that suits @thread, chosen by the
+ * dso type machine__thread_dso_type() reports for it.  Nothing is created
+ * here; returns NULL when no suitable dso is registered.
+ */
+static struct dso *machine__find_vdso(struct machine *machine,
+				      struct thread *thread)
+{
+	enum dso_type type = machine__thread_dso_type(machine, thread);
+	struct dso *found;
+
+	switch (type) {
+	case DSO__TYPE_32BIT:
+		found = __dsos__find(&machine->dsos, DSO__NAME_VDSO32, true);
+		if (found != NULL)
+			return found;
+		/* Fall back to "[vdso]", but only if its type is 32-bit too. */
+		found = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+		if (found != NULL && type != dso__type(found, machine))
+			found = NULL;
+		return found;
+	case DSO__TYPE_X32BIT:
+		return __dsos__find(&machine->dsos, DSO__NAME_VDSOX32, true);
+	case DSO__TYPE_64BIT:
+	case DSO__TYPE_UNKNOWN:
+	default:
+		return __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+	}
+}
+
+struct dso *machine__findnew_vdso(struct machine *machine,
+				  struct thread *thread)
+{
+	struct vdso_info *vdso_info;
+	struct dso *dso = NULL;
+
+	down_write(&machine->dsos.lock);	/* held until out_unlock; lookups and additions happen under it */
+	if (!machine->vdso_info)
+		machine->vdso_info = vdso_info__new();	/* created lazily on first call */
+
+	vdso_info = machine->vdso_info;
+	if (!vdso_info)
+		goto out_unlock;
+
+	dso = machine__find_vdso(machine, thread);	/* an already registered dso wins */
+	if (dso)
+		goto out_unlock;
+
+#if BITS_PER_LONG == 64
+	if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
+		goto out_unlock;	/* compat path handled it; dso may still be NULL */
+#endif
+
+	dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+	if (!dso) {
+		char *file;
+
+		file = get_file(&vdso_info->vdso);
+		if (file)
+			dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
+	}
+
+out_unlock:
+	dso__get(dso);	/* also reached with dso == NULL; presumably takes a reference for the caller */
+	up_write(&machine->dsos.lock);
+	return dso;
+}
+
+/* True if @dso carries one of the three vDSO short names. */
+bool dso__is_vdso(struct dso *dso)
+{
+	const char *n = dso->short_name;
+	return !(strcmp(n, DSO__NAME_VDSO) && strcmp(n, DSO__NAME_VDSO32) && strcmp(n, DSO__NAME_VDSOX32));
+}
diff --git a/util/vdso.h b/util/vdso.h
new file mode 100644
index 0000000..bc74ace
--- /dev/null
+++ b/util/vdso.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_VDSO__
+#define __PERF_VDSO__
+
+#include <linux/types.h>
+#include <string.h>
+#include <stdbool.h>
+
+#define VDSO__MAP_NAME "[vdso]"
+
+#define DSO__NAME_VDSO    "[vdso]"
+#define DSO__NAME_VDSO32  "[vdso32]"
+#define DSO__NAME_VDSOX32 "[vdsox32]"
+
+static inline bool is_vdso_map(const char *filename)
+{
+	return strcmp(filename, VDSO__MAP_NAME) == 0;	/* exact match with VDSO__MAP_NAME */
+}
+
+struct dso;
+
+bool dso__is_vdso(struct dso *dso);
+
+struct machine;
+struct thread;
+
+struct dso *machine__findnew_vdso(struct machine *machine, struct thread *thread);
+void machine__exit_vdso(struct machine *machine);
+
+#endif /* __PERF_VDSO__ */
diff --git a/util/xyarray.c b/util/xyarray.c
new file mode 100644
index 0000000..dc95154
--- /dev/null
+++ b/util/xyarray.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "xyarray.h"
+#include "util.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Allocate a zero-filled xlen-by-ylen table of entry_size-byte cells, or NULL. */
+struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
+{
+	size_t row_size = ylen * entry_size;
+	struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size);
+
+	if (xy == NULL)
+		return NULL;
+	xy->entry_size = entry_size;
+	xy->row_size   = row_size;
+	xy->entries    = (size_t)xlen * ylen;	/* was an int multiply that could overflow */
+	xy->max_x      = xlen;
+	xy->max_y      = ylen;
+	return xy;
+}
+
+/* Zero every cell of @xy. */
+void xyarray__reset(struct xyarray *xy)
+{
+	const size_t bytes = xy->entries * xy->entry_size;
+	memset(xy->contents, 0, bytes);
+}
+
+void xyarray__delete(struct xyarray *xy)
+{
+	free(xy);	/* contents[] is part of the same allocation made by xyarray__new() */
+}
diff --git a/util/xyarray.h b/util/xyarray.h
new file mode 100644
index 0000000..7ffe562
--- /dev/null
+++ b/util/xyarray.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_XYARRAY_H_
+#define _PERF_XYARRAY_H_ 1
+
+#include <sys/types.h>
+
+struct xyarray {
+	size_t row_size;	/* bytes per x row: max_y * entry_size */
+	size_t entry_size;	/* bytes per cell */
+	size_t entries;		/* total cell count: max_x * max_y */
+	size_t max_x;		/* extent of the first index */
+	size_t max_y;		/* extent of the second index */
+	char contents[];	/* the cells, one x row after another */
+};
+
+struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
+void xyarray__delete(struct xyarray *xy);
+void xyarray__reset(struct xyarray *xy);
+
+static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
+{
+	return xy->contents + (x * xy->row_size + y * xy->entry_size);	/* cell (x, y); no bounds check */
+}
+
+static inline int xyarray__max_y(struct xyarray *xy)
+{
+	return xy->max_y;	/* extent of the second index, converted from size_t to int */
+}
+
+static inline int xyarray__max_x(struct xyarray *xy)
+{
+	return xy->max_x;	/* extent of the first index, converted from size_t to int */
+}
+
+#endif /* _PERF_XYARRAY_H_ */
diff --git a/util/zlib.c b/util/zlib.c
new file mode 100644
index 0000000..a725b95
--- /dev/null
+++ b/util/zlib.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <zlib.h>
+
+#include "util/compress.h"
+#include "util/util.h"
+#include "util/debug.h"
+
+
+#define CHUNK_SIZE  16384
+
+/*
+ * Inflate the gzip file @input into @output_fd; returns 0 on success, else -1.
+ */
+int gzip_decompress_to_file(const char *input, int output_fd)
+{
+	int ret = Z_STREAM_ERROR;
+	int input_fd;
+	void *ptr;
+	int len;
+	struct stat stbuf;
+	unsigned char buf[CHUNK_SIZE];
+	z_stream zs = {
+		.zalloc		= Z_NULL,
+		.zfree		= Z_NULL,
+		.opaque		= Z_NULL,
+		.avail_in	= 0,
+		.next_in	= Z_NULL,
+	};
+
+	input_fd = open(input, O_RDONLY);
+	if (input_fd < 0)
+		return -1;
+
+	if (fstat(input_fd, &stbuf) < 0)
+		goto out_close;
+
+	ptr = mmap(NULL, stbuf.st_size, PROT_READ, MAP_PRIVATE, input_fd, 0);
+	if (ptr == MAP_FAILED)
+		goto out_close;
+
+	if (inflateInit2(&zs, 16 + MAX_WBITS) != Z_OK)
+		goto out_unmap;
+
+	zs.next_in = ptr;
+	zs.avail_in = stbuf.st_size;
+
+	do {
+		zs.next_out = buf;
+		zs.avail_out = CHUNK_SIZE;
+
+		ret = inflate(&zs, Z_NO_FLUSH);
+		/*
+		 * All input is already mapped, so any result other than Z_OK
+		 * or Z_STREAM_END (e.g. Z_BUF_ERROR on a truncated file) is
+		 * final; retrying it used to loop forever.
+		 */
+		if (ret != Z_OK && ret != Z_STREAM_END)
+			goto out;
+
+		len = CHUNK_SIZE - zs.avail_out;
+		if (writen(output_fd, buf, len) != len) {
+			ret = Z_DATA_ERROR;
+			goto out;
+		}
+
+	} while (ret != Z_STREAM_END);
+
+out:
+	inflateEnd(&zs);
+out_unmap:
+	munmap(ptr, stbuf.st_size);
+out_close:
+	close(input_fd);
+
+	return ret == Z_STREAM_END ? 0 : -1;
+}